#!/bin/bash

# Copyright 2014  Pegah Ghahremani
# Apache 2.0

# Decode a BNF + SGMM_MMI system.
#
# Stages (each one is skipped when its ".done" marker file is up to date):
#   1. dump bottleneck features for the chosen data set,
#   2. build fMLLR features, append them to the BNF features, fake CMVN stats,
#   3. fMLLR (SAT) decoding with $exp_dir/tri6,
#   4. SGMM2 decoding with $exp_dir/sgmm7,
#   5. SGMM+MMI rescoring (iterations 1-4) followed by KWS/STT,
#   6. hybrid nnet decoding with exp_bnf/tri7_nnet followed by KWS/STT.
#
# Usage: <this-script> [--type <set>] [--semisupervised true|false] ...
# Must be run from the recipe directory: conf/, utils/, steps/ and local/
# are all referenced with relative paths.

set -e
set -o pipefail

. conf/common_vars.sh || exit 1;
. ./lang.conf || exit 1;

# Defaults; utils/parse_options.sh (sourced below) is expected to override
# them from "--name value" command line arguments.
type=dev10h
data_only=false
fast_path=true
skip_kws=false
extra_kws=false
skip_stt=false
skip_scoring=false
tmpdir=$(pwd)
semisupervised=true

. utils/parse_options.sh

if [ $# -ne 0 ]; then
  # The list of sets mirrors the validation of $type further down.
  echo "Usage: $(basename "$0") --type (dev10h|dev2h|eval|unsup)"
  echo "--semisupervised #set to false to skip unsupervised training."
  exit 1
fi

# $babel_type is not set in this script; presumably it comes from one of the
# sourced configuration files. It is quoted so that an empty value does not
# turn the test into a syntax error.
if [ "$babel_type" == "full" ] && $semisupervised; then
  echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false "
  exit 1
fi

if $semisupervised; then
  unsup_string="_semi_supervised"
else
  unsup_string=""  # "": supervised training, _semi_supervised: unsupervised BNF training
fi

if ! echo {dev10h,dev2h,eval,unsup}{,.uem,.seg} | grep -w "$type" >/dev/null; then
  # note: echo dev10.uem | grep -w dev10h will produce a match, but this
  # doesn't matter because dev10h is also a valid value.
  echo "Invalid variable type=${type}, valid values are " {dev10h,dev2h,eval,unsup}{,.uem,.seg}
  exit 1;
fi

dirid=${type}
exp_dir=exp_bnf${unsup_string}
data_bnf_dir=data_bnf${unsup_string}
param_bnf_dir=param_bnf${unsup_string}
datadir=$data_bnf_dir/${dirid}

# Runs KWS/STT post-processing (local/run_kws_stt_task.sh) on one decode dir.
# Globals:   cer, max_states, skip_scoring, decode_cmd, skip_kws, skip_stt,
#            extra_kws, wip, shadow_set_extra_opts, lmwt_bnf_extra_opts,
#            datadir (all read)
# Arguments: $1 - decode directory to process
# Returns:   exit status of local/run_kws_stt_task.sh
run_kws_stt() {
  local decode_dir=$1
  local/run_kws_stt_task.sh --cer "$cer" --max-states "$max_states" \
    --skip-scoring "$skip_scoring" --cmd "$decode_cmd" \
    --skip-kws "$skip_kws" --skip-stt "$skip_stt" \
    --extra-kws "$extra_kws" --wip "$wip" \
    "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
    "${datadir}" data/lang "$decode_dir"
}

if [ ! -d "data/${dirid}" ]; then
  echo "No such directory data/${dirid}"
  exit 1;
fi
if [ ! -d "exp/tri5/decode_${dirid}" ]; then
  echo "No such directory exp/tri5/decode_${dirid}"
  exit 1;
fi

# Set my_nj; typically 64.
my_nj=$(cat "exp/tri5/decode_${dirid}/num_jobs") || exit 1;

# Re-dump the bottleneck features unless their .done marker is newer than
# both the tri5 decode and the tri6_bnf markers.
if [ ! "$data_bnf_dir/${dirid}_bnf/.done" -nt "exp/tri5/decode_${dirid}/.done" ] || \
   [ ! "$data_bnf_dir/${dirid}_bnf/.done" -nt "$exp_dir/tri6_bnf/.done" ]; then
  # put the archives in $param_bnf_dir/.
  local/nnet2/dump_bottleneck_features.sh --nj "$my_nj" --cmd "$train_cmd" \
    --transform-dir "exp/tri5/decode_${dirid}" "data/${dirid}" \
    "$data_bnf_dir/${dirid}_bnf" "$exp_dir/tri6_bnf" "$param_bnf_dir" "$exp_dir/dump_bnf"
  touch "$data_bnf_dir/${dirid}_bnf/.done"
fi

# Build the final feature directory: fMLLR features appended to BNF features.
if [ ! "$data_bnf_dir/${dirid}/.done" -nt "$data_bnf_dir/${dirid}_bnf/.done" ]; then
  steps/nnet/make_fmllr_feats.sh --cmd "$train_cmd -tc 10" \
    --nj "$train_nj" --transform-dir "exp/tri5/decode_${dirid}" \
    "$data_bnf_dir/${dirid}_sat" "data/${dirid}" \
    exp/tri5_ali "$exp_dir/make_fmllr_feats/log" "$param_bnf_dir/"

  steps/append_feats.sh --cmd "$train_cmd" --nj 4 \
    "$data_bnf_dir/${dirid}_bnf" "$data_bnf_dir/${dirid}_sat" "$data_bnf_dir/${dirid}" \
    "$exp_dir/append_feats/log" "$param_bnf_dir/"

  steps/compute_cmvn_stats.sh --fake \
    "$data_bnf_dir/${dirid}" "$exp_dir/make_fmllr_feats" "$param_bnf_dir"

  # The intermediate fMLLR-only directory is no longer needed after appending.
  rm -r "$data_bnf_dir/${dirid}_sat"

  if ! $skip_kws; then
    cp -r "data/${dirid}"/kws* "$data_bnf_dir/${dirid}/"
  fi
  touch "$data_bnf_dir/${dirid}/.done"
fi

if $data_only; then
  echo "Exiting, as data-only was requested... "
  # Actually stop here; without this the script would go on to decode.
  exit 0
fi

####################################################################
##
## FMLLR decoding
##
####################################################################
decode=$exp_dir/tri6/decode_${dirid}
if [ ! -f "${decode}/.done" ]; then
  echo ---------------------------------------------------------------------
  echo "Decoding with SAT models on top of bottleneck features on" "$(date)"
  echo ---------------------------------------------------------------------
  utils/mkgraph.sh \
    data/lang "$exp_dir/tri6" "$exp_dir/tri6/graph" | tee "$exp_dir/tri6/mkgraph.log"

  mkdir -p "$decode"
  #By default, we do not care about the lattices for this step -- we just want the transforms
  #Therefore, we will reduce the beam sizes, to reduce the decoding times
  steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4 \
    --acwt "$bnf_decode_acwt" \
    --nj "$my_nj" --cmd "$decode_cmd" "${decode_extra_opts[@]}" \
    "$exp_dir/tri6/graph" "${datadir}" "${decode}" | tee "${decode}/decode.log"
  touch "${decode}/.done"
fi

if ! $fast_path; then
  run_kws_stt "${decode}"
  run_kws_stt "${decode}.si"
fi

####################################################################
## SGMM2 decoding
####################################################################
decode=$exp_dir/sgmm7/decode_fmllr_${dirid}
if [ ! -f "$decode/.done" ]; then
  echo ---------------------------------------------------------------------
  echo "Spawning $decode on" "$(date)"
  echo ---------------------------------------------------------------------
  utils/mkgraph.sh \
    data/lang "$exp_dir/sgmm7" "$exp_dir/sgmm7/graph" | tee "$exp_dir/sgmm7/mkgraph.log"

  mkdir -p "$decode"
  steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj "$my_nj" \
    --acwt "$bnf_decode_acwt" \
    --cmd "$decode_cmd" --transform-dir "$exp_dir/tri6/decode_${dirid}" "${decode_extra_opts[@]}" \
    "$exp_dir/sgmm7/graph" "${datadir}" "$decode" | tee "$decode/decode.log"
  touch "$decode/.done"
fi

if ! $fast_path; then
  run_kws_stt "$exp_dir/sgmm7/decode_fmllr_${dirid}"
fi

####################################################################
##
## SGMM_MMI rescoring
##
####################################################################

for iter in 1 2 3 4; do
  # Decode SGMM+MMI (via rescoring).
  decode=$exp_dir/sgmm7_mmi_b0.1/decode_fmllr_${dirid}_it$iter
  if [ ! -f "$decode/.done" ]; then
    mkdir -p "$decode"
    steps/decode_sgmm2_rescore.sh --skip-scoring true \
      --cmd "$decode_cmd" --iter "$iter" --transform-dir "$exp_dir/tri6/decode_${dirid}" \
      data/lang "${datadir}" "$exp_dir/sgmm7/decode_fmllr_${dirid}" "$decode" | tee "${decode}/decode.log"
    touch "$decode/.done"
  fi
done

#We are done -- all lattices have been generated. We have to
#a)Run MBR decoding
#b)Run KW search
for iter in 1 2 3 4; do
  # Post-process each SGMM+MMI rescoring iteration.
  decode=$exp_dir/sgmm7_mmi_b0.1/decode_fmllr_${dirid}_it$iter
  run_kws_stt "$decode"
done

####################################################################
## Hybrid (tri7_nnet) decoding
####################################################################
# NOTE(review): this section hard-codes exp_bnf/ and data_bnf/ instead of
# $exp_dir and $data_bnf_dir, while still decoding ${datadir} (which lives
# under $data_bnf_dir). With --semisupervised true these point at different
# trees -- confirm whether that is intended before changing it.
if [ ! "exp_bnf/tri7_nnet/decode_${dirid}/.done" -nt "data_bnf/${dirid}_bnf/.done" ] || \
   [ ! "exp_bnf/tri7_nnet/decode_${dirid}/.done" -nt exp_bnf/tri7_nnet/.done ]; then

  echo ---------------------------------------------------------------------
  echo "Decoding hybrid system on top of bottleneck features on" "$(date)"
  echo ---------------------------------------------------------------------

  # We use the graph from tri6.
  utils/mkgraph.sh \
    data/lang exp_bnf/tri6 exp_bnf/tri6/graph | tee exp_bnf/tri6/mkgraph.log

  decode=exp_bnf/tri7_nnet/decode_${dirid}
  if [ ! -f "$decode/.done" ]; then
    mkdir -p "$decode"
    steps/nnet2/decode.sh --cmd "$decode_cmd" --nj "$my_nj" \
      --acwt "$bnf_decode_acwt" \
      --beam "$dnn_beam" --lat-beam "$dnn_lat_beam" \
      --skip-scoring true "${decode_extra_opts[@]}" \
      --feat-type raw \
      exp_bnf/tri6/graph "${datadir}" "$decode" | tee "$decode/decode.log"

    touch "$decode/.done"
  fi

  run_kws_stt "$decode"
fi

echo "$0: Everything looking good...."
exit 0