#!/bin/bash

# Warning: this recipe is deprecated.  See ../s5b/ for the latest recipe.
. cmd.sh
exit 1;

# This is a shell script, but it's recommended that you run the commands one by
# one by copying and pasting into the shell.

# Caution: some of the graph creation steps use quite a bit of memory, so you
# should run this on a machine that has sufficient memory.

# Data prep
#local/swbd_p1_data_prep.sh /mnt/matylda2/data/SWITCHBOARD_1R2
local/swbd_p1_data_prep.sh /data/corpora0/LDC97S62/
#local/swbd_p1_data_prep.sh /export/corpora3/LDC/LDC97S62

local/swbd_p1_prepare_dict.sh

utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang

local/swbd_p1_train_lms.sh
local/swbd_p1_format_data.sh

# Data preparation and formatting for eval2000 (note: the "text" file
# is not very much preprocessed; for actual WER reporting we'll use
# sclite).
#local/eval2000_data_prep.sh /mnt/matylda2/data/HUB5_2000/ /mnt/matylda2/data/HUB5_2000/2000_hub5_eng_eval_tr
#local/eval2000_data_prep.sh /export/corpora2/LDC/LDC2002S09/hub5e_00 /export/corpora2/LDC/LDC2002T43
local/eval2000_data_prep.sh /data/corpora0/LDC2002S09/hub5e_00 /data/corpora0/LDC2002T43 || exit 1;

. cmd.sh
# mfccdir should be some place with a largish disk where you
# want to store MFCC features.
mfccdir=`pwd`/mfcc
steps/make_mfcc.sh --nj 20 --cmd "$train_cmd" data/train exp/make_mfcc/train $mfccdir || exit 1;

# Don't do "|| exit 1" here because actually some speakers don't have data;
# we'll get rid of them later.  Ignore this error.
steps/compute_cmvn_stats.sh data/train exp/make_mfcc/train $mfccdir

# After this, the next command will remove the small number of utterances
# that couldn't be extracted for some reason (e.g. too short; no such file).
utils/fix_data_dir.sh data/train || exit 1;

steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/eval2000 exp/make_mfcc/eval2000 $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/eval2000 exp/make_mfcc/eval2000 $mfccdir || exit 1;
utils/fix_data_dir.sh data/eval2000  # remove segments that had problems, e.g. too short.

# Use the first 4k sentences as the dev set.  Note: when we trained the LM, we used
# the 1st 10k sentences as the dev set, so the 1st 4k won't have been used in the
# LM training data.  However, they will be in the lexicon, plus speakers
# may overlap, so it's still not quite equivalent to a test set.
utils/subset_data_dir.sh --first data/train 4000 data/train_dev  # 5.3 hours.
n=$[`cat data/train/segments | wc -l` - 4000]
utils/subset_data_dir.sh --last data/train $n data/train_nodev

# Now there are 264k utterances, and we want to start the monophone training
# on relatively short utterances (easier to align), but not only the very shortest
# ones (mostly uh-huh).  So take the 100k shortest ones, and then take 10k random
# utterances from those.
utils/subset_data_dir.sh --shortest data/train_nodev 100000 data/train_100kshort
utils/subset_data_dir.sh data/train_100kshort 10000 data/train_10k
local/remove_dup_utts.sh 100 data/train_10k data/train_10k_nodup

# Take the first 30k utterances (about 1/8th of the data).
utils/subset_data_dir.sh --first data/train_nodev 30000 data/train_30k
local/remove_dup_utts.sh 200 data/train_30k data/train_30k_nodup

local/remove_dup_utts.sh 300 data/train_nodev data/train_nodup
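
# Roughly speaking, the numeric argument to local/remove_dup_utts.sh above is a cap on
# how many utterances sharing an identical transcript (e.g. "uh-huh") are kept, so the
# subsets aren't dominated by such repetitions.  The following commented-out lines are
# only an illustrative sketch of that idea, not the actual script (which also fixes up
# the rest of the data directory); the output file name uttlist_capped is just an example.
# awk -v maxcount=100 '{ text = $0; sub(/^[^ ]+ /, "", text); count[text]++;
#                        if (count[text] <= maxcount) print $1 }' \
#   data/train_10k/text > uttlist_capped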

# Take the first 100k utterances (just under half the data); we'll use
# this for later stages of training.
utils/subset_data_dir.sh --first data/train_nodev 100000 data/train_100k
local/remove_dup_utts.sh 200 data/train_100k data/train_100k_nodup

# The next commands are not necessary for the scripts to run, but increase
# efficiency of data access by putting the mfcc's of the subsets
# in a contiguous place in a file.
( . path.sh;  # make sure mfccdir is defined as above.
  cp data/train_10k_nodup/feats.scp{,.bak}
  copy-feats scp:data/train_10k_nodup/feats.scp ark,scp:$mfccdir/kaldi_swbd_10k_nodup.ark,$mfccdir/kaldi_swbd_10k_nodup.scp \
    && cp $mfccdir/kaldi_swbd_10k_nodup.scp data/train_10k_nodup/feats.scp
)
( . path.sh;  # make sure mfccdir is defined as above.
  cp data/train_30k_nodup/feats.scp{,.bak}
  copy-feats scp:data/train_30k_nodup/feats.scp ark,scp:$mfccdir/kaldi_swbd_30k_nodup.ark,$mfccdir/kaldi_swbd_30k_nodup.scp \
    && cp $mfccdir/kaldi_swbd_30k_nodup.scp data/train_30k_nodup/feats.scp
)

steps/train_mono.sh --nj 10 --cmd "$train_cmd" \
  data/train_10k_nodup data/lang exp/mono0a || exit 1;

steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train_30k_nodup data/lang exp/mono0a exp/mono0a_ali || exit 1;

steps/train_deltas.sh --cmd "$train_cmd" \
  2500 20000 data/train_30k_nodup data/lang exp/mono0a_ali exp/tri1 || exit 1;

utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph
steps/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
  exp/tri1/graph data/eval2000 exp/tri1/decode_eval2000

# MAP-adapted decoding example.
#steps/decode_with_map.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
#  exp/tri1/graph data/eval2000 exp/tri1/decode_eval2000_map

steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train_30k_nodup data/lang exp/tri1 exp/tri1_ali || exit 1;

steps/train_deltas.sh --cmd "$train_cmd" \
  2500 20000 data/train_30k_nodup data/lang exp/tri1_ali exp/tri2 || exit 1;

(
  utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1;
  steps/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
    exp/tri2/graph data/eval2000 exp/tri2/decode_eval2000 || exit 1;
)&

steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train_30k_nodup data/lang exp/tri2 exp/tri2_ali || exit 1;

# Train tri3a, which is LDA+MLLT, on the 30k_nodup data.
steps/train_lda_mllt.sh --cmd "$train_cmd" \
  --splice-opts "--left-context=3 --right-context=3" \
  2500 20000 data/train_30k_nodup data/lang exp/tri2_ali exp/tri3a || exit 1;

(
  utils/mkgraph.sh data/lang_test exp/tri3a exp/tri3a/graph || exit 1;
  steps/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
    exp/tri3a/graph data/eval2000 exp/tri3a/decode_eval2000 || exit 1;
)&

# From now on we start building a more serious system (with SAT), and we'll
# do the alignment with fMLLR.
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train_100k_nodup data/lang exp/tri3a exp/tri3a_ali_100k_nodup || exit 1;

steps/train_sat.sh --cmd "$train_cmd" \
  4000 100000 data/train_100k_nodup data/lang exp/tri3a_ali_100k_nodup exp/tri4a || exit 1;

(
  utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph
  steps/decode_fmllr.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
    exp/tri4a/graph data/eval2000 exp/tri4a/decode_eval2000
  steps/decode_fmllr.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
    exp/tri4a/graph data/train_dev exp/tri4a/decode_train_dev
)&

steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train_100k_nodup data/lang exp/tri4a exp/tri4a_ali_100k_nodup

#local/run_sgmm.sh
local/run_sgmm2.sh
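
# The decoding runs above were started in background subshells ("( ... )&").  If you are
# pasting commands into the same shell, you may optionally wait for them and glance at
# the word error rates so far before building the larger SAT system below; this is just
# the same scoring pattern as the "getting results" loop at the bottom of this script.
# wait  # wait for the background decoding jobs to finish.
# for x in exp/tri{1,2,3a,4a}/decode_eval2000; do
#   [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh
# done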

# Building a larger SAT system.

steps/train_sat.sh --cmd "$train_cmd" \
  4000 100000 data/train_100k_nodup data/lang exp/tri4a_ali_100k_nodup exp/tri5a || exit 1;

(
  utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph || exit 1;
  steps/decode_fmllr.sh --cmd "$decode_cmd" --config conf/decode.config \
    --nj 30 exp/tri5a/graph data/eval2000 exp/tri5a/decode_eval2000 || exit 1;
  steps/decode_fmllr.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
    exp/tri5a/graph data/train_dev exp/tri5a/decode_train_dev || exit 1;
)

# MMI training starting from the system in tri5a.  Use the same data (100k_nodup).
# Later we'll use all of it.
steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
  data/train_100k_nodup data/lang exp/tri5a exp/tri5a_ali_100k_nodup || exit 1;
steps/make_denlats.sh --nj 40 --cmd "$decode_cmd" --transform-dir exp/tri5a_ali_100k_nodup \
  --config conf/decode.config \
  --sub-split 50 data/train_100k_nodup data/lang exp/tri5a exp/tri5a_denlats_100k_nodup || exit 1;
steps/train_mmi.sh --cmd "$decode_cmd" --boost 0.1 \
  data/train_100k_nodup data/lang exp/tri5a_{ali,denlats}_100k_nodup exp/tri5a_mmi_b0.1 || exit 1;

steps/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
  --transform-dir exp/tri5a/decode_eval2000 \
  exp/tri5a/graph data/eval2000 exp/tri5a_mmi_b0.1/decode_eval2000 &

steps/train_diag_ubm.sh --silence-weight 0.5 --nj 40 --cmd "$train_cmd" \
  700 data/train_100k_nodup data/lang exp/tri5a_ali_100k_nodup exp/tri5a_dubm

steps/train_mmi_fmmi.sh --learning-rate 0.005 \
  --boost 0.1 --cmd "$train_cmd" \
  data/train_100k_nodup data/lang exp/tri5a_ali_100k_nodup exp/tri5a_dubm exp/tri5a_denlats_100k_nodup \
  exp/tri5a_fmmi_b0.1 || exit 1;

for iter in 4 5 6 7 8; do
  steps/decode_fmmi.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    --config conf/decode.config --transform-dir exp/tri5a/decode_eval2000 \
    exp/tri5a/graph data/eval2000 exp/tri5a_fmmi_b0.1/decode_eval2000_it$iter &
done

# Recipe with the indirect differential [doesn't make a difference here].
steps/train_mmi_fmmi_indirect.sh \
  --boost 0.1 --cmd "$train_cmd" \
  data/train_100k_nodup data/lang exp/tri5a_ali_100k_nodup exp/tri5a_dubm exp/tri5a_denlats_100k_nodup \
  exp/tri5a_fmmi_b0.1_indirect || exit 1;

for iter in 4 5 6 7 8; do
  steps/decode_fmmi.sh --nj 30 --cmd "$decode_cmd" --iter $iter \
    --config conf/decode.config --transform-dir exp/tri5a/decode_eval2000 \
    exp/tri5a/graph data/eval2000 exp/tri5a_fmmi_b0.1_indirect/decode_eval2000_it$iter &
done

### At this point there used to be "Karel's DNN recipe", which was removed.
#
# For the most recent DNN recipe, please have a look at:
#   egs/swbd/s5b/local/run_dnn.sh
#
# It features:
#  - RBM pre-training
#  - Frame-based cross-entropy training
#  - Sequence-discriminative training (sMBR)
#
# It is the setup that was used for the Interspeech 2013 paper:
# "Sequence-discriminative training of deep neural networks"

# Note: we haven't yet run with all the data.
# utils/make_phone_bigram_lang.sh data/lang exp/tri4a_ali_100k_nodup data/lang_phone_bg
# utils/mkgraph.sh data/lang_phone_bg exp/tri4a exp/tri4a/graph_phone_bg
# steps/decode_fmllr.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
#   exp/tri4a/graph_phone_bg data/train_dev exp/tri4a/decode_train_dev_phone_bg

# Getting results (see the RESULTS file).
for x in exp/*/decode*; do [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh; done 2>/dev/null
for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done 2>/dev/null
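
# The first loop above picks up sclite-scored output (the score_*/*.sys files produced
# for eval2000), and the second picks up decodes scored with plain wer_* files (typically
# the train_dev decodes).  To check a single experiment, the same pattern can be run on
# one directory, for example:
# grep Sum exp/tri5a/decode_eval2000/score_*/*.sys | utils/best_wer.sh
# grep WER exp/tri5a/decode_train_dev/wer_* | utils/best_wer.sh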