#!/bin/bash

# Copyright 2010-2011 Microsoft Corporation  Arnab Ghoshal

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# This is Subspace Gaussian Mixture Model (SGMM) training--
# see "The subspace Gaussian mixture model--A structured model for speech recognition"
# by D. Povey et al, Computer Speech and Language, 2011.
#
# Positional arguments (all eight are required):
#   $1 total number of substates to reach    $5 lang directory
#   $2 phone-space dimension                 $6 alignment directory
#   $3 speaker-space dimension               $7 UBM file
#   $4 data directory                        $8 output (experiment) directory
#
# NOTE(review): $dir/tree and $dir/numleaves are read below but never created
# by this script -- presumably the caller prepares them; confirm.

# Print all arguments on stderr (backslash escapes are interpreted) and abort
# the current shell with status 1.
function error_exit () {
  echo -e "$@" >&2
  exit 1
}

# Validate an integer option value and print it in canonical decimal form.
#   $1  the value, optionally in "--switch=VALUE" form
# Accepts an optional leading '-', zero, and any number of leading zeros.
# On bad input it reports on stderr and exits with status 1; because callers
# use it inside $(...), they must test its status themselves.
function readint () {
  local val=${1/#*=/}   # In case --switch=ARG format was used
  local sign="" digits=$val
  if [[ "$val" == -* ]]; then
    sign="-"
    digits=${val#-}
  fi
  [[ "$digits" =~ ^[0-9]+$ ]] \
    || error_exit "Argument \"$val\" not an integer."
  # 10# forces base 10 so that leading zeros are not read as octal.
  echo $(( ${sign}10#${digits} ))
}

# Abort unless the option named $1 still has a value following it.
#   $1  option name (for the message)   $2  number of remaining arguments
function require_arg () {
  [ "$2" -ge 2 ] || error_exit "Option '$1' requires an argument."
}

# Succeed if iteration number $1 occurs as a whole word in the
# space-separated list $2.
function iter_in_list () {
  [[ " $2 " == *" $1 "* ]]
}

nj=4       # Default number of jobs
stage=-4   # Default starting stage (start with SGMM initialization)
qcmd=""    # Grid submission command, forwarded to submit_jobs.sh as --qcmd=...
sjopts=""  # Options for the submit_jobs.sh script

PROG=$(basename "$0")
# The string carries literal \n and \t escapes; it is always printed with
# 'echo -e' and always quoted, so that "[options]" is never glob-expanded.
usage="Usage: $PROG [options] <tot-substates> <phn-dim> <spk-dim> <data-dir> <lang-dir> <ali-dir> <ubm> <exp-dir>\n\
e.g.: $PROG 10000 40 39 data/train data/lang exp/tri2a_ali exp/ubm3c/final.ubm exp/sgmm3c\n\n\
Options:\n\
  --help\t\tPrint this message and exit\n\
  --num-jobs INT\tNumber of parallel jobs to run (default=$nj).\n\
  --qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n\
  --sjopts STRING\tOptions for the 'submit_jobs.sh' script\n\
  --stage INT\tStarting stage (e.g. -4 for SGMM init; 2 for iter 2; default=$stage)\n"

while [ $# -gt 0 ]; do
  opt=${1#"${1%%[! ]*}"}   # strip any leading spaces from the argument
  case "$opt" in
    --help)
      echo -e "$usage"
      exit 0 ;;
    --num-jobs)
      require_arg "$opt" $#
      shift
      # readint runs in a subshell, so its failure must be propagated here.
      nj=$(readint "$1") || exit 1
      [ "$nj" -lt 1 ] && error_exit "--num-jobs arg '$nj' not positive."
      shift ;;
    --qcmd)
      require_arg "$opt" $#
      shift
      qcmd=" --qcmd=${1}"
      shift ;;
    --sjopts)
      require_arg "$opt" $#
      shift
      sjopts="$1"
      shift ;;
    --stage)
      require_arg "$opt" $#
      shift
      stage=$(readint "$1") || exit 1
      shift ;;
    -*)
      echo "Unknown argument: $1, exiting"
      echo -e "$usage"
      exit 1 ;;
    *)
      break ;;   # end of options: first positional is the total #substates
  esac
done

if [ $# -ne 8 ]; then
  error_exit "$usage"
fi

[ -f path.sh ] && . path.sh

# This is SGMM with speaker vectors, on top of LDA+[something] features.
# Any speaker-specific transforms are obtained from the alignment directory.
# To be run from ..

totsubstates=$1
phndim=$2
spkdim=$3
data=$4
lang=$5
alidir=$6
ubm=$7
dir=$8

mkdir -p "$dir" || exit 1

scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"

numiters=25        # Total number of iterations
numiters_alimdl=3  # Number of iterations for estimating alignment model.
maxiterinc=15      # Last iter to increase #substates on.
realign_iters="5 10 15"
spkvec_iters="5 8 12 17"
add_dim_iters="6 8 10 12"  # Iters on which to increase phn dim and/or spk dim,
   # if necessary. In most cases, either none of these or only the first of
   # these will have any effect (we increase in increments of [feature dim])

# First iteration on which speaker vectors are estimated (hoisted out of the
# training loop, where it decides when N may be updated).
first_spkvec_iter=${spkvec_iters%% *}

oov_sym=$(cat "$lang/oov.txt")
silphonelist=$(cat "$lang/silphones.csl")

# Without this file the substate arithmetic below cannot be evaluated.
[ -f "$dir/numleaves" ] \
  || error_exit "Expected the initial number of substates in $dir/numleaves"
numsubstates=$(cat "$dir/numleaves")  # Initial #-substates.
# per-iter increment for #substates
incsubstates=$(( ($totsubstates - $numsubstates) / $maxiterinc ))

# Initially don't have speaker vectors, but change this after we estimate them.
spkvecs_opt=
gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/TASK_ID.gselect.gz|"

randprune=0.1
mkdir -p "$dir/log"

featspart="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$alidir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |"

if [ ! -f "$ubm" ]; then
  echo "No UBM in $ubm"
  exit 1
fi

# $sjopts, $scale_opts, $spkvecs_opt and $add_dim_opts are deliberately left
# unquoted below: each holds zero or more separate command-line options.
# "$qcmd" is always passed as exactly one (possibly empty) argument.

if [ "$stage" -le -4 ]; then
  submit_jobs.sh "$qcmd" --log="$dir/log/init_sgmm.log" $sjopts \
    sgmm-init --phn-space-dim="$phndim" --spk-space-dim="$spkdim" "$lang/topo" \
      "$dir/tree" "$ubm" "$dir/0.mdl" || error_exit "SGMM init failed."
fi

if [ "$stage" -le -3 ]; then
  # Make training graphs (this is split in $nj parts).
  echo "Compiling training graphs"
  submit_jobs.sh "$qcmd" --njobs="$nj" --log="$dir/log/compile_graphsTASK_ID.log" \
    $sjopts compile-train-graphs "$dir/tree" "$dir/0.mdl" "$lang/L.fst" \
      "ark:sym2int.pl --map-oov '$oov_sym' --ignore-first-field $lang/words.txt < $data/split$nj/TASK_ID/text |" \
      "ark:|gzip -c >$dir/TASK_ID.fsts.gz" \
      || error_exit "Error compiling training graphs"
fi

if [ "$stage" -le -2 ]; then
  echo "Doing Gaussian selection"
  submit_jobs.sh "$qcmd" --njobs="$nj" --log="$dir/log/gselectTASK_ID.log" \
    $sjopts sgmm-gselect "$dir/0.mdl" "$featspart" "ark,t:|gzip -c > $dir/TASK_ID.gselect.gz" \
      || error_exit "Error doing Gaussian selection"
fi

if [ "$stage" -le -1 ]; then
  echo "Converting alignments"  # don't bother parallelizing; very fast.
  for (( n = 1; n <= nj; n++ )); do
    convert-ali "$alidir/final.mdl" "$dir/0.mdl" "$dir/tree" \
      "ark:gunzip -c $alidir/$n.ali.gz|" "ark:|gzip -c >$dir/$n.ali.gz" \
      2>"$dir/log/convert.$n.log" \
      || error_exit "Error converting alignments for job $n"
  done
fi

x=0
while [ "$x" -lt "$numiters" ]; do
  if [ "$x" -eq 0 ]; then
    flags=vwcSt   # On first iter, don't update M or N.
  elif [ "$spkdim" -gt 0 ] && (( x % 2 == 1 && x >= first_spkvec_iter )); then
    # Update N on odd iterations after 1st spkvec iter, if we have spk-space.
    flags=vNwcSt
  else
    # Else update M but not N.
    flags=vMwcSt
  fi

  if [ "$stage" -le "$x" ]; then
    echo "Pass $x: update flags = '$flags' "
    next=$(( x + 1 ))

    if iter_in_list "$x" "$realign_iters"; then
      echo "Aligning data"
      submit_jobs.sh "$qcmd" --njobs="$nj" --log="$dir/log/align.$x.TASK_ID.log" \
        $sjopts sgmm-align-compiled $spkvecs_opt $scale_opts "$gselect_opt" \
          --utt2spk="ark:$data/split$nj/TASK_ID/utt2spk" --beam=8 --retry-beam=40 \
          "$dir/$x.mdl" "ark:gunzip -c $dir/TASK_ID.fsts.gz|" "$featspart" \
          "ark:|gzip -c >$dir/TASK_ID.ali.gz" \
          || error_exit "Error realigning data on iter $x"
    fi

    if [ "$spkdim" -gt 0 ] && iter_in_list "$x" "$spkvec_iters"; then
      # The escaped pipes (\|) are handed to submit_jobs.sh as literal '|'
      # arguments, so the whole pipeline runs inside each submitted job.
      submit_jobs.sh "$qcmd" --njobs="$nj" --log="$dir/log/spkvecs.$x.TASK_ID.log" \
        $sjopts ali-to-post "ark:gunzip -c $dir/TASK_ID.ali.gz|" ark:- \| \
          weight-silence-post 0.01 "$silphonelist" "$dir/$x.mdl" ark:- ark:- \| \
          sgmm-est-spkvecs --spk2utt="ark:$data/split$nj/TASK_ID/spk2utt" \
            $spkvecs_opt "$gselect_opt" --rand-prune="$randprune" "$dir/$x.mdl" \
            "$featspart" ark,s,cs:- "ark:$dir/tmpTASK_ID.vecs" \
          || error_exit "Error computing speaker vectors on iter $x"
      # Vectors are written under a temporary name because the previous ones
      # may be read (via $spkvecs_opt) while the new ones are being estimated.
      for (( n = 1; n <= nj; n++ )); do
        mv "$dir/tmp$n.vecs" "$dir/$n.vecs" \
          || error_exit "Error installing speaker vectors for job $n on iter $x"
      done
      spkvecs_opt="--spk-vecs=ark:$dir/TASK_ID.vecs"
    fi

    submit_jobs.sh "$qcmd" --njobs="$nj" --log="$dir/log/acc.$x.TASK_ID.log" \
      $sjopts sgmm-acc-stats --utt2spk="ark:$data/split$nj/TASK_ID/utt2spk" \
        --update-flags="$flags" --rand-prune="$randprune" $spkvecs_opt \
        "$gselect_opt" "$dir/$x.mdl" "$featspart" \
        "ark,s,cs:ali-to-post 'ark:gunzip -c $dir/TASK_ID.ali.gz|' ark:-|" \
        "$dir/$x.TASK_ID.acc" || error_exit "Error accumulating stats on iter $x"

    add_dim_opts=
    if iter_in_list "$x" "$add_dim_iters"; then
      add_dim_opts="--increase-phn-dim=$phndim --increase-spk-dim=$spkdim"
    fi

    submit_jobs.sh "$qcmd" --log="$dir/log/update.$x.log" $sjopts \
      sgmm-est --update-flags="$flags" --split-substates="$numsubstates" \
        $add_dim_opts --write-occs="$dir/$next.occs" "$dir/$x.mdl" \
        "sgmm-sum-accs - $dir/$x.*.acc|" "$dir/$next.mdl" \
        || error_exit "Error in pass $x estimation."

    rm -f "$dir/$x.mdl" "$dir/$x".*.acc "$dir/$x.occs"
  fi

  if [ "$x" -lt "$maxiterinc" ]; then
    numsubstates=$(( numsubstates + incsubstates ))
  fi
  x=$(( x + 1 ))
done

# Point final.mdl / final.occs at the last model. The cd is checked so that
# the rm/ln can never act on the caller's working directory.
(
  cd "$dir" || exit 1
  rm -f final.mdl final.occs
  ln -s "$x.mdl" final.mdl
  ln -s "$x.occs" final.occs
) || error_exit "Error linking final model in $dir"

if [ "$spkdim" -gt 0 ]; then
  # If we have speaker vectors, we need an alignment model.
  # The point of this last phase of accumulation is to get Gaussian-level
  # alignments with the speaker vectors but accumulate stats without
  # any speaker vectors; we re-estimate M, w, c and S to get a model
  # that's compatible with not having speaker vectors.
  # We do this for a few iters, in this recipe.
  cur_alimdl=$dir/$x.mdl
  y=0
  while [ "$y" -lt "$numiters_alimdl" ]; do
    echo "Pass $y of building alignment model"
    if [ "$y" -eq 0 ]; then
      flags=MwcS   # First time don't update v...
    else
      flags=vMwcS  # don't update transitions-- will probably share graph with normal model.
    fi
    next=$(( y + 1 ))

    # Alignment-model passes are numbered as stages 100, 101, ...
    if [ "$stage" -le $(( y + 100 )) ]; then
      submit_jobs.sh "$qcmd" --njobs="$nj" --log="$dir/log/acc_ali.$y.TASK_ID.log" \
        $sjopts ali-to-post "ark:gunzip -c $dir/TASK_ID.ali.gz|" ark:- \| \
          sgmm-post-to-gpost $spkvecs_opt "$gselect_opt" \
            --utt2spk="ark:$data/split$nj/TASK_ID/utt2spk" "$dir/$x.mdl" \
            "$featspart" ark,s,cs:- ark:- \| \
          sgmm-acc-stats-gpost --update-flags="$flags" "$cur_alimdl" "$featspart" \
            ark,s,cs:- "$dir/$y.TASK_ID.aliacc" \
          || error_exit "Error accumulating stats for alignment model on iter $y"

      submit_jobs.sh "$qcmd" --log="$dir/log/update_ali.$y.log" $sjopts \
        sgmm-est --update-flags="$flags" --remove-speaker-space=true \
          "$cur_alimdl" "sgmm-sum-accs - $dir/$y.*.aliacc|" "$dir/$next.alimdl" \
          || error_exit "Error estimating alignment model on iter $y"

      rm "$dir/$y".*.aliacc || exit 1
      # On pass 0 the input was the final SGMM itself, which must be kept.
      if [ "$y" -gt 0 ]; then
        rm "$dir/$y.alimdl"
      fi
    fi

    cur_alimdl=$dir/$next.alimdl
    y=$next
  done

  (
    cd "$dir" || exit 1
    rm -f final.alimdl
    ln -s "$y.alimdl" final.alimdl
  ) || error_exit "Error linking final alignment model in $dir"
fi

# Print out summary of the warning messages.
for logfile in "$dir"/log/*.log; do
  [ -f "$logfile" ] || continue   # glob matched nothing
  nwarn=$(grep -c WARNING "$logfile")
  if [ "$nwarn" -ne 0 ]; then
    echo "$nwarn warnings in $logfile"
  fi
done

echo Done