#!/bin/bash

# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).
#           2013  Xiaohui Zhang
#           2013  Guoguo Chen
#           2013  Johns Hopkins University (Author: Jan Trmal)
#           2013  Vimal Manohar
# Apache 2.0.

# This script updates (continues training) an existing neural network model,
# rather than initializing a new one.

# Begin configuration section.
cmd=run.pl
num_epochs=20      # Number of epochs of training; the number of iterations
                   # is worked out from this.
num_iters_final=20 # Maximum number of final iterations to give to the
                   # optimization over the validation set.
learning_rates="0.0008:0.0008:0.0008:0"  # Per-component learning rates passed to
                   # nnet-am-copy (see the note after this configuration section).
combine_regularizer=1.0e-14 # Small regularizer so that parameters won't go crazy.
minibatch_size=128 # by default use a smallish minibatch size for neural net
                   # training; this controls instability which would otherwise
                   # be a problem with multi-threaded update.  Note: it also
                   # interacts with the "preconditioned" update which generally
                   # works better with larger minibatch size, so it's not
                   # completely cost free.
samples_per_iter=200000 # each iteration of training, see this many samples
                        # per job.  This option is passed to get_egs.sh.
num_jobs_nnet=16   # Number of neural net jobs to run in parallel.  This option
                   # is passed to get_egs.sh.
get_egs_stage=0
spk_vecs_dir=
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
                # on each iter.  You could set it to 0 or to a large value for complete
                # randomization, but this would both consume memory and cause spikes in
                # disk I/O.  Smaller is easier on disk and memory but less random.  It's
                # not a huge deal though, as samples are anyway randomized right at the start.
stage=-5
io_opts="-tc 5"  # for jobs with a lot of I/O, limits the number running at one time.
splice_width=4   # meaning +- 4 frames on each side for second LDA
randprune=4.0    # speeds up LDA.
alpha=4.0
max_change=10.0
mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
num_threads=16
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
  # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
cleanup=false
egs_dir=
egs_opts=
transform_dir= # If supplied, overrides alidir.
# End configuration section.
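# Note on --learning-rates (illustrative, assuming a model with four updatable
# components): nnet-am-copy applies the colon-separated list to the updatable
# components in order, so the default "0.0008:0.0008:0.0008:0" trains the first
# three updatable layers at 0.0008 and freezes the last one (learning rate 0).
# Like any option above, it can be overridden on the command line, e.g.
# (hypothetical values, paths taken from the usage example below):
#   steps/nnet2/update_nnet.sh --learning-rates "0.0004:0.0004:0.0004:0" \
#     data/train data/lang exp/tri3_ali exp/tri4_nnet exp/tri4b_nnet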
echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# != 5 ]; then
  echo "Usage: $0 [opts] <data> <lang> <ali-dir> <src-nnet-dir> <exp-dir>"
  echo " e.g.: $0 data/train data/lang exp/tri3_ali exp/tri4_nnet exp/tri4b_nnet"
  echo ""
  echo "Main options (for others, see top of script file)"
  echo "  --config <config-file>                           # config file containing options"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo "  --num-epochs <#epochs|20>                        # Number of epochs of main training"
  echo "                                                   # while reducing learning rate (determines #iterations, together"
  echo "                                                   # with --samples-per-iter and --num-jobs-nnet)"
  echo "  --num-jobs-nnet <num-jobs|16>                    # Number of parallel jobs to use for main neural net"
  echo "                                                   # training (will affect results as well as speed; try 8, 16)"
  echo "                                                   # Note: if you increase this, you may want to also increase"
  echo "                                                   # the learning rate."
  echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
  echo "                                                   # as well as speed; may interact with batch size; if you increase"
  echo "                                                   # this, you may want to decrease the batch size)."
  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">  # extra options to pass to e.g. queue.pl for processes that"
  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
  echo "  --io-opts <opts|\"-tc 5\">                         # Options given to e.g. queue.pl for jobs that do a lot of I/O."
  echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
  echo "                                                   # should not get too large, e.g. >2k)."
  echo "  --samples-per-iter <#samples|200000>             # Number of samples of data to process per iteration, per"
  echo "                                                   # process."
  echo "  --splice-width <width|4>                         # Number of frames on each side to append for feature input"
  echo "                                                   # (note: we splice processed, typically 40-dimensional frames)."
  echo "  --num-iters-final <#iters|20>                    # Number of final iterations to give to nnet-combine-fast to"
  echo "                                                   # interpolate parameters (the weights are learned with a validation set)"
  echo "  --num-utts-subset <#utts|300>                    # Number of utterances in subsets used for validation and diagnostics"
  echo "                                                   # (the validation subset is held out from training)"
  echo "  --num-frames-diagnostic <#frames|4000>           # Number of frames used in computing (train,valid) diagnostics"
  echo "  --num-valid-frames-combine <#frames|10000>       # Number of frames used in getting combination weights at the"
  echo "                                                   # very end."
  echo "  --stage <stage>                                  # Used to run a partially-completed training process from somewhere in"
  echo "                                                   # the middle."
  echo "  --transform-dir <transform-dir>                  # Directory with fMLLR transforms.  Overrides alidir if provided."
  exit 1;
fi

data=$1
lang=$2
alidir=$3
sdir=$4
dir=$5

# Check some files.
for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $alidir/tree; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

# Set some variables.
num_leaves=`gmm-info $alidir/final.mdl 2>/dev/null | awk '/number of pdfs/{print $NF}'` || exit 1;

nj=`cat $alidir/num_jobs` || exit 1;  # number of jobs in alignment dir...
# in this dir we'll have just one job.
sdata=$data/split$nj
utils/split_data.sh $data $nj

mkdir -p $dir/log
echo $nj > $dir/num_jobs
splice_opts=`cat $alidir/splice_opts 2>/dev/null`
cp $alidir/splice_opts $dir 2>/dev/null
cp $alidir/tree $dir

[ -z "$transform_dir" ] && transform_dir=$alidir

if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then
  echo "$0: calling get_egs.sh"
  [ ! -z "$spk_vecs_dir" ] && spk_vecs_opt="--spk-vecs-dir $spk_vecs_dir";
  steps/nnet2/get_egs.sh $spk_vecs_opt --samples-per-iter $samples_per_iter \
    --num-jobs-nnet $num_jobs_nnet --splice-width $splice_width --stage $get_egs_stage \
    --cmd "$cmd" $egs_opts --io-opts "$io_opts" --transform-dir $transform_dir \
    $data $lang $alidir $dir || exit 1;
fi

if [ -z "$egs_dir" ]; then
  egs_dir=$dir/egs
fi
echo "$0: using egs in $egs_dir"

iters_per_epoch=`cat $egs_dir/iters_per_epoch` || exit 1;
! [ $num_jobs_nnet -eq `cat $egs_dir/num_jobs_nnet` ] && \
  echo "$0: Warning: using --num-jobs-nnet=`cat $egs_dir/num_jobs_nnet` from $egs_dir"
num_jobs_nnet=`cat $egs_dir/num_jobs_nnet` || exit 1;

if [ $stage -le -2 ]; then
  echo "$0: using existing neural net";
  source_model=$sdir/final.mdl
  nnet-am-copy --learning-rates=${learning_rates} $source_model $dir/0.mdl || exit 1;
fi

num_iters=$[$num_epochs * $iters_per_epoch];

echo "$0: Will train for $num_epochs epochs, equalling $num_iters iterations"

if [ $num_threads -eq 1 ]; then
  train_suffix="-simple" # this enables us to use GPU code if
                         # we have just one thread.
else
  train_suffix="-parallel --num-threads=$num_threads"
fi
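# Illustration of the iteration bookkeeping below (hypothetical numbers): if
# get_egs.sh wrote iters_per_epoch=5, then with num_epochs=20 we run
# 20 * 5 = 100 training iterations; on iteration x each job reads
# egs.JOB.$[x % 5].ark, so every egs archive is seen exactly once per epoch.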
x=0
while [ $x -lt $num_iters ]; do
  if [ $x -ge 0 ] && [ $stage -le $x ]; then
    # Set off jobs doing some diagnostics, in the background.
    $cmd $dir/log/compute_prob_valid.$x.log \
      nnet-compute-prob $dir/$x.mdl ark:$egs_dir/valid_diagnostic.egs &
    $cmd $dir/log/compute_prob_train.$x.log \
      nnet-compute-prob $dir/$x.mdl ark:$egs_dir/train_diagnostic.egs &
    if [ $x -gt 0 ]; then
      $cmd $dir/log/progress.$x.log \
        nnet-show-progress --use-gpu=no $dir/$[$x-1].mdl $dir/$x.mdl \
          ark:$egs_dir/train_diagnostic.egs &
    fi

    echo "Training neural net (pass $x)"
    mdl=$dir/$x.mdl

    $cmd $parallel_opts JOB=1:$num_jobs_nnet $dir/log/train.$x.JOB.log \
      nnet-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x \
        ark:$egs_dir/egs.JOB.$[$x%$iters_per_epoch].ark ark:- \| \
      nnet-train$train_suffix \
        --minibatch-size=$minibatch_size --srand=$x "$mdl" \
        ark:- $dir/$[$x+1].JOB.mdl \
      || exit 1;

    nnets_list=
    for n in `seq 1 $num_jobs_nnet`; do
      nnets_list="$nnets_list $dir/$[$x+1].$n.mdl"
    done

    # Average the parallel jobs' models to get the next iteration's model.
    $cmd $dir/log/average.$x.log \
      nnet-am-average $nnets_list $dir/$[$x+1].mdl || exit 1;

    rm $nnets_list
  fi
  x=$[$x+1]
done

# Now do combination.  At the end, final.mdl will be a combination of the
# last e.g. 10 models.
nnets_list=()

if [ $num_iters_final -gt $num_iters ]; then
  echo "Setting num_iters_final=$num_iters"
  num_iters_final=$num_iters
fi
start=$[$num_iters-$num_iters_final+1]
for x in `seq $start $num_iters`; do
  idx=$[$x-$start]
  nnets_list[$idx]=$dir/$x.mdl # "nnet-am-copy --remove-dropout=true $dir/$x.mdl - |"
done

if [ $stage -le $num_iters ]; then
  # Below, use --use-gpu=no to stop nnet-combine-fast from using a GPU, as with
  # many models it can give an out-of-memory error; set num-threads to at least 8
  # to speed it up (this isn't ideal...).
  this_num_threads=$num_threads
  [ $this_num_threads -lt 8 ] && this_num_threads=8
  num_egs=`nnet-copy-egs ark:$egs_dir/combine.egs ark:/dev/null 2>&1 | tail -n 1 | awk '{print $NF}'`
  # Split the combination examples evenly across threads (ceiling division),
  # capping the minibatch size at 512.
  mb=$[($num_egs+$this_num_threads-1)/$this_num_threads]
  [ $mb -gt 512 ] && mb=512
  # Setting --initial-model to a large value makes it initialize the combination
  # with the average of all the models.  It's important not to start with a
  # single model, or, due to the invariance to scaling that these nonlinearities
  # give us, we get zero diagonal entries in the Fisher matrix that
  # nnet-combine-fast uses for scaling, which after flooring and inversion, has
  # the effect that the initial model chosen gets much higher learning rates
  # than the others.  This prevents the optimization from working well.
  $cmd $parallel_opts $dir/log/combine.log \
    nnet-combine-fast --initial-model=100000 --num-lbfgs-iters=40 --use-gpu=no \
      --num-threads=$this_num_threads --regularizer=$combine_regularizer \
      --verbose=3 --minibatch-size=$mb "${nnets_list[@]}" ark:$egs_dir/combine.egs \
      $dir/final.mdl || exit 1;
fi

# Compute the probability of the final, combined model with the same subset we
# used for the previous compute_prob jobs, as different subsets would lead to
# different probabilities.
$cmd $dir/log/compute_prob_valid.final.log \
  nnet-compute-prob $dir/final.mdl ark:$egs_dir/valid_diagnostic.egs &
$cmd $dir/log/compute_prob_train.final.log \
  nnet-compute-prob $dir/final.mdl ark:$egs_dir/train_diagnostic.egs &

sleep 2

echo Done

if $cleanup; then
  echo Cleaning up data
  if [ $egs_dir == "$dir/egs" ]; then
    echo Removing training examples
    rm $dir/egs/egs*
  fi
  echo Removing most of the models
  for x in `seq 0 $num_iters`; do
    if [ $[$x%10] -ne 0 ] && [ $x -lt $[$num_iters-$num_iters_final+1] ]; then
      # Delete all but every 10th model; don't delete the ones
      # which combine to form the final model.
      rm $dir/$x.mdl
    fi
  done
fi
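# A quick sanity check after the run (illustrative; the model path is just the
# one from the usage example above): nnet-am-info prints per-component details,
# including the learning rate of each updatable component, so you can confirm
# which layers were actually being updated, e.g.
#   nnet-am-info exp/tri4b_nnet/final.mdl | grep -i learning-rate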