#!/bin/bash

# Copyright 2013 Daniel Povey
# Apache 2.0.

# This script extracts iVectors for a set of utterances, given
# features and a trained iVector extractor.
#
# Positional arguments:
#   $1  extractor directory; final.ie and final.ubm are read from it
#   $2  data directory; feats.scp must exist and the directory is split
#       into $nj parts with utils/split_data.sh
#   $3  output directory; created if needed, logs go to <dir>/log

# Begin configuration section.
nj=30
cmd="run.pl"
stage=0
num_gselect=20 # Gaussian-selection using diagonal model: number of Gaussians to select
min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
# End configuration section.

echo "$0 $*" # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
# NOTE(review): parse_options.sh is expected to be found via PATH (presumably
# set up by path.sh) and to map --option flags onto the variables above.
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  # Only options backed by a variable in the configuration section are listed.
  echo "Usage: $0 <extractor-dir> <data> <ivector-dir>"
  echo " e.g.: $0 exp/extractor_2048_male data/train_male exp/ivectors_male"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>                           # config containing options"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo "  --nj <n|30>                                      # Number of jobs"
  echo "  --stage <stage|0>                                # To control partial reruns"
  echo "  --num-gselect <n|20>                             # Number of Gaussians to select using"
  echo "                                                   # diagonal model."
  echo "  --min-post <min-post|0.025>                      # Pruning threshold for posteriors"
  exit 1;
fi

srcdir=$1
data=$2
dir=$3

# Fail early if any required input is missing.
for f in "$srcdir/final.ie" "$srcdir/final.ubm" "$data/feats.scp"; do
  if [ ! -f "$f" ]; then
    echo "No such file $f"
    exit 1;
  fi
done

# Set various variables.
mkdir -p "$dir/log"
sdata=$data/split$nj;
utils/split_data.sh "$data" "$nj" || exit 1;

## Set up features.
# Per-job feature pipeline, written as a piped input specifier: the features
# listed in $sdata/JOB/feats.scp go through add-deltas, apply-cmvn-sliding
# and select-voiced-frames (the latter reading $sdata/JOB/vad.scp).
# "JOB" is kept literal here; the stage-0 command is launched with
# JOB=1:$nj, and stage 1 reads the resulting ivector.<j>.scp files.
feats="ark,s,cs:add-deltas scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |"

# Stage 0: one job per data split; writes ivector.JOB.{ark,scp} in $dir.
if [ "$stage" -le 0 ]; then
  echo "$0: extracting iVectors"
  # Piped input that converts final.ubm with fgmm-global-to-gmm for gmm-gselect.
  dubm="fgmm-global-to-gmm $srcdir/final.ubm -|"
  # $cmd is intentionally unquoted so a value carrying extra options
  # word-splits into separate arguments. The pipes are escaped (\|) so they
  # are passed to $cmd instead of being interpreted by this shell.
  $cmd JOB=1:$nj "$dir/log/extract_ivectors.JOB.log" \
    gmm-gselect --n=$num_gselect "$dubm" "$feats" ark:- \| \
    fgmm-global-gselect-to-post --min-post=$min_post "$srcdir/final.ubm" "$feats" \
      ark,s,cs:- ark:- \| \
    ivector-extract --verbose=2 "$srcdir/final.ie" "$feats" ark,s,cs:- \
      "ark,scp,t:$dir/ivector.JOB.ark,$dir/ivector.JOB.scp" || exit 1;
fi

# Stage 1: concatenate the per-job .scp files into a single ivector.scp.
if [ "$stage" -le 1 ]; then
  echo "$0: combining iVectors across jobs"
  for j in $(seq "$nj"); do
    # Check every job's file, not just the last one, so that a missing
    # per-job output aborts instead of yielding a truncated ivector.scp.
    cat "$dir/ivector.$j.scp" || exit 1;
  done >"$dir/ivector.scp" || exit 1;
fi

# Stage 2: per-speaker means (speakers taken from $data/spk2utt), with
# length normalization applied both before and after averaging.
if [ "$stage" -le 2 ]; then
  # Be careful here: the speaker-level iVectors are now length-normalized,
  # even if they are otherwise the same as the utterance-level ones.
  echo "$0: computing mean of iVectors for each speaker and length-normalizing"
  $cmd "$dir/log/speaker_mean.log" \
    ivector-normalize-length "scp:$dir/ivector.scp" ark:- \| \
    ivector-mean "ark:$data/spk2utt" ark:- ark:- "ark,t:$dir/num_utts.ark" \| \
    ivector-normalize-length ark:- "ark,scp:$dir/spk_ivector.ark,$dir/spk_ivector.scp" || exit 1;
fi