#!/usr/bin/perl

# Converts intermediate representation of tree into Kaldi-format
# ContextDependency object.  Assumes triphone.
#
# Usage: tree_convert.pl phone2len.txt tree.txt > kaldi.tree
#
# Inputs:
#   phone2len.txt  one "<phone-id> <num-hmm-positions>" pair per line.
#   tree.txt       one "<phone-id> <hmm-position> <tree string ...>" entry
#                  per line; everything after the first two fields is the
#                  tree string, re-joined with single spaces.
#
# Output (STDOUT): a text ContextDependency object containing one
# table-event-map entry per phone id in 0..max-phone-id.  Phone ids that do
# not appear in phone2len.txt are written as NULL.

use strict;
use warnings;

if (@ARGV != 2) {
    die "Usage: tree_convert.pl phone2len.txt tree.txt > kaldi.tree\n";
}

my ($phone2len, $tree_in) = @ARGV;

# Number of HMM positions for each phone id, plus the largest phone id seen
# (it determines the size of the outer table-event-map).
my %len_of;
my $maxphone = 0;

# Three-argument open with a lexical handle: the file name can no longer be
# misread as an open mode or a pipe.
open(my $phone2len_fh, '<', $phone2len)
    || die "Opening file $phone2len: $!";
while (my $line = <$phone2len_fh>) {
    my @fields = split(" ", $line);
    @fields == 2 || die "bad phone2len file: line is $line\n";
    my ($phone, $len) = @fields;
    $len_of{$phone} = $len;
    if ($phone > $maxphone) {
        $maxphone = $phone;
    }
}
close($phone2len_fh);

# Tree string for each (phone id, HMM position) pair.
my %tree_of;

open(my $tree_fh, '<', $tree_in)
    || die "Opening tree file $tree_in: $!";
while (my $line = <$tree_fh>) {
    my @fields = split(" ", $line);
    # A line without both a phone and a position can never be looked up by
    # the output loop below, so skipping it does not change the output.
    next if @fields < 2;
    my $phone = shift @fields;
    my $pos   = shift @fields;
    $tree_of{$phone}{$pos} = join(" ", @fields);
}
close($tree_fh);

# standard triphone settings:
my $N = 3;    # context width
my $P = 1;    # central position
print "ContextDependency $N $P\n";

# $np is size of array in table-event-map.
my $np = $maxphone + 1;

# printing out to-pdf map.. 1==split-on-central-position;
print "ToPdf TE 1 $np (\n";
for (my $p = 0; $p < $np; $p++) {
    if (!defined $len_of{$p}) {
        # probably eps.
        print "NULL\n";
    }
    else {
        # table-event-map splitting on pdf-class == hmm-position.
        print " TE -1 $len_of{$p} (\n";

        # for each HMM-position (0,1,2)
        for (my $pos = 0; $pos < $len_of{$p}; $pos++) {
            my $treestr = $tree_of{$p}{$pos};
            defined $treestr
                || die "No tree defined for phone=$p, pos=$pos\n";

            # E.g.: treestr = ( -1 ( 40 42 10 30 6 34 29 31 ) ( -1 ( 10 30 6 31 )
            #   ( 1 ( 36 0 ) ( 507 ) ( 506 ) ) ( 505 ) ) ( -1 ( 40 10 30 6 34 29
            #   31 18 43 9 12 39 25 4 20 ) ( 504 ) ( -1 ( 22 ) ( 503 ) ( -1 ( 26 7 )
            #   ( 502 ) ( 1 ( 37 ) ( 501 ) ( 500 ) ) ) ) ) )
            # The tree string is written out exactly as it was read; no
            # renumbering of positions is done here.
            print " $treestr\n";
        }
        print " )\n";
    }
}
print ")\n";
print "EndContextDependency\n";