/**
* @file vsegment.c
*
*
* @brief 入力に対するViterbi アライメントの実行
*
*
*
* @brief Do viterbi alignment for the input
*
*
* @author Akinobu LEE
* @date Fri Feb 18 19:29:22 2005
*
* $Revision: 1.4 $
*
*/
/*
* Copyright (c) 1991-2012 Kawahara Lab., Kyoto University
* Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
* Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology
* All rights reserved
*/
#include
#include
#include
/**
* @brief Perform Viterbi alignment.
*
* This function performs viterbi alignment for the given sentence %HMM,
* input parameter and unit definition. Any segmentatino unit (word, phoneme
* state, etc.) is allowed: the segmentation unit should be specified by
* specifying a list of state id which are the end of each unit.
* For example, if you want to obtain phoneme alignment, the list of state
* number that exist at the end of phones should be specified by @a endstates.
*
* @param hmm [in] sentence HMM to be matched
* @param param [in] input parameter data
* @param wrk [i/o] HMM computation work area
* @param multipath [in] TRUE if need multi-path handling
* @param endstates [in] list of state id that corrsponds to the ends of units
* @param ulen [in] total number of units in the @a hmm
* @param id_ret [out] Pointer to store the newly allocated array of the resulting id sequence of units on the best path.
* @param seg_ret [out] Pointer to store the newly allocated array of the resulting end frame of each unit on the best path.
* @param uscore_ret [out] Pointer to store the newly allocated array of the resulting score at the end frame of each unit on the best path.
* @param slen_ret [out] Pointer to store the total number of units on the best path.
*
* @return the total acoustic score for the whole input.
*/
LOGPROB
viterbi_segment(HMM *hmm, HTK_Param *param, HMMWork *wrk, boolean multipath, int *endstates, int ulen, int **id_ret, int **seg_ret, LOGPROB **uscore_ret, int *slen_ret)
{
/* for viterbi */
LOGPROB *nodescore[2]; /* node buffer */
SEGTOKEN **tokenp[2]; /* propagating token which holds segment info */
int startt, endt;
int *from_node;
int *u_end, *u_start; /* the node is an end of the word, or -1 for non-multipath mode*/
int i, n;
unsigned int t;
int tl,tn;
LOGPROB tmpsum;
A_CELL *ac;
SEGTOKEN *newtoken, *token, *tmptoken, *root;
LOGPROB result_score;
LOGPROB maxscore, minscore; /* for debug */
int maxnode; /* for debug */
int *id, *seg, slen;
LOGPROB *uscore;
/* assume more than 1 units */
if (ulen < 1) {
jlog("Error: vsegment: no unit?\n");
return LOG_ZERO;
}
if (!multipath) {
/* initialize unit start/end marker */
u_start = (int *)mymalloc(hmm->len * sizeof(int));
u_end = (int *)mymalloc(hmm->len * sizeof(int));
for (n = 0; n < hmm->len; n++) {
u_start[n] = -1;
u_end[n] = -1;
}
u_start[0] = 0;
u_end[endstates[0]] = 0;
for (i=1;ilen;i++) {
printf("unit %d: start=%d, end=%d\n", i, u_start[i], u_end[i]);
}
#endif
}
/* initialize node buffers */
tn = 0;
tl = 1;
root = NULL;
for (i=0;i<2;i++){
nodescore[i] = (LOGPROB *)mymalloc(hmm->len * sizeof(LOGPROB));
tokenp[i] = (SEGTOKEN **)mymalloc(hmm->len * sizeof(SEGTOKEN *));
for (n = 0; n < hmm->len; n++) {
tokenp[i][n] = NULL;
}
}
for (n = 0; n < hmm->len; n++) {
nodescore[tn][n] = LOG_ZERO;
newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
newtoken->last_id = -1;
newtoken->last_end_frame = -1;
newtoken->last_end_score = 0.0;
newtoken->list = root;
root = newtoken;
newtoken->next = NULL;
tokenp[tn][n] = newtoken;
}
from_node = (int *)mymalloc(sizeof(int) * hmm->len);
/* first frame: only set initial score */
/*if (hmm->state[0].is_pseudo_state) {
jlog("Warning: state %d: pseudo state?\n", 0);
}*/
if (multipath) {
nodescore[tn][0] = 0.0;
} else {
nodescore[tn][0] = outprob(wrk, 0, &(hmm->state[0]), param);
}
/* do viterbi for rest frame */
if (multipath) {
startt = 0; endt = param->samplenum;
} else {
startt = 1; endt = param->samplenum - 1;
}
for (t = startt; t <= endt; t++) {
i = tl;
tl = tn;
tn = i;
maxscore = LOG_ZERO;
minscore = 0.0;
/* clear next scores */
for (i=0;ilen;i++) {
nodescore[tn][i] = LOG_ZERO;
from_node[i] = -1;
}
/* select viterbi path for each node */
for (n = 0; n < hmm->len; n++) {
if (nodescore[tl][n] <= LOG_ZERO) continue;
for (ac = hmm->state[n].ac; ac; ac = ac->next) {
tmpsum = nodescore[tl][n] + ac->a;
if (nodescore[tn][ac->arc] < tmpsum) {
nodescore[tn][ac->arc] = tmpsum;
from_node[ac->arc] = n;
}
}
}
/* propagate token, appending new if path was selected between units */
if (multipath) {
for (n = 0; n < hmm->len; n++) {
if (from_node[n] == -1 || nodescore[tn][n] <= LOG_ZERO) {
/*tokenp[tn][n] = NULL;*/
} else {
i=0;
while (from_node[n] > endstates[i]) i++;
if (n > endstates[i]) {
newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
newtoken->last_id = i;
newtoken->last_end_frame = t-1;
newtoken->last_end_score = nodescore[tl][from_node[n]];
newtoken->list = root;
root = newtoken;
newtoken->next = tokenp[tl][from_node[n]];
tokenp[tn][n] = newtoken;
} else {
tokenp[tn][n] = tokenp[tl][from_node[n]];
}
}
}
} else { /* not multipath */
for (n = 0; n < hmm->len; n++) {
if (from_node[n] == -1) {
tokenp[tn][n] = NULL;
} else if (nodescore[tn][n] <= LOG_ZERO) {
tokenp[tn][n] = tokenp[tl][from_node[n]];
} else {
if (u_end[from_node[n]] != -1 && u_start[n] != -1
&& from_node[n] != n) {
newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
newtoken->last_id = u_end[from_node[n]];
newtoken->last_end_frame = t-1;
newtoken->last_end_score = nodescore[tl][from_node[n]];
newtoken->list = root;
root = newtoken;
newtoken->next = tokenp[tl][from_node[n]];
tokenp[tn][n] = newtoken;
} else {
tokenp[tn][n] = tokenp[tl][from_node[n]];
}
}
}
}
if (multipath) {
/* if this is next of last frame, loop ends here */
if (t == param->samplenum) break;
}
/* calc outprob to new nodes */
for (n = 0; n < hmm->len; n++) {
if (multipath) {
if (hmm->state[n].out.state == NULL) continue;
}
if (nodescore[tn][n] > LOG_ZERO) {
if (hmm->state[n].is_pseudo_state) {
jlog("Warning: vsegment: state %d: pseudo state?\n", n);
}
nodescore[tn][n] += outprob(wrk, t, &(hmm->state[n]), param);
}
if (nodescore[tn][n] > maxscore) { /* for debug */
maxscore = nodescore[tn][n];
maxnode = n;
}
}
#if 0
for (i=0;i 0) ? endstates[i-1]+1 : 0, endstates[i],
(multipath && tokenp[tl][endstates[i]] == NULL) ? -1 : tokenp[tl][endstates[i]]->last_end_frame + 1);
}
#endif
/* printf("t=%3d max=%f n=%d\n",t,maxscore, maxnode); */
}
result_score = nodescore[tn][hmm->len-1];
/* parse back the last token to see the trail of best viterbi path */
/* and store the informations to returning buffer */
slen = 0;
if (!multipath) slen++;
for(token = tokenp[tn][hmm->len-1]; token; token = token->next) {
if (token->last_end_frame == -1) break;
slen++;
}
id = (int *)mymalloc(sizeof(int)*slen);
seg = (int *)mymalloc(sizeof(int)*slen);
uscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*slen);
if (multipath) {
i = slen - 1;
} else {
id[slen-1] = ulen - 1;
seg[slen-1] = t - 1;
uscore[slen-1] = result_score;
i = slen - 2;
}
for(token = tokenp[tn][hmm->len-1]; token; token = token->next) {
if (i < 0 || token->last_end_frame == -1) break;
id[i] = token->last_id;
seg[i] = token->last_end_frame;
uscore[i] = token->last_end_score;
i--;
}
/* normalize scores by frame */
for (i=slen-1;i>0;i--) {
uscore[i] = (uscore[i] - uscore[i-1]) / (seg[i] - seg[i-1]);
}
uscore[0] = uscore[0] / (seg[0] + 1);
/* set return value */
*id_ret = id;
*seg_ret = seg;
*uscore_ret = uscore;
*slen_ret = slen;
/* free memory */
if (!multipath) {
free(u_start);
free(u_end);
}
free(from_node);
token = root;
while(token) {
tmptoken = token->list;
free(token);
token = tmptoken;
}
for (i=0;i<2;i++) {
free(nodescore[i]);
free(tokenp[i]);
}
return(result_score);
}