/** * @file gprune_heu.c * * * @brief 混合ガウス分布計算: Gaussian pruning (heuristic algorithm) * * gprune_heu()は混合ガウス分布集合の計算ルーチンの一つです. * heuristic pruning を使って上位のガウス分布の出力確率のみを高速に求めます. * Tied-mixture %HMM 使用時に Julius でGPRUNE_DEFAULT_HEURISTIC が * 定義されているか,あるいはJuliusのオプション "-gprune heuristic" を * 指定することでこの関数が使用されます. * * heuristic pruning はあまり使われていない手法で,高速化の効果は * 他の手法の中間です. * * gprune_heu() は outprob_init() によってその関数へのポインタが * compute_gaussset にセットされることで使用されます.このポインタが * calc_tied_mix() または calc_mix() から呼び出されます. * * * * @brief Calculate probability of a set of Gaussian densities by * Gaussian pruning: heuristic algorithm * * gprune_heu() is one of the functions to compute output probability of * a set of Gaussian densities. This function does heuristic pruning, trying * to compute only the best ones for faster computation. If a tied-mixture * %HMM model with GPRUNE_DEFAULT_HEURISTIC defined in Julius, or explicitly * specified by "-gprune heuristic" option, this function will be used. * * The effect of heuristic pruning is practically a middle of others. * * gprune_heu() will be used by calling outprob_init() to set its pointer * to the global variable @a compute_gaussset. Then it will be called from * calc_tied_mix() or calc_mix(). * * * @author Akinobu LEE * @date Thu Feb 17 05:44:52 2005 * * $Revision: 1.6 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include #include #include #include /* mail routines to calculate mixtures for given frame, and store to cache */ /* best_mixtures_on_last_frame[] dim: 0 1 2 3 4 .... veclen-1 -> sum up ================================ backsum <-<-<-<-<-<-<-<-<- -------------------------------- mix1 ->-> mix2 ->-> mix3 ->-> ... mixN ->-> ================================ score = forward sum + backmax algorithm 1: // store sub-optimal max prob for each dimension init backsum[] foreach all_mixtures in best_mixtures_on_last_frame { foreach dim { p = prob of this dim if (backmax[dim] < p) backmax[dim] = p sumup score } store to cache } // sum-up backward to make backmax foreach dim-1 backward { backmax[dim] += backmax[dim+1] } // compute rest with pruning with heuristics threshold = the current lowest score foreach rest_mixtures { foreach dim { sumup score if (score + backmax[dim+1] < threshold) { skip this mixture } } update threshold } } */ /** * Clear backmax information. * * @param wrk [i/o] HMM computation work area * */ static void init_backmax(HMMWork *wrk) { int i; for(i=0;ibackmax_num;i++) wrk->backmax[i] = 0; } /** * Build backmax information for each frame, by summing up * current maximum values of each dimensions. * * @param wrk [i/o] HMM computation work area */ /* | * 0 1 2 ... max-3 max-2 | max-1 * * a b c x y | ???? * | * v * ..... x+y y 0.0 */ static void make_backmax(HMMWork *wrk) { int i; wrk->backmax[wrk->backmax_num-1] = 0.0; /* backmax[len-1] = backmax[len-1];*/ for(i=wrk->backmax_num-2;i>=0;i--) { wrk->backmax[i] += wrk->backmax[i+1]; } /* for(i=0;i<=len;i++) { printf("backmax[%d]=%f\n",i,backmax[i]); }*/ } /** * @brief Calculate probability with maximum value update. * * Calculate probability of a Gaussian toward OP_vec, * while storing the maximum values of each dimension to @a backmax. * for future pruning. The pruning itself is not performed here. * This function will be used to compute the first N Gaussians. * * @param wrk [i/o] HMM computation work area * @param binfo [in] Gaussian density * * @return the output log probability. */ static LOGPROB compute_g_heu_updating(HMMWork *wrk, HTK_HMM_Dens *binfo) { VECT tmp, x, sum = 0.0; VECT *mean; VECT *var; VECT *bm = wrk->backmax; VECT *vec = wrk->OP_vec; short veclen = wrk->OP_veclen; if (binfo == NULL) return(LOG_ZERO); mean = binfo->mean; var = binfo->var->vec; tmp = 0.0; for (; veclen > 0; veclen--) { x = *(vec++) - *(mean++); tmp = x * x * *(var++); sum += tmp; if ( *bm < tmp) *bm = tmp; bm++; } return((sum + binfo->gconst) * -0.5); } /** * @brief Calculate probability with pruning. * * Calculate probability of a Gaussian toward OP_vec, * performing pruning using threshold and the backmax information * that has been set by compute_g_heu_updating() and * make_backmax(). * * @param wrk [i/o] HMM computation work area * @param binfo [in] Gaussian density * @param thres [in] threshold * * @return the output log probability. */ static LOGPROB compute_g_heu_pruning(HMMWork *wrk, HTK_HMM_Dens *binfo, LOGPROB thres) { VECT tmp, x; VECT *mean; VECT *var; VECT *bm = wrk->backmax; VECT *vec = wrk->OP_vec; short veclen = wrk->OP_veclen; LOGPROB fthres; if (binfo == NULL) return(LOG_ZERO); mean = binfo->mean; var = binfo->var->vec; fthres = thres * (-2.0); tmp = 0.0; bm++; for (; veclen > 0; veclen--) { x = *(vec++) - *(mean++); tmp += x * x * *(var++); if ( tmp + *bm > fthres) { return LOG_ZERO; } bm++; } return((tmp + binfo->gconst) * -0.5); } /** * Initialize and setup work area for Gaussian pruning by heuristic algorithm. * * @param wrk [i/o] HMM computation work area * * @return TRUE on success, FALSE on failure. */ boolean gprune_heu_init(HMMWork *wrk) { int i; /* maximum Gaussian set size = maximum mixture size * nstream */ wrk->OP_calced_maxnum = wrk->OP_hmminfo->maxmixturenum * wrk->OP_nstream; wrk->OP_calced_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wrk->OP_calced_maxnum); wrk->OP_calced_id = (int *)mymalloc(sizeof(int) * wrk->OP_calced_maxnum); wrk->mixcalced = (boolean *)mymalloc(sizeof(int) * wrk->OP_calced_maxnum); for(i=0;iOP_calced_maxnum;i++) wrk->mixcalced[i] = FALSE; wrk->backmax_num = wrk->OP_hmminfo->opt.vec_size + 1; wrk->backmax = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wrk->backmax_num); return TRUE; } /** * Free gprune_heu related work area. * * @param wrk [i/o] HMM computation work area * */ void gprune_heu_free(HMMWork *wrk) { free(wrk->OP_calced_score); free(wrk->OP_calced_id); free(wrk->mixcalced); free(wrk->backmax); } /** * @brief Compute a set of Gaussians with heuristic pruning. * * If the N-best mixtures in the previous frame is specified in @a last_id, * They are first computed to get the maximum value for each dimension. * After that, the rest of the Gaussians will be computed using the maximum * values as heuristics of uncomputed dimensions to drop unpromising * Gaussians from computation at early stage * of likelihood computation. If the @a last_id is not specified (typically * at the first frame of the input), a safe pruning as same as one in * gprune_safe.c will be applied. * * The calculated scores will be stored to OP_calced_score, with its * corresponding mixture id to OP_calced_id. These are done by calling * cache_push(). * The number of calculated mixtures is also stored in OP_calced_num. * * This can be called from calc_tied_mix() or calc_mix(). * * @param wrk [i/o] HMM computation work area * @param g [in] set of Gaussian densities to compute the output probability * @param gnum [in] length of above * @param last_id [in] ID list of N-best mixture in previous input frame, * or NULL if not exist * @param lnum [in] length of last_id */ void gprune_heu(HMMWork *wrk, HTK_HMM_Dens **g, int gnum, int *last_id, int lnum) { int i, j, num = 0; LOGPROB score, thres; if (last_id != NULL) { /* compute them first to form thresholds */ /* 1. clear backmax */ init_backmax(wrk); /* 2. calculate first $OP_gprune_num with setting max for each dimension */ for (j=0; jmixcalced[i] = TRUE; /* mark them as calculated */ } /* 3. set backmax for each dimension */ make_backmax(wrk); /* 4. calculate the rest with pruning*/ thres = wrk->OP_calced_score[num-1]; for (i = 0; i < gnum; i++) { /* skip calced ones in 1. */ if (wrk->mixcalced[i]) { wrk->mixcalced[i] = FALSE; continue; } /* compute with safe pruning */ score = compute_g_heu_pruning(wrk, g[i], thres); if (score > LOG_ZERO) { num = cache_push(wrk, i, score, num); thres = wrk->OP_calced_score[num-1]; } } } else { /* in case the last_id not available */ /* at the first 0 frame */ /* calculate with safe pruning */ thres = LOG_ZERO; for (i = 0; i < gnum; i++) { if (num < wrk->OP_gprune_num) { score = compute_g_base(wrk, g[i]); } else { score = compute_g_safe(wrk, g[i], thres); if (score <= thres) continue; } num = cache_push(wrk, i, score, num); thres = wrk->OP_calced_score[num-1]; } } wrk->OP_calced_num = num; }