/** * @file rdhmmdef.c * * * @brief HTK %HMM 定義ファイルの読み込み:メイン * * ここには HTK 形式の %HMM 定義ファイルを読み込むための関数群を呼び出す * メイン関数が収められています. * * このファイルはまた,読み込み関数群で共通して用いられるトークン単位の * ファイル読み込み関数を提供します. * %HMM 定義ファイルは read_token() によってトークン単位で順次読み込まれ, * グローバル変数 rdhmmdef_token に格納されます.各関数群はこの * rdhmmdef_token を参照して現在のトークンを得ます. * * * * @brief Read HTK %HMM definition file: the main * * This file includes the main routine to read the %HMM definition file in * HTK format. * * This file also contains functions and global variables for per-token * reading tailored for reading HTK %HMM definition file. The read_token() * will read the file per token, and the read token is stored in a global * variable rdhmmdef_token. The other reading function will refer to this * variable to read the current token. * * * @author Akinobu LEE * @date Wed Feb 16 00:17:18 2005 * * $Revision: 1.7 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include #include #define MAXBUFLEN 4096 ///< Maximum length of a line in the input char *rdhmmdef_token; ///< Current token string (GLOBAL) static char buf[MAXBUFLEN]; ///< Local work area for token reading static int line; ///< Input Line count /* global functions for rdhmmdef_*.c */ /** * Output error message, with current reading status, and terminate * * @param str [in] error string */ void rderr(char *str) { if (rdhmmdef_token == NULL) { /* end of file */ jlog("Error: rdhmmdef: %s on end of file\n", str); } else { jlog("Error: rdhmmdef: read error at line %d: %s\n", line, (str) ? str : "parse error"); } jlog_flush(); exit(1); } /** * Read next token and ste it to rdhmmdef_token. * * @param fp [in] file pointer * * @return the pointer to the read token, or NULL on end of file or error. 
*/ char * read_token(FILE *fp) { if ((rdhmmdef_token = mystrtok_quote(NULL, HMMDEF_DELM)) != NULL) { /* return next token */ return rdhmmdef_token; } /* read new 1 line */ if (getl(buf, MAXBUFLEN, fp) == NULL) { rdhmmdef_token = NULL; } else { rdhmmdef_token = mystrtok_quote(buf, HMMDEF_DELM); line++; } return rdhmmdef_token; } /** * Convert all the transition probabilities to log10 scale. * * @param hmm [i/o] %HMM definition data to modify. */ static void conv_log_arc(HTK_HMM_INFO *hmm) { HTK_HMM_Trans *tr; int i,j; LOGPROB l; for (tr = hmm->trstart; tr; tr = tr->next) { for(i=0;istatenum;i++) { for(j=0;jstatenum;j++) { l = tr->a[i][j]; tr->a[i][j] = (l != 0.0) ? (float)log10(l) : LOG_ZERO; } } } } /** * Invert all the variance values. * * @param hmm [i/o] %HMM definition data to modify. */ void htk_hmm_inverse_variances(HTK_HMM_INFO *hmm) { HTK_HMM_Var *v; int i; for (v = hmm->vrstart; v; v = v->next) { for(i=0;ilen;i++) { v->vec[i] = 1.0 / v->vec[i]; } } } #ifdef ENABLE_MSD /** * Check if this HMM contains MSD-HMM. The status will be set to hmm->has_msd. * * @param hmm [i/o] %HMM definition data to check. */ void htk_hmm_check_msd(HTK_HMM_INFO *hmm) { HTK_HMM_PDF *m; int vlen; int i; hmm->has_msd = FALSE; for (m = hmm->pdfstart; m; m = m->next) { /* skip tied-mixture pdf */ if (m->tmix) continue; /* check if vector length are the same */ vlen = hmm->opt.stream_info.vsize[m->stream_id]; for(i=0;imix_num;i++) { if (m->b[i]->meanlen != vlen) { jlog("Stat: rdhmmdef: assume MSD-HMM since Gaussian dimension are not consistent\n"); hmm->has_msd = TRUE; return; } } } } #endif /** * @brief Main top routine to read in HTK %HMM definition file. * * A HTK %HMM definition file will be read from @a fp. After reading, * the parameter type is checked and calculate some statistics. * * @param fp [in] file pointer * @param hmm [out] pointer to a %HMM definition structure to store data. * * @return TRUE on success, FALSE on failure. 
*/
boolean
rdhmmdef(FILE *fp, HTK_HMM_INFO *hmm)
{
  char macrosw;
  char *name;

  /* variances in htkdefs are not inversed yet */
  hmm->variance_inversed = FALSE;

  /* Read the first token.  The first line is handed to the tokenizer
     directly (not through read_token()) so that tokenizing starts from this
     input.  Lines that yield no token are skipped, since a NULL token means
     "end of input" to the loop below. */
  line = 0;
  rdhmmdef_token = NULL;
  while (getl(buf, MAXBUFLEN, fp) != NULL) {
    line++;
    rdhmmdef_token = mystrtok_quote(buf, HMMDEF_DELM);
    if (rdhmmdef_token != NULL) break;
  }

  /* the toplevel loop */
  while (rdhmmdef_token != NULL) { /* break on EOF */
    if (rdhmmdef_token[0] != '~') {
      /* toplevel commands are always macro */
      jlog("Error: rdhmmdef: macro expected at line %d but got \"%s\"\n", line, rdhmmdef_token);
      return FALSE;
    }
    macrosw = rdhmmdef_token[1];

    read_token(fp);             /* read next token after the "~.." */

    /* the global option macro is the only supported one without a name */
    if (macrosw == 'o') {
      if (set_global_opt(fp, hmm) == FALSE) {
        return FALSE;
      }
      continue;
    }

    /* reject anything that is not one of the named macros handled below */
    switch (macrosw) {
    case 't': case 's': case 'm': case 'h':
    case 'v': case 'w': case 'r': case 'p':
      break;
    default:
      jlog("Error: rdhmmdef: unsupported macro \"~%c\" near line %d\n",
           (macrosw != '\0') ? macrosw : '?', line);
      return FALSE;
    }

    /* All remaining macros are followed by their name.  The name must be
       present: without this check a macro at the very end of the file would
       pass a NULL token to mybstrdup2() / strlen(). */
    if (rdhmmdef_token == NULL) rderr("macro name is missing");
    name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
    if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
    read_token(fp);             /* step to the first token of the macro body */

    switch (macrosw) {
    case 't':                   /* transition macro */
      def_trans_macro(name, fp, hmm);
      break;
    case 's':                   /* state macro */
      def_state_macro(name, fp, hmm);
      break;
    case 'm':                   /* density (mixture) macro */
      def_dens_macro(name, fp, hmm);
      break;
    case 'h':                   /* HMM define */
      def_HMM(name, fp, hmm);
      break;
    case 'v':                   /* Variance macro */
      def_var_macro(name, fp, hmm);
      break;
    case 'w':                   /* Stream weight macro */
      def_streamweight_macro(name, fp, hmm);
      break;
    case 'r':                   /* Regression class macro (ignore) */
      def_regtree_macro(name, fp, hmm);
      break;
    case 'p':                   /* Mixture pdf macro (extension of HTS) */
      def_mpdf_macro(name, fp, hmm);
      break;
    }
  }

  /* convert transition prob to log scale */
  conv_log_arc(hmm);

  jlog("Stat: rdhmmdef: ascii format HMM definition\n");

  /* check limitation */
  if (check_all_hmm_limit(hmm)) {
    jlog("Stat: rdhmmdef: limit check passed\n");
  } else {
    jlog("Error: rdhmmdef: cannot handle this HMM due to system limitation\n");
    return FALSE;
  }

  /* determine whether this model needs multi-path handling */
  hmm->need_multipath = htk_hmm_has_several_arc_on_edge(hmm);
  if (hmm->need_multipath) {
    jlog("Stat: rdhmmdef: this HMM requires multipath handling at decoding\n");
  } else {
    jlog("Stat: rdhmmdef: this HMM does not need multipath handling\n");
  }

  /* inverse all variance values for faster computation */
  if (! hmm->variance_inversed) {
    htk_hmm_inverse_variances(hmm);
    hmm->variance_inversed = TRUE;
  }

  /* check HMM parameter option type */
  if (!check_hmm_options(hmm)) {
    jlog("Error: rdhmmdef: hmm options check failed\n");
    return FALSE;
  }

  /* add ID number for all HTK_HMM_State */
  /* also calculate the maximum number of mixture */
  {
    HTK_HMM_State *stmp;
    int n = 0;
    int max = 0;
    int s, mix;

    for (stmp = hmm->ststart; stmp; stmp = stmp->next) {
      for (s = 0; s < stmp->nstream; s++) {
        mix = stmp->pdf[s]->mix_num;
        if (max < mix) max = mix;
      }
      stmp->id = n++;
      if (n >= MAX_STATE_NUM) {
        jlog("Error: rdhmmdef: too much states in a model > %d\n", MAX_STATE_NUM);
        return FALSE;
      }
    }
    hmm->totalstatenum = n;
    hmm->maxmixturenum = max;
  }

  /* compute total number of HMM models and maximum length */
  {
    HTK_HMM_Data *dtmp;
    int n = 0;
    int maxlen = 0;

    for (dtmp = hmm->start; dtmp; dtmp = dtmp->next) {
      if (maxlen < dtmp->state_num) maxlen = dtmp->state_num;
      n++;
    }
    hmm->maxstatenum = maxlen;
    hmm->totalhmmnum = n;
  }

  /* compute total number of Gaussians */
  {
    HTK_HMM_Dens *dtmp;
    int n = 0;

    for (dtmp = hmm->dnstart; dtmp; dtmp = dtmp->next) {
      n++;
    }
    hmm->totalmixnum = n;
  }

  /* compute total number of mixture PDFs */
  {
    HTK_HMM_PDF *p;
    int n = 0;

    for (p = hmm->pdfstart; p; p = p->next) {
      n++;
    }
    hmm->totalpdfnum = n;
  }

  /* assign ID number for all HTK_HMM_Trans */
  {
    HTK_HMM_Trans *ttmp;
    int n = 0;

    for (ttmp = hmm->trstart; ttmp; ttmp = ttmp->next) {
      ttmp->id = n++;
    }
    hmm->totaltransnum = n;
  }

#ifdef ENABLE_MSD
  /* check if MSD-HMM */
  htk_hmm_check_msd(hmm);
#endif

  return TRUE;                  /* success */
}