/** * @file wav2mfcc-pipe.c * * * @brief 音声波形から MFCC 特徴量へ変換する (フレーム単位) * * ここでは wav2mfcc.c の関数をフレーム同期に処理するために変換した * 関数が納められています．認識処理を音声入力と平行して行う場合，こちらの * 関数が用いられます． * * * * @brief Convert speech inputs into MFCC parameter vectors (per input frame) * * There are functions are derived from wav2mfcc.c, to compute * MFCC vectors in per-frame basis. When performing on-line recognition, * these functions will be used instead of ones in wav2mfcc.c * * * @author Akinobu LEE * @date Thu Feb 17 18:12:30 2005 * * $Revision: 1.8 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* wav2mfcc-pipe.c --- split Wav2MFCC to perform per-frame-basis, and also realtime CMN for 1st-pass pipe-lining */ /************************************************************************/ /* wav2mfcc.c Convert Speech file to MFCC_E_D_(Z) file */ /*----------------------------------------------------------------------*/ /* Author : Yuichiro Nakano */ /* */ /* Copyright(C) Yuichiro Nakano 1996-1998 */ /*----------------------------------------------------------------------*/ /************************************************************************/ #include #include #include /***********************************************************************/ /** * Allocate a new delta cycle buffer. * * @param veclen [in] length of a vector * @param windowlen [in] window width for computing delta * * @return pointer to newly allocated delta cycle buffer structure. */ DeltaBuf * WMP_deltabuf_new(int veclen, int windowlen) { int i; DeltaBuf *db; db = (DeltaBuf *)mymalloc(sizeof(DeltaBuf)); db->veclen = veclen; db->win = windowlen; db->len = windowlen * 2 + 1; db->mfcc = (float **)mymalloc(sizeof(float *) * db->len); db->is_on = (boolean *) mymalloc(sizeof(boolean) * db->len); for (i=0;ilen;i++) { db->mfcc[i] = (float *)mymalloc(sizeof(float) * veclen * 2); } db->B = 0; for(i = 1; i <= windowlen; i++) db->B += i * i; db->B *= 2; return (db); } /** * Destroy the delta cycle buffer. * * @param db [i/o] delta cycle buffer */ void WMP_deltabuf_free(DeltaBuf *db) { int i; for (i=0;ilen;i++) { free(db->mfcc[i]); } free(db->is_on); free(db->mfcc); free(db); } /** * Reset and clear the delta cycle buffer. * * @param db [i/o] delta cycle buffer */ void WMP_deltabuf_prepare(DeltaBuf *db) { int i; db->store = 0; for (i=0;ilen;i++) { db->is_on[i] = FALSE; } } /** * Calculate delta coefficients of the specified point in the cycle buffer. * * @param db [i/o] delta cycle buffer * @param cur [in] target point to calculate the delta coefficients */ static void WMP_deltabuf_calc(DeltaBuf *db, int cur) { int n, theta, p; float A1, A2, sum; int last_valid_left, last_valid_right; for (n = 0; n < db->veclen; n++) { sum = 0.0; last_valid_left = last_valid_right = cur; for (theta = 1; theta <= db->win; theta++) { p = cur - theta; if (p < 0) p += db->len; if (db->is_on[p]) { A1 = db->mfcc[p][n]; last_valid_left = p; } else { A1 = db->mfcc[last_valid_left][n]; } p = cur + theta; if (p >= db->len) p -= db->len; if (db->is_on[p]) { A2 = db->mfcc[p][n]; last_valid_right = p; } else { A2 = db->mfcc[last_valid_right][n]; } sum += theta * (A2 - A1); } db->mfcc[cur][db->veclen + n] = sum / db->B; } } /** * Store the given MFCC vector into the delta cycle buffer, and compute the * latest delta coefficients. * * @param db [i/o] delta cycle buffer * @param new_mfcc [in] MFCC vector * * @return TRUE if next delta coeff. computed, in that case it is saved * in db->delta[], or FALSE if delta is not yet computed by short of data. */ boolean WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc) { int cur; boolean ret; /* copy data to store point */ memcpy(db->mfcc[db->store], new_mfcc, sizeof(float) * db->veclen); db->is_on[db->store] = TRUE; /* get current calculation point */ cur = db->store - db->win; if (cur < 0) cur += db->len; /* if the current point is fulfilled, compute delta */ if (db->is_on[cur]) { WMP_deltabuf_calc(db, cur); db->vec = db->mfcc[cur]; ret = TRUE; } else { ret = FALSE; } /* move store pointer to next */ db->store++; if (db->store >= db->len) db->store -= db->len; /* return TRUE if delta computed for current, or -1 if not calculated yet */ return (ret); } /** * Flush the delta cycle buffer the delta coefficients * left in the cycle buffer. * * @param db [i/o] delta cycle buffer * * @return TRUE if next delta coeff. computed, in that case it is saved * in db->delta[], or FALSE if all delta computation has been flushed and * no data is available. * */ boolean WMP_deltabuf_flush(DeltaBuf *db) { int cur; boolean ret; /* clear store point */ db->is_on[db->store] = FALSE; /* get current calculation point */ cur = db->store - db->win; if (cur < 0) cur += db->len; /* if the current point if fulfilled, compute delta */ if (db->is_on[cur]) { WMP_deltabuf_calc(db, cur); db->vec = db->mfcc[cur]; ret = TRUE; } else { ret = FALSE; } /* move store pointer to next */ db->store++; if (db->store >= db->len) db->store -= db->len; /* return TRUE if delta computed for current, or -1 if not calculated yet */ return (ret); } /***********************************************************************/ /* MAP-CMN */ /***********************************************************************/ /** * Initialize MAP-CMN at startup. * * @param para [in] MFCC computation configuration parameter * @param weight [in] initial cepstral mean weight * */ CMNWork * CMN_realtime_new(Value *para, float weight) { int i; CMNWork *c; c = (CMNWork *)mymalloc(sizeof(CMNWork)); c->cweight = weight; c->mfcc_dim = para->mfcc_dim + (para->c0 ? 1 : 0); c->veclen = para->veclen; c->mean = para->cmn ? TRUE : FALSE; c->var = para->cvn ? TRUE : FALSE; c->clist_max = CPSTEP; c->clist_num = 0; c->clist = (CMEAN *)mymalloc(sizeof(CMEAN) * c->clist_max); for(i=0;iclist_max;i++) { c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen); if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen); c->clist[i].framenum = 0; } c->now.mfcc_sum = (float *)mymalloc(sizeof(float) * c->veclen); if (c->var) c->now.mfcc_var = (float *)mymalloc(sizeof(float) * c->veclen); c->cmean_init = (float *)mymalloc(sizeof(float) * c->veclen); if (c->var) c->cvar_init = (float *)mymalloc(sizeof(float) * c->veclen); c->cmean_init_set = FALSE; return c; } /** * Free work area for real-time CMN. * * @param c [i/o] CMN calculation work area * */ void CMN_realtime_free(CMNWork *c) { int i; free(c->cmean_init); free(c->now.mfcc_sum); if (c->var) { free(c->cvar_init); free(c->now.mfcc_var); } for(i=0;iclist_max;i++) { if (c->var) free(c->clist[i].mfcc_var); free(c->clist[i].mfcc_sum); } free(c->clist); free(c); } /** * Prepare for MAP-CMN at start of each input * * @param c [i/o] CMN calculation work area */ void CMN_realtime_prepare(CMNWork *c) { int d; for(d=0;dveclen;d++) c->now.mfcc_sum[d] = 0.0; if (c->var) { for(d=0;dveclen;d++) c->now.mfcc_var[d] = 0.0; } c->now.framenum = 0; } /** * Perform MAP-CMN for incoming MFCC vectors * * @param c [i/o] CMN calculation work area * @param mfcc [in] MFCC vector * */ void CMN_realtime(CMNWork *c, float *mfcc) { int d; double x, y; c->now.framenum++; if (c->cmean_init_set) { /* initial data exists */ for(d=0;dveclen;d++) { /* accumulate current MFCC to sum */ c->now.mfcc_sum[d] += mfcc[d]; /* calculate map-mean */ x = c->now.mfcc_sum[d] + c->cweight * c->cmean_init[d]; y = (double)c->now.framenum + c->cweight; x /= y; if (c->var) { /* calculate map-var */ c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x); } if (c->mean && d < c->mfcc_dim) { /* mean normalization */ mfcc[d] -= x; } if (c->var) { /* variance normalization */ x = c->now.mfcc_var[d] + c->cweight * c->cvar_init[d]; y = (double)c->now.framenum + c->cweight; mfcc[d] /= sqrt(x / y); } } } else { /* no initial data */ for(d=0;dveclen;d++) { /* accumulate current MFCC to sum */ c->now.mfcc_sum[d] += mfcc[d]; /* calculate current mean */ x = c->now.mfcc_sum[d] / c->now.framenum; if (c->var) { /* calculate current variance */ c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x); } if (c->mean && d < c->mfcc_dim) { /* mean normalization */ mfcc[d] -= x; } #if 0 /* not perform variance normalization on no initial data */ if (c->var) { /* variance normalization */ mfcc[d] /= sqrt(c->now.mfcc_var[d] / c->now.framenum); } #endif } } } /** * Update initial cepstral mean from previous utterances for next input. * * @param c [i/o] CMN calculation work area */ void CMN_realtime_update(CMNWork *c, HTK_Param *param) { float *tmp, *tmp2; int i, d; int frames; /* if CMN_realtime was never called before this, return immediately */ /* this may occur by pausing just after startup */ if (c->now.framenum == 0) return; /* re-calculate variance based on the final mean at the given param */ if (c->var && param != NULL) { float m, x; if (param->samplenum != c->now.framenum) { jlog("InternalError: CMN_realtime_update: param->samplenum != c->now.framenum\n"); } else if (param->veclen != c->veclen) { jlog("InternalError: CMN_realtime_update: param->veclen != c->veclen\n"); } else { for(d=0;dveclen;d++) { m = c->now.mfcc_sum[d] / (float) c->now.framenum; x = 0; for(i=0;isamplenum;i++) { x += (param->parvec[i][d] - m) * (param->parvec[i][d] - m); } c->now.mfcc_var[d] = x; } } } /* compute cepstral mean from now and previous sums up to CPMAX frames */ for(d=0;dveclen;d++) c->cmean_init[d] = c->now.mfcc_sum[d]; if (c->var) { for(d=0;dveclen;d++) c->cvar_init[d] = c->now.mfcc_var[d]; } frames = c->now.framenum; for(i=0;iclist_num;i++) { for(d=0;dveclen;d++) c->cmean_init[d] += c->clist[i].mfcc_sum[d]; if (c->var) { for(d=0;dveclen;d++) c->cvar_init[d] += c->clist[i].mfcc_var[d]; } frames += c->clist[i].framenum; if (frames >= CPMAX) break; } for(d=0;dveclen;d++) c->cmean_init[d] /= (float) frames; if (c->var) { for(d=0;dveclen;d++) c->cvar_init[d] /= (float) frames; } c->cmean_init_set = TRUE; /* expand clist if neccessary */ if (c->clist_num == c->clist_max && frames < CPMAX) { c->clist_max += CPSTEP; c->clist = (CMEAN *)myrealloc(c->clist, sizeof(CMEAN) * c->clist_max); for(i=c->clist_num;iclist_max;i++) { c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen); if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen); c->clist[i].framenum = 0; } } /* shift clist */ tmp = c->clist[c->clist_max-1].mfcc_sum; if (c->var) tmp2 = c->clist[c->clist_max-1].mfcc_var; memmove(&(c->clist[1]), &(c->clist[0]), sizeof(CMEAN) * (c->clist_max - 1)); c->clist[0].mfcc_sum = tmp; if (c->var) c->clist[0].mfcc_var = tmp2; /* copy now to clist[0] */ memcpy(c->clist[0].mfcc_sum, c->now.mfcc_sum, sizeof(float) * c->veclen); if (c->var) memcpy(c->clist[0].mfcc_var, c->now.mfcc_var, sizeof(float) * c->veclen); c->clist[0].framenum = c->now.framenum; if (c->clist_num < c->clist_max) c->clist_num++; } /** * Read binary with byte swap (assume file is Big Endian) * * @param buf [out] data buffer * @param unitbyte [in] size of unit in bytes * @param unitnum [in] number of units to be read * @param fp [in] file pointer * * @return TRUE if required number of units are fully read, FALSE if failed. */ static boolean myread(void *buf, size_t unitbyte, int unitnum, FILE *fp) { if (myfread(buf, unitbyte, unitnum, fp) < (size_t)unitnum) { return(FALSE); } #ifndef WORDS_BIGENDIAN swap_bytes(buf, unitbyte, unitnum); #endif return(TRUE); } /** * Write binary with byte swap (assume data is Big Endian) * * @param buf [in] data buffer * @param unitbyte [in] size of unit in bytes * @param unitnum [in] number of units to write * @param fd [in] file descriptor * * @return TRUE if required number of units are fully written, FALSE if failed. */ static boolean mywrite(void *buf, size_t unitbyte, size_t unitnum, int fd) { #ifndef WORDS_BIGENDIAN swap_bytes(buf, unitbyte, unitnum); #endif if (write(fd, buf, unitbyte * unitnum) < unitbyte * unitnum) { return(FALSE); } #ifndef WORDS_BIGENDIAN swap_bytes(buf, unitbyte, unitnum); #endif return(TRUE); } /** * Load CMN parameter from file. If the number of MFCC dimension in the * file does not match the specified one, an error will occur. * * @param c [i/o] CMN calculation work area * @param filename [in] file name * * @return TRUE on success, FALSE on failure. */ boolean CMN_load_from_file(CMNWork *c, char *filename) { FILE *fp; int veclen; jlog("Stat: wav2mfcc-pipe: reading initial CMN from file \"%s\"\n", filename); if ((fp = fopen_readfile(filename)) == NULL) { jlog("Error: wav2mfcc-pipe: failed to open\n"); return(FALSE); } /* read header */ if (myread(&veclen, sizeof(int), 1, fp) == FALSE) { jlog("Error: wav2mfcc-pipe: failed to read header\n"); fclose_readfile(fp); return(FALSE); } /* check length */ if (veclen != c->veclen) { jlog("Error: wav2mfcc-pipe: cepstral dimension mismatch\n"); jlog("Error: wav2mfcc-pipe: process = %d, file = %d\n", c->veclen, veclen); fclose_readfile(fp); return(FALSE); } /* read body */ if (myread(c->cmean_init, sizeof(float), c->veclen, fp) == FALSE) { jlog("Error: wav2mfcc-pipe: failed to read mean for CMN\n"); fclose_readfile(fp); return(FALSE); } if (c->var) { if (myread(c->cvar_init, sizeof(float), c->veclen, fp) == FALSE) { jlog("Error: wav2mfcc-pipe: failed to read variance for CVN\n"); fclose_readfile(fp); return(FALSE); } } if (fclose_readfile(fp) == -1) { jlog("Error: wav2mfcc-pipe: failed to close\n"); return(FALSE); } c->cmean_init_set = TRUE; jlog("Stat: wav2mfcc-pipe: read CMN parameter\n"); return(TRUE); } /** * Save the current CMN vector to a file. * * @param c [i/o] CMN calculation work area * @param filename [in] filename to save the data. * * @return TRUE on success, FALSE on failure. */ boolean CMN_save_to_file(CMNWork *c, char *filename) { int fd; jlog("Stat: wav2mfcc-pipe: writing current cepstral data to file \"%s\"\n", filename); if ((fd = open(filename, O_CREAT | O_RDWR #ifdef O_BINARY | O_BINARY #endif , 0644)) == -1) { jlog("Error: wav2mfcc-pipe: failed to open \"%s\" to write current cepstral data\n", filename); return(FALSE); } /* write header */ if (mywrite(&(c->veclen), sizeof(int), 1, fd) == FALSE) { jlog("Error: wav2mfcc-pipe: cannot write header to \"%s\" as current cepstral data\n", filename); close(fd); return(FALSE); } /* write body */ if (mywrite(c->cmean_init, sizeof(float), c->veclen, fd) == FALSE) { jlog("Error: wav2mfcc-pipe: cannot write mean to \"%s\" as current cepstral data\n", filename); close(fd); return(FALSE); } if (c->var) { if (mywrite(c->cvar_init, sizeof(float), c->veclen, fd) == FALSE) { jlog("Error: wav2mfcc-pipe: cannot write variance to \"%s\" as current cepstrum\n", filename); close(fd); return(FALSE); } } close(fd); jlog("Stat: wav2mfcc-pipe: current cepstral data written to \"%s\"\n", filename); return(TRUE); } /***********************************************************************/ /* energy normalization and scaling on live input */ /***********************************************************************/ /** * Initialize work area for energy normalization on live input. * This should be called once on startup. * * @param energy [in] energy normalization work area * */ void energy_max_init(ENERGYWork *energy) { energy->max = 5.0; } /** * Prepare values for energy normalization on live input. * This should be called before each input segment. * * @param energy [in] energy normalization work area * @param para [in] MFCC computation configuration parameter */ void energy_max_prepare(ENERGYWork *energy, Value *para) { energy->max_last = energy->max; energy->min_last = energy->max - (para->silFloor * LOG_TEN) / 10.0; energy->max = 0.0; } /** * Peform energy normalization using maximum of last input. * * @param energy [in] energy normalization work area * @param f [in] raw energy * @param para [in] MFCC computation configuration parameter * * @return value of the normalized log energy. */ LOGPROB energy_max_normalize(ENERGYWork *energy, LOGPROB f, Value *para) { if (energy->max < f) energy->max = f; if (f < energy->min_last) f = energy->min_last; return(1.0 - (energy->max_last - f) * para->escale); }