/**
* @file para.c
*
*
* @brief 特徴量抽出条件の扱い
*
* 音響分析の設定パラメータを保持する Value 構造体を扱う.
*
*
*
* @brief Acoustic analysis condition parameter handling
*
*
* Value structure holds acoustic analysis configuration parameters.
*
* @author Akinobu Lee
* @date Fri Oct 27 14:55:00 2006
*
* $Revision: 1.11 $
*
*/
/*
* Copyright (c) 1991-2012 Kawahara Lab., Kyoto University
* Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
* Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology
* All rights reserved
*/
#include
#include
/**
 * Mark every MFCC computation parameter as "not specified yet".
 *
 * Each field receives the sentinel -1, with two exceptions: the cut-off
 * fields hipass / lopass use -2 (elsewhere in this file -1 already means
 * "disabled" for them), and the `loaded` flag is cleared to 0.
 * apply_para() tests for exactly these sentinels when merging.
 * The commented-out spectral subtraction fields (ss_alpha, ss_floor)
 * are not touched.
 *
 * @param para [out] feature extraction parameters
 *
 */
void
undef_para(Value *para)
{
  enum {
    PARA_UNDEF = -1,		/* generic "no value" marker */
    PARA_UNDEF_CUTOFF = -2	/* "no value" marker for hipass / lopass */
  };

  /* sampling and framing */
  para->smp_period = PARA_UNDEF;
  para->smp_freq = PARA_UNDEF;
  para->framesize = PARA_UNDEF;
  para->frameshift = PARA_UNDEF;

  /* filterbank / cepstrum analysis */
  para->preEmph = PARA_UNDEF;
  para->fbank_num = PARA_UNDEF;
  para->lifter = PARA_UNDEF;
  para->mfcc_dim = PARA_UNDEF;
  para->zmeanframe = PARA_UNDEF;
  para->usepower = PARA_UNDEF;
  para->hipass = PARA_UNDEF_CUTOFF;
  para->lopass = PARA_UNDEF_CUTOFF;

  /* vocal tract length normalization */
  para->vtln_alpha = PARA_UNDEF;
  para->vtln_upper = PARA_UNDEF;
  para->vtln_lower = PARA_UNDEF;

  /* energy handling */
  para->raw_e = PARA_UNDEF;
  para->enormal = PARA_UNDEF;
  para->escale = PARA_UNDEF;
  para->silFloor = PARA_UNDEF;

  /* regression windows */
  para->delWin = PARA_UNDEF;
  para->accWin = PARA_UNDEF;

  /* normalization switches */
  para->cmn = PARA_UNDEF;
  para->cvn = PARA_UNDEF;

  /* parameter kind qualifiers */
  para->c0 = PARA_UNDEF;
  para->energy = PARA_UNDEF;
  para->delta = PARA_UNDEF;
  para->acc = PARA_UNDEF;
  para->absesup = PARA_UNDEF;

  /* vector lengths */
  para->baselen = PARA_UNDEF;
  para->vecbuflen = PARA_UNDEF;
  para->veclen = PARA_UNDEF;

  /* nothing has been loaded into this structure */
  para->loaded = 0;
}
/**
 * Fill in the Julius default values for MFCC computation.
 *
 * Only the fields assigned below are written; every other field of
 * @a para keeps whatever value it had on entry.
 *
 * @param para [out] feature extraction parameters
 *
 */
void
make_default_para(Value *para)
{
  /* input waveform: 16kHz sampling */
  para->smp_freq = 16000;	/* in Hz */
  para->smp_period = 625;	/* same rate as a period, in 100ns units */

  /* framing */
  para->frameshift = DEF_FRAMESHIFT;
  para->framesize = DEF_FRAMESIZE;

  /* spectral analysis */
  para->usepower = FALSE;
  para->zmeanframe = FALSE;
  para->preEmph = DEF_PREENPH;
  para->lifter = DEF_CEPLIF;
  para->fbank_num = DEF_FBANK;

  /* band limitation and warping: all switched off */
  para->lopass = -1;		/* disabled */
  para->hipass = -1;		/* disabled */
  para->vtln_alpha = 1.0;	/* disabled */

  /* energy */
  para->silFloor = DEF_SILFLOOR;
  para->escale = DEF_ESCALE;
  para->enormal = FALSE;
  para->raw_e = FALSE;

  /* regression windows */
  para->accWin = DEF_ACCWIN;
  para->delWin = DEF_DELWIN;

  /* variance normalization */
  para->cvn = FALSE;
}
/**
 * Fill in the HTK default configuration values for MFCC computation.
 * These are referred to when parameters are given as an HTK Config file.
 *
 * Only the fields assigned below are written.  Note that no default is
 * given for frameshift here.
 *
 * @param para [out] feature extraction parameters
 *
 */
void
make_default_para_htk(Value *para)
{
  /* Placeholder window size.  htk_config_file_parse() divides framesize
     by the source rate after reading a config file. */
  para->framesize = 256000.0;	/* dummy! */

  /* spectral analysis */
  para->usepower = FALSE;
  para->zmeanframe = FALSE;
  para->lifter = 22;
  para->fbank_num = 20;
  para->preEmph = 0.97;

  /* band limitation and warping: all switched off */
  para->vtln_alpha = 1.0;	/* disabled */
  para->lopass = -1;		/* disabled */
  para->hipass = -1;		/* disabled */

  /* energy */
  para->silFloor = 50.0;
  para->escale = 0.1;
  para->enormal = TRUE;
  para->raw_e = TRUE;

  /* regression windows */
  para->accWin = 2;
  para->delWin = 2;
}
/* Copy one field from src to dst only while dst still holds the given
   "undefined" sentinel.  Local to apply_para(). */
#define APPLY_PARA_FILL(field, undef) \
  do { if (dst->field == (undef)) dst->field = src->field; } while (0)

/**
 * Merge two sets of configuration parameters for MFCC computation.
 *
 * Every field of @a dst that still holds its "undefined" sentinel
 * (-2 for hipass / lopass, -1 for all others) is overwritten with the
 * corresponding field of @a src.  Fields of @a dst that already carry
 * another value are kept.  The `loaded` flag is not merged.
 *
 * @param dst [out] feature extraction parameters to set to
 * @param src [in] feature extraction parameters to set from
 *
 */
void
apply_para(Value *dst, Value *src)
{
  APPLY_PARA_FILL(smp_period, -1);
  APPLY_PARA_FILL(smp_freq, -1);
  APPLY_PARA_FILL(framesize, -1);
  APPLY_PARA_FILL(frameshift, -1);
  APPLY_PARA_FILL(preEmph, -1);
  APPLY_PARA_FILL(mfcc_dim, -1);
  APPLY_PARA_FILL(lifter, -1);
  APPLY_PARA_FILL(fbank_num, -1);
  APPLY_PARA_FILL(delWin, -1);
  APPLY_PARA_FILL(accWin, -1);
  APPLY_PARA_FILL(silFloor, -1);
  APPLY_PARA_FILL(escale, -1);
  APPLY_PARA_FILL(enormal, -1);
  APPLY_PARA_FILL(hipass, -2);	/* -1 means "disabled" for the cut-offs */
  APPLY_PARA_FILL(lopass, -2);
  APPLY_PARA_FILL(cmn, -1);
  APPLY_PARA_FILL(cvn, -1);
  APPLY_PARA_FILL(raw_e, -1);
  APPLY_PARA_FILL(c0, -1);
  APPLY_PARA_FILL(vtln_alpha, -1);
  APPLY_PARA_FILL(vtln_upper, -1);
  APPLY_PARA_FILL(vtln_lower, -1);
  APPLY_PARA_FILL(zmeanframe, -1);
  APPLY_PARA_FILL(usepower, -1);
  APPLY_PARA_FILL(delta, -1);
  APPLY_PARA_FILL(acc, -1);
  APPLY_PARA_FILL(energy, -1);
  APPLY_PARA_FILL(absesup, -1);
  APPLY_PARA_FILL(baselen, -1);
  APPLY_PARA_FILL(vecbuflen, -1);
  APPLY_PARA_FILL(veclen, -1);
}

#undef APPLY_PARA_FILL
/**
 * Evaluates to non-zero when A is a separator character: space, tab or
 * newline.  The argument is parenthesized so that any expression can be
 * passed, but it may be evaluated up to three times and therefore must
 * not have side effects.
 */
#define ISTOKEN(A) ((A) == ' ' || (A) == '\t' || (A) == '\n')
/**
 * Read and parse an HTK Config file, and set the specified option values.
 *
 * Each input line is split into a directive and an argument.  The two may
 * be separated by blanks and/or '=' characters, and the argument ends at
 * the next blank.  Lines whose first character is '#', empty lines and
 * lines that carry no argument are skipped.  Unsupported directives are
 * reported with a warning and otherwise ignored.
 *
 * After the whole file has been read, SOURCERATE (625 is assumed when the
 * file gave none) is stored as the sample period, the sample frequency is
 * derived from it, and frameshift / framesize are divided by it.
 * On success the `loaded` flag of @a para is set to 1.
 *
 * @param HTKconffile [in] HTK Config file path name
 * @param para [out] MFCC parameter to set
 *
 * @return TRUE on success, FALSE on failure (the file could not be opened
 * or closed, or USEHAMMING was given a value not starting with 'T').
 */
boolean
htk_config_file_parse(char *HTKconffile, Value *para)
{
  FILE *fp;
  char buf[512];
  char *p, *d, *a;
  float srate;
  boolean skipped;

  jlog("Stat: para: parsing HTK Config file: %s\n", HTKconffile);

  if ((fp = fopen(HTKconffile, "r")) == NULL) {
    jlog("Error: para: failed to open HTK Config file: %s\n", HTKconffile);
    return FALSE;
  }

  srate = 0.0;			/* 0 means "no SOURCERATE seen yet" */
  while (getl_fp(buf, (int)sizeof(buf), fp) != NULL) {
    p = buf;
    if (*p == '#') {		/* skip comment line */
      continue;
    }

    /* parse the input line to get directive and argument */
    /* skip leading blanks */
    while (*p != '\0' && ISTOKEN(*p)) p++;
    if (*p == '\0') continue;
    /* directive: runs up to the next blank or '=' */
    d = p;
    while (*p != '\0' && (!ISTOKEN(*p)) && *p != '=') p++;
    if (*p == '\0') continue;	/* directive without argument */
    *p = '\0'; p++;
    /* skip the separator (blanks and '=') */
    while (*p != '\0' && ((ISTOKEN(*p)) || *p == '=')) p++;
    if (*p == '\0') continue;
    /* argument: runs up to the next blank */
    a = p;
    while (*p != '\0' && (!ISTOKEN(*p))) p++;
    *p = '\0';

    /* process arguments */
    skipped = FALSE;
    if (strmatch(d, "SOURCERATE")) { /* -smpPeriod */
      srate = atof(a);
    } else if (strmatch(d, "TARGETRATE")) { /* -fshift */
      para->frameshift = atof(a);
    } else if (strmatch(d, "WINDOWSIZE")) { /* -fsize */
      para->framesize = atof(a);
    } else if (strmatch(d, "ZMEANSOURCE")) { /* -zmeansource */
      para->zmeanframe = (a[0] == 'T') ? TRUE : FALSE;
    } else if (strmatch(d, "USEPOWER")) { /* -usepower */
      para->usepower = (a[0] == 'T') ? TRUE : FALSE;
    } else if (strmatch(d, "PREEMCOEF")) { /* -preemph */
      para->preEmph = atof(a);
    } else if (strmatch(d, "USEHAMMING")) { /* (fixed to T) */
      if (a[0] != 'T') {
	jlog("Error: para: USEHAMMING should be T\n");
	/* release the stream before bailing out; the close result is
	   irrelevant because failure is already being reported */
	fclose(fp);
	return FALSE;
      }
    } else if (strmatch(d, "NUMCHANS")) { /* -fbank */
      para->fbank_num = atoi(a);
    } else if (strmatch(d, "CEPLIFTER")) { /* -ceplif */
      para->lifter = atoi(a);
    } else if (strmatch(d, "DELTAWINDOW")) { /* -delwin */
      para->delWin = atoi(a);
    } else if (strmatch(d, "ACCWINDOW")) { /* -accwin */
      para->accWin = atoi(a);
    } else if (strmatch(d, "LOFREQ")) { /* -lofreq */
      para->lopass = atof(a);
    } else if (strmatch(d, "HIFREQ")) { /* -hifreq */
      para->hipass = atof(a);
    } else if (strmatch(d, "RAWENERGY")) { /* -rawe */
      para->raw_e = (a[0] == 'T') ? TRUE : FALSE;
    } else if (strmatch(d, "ENORMALISE")) { /* -enormal */
      para->enormal = (a[0] == 'T') ? TRUE : FALSE;
    } else if (strmatch(d, "ESCALE")) { /* -escale */
      para->escale = atof(a);
    } else if (strmatch(d, "SILFLOOR")) { /* -silfloor */
      para->silFloor = atof(a);
    } else if (strmatch(d, "WARPFREQ")) { /* -vtln (1) */
      para->vtln_alpha = atof(a);
    } else if (strmatch(d, "WARPLCUTOFF")) { /* -vtln (2) */
      para->vtln_lower = atof(a);
    } else if (strmatch(d, "WARPUCUTOFF")) { /* -vtln (3) */
      para->vtln_upper = atof(a);
    } else if (strmatch(d, "TARGETKIND")) {
      jlog("Warning: para: TARGETKIND skipped (will be determined by AM header)\n");
      skipped = TRUE;
    } else if (strmatch(d, "NUMCEPS")) {
      jlog("Warning: para: NUMCEPS skipped (will be determined by AM header)\n");
      skipped = TRUE;
    } else {
      jlog("Warning: para: \"%s\" ignored (not supported, or irrelevant)\n", d);
      skipped = TRUE;
    }
    if (!skipped) {
      jlog("Stat: para: %s=%s\n", d, a);
    }
  }

  if (srate == 0.0) {
    jlog("Warning: no SOURCERATE found\n");
    jlog("Warning: assume source waveform sample rate to 625 (16kHz)\n");
    srate = 625;
  }

  /* TARGETRATE / WINDOWSIZE were stored as read from the file; rescale
     them by the source rate now that it is known */
  para->smp_period = srate;
  para->smp_freq = period2freq(para->smp_period);
  para->frameshift /= srate;
  para->framesize /= srate;

  /* fclose() reports failure with EOF, i.e. any non-zero value */
  if (fclose(fp) != 0) {
    jlog("Error: para: failed to close file\n");
    return FALSE;
  }

  para->loaded = 1;

  return TRUE;
}
/**
 * Set acoustic analysis parameters from HTK HMM definition header information.
 *
 * The qualifier bits of @a param_type are decoded into the delta, acc,
 * energy, c0, absesup and cmn switches.  From those switches and
 * @a vec_size the function then derives:
 *   - mfcc_dim:  number of cepstral coefficients
 *   - baselen:   mfcc_dim plus one for c0 and one for energy, when enabled
 *   - vecbuflen: baselen times the number of blocks (base, delta, acc)
 *   - veclen:    vecbuflen, minus one when absolute energy is suppressed
 *
 * Example: vec_size = 25 with energy, delta and absesup set gives
 * (25 + 1) / 2 - 1 = 12 cepstra, baselen 13, vecbuflen 26, veclen 25.
 *
 * @param para [out] acoustic analysis parameters
 * @param param_type [in] parameter type specified at HMM header
 * @param vec_size [in] vector size type specified at HMM header
 */
void
calc_para_from_header(Value *para, short param_type, short vec_size)
{
  int nblock;			/* base block + delta block + acc block */
  int n_c0, n_energy, n_sup;	/* 1 when the matching switch is on */

  /* decode required parameter extraction types */
  para->cmn = (param_type & F_CEPNORM) ? TRUE : FALSE;
  para->absesup = (param_type & F_ENERGY_SUP) ? TRUE : FALSE;
  para->c0 = (param_type & F_ZEROTH) ? TRUE : FALSE;
  para->energy = (param_type & F_ENERGY) ? TRUE : FALSE;
  para->acc = (param_type & F_ACCL) ? TRUE : FALSE;
  para->delta = (param_type & F_DELTA) ? TRUE : FALSE;

  n_c0 = para->c0 ? 1 : 0;
  n_energy = para->energy ? 1 : 0;
  n_sup = para->absesup ? 1 : 0;
  nblock = 1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0);

  /* guess MFCC dimension from the vector size and parameter type in the
     acoustic HMM: put the suppressed energy back, split into blocks,
     then take the energy-like terms off the base block */
  para->mfcc_dim = (vec_size + n_sup) / nblock - n_energy - n_c0;

  /* determine base size */
  para->baselen = para->mfcc_dim + n_c0 + n_energy;

  /* set required size of parameter vector for MFCC computation */
  para->vecbuflen = para->baselen * nblock;

  /* set size of final parameter vector */
  para->veclen = para->vecbuflen - n_sup;
}
/**
 * Write the acoustic analysis configuration parameters, in human readable
 * form, to the given stream.
 *
 * @param fp [in] file pointer to write to
 * @param para [in] configuration parameter
 *
 */
void
put_para(FILE *fp, Value *para)
{
  fputs(" Acoustic analysis condition:\n", fp);

  /* parameter kind: base name followed by one suffix per qualifier */
  fputs("\t parameter = MFCC", fp);
  if (para->c0) {
    fputs("_0", fp);
  }
  if (para->energy) {
    fputs("_E", fp);
  }
  if (para->delta) {
    fputs("_D", fp);
  }
  if (para->acc) {
    fputs("_A", fp);
  }
  if (para->absesup) {
    fputs("_N", fp);
  }
  if (para->cmn) {
    fputs("_Z", fp);
  }

  /* the same information spelled out, with the vector sizes */
  fprintf(fp, " (%d dim. from %d cepstrum", para->veclen, para->mfcc_dim);
  if (para->c0) {
    fputs(" + c0", fp);
  }
  if (para->energy) {
    fputs(" + energy", fp);
  }
  if (para->absesup) {
    fputs(", abs energy supressed", fp);
  }
  if (para->cmn) {
    fputs(" with CMN", fp);
  }
  fputs(")\n", fp);

  /* sampling and framing; lengths in ms come from the sample period,
     which is counted in 100ns units */
  fprintf(fp, "\tsample frequency = %5d Hz\n", para->smp_freq);
  fprintf(fp, "\t sample period = %4d (1 = 100ns)\n", para->smp_period);
  fprintf(fp, "\t window size = %4d samples (%.1f ms)\n",
	  para->framesize,
	  (float)para->smp_period * (float)para->framesize / 10000.0);
  fprintf(fp, "\t frame shift = %4d samples (%.1f ms)\n",
	  para->frameshift,
	  (float)para->smp_period * (float)para->frameshift / 10000.0);

  /* spectral analysis */
  fprintf(fp, "\t pre-emphasis = %.2f\n", para->preEmph);
  fprintf(fp, "\t # filterbank = %d\n", para->fbank_num);
  fprintf(fp, "\t cepst. lifter = %d\n", para->lifter);

  /* energy */
  fprintf(fp, "\t raw energy = %s\n", para->raw_e ? "True" : "False");
  if (para->enormal) {
    fprintf(fp, "\tenergy normalize = True (scale = %.1f, silence floor = %.1f dB)\n",
	    para->escale, para->silFloor);
  } else {
    fputs("\tenergy normalize = False\n", fp);
  }

  /* regression windows, shown only when the coefficients are in use */
  if (para->delta) {
    fprintf(fp, "\t delta window = %d frames (%.1f ms) around\n",
	    para->delWin,
	    (float)para->delWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
  }
  if (para->acc) {
    fprintf(fp, "\t acc window = %d frames (%.1f ms) around\n",
	    para->accWin,
	    (float)para->accWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
  }

  /* band limitation: a negative cut-off is reported as OFF */
  fputs("\t hi freq cut = ", fp);
  if (para->hipass < 0) {
    fputs("OFF\n", fp);
  } else {
    fprintf(fp, "%5d Hz\n", para->hipass);
  }
  fputs("\t lo freq cut = ", fp);
  if (para->lopass < 0) {
    fputs("OFF\n", fp);
  } else {
    fprintf(fp, "%5d Hz\n", para->lopass);
  }

  /* plain switches */
  fputs("\t zero mean frame = ", fp);
  fputs(para->zmeanframe ? "ON\n" : "OFF\n", fp);
  fputs("\t use power = ", fp);
  fputs(para->usepower ? "ON\n" : "OFF\n", fp);

  /* CVN may hold a value that is neither TRUE nor FALSE */
  fputs("\t CVN = ", fp);
  if (para->cvn == TRUE) {
    fputs("ON\n", fp);
  } else if (para->cvn == FALSE) {
    fputs("OFF\n", fp);
  } else {
    fputs("UNKNOWN\n", fp);
  }

  /* VTLN is reported as OFF when the warping factor is exactly 1.0 */
  fputs("\t VTLN = ", fp);
  if (para->vtln_alpha == 1.0) {
    fputs("OFF\n", fp);
  } else {
    fprintf(fp, "ON, alpha=%.3f, f_low=%.1f, f_high=%.1f\n",
	    para->vtln_alpha, para->vtln_lower, para->vtln_upper);
  }
}