/**
* @file htk_hmm.h
*
*
* @brief Data structures for handling HTK %HMM definition
*
* This file defines data structures for %HMM definition file in HTK format.
*
*
* @brief HTK形式の%HMMを扱うデータ構造の定義
*
* このファイルには, HTK形式の%HMM定義ファイルを読み込むための構造体が
* 定義されています.
*
*
* @author Akinobu LEE
* @date Thu Feb 10 19:36:47 2005
*
* $Revision: 1.10 $
*
*/
/*
* Copyright (c) 1991-2012 Kawahara Lab., Kyoto University
* Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
* Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology
* All rights reserved
*/
#ifndef __SENT_HTK_HMM_2_H__
#define __SENT_HTK_HMM_2_H__
#include
#include
#include
#include
/**
 * Macro to check whether the current token is "A" (case-insensitive).
 *
 * Expands to an int expression: non-zero when @c rdhmmdef_token is
 * non-NULL and equals @a A under strcasecmp(), zero otherwise.
 * A NULL token (no token left) is reported as "no match" instead of
 * being handed to strcasecmp(), which would be undefined behavior.
 */
#define currentis(A) (rdhmmdef_token && !strcasecmp((A), rdhmmdef_token))
/**
 * Macro to jump to error if no token left.
 *
 * Calls rderr(S) when @c rdhmmdef_token is NULL.  The body is wrapped in
 * do { ... } while (0) so that the macro behaves as a single statement:
 * a bare `if` here would silently capture an `else` that follows
 * `NoTokErr(...);` in the caller.
 */
#define NoTokErr(S) do { if (!rdhmmdef_token) rderr(S); } while (0)
/// Delimiter characters for parsing %HMM definition file: space, tab, newline, '<' and '>'
#define HMMDEF_DELM " \t\n<>"
/**
* @defgroup hmminfo HTK HMM definition
*
* @brief Data structures for HTK %HMM definition
*
* The data is defined at each level, from model and state down to Gaussian
* components (mean and variance). Each level roughly corresponds to a macro
* definition in the HTK definition language. Each item holds links to
* data of the lower level, and is also chained in a linked list with the
* other items of the same level.
*
*
* @brief HTKの%HMMを格納するためのデータ構造定義
*
* データ構造はモデル・状態からガウス分布の平均・分散まで各レベルごとに
* 定義されています.各レベルはおおよそ HTK のマクロ定義に対応しています.
* 各データは, 下位のデータ構造へのポインタ
* および同レベルの構造体同士のリンクリストを保持しています.
*
*
*/
//@{
/// @ingroup hmminfo
/// Possible maximum value of state ID: 2147483647 = 2^31 - 1, the largest 32-bit signed int (state IDs are stored as int, see HTK_HMM_State::id)
#define MAX_STATE_NUM 2147483647
/// Delimiter strings/characters to generate logical triphone names (e.g. "a-k", "e+b")
#define HMM_RC_DLIM "+" ///< Right context delimiter, string form
#define HMM_LC_DLIM "-" ///< Left context delimiter, string form
#define HMM_RC_DLIM_C '+' ///< Right context delimiter, character form
#define HMM_LC_DLIM_C '-' ///< Left context delimiter, character form
/// Default logical name of short pause model
#define SPMODEL_NAME_DEFAULT "sp"
/// Length limit of HMM name (including ones generated in Julius).
/// NOTE(review): whether this counts the terminating NUL is not visible here -- confirm at the use sites.
#define MAX_HMMNAME_LEN 256
/**
 * Method of calculating the approximated acoustic score at inter-word
 * context pseudo phones on a word edge.
 *
 * The numeric values are spelled out because the selected method is
 * kept in a plain short (HTK_HMM_INFO::cdset_method).
 */
enum iwcd_type {
  IWCD_UNDEF = 0, ///< not specified explicitly
  IWCD_MAX   = 1, ///< Use maximum score among context variants
  IWCD_AVG   = 2, ///< Use average score among context variants
  IWCD_NBEST = 3  ///< Use average of N-best scores among context variants
};
/* options info */
/// Stream information: number of streams and the vector size of each.
/// (Original note: current Julius supports only single stream -- TODO confirm this is still accurate.)
typedef struct {
short num; ///< Number of streams
short vsize[MAXSTREAMNUM]; ///< Vector size for each stream (array holds MAXSTREAMNUM entries)
} HTK_HMM_StreamInfo;
/// %HMM global options; the type codes below are defined in htk_defs.h
typedef struct {
HTK_HMM_StreamInfo stream_info; ///< Stream information of this %HMM
short vec_size; ///< Size of parameter vector, in number of dimensions
short cov_type; ///< Type of covariance matrix, see also htk_defs.h
short dur_type; ///< Type of duration, see also htk_defs.h
short param_type; ///< Type of parameter, see also htk_defs.h
} HTK_HMM_Options;
/// %HMM transition table; instances are chained through @a next
typedef struct _HTK_HMM_trans {
char *name; ///< Name (NULL if not defined as Macro)
short statenum; ///< Number of states
PROB **a; ///< Matrix of transition probabilities (presumably statenum x statenum -- TODO confirm against the reader)
int id; ///< Unique transition id starting from 0
struct _HTK_HMM_trans *next; ///< Pointer to next data, NULL if last
} HTK_HMM_Trans;
/// %HMM variance data; instances are chained through @a next.
/// See HTK_HMM_INFO::variance_inversed for whether the stored values have been inversed.
typedef struct _HTK_HMM_variance {
char *name; ///< Name (NULL if not defined as Macro)
VECT *vec; ///< Covariance vector (diagonal)
short len; ///< Number of elements in @a vec
struct _HTK_HMM_variance *next; ///< Pointer to next data, NULL if last
} HTK_HMM_Var;
/// %HMM Gaussian density (or mixture) data; instances are chained through @a next
typedef struct _HTK_HMM_dens {
char *name; ///< Name (NULL if not defined as Macro)
VECT *mean; ///< Mean vector
short meanlen; ///< Number of elements in @a mean
HTK_HMM_Var *var; ///< Link to assigned variance vector
/**
* Constant value in log scale for calculating Gaussian output probability.
* @sa libsent/src/hmminfo/rdhmmdef_dens.c
*/
LOGPROB gconst;
struct _HTK_HMM_dens *next; ///< Pointer to next data, NULL if last
} HTK_HMM_Dens;
/// %HMM stream weight definition; instances are chained through @a next
typedef struct _HTK_HMM_stream_weight {
char *name; ///< Name (NULL for in-line definition)
VECT *weight; ///< Weight of each stream in log scale
short len; ///< Number of elements in @a weight
struct _HTK_HMM_stream_weight *next; ///< Pointer to next data, NULL if last
} HTK_HMM_StreamWeight;
/**
* @brief %HMM mixture PDF for a stream
*
* @note
* In a tied-mixture model (@a tmix is TRUE), @a b points to a codebook
* defined as GCODEBOOK instead of the array of densities, so it must be
* reinterpreted accordingly before use.
*
*/
typedef struct _HTK_HMM_PDF {
char *name; ///< Name (NULL for in-line definition)
boolean tmix; ///< TRUE if this is assigned to tied-mixture codebook
short stream_id; ///< Stream ID to which this pdf is assigned, begins from 0
short mix_num; ///< Number of densities (mixtures) assigned
HTK_HMM_Dens **b; ///< Link array to assigned densities, or pointer to GCODEBOOK in tied-mixture model
PROB *bweight; ///< Mixture weights corresponding to the entries of @a b
struct _HTK_HMM_PDF *next; ///< Pointer to next data, or NULL if last
} HTK_HMM_PDF;
/**
* @brief %HMM state data
*
* A state holds one mixture PDF per stream plus optional stream weights.
* Instances are chained through @a next.
*/
typedef struct _HTK_HMM_state {
char *name; ///< Name (NULL if not defined as Macro)
short nstream; ///< Number of streams
HTK_HMM_StreamWeight *w; ///< Pointer to stream weight data, or NULL if not specified
HTK_HMM_PDF **pdf; ///< Array of mixture PDFs for each stream
int id; ///< Unique state id starting from 0, used for caching of output probability
struct _HTK_HMM_state *next; ///< Pointer to next data, NULL if last
} HTK_HMM_State;
/// Top %HMM model, corresponds to "~h" macro in hmmdefs; instances are chained through @a next
typedef struct _HTK_HMM_data {
char *name; ///< Name (NULL if not defined as Macro)
short state_num; ///< Number of states in this model
HTK_HMM_State **s; ///< Array of states in this model
HTK_HMM_Trans *tr; ///< Link to assigned transition matrix
struct _HTK_HMM_data *next; ///< Pointer to next data, NULL if last
} HTK_HMM_Data;
/// Gaussian mixture codebook in tied-mixture model
typedef struct {
char *name; ///< Codebook name (NULL if not defined as Macro)
int num; ///< Number of mixtures in this codebook
HTK_HMM_Dens **d; ///< Array of links to mixture instances
unsigned short id; ///< Unique id for caching of output probability (note: unsigned short, narrower than the int counters in HTK_HMM_INFO)
} GCODEBOOK;
//@}
/// Set of %HMM states for Gaussian Mixture Selection (GMS)
typedef struct {
HTK_HMM_State *state; ///< Pointer to %HMM states defined for GMS
/* Disabled member, kept for reference: GCODEBOOK *book -- pointer to the corresponding codebook in hmminfo */
} GS_SET;
/**
* @defgroup cdset Context-Dependent HMM set
*
* @brief Set of %HMM states with the same base phone and state location
*
* This structure is used to handle cross-word triphones on the 1st pass.
* For a triphone %HMM at the edge of a word in the tree lexicon,
* each state node holds the set of %HMM states, at the same state location,
* of all triphones sharing the same base phone, instead of a single state.
* This context-dependent %HMM set for cross-word triphones is also
* called a "pseudo" phone in Julius.
*
* When computing the 1st pass, the maximum (or average or N-best average)
* value from the likelihoods of state set will be taken as the output
* probability of the states instead of the actual cross-word triphone.
*
*
* This approximated value will be fixed by re-computation on the 2nd pass.
*
*
* @brief 同じベース音素の同じ位置にある%HMM状態の集合
*
* この構造体は第1パスで単語間トライフォンを扱うのに用いられます.
* 木構造化辞書上で,単語の末端のトライフォン%HMMにおける各状態は,
* 通常の%HMMとは異なりその終端音素と同じベース音素を持つトライフォンの
* 同じ位置の状態のリストを持ちます.このリスト化されたコンテキスト依存
* %HMMの集合は,"pseudo" phone とも呼ばれます.
*
* 第1パス計算時には,その状態の音響尤度は,真の単語間トライフォンの
* 近似値として,リスト中の各状態の音響尤度の最大値(あるいは平均値,
* あるいはNbestの状態の平均値)が用いられる.
*
* この近似値は第2パスで再計算される.
*
*
* @sa htk_hmm.h
* @sa libsent/src/hmminfo/cdhmm.c
* @sa libsent/src/hmminfo/cdset.c
* @sa libsent/src/hmminfo/guess_cdHMM.c
*
*/
//@{
/// @ingroup cdset
/// Context-dependent state set, equivalent to HTK_HMM_State, part of pseudo phone
typedef struct {
HTK_HMM_State **s; ///< Link array to the member states
unsigned short num; ///< Number of states currently stored in @a s
unsigned short maxnum; ///< Allocated capacity of @a s
} CD_State_Set;
/**
* @brief Context-dependent %HMM set (called "pseudo") for a logical context
*
* One set gathers the triphones matching a logical context name:
* "a-k" stands for the triphone list "a-k+*", "e+b" for "*-e+b",
* and "e" for "*-e+*".
* Instances are chained through @a next.
*/
typedef struct _cd_set{
char *name; ///< Logical name of this %HMM set ("a-k", "e+b", "e", etc.)
CD_State_Set *stateset; ///< Array of state sets, one for each state location
unsigned short state_num; ///< Number of state sets in @a stateset
HTK_HMM_Trans *tr; ///< Transition matrix
struct _cd_set *next; ///< Pointer to next data, NULL if last
} CD_Set;
/// Top structure to hold all the context-dependent %HMM sets
typedef struct {
boolean binary_malloc; ///< TRUE if read from binary
APATNODE *cdtree; ///< Root of index tree for name lookup
} HMM_CDSET_INFO;
//@}
/**
* @ingroup cdset
*
* @brief Logical %HMM to map logical names to physical/pseudo %HMM
*
* This data maps a logical %HMM name to a physical (defined) %HMM or to a
* pseudo %HMM.  The logical %HMM names are basically loaded from the
* %HMMList mapping file.  Biphone/monophone %HMM names that are not listed
* in the %HMMList file are mapped to pseudo phones, which represent a
* context-dependent %HMM set.
*
* For example, if the logical biphone %HMM name "e-k" is defined in the %HMM
* definition file, or its mapping is specified in the HMMList file, the
* logical %HMM name "e-k" is mapped to the corresponding defined %HMM.
* If "e-k" exists in neither the %HMM definition file nor the HMMList file,
* the triphones whose name matches "e-k+*" are gathered into the
* context-dependent %HMM set "e-k", and the logical %HMM name "e-k" is
* mapped to this %HMM set.
*
* The context-dependent %HMM is also called a "pseudo" phone in Julius.
*
* @a is_pseudo tells which member of the @a body union is valid.
*/
typedef struct _HMM_logical {
char *name; ///< Name string of this logical %HMM
boolean is_pseudo; ///< TRUE if this is mapped to pseudo %HMM (use body.pseudo), otherwise use body.defined
/// Actual body of state definition
union {
HTK_HMM_Data *defined; ///< pointer to the mapped physical %HMM
CD_Set *pseudo; ///< pointer to the mapped pseudo %HMM
} body;
struct _HMM_logical *next; ///< Pointer to next data, NULL if last
} HMM_Logical;
/**
* @ingroup hmminfo
*
* @brief Basephone information extracted from hmminfo
*
* The two flags record where the phone can occur within a word, as
* determined by the word dictionary.
*/
typedef struct {
char *name; ///< Base phone name
boolean bgnflag; ///< TRUE if it can appear at a word beginning, as determined by the word dictionary
boolean endflag; ///< TRUE if it can appear at a word end, as determined by the word dictionary
} BASEPHONE;
/**
* @ingroup hmminfo
*
* @brief List of all basephones in hmminfo
*/
typedef struct {
int num; ///< Total number of base phones
int bgnnum; ///< Number of phones that can appear at a word beginning
int endnum; ///< Number of phones that can appear at a word end
APATNODE *root; ///< Root of index tree for name lookup
} HMM_basephone;
/**
* @ingroup hmminfo
*
* @brief Top %HMM structure that holds all the HTK %HMM definition
*
* It gathers the list heads of every definition level, the name-lookup
* index trees, the derived base phone / pseudo phone information, and
* miscellaneous flags and statistics of the loaded model.
*/
typedef struct {
/**
* @name %HMM definitions from hmmdefs
*/
//@{
HTK_HMM_Options opt; ///< Global option
HTK_HMM_Trans *trstart; ///< Root pointer to the list of transition matrices
HTK_HMM_Var *vrstart; ///< Root pointer to the list of variance data
HTK_HMM_Dens *dnstart; ///< Root pointer to the list of density (mixture) data
HTK_HMM_PDF *pdfstart; ///< Root pointer to the list of mixture pdf data
HTK_HMM_StreamWeight *swstart; ///< Root pointer to the list of stream weight data
HTK_HMM_State *ststart; ///< Root pointer to the list of state data
HTK_HMM_Data *start; ///< Root pointer to the list of models
//@}
/**
* @name logical %HMM
*/
//@{
HMM_Logical *lgstart; ///< Root pointer to the list of Logical %HMMs
//@}
/**
* @name Root nodes of index tree for name lookup of %HMM instances
*/
//@{
APATNODE *tr_root; ///< Root index node for transition matrices
APATNODE *vr_root; ///< Root index node for variance data
APATNODE *sw_root; ///< Root index node for stream weight data
APATNODE *dn_root; ///< Root index node for density data
APATNODE *pdf_root; ///< Root index node for mixture PDF
APATNODE *st_root; ///< Root index node for state data
APATNODE *physical_root; ///< Root index node for defined %HMM name
APATNODE *logical_root; ///< Root index node for logical %HMM name
APATNODE *codebook_root; ///< Root index node for Gaussian codebook of tied mixture %HMM
//@}
/**
* @name Information extracted from %HMM instances
*/
//@{
HMM_basephone basephone; ///< Base phone names extracted from logical %HMM
HMM_CDSET_INFO cdset_info; ///< Context-dependent pseudo phone set
//@}
/**
* @name Misc. model information
*/
//@{
boolean need_multipath; ///< TRUE if this model needs multipath handling
boolean multipath; ///< TRUE if this model is treated in multipath mode
boolean is_triphone; ///< TRUE if this is triphone model
boolean is_tied_mixture; ///< TRUE if this is tied-mixture model
short cdset_method; ///< Selected method of computing pseudo phones, a value of enum iwcd_type
short cdmax_num; ///< Number of N-best states when IWCD_NBEST
HMM_Logical *sp; ///< Link to short pause model
LOGPROB iwsp_penalty; ///< Extra transition penalty for interword skippable short pause insertion for multi-path mode
boolean variance_inversed; ///< TRUE if variances are inversed
int totaltransnum; ///< Total number of transitions
int totalmixnum; ///< Total number of defined mixtures
int totalstatenum; ///< Total number of states
int totalhmmnum; ///< Total number of physical %HMM
int totallogicalnum; ///< Total number of logical %HMM
int totalpseudonum; ///< Total number of pseudo %HMM
int totalpdfnum; ///< Total number of mixture PDF
int codebooknum; ///< Total number of codebooks on tied-mixture model
int maxcodebooksize; ///< Maximum size of codebook on tied-mixture model
int maxmixturenum; ///< Maximum number of Gaussians per mixture
int maxstatenum; ///< Maximum number of states per model
BMALLOC_BASE *mroot; ///< Pointer for block memory allocation
BMALLOC_BASE *lroot; ///< Pointer for block memory allocation for logical HMM
BMALLOC_BASE *cdset_root; ///< Pointer for block memory allocation; NOTE(review): original text said "for logical HMM" (same as lroot), presumably this one serves the context-dependent (pseudo) HMM sets -- confirm
int *tmp_mixnum; ///< Work area for state reading
#ifdef ENABLE_MSD
boolean has_msd; ///< TRUE if this model contains MSD part
#endif
void *hook; ///< General purpose hook
//@}
} HTK_HMM_INFO;
#ifdef __cplusplus
extern "C" {
#endif
/* init_phmm.c */
void htk_hmm_set_pause_model(HTK_HMM_INFO *hmminfo, char *spmodel_name);
/* rdhmmdef.c: top-level reader of an HMM definition file.
 * rderr() is the error routine invoked by the NoTokErr() macro. */
void rderr(char *str);
char *read_token(FILE *fp);
boolean rdhmmdef(FILE *fp, HTK_HMM_INFO *hmm);
void htk_hmm_inverse_variances(HTK_HMM_INFO *hmm);
#ifdef ENABLE_MSD
/* only declared when MSD support is compiled in */
void htk_hmm_check_msd(HTK_HMM_INFO *hmm);
#endif
/*
 * Per-level readers of the HMM definition file.  Every prototype names
 * its parameters the same way: `name` for a macro name, `fp` for the
 * input stream, `hmm` for the HTK_HMM_INFO being filled, `keyname` for a
 * lookup key.
 */
/* rdhmmdef_options.c */
boolean set_global_opt(FILE *fp, HTK_HMM_INFO *hmm);
char *get_cov_str(short covtype);
char *get_dur_str(short durtype);
/* rdhmmdef_trans.c */
void trans_add(HTK_HMM_INFO *hmm, HTK_HMM_Trans *newParam);
HTK_HMM_Trans *get_trans_data(FILE *fp, HTK_HMM_INFO *hmm);
void def_trans_macro(char *name, FILE *fp, HTK_HMM_INFO *hmm);
/* rdhmmdef_state.c */
HTK_HMM_State *get_state_data(FILE *fp, HTK_HMM_INFO *hmm);
void def_state_macro(char *name, FILE *fp, HTK_HMM_INFO *hmm);
HTK_HMM_State *state_lookup(HTK_HMM_INFO *hmm, char *keyname);
void state_add(HTK_HMM_INFO *hmm, HTK_HMM_State *newParam);
/* rdhmmdef_mpdf.c */
void mpdf_add(HTK_HMM_INFO *hmm, HTK_HMM_PDF *newParam);
HTK_HMM_PDF *mpdf_lookup(HTK_HMM_INFO *hmm, char *keyname);
HTK_HMM_PDF *get_mpdf_data(FILE *fp, HTK_HMM_INFO *hmm, int mix_num, short stream_id);
void def_mpdf_macro(char *name, FILE *fp, HTK_HMM_INFO *hmm);
/* rdhmmdef_dens.c */
HTK_HMM_Dens *get_dens_data(FILE *fp, HTK_HMM_INFO *hmm);
void def_dens_macro(char *name, FILE *fp, HTK_HMM_INFO *hmm);
HTK_HMM_Dens *dens_lookup(HTK_HMM_INFO *hmm, char *keyname);
void dens_add(HTK_HMM_INFO *hmm, HTK_HMM_Dens *newParam);
/* rdhmmdef_var.c */
HTK_HMM_Var *get_var_data(FILE *fp, HTK_HMM_INFO *hmm);
void def_var_macro(char *name, FILE *fp, HTK_HMM_INFO *hmm);
void var_add(HTK_HMM_INFO *hmm, HTK_HMM_Var *newParam);
/* rdhmmdef_streamweight.c */
HTK_HMM_StreamWeight *get_streamweight_data(FILE *fp, HTK_HMM_INFO *hmm);
void def_streamweight_macro(char *name, FILE *fp, HTK_HMM_INFO *hmm);
void sw_add(HTK_HMM_INFO *hmm, HTK_HMM_StreamWeight *newParam);
/* rdhmmdef_data.c */
void def_HMM(char *name, FILE *fp, HTK_HMM_INFO *hmm);
HTK_HMM_Data *htk_hmmdata_new(HTK_HMM_INFO *hmm);
void htk_hmmdata_add(HTK_HMM_INFO *hmm, HTK_HMM_Data *newParam);
/* rdhmmdef_tiedmix.c */
void tmix_read(FILE *fp, HTK_HMM_PDF *mpdf, HTK_HMM_INFO *hmm);
void codebook_add(HTK_HMM_INFO *hmm, GCODEBOOK *newParam);
/* rdhmmdef_regtree.c */
void def_regtree_macro(char *name, FILE *fp, HTK_HMM_INFO *hmm);
/* rdhmmdef_hmmlist.c: text and binary I/O of the HMMList mapping;
 * each function reports its result as boolean */
boolean rdhmmlist(FILE *fp, HTK_HMM_INFO *hmminfo);
boolean save_hmmlist_bin(FILE *fp, HTK_HMM_INFO *hmminfo);
boolean load_hmmlist_bin(FILE *fp, HTK_HMM_INFO *hmminfo);
/* put_htkdata_info.c: output of each definition level to the stream fp */
void put_htk_trans(FILE *fp, HTK_HMM_Trans *t);
void put_htk_var(FILE *fp, HTK_HMM_Var *v);
void put_htk_dens(FILE *fp, HTK_HMM_Dens *d);
void put_htk_mpdf(FILE *fp, HTK_HMM_PDF *m);
void put_htk_state(FILE *fp, HTK_HMM_State *s);
void put_htk_hmm(FILE *fp, HTK_HMM_Data *h);
void put_logical_hmm(FILE *fp, HMM_Logical *l);
void print_hmmdef_info(FILE *fp, HTK_HMM_INFO *hmminfo);
/* HTK_HMM_INFO creation, release, loading and name lookup
 * (the implementing source files are not named in this header). */
/* hmminfo_new takes no arguments; (void) makes this a real prototype so
 * that calls with stray arguments are rejected by the compiler. */
HTK_HMM_INFO *hmminfo_new(void);
boolean hmminfo_free(HTK_HMM_INFO *hmminfo);
boolean init_hmminfo(HTK_HMM_INFO *hmminfo, char *filename, char *mapfile, Value *para);
HTK_HMM_Data *htk_hmmdata_lookup_physical(HTK_HMM_INFO *hmminfo, char *keyname);
HMM_Logical *htk_hmmdata_lookup_logical(HTK_HMM_INFO *hmminfo, char *keyname);
void hmm_add_physical_to_logical(HTK_HMM_INFO *hmminfo);
void hmm_add_pseudo_phones(HTK_HMM_INFO *hmminfo);
/* chkhmmlist.c */
void make_hmm_basephone_list(HTK_HMM_INFO *hmminfo);
/* HMM type check functions (each reports its result as boolean) */
boolean htk_hmm_has_several_arc_on_edge(HTK_HMM_INFO *hmminfo);
boolean check_hmm_limit(HTK_HMM_Data *dt);
boolean check_all_hmm_limit(HTK_HMM_INFO *hmm);
boolean check_hmm_options(HTK_HMM_INFO *hmm);
boolean is_skippable_model(HTK_HMM_Data *d);
/* CCD related: context-dependent name handling.
 * NOTE(review): add_right_context()/add_left_context() receive the name
 * buffer without a length, and the *_name() functions write into a
 * caller-supplied buf -- presumably both must hold MAX_HMMNAME_LEN
 * characters; confirm against the implementations. */
boolean guess_if_cd_hmm(HTK_HMM_INFO *hmm);
HMM_Logical *get_right_context_HMM(HMM_Logical *base, char *rc_name, HTK_HMM_INFO *hmminfo);
HMM_Logical *get_left_context_HMM(HMM_Logical *base, char *lc_name, HTK_HMM_INFO *hmminfo);
void add_right_context(char name[], char *rc);
void add_left_context(char name[], char *lc);
char *center_name(char *hmmname, char *buf);
char *leftcenter_name(char *hmmname, char *buf);
char *rightcenter_name(char *hmmname, char *buf);
/* CD_SET related: construction, lookup and release of the
 * context-dependent (pseudo) HMM sets described by CD_Set */
boolean regist_cdset(APATNODE **root, HTK_HMM_Data *d, char *cdname, BMALLOC_BASE **mroot);
boolean make_cdset(HTK_HMM_INFO *hmminfo);
void put_all_cdinfo(HTK_HMM_INFO *hmminfo);
void free_cdset(APATNODE **root, BMALLOC_BASE **mroot);
CD_Set *cdset_lookup(HTK_HMM_INFO *hmminfo, char *cdstr);
CD_Set *lcdset_lookup_by_hmmname(HTK_HMM_INFO *hmminfo, char *hmmname);
CD_Set *rcdset_lookup_by_hmmname(HTK_HMM_INFO *hmminfo, char *hmmname);
/* Accessors taking a logical HMM, which may be physical or pseudo (see HMM_Logical) */
int hmm_logical_state_num(HMM_Logical *lg);
HTK_HMM_Trans *hmm_logical_trans(HMM_Logical *lg);
#include
/* Checks of an input parameter (HTK_Param) against the HMM definition */
boolean check_param_coherence(HTK_HMM_INFO *hmm, HTK_Param *pinfo);
boolean check_param_basetype(HTK_HMM_INFO *hmm, HTK_Param *pinfo);
int param_check_and_adjust(HTK_HMM_INFO *hmm, HTK_Param *pinfo, boolean vflag);
/* binary format: writer and reader of the binary HMM file; both report their result as boolean */
boolean write_binhmm(FILE *fp, HTK_HMM_INFO *hmm, Value *para);
boolean read_binhmm(FILE *fp, HTK_HMM_INFO *hmm, boolean gzfile_p, Value *para);
#ifdef __cplusplus
}
#endif
#endif /* __SENT_HTK_HMM_2_H__ */