/** * @file dfa.h * * * @brief Definitions for DFA grammar and category-pair information * * This file includes definitions for a finite state grammar called DFA. * * DFA is a deterministic finite state automaton describing grammartical * constraint, using the category number of each dictionary word as an input. * It also holds lists of words belonging for each categories. * * Additionaly, the category-pair information will be generated from the given * DFA by extracting allowed connections between categories. It will be used * as a degenerated constraint of word connection at the 1st pass. * * * @brief 決定性有限状態オートマトン文法(DFA)およびカテゴリ対情報の構造体定義 * * このファイルには, DFAと呼ばれる有限状態文法の構造体が定義されています. * * DFAは, 単語のカテゴリ番号を入力とする決定性オートマトンで,構文制約を * 表現します.カテゴリごとの単語リストも保持します. * * また,第1パスの認識のために,DFAカテゴリ間の接続関係のみを抜き出した * 単語対情報も保持します.これは文法を読みだし後に内部でDFAから抽出されます. * * * @author Akinobu LEE * @date Thu Feb 10 18:21:27 2005 * * $Revision: 1.7 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __SENT_DFA_H__ #define __SENT_DFA_H__ #include #define DFA_STATESTEP 1000 ///< Allocation step of DFA state #define DFA_CP_MINSTEP 20 ///< Minimum initial CP data size per category #define INITIAL_S 0x10000000 ///< Status flag mask specifying an initial state #define ACCEPT_S 0x00000001 ///< Status flag mask specifying an accept state /// Transition arc of DFA typedef struct _dfa_arc { short label; ///< Input(=category ID) corresponding to this arc int to_state; ///< Next state to move struct _dfa_arc *next; ///< Pointer to the next arc in the same state, NULL if last } DFA_ARC; /// State of DFA typedef struct { int number; ///< Unique ID unsigned int status; ///< Status flag DFA_ARC *arc; ///< Pointer to its arc list, NULL if has no arc } DFA_STATE; /// Information of each terminal symbol (=category) typedef struct { int term_num; ///< Total number of category WORD_ID **tw; ///< Word lists in each category as @c [c][0..wnum[c]-1] int *wnum; ///< Number of words in each category } TERM_INFO; /// Top structure of a DFA typedef struct { DFA_STATE *st; ///< Array of all states int maxstatenum; ///< Number of maximum allocated states int state_num; ///< Total number of states actually defined int arc_num; ///< Total number of arcs int term_num; ///< Total number of categories int **cp; ///< Store constraint whether @c c2 can follow @c c1 int *cplen; ///< Lengthes of each bcp int *cpalloclen; ///< Allocated lengthes of each cp int *cp_begin; ///< Store constraint whether @c c can appear at beginning of sentence int cp_begin_len; ///< Length of cp_begin int cp_begin_alloclen; ///< Allocated length of cp_begin int *cp_end; ///< Store constraint whether @c c can appear at end of sentence int cp_end_len; ///< Length of cp_end int cp_end_alloclen; ///< Allocated length of cp_end TERM_INFO term; ///< Information of terminal symbols (category) boolean *is_sp; ///< TRUE if the category contains only \a sp word WORD_ID sp_id; ///< Word ID of short pause word } DFA_INFO; #ifdef __cplusplus extern "C" { #endif DFA_INFO *dfa_info_new(); void dfa_info_free(DFA_INFO *dfa); void dfa_state_init(DFA_INFO *dinfo); void dfa_state_expand(DFA_INFO *dinfo, int needed); boolean rddfa(FILE *fp, DFA_INFO *dinfo); boolean rddfa_fp(FILE *fp, DFA_INFO *dinfo); boolean rddfa_line(char *line, DFA_INFO *dinfo, int *state_max, int *arc_num, int *terminal_max); void dfa_append(DFA_INFO *dst, DFA_INFO *src, int soffset, int coffset); boolean init_dfa(DFA_INFO *dinfo, char *filename); WORD_ID dfa_symbol_lookup(DFA_INFO *dinfo, char *terminalname); boolean extract_cpair(DFA_INFO *dinfo); boolean cpair_append(DFA_INFO *dst, DFA_INFO *src, int coffset); void print_dfa_info(FILE *fp, DFA_INFO *dinfo); void print_dfa_cp(FILE *fp, DFA_INFO *dinfo); boolean dfa_cp(DFA_INFO *dfa, int i, int j); boolean dfa_cp_begin(DFA_INFO *dfa, int i); boolean dfa_cp_end(DFA_INFO *dfa, int i); void set_dfa_cp(DFA_INFO *dfa, int i, int j, boolean value); void set_dfa_cp_begin(DFA_INFO *dfa, int i, boolean value); void set_dfa_cp_end(DFA_INFO *dfa, int i, boolean value); void init_dfa_cp(DFA_INFO *dfa); void malloc_dfa_cp(DFA_INFO *dfa, int term_num, int size); void realloc_dfa_cp(DFA_INFO *dfa, int old_term_num, int new_term_num); void free_dfa_cp(DFA_INFO *dfa); void dfa_cp_output_rawdata(FILE *fp, DFA_INFO *dfa); void dfa_cp_count_size(DFA_INFO *dfa, unsigned long *size_ret, unsigned long *allocsize_ret); boolean dfa_cp_append(DFA_INFO *dfa, DFA_INFO *src, int offset); #include boolean make_dfa_voca_ref(DFA_INFO *dinfo, WORD_INFO *winfo); void make_terminfo(TERM_INFO *tinfo, DFA_INFO *dinfo, WORD_INFO *winfo); void free_terminfo(TERM_INFO *tinfo); void terminfo_append(TERM_INFO *dst, TERM_INFO *src, int coffset, int woffset); #include void dfa_find_pause_word(DFA_INFO *dfa, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo); boolean dfa_pause_word_append(DFA_INFO *dst, DFA_INFO *src, int coffset); #ifdef __cplusplus } #endif #endif /* __SENT_DFA_H__ */