/** * @file search.h * * * @brief 第2パスで使用する仮説候補を扱う構造体 * * ここでは,第2パスのスタックデコーディングで用いられる仮説候補の構造体 * が定義されています. NODE は部分文候補を保持し,合計スコアや現在のViterbi * スコア,言語スコア,信頼度スコア,推定された終端フレームなどの様々な仮説 * 情報を保持します. WordGraph は単語グラフ生成時にグラフ中の単語をあらわす * のに用いられます. NEXTWORD は単語展開時に次単語候補を表現します. POPNODE * は探索空間可視化機能 (--enable-visualize) 指定時に,探索の過程を残しておく * のに使われます. * * * * @brief Strucures for handling hypotheses on the 2nd pass. * * * This file includes definitions for handling hypothesis used on the 2nd * pass stack decoding. Partial sentence hypotheses are stored in NODE * structure, with its various information about total scores, viterbi scores, * language scores, confidence scores, estimated end frame, and so on. * WordGraph express a word in graph, generated through the 2nd pass. * NEXTWORD is used to hold next word information at * hypothesis expantion stage. POPNODE will be used when Visualization is * enabled to store the search trail. * * @author Akinobu Lee * @date Wed Sep 07 07:40:11 2005 * * $Revision: 1.4 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __J_SEARCH_H__ #define __J_SEARCH_H__ /** * * 第2パスの次単語候補. ある仮説から次に接続しうる単語の集合をあらわすのに * 用いられる. * * * Next word candidate in the 2nd pass. This will be used to hold word * candidates that can be connected to a given hypothesis. * */ typedef struct __nextword__ { WORD_ID id; ///< Word ID LOGPROB lscore; ///< Language score of this word (always 0 for dfa) int next_state; ///< (dfa) Next DFA grammar state ID boolean can_insert_sp; ///< (dfa) TRUE if a short pause can insert between source hypothesis and this word TRELLIS_ATOM *tre; ///< Pointer to the corresponding word in trellis } NEXTWORD; #ifdef VISUALIZE /** * * 可視化機能用に,第2パスでpopされたトレリス単語の情報を保持する. * * * Store popped trellis words on the 2nd pass for visualization. * */ typedef struct __popnode__ { TRELLIS_ATOM *tre; ///< Last referred trellis word LOGPROB score; ///< Total score when expanded (g(x)+h(x)) struct __popnode__ *last; ///< Link to previous word context struct __popnode__ *next; ///< List pointer to next data } POPNODE; #endif /* VISUALIZE */ /** * * 第2パスの文仮説 * * * Sentence hypothesis at 2nd pass * */ typedef struct __node__ { struct __node__ *next; ///< Link to next hypothesis, used in stack struct __node__ *prev; ///< Link to previous hypothesis, used in stack boolean endflag; ///< TRUE if this is a final sentence result WORD_ID seq[MAXSEQNUM]; ///< Word sequence short seqnum; ///< Length of @a seq LOGPROB score; ///< Total score (forward+backward, LM+AM) short bestt; ///< Best connection frame of last word in word trellis short estimated_next_t; ///< Estimated next connection time frame (= beginning of last word on word trellis): next word hypothesis will be looked up near this frame on word trellis LOGPROB *g; ///< Current forward viterbi score in each frame LOGPROB final_g; ///< Extra forward score on end of frame for multipath mode int state; ///< (dfa) Current DFA state ID TRELLIS_ATOM *tre; ///< Trellis word of last word #ifndef PASS2_STRICT_IWCD /* for inter-word context dependency, the last phone on previous word need to be calculated later */ LOGPROB *g_prev; ///< Viterbi score back to last 1 phoneme #endif HMM_Logical *last_ph; ///< Last applied triphone boolean last_ph_sp_attached; ///< Last phone which the inter-word sp has been attached for multipath mode LOGPROB lscore; ///< N-gram score of last word (will be used for 1-phoneme backscan and graph output, always 0 for dfa LOGPROB totallscore; ///< (n-gram) Accumulated language score (LM only) #ifdef CONFIDENCE_MEASURE #ifdef CM_MULTIPLE_ALPHA LOGPROB cmscore[MAXSEQNUM][100]; ///< Confidence score of each word (multiple) #else LOGPROB cmscore[MAXSEQNUM]; ///< Confidence score of each word #endif /* CM_MULTIPLE_ALPHA */ #endif /* CONFIDENCE_MEASURE */ #ifdef VISUALIZE POPNODE *popnode; ///< Pointer to last popped node #endif #ifdef GRAPHOUT_PRECISE_BOUNDARY short *wordend_frame; ///< Buffer to store propagated word end score for word boundary adjustment LOGPROB *wordend_gscore; ///< Buffer to store propagated scores at word end for word boundary adjustment #endif WordGraph *prevgraph; ///< Graph word corresponding to the last word WordGraph *lastcontext; ///< Graph word of next previous word #ifndef GRAPHOUT_PRECISE_BOUNDARY LOGPROB tail_g_score; ///< forward g score for later score adjustment #endif struct __recogprocess__ *region; ///> Where this node belongs to } NODE; /* HOW SCORES ARE CALCULATED: 0 bestt T-1 |-h(n)---->|<------------g(n)--------------| ============================================================== |\ | ..... ..... | \estimated_next_t | =backward trellis --------------------\------------------------------------| (1st pass) | \ | seq[seqnum-1] | \_ | | \bestt | =========================+==================================================== | \ |<-g[0..T-1] | \ | seq[seqnum-2] | \__ | | \ | --------------------------------\------------------------| (last_ph)| \__ | |_ _ _ _ _ _ _ _ _ _ _\ _ _ _ _ _ _ _ _ _ _| seq[seqnum-3] | \______ |<--g_prev[0..T-1] | \___ | | \ | -------------------------------------------------\-------| ...... ...... (2nd pass) | \_| =============================================================== */ #endif /* __J_SEARCH_H__ */