/** * @file ngram_lookup.c * * * @brief N-gram上の語彙エントリの検索 * * * * @brief Look up N-gram entries from its name string * * * @author Akinobu LEE * @date Wed Feb 16 16:42:38 2005 * * $Revision: 1.6 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include #include /** * Make index tree for searching N-gram ID from the entry name. * * @param ndata [in] N-gram data */ void ngram_make_lookup_tree(NGRAM_INFO *ndata) { int i; int *windex; char **wnameindex; windex = (int *)mymalloc(sizeof(int)*ndata->max_word_num); for (i=0;imax_word_num;i++) { windex[i] = i; } wnameindex = (char **)mymalloc(sizeof(char *)*ndata->max_word_num); for (i=0;imax_word_num;i++) { wnameindex[i] = ndata->wname[i]; } ndata->root = make_ptree(wnameindex, windex, ndata->max_word_num, 0, &(ndata->mroot)); free(windex); free(wnameindex); } /** * Look up N-gram ID by entry name. * * @param ndata [in] N-gram data * @param wordstr [in] entry name to search * * @return the found class/word ID, or WORD_INVALID if not found. */ WORD_ID ngram_lookup_word(NGRAM_INFO *ndata, char *wordstr) { int data; data = ptree_search_data(wordstr, ndata->root); if (data == -1 || strcmp(wordstr, ndata->wname[data]) != 0) { return WORD_INVALID; } else { return(data); } } /** * Return N-gram ID of entry name, or unknown class ID if not found. * * @param ndata [in] N-gram data * @param wstr [in] entry name to search * * @return the found class/word ID, or unknown ID if not found. 
*/
WORD_ID
make_ngram_ref(NGRAM_INFO *ndata, char *wstr)
{
  WORD_ID found = ngram_lookup_word(ndata, wstr);

  /* common case: the entry exists in the N-gram */
  if (found != WORD_INVALID) {
    return found;
  }

  /* not found: without the isopen flag this is reported as an error */
  if (!ndata->isopen) {
    jlog("Error: ngram_lookup: \"%s\" not exist in N-gram\n", wstr);
    return WORD_INVALID;
  }

  /* not found, isopen set: warn and substitute the unknown ID */
  jlog("Warning: ngram_lookup: \"%s\" not exist in N-gram, treat as unknown\n", wstr);
  return ndata->unk_id;
}