/** * @file ngram_compact_context.c * * * @brief N-gram構造体のバックオフデータのコンパクト化 * * * * @brief Compaction of back-off elements in N-gram data. * * * @author Akinobu Lee * @date Sat Aug 11 11:50:58 2007 * * $Revision: 1.9 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include /** * * Compaction of back-off elements in N-gram data. * * @param ndata [i/o] N-gram information * @param n [i] N of N-gram * * @return TRUE on success, or FALSE on failure. * */ boolean ngram_compact_context(NGRAM_INFO *ndata, int n) { NNID i; NNID c; NNID dst; NNID ntmp; NGRAM_TUPLE_INFO *this, *up; this = &(ndata->d[n-1]); up = &(ndata->d[n]); /* count number of valid context */ c = 0; for(i=0;ibgnlistlen;i++) { if ((up->is24bit == TRUE && up->bgn_upper[i] != NNID_INVALID_UPPER) || (up->is24bit == FALSE && up->bgn[i] != NNID_INVALID)) { c++; } else { if (up->num[i] != 0) { jlog("Error: ngram_compact_context: internal error\n"); return FALSE; } if (this->bo_wt[i] != 0.0) { jlog("Warning: ngram_compact_context: found a %d-gram that has non-zero back-off weight but not a context of upper N-gram (%f)\n", n, this->bo_wt[i]); jlog("Warning: ngram_compact_context: context compaction disabled\n"); ndata->d[n-1].ct_compaction = FALSE; return TRUE; /* no op */ } } } if (this->totalnum == c) { jlog("Stat: ngram_compact_context: %d-gram has full bo_wt, compaction disabled\n", n); ndata->d[n-1].ct_compaction = FALSE; return TRUE; /* no op */ } if (c >= NNID_MAX_24) { jlog("Stat: ngram_compact_context: %d-gram bo_wt exceeds 24bit, compaction diabled\n", n); ndata->d[n-1].ct_compaction = FALSE; return TRUE; /* no op */ } this->context_num = c; jlog("Stat: ngram_compact_context: %d-gram back-off weight compaction: %d -> %d\n", n, this->totalnum, this->context_num); /* allocate index buffer */ this->nnid2ctid_upper = (NNID_UPPER *)mymalloc(sizeof(NNID_UPPER) * this->totalnum); this->nnid2ctid_lower = (NNID_LOWER *)mymalloc(sizeof(NNID_LOWER) * this->totalnum); /* make index and do compaction of context informations */ dst = 0; for(i=0;ibgnlistlen;i++) { if ((up->is24bit == TRUE && up->bgn_upper[i] != NNID_INVALID_UPPER) || (up->is24bit == FALSE && up->bgn[i] != NNID_INVALID)) { this->bo_wt[dst] = this->bo_wt[i]; if (up->is24bit) { up->bgn_upper[dst] = up->bgn_upper[i]; up->bgn_lower[dst] = up->bgn_lower[i]; } else { up->bgn[dst] = up->bgn[i]; } up->num[dst] = up->num[i]; ntmp = dst & 0xffff; this->nnid2ctid_lower[i] = ntmp; ntmp = dst >> 16; this->nnid2ctid_upper[i] = ntmp; dst++; } else { this->nnid2ctid_upper[i] = NNID_INVALID_UPPER; this->nnid2ctid_lower[i] = 0; } } up->bgnlistlen = this->context_num; /* shrink the memory area */ this->bo_wt = (LOGPROB *)myrealloc(this->bo_wt, sizeof(LOGPROB) * this->context_num); if (up->is24bit) { up->bgn_upper = (NNID_UPPER *)myrealloc(up->bgn_upper, sizeof(NNID_UPPER) * up->bgnlistlen); up->bgn_lower = (NNID_LOWER *)myrealloc(up->bgn_lower, sizeof(NNID_LOWER) * up->bgnlistlen); } else { up->bgn = (NNID *)myrealloc(up->bgn, sizeof(NNID) * up->bgnlistlen); } up->num = (WORD_ID *)myrealloc(up->num, sizeof(WORD_ID) * up->bgnlistlen); /* finished compaction */ ndata->d[n-1].ct_compaction = TRUE; return TRUE; }