/**
* @file ngram_util.c
*
*
* @brief N-gramの情報をテキスト出力する
*
*
*
* @brief Output some N-gram information to stdout
*
*
* @author Akinobu LEE
* @date Wed Feb 16 17:18:55 2005
*
* $Revision: 1.9 $
*
*/
/*
* Copyright (c) 1991-2012 Kawahara Lab., Kyoto University
* Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
* Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology
* All rights reserved
*/
#include
#include
/**
* Get the work area size of an N-gram tuple.
*
* @param t [in] N-gram tuple structure
*
* @return the size in bytes
*
*/
static unsigned int
get_ngram_tuple_bytes(NGRAM_TUPLE_INFO *t)
{
unsigned int size, unit;
size = 0;
if (t->num != NULL) { /* other than 1-gram */
/* bgn */
if (t->is24bit) {
unit = sizeof(NNID_UPPER) + sizeof(NNID_LOWER);
} else {
unit = sizeof(NNID);
}
/* num */
unit += sizeof(WORD_ID);
size += unit * t->bgnlistlen;
}
/* prob */
unit = sizeof(LOGPROB);
/* nnid2wid */
if (t->nnid2wid) unit += sizeof(WORD_ID);
size += unit * t->totalnum;
if (t->bo_wt) {
if (t->ct_compaction) {
/* nnid2ctid */
unit = sizeof(NNID_UPPER) + sizeof(NNID_LOWER);
size += unit * t->totalnum;
}
/* bo_wt */
size += sizeof(LOGPROB) * t->context_num;
}
return size;
}
/**
* Output misccelaneous information of N-gram to standard output.
*
* @param fp [in] file pointer
* @param ndata [in] N-gram data
*/
void
print_ngram_info(FILE *fp, NGRAM_INFO *ndata)
{
int i;
fprintf(fp, " N-gram info:\n");
//fprintf(fp, "\t struct version = %d\n", ndata->version);
fprintf(fp, "\t spec = %d-gram", ndata->n);
if (ndata->dir == DIR_RL) {
fprintf(fp, ", backward (right-to-left)\n");
} else {
fprintf(fp, ", forward (left-to-right)\n");
}
if (ndata->isopen) {
fprintf(fp, "\t OOV word = %s(id=%d)\n", ndata->wname[ndata->unk_id],ndata->unk_id);
if (ndata->unk_num != 0) {
fprintf(fp, "\t OOV size = %d words in dict\n", ndata->unk_num);
}
} else {
fprintf(fp, "\t OOV word = none (assume close vocabulary)\n");
}
fprintf(fp, "\t wordset size = %d\n", ndata->max_word_num);
for(i=0;in;i++) {
fprintf(fp, "\t %d-gram entries = %10lu (%5.1f MB)", i+1, (long unsigned int)ndata->d[i].totalnum, get_ngram_tuple_bytes(&(ndata->d[i])) / 1048576.0);
if (ndata->d[i].bo_wt != NULL && ndata->d[i].totalnum != ndata->d[i].context_num) {
fprintf(fp, " (%d%% are valid contexts)", ndata->d[i].context_num * 100 / ndata->d[i].totalnum);
}
fprintf(fp, "\n");
}
if (ndata->bo_wt_1) {
fprintf(fp, "\tLR 2-gram entries= %10lu (%5.1f MB)\n", (long unsigned int)ndata->d[1].totalnum,
(sizeof(LOGPROB) * ndata->d[1].totalnum + sizeof(LOGPROB) * ndata->d[0].context_num) / 1048576.0);
}
fprintf(fp, "\t pass1 = ");
if (ndata->dir == DIR_RL) {
if (ndata->bo_wt_1) {
fprintf(fp, "given additional forward 2-gram\n");
} else {
fprintf(fp, "estimate 2-gram from the backward 2-gram\n");
}
} else {
fprintf(fp, "2-gram in the forward n-gram\n");
}
}