/**
* @file cpair.c
*
*
* @brief カテゴリ対制約へのアクセス関数およびメモリ管理
*
* カテゴリ対制約のメモリ確保,およびカテゴリ間の接続の可否を返す関数です.
*
*
*
* @brief Category-pair constraint handling
*
* Functions to allocate memory for category-pair constraint, and functions
* to return whether the given category pairs can be connected or not are
* defined here.
*
*
* @author Akinobu LEE
* @date Tue Feb 15 13:54:44 2005
*
* $Revision: 1.6 $
*
*/
/*
* Copyright (c) 1991-2012 Kawahara Lab., Kyoto University
* Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
* Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology
* All rights reserved
*/
#include
#include
/**
* Search for a terminal ID in a cp list. Set its location to loc.
* When not found, the location where to insert the key data iwill be set.
*
* @param list [in] cp list
* @param len [in] length of list
* @param key [in] a terminal ID value to find
* @param loc [out] return the to-be-inserted location of the key
*
* @return its location when found, or -1 when not found.
*
*/
static int
cp_find(int *list, int len, int key, int *loc)
{
int left, right, mid;
int ret;
if (len == 0) {
*loc = 0;
return -1;
}
left = 0;
right = len - 1;
while (left < right) {
mid = (left + right) / 2;
if (list[mid] < key) {
left = mid + 1;
} else {
right = mid;
}
}
if (list[left] == key) {
*loc = left;
ret = left;
} else {
ret = -1;
if (list[left] > key) {
*loc = left;
} else {
*loc = left + 1;
}
}
return ret;
}
/**
* Return whether the given two category can be connected or not.
*
* @param dfa [in] DFA grammar holding category pair matrix
* @param i [in] category id of left word
* @param j [in] category id of right word
*
* @return TRUE if connection is allowed by the grammar, FALSE if prohibited.
*/
boolean
dfa_cp(DFA_INFO *dfa, int i, int j)
{
int loc;
/*return(dfa->cp[i][j]);*/
//return((dfa->cp[i][j>>3] & cp_table[j&7]) ? TRUE : FALSE);
return(cp_find(dfa->cp[i], dfa->cplen[i], j, &loc) != -1 ? TRUE : FALSE);
}
/**
* Return whether the category can be appear at the beginning of sentence.
*
* @param dfa [in] DFA grammar holding category pair matrix
* @param i [in] category id of the word
*
* @return TRUE if it can appear at the beginning of sentence, FALSE if not.
*/
boolean
dfa_cp_begin(DFA_INFO *dfa, int i)
{
int loc;
/*return(dfa->cp_begin[i]);*/
//return((dfa->cp_begin[i>>3] & cp_table[i&7]) ? TRUE : FALSE);
return(cp_find(dfa->cp_begin, dfa->cp_begin_len, i, &loc) != -1 ? TRUE : FALSE);
}
/**
* Return whether the category can be appear at the end of sentence.
*
* @param dfa [in] DFA grammar holding category pair matrix
* @param i [in] category id of the word
*
* @return TRUE if it can appear at the end of sentence, FALSE if not.
*/
boolean
dfa_cp_end(DFA_INFO *dfa, int i)
{
int loc;
/*return(dfa->cp_end[i]);*/
//return((dfa->cp_end[i>>3] & cp_table[i&7]) ? TRUE : FALSE);
return(cp_find(dfa->cp_end, dfa->cp_end_len, i, &loc) != -1 ? TRUE : FALSE);
}
/**
* Add an terminal ID to a specified location in the cp list.
*
* @param list [i/o] cp list
* @param len [i/o] data num in the list
* @param alloclen [i/o] allocated length of the list
* @param val [in] value to be added
* @param loc [in] location where to add the val
*
* @return TRUE on success, FALSE on failure.
*
*/
static boolean
cp_add(int **list, int *len, int *alloclen, int val, int loc)
{
if (loc > *len) {
jlog("InternalError: skip?\n");
return FALSE;
}
if (*len + 1 > *alloclen) {
/* expand cplist if necessary */
*alloclen *= 2;
*list = (int *)myrealloc(*list, sizeof(int) * (*alloclen));
}
if (loc < *len) {
memmove(&((*list)[loc+1]), &((*list)[loc]), sizeof(int) * (*len - loc));
}
(*list)[loc] = val;
(*len)++;
return TRUE;
}
/**
* Remove an element from the cp list.
*
* @param list [i/o] cp list
* @param len [i/o] data num in the list
* @param loc [in] location of removing value
*
* @return TRUE on success, FALSE on error.
*
*/static boolean
cp_remove(int **list, int *len, int loc)
{
if (*len == 0) return TRUE;
if (loc >= *len) {
jlog("InternalError: skip?\n");
return FALSE;
}
if (loc < *len - 1) {
memmove(&((*list)[loc]), &((*list)[loc+1]), sizeof(int) * (*len - loc - 1));
}
(*len) --;
return TRUE;
}
/**
* Set a category-pair matrix bit.
*
* @param dfa [out] DFA grammar holding category pair matrix
* @param i [in] category id of left word
* @param j [in] category id of right word
* @param value TRUE if connection allowed, FALSE if connection prohibited.
*/
void
set_dfa_cp(DFA_INFO *dfa, int i, int j, boolean value)
{
int loc;
if (value) {
/* add j to cp list of i */
if (cp_find(dfa->cp[i], dfa->cplen[i], j, &loc) == -1) { /* not exist */
cp_add(&(dfa->cp[i]), &(dfa->cplen[i]), &(dfa->cpalloclen[i]), j, loc);
}
} else {
/* remove j from cp list of i */
if (cp_find(dfa->cp[i], dfa->cplen[i], j, &loc) != -1) { /* already exist */
cp_remove(&(dfa->cp[i]), &(dfa->cplen[i]), loc);
}
}
}
/**
* Set a category-pair matrix bit for the beginning of sentence
*
* @param dfa [out] DFA grammar holding category pair matrix
* @param i [in] category id of the word
* @param value TRUE if the category can appear at the beginning of
* sentence, FALSE if not.
*/
void
set_dfa_cp_begin(DFA_INFO *dfa, int i, boolean value)
{
int loc;
if (value) {
/* add j to cp list of i */
if (cp_find(dfa->cp_begin, dfa->cp_begin_len, i, &loc) == -1) { /* not exist */
cp_add(&(dfa->cp_begin), &(dfa->cp_begin_len), &(dfa->cp_begin_alloclen), i, loc);
}
} else {
/* remove j from cp list of i */
if (cp_find(dfa->cp_begin, dfa->cp_begin_len, i, &loc) != -1) { /* already exist */
cp_remove(&(dfa->cp_begin), &(dfa->cp_begin_len), loc);
}
}
}
/**
* Set a category-pair matrix bit for the end of sentence
*
* @param dfa [out] DFA grammar holding category pair matrix
* @param i [in] category id of the word
* @param value TRUE if the category can appear at the end of
* sentence, FALSE if not.
*/
void
set_dfa_cp_end(DFA_INFO *dfa, int i, boolean value)
{
int loc;
if (value) {
/* add j to cp list of i */
if (cp_find(dfa->cp_end, dfa->cp_end_len, i, &loc) == -1) { /* not exist */
cp_add(&(dfa->cp_end), &(dfa->cp_end_len), &(dfa->cp_end_alloclen), i, loc);
}
} else {
/* remove j from cp list of i */
if (cp_find(dfa->cp_end, dfa->cp_end_len, i, &loc) != -1) { /* already exist */
cp_remove(&(dfa->cp_end), &(dfa->cp_end_len), loc);
}
}
}
/**
* Initialize category pair matrix in the grammar data.
*
* @param dfa [out] DFA grammar to hold category pair matrix
*/
void
init_dfa_cp(DFA_INFO *dfa)
{
dfa->cp = NULL;
}
/**
* Allocate memory for category pair matrix and initialize it.
*
* @param dfa [out] DFA grammar to hold category pair matrix
* @param term_num [in] number of categories in the grammar
* @param size [in] memory allocation length for each cp list as initial
*/
void
malloc_dfa_cp(DFA_INFO *dfa, int term_num, int size)
{
int i;
dfa->cp = (int **)mymalloc(sizeof(int *) * term_num);
dfa->cplen = (int *)mymalloc(sizeof(int) * term_num);
dfa->cpalloclen = (int *)mymalloc(sizeof(int) * term_num);
for(i=0;icp[i] = (int *)mymalloc(sizeof(int) * size);
dfa->cpalloclen[i] = size;
dfa->cplen[i] = 0;
}
dfa->cp_begin = (int *)mymalloc(sizeof(int) * size);
dfa->cp_begin_alloclen = size;
dfa->cp_begin_len = 0;
dfa->cp_end = (int *)mymalloc(sizeof(int) * size);
dfa->cp_end_alloclen = size;
dfa->cp_end_len = 0;
}
/**
* Append a categori-pair matrix to another.
* This function assumes that other grammar information has been already
* appended and dfa->term_num contains the new size.
*
* @param dfa [i/o] DFA grammar to which the new category pair will be appended
* @param src [in] source DFA
* @param offset [in] appending point at dfa
*
* @return TRUE on success, FALSE on error.
*/
boolean
dfa_cp_append(DFA_INFO *dfa, DFA_INFO *src, int offset)
{
int i, j, size;
if (dfa->cp == NULL) {
/* no category pair information exist on target */
if (dfa->term_num != src->term_num) {
jlog("InternalError: dfa_cp_append\n");
return FALSE;
}
/* just malloc and copy */
dfa->cp = (int **)mymalloc(sizeof(int *) * dfa->term_num);
dfa->cplen = (int *)mymalloc(sizeof(int) * dfa->term_num);
dfa->cpalloclen = (int *)mymalloc(sizeof(int) * dfa->term_num);
for(i=0;iterm_num;i++) {
size = src->cplen[i];
if (size < DFA_CP_MINSTEP) size = DFA_CP_MINSTEP;
dfa->cp[i] = (int *)mymalloc(sizeof(int) * size);
dfa->cpalloclen[i] = size;
memcpy(dfa->cp[i], src->cp[i], sizeof(int) * src->cplen[i]);
dfa->cplen[i] = src->cplen[i];
}
size = src->cp_begin_len;
if (size < DFA_CP_MINSTEP) size = DFA_CP_MINSTEP;
dfa->cp_begin = (int *)mymalloc(sizeof(int) * size);
dfa->cp_begin_alloclen = size;
memcpy(dfa->cp_begin, src->cp_begin, sizeof(int) * src->cp_begin_len);
dfa->cp_begin_len = src->cp_begin_len;
size = src->cp_end_len;
if (size < DFA_CP_MINSTEP) size = DFA_CP_MINSTEP;
dfa->cp_end = (int *)mymalloc(sizeof(int) * size);
dfa->cp_end_alloclen = size;
memcpy(dfa->cp_end, src->cp_end, sizeof(int) * src->cp_end_len);
dfa->cp_end_len = src->cp_end_len;
return TRUE;
}
/* expand index */
dfa->cp = (int **)myrealloc(dfa->cp, sizeof(int *) * dfa->term_num);
dfa->cplen = (int *)myrealloc(dfa->cplen, sizeof(int) * dfa->term_num);
dfa->cpalloclen = (int *)myrealloc(dfa->cpalloclen, sizeof(int) * dfa->term_num);
/* copy src->cp[i][j] to target->cp[i+offset][j+offset] */
for(i=offset;iterm_num;i++) {
size = src->cplen[i-offset];
if (size < DFA_CP_MINSTEP) size = DFA_CP_MINSTEP;
dfa->cp[i] = (int *)mymalloc(sizeof(int) * size);
dfa->cpalloclen[i] = size;
for(j=0;jcplen[i-offset];j++) {
dfa->cp[i][j] = src->cp[i-offset][j] + offset;
}
dfa->cplen[i] = src->cplen[i-offset];
}
if (dfa->cp_begin_alloclen < dfa->cp_begin_len + src->cp_begin_len) {
dfa->cp_begin_alloclen = dfa->cp_begin_len + src->cp_begin_len;
dfa->cp_begin = (int *)myrealloc(dfa->cp_begin, sizeof(int) * dfa->cp_begin_alloclen);
}
for(j=0;jcp_begin_len;j++) {
dfa->cp_begin[dfa->cp_begin_len + j] = src->cp_begin[j] + offset;
}
dfa->cp_begin_len += src->cp_begin_len;
if (dfa->cp_end_alloclen < dfa->cp_end_len + src->cp_end_len) {
dfa->cp_end_alloclen = dfa->cp_end_len + src->cp_end_len;
dfa->cp_end = (int *)myrealloc(dfa->cp_end, sizeof(int) * dfa->cp_end_alloclen);
}
for(j=0;jcp_end_len;j++) {
dfa->cp_end[dfa->cp_end_len + j] = src->cp_end[j] + offset;
}
dfa->cp_end_len += src->cp_end_len;
return TRUE;
}
/**
* Free the category pair matrix from DFA grammar.
*
* @param dfa [i/o] DFA grammar holding category pair matrix
*/
void
free_dfa_cp(DFA_INFO *dfa)
{
int i;
if (dfa->cp != NULL) {
free(dfa->cp_end);
free(dfa->cp_begin);
for(i=0;iterm_num;i++) free(dfa->cp[i]);
free(dfa->cpalloclen);
free(dfa->cplen);
free(dfa->cp);
dfa->cp = NULL;
}
}
void
dfa_cp_output_rawdata(FILE *fp, DFA_INFO *dfa)
{
int i, j;
for(i=0;iterm_num;i++) {
fprintf(fp, "%d:", i);
for(j=0;jcplen[i];j++) {
fprintf(fp, " %d", dfa->cp[i][j]);
}
fprintf(fp, "\n");
}
fprintf(fp, "bgn:");
for(j=0;jcp_begin_len;j++) {
fprintf(fp, " %d", dfa->cp_begin[j]);
}
fprintf(fp, "\n");
fprintf(fp, "end:");
for(j=0;jcp_end_len;j++) {
fprintf(fp, " %d", dfa->cp_end[j]);
}
fprintf(fp, "\n");
}
void
dfa_cp_count_size(DFA_INFO *dfa, unsigned long *size_ret, unsigned long *allocsize_ret)
{
int i;
unsigned long size, allocsize;
size = 0;
allocsize = 0;
for(i=0;iterm_num;i++) {
size += sizeof(int) * dfa->cplen[i];
allocsize += sizeof(int) * dfa->cpalloclen[i];
}
size += sizeof(int) * dfa->cp_begin_len;
allocsize += sizeof(int) * dfa->cp_begin_alloclen;
size += sizeof(int) * dfa->cp_end_len;
allocsize += sizeof(int) * dfa->cp_end_alloclen;
allocsize += (sizeof(int *) + sizeof(int) + sizeof(int)) * dfa->term_num;
*size_ret = size;
*allocsize_ret = allocsize;
}