/** * @file rddfa.c * * * @brief DFA文法の読み込み * * * * @brief Read DFA grammar from a file * * * @author Akinobu LEE * @date Tue Feb 15 14:54:40 2005 * * $Revision: 1.5 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include static char buf[MAXLINELEN]; ///< Local text buffer /** * Initialize and allocate DFA state information list in the grammar. * * @param dinfo [i/o] DFA grammar */ void dfa_state_init(DFA_INFO *dinfo) { int i; dinfo->maxstatenum = DFA_STATESTEP; dinfo->st = (DFA_STATE *)mymalloc(sizeof(DFA_STATE) * dinfo->maxstatenum); for (i=0;imaxstatenum;i++) { dinfo->st[i].number = i; dinfo->st[i].status = 0; dinfo->st[i].arc = NULL; } dinfo->state_num = dinfo->arc_num = dinfo->term_num = 0; dinfo->sp_id = WORD_INVALID; } /** * Expand the state information list to the required length. * * @param dinfo [i/o] DFA grammar * @param needed [in] required new length */ void dfa_state_expand(DFA_INFO *dinfo, int needed) { int oldnum, i; oldnum = dinfo->maxstatenum; dinfo->maxstatenum += DFA_STATESTEP; if (dinfo->maxstatenum < needed) dinfo->maxstatenum = needed; dinfo->st = (DFA_STATE *)myrealloc(dinfo->st, sizeof(DFA_STATE) * dinfo->maxstatenum); for (i=oldnum;imaxstatenum;i++) { dinfo->st[i].number = i; dinfo->st[i].status = 0; dinfo->st[i].arc = NULL; } } /** * Top loop function to read DFA grammar via file pointer (gzip enabled) * * @param fp [in] file pointer that points to the DFA grammar data * @param dinfo [out] the read data will be stored in this DFA grammar structure * * @return TRUE on success, FALSE on failure. 
*/
boolean
rddfa(FILE *fp, DFA_INFO *dinfo)
{
  int max_state = 0;		/* largest state ID seen */
  int n_arcs = 0;		/* number of arcs stored */
  int max_term = 0;		/* largest terminal ID seen */

  dfa_state_init(dinfo);

  /* consume lines until end of input or until rddfa_line() asks to stop */
  while (getl(buf, MAXLINELEN, fp) != NULL
	 && rddfa_line(buf, dinfo, &max_state, &n_arcs, &max_term) != FALSE) {
    /* all work is done in the condition */
  }

  /* IDs are zero-based, so the counts are the maxima plus one */
  dinfo->state_num = max_state + 1;
  dinfo->arc_num = n_arcs;
  dinfo->term_num = max_term + 1;

  return(TRUE);
}

/**
 * Top loop function to read DFA grammar via file pointer, using getl_fp()
 *
 * Identical to rddfa() except that lines are read with getl_fp() instead
 * of getl().
 *
 * @param fp [in] file pointer that points to the DFA grammar data
 * @param dinfo [out] the read data will be stored in this DFA grammar structure
 *
 * @return TRUE.  As with rddfa(), the return value is TRUE unconditionally,
 * including when reading stopped at a line that could not be parsed.
 */
boolean
rddfa_fp(FILE *fp, DFA_INFO *dinfo)
{
  int max_state = 0;		/* largest state ID seen */
  int n_arcs = 0;		/* number of arcs stored */
  int max_term = 0;		/* largest terminal ID seen */

  dfa_state_init(dinfo);

  /* consume lines until end of input or until rddfa_line() asks to stop */
  while (getl_fp(buf, MAXLINELEN, fp) != NULL
	 && rddfa_line(buf, dinfo, &max_state, &n_arcs, &max_term) != FALSE) {
    /* all work is done in the condition */
  }

  /* IDs are zero-based, so the counts are the maxima plus one */
  dinfo->state_num = max_state + 1;
  dinfo->arc_num = n_arcs;
  dinfo->term_num = max_term + 1;

  return(TRUE);
}

/**
 * Parse the input line and set grammar information, one by line.
 *
 * A line consists of four fields: state, terminal ID, next state and the
 * status code of the state (hexadecimal).
 *
 * @param line [in] text buffer that holds a line of DFA file
 * @param dinfo [i/o] the read data will be appended to this DFA data
 * @param state_max [i/o] largest state ID that has appeared, will be updated
 * @param arc_num [i/o] number of read arcs, will be updated
 * @param terminal_max [i/o] largest terminal ID that has appeared, will be updated
 *
 * @return TRUE if the line was successfully parsed, FALSE on the "DFAEND"
 * terminator line or if parsing failed.
*/
boolean
rddfa_line(char *line, DFA_INFO *dinfo, int *state_max, int *arc_num, int *terminal_max)
{
  enum { NFIELDS = 4 };	/* state, terminal ID, next state, status code */
  DFA_ARC *newarc;
  char *field[NFIELDS];
  int state, terminal, next_state;
  unsigned int status;
  int i;

  /* test the line that was passed in, not the file-local read buffer */
  if (strmatch(line, "DFAEND")) return(FALSE);

  /* format: state terminalID nextstate statuscode_of_state */
  for (i = 0; i < NFIELDS; i++) {
    /* strtok() takes the string on the first call and NULL afterwards */
    field[i] = strtok((i == 0) ? line : NULL, DELM);
    if (field[i] == NULL) {
      jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
      return FALSE;
    }
  }
  state = atoi(field[0]);
  terminal = atoi(field[1]);
  next_state = atoi(field[2]);
  /* the status code is hexadecimal; reject the line instead of going on
     with an unset value when it cannot be converted */
  if (sscanf(field[3], "%x", &status) != 1) {
    jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
    return FALSE;
  }
  /* state is used as an index into dinfo->st[] below */
  if (state < 0) {
    jlog("Error: rddfa: negative state number, corrupted or invalid data?\n");
    return FALSE;
  }

  if (state >= dinfo->maxstatenum) {      /* expand */
    dfa_state_expand(dinfo, state+1);
  }
  if (next_state >= dinfo->maxstatenum) { /* expand */
    dfa_state_expand(dinfo, next_state+1);
  }

  /* set state status (accept / initial) */
  if (status & ACCEPT_S) {
    dinfo->st[state].status |= ACCEPT_S;
  }
  /* the state #0 is an initial state */
  if (state == 0) {
    dinfo->st[state].status |= INITIAL_S;
  }

  /* skip line with negative terminalID/nextstate */
  /* NOTE(review): as written, this also skips a line whose terminal ID and
     next state are both 0, and keeps a line where only one of the two is
     positive -- confirm that this is the intended rule. */
  if (terminal > 0 || next_state > 0) {
    /* add new arc to the head of the state's arc list */
    newarc = (DFA_ARC *)mymalloc(sizeof(DFA_ARC));
    newarc->label = terminal;
    newarc->to_state = next_state;
    newarc->next = dinfo->st[state].arc;
    dinfo->st[state].arc = newarc;
    (*arc_num)++;
  }

  if (*state_max < state) *state_max = state;
  if (*terminal_max < terminal) *terminal_max = terminal;

  return(TRUE);
}

/* append dfa info to other */
/* soffset: state offset  coffset: category(terminal) offset */
/**
 * Append the DFA state information to other
 *
 * Every state of @a src and every arc leaving it is copied into @a dst,
 * with state numbers shifted by @a soffset and arc labels shifted by
 * @a coffset.
 *
 * @param dst [i/o] DFA grammar
 * @param src [in] DFA grammar to be appended to @a dst
 * @param soffset [in] offset state number in @a dst where the new state should be stored
@param coffset [in] category id offset in @a dst where the new data should be stored */ void dfa_append(DFA_INFO *dst, DFA_INFO *src, int soffset, int coffset) { DFA_ARC *arc, *newarc; int s, state, terminal, next_state; unsigned int status; for (s = 0; s < src->state_num; s++) { state = s + soffset; status = src->st[s].status; if (state >= dst->maxstatenum) { /* expand */ dfa_state_expand(dst, state+1); } /* set state status (accept / initial) */ if (status & ACCEPT_S) { dst->st[state].status |= ACCEPT_S; } /* the state #0 is an initial state */ if (s == 0) { dst->st[state].status |= INITIAL_S; } for (arc = src->st[s].arc; arc; arc = arc->next) { terminal = arc->label + coffset; next_state = arc->to_state + soffset; if (next_state >= dst->maxstatenum) { /* expand */ dfa_state_expand(dst, next_state+1); } /* add new arc to the state */ newarc = (DFA_ARC *)mymalloc(sizeof(DFA_ARC)); newarc->label = terminal; newarc->to_state = next_state; newarc->next = dst->st[state].arc; dst->st[state].arc = newarc; dst->arc_num++; if (dst->term_num < terminal + 1) dst->term_num = terminal + 1; } if (dst->state_num < state + 1) dst->state_num = state + 1; } }