/**
* @file voca_lookup.c
*
*
* @brief 単語辞書上の単語の検索
*
* 単語を,「言語エントリ名」あるいは「言語エントリ名[出力文字列]」
* ,あるいは「#単語番号」から検索します.
*
*
*
* @brief Look up a word on dictionary by string
*
* String can be "langentry" or "langentry[outputstring]", or
* "#number".
*
*
* @author Akinobu LEE
* @date Fri Feb 18 21:24:01 2005
*
* $Revision: 1.4 $
*
*/
/*
* Copyright (c) 1991-2012 Kawahara Lab., Kyoto University
* Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
* Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology
* All rights reserved
*/
#include
#include
/**
 * Look up a word on dictionary by string.
 *
 * The keyword is interpreted in one of three forms:
 *  - "#number": a direct word id.  The part after '#' must consist of
 *    one or more decimal digits and the value must be below winfo->num.
 *  - "langentry[outputstring]": the part before '[' is compared with
 *    the word name and the part between the brackets with the word
 *    output string.
 *  - "langentry": compared with the word name only.
 *
 * When more than one dictionary entry matches, a warning is logged and
 * the first matching entry is returned.
 *
 * @param keyword [in] keyword to search
 * @param winfo [in] word dictionary
 *
 * @return the word id if found, or WORD_INVALID if not found
 * (including when @a keyword is NULL or a malformed "#number").
 */
WORD_ID
voca_lookup_wid(char *keyword, WORD_INFO *winfo)
{
  WORD_ID i, found;
  int plen, totallen;
  long wid;
  char *c;

  if (keyword == NULL) return WORD_INVALID;

  if (keyword[0] == '#') {
    /* accept plain decimal digits only: no sign, no blanks */
    for (c = keyword + 1; *c != '\0'; c++) {
      if (*c < '0' || *c > '9') {
        return WORD_INVALID;
      }
    }
    /* a bare "#" carries no number at all */
    if (c == keyword + 1) {
      return WORD_INVALID;
    }
    /* strtol() saturates on overflow instead of invoking undefined
       behavior as atoi() does; a saturated value fails the range test */
    wid = strtol(keyword + 1, NULL, 10);
    if (wid < 0 || wid >= winfo->num) {
      return WORD_INVALID;
    }
    return (WORD_ID)wid;
  }

  found = WORD_INVALID;
  totallen = strlen(keyword);
  if ((c = strchr(keyword, '[')) != NULL) {
    /* plen: length of the name part preceding '[' */
    plen = c - keyword;
    /* totallen - plen - 2: length of the output string part, i.e. what
       remains after removing the name, the '[' and one trailing
       character that is taken to be the closing ']'.
       NOTE(review): strnmatch() is defined elsewhere; if it is a
       length-limited comparison, dictionary entries that merely begin
       with the given name / output string would match too -- confirm. */
    for (i = 0; i < winfo->num; i++) {
      if (strnmatch(keyword, winfo->wname[i], plen)
          && strnmatch(c + 1, winfo->woutput[i], totallen - plen - 2)) {
        if (found == WORD_INVALID) {
          found = i;
        } else {
          /* second hit: warn once and keep the first one */
          jlog("Warning: voca_lookup: several \"%s\" found in dictionary, use the first one..\n", keyword);
          break;
        }
      }
    }
  } else {
    for (i = 0; i < winfo->num; i++) {
      if (strmatch(keyword, winfo->wname[i])) {
        if (found == WORD_INVALID) {
          found = i;
        } else {
          /* second hit: warn once and keep the first one */
          jlog("Warning: voca_lookup: several \"%s\" found in dictionary, use the first one..\n", keyword);
          break;
        }
      }
    }
  }
  return found;
}
/* Turn a string of space-separated word keywords into an array of word ids. */
/* The array handed back is freshly allocated. */
#define WSSTEP 10 ///< Number of elements the array grows by on each (re)allocation
/**
 * Convert string of space-separated word strings to array of word ids.
 *
 * The string is split on ' ' with strtok(), so @a s is modified in
 * place.  Every token is resolved with voca_lookup_wid().  If any token
 * cannot be resolved, an error is logged, the array is released and
 * NULL is returned.  An input without any token yields an allocated
 * array with a length of 0.
 *
 * @param winfo [in] word dictionary
 * @param s [in] string of space-separated word strings (overwritten
 * by the tokenization)
 * @param len_return [out] number of found words; written only on success
 *
 * @return pointer to a newly allocated word list, or NULL when a word
 * was not found in the dictionary.
 */
WORD_ID *
new_str2wordseq(WORD_INFO *winfo, char *s, int *len_return)
{
  int capacity = WSSTEP;
  int count = 0;
  WORD_ID *ids = (WORD_ID *)mymalloc(sizeof(WORD_ID) * capacity);
  char *token = strtok(s, " ");

  while (token != NULL) {
    WORD_ID wid;

    /* enlarge the array by one step when it is full */
    if (count >= capacity) {
      capacity += WSSTEP;
      ids = (WORD_ID *)myrealloc(ids, sizeof(WORD_ID) * capacity);
    }

    wid = voca_lookup_wid(token, winfo);
    ids[count] = wid;
    if (wid == WORD_INVALID) {
      /* not found */
      jlog("Error: voca_lookup: word \"%s\" not found in dict\n", token);
      free(ids);
      return NULL;
    }
    count++;

    token = strtok(NULL, " ");
  }

  *len_return = count;
  return ids;
}