/*
 * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* strmatch(a, b) expands to !strcmp(a, b): non-zero when the strings are equal */
#define strmatch !strcmp

#if defined(_WIN32)
/* winnls */
#include <windows.h>
#include <winnls.h>

#define UNICODE_BUFFER_SIZE 4096 ///< Buffer length for unicode conversion

/*
 * NOTE(review): CODE_JPN_EUC is not defined anywhere in this file.  The
 * fallback below assumes Windows code page 20932 (EUC-JP); confirm against
 * the intended code page before relying on it.
 */
#ifndef CODE_JPN_EUC
#define CODE_JPN_EUC 20932
#endif

static unsigned int from_cp;	///< Source codepage
static unsigned int to_cp;	///< Target codepage
static wchar_t unibuf[UNICODE_BUFFER_SIZE]; ///< buffer for unicode conversion
#else
/* iconv */
#include <errno.h>
#include <iconv.h>

static iconv_t cd = (iconv_t)-1; ///< Conversion descriptor
#endif

static int convert_enabled = 0; ///< 1 if charset conversion is enabled

#if defined(_WIN32)
/**
 * Translate a charset / codepage name into a Win32 codepage identifier.
 *
 * Accepted names are "euc-jp"/"euc"/"eucjp", "ansi", "mac", "oem", "utf-7",
 * "utf-8", "sjis"/"sjis-win"/"shift-jis"/"shift_jis", or a decimal codepage
 * number (which must pass IsValidCodePage()).
 *
 * @param codestr [in] charset name or codepage number string
 * @param code [out] resulting codepage identifier
 *
 * @return 0 on success, -1 if the name is unknown or the codepage is invalid.
 */
static int
str2code(char *codestr, unsigned int *code)
{
  if (strmatch(codestr, "euc-jp")
      || strmatch(codestr, "euc")
      || strmatch(codestr, "eucjp")) {
    /* Japanese EUC */
    *code = CODE_JPN_EUC;
  } else if (strmatch(codestr, "ansi")) {
    /* ANSI codepage (MBCS) ex. shift-jis in Windows XP Japanese edition.*/
    *code = CP_ACP;
  } else if (strmatch(codestr, "mac")) {
    /* Macintosh codepage */
    *code = CP_MACCP;
  } else if (strmatch(codestr, "oem")) {
    /* OEM localized default codepage */
    *code = CP_OEMCP;
  } else if (strmatch(codestr, "utf-7")) {
    /* UTF-7 codepage */
    *code = CP_UTF7;
  } else if (strmatch(codestr, "utf-8")) {
    /* UTF-8 codepage */
    *code = CP_UTF8;
  } else if (strmatch(codestr, "sjis")
	     || strmatch(codestr, "sjis-win")
	     || strmatch(codestr, "shift-jis")
	     || strmatch(codestr, "shift_jis")) {
    /* sjis codepage = 932 */
    *code = 932;
  } else if (codestr[0] >= '0' && codestr[0] <= '9') {
    /* codepage number */
    *code = (unsigned int)atoi(codestr);
    if (! IsValidCodePage(*code)) {
      /* print the string the caller gave us; it is a char *, so use %s */
      fprintf(stderr, "Error: charconv_win32: codepage \"%s\" not found\n", codestr);
      return -1;
    }
  } else {
    fprintf(stderr, "Error: str2code: unknown source codepage \"%s\"\n", codestr);
    fprintf(stderr, "Error: str2code: valids are \"euc-jp\", \"ansi\", \"mac\", \"oem\", \"utf-7\", \"utf-8\", \"sjis\" and codepage number\n");
    return -1;
  }
  return 0;
}
#endif

/**
 * Setup charset conversion.
 *
 * Conversion is disabled on entry and re-enabled only when setup succeeds,
 * so after a failure charconv() passes strings through unchanged.
 *
 * @param fromcode [in] input charset name (must not be NULL)
 * @param tocode [in] output charset name (must not be NULL)
 *
 * @return 0 on success, -1 on failure.
 */
int
charconv_setup(char *fromcode, char *tocode)
{
  convert_enabled = 0;

  if (fromcode == NULL || tocode == NULL) {
    fprintf(stderr, "Error: charconv_setup: input code or output code not specified\n");
    return -1;
  }

#if defined(_WIN32)
  if (str2code(fromcode, &from_cp) == -1) {
    fprintf(stderr, "Error: charconv_setup: unknown codepage specified\n");
    return -1;
  }
  if (str2code(tocode, &to_cp) == -1) {
    fprintf(stderr, "Error: charconv_setup: unknown codepage specified\n");
    return -1;
  }
#else
  /* clear already allocated descriptor */
  if (cd != (iconv_t)-1) {
    if (iconv_close(cd) < 0) {
      fprintf(stderr, "Error: charconv_setup: failed to close iconv\n");
      return -1;
    }
    cd = (iconv_t)-1;
  }
  /* allocate conversion descriptor */
  cd = iconv_open(tocode, fromcode);
  if (cd == (iconv_t)-1) {
    /* allocation failed */
    fprintf(stderr, "Error: charconv_setup: unknown charset name in \"%s\" or \"%s\"\n", fromcode, tocode);
    fprintf(stderr, "Error: charconv_setup: do \"iconv --list\" to get the list of available charset names.\n");
    return -1;
  }
#endif

  convert_enabled = 1;

  return(0);
}

/**
 * Apply charset conversion to a string.
 *
 * When conversion is disabled the input pointer is returned untouched.
 * On the iconv build, an invalid or incomplete input sequence, or an output
 * buffer that is too small, terminates the process with exit(-1).
 *
 * @param instr [in] source string (NUL-terminated)
 * @param outstr [out] destination buffer
 * @param maxoutlen [in] allocated length of outstr in byte.
 *
 * @return either of instr or outstr, that holds the result string:
 * outstr when the conversion was performed, instr when conversion is
 * disabled, the arguments are unusable, or a recoverable error occurred.
 */
char *
charconv(char *instr, char *outstr, int maxoutlen)
{
#if defined(_WIN32)
  int unilen, newlen;
#else
  char *src, *dst;
  size_t srclen, dstlen;
  size_t ret;
  int err;
#endif

  /* if disabled return instr itself */
  if (convert_enabled == 0) return(instr); /* no conversion */

  /* nothing sensible can be done without a source and a real output buffer */
  if (instr == NULL || outstr == NULL || maxoutlen <= 0) {
    fprintf(stderr, "Error: charconv: invalid argument\n");
    return(instr);
  }

#if defined(_WIN32)
  /* get length of unicode string */
  unilen = MultiByteToWideChar(from_cp, 0, instr, -1, NULL, 0);
  if (unilen <= 0) {
    fprintf(stderr, "Error: charconv: conversion error?\n");
    return(instr);
  }
  if (unilen > UNICODE_BUFFER_SIZE) {
    fprintf(stderr, "Error: charconv: unicode buffer size exceeded (%d > %d)!\n", unilen, UNICODE_BUFFER_SIZE);
    return(instr);
  }
  /* convert source string to unicode */
  if (MultiByteToWideChar(from_cp, 0, instr, -1, unibuf, unilen) == 0) {
    fprintf(stderr, "Error: charconv: conversion error?\n");
    return(instr);
  }
  /* get length of target string */
  newlen = WideCharToMultiByte(to_cp, 0, unibuf, -1, NULL, 0, NULL, NULL);
  if (newlen <= 0) {
    fprintf(stderr, "Error: charconv: conversion error?\n");
    return(instr);
  }
  if (newlen > maxoutlen) {
    fprintf(stderr, "Error: charconv: target buffer size exceeded (%d > %d)!\n", newlen, maxoutlen);
    return(instr);
  }
  /* convert unicode to target string */
  if (WideCharToMultiByte(to_cp, 0, unibuf, -1, outstr, newlen, NULL, NULL) == 0) {
    fprintf(stderr, "Error: charconv: conversion error?\n");
    return(instr);
  }

  return(outstr);

#else

  if (cd == (iconv_t)-1) {
    fprintf(stderr, "Error: charconv: conversion descriptor not allocated\n");
    return(instr);
  }

  /* include the terminating NUL so that the output is terminated as well */
  srclen = strlen(instr) + 1;
  dstlen = (size_t)maxoutlen;
  src = instr;
  dst = outstr;
  ret = iconv(cd, &src, &srclen, &dst, &dstlen);
  if (ret == (size_t)-1) {
    err = errno;		/* save before stdio can overwrite it */
    switch (err) {
    case EILSEQ:
      fprintf(stderr, "Error: charconv: invalid multibyte sequence in the input\n");
      exit(-1);
      break;
    case EINVAL:
      fprintf(stderr, "Error: charconv: incomplete multibyte sequence in the input\n");
      exit(-1);
      break;
    case E2BIG:
      fprintf(stderr, "Error: charconv: converted string size exceeded buffer (>%d)\n", maxoutlen);
      exit(-1);
      break;
    default:
      /* outstr may be partial and unterminated: hand back the source */
      fprintf(stderr, "Error: charconv: conversion failed: %s\n", strerror(err));
      return(instr);
    }
  }

  return(outstr);
#endif
}