/**
* @file charconv_win32.c
*
*
* @brief 文字コード変換 (Win32 API + libjcode 使用)
*
* Windows の WideCharToMultiByte() および MultiByteToWideChar() を
* 使用した文字コード変換を行う. "ansi" "oem" "mac" "utf-7" "utf-8" あるいは
* 任意のコードページ番号間の変換を,unicode を介して行う.
*
* Windows では EUC のコードページに対応していないので,変換元の文字コードが
* euc-jp のときは,libjcode で SJIS に変換してからunicodeへ変換する.
*
*
*
*
* @brief Character set conversion using Win32 MultiByte function + libjcode
*
* Performs character set conversion using the Windows native APIs
* WideCharToMultiByte() and MultiByteToWideChar(). Conversion among the
* "ansi", "oem", "mac", "utf-7" and "utf-8" codepages, or any codepage number
* supported by the running OS, is done by going through unicode.
*
* Conversion from Japanese-euc ("euc-jp") is optionally supported by the
* libjcode library.
*
*
*
* @author Akinobu LEE
* @date Thu Feb 17 16:02:41 2005
*
* $Revision: 1.5 $
*
*/
/*
* Copyright (c) 1991-2012 Kawahara Lab., Kyoto University
* Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
* Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology
* All rights reserved
*/
#include "app.h"
#ifdef CHARACTER_CONVERSION
#ifdef USE_WIN32_MULTIBYTE
#include
#include
#include "libjcode/jlib.h"
static boolean euctosjis = FALSE; ///< TRUE when charconv_win32() must first run libjcode's EUCtoSJIS() on the input
static boolean only_euc_conv = FALSE; ///< TRUE when that euc->sjis step is the whole conversion (no unicode round trip follows)
static unsigned int from_cp; ///< Source codepage handed to MultiByteToWideChar()
static unsigned int to_cp; ///< Target codepage handed to WideCharToMultiByte()
/**
* Setup charset conversion for win32.
*
* @param fromcode [in] input charset code name or codepage number string, NULL invalid
* @param tocode [in] output charset code name or codepage number string, or NULL when disable conversion
* @param enable_conv [out] store whether conversion should be enabled or not
*
* @return TRUE on success, FALSE on failure (unknown codename or unsupported codepage).
*/
boolean
charconv_win32_setup(char *fromcode, char *tocode, boolean *enable_conv)
{
unsigned int src_p, dst_p;
if (tocode == NULL) {
/* just disable conversion */
*enable_conv = FALSE;
} else {
/* determine source character set */
if (fromcode == NULL) {
jlog("Error: charconv_win32: charset names of both input and output should be given.\n");
jlog("Error: charconv_win32: use \"-charconv from to\" instead of \"-kanji\".\n");
*enable_conv = FALSE;
return FALSE;
}
euctosjis = FALSE;
if (strmatch(fromcode, "euc-jp")
|| strmatch(fromcode, "euc")
|| strmatch(fromcode, "eucjp")) {
/* pre-convert Japanese euc to Shift-jis */
euctosjis = TRUE;
/* input = Shift_jis (codepage 932) */
from_cp = 932;
} else if (strmatch(fromcode, "ansi")) {
/* ANSI codepage (MBCS) ex. shift-jis in Windows XP Japanese edition.*/
from_cp = CP_ACP;
} else if (strmatch(fromcode, "mac")) {
/* Macintosh codepage */
from_cp = CP_MACCP;
} else if (strmatch(fromcode, "oem")) {
/* OEM localized default codepage */
from_cp = CP_OEMCP;
} else if (strmatch(fromcode, "utf-7")) {
/* UTF-7 codepage */
from_cp = CP_UTF7;
} else if (strmatch(fromcode, "utf-8")) {
/* UTF-8 codepage */
from_cp = CP_UTF8;
} else if (strmatch(fromcode, "sjis")
|| strmatch(fromcode, "sjis-win")
|| strmatch(fromcode, "shift-jis")
|| strmatch(fromcode, "shift_jis")) {
/* sjis codepage = 932 */
from_cp = 932;
} else if (fromcode[0] >= '0' && fromcode[0] <= '9') {
/* codepage number */
from_cp = atoi(fromcode);
if (! IsValidCodePage(from_cp)) {
jlog("Error: charconv_win32: codepage #%d not found\n", from_cp);
*enable_conv = FALSE;
return FALSE;
}
} else {
jlog("Error: charconv_win32: unknown source codepage \"%s\"\n", fromcode);
jlog("Error: charconv_win32: valids are \"ansi\", \"mac\", \"oem\", \"utf-7\", \"utf-8\" and codepage number\n");
jlog("Error: charconv_win32: the default local charcode can be speicified by \"ansi\".\n");
*enable_conv = FALSE;
return FALSE;
}
/* determine the target character set */
if (strmatch(tocode, "ansi")) {
/* ANSI codepage (MBCS) ex. shift-jis in Windows XP Japanese edition.*/
to_cp = CP_ACP;
} else if (strmatch(tocode, "mac")) {
/* Macintosh codepage */
to_cp = CP_MACCP;
} else if (strmatch(tocode, "oem")) {
/* OEM codepage */
to_cp = CP_OEMCP;
} else if (strmatch(tocode, "utf-7")) {
/* UTF-7 codepage */
to_cp = CP_UTF7;
} else if (strmatch(tocode, "utf-8")) {
/* UTF-8 codepage */
to_cp = CP_UTF8;
} else if (strmatch(tocode, "sjis")
|| strmatch(tocode, "sjis-win")
|| strmatch(tocode, "shift-jis")
|| strmatch(tocode, "shift_jis")) {
/* sjis codepage = 932 */
to_cp = 932;
} else if (tocode[0] >= '0' && tocode[0] <= '9') {
/* codepage number */
to_cp = atoi(tocode);
if (! IsValidCodePage(to_cp)) {
jlog("Error: charconv_win32: codepage #%d not found\n", to_cp);
*enable_conv = FALSE;
return FALSE;
}
} else {
jlog("Error: charconv_win32: unknown target codepage \"%s\"\n", tocode);
jlog("Error: charconv_win32: valids are \"ansi\", \"mac\", \"oem\", \"utf-7\", \"utf-8\" and codepage number\n");
jlog("Error: charconv_win32: the default local charcode can be speicified by \"ansi\".\n");
*enable_conv = FALSE;
return FALSE;
}
/* check whether the actual conversion is needed */
src_p = from_cp;
dst_p = to_cp;
if (src_p == CP_ACP) src_p = GetACP();
if (dst_p == CP_ACP) dst_p = GetACP();
if (src_p == CP_OEMCP) src_p = GetOEMCP();
if (dst_p == CP_OEMCP) dst_p = GetOEMCP();
if (src_p == dst_p) {
if (euctosjis == FALSE) {
only_euc_conv = FALSE;
*enable_conv = FALSE;
} else {
only_euc_conv = TRUE;
*enable_conv = TRUE;
}
} else {
only_euc_conv = FALSE;
*enable_conv = TRUE;
}
}
return TRUE;
}
#define UNICODE_BUFFER_SIZE 4096 ///< Number of wchar_t elements available in unibuf
static wchar_t unibuf[UNICODE_BUFFER_SIZE]; ///< File-scope work area holding the intermediate unicode string during conversion
/**
 * Apply charset conversion to a string using win32 functions.
 *
 * The conversion follows the settings stored by charconv_win32_setup():
 * an optional euc->sjis pass by libjcode, then source codepage -> unicode
 * -> target codepage.  The intermediate unicode string lives in the
 * file-scope buffer unibuf, so every call shares that buffer.
 *
 * On any failure (API error, unicode buffer too small, @a outstr too small)
 * an error is logged and @a instr is returned unconverted; @a outstr may
 * have been partially written in that case.
 *
 * @param instr [in] source string (NUL-terminated)
 * @param outstr [out] destination buffer
 * @param maxoutlen [in] allocated length of outstr in byte.
 *
 * @return either of instr or outstr, that holds the result string.
 *
 */
char *
charconv_win32(char *instr, char *outstr, int maxoutlen)
{
  int unilen, newlen;
  char *srcbuf = instr;

  if (euctosjis == TRUE) {
    /* euc->sjis conversion; the result in outstr becomes the new source */
    EUCtoSJIS(instr, outstr, maxoutlen);
    srcbuf = outstr;
    if (only_euc_conv) {
      return(outstr);
    }
  }

  /* get length of unicode string */
  unilen = MultiByteToWideChar(from_cp, 0, srcbuf, -1, NULL, 0);
  if (unilen <= 0) {
    jlog("Error: charconv_win32: conversion error?\n");
    return(instr);
  }
  if (unilen > UNICODE_BUFFER_SIZE) {
    jlog("Error: charconv_win32: unicode buffer size exceeded (%d > %d)!\n", unilen, UNICODE_BUFFER_SIZE);
    return(instr);
  }

  /* convert source string to unicode; a failure here would otherwise leave
     stale data from a previous call in unibuf */
  if (MultiByteToWideChar(from_cp, 0, srcbuf, -1, unibuf, unilen) <= 0) {
    jlog("Error: charconv_win32: conversion error?\n");
    return(instr);
  }

  /* get length of target string (size query only, no output buffer needed) */
  newlen = WideCharToMultiByte(to_cp, 0, unibuf, -1, NULL, 0, NULL, NULL);
  if (newlen <= 0) {
    jlog("Error: charconv_win32: conversion error?\n");
    return(instr);
  }
  if (newlen > maxoutlen) {
    jlog("Error: charconv_win32: target buffer size exceeded (%d > %d)!\n", newlen, maxoutlen);
    return(instr);
  }

  /* convert unicode to target string; do not hand back outstr if this fails */
  if (WideCharToMultiByte(to_cp, 0, unibuf, -1, outstr, newlen, NULL, NULL) <= 0) {
    jlog("Error: charconv_win32: conversion error?\n");
    return(instr);
  }

  return(outstr);
}
#endif /* USE_WIN32_MULTIBYTE */
#endif /* CHARACTER_CONVERSION */