/*
 * Japanese text encoding detection (JIS / EUC-JP / Shift_JIS) and
 * iconv-based conversion helpers.
 *
 * NOTE(review): the original include list carried three additional
 * commented-out #include lines whose header names are no longer legible;
 * they were inactive and are not reproduced here.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>   /* strcasecmp */
#include <stdint.h>    /* SIZE_MAX */
#include <locale.h>
#include <errno.h>
#include <iconv.h>

#define ASCII 0
#define JIS 1
#define EUC 2
#define SJIS 3
#define EUCORSJIS 4
#define ESC 27
#define SS2 142

#define SJIS_LOCALE_NAME "ja_JP.SJIS", "ja_JP.sjis"
#define JIS_LOCALE_NAME "ja_JP.JIS", "ja_JP.jis", "ja_JP.iso-2022-jp"
#define EUC_LOCALE_NAME "ja", "ja_JP", "ja_JP.ujis", "ja_JP.EUC", "ja_JP.eucJP"

/* Output buffer sizing for convert_kanji(): bytes reserved per input byte,
 * plus slack so a trailing shift/reset sequence always fits. */
enum { KANJI_OUT_EXPANSION = 4, KANJI_OUT_SLACK = 8 };

/* Heap-copies a NUL-terminated string (ISO C stand-in for strdup, which is
 * not declared in strict -std=c11 mode). Returns NULL on allocation failure. */
static char *kanji_dup_string(const char *s)
{
    size_t n = strlen(s) + 1;
    char *copy = malloc(n);

    if (copy != NULL)
        memcpy(copy, s, n);
    return copy;
}

/* True when c can be the first byte of a two-byte Shift_JIS character. */
static int kanji_is_sjis_lead(int c)
{
    return (c > 0x80 && c < 0xa0) || (c >= 0xe0 && c < 0xfd);
}

/* True when c can be the second byte of a two-byte Shift_JIS character. */
static int kanji_is_sjis_trail(int c)
{
    return (c >= 0x40 && c < 0x7f) || (c >= 0x80 && c < 0xfd);
}

/* True when c can be either byte of a two-byte EUC-JP character. */
static int kanji_is_euc_byte(int c)
{
    return c > 0xa0 && c < 0xff;
}

/*
 * Counts adjacent (lead, trail) byte pairs in s. After a pair is counted the
 * byte following it starts the next candidate pair; otherwise the window
 * slides by one byte. Never reads beyond the terminating NUL.
 */
static int kanji_count_pairs(const unsigned char *s,
                             int (*is_lead)(int), int (*is_trail)(int))
{
    int count = 0;
    int prev = 0;

    while (*s != '\0') {
        int cur = *s++;

        if (is_lead(prev) && is_trail(cur)) {
            count++;
            if (*s == '\0')
                break;
            cur = *s++;
        }
        prev = cur;
    }
    return count;
}

/*
 * Guesses the Japanese encoding of a NUL-terminated byte string.
 *
 * Returns ASCII (0) only when str is NULL. Otherwise returns JIS, SJIS or
 * EUC. A first pass looks for bytes that are decisive for one encoding:
 * an "ESC $ B" / "ESC $ @" escape means JIS, bytes 0x81-0x8d / 0x8f-0x9f
 * mean SJIS, and certain second bytes mean EUC. If nothing is decisive, the
 * string is scanned for plausible SJIS and EUC two-byte pairs and SJIS is
 * chosen only when it has strictly more pairs; EUC is the default, which
 * also covers plain 7-bit text.
 */
int detect_kanji(unsigned char *str)
{
    const unsigned char *start = str;
    int c;
    int sjis_c, euc_c;

    if (!str)
        return ASCII;

    while ((c = (int)*str) != '\0') {
        if (c == ESC) {
            if ((c = (int)*(++str)) == '\0')
                break;
            if (c == '$') {
                if ((c = (int)*(++str)) == '\0')
                    break;
                if (c == 'B' || c == '@')
                    return JIS;
            }
            str++;
            continue;
        }

        if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
            return SJIS;

        if (c == SS2) {
            if ((c = (int)*(++str)) == '\0')
                break;
            if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160) ||
                (c >= 224 && c <= 252))
                return SJIS;
            if (c >= 161 && c <= 223)
                break;          /* ambiguous: decide by pair counting */
            str++;
            continue;
        }

        if (c >= 161 && c <= 223) { /* EUC lead byte or SJIS half-width kana */
            if ((c = (int)*(++str)) == '\0')
                break;
            if (c == 160 || (c >= 224 && c <= 254))
                return EUC;
            /* Any other second byte is still ambiguous; skip it. */
            str++;
            continue;
        }

        if (c >= 240 && c <= 254)
            return EUC;

        str++;
    }

    /* Nothing decisive: compare how many two-byte pairs each encoding
     * would see, always scanning from the beginning of the string. */
    sjis_c = kanji_count_pairs(start, kanji_is_sjis_lead, kanji_is_sjis_trail);
    if (sjis_c == 0)
        return EUC;

    euc_c = kanji_count_pairs(start, kanji_is_euc_byte, kanji_is_euc_byte);
    return (sjis_c > euc_c) ? SJIS : EUC;
}

/*
 * Converts str from its detected Japanese encoding to the encoding named by
 * dstset (an iconv charset name).
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL when str is NULL or memory cannot be allocated.
 * An unmodified copy of str is returned when dstset is NULL, when the input
 * is detected as ASCII, or when iconv does not support the conversion.
 * If the conversion stops early on invalid or incomplete input, the bytes
 * converted up to that point are returned.
 */
char *convert_kanji(char *str, char *dstset)
{
    const char *srcset;
    iconv_t cd;
    char *buf, *inptr, *outptr, *ret;
    size_t len, insize, outsize;
    int detected;

    if (!str)
        return NULL;

    detected = detect_kanji((unsigned char *)str);
    if (detected == ASCII || !dstset)
        return kanji_dup_string(str);

    switch (detected) {
    case JIS:
        srcset = "ISO-2022-JP";
        break;
    case EUC:
        srcset = "EUCJP";
        break;
    case SJIS:
    default:
        srcset = "SJIS";
        break;
    }

    len = strlen(str);
    if (len > (SIZE_MAX - KANJI_OUT_SLACK - 1) / KANJI_OUT_EXPANSION)
        return NULL;

    /* Open the converter before allocating so failure needs no cleanup. */
    cd = iconv_open(dstset, srcset);
    if (cd == (iconv_t)-1)
        return kanji_dup_string(str);   /* Do not convert at all. */

    outsize = len * KANJI_OUT_EXPANSION + KANJI_OUT_SLACK;
    buf = malloc(outsize + 1);          /* +1 keeps room for the NUL */
    if (!buf) {
        iconv_close(cd);
        return NULL;
    }

    inptr = str;
    insize = len;
    outptr = buf;

    /* On error iconv leaves outptr just past the last converted byte, so
     * the partial result is kept as-is. */
    (void)iconv(cd, &inptr, &insize, &outptr, &outsize);

    /* Emit any pending reset sequence for stateful targets such as
     * ISO-2022-JP. */
    (void)iconv(cd, NULL, NULL, &outptr, &outsize);

    *outptr = '\0';
    iconv_close(cd);

    /* Hand back a right-sized copy instead of the oversized work buffer. */
    ret = kanji_dup_string(buf);
    free(buf);
    return ret;
}

/*
 * Converts str to the Japanese encoding implied by the LC_CTYPE locale.
 *
 * Calls setlocale(LC_CTYPE, "") — which also sets the process locale — and
 * matches the returned name case-insensitively against the known SJIS, EUC
 * and JIS locale names. Falls back to EUCJP when the locale is unavailable
 * or unrecognised. Returns what convert_kanji() returns (caller frees), or
 * NULL when str is NULL.
 */
char *convert_kanji_auto(char *str)
{
    static const char *const sjis_locale_name[] = {SJIS_LOCALE_NAME, NULL};
    static const char *const jis_locale_name[] = {JIS_LOCALE_NAME, NULL};
    static const char *const euc_locale_name[] = {EUC_LOCALE_NAME, NULL};
    static const struct LOCALETABLE {
        const char *code;
        const char *const *name_list;
    } locale_table[] = {
        {"SJIS", sjis_locale_name},
        {"EUCJP", euc_locale_name},
        {"ISO-2022-JP", jis_locale_name}
    };
    const size_t table_len = sizeof(locale_table) / sizeof(locale_table[0]);
    const char *jpcode = NULL;
    const char *ctype;
    size_t i, j;

    if (!str)
        return NULL;

    ctype = setlocale(LC_CTYPE, "");
    if (ctype != NULL) {    /* setlocale returns NULL for an invalid locale */
        for (j = 0; jpcode == NULL && j < table_len; j++) {
            const char *const *name = locale_table[j].name_list;

            for (i = 0; name[i] != NULL; i++) {
                if (strcasecmp(ctype, name[i]) == 0) {
                    jpcode = locale_table[j].code;
                    break;
                }
            }
        }
    }

    if (jpcode == NULL)
        jpcode = locale_table[1].code;  /* default: EUCJP */

    /* convert_kanji() only reads dstset; the cast just satisfies its
     * non-const parameter type. */
    return convert_kanji(str, (char *)jpcode);
}

/* Disabled manual smoke test, kept as found. */
#if 0
int main (int ac, char *av[])
{
  char *p;
  p = convert_kanji_auto ("̣Đa");
  printf ("result = %s\n", p);
  free (p);
  p = convert_kanji_auto ("́Óa");
  printf ("result = %s\n", p);
  free (p);
}
#endif