diff options
Diffstat (limited to 'libX11/src/xlibi18n/lcUniConv/8bit_tab_to_h.c')
-rw-r--r-- | libX11/src/xlibi18n/lcUniConv/8bit_tab_to_h.c | 1070 |
1 files changed, 535 insertions, 535 deletions
diff --git a/libX11/src/xlibi18n/lcUniConv/8bit_tab_to_h.c b/libX11/src/xlibi18n/lcUniConv/8bit_tab_to_h.c index 1caa28a21..22ac969b2 100644 --- a/libX11/src/xlibi18n/lcUniConv/8bit_tab_to_h.c +++ b/libX11/src/xlibi18n/lcUniConv/8bit_tab_to_h.c @@ -1,535 +1,535 @@ - -/* - * Generates an 8-bit character set table from a .TXT table as found on - * ftp.unicode.org or from a table containing the 256 Unicode values as - * hexadecimal integers. - * Examples: - * - * ./8bit_tab_to_h ISO-8859-1 iso8859_1 < tab8859_1 - * ./8bit_tab_to_h ISO-8859-2 iso8859_2 < tab8859_2 - * ./8bit_tab_to_h ISO-8859-3 iso8859_3 < tab8859_3 - * ./8bit_tab_to_h ISO-8859-4 iso8859_4 < tab8859_4 - * ./8bit_tab_to_h ISO-8859-5 iso8859_5 < tab8859_5 - * ./8bit_tab_to_h ISO-8859-6 iso8859_6 < tab8859_6 - * ./8bit_tab_to_h ISO-8859-7 iso8859_7 < tab8859_7 - * ./8bit_tab_to_h ISO-8859-8 iso8859_8 < tab8859_8 - * ./8bit_tab_to_h ISO-8859-9 iso8859_9 < tab8859_9 - * ./8bit_tab_to_h ISO-8859-10 iso8859_10 < tab8859_10 - * ./8bit_tab_to_h ISO-8859-14 iso8859_14 < tab8859_14 - * ./8bit_tab_to_h ISO-8859-15 iso8859_15 < tab8859_15 - * ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < jis0201 - * ./8bit_tab_to_h TIS620-0 tis620 < tabtis620 - * ./8bit_tab_to_h KOI8-R koi8_r < tabkoi8_r - * ./8bit_tab_to_h KOI8-U koi8_u < tabkoi8_u - * ./8bit_tab_to_h ARMSCII-8 armscii_8 < tabarmscii_8 - * ./8bit_tab_to_h CP1133 cp1133 < tabibm_cp1133 - * ./8bit_tab_to_h MULELAO-1 mulelao < tabmulelao_1 - * ./8bit_tab_to_h VISCII1.1-1 viscii1 < tabviscii - * ./8bit_tab_to_h TCVN-5712 tcvn < tabtcvn - * ./8bit_tab_to_h GEORGIAN-ACADEMY georgian_ac < tabgeorgian_academy - * ./8bit_tab_to_h GEORGIAN-PS georgian_ps < tabgeorgian_ps - * - * ./8bit_tab_to_h ISO-8859-1 iso8859_1 < 8859-1.TXT - * ./8bit_tab_to_h ISO-8859-2 iso8859_2 < 8859-2.TXT - * ./8bit_tab_to_h ISO-8859-3 iso8859_3 < 8859-3.TXT - * ./8bit_tab_to_h ISO-8859-4 iso8859_4 < 8859-4.TXT - * ./8bit_tab_to_h ISO-8859-5 iso8859_5 < 8859-5.TXT - * ./8bit_tab_to_h ISO-8859-6 
iso8859_6 < 8859-6.TXT - * ./8bit_tab_to_h ISO-8859-7 iso8859_7 < 8859-7.TXT - * ./8bit_tab_to_h ISO-8859-8 iso8859_8 < 8859-8.TXT - * ./8bit_tab_to_h ISO-8859-9 iso8859_9 < 8859-9.TXT - * ./8bit_tab_to_h ISO-8859-10 iso8859_10 < 8859-10.TXT - * ./8bit_tab_to_h ISO-8859-14 iso8859_14 < 8859-14.TXT - * ./8bit_tab_to_h ISO-8859-15 iso8859_15 < 8859-15.TXT - * ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < JIS0201.TXT - * ./8bit_tab_to_h KOI8-R koi8_r < KOI8-R.TXT - */ - -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> - -int main (int argc, char *argv[]) -{ - const char* charsetname; - const char* c_charsetname; - const char* filename; - const char* directory; - int charset2uni[0x100]; - - if (argc != 3 && argc != 4 && argc != 5) - exit(1); - charsetname = argv[1]; - c_charsetname = argv[2]; - if (argc > 3) { - filename = argv[3]; - } else { - char* s = (char*) malloc(strlen(c_charsetname)+strlen(".h")+1); - strcpy(s,c_charsetname); strcat(s,".h"); - filename = s; - } - directory = (argc > 4 ? argv[4] : ""); - - fprintf(stderr, "Creating %s%s\n", directory, filename); - - { - int i, c; - c = getc(stdin); - ungetc(c,stdin); - if (c == '#') { - /* Read a unicode.org style .TXT file. */ - for (i = 0; i < 0x100; i++) - charset2uni[i] = 0xfffd; - for (;;) { - c = getc(stdin); - if (c == EOF) - break; - if (c == '\n' || c == ' ' || c == '\t') - continue; - if (c == '#') { - do { c = getc(stdin); } while (!(c == EOF || c == '\n')); - continue; - } - ungetc(c,stdin); - if (scanf("0x%x", &i) != 1 || !(i >= 0 && i < 0x100)) - exit(1); - do { c = getc(stdin); } while (c == ' ' || c == '\t'); - if (c != EOF) - ungetc(c,stdin); - if (c == '\n' || c == '#') - continue; - if (scanf("0x%x", &charset2uni[i]) != 1) - exit(1); - } - } else { - /* Read a table of hexadecimal Unicode values. 
*/ - for (i = 0; i < 0x100; i++) { - if (scanf("%x", &charset2uni[i]) != 1) - exit(1); - if (charset2uni[i] < 0 || charset2uni[i] == 0xffff) - charset2uni[i] = 0xfffd; - } - if (scanf("%x", &i) != EOF) - exit(1); - } - } - - /* Write the output file. */ - { - FILE* f; - - { - char* fname = malloc(strlen(directory)+strlen(filename)+1); - strcpy(fname,directory); strcat(fname,filename); - f = fopen(fname,"w"); - if (f == NULL) - exit(1); - } - - fprintf(f, "\n"); - fprintf(f, "/*\n"); - fprintf(f, " * %s\n", charsetname); - fprintf(f, " */\n"); - fprintf(f, "\n"); - - { - int i, i1, i2, i3; - int line[16]; - int tableno; - struct { int minline; int maxline; } tables[16]; - bool some_invalid; - bool final_ret_reached; - - for (i1 = 0; i1 < 16; i1++) { - bool all_invalid = true; - bool all_identity = true; - for (i2 = 0; i2 < 16; i2++) { - i = 16*i1+i2; - if (charset2uni[i] != 0xfffd) - all_invalid = false; - if (charset2uni[i] != i) - all_identity = false; - } - if (all_invalid) - line[i1] = -2; - else if (all_identity) - line[i1] = -1; - else - line[i1] = 0; - } - tableno = 0; - for (i1 = 0; i1 < 16; i1++) { - if (line[i1] >= 0) { - if (i1 > 0 && tableno > 0 && line[i1-1] == tableno-1) { - line[i1] = tableno-1; - tables[tableno-1].maxline = i1; - } else { - tableno++; - line[i1] = tableno-1; - tables[tableno-1].minline = tables[tableno-1].maxline = i1; - } - } - } - some_invalid = false; - for (i = 0; i < 0x100; i++) - if (charset2uni[i] == 0xfffd) - some_invalid = true; - if (tableno > 0) { - int t; - for (t = 0; t < tableno; t++) { - fprintf(f, "static const unsigned short %s_2uni", c_charsetname); - if (tableno > 1) - fprintf(f, "_%d", t+1); - fprintf(f, "[%d] = {\n", 16*(tables[t].maxline-tables[t].minline+1)); - for (i1 = tables[t].minline; i1 <= tables[t].maxline; i1++) { - fprintf(f, " /* 0x%02x */\n", 16*i1); - for (i2 = 0; i2 < 2; i2++) { - fprintf(f, " "); - for (i3 = 0; i3 < 8; i3++) { - i = 16*i1+8*i2+i3; - fprintf(f, " 0x%04x,", charset2uni[i]); - } - 
fprintf(f, "\n"); - } - } - fprintf(f, "};\n"); - } - fprintf(f, "\n"); - } - final_ret_reached = false; - fprintf(f, "static int\n%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", c_charsetname); - fprintf(f, "{\n"); - fprintf(f, " unsigned char c = *s;\n"); - if (some_invalid) { - for (i1 = 0; i1 < 16;) { - int t = line[i1]; - const char* indent; - for (i2 = i1; i2 < 16 && line[i2] == t; i2++); - indent = (i1 == 0 && i2 == 16 ? " " : " "); - if (i1 == 0) { - if (i2 == 16) { - } else { - fprintf(f, " if (c < 0x%02x) {\n", 16*i2); - } - } else { - if (i2 == 16) { - fprintf(f, " else {\n"); - } else { - fprintf(f, " else if (c < 0x%02x) {\n", 16*i2); - } - } - if (t == -2) { - final_ret_reached = true; - } else if (t == -1) { - fprintf(f, "%s*pwc = (ucs4_t) c;\n", indent); - fprintf(f, "%sreturn 1;\n", indent); - } else { - fprintf(f, "%s", indent); - some_invalid = false; - for (i = 16*i1; i < 16*i2; i++) - if (charset2uni[i] == 0xfffd) - some_invalid = true; - if (some_invalid) - fprintf(f, "unsigned short wc = "); - else - fprintf(f, "*pwc = (ucs4_t) "); - fprintf(f, "%s_2uni", c_charsetname); - if (tableno > 1) - fprintf(f, "_%d", t+1); - fprintf(f, "[c"); - if (tables[t].minline > 0) - fprintf(f, "-0x%02x", 16*tables[t].minline); - fprintf(f, "];\n"); - if (some_invalid) { - fprintf(f, "%sif (wc != 0xfffd) {\n", indent); - fprintf(f, "%s *pwc = (ucs4_t) wc;\n", indent); - fprintf(f, "%s return 1;\n", indent); - fprintf(f, "%s}\n", indent); - final_ret_reached = true; - } else { - fprintf(f, "%sreturn 1;\n", indent); - } - } - if (!(i1 == 0 && i2 == 16)) - fprintf(f, " }\n"); - i1 = i2; - } - if (final_ret_reached) - fprintf(f, " return RET_ILSEQ;\n"); - } else { - for (i1 = 0; i1 < 16;) { - int t = line[i1]; - for (i2 = i1; i2 < 16 && line[i2] == t; i2++); - if (i1 == 0) { - if (i2 == 16) { - fprintf(f, " "); - } else { - fprintf(f, " if (c < 0x%02x)\n ", 16*i2); - } - } else { - if (i2 == 16) { - fprintf(f, " else\n "); - } else { - 
fprintf(f, " else if (c < 0x%02x)\n ", 16*i2); - } - } - if (t == -1) - fprintf(f, "*pwc = (ucs4_t) c;\n"); - else { - fprintf(f, "*pwc = (ucs4_t) %s_2uni", c_charsetname); - if (tableno > 1) - fprintf(f, "_%d", t+1); - fprintf(f, "[c"); - if (tables[t].minline > 0) - fprintf(f, "-0x%02x", 16*tables[t].minline); - fprintf(f, "];\n"); - } - i1 = i2; - } - fprintf(f, " return 1;\n"); - } - fprintf(f, "}\n"); - - } - - fprintf(f, "\n"); - - { - int uni2charset[0x10000]; - bool pages[0x100]; - int line[0x2000]; - int tableno; - struct { int minline; int maxline; int usecount; const char* suffix; } tables[0x2000]; - bool need_c; - bool fix_0000; - int i, j, p, j1, j2, t; - - for (j = 0; j < 0x10000; j++) - uni2charset[j] = 0; - for (p = 0; p < 0x100; p++) - pages[p] = false; - for (i = 0; i < 0x100; i++) { - j = charset2uni[i]; - if (j != 0xfffd) { - uni2charset[j] = i; - pages[j>>8] = true; - } - } - for (j1 = 0; j1 < 0x2000; j1++) { - bool all_invalid = true; - bool all_identity = true; - for (j2 = 0; j2 < 8; j2++) { - j = 8*j1+j2; - if (uni2charset[j] != 0) - all_invalid = false; - if (uni2charset[j] != j) - all_identity = false; - } - if (all_invalid) - line[j1] = -2; - else if (all_identity) - line[j1] = -1; - else - line[j1] = 0; - } - tableno = 0; - for (j1 = 0; j1 < 0x2000; j1++) { - if (line[j1] >= 0) { - if (tableno > 0 - && ((j1 > 0 && line[j1-1] == tableno-1) - || ((tables[tableno-1].maxline >> 5) == (j1 >> 5) - && j1 - tables[tableno-1].maxline <= 8))) { - line[j1] = tableno-1; - tables[tableno-1].maxline = j1; - } else { - tableno++; - line[j1] = tableno-1; - tables[tableno-1].minline = tables[tableno-1].maxline = j1; - } - } - } - for (t = 0; t < tableno; t++) { - tables[t].usecount = 0; - j1 = 8*tables[t].minline; - j2 = 8*(tables[t].maxline+1); - for (j = j1; j < j2; j++) - if (uni2charset[j] != 0) - tables[t].usecount++; - } - for (t = 0, p = -1, i = 0; t < tableno; t++) { - if (tables[t].usecount > 1) { - char* s; - if (p == tables[t].minline >> 5) { 
- s = (char*) malloc(5+1); - sprintf(s, "%02x_%d", p, ++i); - } else { - p = tables[t].minline >> 5; - s = (char*) malloc(2+1); - sprintf(s, "%02x", p); - } - tables[t].suffix = s; - } else - tables[t].suffix = NULL; - } - { - p = -1; - for (t = 0; t < tableno; t++) - if (tables[t].usecount > 1) { - p = 0; - fprintf(f, "static const unsigned char %s_page%s[%d] = {\n", c_charsetname, tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1)); - for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) { - if ((j1 % 0x20) == 0 && j1 > tables[t].minline) - fprintf(f, " /* 0x%04x */\n", 8*j1); - fprintf(f, " "); - for (j2 = 0; j2 < 8; j2++) { - j = 8*j1+j2; - fprintf(f, " 0x%02x,", uni2charset[j]); - } - fprintf(f, " /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7); - } - fprintf(f, "};\n"); - } - if (p >= 0) - fprintf(f, "\n"); - } - need_c = false; - for (j1 = 0; j1 < 0x2000;) { - t = line[j1]; - for (j2 = j1; j2 < 0x2000 && line[j2] == t; j2++); - if (t >= 0) - j2 = tables[t].maxline+1; - if (!(t == -2 || (t == -1 && j1 == 0))) - need_c = true; - j1 = j2; - } - fix_0000 = false; - fprintf(f, "static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", c_charsetname); - fprintf(f, "{\n"); - if (need_c) - fprintf(f, " unsigned char c = 0;\n"); - for (j1 = 0; j1 < 0x2000;) { - t = line[j1]; - for (j2 = j1; j2 < 0x2000 && line[j2] == t; j2++); - if (t >= 0) { - if (j1 != tables[t].minline) abort(); - if (j2 > tables[t].maxline+1) abort(); - j2 = tables[t].maxline+1; - } - if (t == -2) { - } else { - if (j1 == 0) - fprintf(f, " "); - else - fprintf(f, " else "); - if (t >= 0 && tables[t].usecount == 0) abort(); - if (t >= 0 && tables[t].usecount == 1) { - if (j2 != j1+1) abort(); - for (j = 8*j1; j < 8*j2; j++) - if (uni2charset[j] != 0) { - fprintf(f, "if (wc == 0x%04x)\n c = 0x%02x;\n", j, uni2charset[j]); - break; - } - } else { - if (j1 == 0) { - fprintf(f, "if (wc < 0x%04x)", 8*j2); - } else { - fprintf(f, "if (wc >= 0x%04x && wc < 
0x%04x)", 8*j1, 8*j2); - } - if (t == -1) { - if (j1 == 0) - /* If wc == 0, the function must return 1, not -1. */ - fprintf(f, " {\n *r = wc;\n return 1;\n }\n"); - else - fprintf(f, "\n c = wc;\n"); - } else { - fprintf(f, "\n c = %s_page%s[wc", c_charsetname, tables[t].suffix); - if (tables[t].minline > 0) - fprintf(f, "-0x%04x", 8*j1); - fprintf(f, "];\n"); - if (j1 == 0 && uni2charset[0] == 0) - /* If wc == 0, the function must return 1, not -1. */ - fix_0000 = true; - } - } - } - j1 = j2; - } - if (need_c) { - if (fix_0000) - fprintf(f, " if (c != 0 || wc == 0) {\n"); - else - fprintf(f, " if (c != 0) {\n"); - fprintf(f, " *r = c;\n"); - fprintf(f, " return 1;\n"); - fprintf(f, " }\n"); - } - fprintf(f, " return RET_ILSEQ;\n"); - fprintf(f, "}\n"); - - } - - if (ferror(f) || fclose(f)) - exit(1); - } - -#if 0 - - int i1, i2, i3, i1_min, i1_max, j1, j2; - - i1_min = 16; - i1_max = -1; - for (i1 = 0; i1 < 16; i1++) - for (i2 = 0; i2 < 16; i2++) - if (charset2uni[16*i1+i2] != 0xfffd) { - if (i1_min > i1) i1_min = i1; - if (i1_max < i1) i1_max = i1; - } - printf("static const unsigned short %s_2uni[%d] = {\n", - name, 16*(i1_max-i1_min+1)); - for (i1 = i1_min; i1 <= i1_max; i1++) { - printf(" /""* 0x%02x *""/\n", 16*i1); - for (i2 = 0; i2 < 2; i2++) { - printf(" "); - for (i3 = 0; i3 < 8; i3++) { - if (i3 > 0) printf(" "); - printf("0x%04x,", charset2uni[16*i1+8*i2+i3]); - } - printf("\n"); - } - } - printf("};\n"); - printf("\n"); - - for (p = 0; p < 0x100; p++) - pages[p] = 0; - for (i = 0; i < 0x100; i++) - if (charset2uni[i] != 0xfffd) - pages[charset2uni[i]>>8] = 1; - for (p = 0; p < 0x100; p++) - if (pages[p]) { - int j1_min = 32; - int j1_max = -1; - for (j1 = 0; j1 < 32; j1++) - for (j2 = 0; j2 < 8; j2++) - if (uni2charset[256*p+8*j1+j2] != 0) { - if (j1_min > j1) j1_min = j1; - if (j1_max < j1) j1_max = j1; - } - printf("static const unsigned char %s_page%02x[%d] = {\n", - name, p, 8*(j1_max-j1_min+1)); - for (j1 = j1_min; j1 <= j1_max; j1++) { - 
printf(" "); - for (j2 = 0; j2 < 8; j2++) - printf("0x%02x, ", uni2charset[256*p+8*j1+j2]); - printf("/""* 0x%02x-0x%02x *""/\n", 8*j1, 8*j1+7); - } - printf("};\n"); - } - printf("\n"); - -} -#endif - - exit(0); -} +
+/*
+ * Generates an 8-bit character set table from a .TXT table as found on
+ * ftp.unicode.org or from a table containing the 256 Unicode values as
+ * hexadecimal integers.
+ * Examples:
+ *
+ * ./8bit_tab_to_h ISO-8859-1 iso8859_1 < tab8859_1
+ * ./8bit_tab_to_h ISO-8859-2 iso8859_2 < tab8859_2
+ * ./8bit_tab_to_h ISO-8859-3 iso8859_3 < tab8859_3
+ * ./8bit_tab_to_h ISO-8859-4 iso8859_4 < tab8859_4
+ * ./8bit_tab_to_h ISO-8859-5 iso8859_5 < tab8859_5
+ * ./8bit_tab_to_h ISO-8859-6 iso8859_6 < tab8859_6
+ * ./8bit_tab_to_h ISO-8859-7 iso8859_7 < tab8859_7
+ * ./8bit_tab_to_h ISO-8859-8 iso8859_8 < tab8859_8
+ * ./8bit_tab_to_h ISO-8859-9 iso8859_9 < tab8859_9
+ * ./8bit_tab_to_h ISO-8859-10 iso8859_10 < tab8859_10
+ * ./8bit_tab_to_h ISO-8859-14 iso8859_14 < tab8859_14
+ * ./8bit_tab_to_h ISO-8859-15 iso8859_15 < tab8859_15
+ * ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < jis0201
+ * ./8bit_tab_to_h TIS620-0 tis620 < tabtis620
+ * ./8bit_tab_to_h KOI8-R koi8_r < tabkoi8_r
+ * ./8bit_tab_to_h KOI8-U koi8_u < tabkoi8_u
+ * ./8bit_tab_to_h ARMSCII-8 armscii_8 < tabarmscii_8
+ * ./8bit_tab_to_h CP1133 cp1133 < tabibm_cp1133
+ * ./8bit_tab_to_h MULELAO-1 mulelao < tabmulelao_1
+ * ./8bit_tab_to_h VISCII1.1-1 viscii1 < tabviscii
+ * ./8bit_tab_to_h TCVN-5712 tcvn < tabtcvn
+ * ./8bit_tab_to_h GEORGIAN-ACADEMY georgian_ac < tabgeorgian_academy
+ * ./8bit_tab_to_h GEORGIAN-PS georgian_ps < tabgeorgian_ps
+ *
+ * ./8bit_tab_to_h ISO-8859-1 iso8859_1 < 8859-1.TXT
+ * ./8bit_tab_to_h ISO-8859-2 iso8859_2 < 8859-2.TXT
+ * ./8bit_tab_to_h ISO-8859-3 iso8859_3 < 8859-3.TXT
+ * ./8bit_tab_to_h ISO-8859-4 iso8859_4 < 8859-4.TXT
+ * ./8bit_tab_to_h ISO-8859-5 iso8859_5 < 8859-5.TXT
+ * ./8bit_tab_to_h ISO-8859-6 iso8859_6 < 8859-6.TXT
+ * ./8bit_tab_to_h ISO-8859-7 iso8859_7 < 8859-7.TXT
+ * ./8bit_tab_to_h ISO-8859-8 iso8859_8 < 8859-8.TXT
+ * ./8bit_tab_to_h ISO-8859-9 iso8859_9 < 8859-9.TXT
+ * ./8bit_tab_to_h ISO-8859-10 iso8859_10 < 8859-10.TXT
+ * ./8bit_tab_to_h ISO-8859-14 iso8859_14 < 8859-14.TXT
+ * ./8bit_tab_to_h ISO-8859-15 iso8859_15 < 8859-15.TXT
+ * ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < JIS0201.TXT
+ * ./8bit_tab_to_h KOI8-R koi8_r < KOI8-R.TXT
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+
+int main (int argc, char *argv[])
+{ /* Usage: 8bit_tab_to_h CHARSETNAME C_CHARSETNAME [FILENAME [DIRECTORY]] < table
+ *
+ * Reads a byte-to-Unicode table for an 8-bit charset from stdin and writes
+ * a C source fragment holding lookup tables plus two generated functions,
+ * C_CHARSETNAME_mbtowc and C_CHARSETNAME_wctomb.
+ * A wrong argument count, malformed input or an output file error ends the
+ * program with exit(1); internal consistency failures call abort();
+ * success ends with exit(0).
+ * NOTE(review): none of the malloc() results below are checked for NULL.
+ */
+ const char* charsetname; /* argv[1]: only printed in the header comment of the output file */
+ const char* c_charsetname; /* argv[2]: prefix of every generated identifier */
+ const char* filename; /* argv[3], or c_charsetname followed by .h when omitted */
+ const char* directory; /* argv[4], or empty; prepended to filename verbatim, no separator is added */
+ int charset2uni[0x100]; /* Unicode value for each byte 0x00..0xff; 0xfffd marks a byte without mapping */
+
+ if (argc != 3 && argc != 4 && argc != 5)
+ exit(1);
+ charsetname = argv[1];
+ c_charsetname = argv[2];
+ if (argc > 3) {
+ filename = argv[3];
+ } else {
+ char* s = (char*) malloc(strlen(c_charsetname)+strlen(".h")+1);
+ strcpy(s,c_charsetname); strcat(s,".h");
+ filename = s;
+ }
+ directory = (argc > 4 ? argv[4] : "");
+
+ fprintf(stderr, "Creating %s%s\n", directory, filename);
+
+ { /* Phase 1: fill charset2uni[] from stdin. */
+ int i, c;
+ c = getc(stdin); /* peek at the first character to choose the input format */
+ ungetc(c,stdin);
+ if (c == '#') {
+ /* Read a unicode.org style .TXT file. */
+ for (i = 0; i < 0x100; i++)
+ charset2uni[i] = 0xfffd; /* bytes never listed in the file stay unmapped */
+ for (;;) {
+ c = getc(stdin);
+ if (c == EOF)
+ break;
+ if (c == '\n' || c == ' ' || c == '\t')
+ continue;
+ if (c == '#') {
+ do { c = getc(stdin); } while (!(c == EOF || c == '\n')); /* skip the comment up to the end of the line */
+ continue;
+ }
+ ungetc(c,stdin);
+ if (scanf("0x%x", &i) != 1 || !(i >= 0 && i < 0x100)) /* first column: the byte value, must be 0x00..0xff */
+ exit(1);
+ do { c = getc(stdin); } while (c == ' ' || c == '\t');
+ if (c != EOF)
+ ungetc(c,stdin);
+ if (c == '\n' || c == '#') /* no second column: this byte keeps 0xfffd */
+ continue;
+ if (scanf("0x%x", &charset2uni[i]) != 1) /* second column: the Unicode value; its range is not checked here */
+ exit(1);
+ }
+ } else {
+ /* Read a table of hexadecimal Unicode values. */
+ for (i = 0; i < 0x100; i++) {
+ if (scanf("%x", &charset2uni[i]) != 1)
+ exit(1);
+ if (charset2uni[i] < 0 || charset2uni[i] == 0xffff) /* negative and 0xffff entries are treated as unmapped */
+ charset2uni[i] = 0xfffd;
+ }
+ if (scanf("%x", &i) != EOF) /* anything but end of input after the 256th value is an error */
+ exit(1);
+ }
+ }
+
+ /* Write the output file. */
+ {
+ FILE* f;
+
+ {
+ char* fname = malloc(strlen(directory)+strlen(filename)+1);
+ strcpy(fname,directory); strcat(fname,filename);
+ f = fopen(fname,"w");
+ if (f == NULL)
+ exit(1);
+ }
+
+ fprintf(f, "\n");
+ fprintf(f, "/*\n");
+ fprintf(f, " * %s\n", charsetname);
+ fprintf(f, " */\n");
+ fprintf(f, "\n");
+
+ { /* Part A: emit the C_CHARSETNAME_2uni table(s) and the C_CHARSETNAME_mbtowc function. */
+ int i, i1, i2, i3;
+ int line[16]; /* per row of 16 bytes: -2 = all unmapped, -1 = all identity, >= 0 = index into tables[] */
+ int tableno; /* number of tables[] entries in use */
+ struct { int minline; int maxline; } tables[16]; /* inclusive row range covered by each emitted table */
+ bool some_invalid; /* first: any unmapped byte at all; later reused per run of rows */
+ bool final_ret_reached; /* set when some generated path falls through to the trailing return RET_ILSEQ */
+
+ for (i1 = 0; i1 < 16; i1++) { /* classify each row of 16 bytes */
+ bool all_invalid = true;
+ bool all_identity = true;
+ for (i2 = 0; i2 < 16; i2++) {
+ i = 16*i1+i2;
+ if (charset2uni[i] != 0xfffd)
+ all_invalid = false;
+ if (charset2uni[i] != i)
+ all_identity = false;
+ }
+ if (all_invalid)
+ line[i1] = -2;
+ else if (all_identity)
+ line[i1] = -1;
+ else
+ line[i1] = 0;
+ }
+ tableno = 0;
+ for (i1 = 0; i1 < 16; i1++) { /* merge adjacent rows that need a table into one table */
+ if (line[i1] >= 0) {
+ if (i1 > 0 && tableno > 0 && line[i1-1] == tableno-1) { /* previous row belongs to the newest table: extend it */
+ line[i1] = tableno-1;
+ tables[tableno-1].maxline = i1;
+ } else {
+ tableno++;
+ line[i1] = tableno-1;
+ tables[tableno-1].minline = tables[tableno-1].maxline = i1;
+ }
+ }
+ }
+ some_invalid = false;
+ for (i = 0; i < 0x100; i++)
+ if (charset2uni[i] == 0xfffd)
+ some_invalid = true;
+ if (tableno > 0) {
+ int t;
+ for (t = 0; t < tableno; t++) {
+ fprintf(f, "static const unsigned short %s_2uni", c_charsetname);
+ if (tableno > 1) /* tables get a _1, _2, ... suffix only when there is more than one */
+ fprintf(f, "_%d", t+1);
+ fprintf(f, "[%d] = {\n", 16*(tables[t].maxline-tables[t].minline+1));
+ for (i1 = tables[t].minline; i1 <= tables[t].maxline; i1++) {
+ fprintf(f, " /* 0x%02x */\n", 16*i1);
+ for (i2 = 0; i2 < 2; i2++) { /* two output lines of 8 values per row */
+ fprintf(f, " ");
+ for (i3 = 0; i3 < 8; i3++) {
+ i = 16*i1+8*i2+i3;
+ fprintf(f, " 0x%04x,", charset2uni[i]);
+ }
+ fprintf(f, "\n");
+ }
+ }
+ fprintf(f, "};\n");
+ }
+ fprintf(f, "\n");
+ }
+ final_ret_reached = false;
+ fprintf(f, "static int\n%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", c_charsetname);
+ fprintf(f, "{\n");
+ fprintf(f, " unsigned char c = *s;\n");
+ if (some_invalid) { /* braced if/else chain; branches that may see 0xfffd fall through to return RET_ILSEQ */
+ for (i1 = 0; i1 < 16;) {
+ int t = line[i1];
+ const char* indent;
+ for (i2 = i1; i2 < 16 && line[i2] == t; i2++); /* rows i1 .. i2-1 form a maximal run with the same line[] value */
+ indent = (i1 == 0 && i2 == 16 ? " " : " ");
+ if (i1 == 0) {
+ if (i2 == 16) { /* a single run covers all bytes: no condition and no braces are emitted */
+ } else {
+ fprintf(f, " if (c < 0x%02x) {\n", 16*i2);
+ }
+ } else {
+ if (i2 == 16) {
+ fprintf(f, " else {\n");
+ } else {
+ fprintf(f, " else if (c < 0x%02x) {\n", 16*i2);
+ }
+ }
+ if (t == -2) { /* wholly unmapped run: no statements are emitted for it */
+ final_ret_reached = true;
+ } else if (t == -1) {
+ fprintf(f, "%s*pwc = (ucs4_t) c;\n", indent);
+ fprintf(f, "%sreturn 1;\n", indent);
+ } else {
+ fprintf(f, "%s", indent);
+ some_invalid = false; /* recomputed for this run only */
+ for (i = 16*i1; i < 16*i2; i++)
+ if (charset2uni[i] == 0xfffd)
+ some_invalid = true;
+ if (some_invalid)
+ fprintf(f, "unsigned short wc = ");
+ else
+ fprintf(f, "*pwc = (ucs4_t) ");
+ fprintf(f, "%s_2uni", c_charsetname);
+ if (tableno > 1)
+ fprintf(f, "_%d", t+1);
+ fprintf(f, "[c");
+ if (tables[t].minline > 0) /* the index is relative to the first byte the table covers */
+ fprintf(f, "-0x%02x", 16*tables[t].minline);
+ fprintf(f, "];\n");
+ if (some_invalid) {
+ fprintf(f, "%sif (wc != 0xfffd) {\n", indent);
+ fprintf(f, "%s *pwc = (ucs4_t) wc;\n", indent);
+ fprintf(f, "%s return 1;\n", indent);
+ fprintf(f, "%s}\n", indent);
+ final_ret_reached = true;
+ } else {
+ fprintf(f, "%sreturn 1;\n", indent);
+ }
+ }
+ if (!(i1 == 0 && i2 == 16))
+ fprintf(f, " }\n");
+ i1 = i2;
+ }
+ if (final_ret_reached)
+ fprintf(f, " return RET_ILSEQ;\n");
+ } else { /* every byte is mapped: unbraced chain of assignments followed by one shared return 1 */
+ for (i1 = 0; i1 < 16;) {
+ int t = line[i1];
+ for (i2 = i1; i2 < 16 && line[i2] == t; i2++);
+ if (i1 == 0) {
+ if (i2 == 16) {
+ fprintf(f, " ");
+ } else {
+ fprintf(f, " if (c < 0x%02x)\n ", 16*i2);
+ }
+ } else {
+ if (i2 == 16) {
+ fprintf(f, " else\n ");
+ } else {
+ fprintf(f, " else if (c < 0x%02x)\n ", 16*i2);
+ }
+ }
+ if (t == -1)
+ fprintf(f, "*pwc = (ucs4_t) c;\n");
+ else {
+ fprintf(f, "*pwc = (ucs4_t) %s_2uni", c_charsetname);
+ if (tableno > 1)
+ fprintf(f, "_%d", t+1);
+ fprintf(f, "[c");
+ if (tables[t].minline > 0)
+ fprintf(f, "-0x%02x", 16*tables[t].minline);
+ fprintf(f, "];\n");
+ }
+ i1 = i2;
+ }
+ fprintf(f, " return 1;\n");
+ }
+ fprintf(f, "}\n");
+
+ }
+
+ fprintf(f, "\n");
+
+ { /* Part B: emit the C_CHARSETNAME_page* tables and the C_CHARSETNAME_wctomb function. */
+ int uni2charset[0x10000]; /* inverse map: byte for each code point below 0x10000; 0 = no mapping (also what a mapping to byte 0 stores) */
+ bool pages[0x100]; /* written below, but only read inside the #if 0 section */
+ int line[0x2000]; /* per row of 8 code points: -2 = all unmapped, -1 = all identity, >= 0 = index into tables[] */
+ int tableno; /* number of tables[] entries in use */
+ struct { int minline; int maxline; int usecount; const char* suffix; } tables[0x2000]; /* row range, count of mapped entries in it, array name suffix (NULL when no array is emitted) */
+ bool need_c; /* whether the generated function needs its local variable c */
+ bool fix_0000; /* whether the generated final test must also accept wc == 0 */
+ int i, j, p, j1, j2, t;
+
+ for (j = 0; j < 0x10000; j++)
+ uni2charset[j] = 0;
+ for (p = 0; p < 0x100; p++)
+ pages[p] = false;
+ for (i = 0; i < 0x100; i++) {
+ j = charset2uni[i];
+ if (j != 0xfffd) { /* NOTE(review): j is not range-checked before it indexes uni2charset[] and pages[]; confirm inputs stay below 0x10000 */
+ uni2charset[j] = i;
+ pages[j>>8] = true;
+ }
+ }
+ for (j1 = 0; j1 < 0x2000; j1++) { /* classify each row of 8 code points */
+ bool all_invalid = true;
+ bool all_identity = true;
+ for (j2 = 0; j2 < 8; j2++) {
+ j = 8*j1+j2;
+ if (uni2charset[j] != 0)
+ all_invalid = false;
+ if (uni2charset[j] != j)
+ all_identity = false;
+ }
+ if (all_invalid)
+ line[j1] = -2;
+ else if (all_identity)
+ line[j1] = -1;
+ else
+ line[j1] = 0;
+ }
+ tableno = 0;
+ for (j1 = 0; j1 < 0x2000; j1++) {
+ if (line[j1] >= 0) { /* Extend the newest table when the previous row is part of it, or when
+ * its last row lies in the same block of 256 code points (j1 >> 5) and
+ * is at most 8 rows back; otherwise start a new table. Rows skipped over
+ * by such a gap keep their negative line[] value. */
+ if (tableno > 0
+ && ((j1 > 0 && line[j1-1] == tableno-1)
+ || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
+ && j1 - tables[tableno-1].maxline <= 8))) {
+ line[j1] = tableno-1;
+ tables[tableno-1].maxline = j1;
+ } else {
+ tableno++;
+ line[j1] = tableno-1;
+ tables[tableno-1].minline = tables[tableno-1].maxline = j1;
+ }
+ }
+ }
+ for (t = 0; t < tableno; t++) { /* count the mapped code points inside each table */
+ tables[t].usecount = 0;
+ j1 = 8*tables[t].minline;
+ j2 = 8*(tables[t].maxline+1);
+ for (j = j1; j < j2; j++)
+ if (uni2charset[j] != 0)
+ tables[t].usecount++;
+ }
+ for (t = 0, p = -1, i = 0; t < tableno; t++) { /* p = minline >> 5 of the last named table; i = running counter for the _N suffixes, never reset */
+ if (tables[t].usecount > 1) { /* tables with a single mapped entry get no array and therefore no suffix */
+ char* s;
+ if (p == tables[t].minline >> 5) {
+ s = (char*) malloc(5+1);
+ sprintf(s, "%02x_%d", p, ++i);
+ } else {
+ p = tables[t].minline >> 5;
+ s = (char*) malloc(2+1);
+ sprintf(s, "%02x", p);
+ }
+ tables[t].suffix = s;
+ } else
+ tables[t].suffix = NULL;
+ }
+ {
+ p = -1; /* from here on p only records whether any array was emitted */
+ for (t = 0; t < tableno; t++)
+ if (tables[t].usecount > 1) {
+ p = 0;
+ fprintf(f, "static const unsigned char %s_page%s[%d] = {\n", c_charsetname, tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
+ for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
+ if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
+ fprintf(f, " /* 0x%04x */\n", 8*j1);
+ fprintf(f, " ");
+ for (j2 = 0; j2 < 8; j2++) {
+ j = 8*j1+j2;
+ fprintf(f, " 0x%02x,", uni2charset[j]);
+ }
+ fprintf(f, " /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
+ }
+ fprintf(f, "};\n");
+ }
+ if (p >= 0)
+ fprintf(f, "\n");
+ }
+ need_c = false;
+ for (j1 = 0; j1 < 0x2000;) { /* dry run over the same runs as the emission loop below, only to decide need_c */
+ t = line[j1];
+ for (j2 = j1; j2 < 0x2000 && line[j2] == t; j2++);
+ if (t >= 0)
+ j2 = tables[t].maxline+1;
+ if (!(t == -2 || (t == -1 && j1 == 0))) /* every run except unmapped ones and an identity run starting at 0 assigns to c */
+ need_c = true;
+ j1 = j2;
+ }
+ fix_0000 = false;
+ fprintf(f, "static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", c_charsetname);
+ fprintf(f, "{\n");
+ if (need_c)
+ fprintf(f, " unsigned char c = 0;\n");
+ for (j1 = 0; j1 < 0x2000;) {
+ t = line[j1];
+ for (j2 = j1; j2 < 0x2000 && line[j2] == t; j2++);
+ if (t >= 0) { /* consistency checks; a table run is then stretched to end at its maxline */
+ if (j1 != tables[t].minline) abort();
+ if (j2 > tables[t].maxline+1) abort();
+ j2 = tables[t].maxline+1;
+ }
+ if (t == -2) {
+ } else {
+ if (j1 == 0) /* NOTE(review): if row 0 is wholly unmapped the first emitted branch starts with else; confirm this cannot happen for supported inputs */
+ fprintf(f, " ");
+ else
+ fprintf(f, " else ");
+ if (t >= 0 && tables[t].usecount == 0) abort();
+ if (t >= 0 && tables[t].usecount == 1) { /* single mapped code point: emit a direct comparison instead of a table lookup */
+ if (j2 != j1+1) abort();
+ for (j = 8*j1; j < 8*j2; j++)
+ if (uni2charset[j] != 0) {
+ fprintf(f, "if (wc == 0x%04x)\n c = 0x%02x;\n", j, uni2charset[j]);
+ break;
+ }
+ } else {
+ if (j1 == 0) {
+ fprintf(f, "if (wc < 0x%04x)", 8*j2);
+ } else {
+ fprintf(f, "if (wc >= 0x%04x && wc < 0x%04x)", 8*j1, 8*j2);
+ }
+ if (t == -1) {
+ if (j1 == 0)
+ /* If wc == 0, the function must return 1, not -1. */
+ fprintf(f, " {\n *r = wc;\n return 1;\n }\n");
+ else
+ fprintf(f, "\n c = wc;\n");
+ } else {
+ fprintf(f, "\n c = %s_page%s[wc", c_charsetname, tables[t].suffix);
+ if (tables[t].minline > 0) /* the index is relative to the first code point the table covers */
+ fprintf(f, "-0x%04x", 8*j1);
+ fprintf(f, "];\n");
+ if (j1 == 0 && uni2charset[0] == 0)
+ /* If wc == 0, the function must return 1, not -1. */
+ fix_0000 = true;
+ }
+ }
+ }
+ j1 = j2;
+ }
+ if (need_c) { /* shared epilogue: c != 0 means one of the branches above found a mapping */
+ if (fix_0000)
+ fprintf(f, " if (c != 0 || wc == 0) {\n");
+ else
+ fprintf(f, " if (c != 0) {\n");
+ fprintf(f, " *r = c;\n");
+ fprintf(f, " return 1;\n");
+ fprintf(f, " }\n");
+ }
+ fprintf(f, " return RET_ILSEQ;\n");
+ fprintf(f, "}\n");
+
+ }
+
+ if (ferror(f) || fclose(f)) /* a write error or a failed close makes the whole run fail */
+ exit(1);
+ }
+
+#if 0 /* Disabled code, never compiled. It prints tables to stdout and refers to
+ * names (name, i, p, pages, uni2charset) that are not declared at this
+ * point; it also contains a closing brace without a matching opener. */
+
+ int i1, i2, i3, i1_min, i1_max, j1, j2;
+
+ i1_min = 16;
+ i1_max = -1;
+ for (i1 = 0; i1 < 16; i1++)
+ for (i2 = 0; i2 < 16; i2++)
+ if (charset2uni[16*i1+i2] != 0xfffd) {
+ if (i1_min > i1) i1_min = i1;
+ if (i1_max < i1) i1_max = i1;
+ }
+ printf("static const unsigned short %s_2uni[%d] = {\n",
+ name, 16*(i1_max-i1_min+1));
+ for (i1 = i1_min; i1 <= i1_max; i1++) {
+ printf(" /""* 0x%02x *""/\n", 16*i1);
+ for (i2 = 0; i2 < 2; i2++) {
+ printf(" ");
+ for (i3 = 0; i3 < 8; i3++) {
+ if (i3 > 0) printf(" ");
+ printf("0x%04x,", charset2uni[16*i1+8*i2+i3]);
+ }
+ printf("\n");
+ }
+ }
+ printf("};\n");
+ printf("\n");
+
+ for (p = 0; p < 0x100; p++)
+ pages[p] = 0;
+ for (i = 0; i < 0x100; i++)
+ if (charset2uni[i] != 0xfffd)
+ pages[charset2uni[i]>>8] = 1;
+ for (p = 0; p < 0x100; p++)
+ if (pages[p]) {
+ int j1_min = 32;
+ int j1_max = -1;
+ for (j1 = 0; j1 < 32; j1++)
+ for (j2 = 0; j2 < 8; j2++)
+ if (uni2charset[256*p+8*j1+j2] != 0) {
+ if (j1_min > j1) j1_min = j1;
+ if (j1_max < j1) j1_max = j1;
+ }
+ printf("static const unsigned char %s_page%02x[%d] = {\n",
+ name, p, 8*(j1_max-j1_min+1));
+ for (j1 = j1_min; j1 <= j1_max; j1++) {
+ printf(" ");
+ for (j2 = 0; j2 < 8; j2++)
+ printf("0x%02x, ", uni2charset[256*p+8*j1+j2]);
+ printf("/""* 0x%02x-0x%02x *""/\n", 8*j1, 8*j1+7);
+ }
+ printf("};\n");
+ }
+ printf("\n");
+
+}
+#endif
+
+ exit(0);
+}
|