From 949f5e04efd80f2892f960f04a7454bc58f1d212 Mon Sep 17 00:00:00 2001
From: Mike Gabriel <mike.gabriel@das-netzwerkteam.de>
Date: Sat, 4 Feb 2017 10:52:05 +0100
Subject: doc/libNX_X11/lcUniConv: Move over the rather-documentary files
 8bit_tab_to_h.c and cjk_tab_to_h.c to nx-libs's doc/ folder.

---
 doc/libNX_X11/lcUniConv/8bit_tab_to_h.c |  535 +++++++++++++++
 doc/libNX_X11/lcUniConv/cjk_tab_to_h.c  | 1071 +++++++++++++++++++++++++++++++
 2 files changed, 1606 insertions(+)
 create mode 100644 doc/libNX_X11/lcUniConv/8bit_tab_to_h.c
 create mode 100644 doc/libNX_X11/lcUniConv/cjk_tab_to_h.c

(limited to 'doc')

diff --git a/doc/libNX_X11/lcUniConv/8bit_tab_to_h.c b/doc/libNX_X11/lcUniConv/8bit_tab_to_h.c
new file mode 100644
index 000000000..993979aeb
--- /dev/null
+++ b/doc/libNX_X11/lcUniConv/8bit_tab_to_h.c
@@ -0,0 +1,535 @@
+
+/*
+ * Generates an 8-bit character set table from a .TXT table as found on
+ * ftp.unicode.org or from a table containing the 256 Unicode values as
+ * hexadecimal integers.
+ * Examples:
+ *
+ *   ./8bit_tab_to_h ISO-8859-1 iso8859_1 < tab8859_1
+ *   ./8bit_tab_to_h ISO-8859-2 iso8859_2 < tab8859_2
+ *   ./8bit_tab_to_h ISO-8859-3 iso8859_3 < tab8859_3
+ *   ./8bit_tab_to_h ISO-8859-4 iso8859_4 < tab8859_4
+ *   ./8bit_tab_to_h ISO-8859-5 iso8859_5 < tab8859_5
+ *   ./8bit_tab_to_h ISO-8859-6 iso8859_6 < tab8859_6
+ *   ./8bit_tab_to_h ISO-8859-7 iso8859_7 < tab8859_7
+ *   ./8bit_tab_to_h ISO-8859-8 iso8859_8 < tab8859_8
+ *   ./8bit_tab_to_h ISO-8859-9 iso8859_9 < tab8859_9
+ *   ./8bit_tab_to_h ISO-8859-10 iso8859_10 < tab8859_10
+ *   ./8bit_tab_to_h ISO-8859-14 iso8859_14 < tab8859_14
+ *   ./8bit_tab_to_h ISO-8859-15 iso8859_15 < tab8859_15
+ *   ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < jis0201
+ *   ./8bit_tab_to_h TIS620-0 tis620 < tabtis620
+ *   ./8bit_tab_to_h KOI8-R koi8_r < tabkoi8_r
+ *   ./8bit_tab_to_h KOI8-U koi8_u < tabkoi8_u
+ *   ./8bit_tab_to_h ARMSCII-8 armscii_8 < tabarmscii_8
+ *   ./8bit_tab_to_h CP1133 cp1133 < tabibm_cp1133
+ *   ./8bit_tab_to_h MULELAO-1 mulelao < tabmulelao_1
+ *   ./8bit_tab_to_h VISCII1.1-1 viscii1 < tabviscii
+ *   ./8bit_tab_to_h TCVN-5712 tcvn < tabtcvn
+ *   ./8bit_tab_to_h GEORGIAN-ACADEMY georgian_ac < tabgeorgian_academy
+ *   ./8bit_tab_to_h GEORGIAN-PS georgian_ps < tabgeorgian_ps
+ *
+ *   ./8bit_tab_to_h ISO-8859-1 iso8859_1 < 8859-1.TXT
+ *   ./8bit_tab_to_h ISO-8859-2 iso8859_2 < 8859-2.TXT
+ *   ./8bit_tab_to_h ISO-8859-3 iso8859_3 < 8859-3.TXT
+ *   ./8bit_tab_to_h ISO-8859-4 iso8859_4 < 8859-4.TXT
+ *   ./8bit_tab_to_h ISO-8859-5 iso8859_5 < 8859-5.TXT
+ *   ./8bit_tab_to_h ISO-8859-6 iso8859_6 < 8859-6.TXT
+ *   ./8bit_tab_to_h ISO-8859-7 iso8859_7 < 8859-7.TXT
+ *   ./8bit_tab_to_h ISO-8859-8 iso8859_8 < 8859-8.TXT
+ *   ./8bit_tab_to_h ISO-8859-9 iso8859_9 < 8859-9.TXT
+ *   ./8bit_tab_to_h ISO-8859-10 iso8859_10 < 8859-10.TXT
+ *   ./8bit_tab_to_h ISO-8859-14 iso8859_14 < 8859-14.TXT
+ *   ./8bit_tab_to_h ISO-8859-15 iso8859_15 < 8859-15.TXT
+ *   ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < JIS0201.TXT
+ *   ./8bit_tab_to_h KOI8-R koi8_r < KOI8-R.TXT
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+
+int main (int argc, char *argv[])
+{
+  const char* charsetname;   /* argv[1]: name printed in the banner comment */
+  const char* c_charsetname; /* argv[2]: prefix of the generated identifiers */
+  const char* filename;      /* argv[3], default: c_charsetname followed by ".h" */
+  const char* directory;     /* argv[4], default "": prepended as is to filename */
+  int charset2uni[0x100];    /* byte -> Unicode code point, 0xfffd if unmapped */
+
+  if (argc != 3 && argc != 4 && argc != 5)
+    exit(1);
+  charsetname = argv[1];
+  c_charsetname = argv[2];
+  filename = (argc > 3 ? argv[3] : NULL);
+  if (filename == NULL) {
+    char* s = malloc(strlen(c_charsetname)+strlen(".h")+1);
+    if (s == NULL) exit(1);
+    strcpy(s,c_charsetname); strcat(s,".h");
+    filename = s;
+  }
+  directory = (argc > 4 ? argv[4] : "");
+
+  fprintf(stderr, "Creating %s%s\n", directory, filename);
+  /* Read the input table from stdin into charset2uni[]; exit(1) on bad input. */
+  {
+    int i, c;
+    c = getc(stdin);
+    ungetc(c,stdin);
+    if (c == '#') {
+      /* Read a unicode.org style .TXT file. */
+      for (i = 0; i < 0x100; i++)
+        charset2uni[i] = 0xfffd;
+      for (;;) {
+        c = getc(stdin);
+        if (c == EOF)
+          break;
+        if (c == '\n' || c == ' ' || c == '\t')
+          continue;
+        if (c == '#') {
+          do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
+          continue;
+        }
+        ungetc(c,stdin);
+        if (scanf("0x%x", &i) != 1 || !(i >= 0 && i < 0x100))
+          exit(1);
+        do { c = getc(stdin); } while (c == ' ' || c == '\t');
+        if (c != EOF)
+          ungetc(c,stdin);
+        if (c == '\n' || c == '#')
+          continue;
+        if (scanf("0x%x", &charset2uni[i]) != 1 || charset2uni[i] < 0 || charset2uni[i] > 0xffff)
+          exit(1); /* the value later indexes uni2charset[0x10000] */
+      }
+    } else {
+      /* Read a table of hexadecimal Unicode values. */
+      for (i = 0; i < 0x100; i++) {
+        if (scanf("%x", &charset2uni[i]) != 1 || charset2uni[i] > 0xffff)
+          exit(1); /* the value later indexes uni2charset[0x10000] */
+        if (charset2uni[i] < 0 || charset2uni[i] == 0xffff)
+          charset2uni[i] = 0xfffd;
+      }
+      if (scanf("%x", &i) != EOF)
+        exit(1);
+    }
+  }
+
+  /* Write the output file. */
+  {
+    FILE* f;
+
+    {
+      char* fname = malloc(strlen(directory)+strlen(filename)+1);
+      if (fname == NULL) exit(1);
+      strcpy(fname,directory); strcat(fname,filename);
+      f = fopen(fname,"w");
+      if (f == NULL) exit(1);
+    }
+
+    fprintf(f, "\n");
+    fprintf(f, "/*\n");
+    fprintf(f, " * %s\n", charsetname);
+    fprintf(f, " */\n");
+    fprintf(f, "\n");
+    /* Emit the <c_charsetname>_2uni tables and the <c_charsetname>_mbtowc function. */
+    {
+      int i, i1, i2, i3;
+      int line[16]; /* per row of 16 bytes: -2 all unmapped, -1 identity, else table number */
+      int tableno;
+      struct { int minline; int maxline; } tables[16]; /* rows covered by each table */
+      bool some_invalid;
+      bool final_ret_reached; /* generated code can fall through to RET_ILSEQ */
+
+      for (i1 = 0; i1 < 16; i1++) {
+        bool all_invalid = true;
+        bool all_identity = true;
+        for (i2 = 0; i2 < 16; i2++) {
+          i = 16*i1+i2;
+          if (charset2uni[i] != 0xfffd)
+            all_invalid = false;
+          if (charset2uni[i] != i)
+            all_identity = false;
+        }
+        if (all_invalid)
+          line[i1] = -2;
+        else if (all_identity)
+          line[i1] = -1;
+        else
+          line[i1] = 0;
+      }
+      tableno = 0;
+      for (i1 = 0; i1 < 16; i1++) {
+        if (line[i1] >= 0) {
+          if (i1 > 0 && tableno > 0 && line[i1-1] == tableno-1) {
+            line[i1] = tableno-1;
+            tables[tableno-1].maxline = i1;
+          } else {
+            tableno++;
+            line[i1] = tableno-1;
+            tables[tableno-1].minline = tables[tableno-1].maxline = i1;
+          }
+        }
+      }
+      some_invalid = false;
+      for (i = 0; i < 0x100; i++)
+        if (charset2uni[i] == 0xfffd)
+          some_invalid = true;
+      if (tableno > 0) {
+        int t;
+        for (t = 0; t < tableno; t++) {
+          fprintf(f, "static const unsigned short %s_2uni", c_charsetname);
+          if (tableno > 1)
+            fprintf(f, "_%d", t+1);
+          fprintf(f, "[%d] = {\n", 16*(tables[t].maxline-tables[t].minline+1));
+          for (i1 = tables[t].minline; i1 <= tables[t].maxline; i1++) {
+            fprintf(f, "  /* 0x%02x */\n", 16*i1);
+            for (i2 = 0; i2 < 2; i2++) {
+              fprintf(f, " ");
+              for (i3 = 0; i3 < 8; i3++) {
+                i = 16*i1+8*i2+i3;
+                fprintf(f, " 0x%04x,", charset2uni[i]);
+              }
+              fprintf(f, "\n");
+            }
+          }
+          fprintf(f, "};\n");
+        }
+        fprintf(f, "\n");
+      }
+      final_ret_reached = false;
+      fprintf(f, "static int\n%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", c_charsetname);
+      fprintf(f, "{\n");
+      fprintf(f, "  unsigned char c = *s;\n");
+      if (some_invalid) {
+        for (i1 = 0; i1 < 16;) {
+          int t = line[i1];
+          const char* indent;
+          for (i2 = i1; i2 < 16 && line[i2] == t; i2++);
+          indent = (i1 == 0 && i2 == 16 ? "  " : "    ");
+          if (i1 == 0) {
+            if (i2 == 16) {
+            } else {
+              fprintf(f, "  if (c < 0x%02x) {\n", 16*i2);
+            }
+          } else {
+            if (i2 == 16) {
+              fprintf(f, "  else {\n");
+            } else {
+              fprintf(f, "  else if (c < 0x%02x) {\n", 16*i2);
+            }
+          }
+          if (t == -2) {
+            final_ret_reached = true;
+          } else if (t == -1) {
+            fprintf(f, "%s*pwc = (ucs4_t) c;\n", indent);
+            fprintf(f, "%sreturn 1;\n", indent);
+          } else {
+            fprintf(f, "%s", indent);
+            some_invalid = false;
+            for (i = 16*i1; i < 16*i2; i++)
+              if (charset2uni[i] == 0xfffd)
+                some_invalid = true;
+            if (some_invalid)
+              fprintf(f, "unsigned short wc = ");
+            else
+              fprintf(f, "*pwc = (ucs4_t) ");
+            fprintf(f, "%s_2uni", c_charsetname);
+            if (tableno > 1)
+              fprintf(f, "_%d", t+1);
+            fprintf(f, "[c");
+            if (tables[t].minline > 0)
+              fprintf(f, "-0x%02x", 16*tables[t].minline);
+            fprintf(f, "];\n");
+            if (some_invalid) {
+              fprintf(f, "%sif (wc != 0xfffd) {\n", indent);
+              fprintf(f, "%s  *pwc = (ucs4_t) wc;\n", indent);
+              fprintf(f, "%s  return 1;\n", indent);
+              fprintf(f, "%s}\n", indent);
+              final_ret_reached = true;
+            } else {
+              fprintf(f, "%sreturn 1;\n", indent);
+            }
+          }
+          if (!(i1 == 0 && i2 == 16))
+            fprintf(f, "  }\n");
+          i1 = i2;
+        }
+        if (final_ret_reached)
+          fprintf(f, "  return RET_ILSEQ;\n");
+      } else {
+        for (i1 = 0; i1 < 16;) {
+          int t = line[i1];
+          for (i2 = i1; i2 < 16 && line[i2] == t; i2++);
+          if (i1 == 0) {
+            if (i2 == 16) {
+              fprintf(f, "  ");
+            } else {
+              fprintf(f, "  if (c < 0x%02x)\n    ", 16*i2);
+            }
+          } else {
+            if (i2 == 16) {
+              fprintf(f, "  else\n    ");
+            } else {
+              fprintf(f, "  else if (c < 0x%02x)\n    ", 16*i2);
+            }
+          }
+          if (t == -1)
+            fprintf(f, "*pwc = (ucs4_t) c;\n");
+          else {
+            fprintf(f, "*pwc = (ucs4_t) %s_2uni", c_charsetname);
+            if (tableno > 1)
+              fprintf(f, "_%d", t+1);
+            fprintf(f, "[c");
+            if (tables[t].minline > 0)
+              fprintf(f, "-0x%02x", 16*tables[t].minline);
+            fprintf(f, "];\n");
+          }
+          i1 = i2;
+        }
+        fprintf(f, "  return 1;\n");
+      }
+      fprintf(f, "}\n");
+
+    }
+
+    fprintf(f, "\n");
+    /* Emit the <c_charsetname>_page* tables and the <c_charsetname>_wctomb function. */
+    {
+      int uni2charset[0x10000]; /* code point -> byte value, 0 if unmapped */
+      bool pages[0x100]; /* filled in below, not consulted afterwards */
+      int line[0x2000]; /* per 8 code points: -2 all unmapped, -1 identity, else table number */
+      int tableno;
+      struct { int minline; int maxline; int usecount; const char* suffix; } tables[0x2000];
+      bool need_c;
+      bool fix_0000;
+      int i, j, p, j1, j2, t;
+
+      for (j = 0; j < 0x10000; j++)
+        uni2charset[j] = 0;
+      for (p = 0; p < 0x100; p++)
+        pages[p] = false;
+      for (i = 0; i < 0x100; i++) {
+        j = charset2uni[i];
+        if (j != 0xfffd) {
+          uni2charset[j] = i;
+          pages[j>>8] = true;
+        }
+      }
+      for (j1 = 0; j1 < 0x2000; j1++) {
+        bool all_invalid = true;
+        bool all_identity = true;
+        for (j2 = 0; j2 < 8; j2++) {
+          j = 8*j1+j2;
+          if (uni2charset[j] != 0)
+            all_invalid = false;
+          if (uni2charset[j] != j)
+            all_identity = false;
+        }
+        if (all_invalid)
+          line[j1] = -2;
+        else if (all_identity)
+          line[j1] = -1;
+        else
+          line[j1] = 0;
+      }
+      tableno = 0;
+      for (j1 = 0; j1 < 0x2000; j1++) {
+        if (line[j1] >= 0) {
+          if (tableno > 0
+              && ((j1 > 0 && line[j1-1] == tableno-1)
+                  || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
+                      && j1 - tables[tableno-1].maxline <= 8))) {
+            line[j1] = tableno-1;
+            tables[tableno-1].maxline = j1;
+          } else {
+            tableno++;
+            line[j1] = tableno-1;
+            tables[tableno-1].minline = tables[tableno-1].maxline = j1;
+          }
+        }
+      }
+      for (t = 0; t < tableno; t++) {
+        tables[t].usecount = 0;
+        j1 = 8*tables[t].minline;
+        j2 = 8*(tables[t].maxline+1);
+        for (j = j1; j < j2; j++)
+          if (uni2charset[j] != 0)
+            tables[t].usecount++;
+      }
+      for (t = 0, p = -1, i = 0; t < tableno; t++) {
+        if (tables[t].usecount > 1) {
+          /* 16 bytes hold "%02x_%d" for any int: 2 + 1 + at most 11 + NUL. */
+          char* s = malloc(16);
+          if (s == NULL) exit(1);
+          if (p == tables[t].minline >> 5) {
+            snprintf(s, 16, "%02x_%d", p, ++i);
+          } else {
+            p = tables[t].minline >> 5;
+            snprintf(s, 16, "%02x", p);
+          }
+          tables[t].suffix = s;
+        } else
+          tables[t].suffix = NULL;
+      }
+      {
+        p = -1;
+        for (t = 0; t < tableno; t++)
+          if (tables[t].usecount > 1) {
+            p = 0;
+            fprintf(f, "static const unsigned char %s_page%s[%d] = {\n", c_charsetname, tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
+            for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
+              if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
+                fprintf(f, "  /* 0x%04x */\n", 8*j1);
+              fprintf(f, " ");
+              for (j2 = 0; j2 < 8; j2++) {
+                j = 8*j1+j2;
+                fprintf(f, " 0x%02x,", uni2charset[j]);
+              }
+              fprintf(f, " /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
+            }
+            fprintf(f, "};\n");
+          }
+        if (p >= 0)
+          fprintf(f, "\n");
+      }
+      need_c = false;
+      for (j1 = 0; j1 < 0x2000;) {
+        t = line[j1];
+        for (j2 = j1; j2 < 0x2000 && line[j2] == t; j2++);
+        if (t >= 0)
+          j2 = tables[t].maxline+1;
+        if (!(t == -2 || (t == -1 && j1 == 0)))
+          need_c = true;
+        j1 = j2;
+      }
+      fix_0000 = false;
+      fprintf(f, "static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", c_charsetname);
+      fprintf(f, "{\n");
+      if (need_c)
+        fprintf(f, "  unsigned char c = 0;\n");
+      for (j1 = 0; j1 < 0x2000;) {
+        t = line[j1];
+        for (j2 = j1; j2 < 0x2000 && line[j2] == t; j2++);
+        if (t >= 0) {
+          if (j1 != tables[t].minline) abort();
+          if (j2 > tables[t].maxline+1) abort();
+          j2 = tables[t].maxline+1;
+        }
+        if (t == -2) {
+        } else {
+          if (j1 == 0)
+            fprintf(f, "  ");
+          else
+            fprintf(f, "  else ");
+          if (t >= 0 && tables[t].usecount == 0) abort();
+          if (t >= 0 && tables[t].usecount == 1) {
+            if (j2 != j1+1) abort();
+            for (j = 8*j1; j < 8*j2; j++)
+              if (uni2charset[j] != 0) {
+                fprintf(f, "if (wc == 0x%04x)\n    c = 0x%02x;\n", j, uni2charset[j]);
+                break;
+              }
+          } else {
+            if (j1 == 0) {
+              fprintf(f, "if (wc < 0x%04x)", 8*j2);
+            } else {
+              fprintf(f, "if (wc >= 0x%04x && wc < 0x%04x)", 8*j1, 8*j2);
+            }
+            if (t == -1) {
+              if (j1 == 0)
+                /* If wc == 0, the function must return 1, not -1. */
+                fprintf(f, " {\n    *r = wc;\n    return 1;\n  }\n");
+              else
+                fprintf(f, "\n    c = wc;\n");
+            } else {
+              fprintf(f, "\n    c = %s_page%s[wc", c_charsetname, tables[t].suffix);
+              if (tables[t].minline > 0)
+                fprintf(f, "-0x%04x", 8*j1);
+              fprintf(f, "];\n");
+              if (j1 == 0 && uni2charset[0] == 0)
+                /* If wc == 0, the function must return 1, not -1. */
+                fix_0000 = true;
+            }
+          }
+        }
+        j1 = j2;
+      }
+      if (need_c) {
+        if (fix_0000)
+          fprintf(f, "  if (c != 0 || wc == 0) {\n");
+        else
+          fprintf(f, "  if (c != 0) {\n");
+        fprintf(f, "    *r = c;\n");
+        fprintf(f, "    return 1;\n");
+        fprintf(f, "  }\n");
+      }
+      fprintf(f, "  return RET_ILSEQ;\n");
+      fprintf(f, "}\n");
+
+    }
+
+    if (ferror(f) || fclose(f))
+      exit(1);
+  }
+
+#if 0
+
+    int i1, i2, i3, i1_min, i1_max, j1, j2;
+
+  i1_min = 16;
+  i1_max = -1;
+  for (i1 = 0; i1 < 16; i1++)
+    for (i2 = 0; i2 < 16; i2++)
+      if (charset2uni[16*i1+i2] != 0xfffd) {
+        if (i1_min > i1) i1_min = i1;
+        if (i1_max < i1) i1_max = i1;
+      }
+  printf("static const unsigned short %s_2uni[%d] = {\n",
+         name, 16*(i1_max-i1_min+1));
+  for (i1 = i1_min; i1 <= i1_max; i1++) {
+    printf("  /""* 0x%02x *""/\n", 16*i1);
+    for (i2 = 0; i2 < 2; i2++) {
+      printf("  ");
+      for (i3 = 0; i3 < 8; i3++) {
+        if (i3 > 0) printf(" ");
+        printf("0x%04x,", charset2uni[16*i1+8*i2+i3]);
+      }
+      printf("\n");
+    }
+  }
+  printf("};\n");
+  printf("\n");
+
+  for (p = 0; p < 0x100; p++)
+    pages[p] = 0;
+  for (i = 0; i < 0x100; i++)
+    if (charset2uni[i] != 0xfffd)
+      pages[charset2uni[i]>>8] = 1;
+  for (p = 0; p < 0x100; p++)
+    if (pages[p]) {
+      int j1_min = 32;
+      int j1_max = -1;
+      for (j1 = 0; j1 < 32; j1++)
+        for (j2 = 0; j2 < 8; j2++)
+          if (uni2charset[256*p+8*j1+j2] != 0) {
+            if (j1_min > j1) j1_min = j1;
+            if (j1_max < j1) j1_max = j1;
+          }
+      printf("static const unsigned char %s_page%02x[%d] = {\n",
+             name, p, 8*(j1_max-j1_min+1));
+      for (j1 = j1_min; j1 <= j1_max; j1++) {
+        printf("  ");
+        for (j2 = 0; j2 < 8; j2++)
+          printf("0x%02x, ", uni2charset[256*p+8*j1+j2]);
+        printf("/""* 0x%02x-0x%02x *""/\n", 8*j1, 8*j1+7);
+      }
+      printf("};\n");
+    }
+  printf("\n");
+
+}
+#endif
+
+  exit(0);
+}
diff --git a/doc/libNX_X11/lcUniConv/cjk_tab_to_h.c b/doc/libNX_X11/lcUniConv/cjk_tab_to_h.c
new file mode 100644
index 000000000..f70fe5e11
--- /dev/null
+++ b/doc/libNX_X11/lcUniConv/cjk_tab_to_h.c
@@ -0,0 +1,1071 @@
+
+/*
+ * Generates a CJK character set table from a .TXT table as found on
+ * ftp.unicode.org or in the X nls directory.
+ * Examples:
+ *
+ *   ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < gb2312
+ *   ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < jis0208
+ *   ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < ksc5601
+ *
+ *   ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < GB2312.TXT
+ *   ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < JIS0208.TXT
+ *   ./cjk_tab_to_h JISX0212.1990-0 jisx0212 > jisx0212.h < JIS0212.TXT
+ *   ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < KSC5601.TXT
+ *   ./cjk_tab_to_h KSX1001.1992-0 ksc5601 > ksc5601.h < KSX1001.TXT
+ *
+ *   ./cjk_tab_to_h BIG5 big5 > big5.h < BIG5.TXT
+ *
+ *   ./cjk_tab_to_h JOHAB johab > johab.h < JOHAB.TXT
+ *
+ *   ./cjk_tab_to_h BIG5HKSCS-0 big5hkscs >big5hkscs.h < BIG5HKSCS.TXT
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+
+typedef struct {
+  int start;   /* first index belonging to the block */
+  int end;     /* one past the last index belonging to the block */
+} Block;
+
+typedef struct {
+  int rows;    /* number of possible values for the 1st byte */
+  int cols;    /* number of possible values for the 2nd byte */
+  int (*row_byte) (int row); /* returns the 1st byte value for a given row */
+  int (*col_byte) (int col); /* returns the 2nd byte value for a given col */
+  int (*byte_row) (int byte); /* converts a 1st byte value to a row, else -1 */
+  int (*byte_col) (int byte); /* converts a 2nd byte value to a col, else -1 */
+  const char* check_row_expr; /* format string for 1st byte value checking */
+  const char* check_col_expr; /* printf format (given a variable name) for 2nd byte value checking */
+  const char* byte_row_expr; /* printf format (given a variable name) for 1st byte value to row */
+  const char* byte_col_expr; /* printf format (given a variable name) for 2nd byte value to col */
+  int** charset2uni; /* charset2uni[0..rows-1][0..cols-1] is valid; 0xfffd = unmapped */
+  /* You'll understand the terms "row" and "col" when you buy Ken Lunde's book.
+     Once a row is fixed, choosing a "col" is the same as choosing a "cell". */
+  int* charsetpage; /* charsetpage[0..rows]: last mapped col + 1 per row, 0 if none */
+  int ncharsetblocks; /* number of entries in charsetblocks[] */
+  Block* charsetblocks; /* charsetblocks[0..ncharsetblocks-1], indices are row*cols+col */
+  int* uni2charset; /* uni2charset[0x0000..0xffff]: 0x100*byte1+byte2, 0 = unmapped */
+} Encoding;
+
+/*
+ * Prints the banner of the generated file to stdout: the charset name in a C comment.
+ */
+static void output_title (const char *charsetname)
+{
+  printf("\n"
+         "/*\n"
+         " * %s\n"
+         " */\n"
+         "\n", charsetname);
+}
+
+/* Reads the charset2uni table from standard input into a freshly allocated
+ * enc->charset2uni[rows][cols]; cells without a mapping hold 0xfffd.  Exits
+ * with status 1 on malformed or out-of-range input and on out of memory. */
+static void read_table (Encoding* enc)
+{
+  int row, col, i, i1, i2, c, j;
+
+  enc->charset2uni = malloc(enc->rows*sizeof(int*));
+  if (enc->charset2uni == NULL) exit(1);
+  for (row = 0; row < enc->rows; row++) {
+    enc->charset2uni[row] = malloc(enc->cols*sizeof(int));
+    if (enc->charset2uni[row] == NULL) exit(1);
+    for (col = 0; col < enc->cols; col++)
+      enc->charset2uni[row][col] = 0xfffd;
+  }
+  c = getc(stdin);
+  ungetc(c,stdin);
+  if (c == '#') {
+    /* Read a unicode.org style .TXT file. */
+    for (;;) {
+      c = getc(stdin);
+      if (c == EOF)
+        break;
+      if (c == '\n' || c == ' ' || c == '\t')
+        continue;
+      if (c == '#') {
+        do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
+        continue;
+      }
+      ungetc(c,stdin);
+      if (scanf("0x%x", &j) != 1) exit(1);
+      i1 = j >> 8;
+      i2 = j & 0xff;
+      row = enc->byte_row(i1);
+      col = enc->byte_col(i2);
+      if (row < 0 || col < 0 || row >= enc->rows || col >= enc->cols) {
+        fprintf(stderr, "lost entry for %02x %02x\n", i1, i2);
+        exit(1);
+      }
+      if (scanf(" 0x%x", &j) != 1 || j < 0 || j > 0xffff)
+        exit(1); /* later passes index 0x10000-entry arrays with this value */
+      enc->charset2uni[row][col] = j;
+    }
+  } else {
+    /* Read a table of hexadecimal Unicode values. */
+    for (i1 = 32; i1 < 132; i1++)
+      for (i2 = 32; i2 < 132; i2++) {
+        i = scanf("%x", &j);
+        if (i == EOF)
+          goto read_done;
+        if (i != 1 || j > 0xffff) exit(1);
+        if (j < 0 || j == 0xffff)
+          j = 0xfffd;
+        if (j != 0xfffd) {
+          row = enc->byte_row(i1); col = enc->byte_col(i2);
+          if (row < 0 || col < 0 || row >= enc->rows || col >= enc->cols) {
+            fprintf(stderr, "lost entry at %02x %02x\n", i1, i2);
+            exit (1);
+          }
+          enc->charset2uni[row][col] = j;
+        }
+      }
+   read_done: ;
+  }
+}
+
+/* Computes charsetpage[0..rows]: for each row the number of leading columns
+ * up to and including the last mapped one (0 for an empty row).  The extra
+ * entry charsetpage[rows] is always 0.  Exits with status 1 on out of memory. */
+static void find_charset2uni_pages (Encoding* enc)
+{
+  int row, col;
+
+  enc->charsetpage = malloc((enc->rows+1)*sizeof(int));
+  if (enc->charsetpage == NULL)
+    exit(1);
+  /* Terminator for the charsetpage[row+1] look-aheads of the block scans. */
+  enc->charsetpage[enc->rows] = 0;
+  for (row = 0; row < enc->rows; row++) {
+    int used = 0;
+    for (col = 0; col < enc->cols; col++)
+      if (enc->charset2uni[row][col] != 0xfffd)
+        used = col+1;
+    enc->charsetpage[row] = used;
+  }
+}
+
+/* Fills in ncharsetblocks and charsetblocks[]: one entry per maximal run of
+ * consecutive non-empty rows, as a half-open range of row * cols + col indices.
+ * Reads charsetpage[0..rows].  Exits with status 1 on out of memory. */
+static void find_charset2uni_blocks (Encoding* enc)
+{
+  int n, row, lastrow;
+
+  enc->charsetblocks = malloc(enc->rows*sizeof(Block));
+  if (enc->charsetblocks == NULL) exit(1);
+  n = 0;
+  for (row = 0; row < enc->rows; row++)
+    if (enc->charsetpage[row] > 0 && (row == 0 || enc->charsetpage[row-1] == 0)) {
+      for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
+      enc->charsetblocks[n].start = row * enc->cols;
+      enc->charsetblocks[n].end = lastrow * enc->cols + enc->charsetpage[lastrow];
+      n++;
+    }
+  enc->ncharsetblocks = n;
+}
+
+/*
+ * Prints the <name>_2uni_pageXX tables and the <name>_mbtowc function to stdout.
+ */
+static void output_charset2uni (const char* name, Encoding* enc)
+{
+  int row, col, lastrow, col_max, i, i1_min, i1_max;
+  /* Derive charsetpage[] and charsetblocks[] from charset2uni[][]. */
+  find_charset2uni_pages(enc);
+
+  find_charset2uni_blocks(enc);
+  /* One array per run of consecutive non-empty rows, named after its first row's byte. */
+  for (row = 0; row < enc->rows; row++)
+    if (enc->charsetpage[row] > 0) {
+      if (row == 0 || enc->charsetpage[row-1] == 0) {
+        /* Start a new block. */
+        for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
+        printf("static const unsigned short %s_2uni_page%02x[%d] = {\n",
+               name, enc->row_byte(row),
+               (lastrow-row) * enc->cols + enc->charsetpage[lastrow]);
+      }
+      printf("  /""* 0x%02x *""/\n ", enc->row_byte(row));
+      col_max = (enc->charsetpage[row+1] > 0 ? enc->cols : enc->charsetpage[row]); /* only a block's last row is trimmed */
+      for (col = 0; col < col_max; col++) {
+        printf(" 0x%04x,", enc->charset2uni[row][col]);
+        if ((col % 8) == 7 && (col+1 < col_max)) printf("\n ");
+      }
+      printf("\n");
+      if (enc->charsetpage[row+1] == 0) {
+        /* End a block. */
+        printf("};\n");
+      }
+    }
+  printf("\n");
+  /* The generated function checks c1 against the blocks' first-byte ranges, then c2. */
+  printf("static int\n");
+  printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name);
+  printf("{\n");
+  printf("  unsigned char c1 = s[0];\n");
+  printf("  if (");
+  for (i = 0; i < enc->ncharsetblocks; i++) {
+    i1_min = enc->row_byte(enc->charsetblocks[i].start / enc->cols);
+    i1_max = enc->row_byte((enc->charsetblocks[i].end-1) / enc->cols);
+    if (i > 0)
+      printf(" || ");
+    if (i1_min == i1_max)
+      printf("(c1 == 0x%02x)", i1_min);
+    else
+      printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min, i1_max);
+  }
+  printf(") {\n");
+  printf("    if (n >= 2) {\n");
+  printf("      unsigned char c2 = s[1];\n");
+  printf("      if (");
+  printf(enc->check_col_expr, "c2"); /* the Encoding's format string, not a literal */
+  printf(") {\n");
+  printf("        unsigned int i = %d * (", enc->cols);
+  printf(enc->byte_row_expr, "c1");
+  printf(") + (");
+  printf(enc->byte_col_expr, "c2");
+  printf(");\n");
+  printf("        unsigned short wc = 0xfffd;\n");
+  for (i = 0; i < enc->ncharsetblocks; i++) { /* one "if (i < next block's start)" arm per block */
+    printf("        ");
+    if (i > 0)
+      printf("} else ");
+    if (i < enc->ncharsetblocks-1)
+      printf("if (i < %d) ", enc->charsetblocks[i+1].start);
+    printf("{\n");
+    printf("          if (i < %d)\n", enc->charsetblocks[i].end);
+    printf("            wc = %s_2uni_page%02x[i", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols));
+    if (enc->charsetblocks[i].start > 0)
+      printf("-%d", enc->charsetblocks[i].start);
+    printf("];\n");
+  }
+  printf("        }\n");
+  printf("        if (wc != 0xfffd) {\n");
+  printf("          *pwc = (ucs4_t) wc;\n");
+  printf("          return 2;\n");
+  printf("        }\n");
+  printf("      }\n");
+  printf("      return RET_ILSEQ;\n");
+  printf("    }\n");
+  printf("    return RET_TOOFEW(0);\n");
+  printf("  }\n");
+  printf("  return RET_ILSEQ;\n");
+  printf("}\n");
+  printf("\n");
+}
+
+/* Computes enc->uni2charset[0x0000..0xffff]: for each mapped code point the
+ * value 0x100 * first byte + second byte, and 0 for unmapped code points.
+ * Exits with status 1 on out of memory or on a code point outside the array. */
+static void invert (Encoding* enc)
+{
+  int row, col, j;
+
+  /* calloc zero-fills, and 0 is the "no mapping" marker. */
+  enc->uni2charset = calloc(0x10000, sizeof(int));
+  if (enc->uni2charset == NULL)
+    exit(1);
+  for (row = 0; row < enc->rows; row++)
+    for (col = 0; col < enc->cols; col++) {
+      j = enc->charset2uni[row][col];
+      if (j < 0 || j >= 0x10000) exit(1); /* would write outside the array */
+      if (j != 0xfffd)
+        enc->uni2charset[j] = 0x100 * enc->row_byte(row) + enc->col_byte(col);
+    }
+}
+
+/* Prints the <name>_pageXX tables and the <name>_wctomb function to stdout,
+ * using linear arrays indexed by code point (suitable if the table is dense).
+ * NOTE(review): arrays are named after the page of their first line only, so
+ * two tables starting in the same page would collide -- confirm with inputs. */
+static void output_uni2charset_dense (const char* name, Encoding* enc)
+{
+  /* Like in 8bit_tab_to_h.c */
+  bool pages[0x100]; /* filled in below, not consulted afterwards */
+  int line[0x2000]; /* per 8 code points: -1 if all unmapped, else table number */
+  int tableno;
+  struct { int minline; int maxline; int usecount; } tables[0x2000]; /* usecount: mapped code points */
+  bool first;
+  int row, col, j, p, j1, j2, t;
+
+  for (p = 0; p < 0x100; p++)
+    pages[p] = false;
+  for (row = 0; row < enc->rows; row++)
+    for (col = 0; col < enc->cols; col++) {
+      j = enc->charset2uni[row][col];
+      if (j != 0xfffd)
+        pages[j>>8] = true;
+    }
+  for (j1 = 0; j1 < 0x2000; j1++) {
+    bool all_invalid = true;
+    for (j2 = 0; j2 < 8; j2++) {
+      j = 8*j1+j2;
+      if (enc->uni2charset[j] != 0)
+        all_invalid = false;
+    }
+    if (all_invalid)
+      line[j1] = -1;
+    else
+      line[j1] = 0;
+  }
+  tableno = 0;
+  for (j1 = 0; j1 < 0x2000; j1++) {
+    if (line[j1] >= 0) {
+      if (tableno > 0
+          && ((j1 > 0 && line[j1-1] == tableno-1)
+              || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
+                  && j1 - tables[tableno-1].maxline <= 8))) { /* same 32-line page, at most 8 lines on */
+        line[j1] = tableno-1;
+        tables[tableno-1].maxline = j1;
+      } else {
+        tableno++;
+        line[j1] = tableno-1;
+        tables[tableno-1].minline = tables[tableno-1].maxline = j1;
+      }
+    }
+  }
+  for (t = 0; t < tableno; t++) {
+    tables[t].usecount = 0;
+    j1 = 8*tables[t].minline;
+    j2 = 8*(tables[t].maxline+1);
+    for (j = j1; j < j2; j++)
+      if (enc->uni2charset[j] != 0)
+        tables[t].usecount++;
+  }
+  {
+    p = -1;
+    for (t = 0; t < tableno; t++)
+      if (tables[t].usecount > 1) { /* single-entry tables become a "wc ==" test instead */
+        p = tables[t].minline >> 5;
+        printf("static const unsigned short %s_page%02x[%d] = {\n", name, p, 8*(tables[t].maxline-tables[t].minline+1));
+        for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
+          if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
+            printf("  /* 0x%04x */\n", 8*j1);
+          printf(" ");
+          for (j2 = 0; j2 < 8; j2++) {
+            j = 8*j1+j2;
+            printf(" 0x%04x,", enc->uni2charset[j]);
+          }
+          printf(" /*0x%02x-0x%02x*/\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
+        }
+        printf("};\n");
+      }
+    if (p >= 0)
+      printf("\n");
+  }
+  printf("static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
+  printf("{\n");
+  printf("  if (n >= 2) {\n");
+  printf("    unsigned short c = 0;\n");
+  first = true;
+  for (j1 = 0; j1 < 0x2000;) { /* walk runs of lines sharing one table number */
+    t = line[j1];
+    for (j2 = j1; j2 < 0x2000 && line[j2] == t; j2++);
+    if (t >= 0) {
+      if (j1 != tables[t].minline) abort();
+      if (j2 > tables[t].maxline+1) abort();
+      j2 = tables[t].maxline+1; /* also skip unmapped lines absorbed into the table */
+      if (first)
+        printf("    ");
+      else
+        printf("    else ");
+      first = false;
+      if (tables[t].usecount == 0) abort();
+      if (tables[t].usecount == 1) {
+        if (j2 != j1+1) abort();
+        for (j = 8*j1; j < 8*j2; j++)
+          if (enc->uni2charset[j] != 0) {
+            printf("if (wc == 0x%04x)\n      c = 0x%02x;\n", j, enc->uni2charset[j]);
+            break;
+          }
+      } else {
+        if (j1 == 0) {
+          printf("if (wc < 0x%04x)", 8*j2);
+        } else {
+          printf("if (wc >= 0x%04x && wc < 0x%04x)", 8*j1, 8*j2);
+        }
+        printf("\n      c = %s_page%02x[wc", name, j1 >> 5);
+        if (tables[t].minline > 0)
+          printf("-0x%04x", 8*j1);
+        printf("];\n");
+      }
+    }
+    j1 = j2;
+  }
+  printf("    if (c != 0) {\n");
+  printf("      r[0] = (c >> 8); r[1] = (c & 0xff);\n");
+  printf("      return 2;\n");
+  printf("    }\n");
+  printf("    return RET_ILSEQ;\n");
+  printf("  }\n");
+  printf("  return RET_TOOSMALL;\n");
+  printf("}\n");
+}
+
+/*
+ * Outputs the unicode to charset table and wctomb function to stdout, using a
+ * packed array plus 16-code-point summaries (suitable if the table is sparse).
+ */
+static void output_uni2charset_sparse (const char* name, Encoding* enc)
+{
+  bool pages[0x100];  /* pages[p]: some mapped code point has (code >> 8) == p */
+  Block pageblocks[0x100]; int npageblocks;  /* runs of consecutive used pages */
+  int indx2charset[0x10000];  /* nonzero uni2charset values, in code point order */
+  int summary_indx[0x1000];  /* per 16-code-point group: its first packed index */
+  int summary_used[0x1000];  /* per group: bit j2 set if code 16*j1+j2 is mapped */
+  int i, row, col, j, p, j1, j2, indx;
+
+  /* Fill pages[0x100]; charset2uni cells equal to 0xfffd are skipped. */
+  for (p = 0; p < 0x100; p++)
+    pages[p] = false;
+  for (row = 0; row < enc->rows; row++)
+    for (col = 0; col < enc->cols; col++) {
+      j = enc->charset2uni[row][col];
+      if (j != 0xfffd)
+        pages[j>>8] = true;
+    }
+
+#if 0  /* disabled: would print every used page as a full 256-entry table */
+  for (p = 0; p < 0x100; p++)
+    if (pages[p]) {
+      printf("static const unsigned short %s_page%02x[256] = {\n", name, p);
+      for (j1 = 0; j1 < 32; j1++) {
+        printf("  ");
+        for (j2 = 0; j2 < 8; j2++)
+          printf("0x%04x, ", enc->uni2charset[256*p+8*j1+j2]);
+        printf("/""*0x%02x-0x%02x*""/\n", 8*j1, 8*j1+7);
+      }
+      printf("};\n");
+    }
+  printf("\n");
+#endif
+
+  /* Fill summary_indx[] and summary_used[], packing nonzero entries into indx2charset[]. */
+  indx = 0;
+  for (j1 = 0; j1 < 0x1000; j1++) {
+    summary_indx[j1] = indx;  /* packed index where this group's entries start */
+    summary_used[j1] = 0;
+    for (j2 = 0; j2 < 16; j2++) {
+      j = 16*j1+j2;
+      if (enc->uni2charset[j] != 0) {  /* 0 is treated as "no mapping" */
+        indx2charset[indx++] = enc->uni2charset[j];
+        summary_used[j1] |= (1 << j2);
+      }
+    }
+  }
+
+  /* Fill npageblocks and pageblocks[]; start/end count 16-code-point groups. */
+  npageblocks = 0;
+  for (p = 0; p < 0x100; ) {
+    if (pages[p] && (p == 0 || !pages[p-1])) {  /* first page of a run */
+      pageblocks[npageblocks].start = 16*p;
+      do p++; while (p < 0x100 && pages[p]);  /* advance past the run */
+      j1 = 16*p;
+      while (summary_used[j1-1] == 0) j1--;  /* trim trailing empty groups */
+      pageblocks[npageblocks].end = j1;  /* exclusive */
+      npageblocks++;
+    } else
+      p++;
+  }
+
+  printf("static const unsigned short %s_2charset[%d] = {\n", name, indx);  /* packed values */
+  for (i = 0; i < indx; ) {
+    if ((i % 8) == 0) printf(" ");
+    printf(" 0x%04x,", indx2charset[i]);
+    i++;
+    if ((i % 8) == 0 || i == indx) printf("\n");  /* eight values per output line */
+  }
+  printf("};\n");
+  printf("\n");
+  for (i = 0; i < npageblocks; i++) {  /* one Summary16 array per page block */
+    printf("static const Summary16 %s_uni2indx_page%02x[%d] = {\n", name,
+           pageblocks[i].start/16, pageblocks[i].end-pageblocks[i].start);
+    for (j1 = pageblocks[i].start; j1 < pageblocks[i].end; ) {
+      if (((16*j1) % 0x100) == 0) printf("  /""* 0x%04x *""/\n", 16*j1);  /* page marker */
+      if ((j1 % 4) == 0) printf(" ");
+      printf(" { %4d, 0x%04x },", summary_indx[j1], summary_used[j1]);
+      j1++;
+      if ((j1 % 4) == 0 || j1 == pageblocks[i].end) printf("\n");  /* four per line */
+    }
+    printf("};\n");
+  }
+  printf("\n");
+
+  printf("static int\n");  /* from here on: text of the generated NAME_wctomb */
+  printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
+  printf("{\n");
+  printf("  if (n >= 2) {\n");
+  printf("    const Summary16 *summary = NULL;\n");
+  for (i = 0; i < npageblocks; i++) {  /* one code point range test per block */
+    printf("    ");
+    if (i > 0)
+      printf("else ");
+    printf("if (wc >= 0x%04x && wc < 0x%04x)\n",
+           16*pageblocks[i].start, 16*pageblocks[i].end);
+    printf("      summary = &%s_uni2indx_page%02x[(wc>>4)", name,
+           pageblocks[i].start/16);
+    if (pageblocks[i].start > 0)  /* rebase the group index to the block start */
+      printf("-0x%03x", pageblocks[i].start);
+    printf("];\n");
+  }
+  printf("    if (summary) {\n");
+  printf("      unsigned short used = summary->used;\n");
+  printf("      unsigned int i = wc & 0x0f;\n");
+  printf("      if (used & ((unsigned short) 1 << i)) {\n");
+  printf("        unsigned short c;\n");
+  printf("        /* Keep in `used' only the bits 0..i-1. */\n");
+  printf("        used &= ((unsigned short) 1 << i) - 1;\n");
+  printf("        /* Add `summary->indx' and the number of bits set in `used'. */\n");
+  printf("        used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
+  printf("        used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
+  printf("        used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
+  printf("        used = (used & 0x00ff) + (used >> 8);\n");
+  printf("        c = %s_2charset[summary->indx + used];\n", name);
+  printf("        r[0] = (c >> 8); r[1] = (c & 0xff);\n");
+  printf("        return 2;\n");
+  printf("      }\n");
+  printf("    }\n");
+  printf("    return RET_ILSEQ;\n");
+  printf("  }\n");
+  printf("  return RET_TOOSMALL;\n");
+  printf("}\n");
+}
+
+/* ISO-2022/EUC specifics */
+
+static int row_byte_normal (int row) { return row + 0x21; }  /* row index -> byte */
+static int col_byte_normal (int col) { return col + 0x21; }  /* column index -> byte */
+static int byte_row_normal (int byte) { return -0x21 + byte; }  /* byte -> row index, not range-checked */
+static int byte_col_normal (int byte) { return -0x21 + byte; }  /* byte -> column index, not range-checked */
+
+static void do_normal (const char* name)
+{
+  /* 94 x 94 table; both bytes run from 0x21 to 0x7e.  Emits both directions. */
+  Encoding enc = {
+    .rows = 94, .cols = 94,
+    .row_byte = row_byte_normal,
+    .col_byte = col_byte_normal,
+    .byte_row = byte_row_normal,
+    .byte_col = byte_col_normal,
+    .check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f",
+    .check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f",
+    .byte_row_expr = "%1$s - 0x21",
+    .byte_col_expr = "%1$s - 0x21",
+  };
+  read_table(&enc);
+  output_charset2uni(name, &enc);
+  invert(&enc);
+  output_uni2charset_sparse(name, &enc);
+}
+
+/* Note: At first sight, the jisx0212_2charset[] table seems to be in order,
+   starting from the charset=0x3021/uni=0x4e02 pair. But it's only mostly in
+   order. There are 75 out-of-order values, scattered all throughout the table.
+ */
+
+static void do_normal_only_charset2uni (const char* name)
+{
+  /* 94 x 94 table as in do_normal(), but only charset -> Unicode is emitted. */
+  Encoding enc = {
+    .rows = 94, .cols = 94,
+    .row_byte = row_byte_normal,
+    .col_byte = col_byte_normal,
+    .byte_row = byte_row_normal,
+    .byte_col = byte_col_normal,
+    .check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f",
+    .check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f",
+    .byte_row_expr = "%1$s - 0x21",
+    .byte_col_expr = "%1$s - 0x21",
+  };
+
+  read_table(&enc);
+  output_charset2uni(name, &enc);
+}
+
+/* CNS 11643 specifics - trick to put two tables into one */
+
+static int row_byte_cns11643 (int row) {
+  return 0x21 + row % 94 + 0x100 * (row / 94);  /* each group of 94 rows adds 0x100 */
+}
+static int byte_row_cns11643 (int byte) {
+  /* Bytes 0x1xx, 0x2xx and 0x3xx select row groups 0, 1 and 2 (94 rows each). */
+  if (byte < 0x100 || byte >= 0x400)
+    return -1;
+  return (byte & 0xff) - 0x21 + 94 * (byte / 0x100 - 1);
+}
+
+static void do_cns11643_only_uni2charset (const char* name)
+{
+  /* Plane selector bits, indexed by (inverted charset value >> 16). */
+  static const int plane_bits[3] = { 0x0000, 0x0080, 0x8000 };
+  Encoding enc = {
+    .rows = 3*94, .cols = 94,
+    .row_byte = row_byte_cns11643,
+    .col_byte = col_byte_normal,
+    .byte_row = byte_row_cns11643,
+    .byte_col = byte_col_normal,
+    .check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f",
+    .check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f",
+    .byte_row_expr = "%1$s - 0x21",
+    .byte_col_expr = "%1$s - 0x21",
+  };
+  int uc;
+
+  read_table(&enc);
+  invert(&enc);
+
+  /* Move the 2 plane bits into the unused bits 15 and 7. */
+  for (uc = 0; uc < 0x10000; uc++) {
+    int code = enc.uni2charset[uc];
+    int plane;
+
+    if (code == 0)
+      continue;
+    if (code & 0x8080) abort();  /* bits 15 and 7 must still be free */
+    plane = code >> 16;
+    if (plane < 0 || plane > 2) abort();
+    enc.uni2charset[uc] = (code & 0xffff) | plane_bits[plane];
+  }
+  output_uni2charset_sparse(name, &enc);
+}
+
+/* GBK specifics */
+
+static int row_byte_gbk1 (int row) {
+  return row + 0x81;  /* row 0 is lead byte 0x81 */
+}
+static int col_byte_gbk1 (int col) {
+  return col < 0x3f ? col + 0x40 : col + 0x41;  /* steps over byte 0x7f */
+}
+static int byte_row_gbk1 (int byte) {
+  /* Lead bytes 0x81..0xfe are rows 0..125; anything else is rejected. */
+  if (byte < 0x81 || byte > 0xfe)
+    return -1;
+  return byte - 0x81;
+}
+static int byte_col_gbk1 (int byte) {
+  /* Trail bytes 0x40..0x7e and 0x80..0xfe form 190 contiguous columns. */
+  if (byte >= 0x80)
+    return byte < 0xff ? byte - 0x41 : -1;
+  if (byte >= 0x40 && byte != 0x7f)
+    return byte - 0x40;
+  return -1;
+}
+
+static void do_gbk1 (const char* name)
+{
+  /* 126 rows (bytes 0x81..0xfe) x 190 columns (bytes 0x40..0x7e, 0x80..0xfe). */
+  Encoding enc = {
+    .rows = 126, .cols = 190,
+    .row_byte = row_byte_gbk1,
+    .col_byte = col_byte_gbk1,
+    .byte_row = byte_row_gbk1,
+    .byte_col = byte_col_gbk1,
+    .check_row_expr = "%1$s >= 0x81 && %1$s < 0xff",
+    .check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)",
+    .byte_row_expr = "%1$s - 0x81",
+    .byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)",
+  };
+  read_table(&enc);
+  output_charset2uni(name, &enc);
+  invert(&enc);
+  output_uni2charset_dense(name, &enc);
+}
+
+static void do_gbk1_only_charset2uni (const char* name)
+{
+  /* Same 126 x 190 layout as do_gbk1(); only charset -> Unicode is emitted. */
+  Encoding enc = {
+    .rows = 126, .cols = 190,
+    .row_byte = row_byte_gbk1,
+    .col_byte = col_byte_gbk1,
+    .byte_row = byte_row_gbk1,
+    .byte_col = byte_col_gbk1,
+    .check_row_expr = "%1$s >= 0x81 && %1$s < 0xff",
+    .check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)",
+    .byte_row_expr = "%1$s - 0x81",
+    .byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)",
+  };
+
+  read_table(&enc);
+  output_charset2uni(name, &enc);
+}
+
+static int row_byte_gbk2 (int row) {
+  return row + 0x81;  /* row 0 is lead byte 0x81 */
+}
+static int col_byte_gbk2 (int col) {
+  return col < 0x3f ? col + 0x40 : col + 0x41;  /* steps over byte 0x7f */
+}
+static int byte_row_gbk2 (int byte) {
+  /* Lead bytes 0x81..0xfe are rows 0..125; anything else is rejected. */
+  if (byte < 0x81 || byte > 0xfe)
+    return -1;
+  return byte - 0x81;
+}
+static int byte_col_gbk2 (int byte) {
+  /* Trail bytes 0x40..0x7e and 0x80..0xa0 form 96 contiguous columns. */
+  if (byte >= 0x80)
+    return byte < 0xa1 ? byte - 0x41 : -1;
+  if (byte >= 0x40 && byte != 0x7f)
+    return byte - 0x40;
+  return -1;
+}
+
+static void do_gbk2_only_charset2uni (const char* name)
+{
+  /* 126 rows (bytes 0x81..0xfe) x 96 columns (bytes 0x40..0x7e, 0x80..0xa0). */
+  Encoding enc = {
+    .rows = 126, .cols = 96,
+    .row_byte = row_byte_gbk2,
+    .col_byte = col_byte_gbk2,
+    .byte_row = byte_row_gbk2,
+    .byte_col = byte_col_gbk2,
+    .check_row_expr = "%1$s >= 0x81 && %1$s < 0xff",
+    .check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xa1)",
+    .byte_row_expr = "%1$s - 0x81",
+    .byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)",
+  };
+
+  read_table(&enc);
+  output_charset2uni(name, &enc);
+}
+
+static void do_gbk1_only_uni2charset (const char* name)
+{
+  /* Same 126 x 190 layout as do_gbk1(); only Unicode -> charset is emitted. */
+  Encoding enc = {
+    .rows = 126, .cols = 190,
+    .row_byte = row_byte_gbk1,
+    .col_byte = col_byte_gbk1,
+    .byte_row = byte_row_gbk1,
+    .byte_col = byte_col_gbk1,
+    .check_row_expr = "%1$s >= 0x81 && %1$s < 0xff",
+    .check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)",
+    .byte_row_expr = "%1$s - 0x81",
+    .byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)",
+  };
+  read_table(&enc);
+  invert(&enc);
+  output_uni2charset_sparse(name, &enc);
+}
+
+/* KSC 5601 specifics */
+
+/* Reads the KS C 5601 charset2uni table from standard input into a newly
+   allocated enc->rows x enc->cols array (unmapped cells hold 0xfffd).  Exits
+   with status 1 on unparsable input or Unicode values above 0xffff. */
+static void read_table_ksc5601 (Encoding* enc)
+{
+  int row, col, i, i1, i2, c;
+  unsigned int u;  /* %x stores through an unsigned int*, not an int* */
+
+  enc->charset2uni = malloc(enc->rows*sizeof(int*));
+  if (enc->charset2uni == NULL) abort();  /* out of memory */
+  for (row = 0; row < enc->rows; row++) {
+    enc->charset2uni[row] = malloc(enc->cols*sizeof(int));
+    if (enc->charset2uni[row] == NULL) abort();  /* out of memory */
+    for (col = 0; col < enc->cols; col++)
+      enc->charset2uni[row][col] = 0xfffd;
+  }
+
+  c = getc(stdin);
+  ungetc(c,stdin);
+  if (c == '#') {
+    /* Read a unicode.org style .TXT file. */
+    while ((c = getc(stdin)) != EOF) {
+      if (c == '\n' || c == ' ' || c == '\t')
+        continue;
+      if (c == '#') {
+        do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
+        continue;
+      }
+      ungetc(c,stdin);
+      if (scanf("0x%x", &u) != 1)
+        exit(1);
+      i1 = u >> 8;
+      i2 = u & 0xff;
+      if (scanf(" 0x%x", &u) != 1)
+        exit(1);
+      /* Take only the range covered by KS C 5601.1987-0 = KS C 5601.1989-0
+         = KS X 1001.1992, ignore the rest. */
+      if (!(i1 >= 128+33 && i1 < 128+127 && i2 >= 128+33 && i2 < 128+127))
+        continue;  /* KSC5601 specific */
+      if (u > 0xffff)  /* would overrun the 0x10000-entry output tables */
+        exit(1);
+      i1 &= 0x7f; i2 &= 0x7f;  /* KSC5601 specific */
+      row = enc->byte_row(i1);
+      col = enc->byte_col(i2);
+      if (row < 0 || col < 0) {
+        fprintf(stderr, "lost entry for %02x %02x\n", i1, i2);
+        exit(1);
+      }
+      enc->charset2uni[row][col] = u;
+    }
+  } else {
+    /* Read a table of hexadecimal Unicode values. */
+    for (i1 = 33; i1 < 127; i1++)
+      for (i2 = 33; i2 < 127; i2++) {
+        i = scanf("%x", &u);
+        if (i == EOF)
+          goto read_done;
+        if (i != 1 || (u > 0xffff && u <= 0x7fffffff))
+          exit(1);  /* unparsable, or beyond the 16-bit output tables */
+        if (u >= 0xffff || u == 0xfffd)
+          continue;  /* 0xffff and negative values also mean "unmapped" */
+        row = enc->byte_row(i1);
+        col = enc->byte_col(i2);
+        if (row < 0 || col < 0) {
+          fprintf(stderr, "lost entry at %02x %02x\n", i1, i2);
+          exit (1);
+        }
+        enc->charset2uni[row][col] = u;
+      }
+   read_done: ;
+  }
+}
+
+static void do_ksc5601 (const char* name)
+{
+  /* 94 x 94 table, bytes 0x21..0x7e; input goes through read_table_ksc5601(). */
+  Encoding enc = {
+    .rows = 94, .cols = 94,
+    .row_byte = row_byte_normal,
+    .col_byte = col_byte_normal,
+    .byte_row = byte_row_normal,
+    .byte_col = byte_col_normal,
+    .check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f",
+    .check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f",
+    .byte_row_expr = "%1$s - 0x21",
+    .byte_col_expr = "%1$s - 0x21",
+  };
+  read_table_ksc5601(&enc);
+  output_charset2uni(name, &enc);
+  invert(&enc);
+  output_uni2charset_sparse(name, &enc);
+}
+
+/* Big5 specifics */
+
+static int row_byte_big5 (int row) {
+  return row + 0xa1;  /* row 0 is lead byte 0xa1 */
+}
+static int col_byte_big5 (int col) {
+  return col < 0x3f ? col + 0x40 : col + 0x62;  /* column 0x3f resumes at 0xa1 */
+}
+static int byte_row_big5 (int byte) {
+  /* Lead bytes 0xa1..0xfe are rows 0..93; anything else is rejected. */
+  if (byte < 0xa1 || byte > 0xfe)
+    return -1;
+  return byte - 0xa1;
+}
+static int byte_col_big5 (int byte) {
+  /* Trail bytes 0x40..0x7e and 0xa1..0xfe form 157 contiguous columns. */
+  if (byte >= 0xa1)
+    return byte < 0xff ? byte - 0x62 : -1;
+  if (byte >= 0x40 && byte < 0x7f)
+    return byte - 0x40;
+  return -1;
+}
+
+static void do_big5 (const char* name)
+{
+  /* 94 rows (bytes 0xa1..0xfe) x 157 columns (bytes 0x40..0x7e, 0xa1..0xfe). */
+  Encoding enc = {
+    .rows = 94, .cols = 157,
+    .row_byte = row_byte_big5,
+    .col_byte = col_byte_big5,
+    .byte_row = byte_row_big5,
+    .byte_col = byte_col_big5,
+    .check_row_expr = "%1$s >= 0xa1 && %1$s < 0xff",
+    .check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)",
+    .byte_row_expr = "%1$s - 0xa1",
+    .byte_col_expr = "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)",
+  };
+  read_table(&enc);
+  output_charset2uni(name, &enc);
+  invert(&enc);
+  output_uni2charset_sparse(name, &enc);
+}
+
+/* Big5-HKSCS specifics */
+
+static int row_byte_big5hkscs (int row) {
+  return row + 0x81;  /* row 0 is lead byte 0x81 */
+}
+static int col_byte_big5hkscs (int col) {
+  return col < 0x3f ? col + 0x40 : col + 0x62;  /* column 0x3f resumes at 0xa1 */
+}
+static int byte_row_big5hkscs (int byte) {
+  /* Lead bytes 0x81..0xfe are rows 0..125; anything else is rejected. */
+  if (byte < 0x81 || byte > 0xfe)
+    return -1;
+  return byte - 0x81;
+}
+static int byte_col_big5hkscs (int byte) {
+  /* Trail bytes 0x40..0x7e and 0xa1..0xfe form 157 contiguous columns. */
+  if (byte >= 0xa1)
+    return byte < 0xff ? byte - 0x62 : -1;
+  if (byte >= 0x40 && byte < 0x7f)
+    return byte - 0x40;
+  return -1;
+}
+
+static void do_big5hkscs (const char* name)
+{
+  /* 126 rows (bytes 0x81..0xfe) x 157 columns (bytes 0x40..0x7e, 0xa1..0xfe). */
+  Encoding enc = {
+    .rows = 126, .cols = 157,
+    .row_byte = row_byte_big5hkscs,
+    .col_byte = col_byte_big5hkscs,
+    .byte_row = byte_row_big5hkscs,
+    .byte_col = byte_col_big5hkscs,
+    .check_row_expr = "%1$s >= 0x81 && %1$s < 0xff",
+    .check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)",
+    .byte_row_expr = "%1$s - 0x81",
+    .byte_col_expr = "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)",
+  };
+  read_table(&enc);
+  output_charset2uni(name, &enc);
+  invert(&enc);
+  output_uni2charset_sparse(name, &enc);
+}
+
+/* Johab Hangul specifics */
+
+static int row_byte_johab_hangul (int row) {
+  return row + 0x84;  /* row 0 is lead byte 0x84 */
+}
+static int col_byte_johab_hangul (int col) {
+  return col < 0x3e ? col + 0x41 : col + 0x43;  /* steps over bytes 0x7f and 0x80 */
+}
+static int byte_row_johab_hangul (int byte) {
+  /* Lead bytes 0x84..0xd3 are rows 0..79; anything else is rejected. */
+  if (byte < 0x84 || byte > 0xd3)
+    return -1;
+  return byte - 0x84;
+}
+static int byte_col_johab_hangul (int byte) {
+  /* Trail bytes 0x41..0x7e and 0x81..0xfe form 188 contiguous columns. */
+  if (byte >= 0x81)
+    return byte < 0xff ? byte - 0x43 : -1;
+  if (byte >= 0x41 && byte < 0x7f)
+    return byte - 0x41;
+  return -1;
+}
+
+static void do_johab_hangul (const char* name)
+{
+  /* 80 rows (bytes 0x84..0xd3) x 188 columns (bytes 0x41..0x7e, 0x81..0xfe). */
+  Encoding enc = {
+    .rows = 80, .cols = 188,
+    .row_byte = row_byte_johab_hangul,
+    .col_byte = col_byte_johab_hangul,
+    .byte_row = byte_row_johab_hangul,
+    .byte_col = byte_col_johab_hangul,
+    .check_row_expr = "%1$s >= 0x84 && %1$s < 0xd4",
+    .check_col_expr = "(%1$s >= 0x41 && %1$s < 0x7f) || (%1$s >= 0x81 && %1$s < 0xff)",
+    .byte_row_expr = "%1$s - 0x84",
+    .byte_col_expr = "%1$s - (%1$s >= 0x81 ? 0x43 : 0x41)",
+  };
+  read_table(&enc);
+  output_charset2uni(name, &enc);
+  invert(&enc);
+  output_uni2charset_dense(name, &enc);
+}
+
+/* SJIS specifics */
+
+static int row_byte_sjis (int row) {
+  return row < 0x1f ? row + 0x81 : row + 0xc1;  /* row 0x1f resumes at byte 0xe0 */
+}
+static int col_byte_sjis (int col) {
+  return col < 0x3f ? col + 0x40 : col + 0x41;  /* steps over byte 0x7f */
+}
+static int byte_row_sjis (int byte) {
+  /* Lead bytes 0x81..0x9f are rows 0..0x1e; 0xe0 and up continue at row 0x1f. */
+  if (byte >= 0xe0)
+    return byte - 0xc1;
+  if (byte < 0x81 || byte > 0x9f)
+    return -1;
+  return byte - 0x81;
+}
+static int byte_col_sjis (int byte) {
+  /* Trail bytes 0x40..0x7e and 0x80..0xfc form 188 contiguous columns. */
+  if (byte >= 0x80)
+    return byte < 0xfd ? byte - 0x41 : -1;
+  if (byte >= 0x40 && byte != 0x7f)
+    return byte - 0x40;
+  return -1;
+}
+
+static void do_sjis (const char* name)
+{
+  /* 94 rows (bytes 0x81..0x9f, 0xe0 up) x 188 columns (0x40..0x7e, 0x80..0xfc). */
+  Encoding enc = {
+    .rows = 94, .cols = 188,
+    .row_byte = row_byte_sjis,
+    .col_byte = col_byte_sjis,
+    .byte_row = byte_row_sjis,
+    .byte_col = byte_col_sjis,
+    .check_row_expr = "(%1$s >= 0x81 && %1$s < 0xa0) || (%1$s >= 0xe0)",
+    .check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xfd)",
+    .byte_row_expr = "%1$s - (%1$s >= 0xe0 ? 0xc1 : 0x81)",
+    .byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)",
+  };
+  read_table(&enc);
+  output_charset2uni(name, &enc);
+  invert(&enc);
+  output_uni2charset_sparse(name, &enc);
+}
+
+/* Main program */
+
+int main (int argc, char *argv[])
+{
+  /* Table names accepted as argv[2], each with the generator that handles it. */
+  static const struct {
+    const char* name;
+    void (*generate) (const char* name);
+  } generators[] = {
+    { "gb2312",       do_normal },
+    { "gb12345ext",   do_normal },
+    { "jisx0208",     do_normal },
+    { "jisx0212",     do_normal },
+    { "cns11643_1",   do_normal_only_charset2uni },
+    { "cns11643_2",   do_normal_only_charset2uni },
+    { "cns11643_3",   do_normal_only_charset2uni },
+    { "cns11643_inv", do_cns11643_only_uni2charset },
+    { "gbkext1",      do_gbk1_only_charset2uni },
+    { "gbkext2",      do_gbk2_only_charset2uni },
+    { "gbkext_inv",   do_gbk1_only_uni2charset },
+    { "cp936ext",     do_gbk1 },
+    { "ksc5601",      do_ksc5601 },
+    { "big5",         do_big5 },
+    { "cp950ext",     do_big5 },
+    { "big5hkscs",    do_big5hkscs },
+    { "johab_hangul", do_johab_hangul },
+    { "cp932ext",     do_sjis },
+  };
+  size_t i;
+
+  if (argc != 3) {
+    fprintf(stderr, "usage: cjk_tab_to_h charsetname name < table\n");
+    exit(1);
+  }
+  /* Look the name up before printing anything, so that an unknown name
+     fails without leaving a stray title on stdout. */
+  for (i = 0; i < sizeof(generators)/sizeof(generators[0]); i++)
+    if (!strcmp(argv[2], generators[i].name)) {
+      output_title(argv[1]);  /* argv[1] is the charset name shown in the title */
+      generators[i].generate(argv[2]);
+      return 0;
+    }
+  fprintf(stderr, "cjk_tab_to_h: unknown table name \"%s\"\n", argv[2]);
+  return 1;
+}
-- 
cgit v1.2.3