diff options
Diffstat (limited to 'libX11/src/xlibi18n/lcCT.c')
-rw-r--r-- | libX11/src/xlibi18n/lcCT.c | 2606 |
1 files changed, 1303 insertions, 1303 deletions
diff --git a/libX11/src/xlibi18n/lcCT.c b/libX11/src/xlibi18n/lcCT.c index 2fbe8aa88..a230567f9 100644 --- a/libX11/src/xlibi18n/lcCT.c +++ b/libX11/src/xlibi18n/lcCT.c @@ -1,1303 +1,1303 @@ -/* - * Copyright 1992, 1993 by TOSHIBA Corp. - * - * Permission to use, copy, modify, and distribute this software and its - * documentation for any purpose and without fee is hereby granted, provided - * that the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of TOSHIBA not be used in advertising - * or publicity pertaining to distribution of the software without specific, - * written prior permission. TOSHIBA make no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * TOSHIBA DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING - * ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL - * TOSHIBA BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR - * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, - * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Katsuhisa Yano TOSHIBA Corp. - * mopi@osa.ilab.toshiba.co.jp - */ -/* - * Copyright 1995 by FUJITSU LIMITED - * This is source code modified by FUJITSU LIMITED under the Joint - * Development Agreement for the CDE/Motif PST. 
- * - * Modifier: Takanori Tateno FUJITSU LIMITED - * - */ -/* - * 2000 - * Modifier: Ivan Pascal The XFree86 Project - * Modifier: Bruno Haible The XFree86 Project - */ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif -#include "Xlibint.h" -#include "XlcPubI.h" -#include <X11/Xos.h> -#include <stdio.h> - - -/* ====================== Built-in Character Sets ====================== */ - -/* - * Static representation of a character set that can be used in Compound Text. - */ -typedef struct _CTDataRec { - const char name[19]; - const char ct_sequence[5]; /* Compound Text encoding, ESC sequence */ -} CTDataRec, *CTData; - -static const CTDataRec default_ct_data[] = -{ - /* */ - /* X11 registry name MIME name ISO-IR ESC sequence */ - /* */ - - /* Registered character sets with one byte per character */ - { "ISO8859-1:GL", /* US-ASCII 6 */ "\033(B" }, - { "ISO8859-1:GR", /* ISO-8859-1 100 */ "\033-A" }, - { "ISO8859-2:GR", /* ISO-8859-2 101 */ "\033-B" }, - { "ISO8859-3:GR", /* ISO-8859-3 109 */ "\033-C" }, - { "ISO8859-4:GR", /* ISO-8859-4 110 */ "\033-D" }, - { "ISO8859-5:GR", /* ISO-8859-5 144 */ "\033-L" }, - { "ISO8859-6:GR", /* ISO-8859-6 127 */ "\033-G" }, - { "ISO8859-7:GR", /* ISO-8859-7 126 */ "\033-F" }, - { "ISO8859-8:GR", /* ISO-8859-8 138 */ "\033-H" }, - { "ISO8859-9:GR", /* ISO-8859-9 148 */ "\033-M" }, - { "ISO8859-10:GR", /* ISO-8859-10 157 */ "\033-V" }, - { "ISO8859-11:GR", /* ISO-8859-11 166 */ "\033-T" }, - { "ISO8859-13:GR", /* ISO-8859-13 179 */ "\033-Y" }, - { "ISO8859-14:GR", /* ISO-8859-14 199 */ "\033-_" }, - { "ISO8859-15:GR", /* ISO-8859-15 203 */ "\033-b" }, - { "ISO8859-16:GR", /* ISO-8859-16 226 */ "\033-f" }, - { "JISX0201.1976-0:GL", /* ISO-646-JP 14 */ "\033(J" }, - { "JISX0201.1976-0:GR", "\033)I" }, -#if 0 - { "TIS620-0:GR", /* TIS-620 166 */ "\033-T" }, -#endif - - /* Registered character sets with two byte per character */ - { "GB2312.1980-0:GL", /* GB_2312-80 58 */ "\033$(A" }, - { "GB2312.1980-0:GR", /* GB_2312-80 58 */ 
"\033$)A" }, - { "JISX0208.1983-0:GL", /* JIS_X0208-1983 87 */ "\033$(B" }, - { "JISX0208.1983-0:GR", /* JIS_X0208-1983 87 */ "\033$)B" }, - { "JISX0208.1990-0:GL", /* JIS_X0208-1990 168 */ "\033$(B" }, - { "JISX0208.1990-0:GR", /* JIS_X0208-1990 168 */ "\033$)B" }, - { "JISX0212.1990-0:GL", /* JIS_X0212-1990 159 */ "\033$(D" }, - { "JISX0212.1990-0:GR", /* JIS_X0212-1990 159 */ "\033$)D" }, - { "KSC5601.1987-0:GL", /* KS_C_5601-1987 149 */ "\033$(C" }, - { "KSC5601.1987-0:GR", /* KS_C_5601-1987 149 */ "\033$)C" }, - { "CNS11643.1986-1:GL", /* CNS 11643-1992 pl.1 171 */ "\033$(G" }, - { "CNS11643.1986-1:GR", /* CNS 11643-1992 pl.1 171 */ "\033$)G" }, - { "CNS11643.1986-2:GL", /* CNS 11643-1992 pl.2 172 */ "\033$(H" }, - { "CNS11643.1986-2:GR", /* CNS 11643-1992 pl.2 172 */ "\033$)H" }, - { "CNS11643.1992-3:GL", /* CNS 11643-1992 pl.3 183 */ "\033$(I" }, - { "CNS11643.1992-3:GR", /* CNS 11643-1992 pl.3 183 */ "\033$)I" }, - { "CNS11643.1992-4:GL", /* CNS 11643-1992 pl.4 184 */ "\033$(J" }, - { "CNS11643.1992-4:GR", /* CNS 11643-1992 pl.4 184 */ "\033$)J" }, - { "CNS11643.1992-5:GL", /* CNS 11643-1992 pl.5 185 */ "\033$(K" }, - { "CNS11643.1992-5:GR", /* CNS 11643-1992 pl.5 185 */ "\033$)K" }, - { "CNS11643.1992-6:GL", /* CNS 11643-1992 pl.6 186 */ "\033$(L" }, - { "CNS11643.1992-6:GR", /* CNS 11643-1992 pl.6 186 */ "\033$)L" }, - { "CNS11643.1992-7:GL", /* CNS 11643-1992 pl.7 187 */ "\033$(M" }, - { "CNS11643.1992-7:GR", /* CNS 11643-1992 pl.7 187 */ "\033$)M" }, - - /* Registered encodings with a varying number of bytes per character */ - { "ISO10646-1", /* UTF-8 196 */ "\033%G" }, - - /* Encodings without ISO-IR assigned escape sequence must be - defined in XLC_LOCALE files, using "\033%/1" or "\033%/2". 
*/ - - /* Backward compatibility with XFree86 3.x */ -#if 1 - { "ISO8859-14:GR", "\033%/1" }, - { "ISO8859-15:GR", "\033%/1" }, -#endif - /* For use by utf8 -> ctext */ - { "BIG5-0:GLGR", "\033%/2"}, - { "BIG5HKSCS-0:GLGR", "\033%/2"}, - { "GBK-0:GLGR", "\033%/2"}, - /* used by Emacs, but not backed by ISO-IR */ - { "BIG5-E0:GL", "\033$(0" }, - { "BIG5-E0:GR", "\033$)0" }, - { "BIG5-E1:GL", "\033$(1" }, - { "BIG5-E1:GR", "\033$)1" }, - -}; - -/* We represent UTF-8 as an XlcGLGR charset, not in extended segments. */ -#define UTF8_IN_EXTSEQ 0 - -/* ======================= Parsing ESC Sequences ======================= */ - -#define XctC0 0x0000 -#define XctHT 0x0009 -#define XctNL 0x000a -#define XctESC 0x001b -#define XctGL 0x0020 -#define XctC1 0x0080 -#define XctCSI 0x009b -#define XctGR 0x00a0 -#define XctSTX 0x0002 - -#define XctCntrlFunc 0x0023 -#define XctMB 0x0024 -#define XctOtherCoding 0x0025 -#define XctGL94 0x0028 -#define XctGR94 0x0029 -#define XctGR96 0x002d -#define XctNonStandard 0x002f -#define XctIgnoreExt 0x0030 -#define XctNotIgnoreExt 0x0031 -#define XctLeftToRight 0x0031 -#define XctRightToLeft 0x0032 -#define XctDirection 0x005d -#define XctDirectionEnd 0x005d - -#define XctGL94MB 0x2428 -#define XctGR94MB 0x2429 -#define XctExtSeg 0x252f -#define XctReturn 0x2540 - -/* - * Parses the header of a Compound Text segment, i.e. the charset designator. - * The string starts at *text and has *length bytes. - * Return value is one of: - * 0 (no valid charset designator), - * XctGL94, XctGR94, XctGR96, XctGL94MB, XctGR94MB, - * XctLeftToRight, XctRightToLeft, XctDirectionEnd, - * XctExtSeg, XctOtherCoding, XctReturn, XctIgnoreExt, XctNotIgnoreExt. - * If the return value is not 0, *text is incremented and *length decremented, - * to point past the charset designator. 
If the return value is one of - * XctGL94, XctGR94, XctGR96, XctGL94MB, XctGR94MB, - * XctExtSeg, XctOtherCoding, XctIgnoreExt, XctNotIgnoreExt, - * *final_byte is set to the "final byte" of the charset designator. - */ -static unsigned int -_XlcParseCT( - const char **text, - int *length, - unsigned char *final_byte) -{ - unsigned int ret = 0; - unsigned char ch; - const unsigned char *str = (const unsigned char *) *text; - - *final_byte = 0; - - if (*length < 1) - return 0; - switch (ch = *str++) { - case XctESC: - if (*length < 2) - return 0; - switch (ch = *str++) { - case XctOtherCoding: /* % */ - if (*length < 3) - return 0; - ch = *str++; - if (ch == XctNonStandard) { /* / */ - if (*length < 4) - return 0; - ret = XctExtSeg; - ch = *str++; - } else if (ch == '@') { - ret = XctReturn; - } else { - ret = XctOtherCoding; - } - *final_byte = ch; - break; - - case XctCntrlFunc: /* # */ - if (*length < 4) - return 0; - *final_byte = *str++; - switch (*str++) { - case XctIgnoreExt: /* 0 */ - ret = XctIgnoreExt; - break; - case XctNotIgnoreExt: /* 1 */ - ret = XctNotIgnoreExt; - break; - default: - ret = 0; - break; - } - break; - - case XctMB: /* $ */ - if (*length < 4) - return 0; - ch = *str++; - switch (ch) { - case XctGL94: /* ( */ - ret = XctGL94MB; - break; - case XctGR94: /* ) */ - ret = XctGR94MB; - break; - default: - ret = 0; - break; - } - *final_byte = *str++; - break; - - case XctGL94: /* ( */ - if (*length < 3) - return 0; - ret = XctGL94; - *final_byte = *str++; - break; - case XctGR94: /* ) */ - if (*length < 3) - return 0; - ret = XctGR94; - *final_byte = *str++; - break; - case XctGR96: /* - */ - if (*length < 3) - return 0; - ret = XctGR96; - *final_byte = *str++; - break; - } - break; - case XctCSI: - /* direction */ - if (*length < 2) - return 0; - switch (*str++) { - case XctLeftToRight: - if (*length < 3) - return 0; - if (*str++ == XctDirection) - ret = XctLeftToRight; - break; - case XctRightToLeft: - if (*length < 3) - return 0; - if 
(*str++ == XctDirection) - ret = XctRightToLeft; - break; - case XctDirectionEnd: - ret = XctDirectionEnd; - break; - } - break; - } - - if (ret) { - *length -= (const char *) str - *text; - *text = (const char *) str; - } - return ret; -} - -/* - * Fills into a freshly created XlcCharSet the fields that can be inferred - * from the ESC sequence. These are side, char_size, set_size. - * Returns True if the charset can be used with Compound Text. - * - * Used by _XlcCreateDefaultCharSet. - */ -Bool -_XlcParseCharSet( - XlcCharSet charset) -{ - unsigned int type; - unsigned char final_byte; - const char *ptr = charset->ct_sequence; - int length; - int char_size; - - if (*ptr == '\0') - return False; - - length = strlen(ptr); - - type = _XlcParseCT(&ptr, &length, &final_byte); - - /* Check for validity and determine char_size. - char_size = 0 means varying number of bytes per character. */ - switch (type) { - case XctGL94: - case XctGR94: - case XctGR96: - char_size = 1; - break; - case XctGL94MB: - case XctGR94MB: - char_size = (final_byte < 0x60 ? 2 : final_byte < 0x70 ? 3 : 4); - break; - case XctExtSeg: - char_size = final_byte - '0'; - if (!(char_size >= 0 && char_size <= 4)) - return False; - break; - case XctOtherCoding: - char_size = 0; - break; - default: - return False; - } - - charset->char_size = char_size; - - /* Fill in other values. */ - switch (type) { - case XctGL94: - case XctGL94MB: - charset->side = XlcGL; - charset->set_size = 94; - break; - case XctGR94: - case XctGR94MB: - charset->side = XlcGR; - charset->set_size = 94; - break; - case XctGR96: - charset->side = XlcGR; - charset->set_size = 96; - break; - case XctExtSeg: - case XctOtherCoding: - charset->side = XlcGLGR; - charset->set_size = 0; - break; - } - return True; -} - - -/* =============== Management of the List of Character Sets =============== */ - -/* - * Representation of a character set that can be used for Compound Text, - * at run time. 
- * Note: This information is not contained in the XlcCharSet, because - * multiple ESC sequences may be used for the same XlcCharSet. - */ -typedef struct _CTInfoRec { - XlcCharSet charset; - const char *ct_sequence; /* Compound Text ESC sequence */ - unsigned int type; - unsigned char final_byte; - /* If type == XctExtSeg: */ - const char *ext_segment; /* extended segment name, then '\002' */ - int ext_segment_len; /* length of above, including final '\002' */ - - struct _CTInfoRec *next; -} CTInfoRec, *CTInfo; - -/* - * List of character sets that can be used for Compound Text, - * Includes all that are listed in default_ct_data, but more can be added - * at runtime through _XlcAddCT. - */ -static CTInfo ct_list = NULL; -static CTInfo ct_list_end = NULL; - -/* - * Returns a Compound Text info record for an ESC sequence. - * The first part of the ESC sequence has already been parsed into 'type' - * and 'final_byte'. The remainder starts at 'text', at least 'text_len' - * bytes (only used if type == XctExtSeg). - */ -static CTInfo -_XlcGetCTInfo( - unsigned int type, - unsigned char final_byte, - const char *text, - int text_len) -{ - CTInfo ct_info; - - for (ct_info = ct_list; ct_info; ct_info = ct_info->next) - if (ct_info->type == type - && ct_info->final_byte == final_byte - && (type != XctExtSeg - || (text_len >= ct_info->ext_segment_len - && memcmp(text, ct_info->ext_segment, - ct_info->ext_segment_len) == 0))) - return ct_info; - - return (CTInfo) NULL; -} - -/* Returns the Compound Text info for a given XlcCharSet. - Returns NULL if none is found. 
*/ -static CTInfo -_XlcGetCTInfoFromCharSet( - XlcCharSet charset) -{ - CTInfo ct_info; - - for (ct_info = ct_list; ct_info; ct_info = ct_info->next) - if (ct_info->charset == charset) - return ct_info; - - return (CTInfo) NULL; -} - -/* Creates a new XlcCharSet, given its name (including side suffix) and - Compound Text ESC sequence (normally at most 4 bytes), and makes it - eligible for Compound Text processing. */ -XlcCharSet -_XlcAddCT( - const char *name, - const char *ct_sequence) -{ - CTInfo ct_info, existing_info; - XlcCharSet charset; - const char *ct_ptr; - int length; - unsigned int type; - unsigned char final_byte; - - charset = _XlcGetCharSet(name); - if (charset != NULL) { - /* Even if the charset already exists, it is OK to register a second - Compound Text sequence for it. */ - } else { - /* Attempt to create the charset. */ - charset = _XlcCreateDefaultCharSet(name, ct_sequence); - if (charset == NULL) - return (XlcCharSet) NULL; - _XlcAddCharSet(charset); - } - - /* Allocate a CTinfo record. */ - length = strlen(ct_sequence); - ct_info = (CTInfo) Xmalloc(sizeof(CTInfoRec) + length+1); - if (ct_info == NULL) - return charset; - - ct_info->charset = charset; - ct_info->ct_sequence = strcpy((char *) (ct_info + 1), ct_sequence); - - /* Parse the Compound Text sequence. */ - ct_ptr = ct_sequence; - type = _XlcParseCT(&ct_ptr, &length, &final_byte); - - ct_info->type = type; - ct_info->final_byte = final_byte; - - switch (type) { - case XctGL94: - case XctGR94: - case XctGR96: - case XctGL94MB: - case XctGR94MB: - case XctOtherCoding: - ct_info->ext_segment = NULL; - ct_info->ext_segment_len = 0; - break; - case XctExtSeg: { - /* By convention, the extended segment name is the encoding_name - in lowercase. */ - const char *q = charset->encoding_name; - int n = strlen(q); - char *p; - - /* Ensure ct_info->ext_segment_len <= 0x3fff - 6. 
*/ - if (n > 0x3fff - 6 - 1) { - Xfree(ct_info); - return charset; - } - p = (char *) Xmalloc(n+1); - if (p == NULL) { - Xfree(ct_info); - return charset; - } - ct_info->ext_segment = p; - ct_info->ext_segment_len = n+1; - for ( ; n > 0; p++, q++, n--) - *p = (*q >= 'A' && *q <= 'Z' ? *q - 'A' + 'a' : *q); - *p = XctSTX; - break; - } - default: - Xfree(ct_info); - return (XlcCharSet) NULL; - } - - /* Insert it into the list, if not already present. */ - existing_info = - _XlcGetCTInfo(type, ct_info->final_byte, - ct_info->ext_segment, ct_info->ext_segment_len); - if (existing_info == NULL) { - /* Insert it at the end. If there are duplicates CTinfo entries - for the same XlcCharSet, we want the first (standard) one to - override the second (user defined) one. */ - ct_info->next = NULL; - if (ct_list_end) - ct_list_end->next = ct_info; - else - ct_list = ct_info; - ct_list_end = ct_info; - } else { - if (existing_info->charset != charset - /* We have a conflict, with one exception: JISX0208.1983-0 and - JISX0208.1990-0 are the same for all practical purposes. */ - && !(strncmp(existing_info->charset->name, "JISX0208", 8) == 0 - && strncmp(charset->name, "JISX0208", 8) == 0)) { - fprintf(stderr, - "Xlib: charsets %s and %s have the same CT sequence\n", - charset->name, existing_info->charset->name); - if (strcmp(charset->ct_sequence, ct_sequence) == 0) - charset->ct_sequence = ""; - } - Xfree(ct_info); - } - - return charset; -} - - -/* ========== Converters String <--> CharSet <--> Compound Text ========== */ - -/* - * Structure representing the parse state of a Compound Text string. 
- */ -typedef struct _StateRec { - XlcCharSet charset; /* The charset of the current segment */ - XlcCharSet GL_charset; /* The charset responsible for 0x00..0x7F */ - XlcCharSet GR_charset; /* The charset responsible for 0x80..0xFF */ - XlcCharSet Other_charset; /* != NULL if currently in an other segment */ - int ext_seg_left; /* > 0 if currently in an extended segment */ -} StateRec, *State; - - -/* Subroutine for parsing an ESC sequence. */ - -typedef enum { - resOK, /* Charset saved in 'state', sequence skipped */ - resNotInList, /* Charset not found, sequence skipped */ - resNotCTSeq /* EscSeq not recognized, pointers not changed */ -} CheckResult; - -static CheckResult -_XlcCheckCTSequence( - State state, - const char **ctext, - int *ctext_len) -{ - XlcCharSet charset; - CTInfo ct_info; - const char *tmp_ctext = *ctext; - int tmp_ctext_len = *ctext_len; - unsigned int type; - unsigned char final_byte; - int ext_seg_left = 0; - - /* Check for validity. */ - type = _XlcParseCT(&tmp_ctext, &tmp_ctext_len, &final_byte); - - switch (type) { - case XctGL94: - case XctGR94: - case XctGR96: - case XctGL94MB: - case XctGR94MB: - case XctOtherCoding: - *ctext = tmp_ctext; - *ctext_len = tmp_ctext_len; - break; - case XctReturn: - *ctext = tmp_ctext; - *ctext_len = tmp_ctext_len; - state->Other_charset = NULL; - return resOK; - case XctExtSeg: - if (tmp_ctext_len > 2 - && (tmp_ctext[0] & 0x80) && (tmp_ctext[0] & 0x80)) { - unsigned int msb = tmp_ctext[0] & 0x7f; - unsigned int lsb = tmp_ctext[1] & 0x7f; - ext_seg_left = (msb << 7) + lsb; - if (ext_seg_left <= tmp_ctext_len - 2) { - *ctext = tmp_ctext + 2; - *ctext_len = tmp_ctext_len - 2; - break; - } - } - return resNotCTSeq; - default: - return resNotCTSeq; - } - - ct_info = _XlcGetCTInfo(type, final_byte, *ctext, ext_seg_left); - - if (ct_info) { - charset = ct_info->charset; - state->ext_seg_left = ext_seg_left; - if (type == XctExtSeg) { - state->charset = charset; - /* Skip past the extended segment name and the 
separator. */ - *ctext += ct_info->ext_segment_len; - *ctext_len -= ct_info->ext_segment_len; - state->ext_seg_left -= ct_info->ext_segment_len; - } else if (type == XctOtherCoding) { - state->Other_charset = charset; - } else { - if (charset->side == XlcGL) { - state->GL_charset = charset; - } else if (charset->side == XlcGR) { - state->GR_charset = charset; - } else { - state->GL_charset = charset; - state->GR_charset = charset; - } - } - return resOK; - } else { - state->ext_seg_left = 0; - if (type == XctExtSeg) { - /* Skip the entire extended segment. */ - *ctext += ext_seg_left; - *ctext_len -= ext_seg_left; - } - return resNotInList; - } -} - -static void -init_state( - XlcConv conv) -{ - State state = (State) conv->state; - static XlcCharSet default_GL_charset = NULL; - static XlcCharSet default_GR_charset = NULL; - - if (default_GL_charset == NULL) { - default_GL_charset = _XlcGetCharSet("ISO8859-1:GL"); - default_GR_charset = _XlcGetCharSet("ISO8859-1:GR"); - } - - /* The initial state is ISO-8859-1 on both sides. */ - state->GL_charset = state->charset = default_GL_charset; - state->GR_charset = default_GR_charset; - - state->Other_charset = NULL; - - state->ext_seg_left = 0; -} - -/* from XlcNCompoundText to XlcNCharSet */ - -static int -cttocs( - XlcConv conv, - XPointer *from, - int *from_left, - XPointer *to, - int *to_left, - XPointer *args, - int num_args) -{ - State state = (State) conv->state; - XlcCharSet charset = NULL; - const char *ctptr; - char *bufptr; - int ctext_len, buf_len; - int unconv_num = 0; - - ctptr = (const char *) *from; - bufptr = (char *) *to; - ctext_len = *from_left; - buf_len = *to_left; - - while (ctext_len > 0 && buf_len > 0) { - if (state->ext_seg_left == 0) { - /* Not in the middle of an extended segment; look at next byte. 
*/ - unsigned char ch = *ctptr; - XlcCharSet ch_charset; - - if (ch == XctESC) { - CheckResult ret = - _XlcCheckCTSequence(state, &ctptr, &ctext_len); - if (ret == resOK) - /* state has been modified. */ - continue; - if (ret == resNotInList) { - /* XXX Just continue with previous charset. */ - unconv_num++; - continue; - } - } else if (ch == XctCSI) { - /* XXX Simply ignore the XctLeftToRight, XctRightToLeft, - XctDirectionEnd sequences for the moment. */ - unsigned char dummy; - if (_XlcParseCT(&ctptr, &ctext_len, &dummy)) { - unconv_num++; - continue; - } - } - - /* Find the charset which is responsible for this byte. */ - ch_charset = (state->Other_charset != NULL ? state->Other_charset : - (ch & 0x80 ? state->GR_charset : state->GL_charset)); - - /* Set the charset of this run, or continue the current run, - or stop the current run. */ - if (charset) { - if (charset != ch_charset) - break; - } else { - state->charset = charset = ch_charset; - } - - /* We don't want to split a character into multiple pieces. */ - if (buf_len < 6) { - if (charset->char_size > 0) { - if (buf_len < charset->char_size) - break; - } else { - /* char_size == 0 is tricky. The code here is good only - for valid UTF-8 input. */ - if (charset->ct_sequence[0] == XctESC - && charset->ct_sequence[1] == XctOtherCoding - && charset->ct_sequence[2] == 'G') { - int char_size = (ch < 0xc0 ? 1 : - ch < 0xe0 ? 2 : - ch < 0xf0 ? 3 : - ch < 0xf8 ? 4 : - ch < 0xfc ? 5 : - 6); - if (buf_len < char_size) - break; - } - } - } - - *bufptr++ = *ctptr++; - ctext_len--; - buf_len--; - } else { - /* Copy as much as possible from the current extended segment - to the buffer. */ - int char_size; - - /* Set the charset of this run, or continue the current run, - or stop the current run. 
*/ - if (charset) { - if (charset != state->charset) - break; - } else { - charset = state->charset; - } - - char_size = charset->char_size; - - if (state->ext_seg_left <= buf_len || char_size > 0) { - int n = (state->ext_seg_left <= buf_len - ? state->ext_seg_left - : (buf_len / char_size) * char_size); - memcpy(bufptr, ctptr, n); - ctptr += n; ctext_len -= n; - bufptr += n; buf_len -= n; - state->ext_seg_left -= n; - } else { -#if UTF8_IN_EXTSEQ - /* char_size == 0 is tricky. The code here is good only - for valid UTF-8 input. */ - if (strcmp(charset->name, "ISO10646-1") == 0) { - unsigned char ch = *ctptr; - int char_size = (ch < 0xc0 ? 1 : - ch < 0xe0 ? 2 : - ch < 0xf0 ? 3 : - ch < 0xf8 ? 4 : - ch < 0xfc ? 5 : - 6); - int i; - if (buf_len < char_size) - break; - /* A small loop is faster than calling memcpy. */ - for (i = char_size; i > 0; i--) - *bufptr++ = *ctptr++; - ctext_len -= char_size; - buf_len -= char_size; - state->ext_seg_left -= char_size; - } else -#endif - { - /* Here ctext_len >= state->ext_seg_left > buf_len. - We may be splitting a character into multiple pieces. - Oh well. */ - int n = buf_len; - memcpy(bufptr, ctptr, n); - ctptr += n; ctext_len -= n; - bufptr += n; buf_len -= n; - state->ext_seg_left -= n; - } - } - } - } - - /* 'charset' is the charset for the current run. In some cases, - 'state->charset' contains the charset for the next run. Therefore, - return 'charset'. - 'charset' may still be NULL only if no output was produced. 
*/ - if (num_args > 0) - *((XlcCharSet *) args[0]) = charset; - - *from_left -= ctptr - *((const char **) from); - *from = (XPointer) ctptr; - - *to_left -= bufptr - *((char **) to); - *to = (XPointer) bufptr; - - return unconv_num; -} - -/* from XlcNCharSet to XlcNCompoundText */ - -static int -cstoct( - XlcConv conv, - XPointer *from, - int *from_left, - XPointer *to, - int *to_left, - XPointer *args, - int num_args) -{ - State state = (State) conv->state; - XlcSide side; - unsigned char min_ch = 0, max_ch = 0; - int length, unconv_num; - CTInfo ct_info; - XlcCharSet charset; - const char *csptr; - char *ctptr; - int csstr_len, ct_len; - char *ext_segment_start; - int char_size; - - /* One argument is required, of type XlcCharSet. */ - if (num_args < 1) - return -1; - - csptr = *((const char **) from); - ctptr = *((char **) to); - csstr_len = *from_left; - ct_len = *to_left; - - charset = (XlcCharSet) args[0]; - - ct_info = _XlcGetCTInfoFromCharSet(charset); - if (ct_info == NULL) - return -1; - - side = charset->side; - length = strlen(ct_info->ct_sequence); - - ext_segment_start = NULL; - - if (ct_info->type == XctOtherCoding) { - /* Output the Escape sequence for switching to the charset, and - reserve room now for the XctReturn sequence at the end. */ - if (ct_len < length + 3) - return -1; - - memcpy(ctptr, ct_info->ct_sequence, length); - ctptr += length; - ct_len -= length + 3; - } else - /* Test whether the charset is already active. */ - if (((side == XlcGR || side == XlcGLGR) - && charset != state->GR_charset) - || ((side == XlcGL || side == XlcGLGR) - && charset != state->GL_charset)) { - - /* Output the Escape sequence for switching to the charset. 
*/ - if (ct_info->type == XctExtSeg) { - if (ct_len < length + 2 + ct_info->ext_segment_len) - return -1; - - memcpy(ctptr, ct_info->ct_sequence, length); - ctptr += length; - ct_len -= length; - - ctptr += 2; - ct_len -= 2; - ext_segment_start = ctptr; - - /* The size of an extended segment must fit in 14 bits. */ - if (ct_len > 0x3fff) - ct_len = 0x3fff; - - memcpy(ctptr, ct_info->ext_segment, ct_info->ext_segment_len); - ctptr += ct_info->ext_segment_len; - ct_len -= ct_info->ext_segment_len; - } else { - if (ct_len < length) - return -1; - - memcpy(ctptr, ct_info->ct_sequence, length); - ctptr += length; - ct_len -= length; - } - } - - /* If the charset has side GL or GR, prepare remapping the characters - to the correct side. */ - if (charset->set_size) { - min_ch = 0x20; - max_ch = 0x7f; - if (charset->set_size == 94) { - max_ch--; - if (charset->char_size > 1 || side == XlcGR) - min_ch++; - } - } - - /* Actually copy the contents. */ - unconv_num = 0; - char_size = charset->char_size; - if (char_size == 1) { - while (csstr_len > 0 && ct_len > 0) { - if (charset->set_size) { - /* The CompoundText specification says that the only - control characters allowed are 0x09, 0x0a, 0x1b, 0x9b. - Therefore here we eliminate other control characters. 
*/ - unsigned char ch = *((unsigned char *) csptr) & 0x7f; - if (!((ch >= min_ch && ch <= max_ch) - || (side == XlcGL - && (ch == 0x00 || ch == 0x09 || ch == 0x0a)) - || ((side == XlcGL || side == XlcGR) - && (ch == 0x1b)))) { - csptr++; - csstr_len--; - unconv_num++; - continue; - } - } - - if (side == XlcGL) - *ctptr++ = *csptr++ & 0x7f; - else if (side == XlcGR) - *ctptr++ = *csptr++ | 0x80; - else - *ctptr++ = *csptr++; - csstr_len--; - ct_len--; - } - } else if (char_size > 1) { - while (csstr_len >= char_size && ct_len >= char_size) { - if (side == XlcGL) { - int i; - for (i = char_size; i > 0; i--) - *ctptr++ = *csptr++ & 0x7f; - } else if (side == XlcGR) { - int i; - for (i = char_size; i > 0; i--) - *ctptr++ = *csptr++ | 0x80; - } else { - int i; - for (i = char_size; i > 0; i--) - *ctptr++ = *csptr++; - } - csstr_len -= char_size; - ct_len -= char_size; - } - } else { - /* char_size = 0. The code here is good only for valid UTF-8 input. */ - if ((charset->ct_sequence[0] == XctESC - && charset->ct_sequence[1] == XctOtherCoding - && charset->ct_sequence[2] == 'G') -#if UTF8_IN_EXTSEQ - || strcmp(charset->name, "ISO10646-1") == 0 -#endif - ) { - while (csstr_len > 0 && ct_len > 0) { - unsigned char ch = * (unsigned char *) csptr; - int char_size = (ch < 0xc0 ? 1 : - ch < 0xe0 ? 2 : - ch < 0xf0 ? 3 : - ch < 0xf8 ? 4 : - ch < 0xfc ? 5 : - 6); - int i; - if (!(csstr_len >= char_size && ct_len >= char_size)) - break; - for (i = char_size; i > 0; i--) - *ctptr++ = *csptr++; - csstr_len -= char_size; - ct_len -= char_size; - } - } else { - while (csstr_len > 0 && ct_len > 0) { - *ctptr++ = *csptr++; - csstr_len--; - ct_len--; - } - } - } - - if (ct_info->type == XctOtherCoding) { - /* Terminate with an XctReturn sequence. */ - ctptr[0] = XctESC; - ctptr[1] = XctOtherCoding; - ctptr[2] = '@'; - ctptr += 3; - } else if (ext_segment_start != NULL) { - /* Backpatch the extended segment's length. 
*/ - int ext_segment_length = ctptr - ext_segment_start; - *(ext_segment_start - 2) = (ext_segment_length >> 7) | 0x80; - *(ext_segment_start - 1) = (ext_segment_length & 0x7f) | 0x80; - } else { - if (side == XlcGR || side == XlcGLGR) - state->GR_charset = charset; - if (side == XlcGL || side == XlcGLGR) - state->GL_charset = charset; - } - - *from_left -= csptr - *((const char **) from); - *from = (XPointer) csptr; - - *to_left -= ctptr - *((char **) to); - *to = (XPointer) ctptr; - - return 0; -} - -/* from XlcNString to XlcNCharSet */ - -static int -strtocs( - XlcConv conv, - XPointer *from, - int *from_left, - XPointer *to, - int *to_left, - XPointer *args, - int num_args) -{ - State state = (State) conv->state; - const char *src; - char *dst; - unsigned char side; - int length; - - src = (const char *) *from; - dst = (char *) *to; - - length = min(*from_left, *to_left); - side = *((unsigned char *) src) & 0x80; - - while (side == (*((unsigned char *) src) & 0x80) && length-- > 0) - *dst++ = *src++; - - *from_left -= src - (const char *) *from; - *from = (XPointer) src; - *to_left -= dst - (char *) *to; - *to = (XPointer) dst; - - if (num_args > 0) - *((XlcCharSet *)args[0]) = (side ? state->GR_charset : state->GL_charset); - - return 0; -} - -/* from XlcNCharSet to XlcNString */ - -static int -cstostr( - XlcConv conv, - XPointer *from, - int *from_left, - XPointer *to, - int *to_left, - XPointer *args, - int num_args) -{ - State state = (State) conv->state; - const char *csptr; - char *string_ptr; - int csstr_len, str_len; - unsigned char ch; - int unconv_num = 0; - - /* This converter can only convert from ISO8859-1:GL and ISO8859-1:GR. 
*/ - if (num_args < 1 - || !((XlcCharSet) args[0] == state->GL_charset - || (XlcCharSet) args[0] == state->GR_charset)) - return -1; - - csptr = *((const char **) from); - string_ptr = *((char **) to); - csstr_len = *from_left; - str_len = *to_left; - - while (csstr_len > 0 && str_len > 0) { - ch = *((unsigned char *) csptr++); - csstr_len--; - /* Citing ICCCM: "STRING as a type specifies the ISO Latin-1 character - set plus the control characters TAB and NEWLINE." */ - if ((ch < 0x20 && ch != 0x00 && ch != 0x09 && ch != 0x0a) - || (ch >= 0x7f && ch < 0xa0)) { - unconv_num++; - continue; - } - *((unsigned char *) string_ptr++) = ch; - str_len--; - } - - *from_left -= csptr - *((const char **) from); - *from = (XPointer) csptr; - - *to_left -= string_ptr - *((char **) to); - *to = (XPointer) string_ptr; - - return unconv_num; -} - - -static XlcConv -create_conv( - XlcConvMethods methods) -{ - XlcConv conv; - - conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + sizeof(StateRec)); - if (conv == NULL) - return (XlcConv) NULL; - - conv->state = (XPointer) &conv[1]; - - conv->methods = methods; - - init_state(conv); - - return conv; -} - -static void -close_converter( - XlcConv conv) -{ - /* conv->state is allocated together with conv, free both at once. 
*/ - Xfree((char *) conv); -} - - -static XlcConvMethodsRec cttocs_methods = { - close_converter, - cttocs, - init_state -}; - -static XlcConv -open_cttocs( - XLCd from_lcd, - const char *from_type, - XLCd to_lcd, - const char *to_type) -{ - return create_conv(&cttocs_methods); -} - - -static XlcConvMethodsRec cstoct_methods = { - close_converter, - cstoct, - init_state -}; - -static XlcConv -open_cstoct( - XLCd from_lcd, - const char *from_type, - XLCd to_lcd, - const char *to_type) -{ - return create_conv(&cstoct_methods); -} - - -static XlcConvMethodsRec strtocs_methods = { - close_converter, - strtocs, - init_state -}; - -static XlcConv -open_strtocs( - XLCd from_lcd, - const char *from_type, - XLCd to_lcd, - const char *to_type) -{ - return create_conv(&strtocs_methods); -} - - -static XlcConvMethodsRec cstostr_methods = { - close_converter, - cstostr, - init_state -}; - -static XlcConv -open_cstostr( - XLCd from_lcd, - const char *from_type, - XLCd to_lcd, - const char *to_type) -{ - return create_conv(&cstostr_methods); -} - - -/* =========================== Initialization =========================== */ - -Bool -_XlcInitCTInfo(void) -{ - if (ct_list == NULL) { - const CTDataRec *ct_data; - int num; - XlcCharSet charset; - - /* Initialize ct_list. */ - - num = sizeof(default_ct_data) / sizeof(CTDataRec); - for (ct_data = default_ct_data; num > 0; ct_data++, num--) { - charset = _XlcAddCT(ct_data->name, ct_data->ct_sequence); - if (charset == NULL) - continue; - if (strncmp(charset->ct_sequence, "\x1b\x25\x2f", 3) != 0) - charset->source = CSsrcStd; - else - charset->source = CSsrcXLC; - } - - /* Register CompoundText and CharSet converters. 
*/ - - _XlcSetConverter((XLCd) NULL, XlcNCompoundText, - (XLCd) NULL, XlcNCharSet, - open_cttocs); - _XlcSetConverter((XLCd) NULL, XlcNString, - (XLCd) NULL, XlcNCharSet, - open_strtocs); - - _XlcSetConverter((XLCd) NULL, XlcNCharSet, - (XLCd) NULL, XlcNCompoundText, - open_cstoct); - _XlcSetConverter((XLCd) NULL, XlcNCharSet, - (XLCd) NULL, XlcNString, - open_cstostr); - } - - return True; -} +/*
+ * Copyright 1992, 1993 by TOSHIBA Corp.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose and without fee is hereby granted, provided
+ * that the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of TOSHIBA not be used in advertising
+ * or publicity pertaining to distribution of the software without specific,
+ * written prior permission. TOSHIBA make no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * TOSHIBA DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ * ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+ * TOSHIBA BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Katsuhisa Yano TOSHIBA Corp.
+ * mopi@osa.ilab.toshiba.co.jp
+ */
+/*
+ * Copyright 1995 by FUJITSU LIMITED
+ * This is source code modified by FUJITSU LIMITED under the Joint
+ * Development Agreement for the CDE/Motif PST.
+ *
+ * Modifier: Takanori Tateno FUJITSU LIMITED
+ *
+ */
+/*
+ * 2000
+ * Modifier: Ivan Pascal The XFree86 Project
+ * Modifier: Bruno Haible The XFree86 Project
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include "Xlibint.h"
+#include "XlcPubI.h"
+#include <X11/Xos.h>
+#include <stdio.h>
+
+
+/* ====================== Built-in Character Sets ====================== */
+
+/*
+ * Static representation of a character set that can be used in Compound Text.
+ */
+typedef struct _CTDataRec {
+ /* Charset name including its side suffix (e.g. ":GL", ":GR", ":GLGR");
+ handed to _XlcAddCT as 'name'. Room for 18 characters plus the NUL. */
+ const char name[19];
+ const char ct_sequence[5]; /* Compound Text encoding, ESC sequence */
+} CTDataRec, *CTData;
+
+/* Built-in table of (charset name, ESC sequence) pairs. _XlcInitCTInfo
+ walks this table once and registers every entry through _XlcAddCT.
+ The same name may appear twice with different sequences (a charset can
+ have more than one Compound Text sequence), and JISX0208.1983-0 and
+ JISX0208.1990-0 deliberately share one sequence; _XlcAddCT has an
+ explicit exception for that pair. */
+static const CTDataRec default_ct_data[] =
+{
+ /* */
+ /* X11 registry name MIME name ISO-IR ESC sequence */
+ /* */
+
+ /* Registered character sets with one byte per character */
+ { "ISO8859-1:GL", /* US-ASCII 6 */ "\033(B" },
+ { "ISO8859-1:GR", /* ISO-8859-1 100 */ "\033-A" },
+ { "ISO8859-2:GR", /* ISO-8859-2 101 */ "\033-B" },
+ { "ISO8859-3:GR", /* ISO-8859-3 109 */ "\033-C" },
+ { "ISO8859-4:GR", /* ISO-8859-4 110 */ "\033-D" },
+ { "ISO8859-5:GR", /* ISO-8859-5 144 */ "\033-L" },
+ { "ISO8859-6:GR", /* ISO-8859-6 127 */ "\033-G" },
+ { "ISO8859-7:GR", /* ISO-8859-7 126 */ "\033-F" },
+ { "ISO8859-8:GR", /* ISO-8859-8 138 */ "\033-H" },
+ { "ISO8859-9:GR", /* ISO-8859-9 148 */ "\033-M" },
+ { "ISO8859-10:GR", /* ISO-8859-10 157 */ "\033-V" },
+ { "ISO8859-11:GR", /* ISO-8859-11 166 */ "\033-T" },
+ { "ISO8859-13:GR", /* ISO-8859-13 179 */ "\033-Y" },
+ { "ISO8859-14:GR", /* ISO-8859-14 199 */ "\033-_" },
+ { "ISO8859-15:GR", /* ISO-8859-15 203 */ "\033-b" },
+ { "ISO8859-16:GR", /* ISO-8859-16 226 */ "\033-f" },
+ { "JISX0201.1976-0:GL", /* ISO-646-JP 14 */ "\033(J" },
+ { "JISX0201.1976-0:GR", "\033)I" },
+#if 0
+ { "TIS620-0:GR", /* TIS-620 166 */ "\033-T" },
+#endif
+
+ /* Registered character sets with two byte per character */
+ { "GB2312.1980-0:GL", /* GB_2312-80 58 */ "\033$(A" },
+ { "GB2312.1980-0:GR", /* GB_2312-80 58 */ "\033$)A" },
+ { "JISX0208.1983-0:GL", /* JIS_X0208-1983 87 */ "\033$(B" },
+ { "JISX0208.1983-0:GR", /* JIS_X0208-1983 87 */ "\033$)B" },
+ { "JISX0208.1990-0:GL", /* JIS_X0208-1990 168 */ "\033$(B" },
+ { "JISX0208.1990-0:GR", /* JIS_X0208-1990 168 */ "\033$)B" },
+ { "JISX0212.1990-0:GL", /* JIS_X0212-1990 159 */ "\033$(D" },
+ { "JISX0212.1990-0:GR", /* JIS_X0212-1990 159 */ "\033$)D" },
+ { "KSC5601.1987-0:GL", /* KS_C_5601-1987 149 */ "\033$(C" },
+ { "KSC5601.1987-0:GR", /* KS_C_5601-1987 149 */ "\033$)C" },
+ { "CNS11643.1986-1:GL", /* CNS 11643-1992 pl.1 171 */ "\033$(G" },
+ { "CNS11643.1986-1:GR", /* CNS 11643-1992 pl.1 171 */ "\033$)G" },
+ { "CNS11643.1986-2:GL", /* CNS 11643-1992 pl.2 172 */ "\033$(H" },
+ { "CNS11643.1986-2:GR", /* CNS 11643-1992 pl.2 172 */ "\033$)H" },
+ { "CNS11643.1992-3:GL", /* CNS 11643-1992 pl.3 183 */ "\033$(I" },
+ { "CNS11643.1992-3:GR", /* CNS 11643-1992 pl.3 183 */ "\033$)I" },
+ { "CNS11643.1992-4:GL", /* CNS 11643-1992 pl.4 184 */ "\033$(J" },
+ { "CNS11643.1992-4:GR", /* CNS 11643-1992 pl.4 184 */ "\033$)J" },
+ { "CNS11643.1992-5:GL", /* CNS 11643-1992 pl.5 185 */ "\033$(K" },
+ { "CNS11643.1992-5:GR", /* CNS 11643-1992 pl.5 185 */ "\033$)K" },
+ { "CNS11643.1992-6:GL", /* CNS 11643-1992 pl.6 186 */ "\033$(L" },
+ { "CNS11643.1992-6:GR", /* CNS 11643-1992 pl.6 186 */ "\033$)L" },
+ { "CNS11643.1992-7:GL", /* CNS 11643-1992 pl.7 187 */ "\033$(M" },
+ { "CNS11643.1992-7:GR", /* CNS 11643-1992 pl.7 187 */ "\033$)M" },
+
+ /* Registered encodings with a varying number of bytes per character */
+ { "ISO10646-1", /* UTF-8 196 */ "\033%G" },
+
+ /* Encodings without ISO-IR assigned escape sequence must be
+ defined in XLC_LOCALE files, using "\033%/1" or "\033%/2". */
+
+ /* Backward compatibility with XFree86 3.x */
+#if 1
+ { "ISO8859-14:GR", "\033%/1" },
+ { "ISO8859-15:GR", "\033%/1" },
+#endif
+ /* For use by utf8 -> ctext */
+ { "BIG5-0:GLGR", "\033%/2"},
+ { "BIG5HKSCS-0:GLGR", "\033%/2"},
+ { "GBK-0:GLGR", "\033%/2"},
+ /* used by Emacs, but not backed by ISO-IR */
+ { "BIG5-E0:GL", "\033$(0" },
+ { "BIG5-E0:GR", "\033$)0" },
+ { "BIG5-E1:GL", "\033$(1" },
+ { "BIG5-E1:GR", "\033$)1" },
+
+};
+
+/* We represent UTF-8 as an XlcGLGR charset, not in extended segments. */
+/* When set to 0, the "#if UTF8_IN_EXTSEQ" branches in cttocs and cstoct
+ are compiled out. */
+#define UTF8_IN_EXTSEQ 0
+
+/* ======================= Parsing ESC Sequences ======================= */
+
+/* Single byte values: control characters and code-range boundaries.
+ XctESC and XctCSI introduce the sequences recognized by _XlcParseCT;
+ XctSTX terminates an extended segment name (see _XlcAddCT). */
+#define XctC0 0x0000
+#define XctHT 0x0009
+#define XctNL 0x000a
+#define XctESC 0x001b
+#define XctGL 0x0020
+#define XctC1 0x0080
+#define XctCSI 0x009b
+#define XctGR 0x00a0
+#define XctSTX 0x0002
+
+/* Bytes that may follow ESC or CSI. Several of them double as
+ _XlcParseCT return codes. Note that XctNotIgnoreExt/XctLeftToRight and
+ XctDirection/XctDirectionEnd share a value; which one is meant depends
+ on whether the byte follows ESC or CSI. */
+#define XctCntrlFunc 0x0023
+#define XctMB 0x0024
+#define XctOtherCoding 0x0025
+#define XctGL94 0x0028
+#define XctGR94 0x0029
+#define XctGR96 0x002d
+#define XctNonStandard 0x002f
+#define XctIgnoreExt 0x0030
+#define XctNotIgnoreExt 0x0031
+#define XctLeftToRight 0x0031
+#define XctRightToLeft 0x0032
+#define XctDirection 0x005d
+#define XctDirectionEnd 0x005d
+
+/* Return codes for two-byte introducers; each value is the two bytes
+ following ESC concatenated ("$(", "$)", "%/", "%@"). */
+#define XctGL94MB 0x2428
+#define XctGR94MB 0x2429
+#define XctExtSeg 0x252f
+#define XctReturn 0x2540
+
+/*
+ * Recognizes the designator at the start of a Compound Text segment.
+ *
+ * Input:  *text points at the candidate sequence, *length is the number of
+ *         bytes available there.
+ * Output: the kind of sequence found, or 0 when the bytes do not form a
+ *         recognized (and complete) sequence. Possible non-zero results:
+ *           XctGL94, XctGR94, XctGR96, XctGL94MB, XctGR94MB,
+ *           XctExtSeg, XctOtherCoding, XctReturn,
+ *           XctIgnoreExt, XctNotIgnoreExt,
+ *           XctLeftToRight, XctRightToLeft, XctDirectionEnd.
+ *
+ * Only on a non-zero result are *text and *length advanced past the
+ * sequence. *final_byte is reset to 0 on entry and is overwritten with the
+ * sequence's final byte for the ESC-introduced forms (it may also have been
+ * written when 0 is returned for a malformed "ESC $" or "ESC #" sequence).
+ */
+static unsigned int
+_XlcParseCT(
+    const char **text,
+    int *length,
+    unsigned char *final_byte)
+{
+    const unsigned char *cursor = (const unsigned char *) *text;
+    const int avail = *length;
+    unsigned int kind = 0;
+    unsigned char lead, second, third;
+
+    *final_byte = 0;
+
+    if (avail < 1)
+        return 0;
+    lead = *cursor++;
+
+    if (lead == XctESC) {
+        if (avail < 2)
+            return 0;
+        second = *cursor++;
+
+        if (second == XctOtherCoding) {                 /* ESC % ... */
+            if (avail < 3)
+                return 0;
+            third = *cursor++;
+            if (third == XctNonStandard) {              /* ESC % / F */
+                if (avail < 4)
+                    return 0;
+                kind = XctExtSeg;
+                third = *cursor++;
+            } else if (third == '@') {                  /* ESC % @ */
+                kind = XctReturn;
+            } else {                                    /* ESC % F */
+                kind = XctOtherCoding;
+            }
+            *final_byte = third;
+        } else if (second == XctCntrlFunc) {            /* ESC # F 0|1 */
+            if (avail < 4)
+                return 0;
+            *final_byte = *cursor++;
+            third = *cursor++;
+            if (third == XctIgnoreExt)
+                kind = XctIgnoreExt;
+            else if (third == XctNotIgnoreExt)
+                kind = XctNotIgnoreExt;
+        } else if (second == XctMB) {                   /* ESC $ (|) F */
+            if (avail < 4)
+                return 0;
+            third = *cursor++;
+            if (third == XctGL94)
+                kind = XctGL94MB;
+            else if (third == XctGR94)
+                kind = XctGR94MB;
+            *final_byte = *cursor++;
+        } else if (second == XctGL94                    /* ESC ( F */
+                   || second == XctGR94                 /* ESC ) F */
+                   || second == XctGR96) {              /* ESC - F */
+            if (avail < 3)
+                return 0;
+            /* For these three the result code is the intermediate byte. */
+            kind = second;
+            *final_byte = *cursor++;
+        }
+    } else if (lead == XctCSI) {
+        /* Directionality: CSI 1 ], CSI 2 ], or CSI ]. */
+        if (avail < 2)
+            return 0;
+        second = *cursor++;
+
+        if (second == XctLeftToRight || second == XctRightToLeft) {
+            if (avail < 3)
+                return 0;
+            if (*cursor++ == XctDirection)
+                kind = second;
+        } else if (second == XctDirectionEnd) {
+            kind = XctDirectionEnd;
+        }
+    }
+
+    if (kind != 0) {
+        *length -= (const char *) cursor - *text;
+        *text = (const char *) cursor;
+    }
+    return kind;
+}
+
+/*
+ * Derives side, char_size and set_size of a newly created XlcCharSet from
+ * its Compound Text ESC sequence (charset->ct_sequence).
+ *
+ * Returns False, leaving the charset untouched, when the sequence is empty,
+ * is not a charset designator, or is an extended segment whose final byte
+ * does not encode a character size in '0'..'4'. Returns True otherwise.
+ * A char_size of 0 means a varying number of bytes per character.
+ *
+ * Used by _XlcCreateDefaultCharSet.
+ */
+Bool
+_XlcParseCharSet(
+    XlcCharSet charset)
+{
+    const char *seq = charset->ct_sequence;
+    unsigned char final_byte;
+    unsigned int type;
+    int remaining;
+    int width;
+
+    if (seq[0] == '\0')
+        return False;
+
+    remaining = strlen(seq);
+    type = _XlcParseCT(&seq, &remaining, &final_byte);
+
+    /* Validate the designator and work out the bytes per character. */
+    if (type == XctGL94 || type == XctGR94 || type == XctGR96) {
+        width = 1;
+    } else if (type == XctGL94MB || type == XctGR94MB) {
+        if (final_byte < 0x60)
+            width = 2;
+        else if (final_byte < 0x70)
+            width = 3;
+        else
+            width = 4;
+    } else if (type == XctExtSeg) {
+        width = final_byte - '0';
+        if (width < 0 || width > 4)
+            return False;
+    } else if (type == XctOtherCoding) {
+        width = 0;
+    } else {
+        return False;
+    }
+
+    charset->char_size = width;
+
+    /* Side and repertoire size follow directly from the designator type. */
+    if (type == XctExtSeg || type == XctOtherCoding) {
+        charset->side = XlcGLGR;
+        charset->set_size = 0;
+    } else if (type == XctGR96) {
+        charset->side = XlcGR;
+        charset->set_size = 96;
+    } else {
+        if (type == XctGL94 || type == XctGL94MB)
+            charset->side = XlcGL;
+        else
+            charset->side = XlcGR;
+        charset->set_size = 94;
+    }
+
+    return True;
+}
+
+
+/* =============== Management of the List of Character Sets =============== */
+
+/*
+ * Representation of a character set that can be used for Compound Text,
+ * at run time.
+ * Note: This information is not contained in the XlcCharSet, because
+ * multiple ESC sequences may be used for the same XlcCharSet.
+ */
+typedef struct _CTInfoRec {
+ XlcCharSet charset; /* charset this ESC sequence selects */
+ const char *ct_sequence; /* Compound Text ESC sequence */
+ /* Result of running _XlcParseCT over ct_sequence; the lookup key used
+ by _XlcGetCTInfo. */
+ unsigned int type;
+ unsigned char final_byte;
+ /* If type == XctExtSeg: */
+ /* (both fields are NULL / 0 for every other type; ext_segment is a
+ separately allocated buffer, see _XlcAddCT) */
+ const char *ext_segment; /* extended segment name, then '\002' */
+ int ext_segment_len; /* length of above, including final '\002' */
+
+ struct _CTInfoRec *next; /* next record in ct_list, NULL at the end */
+} CTInfoRec, *CTInfo;
+
+/*
+ * List of character sets that can be used for Compound Text,
+ * Includes all that are listed in default_ct_data, but more can be added
+ * at runtime through _XlcAddCT.
+ */
+static CTInfo ct_list = NULL;
+/* Last record of ct_list, kept so that _XlcAddCT can append without
+ walking the list; NULL while the list is empty. */
+static CTInfo ct_list_end = NULL;
+
+/*
+ * Looks up the Compound Text info record matching an already parsed ESC
+ * sequence, identified by 'type' and 'final_byte'.
+ *
+ * For type == XctExtSeg the record must additionally match the extended
+ * segment name: 'text' (with at least 'text_len' bytes available) has to
+ * start with the record's ext_segment bytes. For all other types 'text'
+ * and 'text_len' are ignored.
+ *
+ * Returns the first matching record of ct_list, or NULL.
+ */
+static CTInfo
+_XlcGetCTInfo(
+    unsigned int type,
+    unsigned char final_byte,
+    const char *text,
+    int text_len)
+{
+    CTInfo node = ct_list;
+
+    while (node != NULL) {
+        if (node->type == type && node->final_byte == final_byte) {
+            if (type != XctExtSeg)
+                return node;
+            if (text_len >= node->ext_segment_len
+                && memcmp(text, node->ext_segment,
+                          node->ext_segment_len) == 0)
+                return node;
+        }
+        node = node->next;
+    }
+
+    return (CTInfo) NULL;
+}
+
+/* Finds the first Compound Text info record in ct_list that belongs to
+   'charset'. Yields NULL when the charset has no registered sequence. */
+static CTInfo
+_XlcGetCTInfoFromCharSet(
+    XlcCharSet charset)
+{
+    CTInfo node = ct_list;
+
+    while (node != NULL && node->charset != charset)
+        node = node->next;
+
+    return node;
+}
+
+/*
+ * Creates a new XlcCharSet, given its name (including side suffix) and
+ * Compound Text ESC sequence (normally at most 4 bytes), and makes it
+ * eligible for Compound Text processing.
+ *
+ * If a charset of that name already exists it is reused, so that a second
+ * ESC sequence can be registered for it.
+ *
+ * Returns the charset, or NULL when the charset could not be created or
+ * 'ct_sequence' is not a charset designator. Note that the charset is
+ * also returned when only the bookkeeping record could not be allocated
+ * or the extended segment name is too long; in those cases the sequence
+ * is simply not registered.
+ *
+ * If the sequence is already registered for a different charset, a
+ * diagnostic is printed to stderr, the new record is discarded and, when
+ * the charset's own ct_sequence equals the conflicting one, it is reset
+ * to "".
+ */
+XlcCharSet
+_XlcAddCT(
+    const char *name,
+    const char *ct_sequence)
+{
+    CTInfo ct_info, existing_info;
+    XlcCharSet charset;
+    const char *ct_ptr;
+    int length;
+    unsigned int type;
+    unsigned char final_byte;
+
+    charset = _XlcGetCharSet(name);
+    if (charset != NULL) {
+        /* Even if the charset already exists, it is OK to register a second
+           Compound Text sequence for it. */
+    } else {
+        /* Attempt to create the charset. */
+        charset = _XlcCreateDefaultCharSet(name, ct_sequence);
+        if (charset == NULL)
+            return (XlcCharSet) NULL;
+        _XlcAddCharSet(charset);
+    }
+
+    /* Allocate a CTinfo record; the copy of the sequence lives directly
+       behind the record in the same allocation. */
+    length = strlen(ct_sequence);
+    ct_info = (CTInfo) Xmalloc(sizeof(CTInfoRec) + length+1);
+    if (ct_info == NULL)
+        return charset;
+
+    ct_info->charset = charset;
+    ct_info->ct_sequence = strcpy((char *) (ct_info + 1), ct_sequence);
+
+    /* Parse the Compound Text sequence. */
+    ct_ptr = ct_sequence;
+    type = _XlcParseCT(&ct_ptr, &length, &final_byte);
+
+    ct_info->type = type;
+    ct_info->final_byte = final_byte;
+
+    switch (type) {
+    case XctGL94:
+    case XctGR94:
+    case XctGR96:
+    case XctGL94MB:
+    case XctGR94MB:
+    case XctOtherCoding:
+        ct_info->ext_segment = NULL;
+        ct_info->ext_segment_len = 0;
+        break;
+    case XctExtSeg: {
+        /* By convention, the extended segment name is the encoding_name
+           in lowercase. */
+        const char *q = charset->encoding_name;
+        int n = strlen(q);
+        char *p;
+
+        /* Ensure ct_info->ext_segment_len <= 0x3fff - 6. */
+        if (n > 0x3fff - 6 - 1) {
+            Xfree(ct_info);
+            return charset;
+        }
+        p = (char *) Xmalloc(n+1);
+        if (p == NULL) {
+            Xfree(ct_info);
+            return charset;
+        }
+        ct_info->ext_segment = p;
+        ct_info->ext_segment_len = n+1;
+        for ( ; n > 0; p++, q++, n--)
+            *p = (*q >= 'A' && *q <= 'Z' ? *q - 'A' + 'a' : *q);
+        /* The name is terminated by STX instead of NUL. */
+        *p = XctSTX;
+        break;
+    }
+    default:
+        Xfree(ct_info);
+        return (XlcCharSet) NULL;
+    }
+
+    /* Insert it into the list, if not already present. */
+    existing_info =
+        _XlcGetCTInfo(type, ct_info->final_byte,
+                      ct_info->ext_segment, ct_info->ext_segment_len);
+    if (existing_info == NULL) {
+        /* Insert it at the end. If there are duplicates CTinfo entries
+           for the same XlcCharSet, we want the first (standard) one to
+           override the second (user defined) one. */
+        ct_info->next = NULL;
+        if (ct_list_end)
+            ct_list_end->next = ct_info;
+        else
+            ct_list = ct_info;
+        ct_list_end = ct_info;
+    } else {
+        if (existing_info->charset != charset
+            /* We have a conflict, with one exception: JISX0208.1983-0 and
+               JISX0208.1990-0 are the same for all practical purposes. */
+            && !(strncmp(existing_info->charset->name, "JISX0208", 8) == 0
+                 && strncmp(charset->name, "JISX0208", 8) == 0)) {
+            fprintf(stderr,
+                    "Xlib: charsets %s and %s have the same CT sequence\n",
+                    charset->name, existing_info->charset->name);
+            if (strcmp(charset->ct_sequence, ct_sequence) == 0)
+                charset->ct_sequence = "";
+        }
+        /* The record is being discarded: release the separately allocated
+           extended segment name too (NULL for non-XctExtSeg types),
+           otherwise it would leak. */
+        Xfree((char *) ct_info->ext_segment);
+        Xfree(ct_info);
+    }
+
+    return charset;
+}
+
+
+/* ========== Converters String <--> CharSet <--> Compound Text ========== */
+
+/*
+ * Structure representing the parse state of a Compound Text string.
+ */
+/* One StateRec is allocated together with each converter (see create_conv)
+ and reset to the ISO8859-1 defaults by init_state. */
+typedef struct _StateRec {
+ XlcCharSet charset; /* The charset of the current segment */
+ XlcCharSet GL_charset; /* The charset responsible for 0x00..0x7F */
+ XlcCharSet GR_charset; /* The charset responsible for 0x80..0xFF */
+ XlcCharSet Other_charset; /* != NULL if currently in an other segment */
+ /* Number of bytes of the current extended segment that cttocs has not
+ consumed yet. */
+ int ext_seg_left; /* > 0 if currently in an extended segment */
+} StateRec, *State;
+
+
+/* Subroutine for parsing an ESC sequence. */
+
+/* Outcome of _XlcCheckCTSequence. */
+typedef enum {
+ resOK, /* Charset saved in 'state', sequence skipped */
+ resNotInList, /* Charset not found, sequence skipped */
+ resNotCTSeq /* EscSeq not recognized, pointers not changed */
+} CheckResult;
+
+/*
+ * Parses the ESC sequence at *ctext (with *ctext_len bytes available) and
+ * updates 'state' accordingly.
+ *
+ * Returns:
+ *   resOK        - the sequence was recognized and its charset is registered;
+ *                  'state' has been updated and *ctext / *ctext_len point
+ *                  past the sequence (for an extended segment: past the
+ *                  segment name and its STX separator as well).
+ *   resNotInList - the sequence is well-formed but no charset is registered
+ *                  for it; it has been skipped (an extended segment is
+ *                  skipped entirely, contents included).
+ *   resNotCTSeq  - not a usable sequence; *ctext / *ctext_len are unchanged.
+ */
+static CheckResult
+_XlcCheckCTSequence(
+    State state,
+    const char **ctext,
+    int *ctext_len)
+{
+    XlcCharSet charset;
+    CTInfo ct_info;
+    const char *tmp_ctext = *ctext;
+    int tmp_ctext_len = *ctext_len;
+    unsigned int type;
+    unsigned char final_byte;
+    int ext_seg_left = 0;
+
+    /* Check for validity. */
+    type = _XlcParseCT(&tmp_ctext, &tmp_ctext_len, &final_byte);
+
+    switch (type) {
+    case XctGL94:
+    case XctGR94:
+    case XctGR96:
+    case XctGL94MB:
+    case XctGR94MB:
+    case XctOtherCoding:
+        *ctext = tmp_ctext;
+        *ctext_len = tmp_ctext_len;
+        break;
+    case XctReturn:
+        *ctext = tmp_ctext;
+        *ctext_len = tmp_ctext_len;
+        state->Other_charset = NULL;
+        return resOK;
+    case XctExtSeg:
+        /* The designator is followed by a two byte length, 7 bits per
+           byte, each byte having its high bit set. Both bytes must be
+           checked (the second test used to look at byte 0 again). */
+        if (tmp_ctext_len > 2
+            && (tmp_ctext[0] & 0x80) && (tmp_ctext[1] & 0x80)) {
+            unsigned int msb = tmp_ctext[0] & 0x7f;
+            unsigned int lsb = tmp_ctext[1] & 0x7f;
+            ext_seg_left = (msb << 7) + lsb;
+            /* Accept the segment only if it is entirely available. */
+            if (ext_seg_left <= tmp_ctext_len - 2) {
+                *ctext = tmp_ctext + 2;
+                *ctext_len = tmp_ctext_len - 2;
+                break;
+            }
+        }
+        return resNotCTSeq;
+    default:
+        return resNotCTSeq;
+    }
+
+    ct_info = _XlcGetCTInfo(type, final_byte, *ctext, ext_seg_left);
+
+    if (ct_info) {
+        charset = ct_info->charset;
+        state->ext_seg_left = ext_seg_left;
+        if (type == XctExtSeg) {
+            state->charset = charset;
+            /* Skip past the extended segment name and the separator. */
+            *ctext += ct_info->ext_segment_len;
+            *ctext_len -= ct_info->ext_segment_len;
+            state->ext_seg_left -= ct_info->ext_segment_len;
+        } else if (type == XctOtherCoding) {
+            state->Other_charset = charset;
+        } else {
+            if (charset->side == XlcGL) {
+                state->GL_charset = charset;
+            } else if (charset->side == XlcGR) {
+                state->GR_charset = charset;
+            } else {
+                state->GL_charset = charset;
+                state->GR_charset = charset;
+            }
+        }
+        return resOK;
+    } else {
+        state->ext_seg_left = 0;
+        if (type == XctExtSeg) {
+            /* Skip the entire extended segment. */
+            *ctext += ext_seg_left;
+            *ctext_len -= ext_seg_left;
+        }
+        return resNotInList;
+    }
+}
+
+/*
+ * Resets a converter's parse state to the Compound Text initial state:
+ * ISO8859-1 in both GL and GR, no "other coding" active and no extended
+ * segment pending. The two default charsets are looked up on first use and
+ * cached in function-local statics.
+ */
+static void
+init_state(
+    XlcConv conv)
+{
+    static XlcCharSet latin1_left = NULL;
+    static XlcCharSet latin1_right = NULL;
+    State st = (State) conv->state;
+
+    if (latin1_left == NULL) {
+        latin1_left = _XlcGetCharSet("ISO8859-1:GL");
+        latin1_right = _XlcGetCharSet("ISO8859-1:GR");
+    }
+
+    st->charset = latin1_left;
+    st->GL_charset = latin1_left;
+    st->GR_charset = latin1_right;
+    st->Other_charset = NULL;
+    st->ext_seg_left = 0;
+}
+
+/* from XlcNCompoundText to XlcNCharSet */
+
+/*
+ * Converts one run of Compound Text, i.e. a stretch of bytes belonging to
+ * a single charset, copying the bytes unchanged into the output buffer.
+ *
+ * Escape sequences at the start of or inside the input are consumed and
+ * update the converter state. Conversion stops when the input or output is
+ * exhausted, when the next byte belongs to a different charset than the
+ * current run, or when the output buffer cannot take the next character
+ * completely.
+ *
+ * *from / *from_left and *to / *to_left are advanced by what was consumed
+ * and produced. If num_args > 0, args[0] (an XlcCharSet *) receives the
+ * charset of the run, or NULL if no output was produced.
+ *
+ * Returns the number of sequences that were skipped without being
+ * converted (unknown charsets and directionality sequences).
+ */
+static int
+cttocs(
+    XlcConv conv,
+    XPointer *from,
+    int *from_left,
+    XPointer *to,
+    int *to_left,
+    XPointer *args,
+    int num_args)
+{
+    State state = (State) conv->state;
+    XlcCharSet charset = NULL;
+    const char *ctptr;
+    char *bufptr;
+    int ctext_len, buf_len;
+    int unconv_num = 0;
+
+    ctptr = (const char *) *from;
+    bufptr = (char *) *to;
+    ctext_len = *from_left;
+    buf_len = *to_left;
+
+    while (ctext_len > 0 && buf_len > 0) {
+        if (state->ext_seg_left == 0) {
+            /* Not in the middle of an extended segment; look at next byte. */
+            unsigned char ch = *ctptr;
+            XlcCharSet ch_charset;
+
+            if (ch == XctESC) {
+                CheckResult ret =
+                    _XlcCheckCTSequence(state, &ctptr, &ctext_len);
+                if (ret == resOK)
+                    /* state has been modified. */
+                    continue;
+                if (ret == resNotInList) {
+                    /* XXX Just continue with previous charset. */
+                    unconv_num++;
+                    continue;
+                }
+                /* resNotCTSeq: fall through and copy the ESC byte as data. */
+            } else if (ch == XctCSI) {
+                /* XXX Simply ignore the XctLeftToRight, XctRightToLeft,
+                   XctDirectionEnd sequences for the moment. */
+                unsigned char dummy;
+                if (_XlcParseCT(&ctptr, &ctext_len, &dummy)) {
+                    unconv_num++;
+                    continue;
+                }
+            }
+
+            /* Find the charset which is responsible for this byte. */
+            ch_charset = (state->Other_charset != NULL ? state->Other_charset :
+                          (ch & 0x80 ? state->GR_charset : state->GL_charset));
+
+            /* Set the charset of this run, or continue the current run,
+               or stop the current run. */
+            if (charset) {
+                if (charset != ch_charset)
+                    break;
+            } else {
+                state->charset = charset = ch_charset;
+            }
+
+            /* We don't want to split a character into multiple pieces. */
+            if (buf_len < 6) {
+                if (charset->char_size > 0) {
+                    if (buf_len < charset->char_size)
+                        break;
+                } else {
+                    /* char_size == 0 is tricky. The code here is good only
+                       for valid UTF-8 input. */
+                    if (charset->ct_sequence[0] == XctESC
+                        && charset->ct_sequence[1] == XctOtherCoding
+                        && charset->ct_sequence[2] == 'G') {
+                        int char_size = (ch < 0xc0 ? 1 :
+                                         ch < 0xe0 ? 2 :
+                                         ch < 0xf0 ? 3 :
+                                         ch < 0xf8 ? 4 :
+                                         ch < 0xfc ? 5 :
+                                                     6);
+                        if (buf_len < char_size)
+                            break;
+                    }
+                }
+            }
+
+            *bufptr++ = *ctptr++;
+            ctext_len--;
+            buf_len--;
+        } else {
+            /* Copy as much as possible from the current extended segment
+               to the buffer. */
+            int char_size;
+
+            /* Set the charset of this run, or continue the current run,
+               or stop the current run. */
+            if (charset) {
+                if (charset != state->charset)
+                    break;
+            } else {
+                charset = state->charset;
+            }
+
+            char_size = charset->char_size;
+
+            if (state->ext_seg_left <= buf_len || char_size > 0) {
+                /* Either the rest of the segment fits, or we copy as many
+                   whole characters as the buffer can hold. */
+                int n = (state->ext_seg_left <= buf_len
+                         ? state->ext_seg_left
+                         : (buf_len / char_size) * char_size);
+                /* The buffer has room for less than one character: stop
+                   here, otherwise this loop would spin forever without
+                   making progress. */
+                if (n == 0)
+                    break;
+                memcpy(bufptr, ctptr, n);
+                ctptr += n; ctext_len -= n;
+                bufptr += n; buf_len -= n;
+                state->ext_seg_left -= n;
+            } else {
+#if UTF8_IN_EXTSEQ
+                /* char_size == 0 is tricky. The code here is good only
+                   for valid UTF-8 input. */
+                if (strcmp(charset->name, "ISO10646-1") == 0) {
+                    unsigned char ch = *ctptr;
+                    int char_size = (ch < 0xc0 ? 1 :
+                                     ch < 0xe0 ? 2 :
+                                     ch < 0xf0 ? 3 :
+                                     ch < 0xf8 ? 4 :
+                                     ch < 0xfc ? 5 :
+                                                 6);
+                    int i;
+                    if (buf_len < char_size)
+                        break;
+                    /* A small loop is faster than calling memcpy. */
+                    for (i = char_size; i > 0; i--)
+                        *bufptr++ = *ctptr++;
+                    ctext_len -= char_size;
+                    buf_len -= char_size;
+                    state->ext_seg_left -= char_size;
+                } else
+#endif
+                {
+                    /* Here ctext_len >= state->ext_seg_left > buf_len.
+                       We may be splitting a character into multiple pieces.
+                       Oh well. */
+                    int n = buf_len;
+                    memcpy(bufptr, ctptr, n);
+                    ctptr += n; ctext_len -= n;
+                    bufptr += n; buf_len -= n;
+                    state->ext_seg_left -= n;
+                }
+            }
+        }
+    }
+
+    /* 'charset' is the charset for the current run. In some cases,
+       'state->charset' contains the charset for the next run. Therefore,
+       return 'charset'.
+       'charset' may still be NULL only if no output was produced. */
+    if (num_args > 0)
+        *((XlcCharSet *) args[0]) = charset;
+
+    *from_left -= ctptr - *((const char **) from);
+    *from = (XPointer) ctptr;
+
+    *to_left -= bufptr - *((char **) to);
+    *to = (XPointer) bufptr;
+
+    return unconv_num;
+}
+
+/* from XlcNCharSet to XlcNCompoundText */
+
+/*
+ * Encodes a run of characters of one charset as Compound Text.
+ * args[0] must be the XlcCharSet of the input. If that charset is not
+ * the one currently designated in 'state' (or always, for XctOtherCoding
+ * charsets), its ESC sequence is written first; extended segments get
+ * their length backpatched at the end, XctOtherCoding output is
+ * terminated by the "return" sequence ESC % @.
+ * For single byte charsets with a set_size, bytes outside the permitted
+ * range are dropped from the output.
+ * Returns -1 if no argument was given, if the charset has no Compound
+ * Text sequence, or if the output buffer cannot hold the ESC sequence;
+ * otherwise 0.
+ * NOTE(review): unconv_num counts the dropped bytes but is never returned;
+ * the function always returns 0 on success, unlike cstostr. Confirm
+ * whether callers rely on this before changing it.
+ */
+static int
+cstoct(
+ XlcConv conv,
+ XPointer *from,
+ int *from_left,
+ XPointer *to,
+ int *to_left,
+ XPointer *args,
+ int num_args)
+{
+ State state = (State) conv->state;
+ XlcSide side;
+ unsigned char min_ch = 0, max_ch = 0;
+ int length, unconv_num;
+ CTInfo ct_info;
+ XlcCharSet charset;
+ const char *csptr;
+ char *ctptr;
+ int csstr_len, ct_len;
+ char *ext_segment_start;
+ int char_size;
+
+ /* One argument is required, of type XlcCharSet. */
+ if (num_args < 1)
+ return -1;
+
+ csptr = *((const char **) from);
+ ctptr = *((char **) to);
+ csstr_len = *from_left;
+ ct_len = *to_left;
+
+ charset = (XlcCharSet) args[0];
+
+ ct_info = _XlcGetCTInfoFromCharSet(charset);
+ if (ct_info == NULL)
+ return -1;
+
+ side = charset->side;
+ length = strlen(ct_info->ct_sequence);
+
+ /* Stays NULL unless an extended segment header is written below. */
+ ext_segment_start = NULL;
+
+ if (ct_info->type == XctOtherCoding) {
+ /* Output the Escape sequence for switching to the charset, and
+ reserve room now for the XctReturn sequence at the end. */
+ if (ct_len < length + 3)
+ return -1;
+
+ memcpy(ctptr, ct_info->ct_sequence, length);
+ ctptr += length;
+ ct_len -= length + 3;
+ } else
+ /* Test whether the charset is already active. */
+ if (((side == XlcGR || side == XlcGLGR)
+ && charset != state->GR_charset)
+ || ((side == XlcGL || side == XlcGLGR)
+ && charset != state->GL_charset)) {
+
+ /* Output the Escape sequence for switching to the charset. */
+ if (ct_info->type == XctExtSeg) {
+ if (ct_len < length + 2 + ct_info->ext_segment_len)
+ return -1;
+
+ memcpy(ctptr, ct_info->ct_sequence, length);
+ ctptr += length;
+ ct_len -= length;
+
+ /* Leave two bytes for the segment length; they are filled in
+ once the amount of data written is known. */
+ ctptr += 2;
+ ct_len -= 2;
+ ext_segment_start = ctptr;
+
+ /* The size of an extended segment must fit in 14 bits. */
+ if (ct_len > 0x3fff)
+ ct_len = 0x3fff;
+
+ memcpy(ctptr, ct_info->ext_segment, ct_info->ext_segment_len);
+ ctptr += ct_info->ext_segment_len;
+ ct_len -= ct_info->ext_segment_len;
+ } else {
+ if (ct_len < length)
+ return -1;
+
+ memcpy(ctptr, ct_info->ct_sequence, length);
+ ctptr += length;
+ ct_len -= length;
+ }
+ }
+
+ /* If the charset has side GL or GR, prepare remapping the characters
+ to the correct side. */
+ /* min_ch..max_ch is the permitted range after masking with 0x7f:
+ 0x20..0x7f for 96-sets, 0x20..0x7e for single byte GL 94-sets,
+ 0x21..0x7e for GR or multibyte 94-sets. */
+ if (charset->set_size) {
+ min_ch = 0x20;
+ max_ch = 0x7f;
+ if (charset->set_size == 94) {
+ max_ch--;
+ if (charset->char_size > 1 || side == XlcGR)
+ min_ch++;
+ }
+ }
+
+ /* Actually copy the contents. */
+ unconv_num = 0;
+ char_size = charset->char_size;
+ if (char_size == 1) {
+ while (csstr_len > 0 && ct_len > 0) {
+ if (charset->set_size) {
+ /* The CompoundText specification says that the only
+ control characters allowed are 0x09, 0x0a, 0x1b, 0x9b.
+ Therefore here we eliminate other control characters. */
+ unsigned char ch = *((unsigned char *) csptr) & 0x7f;
+ if (!((ch >= min_ch && ch <= max_ch)
+ || (side == XlcGL
+ && (ch == 0x00 || ch == 0x09 || ch == 0x0a))
+ || ((side == XlcGL || side == XlcGR)
+ && (ch == 0x1b)))) {
+ csptr++;
+ csstr_len--;
+ unconv_num++;
+ continue;
+ }
+ }
+
+ /* Force the high bit to match the charset's side. */
+ if (side == XlcGL)
+ *ctptr++ = *csptr++ & 0x7f;
+ else if (side == XlcGR)
+ *ctptr++ = *csptr++ | 0x80;
+ else
+ *ctptr++ = *csptr++;
+ csstr_len--;
+ ct_len--;
+ }
+ } else if (char_size > 1) {
+ /* Fixed width multibyte charset: only whole characters are copied. */
+ while (csstr_len >= char_size && ct_len >= char_size) {
+ if (side == XlcGL) {
+ int i;
+ for (i = char_size; i > 0; i--)
+ *ctptr++ = *csptr++ & 0x7f;
+ } else if (side == XlcGR) {
+ int i;
+ for (i = char_size; i > 0; i--)
+ *ctptr++ = *csptr++ | 0x80;
+ } else {
+ int i;
+ for (i = char_size; i > 0; i--)
+ *ctptr++ = *csptr++;
+ }
+ csstr_len -= char_size;
+ ct_len -= char_size;
+ }
+ } else {
+ /* char_size = 0. The code here is good only for valid UTF-8 input. */
+ if ((charset->ct_sequence[0] == XctESC
+ && charset->ct_sequence[1] == XctOtherCoding
+ && charset->ct_sequence[2] == 'G')
+#if UTF8_IN_EXTSEQ
+ || strcmp(charset->name, "ISO10646-1") == 0
+#endif
+ ) {
+ while (csstr_len > 0 && ct_len > 0) {
+ unsigned char ch = * (unsigned char *) csptr;
+ /* Sequence length as implied by the lead byte. */
+ int char_size = (ch < 0xc0 ? 1 :
+ ch < 0xe0 ? 2 :
+ ch < 0xf0 ? 3 :
+ ch < 0xf8 ? 4 :
+ ch < 0xfc ? 5 :
+ 6);
+ int i;
+ if (!(csstr_len >= char_size && ct_len >= char_size))
+ break;
+ for (i = char_size; i > 0; i--)
+ *ctptr++ = *csptr++;
+ csstr_len -= char_size;
+ ct_len -= char_size;
+ }
+ } else {
+ while (csstr_len > 0 && ct_len > 0) {
+ *ctptr++ = *csptr++;
+ csstr_len--;
+ ct_len--;
+ }
+ }
+ }
+
+ if (ct_info->type == XctOtherCoding) {
+ /* Terminate with an XctReturn sequence. */
+ /* Room for these three bytes was reserved before copying. */
+ ctptr[0] = XctESC;
+ ctptr[1] = XctOtherCoding;
+ ctptr[2] = '@';
+ ctptr += 3;
+ } else if (ext_segment_start != NULL) {
+ /* Backpatch the extended segment's length. */
+ /* Two bytes, 7 bits each, high bit set on both. */
+ int ext_segment_length = ctptr - ext_segment_start;
+ *(ext_segment_start - 2) = (ext_segment_length >> 7) | 0x80;
+ *(ext_segment_start - 1) = (ext_segment_length & 0x7f) | 0x80;
+ } else {
+ /* Remember the designation so that the next call for the same
+ charset does not emit the ESC sequence again. */
+ if (side == XlcGR || side == XlcGLGR)
+ state->GR_charset = charset;
+ if (side == XlcGL || side == XlcGLGR)
+ state->GL_charset = charset;
+ }
+
+ *from_left -= csptr - *((const char **) from);
+ *from = (XPointer) csptr;
+
+ *to_left -= ctptr - *((char **) to);
+ *to = (XPointer) ctptr;
+
+ return 0;
+}
+
+/* from XlcNString to XlcNCharSet */
+
+/*
+ * Copies the leading run of bytes that share the same high bit (all GL or
+ * all GR) from the input to the output, limited by the smaller of
+ * *from_left and *to_left.
+ *
+ * *from / *from_left and *to / *to_left are advanced by the number of
+ * bytes copied. If num_args > 0, args[0] (an XlcCharSet *) receives the
+ * state's GR charset when the run has the high bit set and the GL charset
+ * otherwise (also for empty input).
+ *
+ * Always returns 0.
+ */
+static int
+strtocs(
+    XlcConv conv,
+    XPointer *from,
+    int *from_left,
+    XPointer *to,
+    int *to_left,
+    XPointer *args,
+    int num_args)
+{
+    State state = (State) conv->state;
+    const char *src;
+    char *dst;
+    unsigned char side = 0;
+    int length;
+
+    src = (const char *) *from;
+    dst = (char *) *to;
+
+    length = min(*from_left, *to_left);
+
+    /* Only look at the first byte if there is one. */
+    if (*from_left > 0)
+        side = *((const unsigned char *) src) & 0x80;
+
+    /* Test the remaining length before dereferencing src, so that the
+       byte following the input is never read. */
+    while (length > 0 && side == (*((const unsigned char *) src) & 0x80)) {
+        *dst++ = *src++;
+        length--;
+    }
+
+    *from_left -= src - (const char *) *from;
+    *from = (XPointer) src;
+    *to_left -= dst - (char *) *to;
+    *to = (XPointer) dst;
+
+    if (num_args > 0)
+        *((XlcCharSet *)args[0]) = (side ? state->GR_charset : state->GL_charset);
+
+    return 0;
+}
+
+/* from XlcNCharSet to XlcNString */
+
+/*
+ * Copies characters of the converter's GL or GR charset to a STRING,
+ * leaving out every control character other than NUL, TAB and NEWLINE
+ * (i.e. 0x01..0x1f except 0x09/0x0a, and 0x7f..0x9f).
+ *
+ * args[0] must be the XlcCharSet of the input and has to equal the
+ * state's GL or GR charset, otherwise -1 is returned and nothing is
+ * converted. On success the pointers and counters are advanced and the
+ * number of dropped characters is returned.
+ */
+static int
+cstostr(
+    XlcConv conv,
+    XPointer *from,
+    int *from_left,
+    XPointer *to,
+    int *to_left,
+    XPointer *args,
+    int num_args)
+{
+    State state = (State) conv->state;
+    const unsigned char *in;
+    unsigned char *out;
+    int in_left, out_left;
+    int dropped = 0;
+
+    /* This converter can only convert from ISO8859-1:GL and ISO8859-1:GR. */
+    if (num_args < 1)
+        return -1;
+    if ((XlcCharSet) args[0] != state->GL_charset
+        && (XlcCharSet) args[0] != state->GR_charset)
+        return -1;
+
+    in = (const unsigned char *) *from;
+    out = (unsigned char *) *to;
+    in_left = *from_left;
+    out_left = *to_left;
+
+    while (in_left > 0 && out_left > 0) {
+        unsigned char ch = *in++;
+        int c0_control, del_or_c1;
+
+        in_left--;
+
+        /* Citing ICCCM: "STRING as a type specifies the ISO Latin-1
+           character set plus the control characters TAB and NEWLINE." */
+        c0_control = (ch < 0x20 && ch != 0x00 && ch != 0x09 && ch != 0x0a);
+        del_or_c1 = (ch >= 0x7f && ch < 0xa0);
+
+        if (c0_control || del_or_c1) {
+            dropped++;
+        } else {
+            *out++ = ch;
+            out_left--;
+        }
+    }
+
+    *from_left -= (const char *) in - *((const char **) from);
+    *from = (XPointer) in;
+
+    *to_left -= (char *) out - *((char **) to);
+    *to = (XPointer) out;
+
+    return dropped;
+}
+
+
+/*
+ * Allocates a converter using the given method table. The converter
+ * record and its StateRec come from a single allocation, with the state
+ * placed directly behind the record; the state is initialized through
+ * init_state. Returns NULL if the allocation fails.
+ */
+static XlcConv
+create_conv(
+    XlcConvMethods methods)
+{
+    XlcConv conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + sizeof(StateRec));
+
+    if (conv != NULL) {
+        conv->methods = methods;
+        conv->state = (XPointer) (conv + 1);
+        init_state(conv);
+    }
+
+    return conv;
+}
+
+/* Destroys a converter made by create_conv. The state lives in the same
+   allocation as the converter record, so one Xfree releases both. */
+static void
+close_converter(
+    XlcConv conv)
+{
+    Xfree(conv);
+}
+
+
+/* Method table of the CompoundText -> CharSet converter.
+ NOTE(review): the slots are presumably (close, convert, reset) as
+ declared for XlcConvMethodsRec - confirm against its definition. */
+static XlcConvMethodsRec cttocs_methods = {
+ close_converter,
+ cttocs,
+ init_state
+};
+
+/* Opener registered for XlcNCompoundText -> XlcNCharSet in
+ _XlcInitCTInfo. None of the four arguments is used; returns a new
+ converter from create_conv, or NULL if allocation fails. */
+static XlcConv
+open_cttocs(
+ XLCd from_lcd,
+ const char *from_type,
+ XLCd to_lcd,
+ const char *to_type)
+{
+ return create_conv(&cttocs_methods);
+}
+
+
+/* Method table of the CharSet -> CompoundText converter.
+ NOTE(review): the slots are presumably (close, convert, reset) as
+ declared for XlcConvMethodsRec - confirm against its definition. */
+static XlcConvMethodsRec cstoct_methods = {
+ close_converter,
+ cstoct,
+ init_state
+};
+
+/* Opener registered for XlcNCharSet -> XlcNCompoundText in
+ _XlcInitCTInfo. None of the four arguments is used; returns a new
+ converter from create_conv, or NULL if allocation fails. */
+static XlcConv
+open_cstoct(
+ XLCd from_lcd,
+ const char *from_type,
+ XLCd to_lcd,
+ const char *to_type)
+{
+ return create_conv(&cstoct_methods);
+}
+
+
+/* Method table of the String -> CharSet converter.
+ NOTE(review): the slots are presumably (close, convert, reset) as
+ declared for XlcConvMethodsRec - confirm against its definition. */
+static XlcConvMethodsRec strtocs_methods = {
+ close_converter,
+ strtocs,
+ init_state
+};
+
+/* Opener registered for XlcNString -> XlcNCharSet in _XlcInitCTInfo.
+ None of the four arguments is used; returns a new converter from
+ create_conv, or NULL if allocation fails. */
+static XlcConv
+open_strtocs(
+ XLCd from_lcd,
+ const char *from_type,
+ XLCd to_lcd,
+ const char *to_type)
+{
+ return create_conv(&strtocs_methods);
+}
+
+
+/* Method table of the CharSet -> String converter.
+ NOTE(review): the slots are presumably (close, convert, reset) as
+ declared for XlcConvMethodsRec - confirm against its definition. */
+static XlcConvMethodsRec cstostr_methods = {
+ close_converter,
+ cstostr,
+ init_state
+};
+
+/* Opener registered for XlcNCharSet -> XlcNString in _XlcInitCTInfo.
+ None of the four arguments is used; returns a new converter from
+ create_conv, or NULL if allocation fails. */
+static XlcConv
+open_cstostr(
+ XLCd from_lcd,
+ const char *from_type,
+ XLCd to_lcd,
+ const char *to_type)
+{
+ return create_conv(&cstostr_methods);
+}
+
+
+/* =========================== Initialization =========================== */
+
+/*
+ * One-time setup of the Compound Text machinery: registers every entry of
+ * default_ct_data through _XlcAddCT and installs the four converters
+ * between CompoundText / String and CharSet. Does nothing when ct_list is
+ * already populated. Always returns True.
+ */
+Bool
+_XlcInitCTInfo(void)
+{
+    const CTDataRec *entry;
+    const CTDataRec *table_end;
+
+    if (ct_list != NULL)
+        return True;
+
+    /* Populate ct_list from the built-in table. */
+    table_end = default_ct_data
+                + sizeof(default_ct_data) / sizeof(CTDataRec);
+    for (entry = default_ct_data; entry != table_end; entry++) {
+        XlcCharSet cs = _XlcAddCT(entry->name, entry->ct_sequence);
+
+        if (cs == NULL)
+            continue;
+        /* Charsets whose sequence starts with ESC % / (an extended
+           segment) are tagged CSsrcXLC, all others CSsrcStd. */
+        if (strncmp(cs->ct_sequence, "\x1b\x25\x2f", 3) == 0)
+            cs->source = CSsrcXLC;
+        else
+            cs->source = CSsrcStd;
+    }
+
+    /* Converters into CharSet ... */
+    _XlcSetConverter((XLCd) NULL, XlcNCompoundText,
+                     (XLCd) NULL, XlcNCharSet,
+                     open_cttocs);
+    _XlcSetConverter((XLCd) NULL, XlcNString,
+                     (XLCd) NULL, XlcNCharSet,
+                     open_strtocs);
+
+    /* ... and out of CharSet. */
+    _XlcSetConverter((XLCd) NULL, XlcNCharSet,
+                     (XLCd) NULL, XlcNCompoundText,
+                     open_cstoct);
+    _XlcSetConverter((XLCd) NULL, XlcNCharSet,
+                     (XLCd) NULL, XlcNString,
+                     open_cstostr);
+
+    return True;
+}
|