aboutsummaryrefslogtreecommitdiff
path: root/nx-X11/programs/xterm/charclass.c
blob: 96220607c0e2b306f168840acd7cca4883fa3d14 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
/* $XTermId: charclass.c,v 1.10 2005/01/14 01:50:02 tom Exp $ */

/*
 * Compact and efficient reimplementation of the
 * xterm character class mechanism for large character sets
 *
 * Markus Kuhn -- mkuhn@acm.org -- 2000-07-03
 *
 * Xterm allows users to select entire words with a double-click on
 * the left mouse button. Opinions might differ on what type of
 * characters are part of separate words, therefore xterm allows users
 * to configure a class code for each 8-bit character. Words are
 * maximum length sequences of neighboring characters with identical
 * class code. Naively extending this mechanism to Unicode would
 * require a class code table of at least 2^16 entries (128 kB).
 * Instead, we transform the character class table into a list
 * of intervals, that will be accessed via a linear search.
 * Changes made to the table by the user will be appended; where
 * intervals overlap, the most recently appended one wins. A special
 * class code -1 (default) marks characters that use their own code
 * number as the class code. We could alternatively use a sorted table of
 * non-overlapping intervals that can be accessed via binary search,
 * but merging in new intervals is significantly more hassle and
 * not worth the effort here.
 */
/* $XFree86: xc/programs/xterm/charclass.c,v 1.5 2005/01/14 01:50:02 dickey Exp $ */

#include <xterm.h>
#include <charclass.h>

#if OPT_WIDE_CHARS

/*
 * One interval of the character class table: every character code in
 * the inclusive range [first, last] is assigned the class code cclass.
 * Entry 0 of the table is used for bookkeeping instead (see below).
 */
struct classentry {
    int cclass;			/* class code of the interval */
    int first;			/* lowest character code covered */
    int last;			/* highest character code covered */
};

/* the interval table; allocated by init_classtab() */
static struct classentry *classtab;

/*
 * Special convention for classtab[0]:
 * - classtab[0].cclass is the allocated number of entries in classtab
 * - classtab[0].first = 1 (first used entry in classtab)
 * - classtab[0].last is the last used entry in classtab
 */

int
SetCharacterClassRange(int low, int high, int value)
{
    if (high < low)
	return -1;		/* nothing to do */

    /* make sure we have at least one free entry left at table end */
    if (classtab[0].last > classtab[0].cclass - 2) {
	classtab[0].cclass += 5 + classtab[0].cclass / 4;
	classtab = TypeRealloc(struct classentry, classtab[0].cclass, classtab);
	if (!classtab)
	    abort();
    }

    /* simply append new interval to end of interval array */
    classtab[0].last++;
    classtab[classtab[0].last].first = low;
    classtab[classtab[0].last].last = high;
    classtab[classtab[0].last].cclass = value;

    return 0;
}

/*
 * Allocate the class table and load the built-in default intervals.
 *
 * The defaults are appended in the order listed below.  That order
 * matters: a later interval overrides an earlier one wherever they
 * overlap (e.g. the punctuation ranges carved out of 0x0100..0xffdf).
 * A class of -1 means "each character is its own class".
 *
 * Aborts the process if the initial allocation fails.
 */
void
init_classtab(void)
{
    enum { INITIAL_ENTRIES = 50 };

    static const struct {
	int low;
	int high;
	int value;
    } defaults[] = {
	/* old xterm default classes */
	{ 0, 0, 32 },
	{ 1, 31, 1 },
	{ '\t', '\t', 32 },
	{ '0', '9', 48 },
	{ 'A', 'Z', 48 },
	{ '_', '_', 48 },
	{ 'a', 'z', 48 },
	{ 127, 159, 1 },
	{ 160, 191, -1 },
	{ 192, 255, 48 },
	{ 215, 215, 216 },
	{ 247, 247, 248 },

	/* added Unicode classes */
	{ 0x0100, 0xffdf, 48 },		/* mostly characters */
	{ 0x037e, 0x037e, -1 },		/* Greek question mark */
	{ 0x0387, 0x0387, -1 },		/* Greek ano teleia */
	{ 0x055a, 0x055f, -1 },		/* Armenian punctuation */
	{ 0x0589, 0x0589, -1 },		/* Armenian full stop */
	{ 0x0700, 0x070d, -1 },		/* Syriac punctuation */
	{ 0x104a, 0x104f, -1 },		/* Myanmar punctuation */
	{ 0x10fb, 0x10fb, -1 },		/* Georgian punctuation */
	{ 0x1361, 0x1368, -1 },		/* Ethiopic punctuation */
	{ 0x166d, 0x166e, -1 },		/* Canadian Syl. punctuation */
	{ 0x17d4, 0x17dc, -1 },		/* Khmer punctuation */
	{ 0x1800, 0x180a, -1 },		/* Mongolian punctuation */
	{ 0x2000, 0x200a, 32 },		/* spaces */
	{ 0x200b, 0x27ff, -1 },		/* punctuation and symbols */
	{ 0x2070, 0x207f, 0x2070 },	/* superscript */
	{ 0x2080, 0x208f, 0x2080 },	/* subscript */
	{ 0x3000, 0x3000, 32 },		/* ideographic space */
	{ 0x3001, 0x3020, -1 },		/* ideographic punctuation */
	{ 0x3040, 0x309f, 0x3040 },	/* Hiragana */
	{ 0x30a0, 0x30ff, 0x30a0 },	/* Katakana */
	{ 0x3300, 0x9fff, 0x4e00 },	/* CJK Ideographs */
	{ 0xac00, 0xd7a3, 0xac00 },	/* Hangul Syllables */
	{ 0xf900, 0xfaff, 0x4e00 },	/* CJK Ideographs */
	{ 0xfe30, 0xfe6b, -1 },		/* punctuation forms */
	{ 0xff00, 0xff0f, -1 },		/* half/fullwidth ASCII */
	{ 0xff1a, 0xff20, -1 },		/* half/fullwidth ASCII */
	{ 0xff3b, 0xff40, -1 },		/* half/fullwidth ASCII */
	{ 0xff5b, 0xff64, -1 }		/* half/fullwidth ASCII */
    };
    unsigned n;

    classtab = TypeMallocN(struct classentry, INITIAL_ENTRIES);
    if (!classtab)
	abort();

    /* entry 0 is bookkeeping: capacity, first used index, last used index */
    classtab[0].cclass = INITIAL_ENTRIES;
    classtab[0].first = 1;
    classtab[0].last = 0;

    for (n = 0; n < sizeof(defaults) / sizeof(defaults[0]); n++)
	SetCharacterClassRange(defaults[n].low,
			       defaults[n].high,
			       defaults[n].value);
}

int
CharacterClass(int c)
{
    int i, cclass = -1;

    for (i = classtab[0].first; i <= classtab[0].last; i++)
	if (classtab[i].first <= c && classtab[i].last >= c)
	    cclass = classtab[i].cclass;

    if (cclass < 0)
	cclass = c;

    return cclass;
}

#endif /* OPT_WIDE_CHARS */