aboutsummaryrefslogtreecommitdiff
path: root/expat/xmlwf/ct.c
diff options
context:
space:
mode:
Diffstat (limited to 'expat/xmlwf/ct.c')
-rw-r--r--expat/xmlwf/ct.c147
1 files changed, 147 insertions, 0 deletions
diff --git a/expat/xmlwf/ct.c b/expat/xmlwf/ct.c
new file mode 100644
index 000000000..95903a345
--- /dev/null
+++ b/expat/xmlwf/ct.c
@@ -0,0 +1,147 @@
#include <stdio.h>
#include <string.h>

#define CHARSET_MAX 41
+
/* Scan the next lexical token from the string *pp.

   Recognised tokens:
     - an atom: a run of characters ended by whitespace, '(', ';', '/',
       '=', '"' or the end of the string;
     - a quoted string: from an opening '"' through its closing '"';
     - one of the single characters ';', '/' or '='.
   Whitespace and parenthesised comments (which may nest) between tokens
   are skipped.  A backslash always hides the character that follows it.

   Returns a pointer to the first character of the token and leaves *pp
   just past its last character (for a quoted string, just past the
   closing quote).  Returns 0 when no token is available: end of input,
   an unterminated string or comment, a trailing backslash, or a ')'
   that closes no comment. */
static const char *
getTok(const char **pp)
{
  /* The order matters: every value above `init` is a comment nesting
     depth (inComment == depth 1, inComment + 1 == depth 2, ...), so
     '(' can simply increment the state and ')' decrement it. */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;

  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom that runs up to the end of the input is still a token. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      if (state != inString)
        state++;                /* open a (possibly nested) comment */
      break;
    case ')':
      if (state > init)
        --state;                /* close the innermost comment */
      else if (state != inString)
        return 0;               /* ')' without a matching '(' */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++;         /* single-character token */
      break;
    case '\\':
      /* Skip the escaped character so that it is never interpreted. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;                  /* consume the closing quote */
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        break;                  /* a quote inside a comment is ignored */
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
+
/* Compare the token [start, end) with the NUL-terminated string key,
   ignoring the case of ASCII letters in the token.  key must be
   lowercase ASCII.

   Returns 1 when the token and key have the same length and every
   character matches, 0 otherwise.  A null start (no token) never
   matches. */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* Token longer than key: stop before reading past the terminator. */
    if (*key == '\0')
      return 0;
    if (*start == *key)
      continue;
    /* Only letters have an uppercase form that is allowed to match. */
    if (*key < 'a' || *key > 'z' || *start != *key - 'a' + 'A')
      return 0;
  }
  return *key == '\0';
}
+
+void
+getXMLCharset(const char *buf, char *charset)
+{
+ const char *next, *p;
+
+ charset[0] = '\0';
+ next = buf;
+ p = getTok(&next);
+ if (matchkey(p, next, "text"))
+ strcpy(charset, "us-ascii");
+ else if (!matchkey(p, next, "application"))
+ return;
+ p = getTok(&next);
+ if (!p || *p != '/')
+ return;
+ p = getTok(&next);
+ if (matchkey(p, next, "xml"))
+ isXml = 1;
+ p = getTok(&next);
+ while (p) {
+ if (*p == ';') {
+ p = getTok(&next);
+ if (matchkey(p, next, "charset")) {
+ p = getTok(&next);
+ if (p && *p == '=') {
+ p = getTok(&next);
+ if (p) {
+ char *s = charset;
+ if (*p == '"') {
+ while (++p != next - 1) {
+ if (*p == '\\')
+ ++p;
+ if (s == charset + CHARSET_MAX - 1) {
+ charset[0] = '\0';
+ break;
+ }
+ *s++ = *p;
+ }
+ *s++ = '\0';
+ }
+ else {
+ if (next - p > CHARSET_MAX - 1)
+ break;
+ while (p != next)
+ *s++ = *p++;
+ *s = 0;
+ break;
+ }
+ }
+ }
+ }
+ }
+ else
+ p = getTok(&next);
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ char buf[CHARSET_MAX];
+ getXMLCharset(argv[1], buf);
+ printf("charset = \"%s\"\n", buf);
+ return 0;
+}