diff options
author | marha <marha@users.sourceforge.net> | 2014-04-14 23:43:21 +0200 |
---|---|---|
committer | marha <marha@users.sourceforge.net> | 2014-04-14 23:43:21 +0200 |
commit | a3fe3e22d85e8aa795df85c21814fc84cac42e99 (patch) | |
tree | 0b696c0a3e836781bc527015dcd28cacc9d0ef9f /tools/plink/wildcard.c | |
parent | 242d48135a12fc9167430f391ba0d27d9ad44c6b (diff) | |
download | vcxsrv-a3fe3e22d85e8aa795df85c21814fc84cac42e99.tar.gz vcxsrv-a3fe3e22d85e8aa795df85c21814fc84cac42e99.tar.bz2 vcxsrv-a3fe3e22d85e8aa795df85c21814fc84cac42e99.zip |
plink: updated to revision 10170 of putty
Diffstat (limited to 'tools/plink/wildcard.c')
-rw-r--r-- | tools/plink/wildcard.c | 945 |
1 files changed, 473 insertions, 472 deletions
diff --git a/tools/plink/wildcard.c b/tools/plink/wildcard.c index 75a7573b2..c1cb0b49e 100644 --- a/tools/plink/wildcard.c +++ b/tools/plink/wildcard.c @@ -1,472 +1,473 @@ -/*
- * Wildcard matching engine for use with SFTP-based file transfer
- * programs (PSFTP, new-look PSCP): since SFTP has no notion of
- * getting the remote side to do globbing (and rightly so) we have
- * to do it locally, by retrieving all the filenames in a directory
- * and checking each against the wildcard pattern.
- */
-
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "putty.h"
-
-/*
- * Definition of wildcard syntax:
- *
- * - * matches any sequence of characters, including zero.
- * - ? matches exactly one character which can be anything.
- * - [abc] matches exactly one character which is a, b or c.
- * - [a-f] matches anything from a through f.
- * - [^a-f] matches anything _except_ a through f.
- * - [-_] matches - or _; [^-_] matches anything else. (The - is
- * non-special if it occurs immediately after the opening
- * bracket or ^.)
- * - [a^] matches an a or a ^. (The ^ is non-special if it does
- * _not_ occur immediately after the opening bracket.)
- * - \*, \?, \[, \], \\ match the single characters *, ?, [, ], \.
- * - All other characters are non-special and match themselves.
- */
-
-/*
- * Some notes on differences from POSIX globs (IEEE Std 1003.1, 2003 ed.):
- * - backslashes act as escapes even within [] bracket expressions
- * - does not support [!...] for non-matching list (POSIX are weird);
- * NB POSIX allows [^...] as well via "A bracket expression starting
- * with an unquoted circumflex character produces unspecified
- * results". If we wanted to allow [!...] we might want to define
- * [^!] as having its literal meaning (match '^' or '!').
- * - none of the scary [[:class:]] stuff, etc
- */
-
-/*
- * The wildcard matching technique we use is very simple and
- * potentially O(N^2) in running time, but I don't anticipate it
- * being that bad in reality (particularly since N will be the size
- * of a filename, which isn't all that much). Perhaps one day, once
- * PuTTY has grown a regexp matcher for some other reason, I might
- * come back and reimplement wildcards by translating them into
- * regexps or directly into NFAs; but for the moment, in the
- * absence of any other need for the NFA->DFA translation engine,
- * anything more than the simplest possible wildcard matcher is
- * vast code-size overkill.
- *
- * Essentially, these wildcards are much simpler than regexps in
- * that they consist of a sequence of rigid fragments (? and [...]
- * can never match more or less than one character) separated by
- * asterisks. It is therefore extremely simple to look at a rigid
- * fragment and determine whether or not it begins at a particular
- * point in the test string; so we can search along the string
- * until we find each fragment, then search for the next. As long
- * as we find each fragment in the _first_ place it occurs, there
- * will never be a danger of having to backpedal and try to find it
- * again somewhere else.
- */
-
/*
 * Syntax-error codes for wildcards. The matching routines hand these
 * back negated, so an error is always < 0 and can never be mistaken
 * for the ordinary results 0 (no match) and 1 (match).
 */
enum {
    WC_TRAILINGBACKSLASH = 1,
    WC_UNCLOSEDCLASS,
    WC_INVALIDRANGE
};

/*
 * Translate a wildcard error code into a human-readable message.
 *
 * `value' may be given with either sign: both -WC_xxx (as returned
 * by the matching routines) and WC_xxx itself are accepted. The
 * result is a pointer to a string literal, so the caller must not
 * modify or free it. Any value that is not a known code yields an
 * "INTERNAL ERROR" message rather than NULL.
 */
const char *wc_error(int value)
{
    /*
     * Take the magnitude in unsigned arithmetic: unlike abs(), this
     * is well defined for every int, INT_MIN included.
     */
    unsigned code = value < 0 ? 0u - (unsigned) value : (unsigned) value;

    switch (code) {
      case WC_TRAILINGBACKSLASH:
        /*
         * The backslash has to be doubled here: a lone \' inside a
         * string literal is just an escaped quote, which would make
         * the message read '' instead of '\'.
         */
        return "'\\' occurred at end of string (expected another character)";
      case WC_UNCLOSEDCLASS:
        return "expected ']' to close character class";
      case WC_INVALIDRANGE:
        return "character range was not terminated (']' just after '-')";
      default:
        return "INTERNAL ERROR: unrecognised wildcard error number";
    }
}
-
-/*
- * This is the routine that tests a target string to see if an
- * initial substring of it matches a fragment. If successful, it
- * returns 1, and advances both `fragment' and `target' past the
- * fragment and matching substring respectively. If unsuccessful it
- * returns zero. If the wildcard fragment suffers a syntax error,
- * it returns <0 and the precise value indexes into wc_error.
- */
-static int wc_match_fragment(const char **fragment, const char **target)
-{
- const char *f, *t;
-
- f = *fragment;
- t = *target;
- /*
- * The fragment terminates at either the end of the string, or
- * the first (unescaped) *.
- */
- while (*f && *f != '*' && *t) {
- /*
- * Extract one character from t, and one character's worth
- * of pattern from f, and step along both. Return 0 if they
- * fail to match.
- */
- if (*f == '\\') {
- /*
- * Backslash, which means f[1] is to be treated as a
- * literal character no matter what it is. It may not
- * be the end of the string.
- */
- if (!f[1])
- return -WC_TRAILINGBACKSLASH; /* error */
- if (f[1] != *t)
- return 0; /* failed to match */
- f += 2;
- } else if (*f == '?') {
- /*
- * Question mark matches anything.
- */
- f++;
- } else if (*f == '[') {
- int invert = 0;
- int matched = 0;
- /*
- * Open bracket introduces a character class.
- */
- f++;
- if (*f == '^') {
- invert = 1;
- f++;
- }
- while (*f != ']') {
- if (*f == '\\')
- f++; /* backslashes still work */
- if (!*f)
- return -WC_UNCLOSEDCLASS; /* error again */
- if (f[1] == '-') {
- int lower, upper, ourchr;
- lower = (unsigned char) *f++;
- f++; /* eat the minus */
- if (*f == ']')
- return -WC_INVALIDRANGE; /* different error! */
- if (*f == '\\')
- f++; /* backslashes _still_ work */
- if (!*f)
- return -WC_UNCLOSEDCLASS; /* error again */
- upper = (unsigned char) *f++;
- ourchr = (unsigned char) *t;
- if (lower > upper) {
- int t = lower; lower = upper; upper = t;
- }
- if (ourchr >= lower && ourchr <= upper)
- matched = 1;
- } else {
- matched |= (*t == *f++);
- }
- }
- if (invert == matched)
- return 0; /* failed to match character class */
- f++; /* eat the ] */
- } else {
- /*
- * Non-special character matches itself.
- */
- if (*f != *t)
- return 0;
- f++;
- }
- /*
- * Now we've done that, increment t past the character we
- * matched.
- */
- t++;
- }
- if (!*f || *f == '*') {
- /*
- * We have reached the end of f without finding a mismatch;
- * so we're done. Update the caller pointers and return 1.
- */
- *fragment = f;
- *target = t;
- return 1;
- }
- /*
- * Otherwise, we must have reached the end of t before we
- * reached the end of f; so we've failed. Return 0.
- */
- return 0;
-}
-
/*
 * Match a complete target string against a complete wildcard.
 *
 * Returns 1 if the whole of `target' matches `wildcard', 0 if it
 * does not, and a negative value (see wc_error) if a syntax error is
 * found in the wildcard.
 */
int wc_match(const char *wildcard, const char *target)
{
    int status;

    /*
     * A fragment at the very start is anchored: it either matches
     * right here or the whole match fails. Fragments that follow a
     * '*' are instead searched for along the target.
     */
    if (*wildcard != '*') {
        status = wc_match_fragment(&wildcard, &target);
        if (status <= 0)
            return status;             /* mismatch and error alike */
    }

    while (*wildcard != '\0') {
        int found = 0;

        assert(*wildcard == '*');
        while (*wildcard == '*')
            wildcard++;

        /* A trailing '*' swallows whatever is left of the target. */
        if (*wildcard == '\0')
            return 1;

        /*
         * Slide along the target looking for the first place where
         * the next fragment matches. Running out of target first
         * means there is no match.
         */
        while (!found && *target != '\0') {
            const char *frag_start = wildcard;
            const char *attempt = target;

            status = wc_match_fragment(&wildcard, &target);
            if (status < 0)
                return status;         /* syntax error */
            if (status == 0) {
                target++;              /* no luck here; try one along */
                continue;
            }

            if (*wildcard == '\0' && *target != '\0') {
                /*
                 * The last fragment of the wildcard matched, but too
                 * early: e.g. "*a" against "parka" finds the first
                 * 'a' and leaves "rka" unconsumed. A final fragment
                 * can only succeed flush against the end of the
                 * target, so measure how many characters it consumed,
                 * retry the same fragment exactly that far from the
                 * end, and let that single attempt decide.
                 */
                size_t consumed = (size_t) (target - attempt);

                target = attempt + strlen(attempt) - consumed;
                wildcard = frag_start;
                return wc_match_fragment(&wildcard, &target);
            }

            found = 1;
        }

        if (!found)
            return 0;
    }

    /*
     * The wildcard is used up with a fragment as its last element,
     * so the match succeeds only if the target is used up too.
     */
    return *target == '\0';
}
-
/*
 * Translate a non-wildcard string into its raw equivalent by
 * removing any escaping backslashes.
 *
 * `output' must have room for as many bytes as `wildcard' occupies,
 * terminator included; the result is never longer than the input.
 * It may also be NULL, in which case nothing is written and only the
 * return value is of interest.
 *
 * Returns 1 on success, or 0 if an unescaped wildcard character
 * (*, ?, [ or ]) was encountered. In the latter case the output
 * buffer is NOT zero-terminated and should not be used for anything.
 */
int wc_unescape(char *output, const char *wildcard)
{
    while (*wildcard) {
        char c = *wildcard++;

        if (c == '\\') {
            /* We are lenient about trailing backslashes in non-wildcards. */
            if (!*wildcard)
                break;
            c = *wildcard++;           /* escaped character, taken literally */
        } else if (c == '*' || c == '?' || c == '[' || c == ']') {
            return 0;                  /* it's a wildcard! */
        }

        if (output)
            *output++ = c;
    }

    /* Guarded like every other store, so that a NULL buffer is safe. */
    if (output)
        *output = '\0';
    return 1;                          /* it's clean */
}
-
#ifdef TESTMODE

/*
 * Self-test driver, built only when TESTMODE is defined. The process
 * exit status is EXIT_FAILURE if any test case failed, so the result
 * can be checked by a script as well as by eye.
 */

/* One test case: a pattern, a subject string, and the expected result. */
struct test {
    const char *wildcard;
    const char *target;
    int expected_result;
};

const struct test fragment_tests[] = {
    /*
     * We exhaustively unit-test the fragment matching routine
     * itself, which should save us the need to test all its
     * intricacies during the full wildcard tests.
     */
    {"abc", "abc", 1},
    {"abc", "abd", 0},
    {"abc", "abcd", 1},
    {"abcd", "abc", 0},
    {"ab[cd]", "abc", 1},
    {"ab[cd]", "abd", 1},
    {"ab[cd]", "abe", 0},
    {"ab[^cd]", "abc", 0},
    {"ab[^cd]", "abd", 0},
    {"ab[^cd]", "abe", 1},
    {"ab\\", "abc", -WC_TRAILINGBACKSLASH},
    {"ab\\*", "ab*", 1},
    {"ab\\?", "ab*", 0},
    {"ab?", "abc", 1},
    {"ab?", "ab", 0},
    {"ab[", "abc", -WC_UNCLOSEDCLASS},
    {"ab[c-", "abb", -WC_UNCLOSEDCLASS},
    {"ab[c-]", "abb", -WC_INVALIDRANGE},
    {"ab[c-e]", "abb", 0},
    {"ab[c-e]", "abc", 1},
    {"ab[c-e]", "abd", 1},
    {"ab[c-e]", "abe", 1},
    {"ab[c-e]", "abf", 0},
    {"ab[e-c]", "abb", 0},
    {"ab[e-c]", "abc", 1},
    {"ab[e-c]", "abd", 1},
    {"ab[e-c]", "abe", 1},
    {"ab[e-c]", "abf", 0},
    {"ab[^c-e]", "abb", 1},
    {"ab[^c-e]", "abc", 0},
    {"ab[^c-e]", "abd", 0},
    {"ab[^c-e]", "abe", 0},
    {"ab[^c-e]", "abf", 1},
    {"ab[^e-c]", "abb", 1},
    {"ab[^e-c]", "abc", 0},
    {"ab[^e-c]", "abd", 0},
    {"ab[^e-c]", "abe", 0},
    {"ab[^e-c]", "abf", 1},
    {"ab[a^]", "aba", 1},
    {"ab[a^]", "ab^", 1},
    {"ab[a^]", "abb", 0},
    {"ab[^a^]", "aba", 0},
    {"ab[^a^]", "ab^", 0},
    {"ab[^a^]", "abb", 1},
    {"ab[-c]", "ab-", 1},
    {"ab[-c]", "abc", 1},
    {"ab[-c]", "abd", 0},
    {"ab[^-c]", "ab-", 0},
    {"ab[^-c]", "abc", 0},
    {"ab[^-c]", "abd", 1},
    {"ab[\\[-\\]]", "abZ", 0},
    {"ab[\\[-\\]]", "ab[", 1},
    {"ab[\\[-\\]]", "ab\\", 1},
    {"ab[\\[-\\]]", "ab]", 1},
    {"ab[\\[-\\]]", "ab^", 0},
    {"ab[^\\[-\\]]", "abZ", 1},
    {"ab[^\\[-\\]]", "ab[", 0},
    {"ab[^\\[-\\]]", "ab\\", 0},
    {"ab[^\\[-\\]]", "ab]", 0},
    {"ab[^\\[-\\]]", "ab^", 1},
    {"ab[a-fA-F]", "aba", 1},
    {"ab[a-fA-F]", "abF", 1},
    {"ab[a-fA-F]", "abZ", 0},
};

const struct test full_tests[] = {
    {"a", "argh", 0},
    {"a", "ba", 0},
    {"a", "a", 1},
    {"a*", "aardvark", 1},
    {"a*", "badger", 0},
    {"*a", "park", 0},
    {"*a", "pArka", 1},
    {"*a", "parka", 1},
    {"*a*", "park", 1},
    {"*a*", "perk", 0},
    {"?b*r?", "abracadabra", 1},
    {"?b*r?", "abracadabr", 0},
    {"?b*r?", "abracadabzr", 0},
};

/*
 * Compare one actual result with the expectation recorded in `tc',
 * printing a diagnostic on a mismatch. Returns 1 if the case passed
 * and 0 if it failed.
 */
static int wc_test_check(const struct test *tc, int actual)
{
    if (actual == tc->expected_result)
        return 1;
    printf("failed test: /%s/ against /%s/ returned %d not %d\n",
           tc->wildcard, tc->target, actual, tc->expected_result);
    return 0;
}

int main(void)
{
    size_t i;                          /* size_t: compared against sizeof */
    int fails = 0, passes = 0;

    for (i = 0; i < sizeof(fragment_tests) / sizeof(*fragment_tests); i++) {
        const struct test *tc = &fragment_tests[i];
        /* Work on copies: wc_match_fragment advances its arguments. */
        const char *f = tc->wildcard;
        const char *t = tc->target;

        if (wc_test_check(tc, wc_match_fragment(&f, &t)))
            passes++;
        else
            fails++;
    }

    for (i = 0; i < sizeof(full_tests) / sizeof(*full_tests); i++) {
        const struct test *tc = &full_tests[i];

        if (wc_test_check(tc, wc_match(tc->wildcard, tc->target)))
            passes++;
        else
            fails++;
    }

    printf("passed %d, failed %d\n", passes, fails);

    /* Report failure to the caller instead of always claiming success. */
    return fails ? EXIT_FAILURE : EXIT_SUCCESS;
}

#endif
+/* + * Wildcard matching engine for use with SFTP-based file transfer + * programs (PSFTP, new-look PSCP): since SFTP has no notion of + * getting the remote side to do globbing (and rightly so) we have + * to do it locally, by retrieving all the filenames in a directory + * and checking each against the wildcard pattern. + */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "putty.h" + +/* + * Definition of wildcard syntax: + * + * - * matches any sequence of characters, including zero. + * - ? matches exactly one character which can be anything. + * - [abc] matches exactly one character which is a, b or c. + * - [a-f] matches anything from a through f. + * - [^a-f] matches anything _except_ a through f. + * - [-_] matches - or _; [^-_] matches anything else. (The - is + * non-special if it occurs immediately after the opening + * bracket or ^.) + * - [a^] matches an a or a ^. (The ^ is non-special if it does + * _not_ occur immediately after the opening bracket.) + * - \*, \?, \[, \], \\ match the single characters *, ?, [, ], \. + * - All other characters are non-special and match themselves. + */ + +/* + * Some notes on differences from POSIX globs (IEEE Std 1003.1, 2003 ed.): + * - backslashes act as escapes even within [] bracket expressions + * - does not support [!...] for non-matching list (POSIX are weird); + * NB POSIX allows [^...] as well via "A bracket expression starting + * with an unquoted circumflex character produces unspecified + * results". If we wanted to allow [!...] we might want to define + * [^!] as having its literal meaning (match '^' or '!'). + * - none of the scary [[:class:]] stuff, etc + */ + +/* + * The wildcard matching technique we use is very simple and + * potentially O(N^2) in running time, but I don't anticipate it + * being that bad in reality (particularly since N will be the size + * of a filename, which isn't all that much). 
Perhaps one day, once + * PuTTY has grown a regexp matcher for some other reason, I might + * come back and reimplement wildcards by translating them into + * regexps or directly into NFAs; but for the moment, in the + * absence of any other need for the NFA->DFA translation engine, + * anything more than the simplest possible wildcard matcher is + * vast code-size overkill. + * + * Essentially, these wildcards are much simpler than regexps in + * that they consist of a sequence of rigid fragments (? and [...] + * can never match more or less than one character) separated by + * asterisks. It is therefore extremely simple to look at a rigid + * fragment and determine whether or not it begins at a particular + * point in the test string; so we can search along the string + * until we find each fragment, then search for the next. As long + * as we find each fragment in the _first_ place it occurs, there + * will never be a danger of having to backpedal and try to find it + * again somewhere else. + */ + +enum { + WC_TRAILINGBACKSLASH = 1, + WC_UNCLOSEDCLASS, + WC_INVALIDRANGE +}; + +/* + * Error reporting is done by returning various negative values + * from the wildcard routines. Passing any such value to wc_error + * will give a human-readable message. + */ +const char *wc_error(int value) +{ + value = abs(value); + switch (value) { + case WC_TRAILINGBACKSLASH: + return "'\' occurred at end of string (expected another character)"; + case WC_UNCLOSEDCLASS: + return "expected ']' to close character class"; + case WC_INVALIDRANGE: + return "character range was not terminated (']' just after '-')"; + } + return "INTERNAL ERROR: unrecognised wildcard error number"; +} + +/* + * This is the routine that tests a target string to see if an + * initial substring of it matches a fragment. If successful, it + * returns 1, and advances both `fragment' and `target' past the + * fragment and matching substring respectively. If unsuccessful it + * returns zero. 
If the wildcard fragment suffers a syntax error, + * it returns <0 and the precise value indexes into wc_error. + */ +static int wc_match_fragment(const char **fragment, const char **target) +{ + const char *f, *t; + + f = *fragment; + t = *target; + /* + * The fragment terminates at either the end of the string, or + * the first (unescaped) *. + */ + while (*f && *f != '*' && *t) { + /* + * Extract one character from t, and one character's worth + * of pattern from f, and step along both. Return 0 if they + * fail to match. + */ + if (*f == '\\') { + /* + * Backslash, which means f[1] is to be treated as a + * literal character no matter what it is. It may not + * be the end of the string. + */ + if (!f[1]) + return -WC_TRAILINGBACKSLASH; /* error */ + if (f[1] != *t) + return 0; /* failed to match */ + f += 2; + } else if (*f == '?') { + /* + * Question mark matches anything. + */ + f++; + } else if (*f == '[') { + int invert = 0; + int matched = 0; + /* + * Open bracket introduces a character class. + */ + f++; + if (*f == '^') { + invert = 1; + f++; + } + while (*f != ']') { + if (*f == '\\') + f++; /* backslashes still work */ + if (!*f) + return -WC_UNCLOSEDCLASS; /* error again */ + if (f[1] == '-') { + int lower, upper, ourchr; + lower = (unsigned char) *f++; + f++; /* eat the minus */ + if (*f == ']') + return -WC_INVALIDRANGE; /* different error! */ + if (*f == '\\') + f++; /* backslashes _still_ work */ + if (!*f) + return -WC_UNCLOSEDCLASS; /* error again */ + upper = (unsigned char) *f++; + ourchr = (unsigned char) *t; + if (lower > upper) { + int t = lower; lower = upper; upper = t; + } + if (ourchr >= lower && ourchr <= upper) + matched = 1; + } else { + matched |= (*t == *f++); + } + } + if (invert == matched) + return 0; /* failed to match character class */ + f++; /* eat the ] */ + } else { + /* + * Non-special character matches itself. 
+ */ + if (*f != *t) + return 0; + f++; + } + /* + * Now we've done that, increment t past the character we + * matched. + */ + t++; + } + if (!*f || *f == '*') { + /* + * We have reached the end of f without finding a mismatch; + * so we're done. Update the caller pointers and return 1. + */ + *fragment = f; + *target = t; + return 1; + } + /* + * Otherwise, we must have reached the end of t before we + * reached the end of f; so we've failed. Return 0. + */ + return 0; +} + +/* + * This is the real wildcard matching routine. It returns 1 for a + * successful match, 0 for an unsuccessful match, and <0 for a + * syntax error in the wildcard. + */ +int wc_match(const char *wildcard, const char *target) +{ + int ret; + + /* + * Every time we see a '*' _followed_ by a fragment, we just + * search along the string for a location at which the fragment + * matches. The only special case is when we see a fragment + * right at the start, in which case we just call the matching + * routine once and give up if it fails. + */ + if (*wildcard != '*') { + ret = wc_match_fragment(&wildcard, &target); + if (ret <= 0) + return ret; /* pass back failure or error alike */ + } + + while (*wildcard) { + assert(*wildcard == '*'); + while (*wildcard == '*') + wildcard++; + + /* + * It's possible we've just hit the end of the wildcard + * after seeing a *, in which case there's no need to + * bother searching any more because we've won. + */ + if (!*wildcard) + return 1; + + /* + * Now `wildcard' points at the next fragment. So we + * attempt to match it against `target', and if that fails + * we increment `target' and try again, and so on. When we + * find we're about to try matching against the empty + * string, we give up and return 0. 
+ */ + ret = 0; + while (*target) { + const char *save_w = wildcard, *save_t = target; + + ret = wc_match_fragment(&wildcard, &target); + + if (ret < 0) + return ret; /* syntax error */ + + if (ret > 0 && !*wildcard && *target) { + /* + * Final special case - literally. + * + * This situation arises when we are matching a + * _terminal_ fragment of the wildcard (that is, + * there is nothing after it, e.g. "*a"), and it + * has matched _too early_. For example, matching + * "*a" against "parka" will match the "a" fragment + * against the _first_ a, and then (if it weren't + * for this special case) matching would fail + * because we're at the end of the wildcard but not + * at the end of the target string. + * + * In this case what we must do is measure the + * length of the fragment in the target (which is + * why we saved `target'), jump straight to that + * distance from the end of the string using + * strlen, and match the same fragment again there + * (which is why we saved `wildcard'). Then we + * return whatever that operation returns. + */ + target = save_t + strlen(save_t) - (target - save_t); + wildcard = save_w; + return wc_match_fragment(&wildcard, &target); + } + + if (ret > 0) + break; + target++; + } + if (ret > 0) + continue; + return 0; + } + + /* + * If we reach here, it must be because we successfully matched + * a fragment and then found ourselves right at the end of the + * wildcard. Hence, we return 1 if and only if we are also + * right at the end of the target. + */ + return (*target ? 0 : 1); +} + +/* + * Another utility routine that translates a non-wildcard string + * into its raw equivalent by removing any escaping backslashes. + * Expects a target string buffer of anything up to the length of + * the original wildcard. You can also pass NULL as the output + * buffer if you're only interested in the return value. + * + * Returns 1 on success, or 0 if a wildcard character was + * encountered. 
In the latter case the output string MAY not be + * zero-terminated and you should not use it for anything! + */ +int wc_unescape(char *output, const char *wildcard) +{ + while (*wildcard) { + if (*wildcard == '\\') { + wildcard++; + /* We are lenient about trailing backslashes in non-wildcards. */ + if (*wildcard) { + if (output) + *output++ = *wildcard; + wildcard++; + } + } else if (*wildcard == '*' || *wildcard == '?' || + *wildcard == '[' || *wildcard == ']') { + return 0; /* it's a wildcard! */ + } else { + if (output) + *output++ = *wildcard; + wildcard++; + } + } + if (output) + *output = '\0'; + return 1; /* it's clean */ +} + +#ifdef TESTMODE + +struct test { + const char *wildcard; + const char *target; + int expected_result; +}; + +const struct test fragment_tests[] = { + /* + * We exhaustively unit-test the fragment matching routine + * itself, which should save us the need to test all its + * intricacies during the full wildcard tests. + */ + {"abc", "abc", 1}, + {"abc", "abd", 0}, + {"abc", "abcd", 1}, + {"abcd", "abc", 0}, + {"ab[cd]", "abc", 1}, + {"ab[cd]", "abd", 1}, + {"ab[cd]", "abe", 0}, + {"ab[^cd]", "abc", 0}, + {"ab[^cd]", "abd", 0}, + {"ab[^cd]", "abe", 1}, + {"ab\\", "abc", -WC_TRAILINGBACKSLASH}, + {"ab\\*", "ab*", 1}, + {"ab\\?", "ab*", 0}, + {"ab?", "abc", 1}, + {"ab?", "ab", 0}, + {"ab[", "abc", -WC_UNCLOSEDCLASS}, + {"ab[c-", "abb", -WC_UNCLOSEDCLASS}, + {"ab[c-]", "abb", -WC_INVALIDRANGE}, + {"ab[c-e]", "abb", 0}, + {"ab[c-e]", "abc", 1}, + {"ab[c-e]", "abd", 1}, + {"ab[c-e]", "abe", 1}, + {"ab[c-e]", "abf", 0}, + {"ab[e-c]", "abb", 0}, + {"ab[e-c]", "abc", 1}, + {"ab[e-c]", "abd", 1}, + {"ab[e-c]", "abe", 1}, + {"ab[e-c]", "abf", 0}, + {"ab[^c-e]", "abb", 1}, + {"ab[^c-e]", "abc", 0}, + {"ab[^c-e]", "abd", 0}, + {"ab[^c-e]", "abe", 0}, + {"ab[^c-e]", "abf", 1}, + {"ab[^e-c]", "abb", 1}, + {"ab[^e-c]", "abc", 0}, + {"ab[^e-c]", "abd", 0}, + {"ab[^e-c]", "abe", 0}, + {"ab[^e-c]", "abf", 1}, + {"ab[a^]", "aba", 1}, + {"ab[a^]", "ab^", 
1}, + {"ab[a^]", "abb", 0}, + {"ab[^a^]", "aba", 0}, + {"ab[^a^]", "ab^", 0}, + {"ab[^a^]", "abb", 1}, + {"ab[-c]", "ab-", 1}, + {"ab[-c]", "abc", 1}, + {"ab[-c]", "abd", 0}, + {"ab[^-c]", "ab-", 0}, + {"ab[^-c]", "abc", 0}, + {"ab[^-c]", "abd", 1}, + {"ab[\\[-\\]]", "abZ", 0}, + {"ab[\\[-\\]]", "ab[", 1}, + {"ab[\\[-\\]]", "ab\\", 1}, + {"ab[\\[-\\]]", "ab]", 1}, + {"ab[\\[-\\]]", "ab^", 0}, + {"ab[^\\[-\\]]", "abZ", 1}, + {"ab[^\\[-\\]]", "ab[", 0}, + {"ab[^\\[-\\]]", "ab\\", 0}, + {"ab[^\\[-\\]]", "ab]", 0}, + {"ab[^\\[-\\]]", "ab^", 1}, + {"ab[a-fA-F]", "aba", 1}, + {"ab[a-fA-F]", "abF", 1}, + {"ab[a-fA-F]", "abZ", 0}, +}; + +const struct test full_tests[] = { + {"a", "argh", 0}, + {"a", "ba", 0}, + {"a", "a", 1}, + {"a*", "aardvark", 1}, + {"a*", "badger", 0}, + {"*a", "park", 0}, + {"*a", "pArka", 1}, + {"*a", "parka", 1}, + {"*a*", "park", 1}, + {"*a*", "perk", 0}, + {"?b*r?", "abracadabra", 1}, + {"?b*r?", "abracadabr", 0}, + {"?b*r?", "abracadabzr", 0}, +}; + +int main(void) +{ + int i; + int fails, passes; + + fails = passes = 0; + + for (i = 0; i < sizeof(fragment_tests)/sizeof(*fragment_tests); i++) { + const char *f, *t; + int eret, aret; + f = fragment_tests[i].wildcard; + t = fragment_tests[i].target; + eret = fragment_tests[i].expected_result; + aret = wc_match_fragment(&f, &t); + if (aret != eret) { + printf("failed test: /%s/ against /%s/ returned %d not %d\n", + fragment_tests[i].wildcard, fragment_tests[i].target, + aret, eret); + fails++; + } else + passes++; + } + + for (i = 0; i < sizeof(full_tests)/sizeof(*full_tests); i++) { + const char *f, *t; + int eret, aret; + f = full_tests[i].wildcard; + t = full_tests[i].target; + eret = full_tests[i].expected_result; + aret = wc_match(f, t); + if (aret != eret) { + printf("failed test: /%s/ against /%s/ returned %d not %d\n", + full_tests[i].wildcard, full_tests[i].target, + aret, eret); + fails++; + } else + passes++; + } + + printf("passed %d, failed %d\n", passes, fails); + + return 0; +} + 
+#endif |