/* * Regular expression matching for expr(1). Bugs: The upper bound of * a range specified by the \{ feature cannot be zero. * * Copyright (C) 1989 by Kenneth Almquist. All rights reserved. * This file is part of ash, which is distributed under the terms specified * by the Ash General Public License. See the file named LICENSE. */ #include "bltin.h" #define RE_END 0 /* end of regular expression */ #define RE_LITERAL 1 /* normal character follows */ #define RE_DOT 2 /* "." */ #define RE_CCL 3 /* "[...]" */ #define RE_NCCL 4 /* "[^...]" */ #define RE_LP 5 /* "\(" */ #define RE_RP 6 /* "\)" */ #define RE_MATCHED 7 /* "\digit" */ #define RE_EOS 8 /* "$" matches end of string */ #define RE_STAR 9 /* "*" */ #define RE_RANGE 10 /* "\{num,num\}" */ char *match_begin[10]; short match_length[10]; short number_parens; static int match(); char * re_compile(pattern) char *pattern; { register char *p; register char c; char *comp; register char *q; char *begin; char *endp; register int len; int first; int type; char *stackp; char stack[10]; int paren_num; int i; char *malloc(); p = pattern; if (*p == '^') p++; comp = q = malloc(2 * strlen(p) + 1); begin = q; stackp = stack; paren_num = 0; for (;;) { switch (c = *p++) { case '\0': *q = '\0'; goto out; case '.': *q++ = RE_DOT; len = 1; break; case '[': begin = q; *q = RE_CCL; if (*p == '^') { *q = RE_NCCL; p++; } q++; first = 1; while (*p != ']' || first == 1) { if (p[1] == '-' && p[2] != ']') { *q++ = '-'; *q++ = p[0]; *q++ = p[2]; p += 3; } else if (*p == '-') { *q++ = '-'; *q++ = '-'; *q++ = '-'; p++; } else { *q++ = *p++; } first = 0; } p++; *q++ = '\0'; len = q - begin; break; case '$': if (*p != '\0') goto dft; *q++ = RE_EOS; break; case '*': if (len == 0) goto dft; type = RE_STAR; range: i = (type == RE_RANGE)? 3 : 1; endp = q + i; begin = q - len; do { --q; *(q + i) = *q; } while (--len > 0); q = begin; *q++ = type; if (type == RE_RANGE) { i = 0; while ((unsigned)(*p - '0') <= 9) i = 10 * i + (*p++ - '0'); *q++ = i; if (*p != ',') { *q++ = i; } else { p++; i = 0; while ((unsigned)(*p - '0') <= 9) i = 10 * i + (*p++ - '0'); *q++ = i; } if (*p != '\\' || *++p != '}') error("RE error"); p++; } q = endp; break; case '\\': if ((c = *p++) == '(') { if (++paren_num > 9) error("RE error"); *q++ = RE_LP; *q++ = paren_num; *stackp++ = paren_num; len = 0; } else if (c == ')') { if (stackp == stack) error("RE error"); *q++ = RE_RP; *q++ = *--stackp; len = 0; } else if (c == '{') { type = RE_RANGE; goto range; } else if ((unsigned)(c - '1') < 9) { /* should check validity here */ *q++ = RE_MATCHED; *q++ = c - '0'; len = 2; } else { goto dft; } break; default: dft: *q++ = RE_LITERAL; *q++ = c; len = 2; break; } } out: if (stackp != stack) error("RE error"); number_parens = paren_num; return comp; } re_match(pattern, string) char *pattern; char *string; { char **pp; match_begin[0] = string; for (pp = &match_begin[1] ; pp <= &match_begin[9] ; pp++) *pp = 0; return match(pattern, string); } static match(pattern, string) char *pattern; char *string; { register char *p, *q; int counting; int low, high, count; char *curpat; char *start_count; int negate; int found; char *r; int len; char c; p = pattern; q = string; counting = 0; for (;;) { if (counting) { if (++count > high) goto bad; p = curpat; } switch (*p++) { case RE_END: match_length[0] = q - match_begin[0]; return 1; case RE_LITERAL: if (*q++ != *p++) goto bad; break; case RE_DOT: if (*q++ == '\0') goto bad; break; case RE_CCL: negate = 0; goto ccl; case RE_NCCL: negate = 1; ccl: found = 0; c = *q++; while (*p) { if (*p == '-') { if (c >= *++p && c <= *++p) found = 1; } else { if (c == *p) found = 1; } p++; } p++; if (found == negate || c == 0) goto bad; break; case RE_LP: match_begin[*p++] = q; break; case RE_RP: match_length[*p] = q - match_begin[*p]; p++; break; case RE_MATCHED: r = match_begin[*p]; len = match_length[*p++]; while (--len >= 0) { if (*q++ != *r++) goto bad; } break; case RE_EOS: if (*q != '\0') goto bad; break; case RE_STAR: low = 0; high = 32767; goto range; case RE_RANGE: low = *p++; high = *p++; if (high == 0) high = 32767; range: curpat = p; start_count = q; count = 0; counting++; break; } } bad: if (! counting) return 0; len = 1; if (*curpat == RE_MATCHED) len = match_length[curpat[1]]; while (--count >= low) { if (match(p, start_count + count * len)) return 1; } return 0; }