[9] | 1 | /*
|
---|
| 2 | * Regular expression matching for expr(1). Bugs: The upper bound of
|
---|
| 3 | * a range specified by the \{ feature cannot be zero.
|
---|
| 4 | *
|
---|
| 5 | * Copyright (C) 1989 by Kenneth Almquist. All rights reserved.
|
---|
| 6 | * This file is part of ash, which is distributed under the terms specified
|
---|
| 7 | * by the Ash General Public License. See the file named LICENSE.
|
---|
| 8 | */
|
---|
| 9 |
|
---|
| 10 | #include "bltin.h"
|
---|
| 11 |
|
---|
| 12 |
|
---|
| 13 | #define RE_END 0 /* end of regular expression */
|
---|
| 14 | #define RE_LITERAL 1 /* normal character follows */
|
---|
| 15 | #define RE_DOT 2 /* "." */
|
---|
| 16 | #define RE_CCL 3 /* "[...]" */
|
---|
| 17 | #define RE_NCCL 4 /* "[^...]" */
|
---|
| 18 | #define RE_LP 5 /* "\(" */
|
---|
| 19 | #define RE_RP 6 /* "\)" */
|
---|
| 20 | #define RE_MATCHED 7 /* "\digit" */
|
---|
| 21 | #define RE_EOS 8 /* "$" matches end of string */
|
---|
| 22 | #define RE_STAR 9 /* "*" */
|
---|
| 23 | #define RE_RANGE 10 /* "\{num,num\}" */
|
---|
| 24 |
|
---|
| 25 |
|
---|
| 26 |
|
---|
| 27 | char *match_begin[10];
|
---|
| 28 | short match_length[10];
|
---|
| 29 | short number_parens;
|
---|
| 30 | static int match();
|
---|
| 31 |
|
---|
| 32 |
|
---|
| 33 |
|
---|
| 34 | char *
|
---|
| 35 | re_compile(pattern)
|
---|
| 36 | char *pattern;
|
---|
| 37 | {
|
---|
| 38 | register char *p;
|
---|
| 39 | register char c;
|
---|
| 40 | char *comp;
|
---|
| 41 | register char *q;
|
---|
| 42 | char *begin;
|
---|
| 43 | char *endp;
|
---|
| 44 | register int len;
|
---|
| 45 | int first;
|
---|
| 46 | int type;
|
---|
| 47 | char *stackp;
|
---|
| 48 | char stack[10];
|
---|
| 49 | int paren_num;
|
---|
| 50 | int i;
|
---|
| 51 | char *malloc();
|
---|
| 52 |
|
---|
| 53 | p = pattern;
|
---|
| 54 | if (*p == '^')
|
---|
| 55 | p++;
|
---|
| 56 | comp = q = malloc(2 * strlen(p) + 1);
|
---|
| 57 | begin = q;
|
---|
| 58 | stackp = stack;
|
---|
| 59 | paren_num = 0;
|
---|
| 60 | for (;;) {
|
---|
| 61 | switch (c = *p++) {
|
---|
| 62 | case '\0':
|
---|
| 63 | *q = '\0';
|
---|
| 64 | goto out;
|
---|
| 65 | case '.':
|
---|
| 66 | *q++ = RE_DOT;
|
---|
| 67 | len = 1;
|
---|
| 68 | break;
|
---|
| 69 | case '[':
|
---|
| 70 | begin = q;
|
---|
| 71 | *q = RE_CCL;
|
---|
| 72 | if (*p == '^') {
|
---|
| 73 | *q = RE_NCCL;
|
---|
| 74 | p++;
|
---|
| 75 | }
|
---|
| 76 | q++;
|
---|
| 77 | first = 1;
|
---|
| 78 | while (*p != ']' || first == 1) {
|
---|
| 79 | if (p[1] == '-' && p[2] != ']') {
|
---|
| 80 | *q++ = '-';
|
---|
| 81 | *q++ = p[0];
|
---|
| 82 | *q++ = p[2];
|
---|
| 83 | p += 3;
|
---|
| 84 | } else if (*p == '-') {
|
---|
| 85 | *q++ = '-';
|
---|
| 86 | *q++ = '-';
|
---|
| 87 | *q++ = '-';
|
---|
| 88 | p++;
|
---|
| 89 | } else {
|
---|
| 90 | *q++ = *p++;
|
---|
| 91 | }
|
---|
| 92 | first = 0;
|
---|
| 93 | }
|
---|
| 94 | p++;
|
---|
| 95 | *q++ = '\0';
|
---|
| 96 | len = q - begin;
|
---|
| 97 | break;
|
---|
| 98 | case '$':
|
---|
| 99 | if (*p != '\0')
|
---|
| 100 | goto dft;
|
---|
| 101 | *q++ = RE_EOS;
|
---|
| 102 | break;
|
---|
| 103 | case '*':
|
---|
| 104 | if (len == 0)
|
---|
| 105 | goto dft;
|
---|
| 106 | type = RE_STAR;
|
---|
| 107 | range:
|
---|
| 108 | i = (type == RE_RANGE)? 3 : 1;
|
---|
| 109 | endp = q + i;
|
---|
| 110 | begin = q - len;
|
---|
| 111 | do {
|
---|
| 112 | --q;
|
---|
| 113 | *(q + i) = *q;
|
---|
| 114 | } while (--len > 0);
|
---|
| 115 | q = begin;
|
---|
| 116 | *q++ = type;
|
---|
| 117 | if (type == RE_RANGE) {
|
---|
| 118 | i = 0;
|
---|
| 119 | while ((unsigned)(*p - '0') <= 9)
|
---|
| 120 | i = 10 * i + (*p++ - '0');
|
---|
| 121 | *q++ = i;
|
---|
| 122 | if (*p != ',') {
|
---|
| 123 | *q++ = i;
|
---|
| 124 | } else {
|
---|
| 125 | p++;
|
---|
| 126 | i = 0;
|
---|
| 127 | while ((unsigned)(*p - '0') <= 9)
|
---|
| 128 | i = 10 * i + (*p++ - '0');
|
---|
| 129 | *q++ = i;
|
---|
| 130 | }
|
---|
| 131 | if (*p != '\\' || *++p != '}')
|
---|
| 132 | error("RE error");
|
---|
| 133 | p++;
|
---|
| 134 | }
|
---|
| 135 | q = endp;
|
---|
| 136 | break;
|
---|
| 137 | case '\\':
|
---|
| 138 | if ((c = *p++) == '(') {
|
---|
| 139 | if (++paren_num > 9)
|
---|
| 140 | error("RE error");
|
---|
| 141 | *q++ = RE_LP;
|
---|
| 142 | *q++ = paren_num;
|
---|
| 143 | *stackp++ = paren_num;
|
---|
| 144 | len = 0;
|
---|
| 145 | } else if (c == ')') {
|
---|
| 146 | if (stackp == stack)
|
---|
| 147 | error("RE error");
|
---|
| 148 | *q++ = RE_RP;
|
---|
| 149 | *q++ = *--stackp;
|
---|
| 150 | len = 0;
|
---|
| 151 | } else if (c == '{') {
|
---|
| 152 | type = RE_RANGE;
|
---|
| 153 | goto range;
|
---|
| 154 | } else if ((unsigned)(c - '1') < 9) {
|
---|
| 155 | /* should check validity here */
|
---|
| 156 | *q++ = RE_MATCHED;
|
---|
| 157 | *q++ = c - '0';
|
---|
| 158 | len = 2;
|
---|
| 159 | } else {
|
---|
| 160 | goto dft;
|
---|
| 161 | }
|
---|
| 162 | break;
|
---|
| 163 | default:
|
---|
| 164 | dft: *q++ = RE_LITERAL;
|
---|
| 165 | *q++ = c;
|
---|
| 166 | len = 2;
|
---|
| 167 | break;
|
---|
| 168 | }
|
---|
| 169 | }
|
---|
| 170 | out:
|
---|
| 171 | if (stackp != stack)
|
---|
| 172 | error("RE error");
|
---|
| 173 | number_parens = paren_num;
|
---|
| 174 | return comp;
|
---|
| 175 | }
|
---|
| 176 |
|
---|
| 177 |
|
---|
| 178 |
|
---|
| 179 | re_match(pattern, string)
|
---|
| 180 | char *pattern;
|
---|
| 181 | char *string;
|
---|
| 182 | {
|
---|
| 183 | char **pp;
|
---|
| 184 |
|
---|
| 185 | match_begin[0] = string;
|
---|
| 186 | for (pp = &match_begin[1] ; pp <= &match_begin[9] ; pp++)
|
---|
| 187 | *pp = 0;
|
---|
| 188 | return match(pattern, string);
|
---|
| 189 | }
|
---|
| 190 |
|
---|
| 191 |
|
---|
| 192 |
|
---|
| 193 | static
|
---|
| 194 | match(pattern, string)
|
---|
| 195 | char *pattern;
|
---|
| 196 | char *string;
|
---|
| 197 | {
|
---|
| 198 | register char *p, *q;
|
---|
| 199 | int counting;
|
---|
| 200 | int low, high, count;
|
---|
| 201 | char *curpat;
|
---|
| 202 | char *start_count;
|
---|
| 203 | int negate;
|
---|
| 204 | int found;
|
---|
| 205 | char *r;
|
---|
| 206 | int len;
|
---|
| 207 | char c;
|
---|
| 208 |
|
---|
| 209 | p = pattern;
|
---|
| 210 | q = string;
|
---|
| 211 | counting = 0;
|
---|
| 212 | for (;;) {
|
---|
| 213 | if (counting) {
|
---|
| 214 | if (++count > high)
|
---|
| 215 | goto bad;
|
---|
| 216 | p = curpat;
|
---|
| 217 | }
|
---|
| 218 | switch (*p++) {
|
---|
| 219 | case RE_END:
|
---|
| 220 | match_length[0] = q - match_begin[0];
|
---|
| 221 | return 1;
|
---|
| 222 | case RE_LITERAL:
|
---|
| 223 | if (*q++ != *p++)
|
---|
| 224 | goto bad;
|
---|
| 225 | break;
|
---|
| 226 | case RE_DOT:
|
---|
| 227 | if (*q++ == '\0')
|
---|
| 228 | goto bad;
|
---|
| 229 | break;
|
---|
| 230 | case RE_CCL:
|
---|
| 231 | negate = 0;
|
---|
| 232 | goto ccl;
|
---|
| 233 | case RE_NCCL:
|
---|
| 234 | negate = 1;
|
---|
| 235 | ccl:
|
---|
| 236 | found = 0;
|
---|
| 237 | c = *q++;
|
---|
| 238 | while (*p) {
|
---|
| 239 | if (*p == '-') {
|
---|
| 240 | if (c >= *++p && c <= *++p)
|
---|
| 241 | found = 1;
|
---|
| 242 | } else {
|
---|
| 243 | if (c == *p)
|
---|
| 244 | found = 1;
|
---|
| 245 | }
|
---|
| 246 | p++;
|
---|
| 247 | }
|
---|
| 248 | p++;
|
---|
| 249 | if (found == negate || c == 0)
|
---|
| 250 | goto bad;
|
---|
| 251 | break;
|
---|
| 252 | case RE_LP:
|
---|
| 253 | match_begin[*p++] = q;
|
---|
| 254 | break;
|
---|
| 255 | case RE_RP:
|
---|
| 256 | match_length[*p] = q - match_begin[*p];
|
---|
| 257 | p++;
|
---|
| 258 | break;
|
---|
| 259 | case RE_MATCHED:
|
---|
| 260 | r = match_begin[*p];
|
---|
| 261 | len = match_length[*p++];
|
---|
| 262 | while (--len >= 0) {
|
---|
| 263 | if (*q++ != *r++)
|
---|
| 264 | goto bad;
|
---|
| 265 | }
|
---|
| 266 | break;
|
---|
| 267 | case RE_EOS:
|
---|
| 268 | if (*q != '\0')
|
---|
| 269 | goto bad;
|
---|
| 270 | break;
|
---|
| 271 | case RE_STAR:
|
---|
| 272 | low = 0;
|
---|
| 273 | high = 32767;
|
---|
| 274 | goto range;
|
---|
| 275 | case RE_RANGE:
|
---|
| 276 | low = *p++;
|
---|
| 277 | high = *p++;
|
---|
| 278 | if (high == 0)
|
---|
| 279 | high = 32767;
|
---|
| 280 | range:
|
---|
| 281 | curpat = p;
|
---|
| 282 | start_count = q;
|
---|
| 283 | count = 0;
|
---|
| 284 | counting++;
|
---|
| 285 | break;
|
---|
| 286 | }
|
---|
| 287 | }
|
---|
| 288 | bad:
|
---|
| 289 | if (! counting)
|
---|
| 290 | return 0;
|
---|
| 291 | len = 1;
|
---|
| 292 | if (*curpat == RE_MATCHED)
|
---|
| 293 | len = match_length[curpat[1]];
|
---|
| 294 | while (--count >= low) {
|
---|
| 295 | if (match(p, start_count + count * len))
|
---|
| 296 | return 1;
|
---|
| 297 | }
|
---|
| 298 | return 0;
|
---|
| 299 | }
|
---|