source: trunk/minix/commands/ash/bltin/regexp.c@ 15

Last change on this file since 15 was 9, checked in by Mattia Monga, 14 years ago

Minix 3.1.2a

File size: 5.0 KB
RevLine 
[9]1/*
2 * Regular expression matching for expr(1). Bugs: The upper bound of
3 * a range specified by the \{ feature cannot be zero.
4 *
5 * Copyright (C) 1989 by Kenneth Almquist. All rights reserved.
6 * This file is part of ash, which is distributed under the terms specified
7 * by the Ash General Public License. See the file named LICENSE.
8 */
9
10#include "bltin.h"
11
12
13#define RE_END 0 /* end of regular expression */
14#define RE_LITERAL 1 /* normal character follows */
15#define RE_DOT 2 /* "." */
16#define RE_CCL 3 /* "[...]" */
17#define RE_NCCL 4 /* "[^...]" */
18#define RE_LP 5 /* "\(" */
19#define RE_RP 6 /* "\)" */
20#define RE_MATCHED 7 /* "\digit" */
21#define RE_EOS 8 /* "$" matches end of string */
22#define RE_STAR 9 /* "*" */
23#define RE_RANGE 10 /* "\{num,num\}" */
24
25
26
27char *match_begin[10];
28short match_length[10];
29short number_parens;
30static int match();
31
32
33
34char *
35re_compile(pattern)
36 char *pattern;
37 {
38 register char *p;
39 register char c;
40 char *comp;
41 register char *q;
42 char *begin;
43 char *endp;
44 register int len;
45 int first;
46 int type;
47 char *stackp;
48 char stack[10];
49 int paren_num;
50 int i;
51 char *malloc();
52
53 p = pattern;
54 if (*p == '^')
55 p++;
56 comp = q = malloc(2 * strlen(p) + 1);
57 begin = q;
58 stackp = stack;
59 paren_num = 0;
60 for (;;) {
61 switch (c = *p++) {
62 case '\0':
63 *q = '\0';
64 goto out;
65 case '.':
66 *q++ = RE_DOT;
67 len = 1;
68 break;
69 case '[':
70 begin = q;
71 *q = RE_CCL;
72 if (*p == '^') {
73 *q = RE_NCCL;
74 p++;
75 }
76 q++;
77 first = 1;
78 while (*p != ']' || first == 1) {
79 if (p[1] == '-' && p[2] != ']') {
80 *q++ = '-';
81 *q++ = p[0];
82 *q++ = p[2];
83 p += 3;
84 } else if (*p == '-') {
85 *q++ = '-';
86 *q++ = '-';
87 *q++ = '-';
88 p++;
89 } else {
90 *q++ = *p++;
91 }
92 first = 0;
93 }
94 p++;
95 *q++ = '\0';
96 len = q - begin;
97 break;
98 case '$':
99 if (*p != '\0')
100 goto dft;
101 *q++ = RE_EOS;
102 break;
103 case '*':
104 if (len == 0)
105 goto dft;
106 type = RE_STAR;
107range:
108 i = (type == RE_RANGE)? 3 : 1;
109 endp = q + i;
110 begin = q - len;
111 do {
112 --q;
113 *(q + i) = *q;
114 } while (--len > 0);
115 q = begin;
116 *q++ = type;
117 if (type == RE_RANGE) {
118 i = 0;
119 while ((unsigned)(*p - '0') <= 9)
120 i = 10 * i + (*p++ - '0');
121 *q++ = i;
122 if (*p != ',') {
123 *q++ = i;
124 } else {
125 p++;
126 i = 0;
127 while ((unsigned)(*p - '0') <= 9)
128 i = 10 * i + (*p++ - '0');
129 *q++ = i;
130 }
131 if (*p != '\\' || *++p != '}')
132 error("RE error");
133 p++;
134 }
135 q = endp;
136 break;
137 case '\\':
138 if ((c = *p++) == '(') {
139 if (++paren_num > 9)
140 error("RE error");
141 *q++ = RE_LP;
142 *q++ = paren_num;
143 *stackp++ = paren_num;
144 len = 0;
145 } else if (c == ')') {
146 if (stackp == stack)
147 error("RE error");
148 *q++ = RE_RP;
149 *q++ = *--stackp;
150 len = 0;
151 } else if (c == '{') {
152 type = RE_RANGE;
153 goto range;
154 } else if ((unsigned)(c - '1') < 9) {
155 /* should check validity here */
156 *q++ = RE_MATCHED;
157 *q++ = c - '0';
158 len = 2;
159 } else {
160 goto dft;
161 }
162 break;
163 default:
164dft: *q++ = RE_LITERAL;
165 *q++ = c;
166 len = 2;
167 break;
168 }
169 }
170out:
171 if (stackp != stack)
172 error("RE error");
173 number_parens = paren_num;
174 return comp;
175}
176
177
178
179re_match(pattern, string)
180 char *pattern;
181 char *string;
182 {
183 char **pp;
184
185 match_begin[0] = string;
186 for (pp = &match_begin[1] ; pp <= &match_begin[9] ; pp++)
187 *pp = 0;
188 return match(pattern, string);
189}
190
191
192
193static
194match(pattern, string)
195 char *pattern;
196 char *string;
197 {
198 register char *p, *q;
199 int counting;
200 int low, high, count;
201 char *curpat;
202 char *start_count;
203 int negate;
204 int found;
205 char *r;
206 int len;
207 char c;
208
209 p = pattern;
210 q = string;
211 counting = 0;
212 for (;;) {
213 if (counting) {
214 if (++count > high)
215 goto bad;
216 p = curpat;
217 }
218 switch (*p++) {
219 case RE_END:
220 match_length[0] = q - match_begin[0];
221 return 1;
222 case RE_LITERAL:
223 if (*q++ != *p++)
224 goto bad;
225 break;
226 case RE_DOT:
227 if (*q++ == '\0')
228 goto bad;
229 break;
230 case RE_CCL:
231 negate = 0;
232 goto ccl;
233 case RE_NCCL:
234 negate = 1;
235ccl:
236 found = 0;
237 c = *q++;
238 while (*p) {
239 if (*p == '-') {
240 if (c >= *++p && c <= *++p)
241 found = 1;
242 } else {
243 if (c == *p)
244 found = 1;
245 }
246 p++;
247 }
248 p++;
249 if (found == negate || c == 0)
250 goto bad;
251 break;
252 case RE_LP:
253 match_begin[*p++] = q;
254 break;
255 case RE_RP:
256 match_length[*p] = q - match_begin[*p];
257 p++;
258 break;
259 case RE_MATCHED:
260 r = match_begin[*p];
261 len = match_length[*p++];
262 while (--len >= 0) {
263 if (*q++ != *r++)
264 goto bad;
265 }
266 break;
267 case RE_EOS:
268 if (*q != '\0')
269 goto bad;
270 break;
271 case RE_STAR:
272 low = 0;
273 high = 32767;
274 goto range;
275 case RE_RANGE:
276 low = *p++;
277 high = *p++;
278 if (high == 0)
279 high = 32767;
280range:
281 curpat = p;
282 start_count = q;
283 count = 0;
284 counting++;
285 break;
286 }
287 }
288bad:
289 if (! counting)
290 return 0;
291 len = 1;
292 if (*curpat == RE_MATCHED)
293 len = match_length[curpat[1]];
294 while (--count >= low) {
295 if (match(p, start_count + count * len))
296 return 1;
297 }
298 return 0;
299}
Note: See TracBrowser for help on using the repository browser.