1 | /*
|
---|
2 | * a small awk clone
|
---|
3 | *
|
---|
4 | * (C) 1989 Saeko Hirabauashi & Kouichi Hirabayashi
|
---|
5 | *
|
---|
6 | * Absolutely no warranty. Use this software with your own risk.
|
---|
7 | *
|
---|
8 | * Permission to use, copy, modify and distribute this software for any
|
---|
9 | * purpose and without fee is hereby granted, provided that the above
|
---|
10 | * copyright and disclaimer notice.
|
---|
11 | *
|
---|
12 | * This program was written to fit into 64K+64K memory of the Minix 1.2.
|
---|
13 | */
|
---|
14 |
|
---|
15 |
|
---|
16 | #include <stdio.h>
|
---|
17 | #include <ctype.h>
|
---|
18 | #include "awk.h"
|
---|
19 |
|
---|
/* Program text sources; defined in another file. */
extern char *srcprg;    /* inline program text; Getc() reads it when non-NULL */
extern FILE *pfp;       /* program file; Getc() reads it otherwise */

/* Current token, as left behind by lex(). */
int sym;                /* lexical token */
int sym1;               /* auxiliary lexical token (builtin code or isarg() result) */

/* Context flags set by the parser (y.c); lex() consults them. */
int regexflg;           /* parsing REGEXPR: lex() hands over to scanreg() */
int funflg;             /* parsing FUNCTION: identifiers are checked with isarg() */
int printflg;           /* parsing PRINT: '|', '>' and '>>' become redirections */
int getlineflg;         /* parsing GETLINE: '<' becomes an input redirection */

/* Text buffers. */
char text[BUFSIZ];      /* lexical word (spelling of the current token) */
char line[BUFSIZ];      /* program line for error message (ring buffer) */
char *linep = line;     /* next slot of line[] that Getc() will write */
char funnam[128];       /* function name for error message */

int lineno = 1;         /* program line number; lex() bumps it on each newline */
|
---|
34 |
|
---|
35 | lex()
|
---|
36 | {
|
---|
37 | int c, d;
|
---|
38 | char *s;
|
---|
39 |
|
---|
40 | if (regexflg)
|
---|
41 | return sym = scanreg();
|
---|
42 | next:
|
---|
43 | while ((c = Getc()) == ' ' || c == '\t')
|
---|
44 | ;
|
---|
45 | while (c == '#')
|
---|
46 | for (c = Getc(); c != '\n'; c = Getc())
|
---|
47 | ;
|
---|
48 | switch (c) {
|
---|
49 | case '\\':
|
---|
50 | if ((c = Getc()) == '\n') {
|
---|
51 | lineno++;
|
---|
52 | goto next;
|
---|
53 | }
|
---|
54 | break;
|
---|
55 | case '\n':
|
---|
56 | lineno++;
|
---|
57 | break;
|
---|
58 | }
|
---|
59 | switch (c) {
|
---|
60 | case EOF: return sym = 0;
|
---|
61 | case '+': return sym = follow2('=', '+', ADDEQ, INC, ADD);
|
---|
62 | case '-': return sym = follow2('=', '-', SUBEQ, DEC, SUB);
|
---|
63 | case '*': return sym = follow('=', MULTEQ, MULT);
|
---|
64 | case '/': return sym = follow('=', DIVEQ, DIV);
|
---|
65 | case '%': return sym = follow('=', MODEQ, MOD);
|
---|
66 | case '^': return sym = follow('=', POWEQ, POWER);
|
---|
67 | case '=': return sym = follow('=', EQ, ASSIGN);
|
---|
68 | case '!': return sym = follow2('=', '~', NE, NOMATCH, NOT);
|
---|
69 | case '&': return sym = follow('&', AND, BINAND);
|
---|
70 | case '|': sym = follow('|', OR, BINOR);
|
---|
71 | if (printflg && sym == BINOR)
|
---|
72 | sym = R_POUT;
|
---|
73 | return sym;
|
---|
74 | case '<': sym = follow2('=', '<', LE, SHIFTL, LT);
|
---|
75 | if (getlineflg && sym == LT)
|
---|
76 | sym = R_IN;
|
---|
77 | return sym;
|
---|
78 | case '>': sym = follow2('=', '>', GE, SHIFTR, GT);
|
---|
79 | if (printflg) {
|
---|
80 | switch (sym) {
|
---|
81 | case GT: sym = R_OUT; break;
|
---|
82 | case SHIFTR: sym = R_APD; break;
|
---|
83 | }
|
---|
84 | }
|
---|
85 | return sym;
|
---|
86 | case '~': return sym = MATCH; break;
|
---|
87 | case ';': case '\n': return sym = EOL;
|
---|
88 | }
|
---|
89 | if (isalpha(c) || c == '_') {
|
---|
90 | for (s = text; isalnum(c) || c == '_'; ) {
|
---|
91 | *s++ = c; c = Getc();
|
---|
92 | }
|
---|
93 | Ungetc(c);
|
---|
94 | *s = '\0';
|
---|
95 | if ((d = iskeywd(text)) == 0 &&
|
---|
96 | (d = isbuiltin(text, &sym1)) == 0) {
|
---|
97 | if (c == '(')
|
---|
98 | return sym = CALL;
|
---|
99 | else if (funflg) {
|
---|
100 | if ((sym1 = isarg(text)) != -1)
|
---|
101 | return sym = ARG;
|
---|
102 | }
|
---|
103 | }
|
---|
104 | return sym = d ? d : IDENT;
|
---|
105 | }
|
---|
106 | else if (c == '.' || (isdigit(c))) {
|
---|
107 | Ungetc(c);
|
---|
108 | return sym = scannum(text); /* NUMBER */
|
---|
109 | }
|
---|
110 | else if (c == '"')
|
---|
111 | return sym = scanstr(text); /* STRING */
|
---|
112 | return sym = c;
|
---|
113 | }
|
---|
114 |
|
---|
/*
 * follow - decide between a one- and a two-character operator.
 *
 * Reads one character.  If it is c1 it stays consumed and r1 is returned;
 * otherwise it is pushed back with Ungetc() and r2 is returned.
 */
static
follow(c1, r1, r2)
{
  int ch = Getc();

  if (ch != c1) {
    Ungetc(ch);
    return r2;
  }
  return r1;
}
|
---|
127 |
|
---|
/*
 * follow2 - like follow(), but with two possible second characters.
 *
 * Reads one character.  c1 selects r1 and c2 selects r2, the character
 * staying consumed in both cases.  Anything else is pushed back with
 * Ungetc() and r3 is returned.
 */
static
follow2(c1, c2, r1, r2, r3)
{
  int ch = Getc();

  if (ch == c1)
    return r1;
  if (ch == c2)
    return r2;
  Ungetc(ch);
  return r3;
}
|
---|
142 |
|
---|
143 | static
|
---|
144 | iskeywd(s) char *s;
|
---|
145 | {
|
---|
146 | static struct { char *kw; int token; } tab[] = {
|
---|
147 | "BEGIN", BEGIN,
|
---|
148 | "END", END,
|
---|
149 | "break", BREAK,
|
---|
150 | "continue", CONTIN,
|
---|
151 | "delete", DELETE,
|
---|
152 | "do", DO,
|
---|
153 | "else", ELSE,
|
---|
154 | "exit", EXIT,
|
---|
155 | "for", FOR,
|
---|
156 | "func", FUNC,
|
---|
157 | "function", FUNC,
|
---|
158 | "getline", GETLINE,
|
---|
159 | "if", IF,
|
---|
160 | "in", IN,
|
---|
161 | "next", NEXT,
|
---|
162 | "print", PRINT,
|
---|
163 | "printf", PRINTF,
|
---|
164 | "return", RETURN,
|
---|
165 | "sprint", SPRINT,
|
---|
166 | "sprintf", SPRINTF,
|
---|
167 | "while", WHILE,
|
---|
168 | "", 0, 0
|
---|
169 | };
|
---|
170 | register int i;
|
---|
171 |
|
---|
172 | for (i = 0; tab[i].token; i++)
|
---|
173 | if (strcmp(tab[i].kw, s) == 0)
|
---|
174 | break;
|
---|
175 | return tab[i].token;
|
---|
176 | }
|
---|
177 |
|
---|
178 | static
|
---|
179 | isbuiltin(s, p) char *s; int *p;
|
---|
180 | {
|
---|
181 | static struct { char *kw; int type; int token; } tab[] = {
|
---|
182 | "atan2", MATHFUN, ATAN2,
|
---|
183 | "close", STRFUN, CLOSE,
|
---|
184 | "cos", MATHFUN, COS,
|
---|
185 | "exp", MATHFUN, EXP,
|
---|
186 | "gsub", SUBST, RGSUB,
|
---|
187 | "index", STRFUN, INDEX,
|
---|
188 | "int", MATHFUN, INT,
|
---|
189 | "length", STRFUN, LENGTH,
|
---|
190 | "log", MATHFUN, LOG,
|
---|
191 | "match", STRFUN, RMATCH,
|
---|
192 | "sin", MATHFUN, SIN,
|
---|
193 | "sqrt", MATHFUN, SQRT,
|
---|
194 | "rand", MATHFUN, RAND,
|
---|
195 | "srand", MATHFUN, SRAND,
|
---|
196 | "split", STRFUN, SPLIT,
|
---|
197 | "sub", SUBST, RSUB,
|
---|
198 | "substr", STRFUN, SUBSTR,
|
---|
199 | "system", STRFUN, SYSTEM,
|
---|
200 | "", 0, 0
|
---|
201 | };
|
---|
202 | register int i;
|
---|
203 |
|
---|
204 | for (i = 0; tab[i].token; i++)
|
---|
205 | if (strcmp(tab[i].kw, s) == 0)
|
---|
206 | break;
|
---|
207 | *p = tab[i].token;
|
---|
208 | return tab[i].type;
|
---|
209 | }
|
---|
210 |
|
---|
211 | static
|
---|
212 | scannum(s) char *s;
|
---|
213 | {
|
---|
214 | register int c;
|
---|
215 | char *strchr();
|
---|
216 |
|
---|
217 | if ((c = Getc()) && strchr("+-", c) != NULL) {
|
---|
218 | *s++ = c; c = Getc();
|
---|
219 | }
|
---|
220 | while (isdigit(c)) {
|
---|
221 | *s++ = c; c = Getc();
|
---|
222 | }
|
---|
223 | if (c == '.') {
|
---|
224 | *s++ = c; c = Getc();
|
---|
225 | while (isdigit(c)) {
|
---|
226 | *s++ = c; c = Getc();
|
---|
227 | }
|
---|
228 | }
|
---|
229 | if (c && strchr("eE", c) != NULL) {
|
---|
230 | *s++ = c; c = Getc();
|
---|
231 | if (c && strchr("+-", c) != NULL) {
|
---|
232 | *s++ = c; c = Getc();
|
---|
233 | }
|
---|
234 | while (isdigit(c)) {
|
---|
235 | *s++ = c; c = Getc();
|
---|
236 | }
|
---|
237 | }
|
---|
238 | *s = '\0';
|
---|
239 | Ungetc(c);
|
---|
240 | return NUMBER;
|
---|
241 | }
|
---|
242 |
|
---|
243 | static
|
---|
244 | scanstr(s) char *s;
|
---|
245 | {
|
---|
246 | register int c, i, j;
|
---|
247 |
|
---|
248 | for (c = Getc(); c != EOF & c != '"'; ) {
|
---|
249 | if (c == '\\') {
|
---|
250 | switch (c = Getc()) {
|
---|
251 | case 'b': c = '\b'; break;
|
---|
252 | case 'f': c = '\f'; break;
|
---|
253 | case 'n': c = '\n'; break;
|
---|
254 | case 'r': c = '\r'; break;
|
---|
255 | case 't': c = '\t'; break;
|
---|
256 | default:
|
---|
257 | if (isdigit(c)) {
|
---|
258 | for (i = j = 0; i < 3 && isdigit(c); c = Getc(), i++)
|
---|
259 | j = j * 8 + c - '0';
|
---|
260 | Ungetc(c);
|
---|
261 | c = j;
|
---|
262 | }
|
---|
263 | break;
|
---|
264 | }
|
---|
265 | }
|
---|
266 | *s++ = c;
|
---|
267 | if (isKanji(c))
|
---|
268 | *s++ = Getc();
|
---|
269 | c = Getc();
|
---|
270 | }
|
---|
271 | *s = '\0';
|
---|
272 | return STRING;
|
---|
273 | }
|
---|
274 |
|
---|
275 | static
|
---|
276 | scanreg()
|
---|
277 | {
|
---|
278 | register int c;
|
---|
279 | register char *s;
|
---|
280 |
|
---|
281 | for (s = text; (c = Getc()) != '/'; )
|
---|
282 | if (c == '\n')
|
---|
283 | error("newline in regular expression");
|
---|
284 | else {
|
---|
285 | if (isKanji(c) || c == '\\') {
|
---|
286 | *s++ = c; c = Getc();
|
---|
287 | }
|
---|
288 | *s++ = c;
|
---|
289 | }
|
---|
290 | *s = '\0';
|
---|
291 | return REGEXP;
|
---|
292 | }
|
---|
293 |
|
---|
294 | static int c0;
|
---|
295 |
|
---|
296 | Ungetc(c)
|
---|
297 | {
|
---|
298 | c0 = c;
|
---|
299 |
|
---|
300 | if (linep > line) {
|
---|
301 | if (--linep < line)
|
---|
302 | linep == line + BUFSIZ - 1;
|
---|
303 | }
|
---|
304 | }
|
---|
305 |
|
---|
306 | Getc()
|
---|
307 | {
|
---|
308 | register int c;
|
---|
309 | char *s, *t;
|
---|
310 |
|
---|
311 | if (c0) {
|
---|
312 | c = c0; c0 = 0;
|
---|
313 | }
|
---|
314 | else if (srcprg)
|
---|
315 | c = *srcprg ? *srcprg++ : EOF;
|
---|
316 | else
|
---|
317 | c = fgetc(pfp);
|
---|
318 |
|
---|
319 | #if 0
|
---|
320 | if (linep - line == BUFSIZ) {
|
---|
321 | printf("!!!\n");
|
---|
322 | for (s = line; *s != '\n' && ((s - line) <BUFSIZ); s++)
|
---|
323 | ;
|
---|
324 | printf("***(%d)***\n", *s);
|
---|
325 | for (t = line; s < linep; )
|
---|
326 | *t++ = *++s;
|
---|
327 | }
|
---|
328 | #endif
|
---|
329 | *linep++ = c;
|
---|
330 | if ((linep - line) == BUFSIZ)
|
---|
331 | linep = line;
|
---|
332 | return c;
|
---|
333 | }
|
---|