/*
 * a small awk clone
 *
 * (C) 1989 Saeko Hirabauashi & Kouichi Hirabayashi
 *
 * Absolutely no warranty. Use this software with your own risk.
 *
 * Permission to use, copy, modify and distribute this software for any
 * purpose and without fee is hereby granted, provided that the above
 * copyright and disclaimer notice.
 *
 * This program was written to fit into 64K+64K memory of the Minix 1.2.
 */
| 14 |
|
---|
| 15 |
|
---|
| 16 | #include <stdio.h>
|
---|
| 17 | #include <ctype.h>
|
---|
| 18 | #include "awk.h"
|
---|
| 19 |
|
---|
| 20 | extern char *srcprg; /* inline program */
|
---|
| 21 | extern FILE *pfp; /* program file */
|
---|
| 22 |
|
---|
| 23 | int sym; /* lexical token */
|
---|
| 24 | int sym1; /* auxiliary lexical token */
|
---|
| 25 | int regexflg; /* set by parser (y.c) to indicate parsing REGEXPR */
|
---|
| 26 | int funflg; /* set by parser (y.c) to indicate parsing FUNCTION */
|
---|
| 27 | int printflg; /* set by parser (y.c) to indicate parsing PRINT */
|
---|
| 28 | int getlineflg; /* set by parser (y.c) to indicate parsing GETLINE */
|
---|
| 29 | char text[BUFSIZ]; /* lexical word */
|
---|
| 30 | char line[BUFSIZ]; /* program line for error message (ring buffer) */
|
---|
| 31 | char *linep = line; /* line pointer */
|
---|
| 32 | char funnam[128]; /* function name for error message */
|
---|
| 33 | int lineno = 1;
|
---|
| 34 |
|
---|
| 35 | lex()
|
---|
| 36 | {
|
---|
| 37 | int c, d;
|
---|
| 38 | char *s;
|
---|
| 39 |
|
---|
| 40 | if (regexflg)
|
---|
| 41 | return sym = scanreg();
|
---|
| 42 | next:
|
---|
| 43 | while ((c = Getc()) == ' ' || c == '\t')
|
---|
| 44 | ;
|
---|
| 45 | while (c == '#')
|
---|
| 46 | for (c = Getc(); c != '\n'; c = Getc())
|
---|
| 47 | ;
|
---|
| 48 | switch (c) {
|
---|
| 49 | case '\\':
|
---|
| 50 | if ((c = Getc()) == '\n') {
|
---|
| 51 | lineno++;
|
---|
| 52 | goto next;
|
---|
| 53 | }
|
---|
| 54 | break;
|
---|
| 55 | case '\n':
|
---|
| 56 | lineno++;
|
---|
| 57 | break;
|
---|
| 58 | }
|
---|
| 59 | switch (c) {
|
---|
| 60 | case EOF: return sym = 0;
|
---|
| 61 | case '+': return sym = follow2('=', '+', ADDEQ, INC, ADD);
|
---|
| 62 | case '-': return sym = follow2('=', '-', SUBEQ, DEC, SUB);
|
---|
| 63 | case '*': return sym = follow('=', MULTEQ, MULT);
|
---|
| 64 | case '/': return sym = follow('=', DIVEQ, DIV);
|
---|
| 65 | case '%': return sym = follow('=', MODEQ, MOD);
|
---|
| 66 | case '^': return sym = follow('=', POWEQ, POWER);
|
---|
| 67 | case '=': return sym = follow('=', EQ, ASSIGN);
|
---|
| 68 | case '!': return sym = follow2('=', '~', NE, NOMATCH, NOT);
|
---|
| 69 | case '&': return sym = follow('&', AND, BINAND);
|
---|
| 70 | case '|': sym = follow('|', OR, BINOR);
|
---|
| 71 | if (printflg && sym == BINOR)
|
---|
| 72 | sym = R_POUT;
|
---|
| 73 | return sym;
|
---|
| 74 | case '<': sym = follow2('=', '<', LE, SHIFTL, LT);
|
---|
| 75 | if (getlineflg && sym == LT)
|
---|
| 76 | sym = R_IN;
|
---|
| 77 | return sym;
|
---|
| 78 | case '>': sym = follow2('=', '>', GE, SHIFTR, GT);
|
---|
| 79 | if (printflg) {
|
---|
| 80 | switch (sym) {
|
---|
| 81 | case GT: sym = R_OUT; break;
|
---|
| 82 | case SHIFTR: sym = R_APD; break;
|
---|
| 83 | }
|
---|
| 84 | }
|
---|
| 85 | return sym;
|
---|
| 86 | case '~': return sym = MATCH; break;
|
---|
| 87 | case ';': case '\n': return sym = EOL;
|
---|
| 88 | }
|
---|
| 89 | if (isalpha(c) || c == '_') {
|
---|
| 90 | for (s = text; isalnum(c) || c == '_'; ) {
|
---|
| 91 | *s++ = c; c = Getc();
|
---|
| 92 | }
|
---|
| 93 | Ungetc(c);
|
---|
| 94 | *s = '\0';
|
---|
| 95 | if ((d = iskeywd(text)) == 0 &&
|
---|
| 96 | (d = isbuiltin(text, &sym1)) == 0) {
|
---|
| 97 | if (c == '(')
|
---|
| 98 | return sym = CALL;
|
---|
| 99 | else if (funflg) {
|
---|
| 100 | if ((sym1 = isarg(text)) != -1)
|
---|
| 101 | return sym = ARG;
|
---|
| 102 | }
|
---|
| 103 | }
|
---|
| 104 | return sym = d ? d : IDENT;
|
---|
| 105 | }
|
---|
| 106 | else if (c == '.' || (isdigit(c))) {
|
---|
| 107 | Ungetc(c);
|
---|
| 108 | return sym = scannum(text); /* NUMBER */
|
---|
| 109 | }
|
---|
| 110 | else if (c == '"')
|
---|
| 111 | return sym = scanstr(text); /* STRING */
|
---|
| 112 | return sym = c;
|
---|
| 113 | }
|
---|
| 114 |
|
---|
/*
 * follow - decide between a one- and a two-character operator.
 *
 * Reads the next character.  When it is c1 the character is consumed
 * and r1 is returned; otherwise it is pushed back and r2 is returned.
 */
static int
follow(c1, r1, r2)
int c1, r1, r2;
{
	register int ch = Getc();

	if (ch != c1) {
		Ungetc(ch);
		return r2;
	}
	return r1;
}
| 127 |
|
---|
/*
 * follow2 - like follow(), but with two possible second characters.
 *
 * Reads the next character.  c1 selects r1, c2 selects r2 (c1 is
 * tested first); any other character is pushed back and r3 is returned.
 */
static int
follow2(c1, c2, r1, r2, r3)
int c1, c2, r1, r2, r3;
{
	register int ch = Getc();

	if (ch == c1)
		return r1;
	if (ch == c2)
		return r2;
	Ungetc(ch);
	return r3;
}
| 142 |
|
---|
| 143 | static
|
---|
| 144 | iskeywd(s) char *s;
|
---|
| 145 | {
|
---|
| 146 | static struct { char *kw; int token; } tab[] = {
|
---|
| 147 | "BEGIN", BEGIN,
|
---|
| 148 | "END", END,
|
---|
| 149 | "break", BREAK,
|
---|
| 150 | "continue", CONTIN,
|
---|
| 151 | "delete", DELETE,
|
---|
| 152 | "do", DO,
|
---|
| 153 | "else", ELSE,
|
---|
| 154 | "exit", EXIT,
|
---|
| 155 | "for", FOR,
|
---|
| 156 | "func", FUNC,
|
---|
| 157 | "function", FUNC,
|
---|
| 158 | "getline", GETLINE,
|
---|
| 159 | "if", IF,
|
---|
| 160 | "in", IN,
|
---|
| 161 | "next", NEXT,
|
---|
| 162 | "print", PRINT,
|
---|
| 163 | "printf", PRINTF,
|
---|
| 164 | "return", RETURN,
|
---|
| 165 | "sprint", SPRINT,
|
---|
| 166 | "sprintf", SPRINTF,
|
---|
| 167 | "while", WHILE,
|
---|
| 168 | "", 0, 0
|
---|
| 169 | };
|
---|
| 170 | register int i;
|
---|
| 171 |
|
---|
| 172 | for (i = 0; tab[i].token; i++)
|
---|
| 173 | if (strcmp(tab[i].kw, s) == 0)
|
---|
| 174 | break;
|
---|
| 175 | return tab[i].token;
|
---|
| 176 | }
|
---|
| 177 |
|
---|
| 178 | static
|
---|
| 179 | isbuiltin(s, p) char *s; int *p;
|
---|
| 180 | {
|
---|
| 181 | static struct { char *kw; int type; int token; } tab[] = {
|
---|
| 182 | "atan2", MATHFUN, ATAN2,
|
---|
| 183 | "close", STRFUN, CLOSE,
|
---|
| 184 | "cos", MATHFUN, COS,
|
---|
| 185 | "exp", MATHFUN, EXP,
|
---|
| 186 | "gsub", SUBST, RGSUB,
|
---|
| 187 | "index", STRFUN, INDEX,
|
---|
| 188 | "int", MATHFUN, INT,
|
---|
| 189 | "length", STRFUN, LENGTH,
|
---|
| 190 | "log", MATHFUN, LOG,
|
---|
| 191 | "match", STRFUN, RMATCH,
|
---|
| 192 | "sin", MATHFUN, SIN,
|
---|
| 193 | "sqrt", MATHFUN, SQRT,
|
---|
| 194 | "rand", MATHFUN, RAND,
|
---|
| 195 | "srand", MATHFUN, SRAND,
|
---|
| 196 | "split", STRFUN, SPLIT,
|
---|
| 197 | "sub", SUBST, RSUB,
|
---|
| 198 | "substr", STRFUN, SUBSTR,
|
---|
| 199 | "system", STRFUN, SYSTEM,
|
---|
| 200 | "", 0, 0
|
---|
| 201 | };
|
---|
| 202 | register int i;
|
---|
| 203 |
|
---|
| 204 | for (i = 0; tab[i].token; i++)
|
---|
| 205 | if (strcmp(tab[i].kw, s) == 0)
|
---|
| 206 | break;
|
---|
| 207 | *p = tab[i].token;
|
---|
| 208 | return tab[i].type;
|
---|
| 209 | }
|
---|
| 210 |
|
---|
| 211 | static
|
---|
| 212 | scannum(s) char *s;
|
---|
| 213 | {
|
---|
| 214 | register int c;
|
---|
| 215 | char *strchr();
|
---|
| 216 |
|
---|
| 217 | if ((c = Getc()) && strchr("+-", c) != NULL) {
|
---|
| 218 | *s++ = c; c = Getc();
|
---|
| 219 | }
|
---|
| 220 | while (isdigit(c)) {
|
---|
| 221 | *s++ = c; c = Getc();
|
---|
| 222 | }
|
---|
| 223 | if (c == '.') {
|
---|
| 224 | *s++ = c; c = Getc();
|
---|
| 225 | while (isdigit(c)) {
|
---|
| 226 | *s++ = c; c = Getc();
|
---|
| 227 | }
|
---|
| 228 | }
|
---|
| 229 | if (c && strchr("eE", c) != NULL) {
|
---|
| 230 | *s++ = c; c = Getc();
|
---|
| 231 | if (c && strchr("+-", c) != NULL) {
|
---|
| 232 | *s++ = c; c = Getc();
|
---|
| 233 | }
|
---|
| 234 | while (isdigit(c)) {
|
---|
| 235 | *s++ = c; c = Getc();
|
---|
| 236 | }
|
---|
| 237 | }
|
---|
| 238 | *s = '\0';
|
---|
| 239 | Ungetc(c);
|
---|
| 240 | return NUMBER;
|
---|
| 241 | }
|
---|
| 242 |
|
---|
| 243 | static
|
---|
| 244 | scanstr(s) char *s;
|
---|
| 245 | {
|
---|
| 246 | register int c, i, j;
|
---|
| 247 |
|
---|
| 248 | for (c = Getc(); c != EOF & c != '"'; ) {
|
---|
| 249 | if (c == '\\') {
|
---|
| 250 | switch (c = Getc()) {
|
---|
| 251 | case 'b': c = '\b'; break;
|
---|
| 252 | case 'f': c = '\f'; break;
|
---|
| 253 | case 'n': c = '\n'; break;
|
---|
| 254 | case 'r': c = '\r'; break;
|
---|
| 255 | case 't': c = '\t'; break;
|
---|
| 256 | default:
|
---|
| 257 | if (isdigit(c)) {
|
---|
| 258 | for (i = j = 0; i < 3 && isdigit(c); c = Getc(), i++)
|
---|
| 259 | j = j * 8 + c - '0';
|
---|
| 260 | Ungetc(c);
|
---|
| 261 | c = j;
|
---|
| 262 | }
|
---|
| 263 | break;
|
---|
| 264 | }
|
---|
| 265 | }
|
---|
| 266 | *s++ = c;
|
---|
| 267 | if (isKanji(c))
|
---|
| 268 | *s++ = Getc();
|
---|
| 269 | c = Getc();
|
---|
| 270 | }
|
---|
| 271 | *s = '\0';
|
---|
| 272 | return STRING;
|
---|
| 273 | }
|
---|
| 274 |
|
---|
| 275 | static
|
---|
| 276 | scanreg()
|
---|
| 277 | {
|
---|
| 278 | register int c;
|
---|
| 279 | register char *s;
|
---|
| 280 |
|
---|
| 281 | for (s = text; (c = Getc()) != '/'; )
|
---|
| 282 | if (c == '\n')
|
---|
| 283 | error("newline in regular expression");
|
---|
| 284 | else {
|
---|
| 285 | if (isKanji(c) || c == '\\') {
|
---|
| 286 | *s++ = c; c = Getc();
|
---|
| 287 | }
|
---|
| 288 | *s++ = c;
|
---|
| 289 | }
|
---|
| 290 | *s = '\0';
|
---|
| 291 | return REGEXP;
|
---|
| 292 | }
|
---|
| 293 |
|
---|
| 294 | static int c0;
|
---|
| 295 |
|
---|
| 296 | Ungetc(c)
|
---|
| 297 | {
|
---|
| 298 | c0 = c;
|
---|
| 299 |
|
---|
| 300 | if (linep > line) {
|
---|
| 301 | if (--linep < line)
|
---|
| 302 | linep == line + BUFSIZ - 1;
|
---|
| 303 | }
|
---|
| 304 | }
|
---|
| 305 |
|
---|
| 306 | Getc()
|
---|
| 307 | {
|
---|
| 308 | register int c;
|
---|
| 309 | char *s, *t;
|
---|
| 310 |
|
---|
| 311 | if (c0) {
|
---|
| 312 | c = c0; c0 = 0;
|
---|
| 313 | }
|
---|
| 314 | else if (srcprg)
|
---|
| 315 | c = *srcprg ? *srcprg++ : EOF;
|
---|
| 316 | else
|
---|
| 317 | c = fgetc(pfp);
|
---|
| 318 |
|
---|
| 319 | #if 0
|
---|
| 320 | if (linep - line == BUFSIZ) {
|
---|
| 321 | printf("!!!\n");
|
---|
| 322 | for (s = line; *s != '\n' && ((s - line) <BUFSIZ); s++)
|
---|
| 323 | ;
|
---|
| 324 | printf("***(%d)***\n", *s);
|
---|
| 325 | for (t = line; s < linep; )
|
---|
| 326 | *t++ = *++s;
|
---|
| 327 | }
|
---|
| 328 | #endif
|
---|
| 329 | *linep++ = c;
|
---|
| 330 | if ((linep - line) == BUFSIZ)
|
---|
| 331 | linep = line;
|
---|
| 332 | return c;
|
---|
| 333 | }
|
---|