source: trunk/minix/commands/indent/lexi.c@ 9

Last change on this file since 9 was 9, checked in by Mattia Monga, 13 years ago

Minix 3.1.2a

File size: 14.0 KB
Line 
1/**
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980 The Regents of the University of California.
4 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms are permitted
8 * provided that the above copyright notice and this paragraph are
9 * duplicated in all such forms and that any documentation,
10 * advertising materials, and other materials related to such
11 * distribution and use acknowledge that the software was developed
12 * by the University of California, Berkeley, the University of Illinois,
13 * Urbana, and Sun Microsystems, Inc. The name of either University
14 * or Sun Microsystems may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
18 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19 */
20
21/*
22 * Here we have the token scanner for indent. It scans off one token and
23 * puts it in the global variable "token". It returns a code, indicating the
24 * type of token scanned.
25 */
26
27#define PUBLIC extern
28#include <ctype.h>
29#include <string.h>
30#include "globs.h"
31#include "codes.h"
32#include "proto.h"
33
34#define alphanum 1
35#define opchar 3
36
/*
 * One row of the reserved-word table: the spelling of a word together with
 * the class code that lexi() switches on after it has matched that word.
 */
struct templ
{
    char *rwd;                  /* text of the reserved word; a null
                                 * pointer marks the end of the table */
    int rwcode;                 /* class of the word, see below */
};

/*
 * Reserved words recognised by the scanner.  The class codes are the ones
 * handled by the switch in lexi():
 *   1  switch                        2  case, default
 *   3  struct, union, enum           4  other declaration keywords
 *   5  if, while, for                6  else, do
 *   7  sizeof                        0  treated like an ordinary identifier
 * The table is sized at 100 rows so that addkey() can append further words
 * behind the built-in ones.
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
75
/*
 * Character classes for the 7-bit ASCII set, indexed by character code.
 * The scanner uses the table to decide what kind of token a character can
 * belong to:
 *   1  letters, digits, '_' and '$' (compared against alphanum)
 *   3  operator characters (compared against opchar)
 *   0  everything else
 */
char chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space and punctuation */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: digits, then colon through question mark */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: at sign, upper case A to O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: upper case P to Z, brackets, caret, underscore */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: back quote, lower case a to o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: lower case p to z, braces, bar, tilde, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
98
99
100
101
102int
103lexi()
104{
105 register char *tok; /* local pointer to next char
106 in token */
107 int unary_delim; /* this is set to 1 if the
108 current token
109
110 forces a following operator to
111 be unary */
112 static int last_code; /* the last token type returned */
113 static int l_struct; /* set to 1 if the last token
114 was 'struct' */
115 int code; /* internal code to be returned */
116 char qchar; /* the delimiter character for
117 a string */
118
119 tok = token; /* point to start of place to
120 save token */
121 unary_delim = false;
122 ps.col_1 = ps.last_nl; /* tell world that this token
123 started in column 1 iff the
124 last thing scanned was nl */
125 ps.last_nl = false;
126
127 while (*buf_ptr == ' ' || *buf_ptr == '\t')
128 { /* get rid of blanks */
129 ps.col_1 = false; /* leading blanks imply token
130 is not in column 1 */
131 if (++buf_ptr >= buf_end)
132 fill_buffer();
133 }
134
135 /* Scan an alphanumeric token */
136 if (chartype[*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1])))
137 {
138 /* we have a character or number */
139 register char *j; /* used for searching thru list
140 of
141
142 reserved words */
143 register struct templ *p;
144
145 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1])))
146 {
147 int seendot = 0, seenexp = 0;
148 if (*buf_ptr == '0' &&
149 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X'))
150 {
151 *tok++ = *buf_ptr++;
152 *tok++ = *buf_ptr++;
153 while (isxdigit(*buf_ptr))
154 *tok++ = *buf_ptr++;
155 } else
156 while (1)
157 {
158 if (*buf_ptr == '.')
159 if (seendot)
160 break;
161 else
162 seendot++;
163 *tok++ = *buf_ptr++;
164 if (!isdigit(*buf_ptr) && *buf_ptr != '.')
165 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
166 break;
167 else
168 {
169 seenexp++;
170 seendot++;
171 *tok++ = *buf_ptr++;
172 if (*buf_ptr == '+' || *buf_ptr == '-')
173 *tok++ = *buf_ptr++;
174 }
175 }
176 if (*buf_ptr == 'L' || *buf_ptr == 'l')
177 *tok++ = *buf_ptr++;
178 } else
179 while (chartype[*buf_ptr] == alphanum)
180 { /* copy it over */
181 *tok++ = *buf_ptr++;
182 if (buf_ptr >= buf_end)
183 fill_buffer();
184 }
185 *tok++ = '\0';
186 while (*buf_ptr == ' ' || *buf_ptr == '\t')
187 { /* get rid of blanks */
188 if (++buf_ptr >= buf_end)
189 fill_buffer();
190 }
191 ps.its_a_keyword = false;
192 ps.sizeof_keyword = false;
193 if (l_struct)
194 { /* if last token was 'struct',
195 then this token should be
196 treated as a declaration */
197 l_struct = false;
198 last_code = ident;
199 ps.last_u_d = true;
200 return (decl);
201 }
202 ps.last_u_d = false; /* Operator after indentifier
203 is binary */
204 last_code = ident; /* Remember that this is the
205 code we will return */
206
207 /* This loop will check if the token is a keyword. */
208 for (p = specials; (j = p->rwd) != 0; p++)
209 {
210 tok = token; /* point at scanned token */
211 if (*j++ != *tok++ || *j++ != *tok++)
212 continue; /* This test depends on the
213 fact that identifiers are
214 always at least 1 character
215 long (ie. the first two
216 bytes of the identifier are
217 always meaningful) */
218 if (tok[-1] == 0)
219 break; /* If its a one-character
220 identifier */
221 while (*tok++ == *j)
222 if (*j++ == 0)
223 goto found_keyword; /* I wish that C had a
224 multi-level break... */
225 }
226 if (p->rwd)
227 { /* we have a keyword */
228 found_keyword:
229 ps.its_a_keyword = true;
230 ps.last_u_d = true;
231 switch (p->rwcode)
232 {
233 case 1: /* it is a switch */
234 return (swstmt);
235 case 2: /* a case or default */
236 return (casestmt);
237
238 case 3: /* a "struct" */
239 if (ps.p_l_follow)
240 break; /* inside parens: cast */
241 l_struct = true;
242
243 /* Next time around, we will want to know that we have had
244 a 'struct' */
245 case 4: /* one of the declaration
246 keywords */
247 if (ps.p_l_follow)
248 {
249 ps.cast_mask |= 1 << ps.p_l_follow;
250 break; /* inside parens: cast */
251 }
252 last_code = decl;
253 return (decl);
254
255 case 5: /* if, while, for */
256 return (sp_paren);
257
258 case 6: /* do, else */
259 return (sp_nparen);
260
261 case 7:
262 ps.sizeof_keyword = true;
263 default: /* all others are treated like
264 any other identifier */
265 return (ident);
266 } /* end of switch */
267 } /* end of if (found_it) */
268 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0)
269 {
270 register char *tp = buf_ptr;
271 while (tp < buf_end)
272 if (*tp++ == ')' && *tp == ';')
273 goto not_proc;
274 strncpy(ps.procname, token, sizeof ps.procname - 1);
275 ps.in_par_decl = 1;
276 not_proc:;
277 }
278 /* The following hack attempts to guess whether or not the
279 current token is in fact a declaration keyword -- one that has
280 been typedefd */
281 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
282 && !ps.p_l_follow
283 && !ps.block_init
284 && (ps.last_token == rparen || ps.last_token == semicolon ||
285 ps.last_token == decl ||
286 ps.last_token == lbrace || ps.last_token == rbrace))
287 {
288 ps.its_a_keyword = true;
289 ps.last_u_d = true;
290 last_code = decl;
291 return decl;
292 }
293 if (last_code == decl) /* if this is a declared
294 variable, then following
295 sign is unary */
296 ps.last_u_d = true; /* will make "int a -1" work */
297 last_code = ident;
298 return (ident); /* the ident is not in the list */
299 } /* end of procesing for alpanum
300 character */
301 /* l l l Scan a non-alphanumeric token */
302 *tok++ = *buf_ptr; /* if it is only a
303 one-character token, it is
304 moved here */
305 *tok = '\0';
306 if (++buf_ptr >= buf_end)
307 fill_buffer();
308
309 switch (*token)
310 {
311 case '\n':
312 unary_delim = ps.last_u_d;
313 ps.last_nl = true; /* remember that we just had a
314 newline */
315 code = (had_eof ? 0 : newline);
316
317 /* if data has been exausted, the newline is a dummy, and we
318 should return code to stop */
319 break;
320
321 case '\'': /* start of quoted character */
322 case '"': /* start of string */
323 qchar = *token;
324 if (troff)
325 {
326 tok[-1] = '`';
327 if (qchar == '"')
328 *tok++ = '`';
329 tok = chfont(&bodyf, &stringf, tok);
330 }
331 do
332 { /* copy the string */
333 while (1)
334 { /* move one character or
335 [/<char>]<char> */
336 if (*buf_ptr == '\n')
337 {
338 printf("%d: Unterminated literal\n", line_no);
339 goto stop_lit;
340 }
341 *tok = *buf_ptr++;
342 if (buf_ptr >= buf_end)
343 fill_buffer();
344 if (had_eof || ((tok - token) > (bufsize - 2)))
345 {
346 printf("Unterminated literal\n");
347 ++tok;
348 goto stop_lit;
349 /* get outof literal copying loop */
350 }
351 if (*tok == BACKSLASH)
352 { /* if escape, copy extra char */
353 if (*buf_ptr == '\n') /* check for escaped newline */
354 ++line_no;
355 if (troff)
356 {
357 *++tok = BACKSLASH;
358 if (*buf_ptr == BACKSLASH)
359 *++tok = BACKSLASH;
360 }
361 *++tok = *buf_ptr++;
362 ++tok; /* we must increment this again
363 because we copied two chars */
364 if (buf_ptr >= buf_end)
365 fill_buffer();
366 } else
367 break; /* we copied one character */
368 } /* end of while (1) */
369 } while (*tok++ != qchar);
370 if (troff)
371 {
372 tok = chfont(&stringf, &bodyf, tok - 1);
373 if (qchar == '"')
374 *tok++ = '\'';
375 }
376stop_lit:
377 code = ident;
378 break;
379
380 case ('('):
381 case ('['):
382 unary_delim = true;
383 code = lparen;
384 break;
385
386 case (')'):
387 case (']'):
388 code = rparen;
389 break;
390
391 case '#':
392 unary_delim = ps.last_u_d;
393 code = preesc;
394 break;
395
396 case '?':
397 unary_delim = true;
398 code = question;
399 break;
400
401 case (':'):
402 code = colon;
403 unary_delim = true;
404 break;
405
406 case (';'):
407 unary_delim = true;
408 code = semicolon;
409 break;
410
411 case ('{'):
412 unary_delim = true;
413
414 /* if (ps.in_or_st) ps.block_init = 1; */
415 code = ps.block_init ? lparen : lbrace;
416 break;
417
418 case ('}'):
419 unary_delim = true;
420 code = ps.block_init ? rparen : rbrace;
421 break;
422
423 case 014: /* a form feed */
424 unary_delim = ps.last_u_d;
425 ps.last_nl = true; /* remember this so we can set
426 'ps.col_1' right */
427 code = form_feed;
428 break;
429
430 case (','):
431 unary_delim = true;
432 code = comma;
433 break;
434
435 case '.':
436 unary_delim = false;
437 code = period;
438 break;
439
440 case '-':
441 case '+': /* check for -, +, --, ++ */
442 code = (ps.last_u_d ? unary_op : binary_op);
443 unary_delim = true;
444
445 if (*buf_ptr == token[0])
446 {
447 /* check for doubled character */
448 *tok++ = *buf_ptr++;
449 /* buffer overflow will be checked at end of loop */
450 if (last_code == ident || last_code == rparen)
451 {
452 code = (ps.last_u_d ? unary_op : postop);
453 /* check for following ++ or -- */
454 unary_delim = false;
455 }
456 } else if (*buf_ptr == '=')
457 /* check for operator += */
458 *tok++ = *buf_ptr++;
459 else if (*buf_ptr == '>')
460 {
461 /* check for operator -> */
462 *tok++ = *buf_ptr++;
463 if (!ptr_binop)
464 {
465 unary_delim = false;
466 code = unary_op;
467 ps.want_blank = false;
468 }
469 }
470 break; /* buffer overflow will be
471 checked at end of switch */
472
473 case '=':
474 if (ps.in_or_st)
475 ps.block_init = 1;
476#ifdef undef
477 if (chartype[*buf_ptr] == opchar)
478 { /* we have two char assignment */
479 tok[-1] = *buf_ptr++;
480 if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
481 *tok++ = *buf_ptr++;
482 *tok++ = '='; /* Flip =+ to += */
483 *tok = 0;
484 }
485#else
486 if (*buf_ptr == '=')
487 { /* == */
488 *tok++ = '='; /* Flip =+ to += */
489 buf_ptr++;
490 *tok = 0;
491 }
492#endif
493 code = binary_op;
494 unary_delim = true;
495 break;
496 /* can drop thru!!! */
497
498 case '>':
499 case '<':
500 case '!': /* ops like <, <<, <=, !=, etc */
501 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=')
502 {
503 *tok++ = *buf_ptr;
504 if (++buf_ptr >= buf_end)
505 fill_buffer();
506 }
507 if (*buf_ptr == '=')
508 *tok++ = *buf_ptr++;
509 code = (ps.last_u_d ? unary_op : binary_op);
510 unary_delim = true;
511 break;
512
513 default:
514 if (token[0] == '/' && *buf_ptr == '*')
515 {
516 /* it is start of comment */
517 *tok++ = '*';
518
519 if (++buf_ptr >= buf_end)
520 fill_buffer();
521
522 code = comment;
523 unary_delim = ps.last_u_d;
524 break;
525 }
526 while (*(tok - 1) == *buf_ptr || *buf_ptr == '=')
527 {
528 /* handle ||, &&, etc, and also things as in int *****i */
529 *tok++ = *buf_ptr;
530 if (++buf_ptr >= buf_end)
531 fill_buffer();
532 }
533 code = (ps.last_u_d ? unary_op : binary_op);
534 unary_delim = true;
535
536
537 } /* end of switch */
538 if (code != newline)
539 {
540 l_struct = false;
541 last_code = code;
542 }
543 if (buf_ptr >= buf_end) /* check for input buffer empty */
544 fill_buffer();
545 ps.last_u_d = unary_delim;
546 *tok = '\0'; /* null terminate the token */
547 return (code);
548}
549
550/*
551 * Add the given keyword to the keyword table, using val as the keyword type
552 */
553void addkey(key, val)
554 char *key;
555 int val;
556{
557 register struct templ *p = specials;
558 while (p->rwd)
559 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
560 return;
561 else
562 p++;
563 if (p >= specials + sizeof specials / sizeof specials[0])
564 return; /* For now, table overflows are
565 silently ignored */
566 p->rwd = key;
567 p->rwcode = val;
568 p[1].rwd = 0;
569 p[1].rwcode = 0;
570 return;
571}
Note: See TracBrowser for help on using the repository browser.