Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: trunk/minix/commands/indent/lexi.c@ 9

Last change on this file since 9 was 9, checked in by Mattia Monga, 13 years ago
Minix 3.1.2a
File size: 14.0 KB

Line
1	/**
2	* Copyright (c) 1985 Sun Microsystems, Inc.
3	* Copyright (c) 1980 The Regents of the University of California.
4	* Copyright (c) 1976 Board of Trustees of the University of Illinois.
5	* All rights reserved.
6	*
7	* Redistribution and use in source and binary forms are permitted
8	* provided that the above copyright notice and this paragraph are
9	* duplicated in all such forms and that any documentation,
10	* advertising materials, and other materials related to such
11	* distribution and use acknowledge that the software was developed
12	* by the University of California, Berkeley, the University of Illinois,
13	* Urbana, and Sun Microsystems, Inc. The name of either University
14	* or Sun Microsystems may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
17	* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
18	* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19	*/
20
21	/*
22	* Here we have the token scanner for indent. It scans off one token and
23	* puts it in the global variable "token". It returns a code, indicating the
24	* type of token scanned.
25	*/
26
27	#define PUBLIC extern
28	#include <ctype.h>
29	#include <string.h>
30	#include "globs.h"
31	#include "codes.h"
32	#include "proto.h"
33
/*
 * Character classes stored in the chartype[] table.  A table entry of 0
 * means the character belongs to neither class.
 */
#define alphanum 1              /* character may appear in an identifier or number */
#define opchar 3                /* character may appear in an operator */
36
/*
 * One entry of the reserved-word table: the spelling of the word and the
 * class code that lexi() switches on when it recognises the word.
 */
struct templ
{
    char *rwd;                  /* spelling of the reserved word; 0 ends the table */
    int rwcode;                 /* class of the word, see the list below */
};

/*
 * Reserved words known to the scanner.  The list ends at the first entry
 * whose rwd is 0; the unused tail of the array leaves room for the words
 * that addkey() appends at run time.
 *
 * rwcode values as interpreted by lexi():
 *   0  no special treatment, returned as an ordinary identifier
 *   1  switch
 *   2  case, default
 *   3  struct, union, enum (the following token is taken as a declaration)
 *   4  declaration keywords
 *   5  if, while, for
 *   6  else, do
 *   7  sizeof
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
75
/*
 * Classification of the 128 ASCII characters, indexed by character code:
 * 1 marks characters that can be part of an identifier or number
 * (letters, digits, '_' and '$'), 3 marks characters that can be part of
 * an operator, and 0 marks everything else.
 */
char chartype[128] =
{
    0, 0, 0, 0, 0, 0, 0, 0,     /* 000 - 007  control characters */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 010 - 017 */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 020 - 027 */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 030 - 037 */
    0, 3, 0, 0, 1, 3, 3, 0,     /* space ! " # $ % & ' */
    0, 0, 3, 3, 0, 3, 0, 3,     /* ( ) * + , - . / */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0 1 2 3 4 5 6 7 */
    1, 1, 0, 0, 3, 3, 3, 3,     /* 8 9 : ; < = > ? */
    0, 1, 1, 1, 1, 1, 1, 1,     /* @ A B C D E F G */
    1, 1, 1, 1, 1, 1, 1, 1,     /* H I J K L M N O */
    1, 1, 1, 1, 1, 1, 1, 1,     /* P Q R S T U V W */
    1, 1, 1, 0, 0, 0, 3, 1,     /* X Y Z [ \ ] ^ _ */
    0, 1, 1, 1, 1, 1, 1, 1,     /* ` a b c d e f g */
    1, 1, 1, 1, 1, 1, 1, 1,     /* h i j k l m n o */
    1, 1, 1, 1, 1, 1, 1, 1,     /* p q r s t u v w */
    1, 1, 1, 0, 3, 0, 3, 0      /* x y z { | } ~ DEL */
};
98
99
100
101
102	int
103	lexi()
104	{
105	register char tok; / local pointer to next char
106	in token */
107	int unary_delim; /* this is set to 1 if the
108	current token
109
110	forces a following operator to
111	be unary */
112	static int last_code; /* the last token type returned */
113	static int l_struct; /* set to 1 if the last token
114	was 'struct' */
115	int code; /* internal code to be returned */
116	char qchar; /* the delimiter character for
117	a string */
118
119	tok = token; /* point to start of place to
120	save token */
121	unary_delim = false;
122	ps.col_1 = ps.last_nl; /* tell world that this token
123	started in column 1 iff the
124	last thing scanned was nl */
125	ps.last_nl = false;
126
127	while (buf_ptr == ' ' \|\| buf_ptr == '\t')
128	{ /* get rid of blanks */
129	ps.col_1 = false; /* leading blanks imply token
130	is not in column 1 */
131	if (++buf_ptr >= buf_end)
132	fill_buffer();
133	}
134
135	/* Scan an alphanumeric token */
136	if (chartype[*buf_ptr] == alphanum \|\| (buf_ptr[0] == '.' && isdigit(buf_ptr[1])))
137	{
138	/* we have a character or number */
139	register char j; / used for searching thru list
140	of
141
142	reserved words */
143	register struct templ *p;
144
145	if (isdigit(*buf_ptr) \|\| (buf_ptr[0] == '.' && isdigit(buf_ptr[1])))
146	{
147	int seendot = 0, seenexp = 0;
148	if (*buf_ptr == '0' &&
149	(buf_ptr[1] == 'x' \|\| buf_ptr[1] == 'X'))
150	{
151	tok++ = buf_ptr++;
152	tok++ = buf_ptr++;
153	while (isxdigit(*buf_ptr))
154	tok++ = buf_ptr++;
155	} else
156	while (1)
157	{
158	if (*buf_ptr == '.')
159	if (seendot)
160	break;
161	else
162	seendot++;
163	tok++ = buf_ptr++;
164	if (!isdigit(buf_ptr) && buf_ptr != '.')
165	if ((buf_ptr != 'E' && buf_ptr != 'e') \|\| seenexp)
166	break;
167	else
168	{
169	seenexp++;
170	seendot++;
171	tok++ = buf_ptr++;
172	if (buf_ptr == '+' \|\| buf_ptr == '-')
173	tok++ = buf_ptr++;
174	}
175	}
176	if (buf_ptr == 'L' \|\| buf_ptr == 'l')
177	tok++ = buf_ptr++;
178	} else
179	while (chartype[*buf_ptr] == alphanum)
180	{ /* copy it over */
181	tok++ = buf_ptr++;
182	if (buf_ptr >= buf_end)
183	fill_buffer();
184	}
185	*tok++ = '\0';
186	while (buf_ptr == ' ' \|\| buf_ptr == '\t')
187	{ /* get rid of blanks */
188	if (++buf_ptr >= buf_end)
189	fill_buffer();
190	}
191	ps.its_a_keyword = false;
192	ps.sizeof_keyword = false;
193	if (l_struct)
194	{ /* if last token was 'struct',
195	then this token should be
196	treated as a declaration */
197	l_struct = false;
198	last_code = ident;
199	ps.last_u_d = true;
200	return (decl);
201	}
202	ps.last_u_d = false; /* Operator after indentifier
203	is binary */
204	last_code = ident; /* Remember that this is the
205	code we will return */
206
207	/* This loop will check if the token is a keyword. */
208	for (p = specials; (j = p->rwd) != 0; p++)
209	{
210	tok = token; /* point at scanned token */
211	if (j++ != tok++ \|\| j++ != tok++)
212	continue; /* This test depends on the
213	fact that identifiers are
214	always at least 1 character
215	long (ie. the first two
216	bytes of the identifier are
217	always meaningful) */
218	if (tok[-1] == 0)
219	break; /* If its a one-character
220	identifier */
221	while (tok++ == j)
222	if (*j++ == 0)
223	goto found_keyword; /* I wish that C had a
224	multi-level break... */
225	}
226	if (p->rwd)
227	{ /* we have a keyword */
228	found_keyword:
229	ps.its_a_keyword = true;
230	ps.last_u_d = true;
231	switch (p->rwcode)
232	{
233	case 1: /* it is a switch */
234	return (swstmt);
235	case 2: /* a case or default */
236	return (casestmt);
237
238	case 3: /* a "struct" */
239	if (ps.p_l_follow)
240	break; /* inside parens: cast */
241	l_struct = true;
242
243	/* Next time around, we will want to know that we have had
244	a 'struct' */
245	case 4: /* one of the declaration
246	keywords */
247	if (ps.p_l_follow)
248	{
249	ps.cast_mask \|= 1 << ps.p_l_follow;
250	break; /* inside parens: cast */
251	}
252	last_code = decl;
253	return (decl);
254
255	case 5: /* if, while, for */
256	return (sp_paren);
257
258	case 6: /* do, else */
259	return (sp_nparen);
260
261	case 7:
262	ps.sizeof_keyword = true;
263	default: /* all others are treated like
264	any other identifier */
265	return (ident);
266	} /* end of switch */
267	} /* end of if (found_it) */
268	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0)
269	{
270	register char *tp = buf_ptr;
271	while (tp < buf_end)
272	if (tp++ == ')' && tp == ';')
273	goto not_proc;
274	strncpy(ps.procname, token, sizeof ps.procname - 1);
275	ps.in_par_decl = 1;
276	not_proc:;
277	}
278	/* The following hack attempts to guess whether or not the
279	current token is in fact a declaration keyword -- one that has
280	been typedefd */
281	if (((buf_ptr == '' && buf_ptr[1] != '=') \|\| isalpha(buf_ptr) \|\| buf_ptr == '_')
282	&& !ps.p_l_follow
283	&& !ps.block_init
284	&& (ps.last_token == rparen \|\| ps.last_token == semicolon \|\|
285	ps.last_token == decl \|\|
286	ps.last_token == lbrace \|\| ps.last_token == rbrace))
287	{
288	ps.its_a_keyword = true;
289	ps.last_u_d = true;
290	last_code = decl;
291	return decl;
292	}
293	if (last_code == decl) /* if this is a declared
294	variable, then following
295	sign is unary */
296	ps.last_u_d = true; /* will make "int a -1" work */
297	last_code = ident;
298	return (ident); /* the ident is not in the list */
299	} /* end of procesing for alpanum
300	character */
301	/* l l l Scan a non-alphanumeric token */
302	tok++ = buf_ptr; /* if it is only a
303	one-character token, it is
304	moved here */
305	*tok = '\0';
306	if (++buf_ptr >= buf_end)
307	fill_buffer();
308
309	switch (*token)
310	{
311	case '\n':
312	unary_delim = ps.last_u_d;
313	ps.last_nl = true; /* remember that we just had a
314	newline */
315	code = (had_eof ? 0 : newline);
316
317	/* if data has been exausted, the newline is a dummy, and we
318	should return code to stop */
319	break;
320
321	case '\'': /* start of quoted character */
322	case '"': /* start of string */
323	qchar = *token;
324	if (troff)
325	{
326	tok[-1] = '`';
327	if (qchar == '"')
328	*tok++ = '`';
329	tok = chfont(&bodyf, &stringf, tok);
330	}
331	do
332	{ /* copy the string */
333	while (1)
334	{ /* move one character or
335	[/<char>]<char> */
336	if (*buf_ptr == '\n')
337	{
338	printf("%d: Unterminated literal\n", line_no);
339	goto stop_lit;
340	}
341	tok = buf_ptr++;
342	if (buf_ptr >= buf_end)
343	fill_buffer();
344	if (had_eof \|\| ((tok - token) > (bufsize - 2)))
345	{
346	printf("Unterminated literal\n");
347	++tok;
348	goto stop_lit;
349	/* get outof literal copying loop */
350	}
351	if (*tok == BACKSLASH)
352	{ /* if escape, copy extra char */
353	if (buf_ptr == '\n') / check for escaped newline */
354	++line_no;
355	if (troff)
356	{
357	*++tok = BACKSLASH;
358	if (*buf_ptr == BACKSLASH)
359	*++tok = BACKSLASH;
360	}
361	++tok = buf_ptr++;
362	++tok; /* we must increment this again
363	because we copied two chars */
364	if (buf_ptr >= buf_end)
365	fill_buffer();
366	} else
367	break; /* we copied one character */
368	} /* end of while (1) */
369	} while (*tok++ != qchar);
370	if (troff)
371	{
372	tok = chfont(&stringf, &bodyf, tok - 1);
373	if (qchar == '"')
374	*tok++ = '\'';
375	}
376	stop_lit:
377	code = ident;
378	break;
379
380	case ('('):
381	case ('['):
382	unary_delim = true;
383	code = lparen;
384	break;
385
386	case (')'):
387	case (']'):
388	code = rparen;
389	break;
390
391	case '#':
392	unary_delim = ps.last_u_d;
393	code = preesc;
394	break;
395
396	case '?':
397	unary_delim = true;
398	code = question;
399	break;
400
401	case (':'):
402	code = colon;
403	unary_delim = true;
404	break;
405
406	case (';'):
407	unary_delim = true;
408	code = semicolon;
409	break;
410
411	case ('{'):
412	unary_delim = true;
413
414	/* if (ps.in_or_st) ps.block_init = 1; */
415	code = ps.block_init ? lparen : lbrace;
416	break;
417
418	case ('}'):
419	unary_delim = true;
420	code = ps.block_init ? rparen : rbrace;
421	break;
422
423	case 014: /* a form feed */
424	unary_delim = ps.last_u_d;
425	ps.last_nl = true; /* remember this so we can set
426	'ps.col_1' right */
427	code = form_feed;
428	break;
429
430	case (','):
431	unary_delim = true;
432	code = comma;
433	break;
434
435	case '.':
436	unary_delim = false;
437	code = period;
438	break;
439
440	case '-':
441	case '+': /* check for -, +, --, ++ */
442	code = (ps.last_u_d ? unary_op : binary_op);
443	unary_delim = true;
444
445	if (*buf_ptr == token[0])
446	{
447	/* check for doubled character */
448	tok++ = buf_ptr++;
449	/* buffer overflow will be checked at end of loop */
450	if (last_code == ident \|\| last_code == rparen)
451	{
452	code = (ps.last_u_d ? unary_op : postop);
453	/* check for following ++ or -- */
454	unary_delim = false;
455	}
456	} else if (*buf_ptr == '=')
457	/* check for operator += */
458	tok++ = buf_ptr++;
459	else if (*buf_ptr == '>')
460	{
461	/* check for operator -> */
462	tok++ = buf_ptr++;
463	if (!ptr_binop)
464	{
465	unary_delim = false;
466	code = unary_op;
467	ps.want_blank = false;
468	}
469	}
470	break; /* buffer overflow will be
471	checked at end of switch */
472
473	case '=':
474	if (ps.in_or_st)
475	ps.block_init = 1;
476	#ifdef undef
477	if (chartype[*buf_ptr] == opchar)
478	{ /* we have two char assignment */
479	tok[-1] = *buf_ptr++;
480	if ((tok[-1] == '<' \|\| tok[-1] == '>') && tok[-1] == *buf_ptr)
481	tok++ = buf_ptr++;
482	tok++ = '='; / Flip =+ to += */
483	*tok = 0;
484	}
485	#else
486	if (*buf_ptr == '=')
487	{ /* == */
488	tok++ = '='; / Flip =+ to += */
489	buf_ptr++;
490	*tok = 0;
491	}
492	#endif
493	code = binary_op;
494	unary_delim = true;
495	break;
496	/* can drop thru!!! */
497
498	case '>':
499	case '<':
500	case '!': /* ops like <, <<, <=, !=, etc */
501	if (buf_ptr == '>' \|\| buf_ptr == '<' \|\| *buf_ptr == '=')
502	{
503	tok++ = buf_ptr;
504	if (++buf_ptr >= buf_end)
505	fill_buffer();
506	}
507	if (*buf_ptr == '=')
508	tok++ = buf_ptr++;
509	code = (ps.last_u_d ? unary_op : binary_op);
510	unary_delim = true;
511	break;
512
513	default:
514	if (token[0] == '/' && buf_ptr == '')
515	{
516	/* it is start of comment */
517	tok++ = '';
518
519	if (++buf_ptr >= buf_end)
520	fill_buffer();
521
522	code = comment;
523	unary_delim = ps.last_u_d;
524	break;
525	}
526	while ((tok - 1) == buf_ptr \|\| *buf_ptr == '=')
527	{
528	/* handle \|\|, &&, etc, and also things as in int ****i /
529	tok++ = buf_ptr;
530	if (++buf_ptr >= buf_end)
531	fill_buffer();
532	}
533	code = (ps.last_u_d ? unary_op : binary_op);
534	unary_delim = true;
535
536
537	} /* end of switch */
538	if (code != newline)
539	{
540	l_struct = false;
541	last_code = code;
542	}
543	if (buf_ptr >= buf_end) /* check for input buffer empty */
544	fill_buffer();
545	ps.last_u_d = unary_delim;
546	tok = '\0'; / null terminate the token */
547	return (code);
548	}
549
550	/*
551	* Add the given keyword to the keyword table, using val as the keyword type
552	*/
553	void addkey(key, val)
554	char *key;
555	int val;
556	{
557	register struct templ *p = specials;
558	while (p->rwd)
559	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
560	return;
561	else
562	p++;
563	if (p >= specials + sizeof specials / sizeof specials[0])
564	return; /* For now, table overflows are
565	silently ignored */
566	p->rwd = key;
567	p->rwcode = val;
568	p[1].rwd = 0;
569	p[1].rwcode = 0;
570	return;
571	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: