source: trunk/minix/commands/i386/asmconv/tokenize.c@ 11

Last change on this file since 11 was 9, checked in by Mattia Monga, 14 years ago

Minix 3.1.2a

File size: 5.5 KB
RevLine 
[9]1/* tokenize.c - split input into tokens Author: Kees J. Bot
2 * 13 Dec 1993
3 */
4#define nil 0
5#include <stdio.h>
6#include <stdarg.h>
7#include <stdlib.h>
8#include <string.h>
9#include <assert.h>
10#include "asmconv.h"
11#include "token.h"
12
13static FILE *tf;
14static char *tfile;
15static char *orig_tfile;
16static int tcomment;
17static int tc;
18static long tline;
19static token_t *tq;
20
21static void readtc(void)
22/* Read one character from the input file and put it in the global 'tc'. */
23{
24 static int nl= 0;
25
26 if (nl) tline++;
27 if ((tc= getc(tf)) == EOF && ferror(tf)) fatal(orig_tfile);
28 nl= (tc == '\n');
29}
30
31void set_file(char *file, long line)
32/* Set file name and line number, changed by a preprocessor trick. */
33{
34 deallocate(tfile);
35 tfile= allocate(nil, (strlen(file) + 1) * sizeof(tfile[0]));
36 strcpy(tfile, file);
37 tline= line;
38}
39
40void get_file(char **file, long *line)
41/* Get file name and line number. */
42{
43 *file= tfile;
44 *line= tline;
45}
46
47void parse_err(int err, token_t *t, const char *fmt, ...)
48/* Report a parsing error. */
49{
50 va_list ap;
51
52 fprintf(stderr, "\"%s\", line %ld: ", tfile,
53 t == nil ? tline : t->line);
54 va_start(ap, fmt);
55 vfprintf(stderr, fmt, ap);
56 va_end(ap);
57 if (err) set_error();
58}
59
60void tok_init(char *file, int comment)
61/* Open the file to tokenize and initialize the tokenizer. */
62{
63 if (file == nil) {
64 file= "stdin";
65 tf= stdin;
66 } else {
67 if ((tf= fopen(file, "r")) == nil) fatal(file);
68 }
69 orig_tfile= file;
70 set_file(file, 1);
71 readtc();
72 tcomment= comment;
73}
74
static int isspace(int c)
/* True for the characters '\0' up to and including ' ', except newline,
 * which is returned to the caller as a token of its own.  (between() is
 * declared outside this file; it is used here as an inclusive range test.)
 */
{
	return c != '\n' && between('\0', c, ' ');
}
79
/* True if 'c' is the comment start character handed to tok_init(). */
#define iscomment(c)	(tcomment == (c))
81
static int isidentchar(int c)
/* True if 'c' may be part of a word: a letter, a digit, '.' or '_'. */
{
	if (c == '.' || c == '_') return 1;
	if (between('0', c, '9')) return 1;
	if (between('A', c, 'Z')) return 1;
	if (between('a', c, 'z')) return 1;
	return 0;
}
91
92static token_t *new_token(void)
93{
94 token_t *new;
95
96 new= allocate(nil, sizeof(*new));
97 new->next= nil;
98 new->line= tline;
99 new->name= nil;
100 new->symbol= -1;
101 return new;
102}
103
104static token_t *get_word(void)
105/* Read one word, an identifier, a number, a label, or a mnemonic. */
106{
107 token_t *w;
108 char *name;
109 size_t i, len;
110
111 i= 0;
112 len= 16;
113 name= allocate(nil, len * sizeof(name[0]));
114
115 while (isidentchar(tc)) {
116 name[i++]= tc;
117 readtc();
118 if (i == len) name= allocate(name, (len*= 2) * sizeof(name[0]));
119 }
120 name[i]= 0;
121 name= allocate(name, (i+1) * sizeof(name[0]));
122 w= new_token();
123 w->type= T_WORD;
124 w->name= name;
125 w->len= i;
126 return w;
127}
128
129static token_t *get_string(void)
130/* Read a single or double quotes delimited string. */
131{
132 token_t *s;
133 int quote;
134 char *str;
135 size_t i, len;
136 int n, j;
137 int seen;
138
139 quote= tc;
140 readtc();
141
142 i= 0;
143 len= 16;
144 str= allocate(nil, len * sizeof(str[0]));
145
146 while (tc != quote && tc != '\n' && tc != EOF) {
147 seen= -1;
148 if (tc == '\\') {
149 readtc();
150 if (tc == '\n' || tc == EOF) break;
151
152 switch (tc) {
153 case 'a': tc= '\a'; break;
154 case 'b': tc= '\b'; break;
155 case 'f': tc= '\f'; break;
156 case 'n': tc= '\n'; break;
157 case 'r': tc= '\r'; break;
158 case 't': tc= '\t'; break;
159 case 'v': tc= '\v'; break;
160 case 'x':
161 n= 0;
162 for (j= 0; j < 3; j++) {
163 readtc();
164 if (between('0', tc, '9'))
165 tc-= '0' + 0x0;
166 else
167 if (between('A', tc, 'A'))
168 tc-= 'A' + 0xA;
169 else
170 if (between('a', tc, 'a'))
171 tc-= 'a' + 0xa;
172 else {
173 seen= tc;
174 break;
175 }
176 n= n*0x10 + tc;
177 }
178 tc= n;
179 break;
180 default:
181 if (!between('0', tc, '9')) break;
182 n= 0;
183 for (j= 0; j < 3; j++) {
184 if (between('0', tc, '9'))
185 tc-= '0';
186 else {
187 seen= tc;
188 break;
189 }
190 n= n*010 + tc;
191 readtc();
192 }
193 tc= n;
194 }
195 }
196 str[i++]= tc;
197 if (i == len) str= allocate(str, (len*= 2) * sizeof(str[0]));
198
199 if (seen < 0) readtc(); else tc= seen;
200 }
201
202 if (tc == quote) {
203 readtc();
204 } else {
205 parse_err(1, nil, "string contains newline\n");
206 }
207 str[i]= 0;
208 str= allocate(str, (i+1) * sizeof(str[0]));
209 s= new_token();
210 s->type= T_STRING;
211 s->name= str;
212 s->len= i;
213 return s;
214}
215
216static int old_n= 0; /* To speed up n, n+1, n+2, ... accesses. */
217static token_t **old_ptq= &tq;
218
219token_t *get_token(int n)
220/* Return the n-th token on the input queue. */
221{
222 token_t *t, **ptq;
223
224 assert(n >= 0);
225
226 if (0 && n >= old_n) {
227 /* Go forward from the previous point. */
228 n-= old_n;
229 old_n+= n;
230 ptq= old_ptq;
231 } else {
232 /* Restart from the head of the queue. */
233 old_n= n;
234 ptq= &tq;
235 }
236
237 for (;;) {
238 if ((t= *ptq) == nil) {
239 /* Token queue doesn't have element <n>, read a
240 * new token from the input stream.
241 */
242 while (isspace(tc) || iscomment(tc)) {
243 if (iscomment(tc)) {
244 while (tc != '\n' && tc != EOF)
245 readtc();
246 } else {
247 readtc();
248 }
249 }
250
251 if (tc == EOF) {
252 t= new_token();
253 t->type= T_EOF;
254 } else
255 if (isidentchar(tc)) {
256 t= get_word();
257 } else
258 if (tc == '\'' || tc == '"') {
259 t= get_string();
260 } else {
261 if (tc == '\n') tc= ';';
262 t= new_token();
263 t->type= T_CHAR;
264 t->symbol= tc;
265 readtc();
266 if (t->symbol == '<' && tc == '<') {
267 t->symbol= S_LEFTSHIFT;
268 readtc();
269 } else
270 if (t->symbol == '>' && tc == '>') {
271 t->symbol= S_RIGHTSHIFT;
272 readtc();
273 }
274 }
275 *ptq= t;
276 }
277 if (n == 0) break;
278 n--;
279 ptq= &t->next;
280 }
281 old_ptq= ptq;
282 return t;
283}
284
285void skip_token(int n)
286/* Remove n tokens from the input queue. One is not allowed to skip unread
287 * tokens.
288 */
289{
290 token_t *junk;
291
292 assert(n >= 0);
293
294 while (n > 0) {
295 assert(tq != nil);
296
297 junk= tq;
298 tq= tq->next;
299 deallocate(junk->name);
300 deallocate(junk);
301 n--;
302 }
303 /* Reset the old reference. */
304 old_n= 0;
305 old_ptq= &tq;
306}
Note: See TracBrowser for help on using the repository browser.