/*	tokenize.c - split input into tokens		Author: Kees J. Bot
 *								13 Dec 1993
 */
|
---|
| 4 | #define nil 0
|
---|
| 5 | #include <stdio.h>
|
---|
| 6 | #include <stdarg.h>
|
---|
| 7 | #include <stdlib.h>
|
---|
| 8 | #include <string.h>
|
---|
| 9 | #include <assert.h>
|
---|
| 10 | #include "asmconv.h"
|
---|
| 11 | #include "token.h"
|
---|
| 12 |
|
---|
| 13 | static FILE *tf; /* Input stream that readtc() reads from; set by tok_init(). */
|
---|
| 14 | static char *tfile; /* Current file name used in diagnostics; a private copy made by set_file(). */
|
---|
| 15 | static char *orig_tfile; /* Name given to tok_init() (or "stdin"); reported on a read error. */
|
---|
| 16 | static int tcomment; /* Character that starts a comment running to end of line. */
|
---|
| 17 | static int tc; /* Current lookahead character, or EOF. */
|
---|
| 18 | static long tline; /* Current line number; advanced by readtc(), reset by set_file(). */
|
---|
| 19 | static token_t *tq; /* Head of the queue of tokens read but not yet skipped. */
|
---|
| 20 |
|
---|
| 21 | static void readtc(void)
|
---|
| 22 | /* Read one character from the input file and put it in the global 'tc'. */
|
---|
| 23 | {
|
---|
| 24 | static int nl= 0;
|
---|
| 25 |
|
---|
| 26 | if (nl) tline++;
|
---|
| 27 | if ((tc= getc(tf)) == EOF && ferror(tf)) fatal(orig_tfile);
|
---|
| 28 | nl= (tc == '\n');
|
---|
| 29 | }
|
---|
| 30 |
|
---|
| 31 | void set_file(char *file, long line)
|
---|
| 32 | /* Set file name and line number, changed by a preprocessor trick. */
|
---|
| 33 | {
|
---|
| 34 | deallocate(tfile);
|
---|
| 35 | tfile= allocate(nil, (strlen(file) + 1) * sizeof(tfile[0]));
|
---|
| 36 | strcpy(tfile, file);
|
---|
| 37 | tline= line;
|
---|
| 38 | }
|
---|
| 39 |
|
---|
| 40 | void get_file(char **file, long *line)
|
---|
| 41 | /* Get file name and line number. */
|
---|
| 42 | {
|
---|
| 43 | *file= tfile;
|
---|
| 44 | *line= tline;
|
---|
| 45 | }
|
---|
| 46 |
|
---|
| 47 | void parse_err(int err, token_t *t, const char *fmt, ...)
|
---|
| 48 | /* Report a parsing error. */
|
---|
| 49 | {
|
---|
| 50 | va_list ap;
|
---|
| 51 |
|
---|
| 52 | fprintf(stderr, "\"%s\", line %ld: ", tfile,
|
---|
| 53 | t == nil ? tline : t->line);
|
---|
| 54 | va_start(ap, fmt);
|
---|
| 55 | vfprintf(stderr, fmt, ap);
|
---|
| 56 | va_end(ap);
|
---|
| 57 | if (err) set_error();
|
---|
| 58 | }
|
---|
| 59 |
|
---|
| 60 | void tok_init(char *file, int comment)
|
---|
| 61 | /* Open the file to tokenize and initialize the tokenizer. */
|
---|
| 62 | {
|
---|
| 63 | if (file == nil) {
|
---|
| 64 | file= "stdin";
|
---|
| 65 | tf= stdin;
|
---|
| 66 | } else {
|
---|
| 67 | if ((tf= fopen(file, "r")) == nil) fatal(file);
|
---|
| 68 | }
|
---|
| 69 | orig_tfile= file;
|
---|
| 70 | set_file(file, 1);
|
---|
| 71 | readtc();
|
---|
| 72 | tcomment= comment;
|
---|
| 73 | }
|
---|
| 74 |
|
---|
static int isspace(int c)
/* Tokenizer's notion of blank space: any character from '\0' up to and
 * including ' ', except newline.  Newline is left out because get_token()
 * turns it into a token of its own.  Note that this shares its name with the
 * <ctype.h> function but does not have the same meaning.
 */
{
	if (c == '\n') return 0;

	return between('\0', c, ' ') ? 1 : 0;
}
|
---|
| 79 |
|
---|
| 80 | #define iscomment(c) ((c) == tcomment) /* True if c is the comment character given to tok_init(). */
|
---|
| 81 |
|
---|
static int isidentchar(int c)
/* Return 1 if 'c' may appear in a word (identifier, number, label or
 * mnemonic): a letter, a digit, a dot or an underscore.  Return 0 otherwise.
 */
{
	/* The two punctuation characters first, then the three ranges. */
	if (c == '.' || c == '_') return 1;

	if (between('a', c, 'z')) return 1;
	if (between('A', c, 'Z')) return 1;
	if (between('0', c, '9')) return 1;

	return 0;
}
|
---|
| 91 |
|
---|
| 92 | static token_t *new_token(void)
|
---|
| 93 | {
|
---|
| 94 | token_t *new;
|
---|
| 95 |
|
---|
| 96 | new= allocate(nil, sizeof(*new));
|
---|
| 97 | new->next= nil;
|
---|
| 98 | new->line= tline;
|
---|
| 99 | new->name= nil;
|
---|
| 100 | new->symbol= -1;
|
---|
| 101 | return new;
|
---|
| 102 | }
|
---|
| 103 |
|
---|
| 104 | static token_t *get_word(void)
|
---|
| 105 | /* Read one word, an identifier, a number, a label, or a mnemonic. */
|
---|
| 106 | {
|
---|
| 107 | token_t *w;
|
---|
| 108 | char *name;
|
---|
| 109 | size_t i, len;
|
---|
| 110 |
|
---|
| 111 | i= 0;
|
---|
| 112 | len= 16;
|
---|
| 113 | name= allocate(nil, len * sizeof(name[0]));
|
---|
| 114 |
|
---|
| 115 | while (isidentchar(tc)) {
|
---|
| 116 | name[i++]= tc;
|
---|
| 117 | readtc();
|
---|
| 118 | if (i == len) name= allocate(name, (len*= 2) * sizeof(name[0]));
|
---|
| 119 | }
|
---|
| 120 | name[i]= 0;
|
---|
| 121 | name= allocate(name, (i+1) * sizeof(name[0]));
|
---|
| 122 | w= new_token();
|
---|
| 123 | w->type= T_WORD;
|
---|
| 124 | w->name= name;
|
---|
| 125 | w->len= i;
|
---|
| 126 | return w;
|
---|
| 127 | }
|
---|
| 128 |
|
---|
| 129 | static token_t *get_string(void)
|
---|
| 130 | /* Read a single or double quotes delimited string. */
|
---|
| 131 | {
|
---|
| 132 | token_t *s;
|
---|
| 133 | int quote;
|
---|
| 134 | char *str;
|
---|
| 135 | size_t i, len;
|
---|
| 136 | int n, j;
|
---|
| 137 | int seen;
|
---|
| 138 |
|
---|
| 139 | quote= tc;
|
---|
| 140 | readtc();
|
---|
| 141 |
|
---|
| 142 | i= 0;
|
---|
| 143 | len= 16;
|
---|
| 144 | str= allocate(nil, len * sizeof(str[0]));
|
---|
| 145 |
|
---|
| 146 | while (tc != quote && tc != '\n' && tc != EOF) {
|
---|
| 147 | seen= -1;
|
---|
| 148 | if (tc == '\\') {
|
---|
| 149 | readtc();
|
---|
| 150 | if (tc == '\n' || tc == EOF) break;
|
---|
| 151 |
|
---|
| 152 | switch (tc) {
|
---|
| 153 | case 'a': tc= '\a'; break;
|
---|
| 154 | case 'b': tc= '\b'; break;
|
---|
| 155 | case 'f': tc= '\f'; break;
|
---|
| 156 | case 'n': tc= '\n'; break;
|
---|
| 157 | case 'r': tc= '\r'; break;
|
---|
| 158 | case 't': tc= '\t'; break;
|
---|
| 159 | case 'v': tc= '\v'; break;
|
---|
| 160 | case 'x':
|
---|
| 161 | n= 0;
|
---|
| 162 | for (j= 0; j < 3; j++) {
|
---|
| 163 | readtc();
|
---|
| 164 | if (between('0', tc, '9'))
|
---|
| 165 | tc-= '0' + 0x0;
|
---|
| 166 | else
|
---|
| 167 | if (between('A', tc, 'A'))
|
---|
| 168 | tc-= 'A' + 0xA;
|
---|
| 169 | else
|
---|
| 170 | if (between('a', tc, 'a'))
|
---|
| 171 | tc-= 'a' + 0xa;
|
---|
| 172 | else {
|
---|
| 173 | seen= tc;
|
---|
| 174 | break;
|
---|
| 175 | }
|
---|
| 176 | n= n*0x10 + tc;
|
---|
| 177 | }
|
---|
| 178 | tc= n;
|
---|
| 179 | break;
|
---|
| 180 | default:
|
---|
| 181 | if (!between('0', tc, '9')) break;
|
---|
| 182 | n= 0;
|
---|
| 183 | for (j= 0; j < 3; j++) {
|
---|
| 184 | if (between('0', tc, '9'))
|
---|
| 185 | tc-= '0';
|
---|
| 186 | else {
|
---|
| 187 | seen= tc;
|
---|
| 188 | break;
|
---|
| 189 | }
|
---|
| 190 | n= n*010 + tc;
|
---|
| 191 | readtc();
|
---|
| 192 | }
|
---|
| 193 | tc= n;
|
---|
| 194 | }
|
---|
| 195 | }
|
---|
| 196 | str[i++]= tc;
|
---|
| 197 | if (i == len) str= allocate(str, (len*= 2) * sizeof(str[0]));
|
---|
| 198 |
|
---|
| 199 | if (seen < 0) readtc(); else tc= seen;
|
---|
| 200 | }
|
---|
| 201 |
|
---|
| 202 | if (tc == quote) {
|
---|
| 203 | readtc();
|
---|
| 204 | } else {
|
---|
| 205 | parse_err(1, nil, "string contains newline\n");
|
---|
| 206 | }
|
---|
| 207 | str[i]= 0;
|
---|
| 208 | str= allocate(str, (i+1) * sizeof(str[0]));
|
---|
| 209 | s= new_token();
|
---|
| 210 | s->type= T_STRING;
|
---|
| 211 | s->name= str;
|
---|
| 212 | s->len= i;
|
---|
| 213 | return s;
|
---|
| 214 | }
|
---|
| 215 |
|
---|
| 216 | static int old_n= 0; /* Index asked for in the last get_token() call; kept to speed up n, n+1, n+2, ... accesses. */
|
---|
| 217 | static token_t **old_ptq= &tq; /* Queue link at which the last get_token() lookup ended; skip_token() resets it to the head. */
|
---|
| 218 |
|
---|
| 219 | token_t *get_token(int n)
|
---|
| 220 | /* Return the n-th token on the input queue. */
|
---|
| 221 | {
|
---|
| 222 | token_t *t, **ptq;
|
---|
| 223 |
|
---|
| 224 | assert(n >= 0);
|
---|
| 225 |
|
---|
| 226 | if (0 && n >= old_n) {
|
---|
| 227 | /* Go forward from the previous point. */
|
---|
| 228 | n-= old_n;
|
---|
| 229 | old_n+= n;
|
---|
| 230 | ptq= old_ptq;
|
---|
| 231 | } else {
|
---|
| 232 | /* Restart from the head of the queue. */
|
---|
| 233 | old_n= n;
|
---|
| 234 | ptq= &tq;
|
---|
| 235 | }
|
---|
| 236 |
|
---|
| 237 | for (;;) {
|
---|
| 238 | if ((t= *ptq) == nil) {
|
---|
| 239 | /* Token queue doesn't have element <n>, read a
|
---|
| 240 | * new token from the input stream.
|
---|
| 241 | */
|
---|
| 242 | while (isspace(tc) || iscomment(tc)) {
|
---|
| 243 | if (iscomment(tc)) {
|
---|
| 244 | while (tc != '\n' && tc != EOF)
|
---|
| 245 | readtc();
|
---|
| 246 | } else {
|
---|
| 247 | readtc();
|
---|
| 248 | }
|
---|
| 249 | }
|
---|
| 250 |
|
---|
| 251 | if (tc == EOF) {
|
---|
| 252 | t= new_token();
|
---|
| 253 | t->type= T_EOF;
|
---|
| 254 | } else
|
---|
| 255 | if (isidentchar(tc)) {
|
---|
| 256 | t= get_word();
|
---|
| 257 | } else
|
---|
| 258 | if (tc == '\'' || tc == '"') {
|
---|
| 259 | t= get_string();
|
---|
| 260 | } else {
|
---|
| 261 | if (tc == '\n') tc= ';';
|
---|
| 262 | t= new_token();
|
---|
| 263 | t->type= T_CHAR;
|
---|
| 264 | t->symbol= tc;
|
---|
| 265 | readtc();
|
---|
| 266 | if (t->symbol == '<' && tc == '<') {
|
---|
| 267 | t->symbol= S_LEFTSHIFT;
|
---|
| 268 | readtc();
|
---|
| 269 | } else
|
---|
| 270 | if (t->symbol == '>' && tc == '>') {
|
---|
| 271 | t->symbol= S_RIGHTSHIFT;
|
---|
| 272 | readtc();
|
---|
| 273 | }
|
---|
| 274 | }
|
---|
| 275 | *ptq= t;
|
---|
| 276 | }
|
---|
| 277 | if (n == 0) break;
|
---|
| 278 | n--;
|
---|
| 279 | ptq= &t->next;
|
---|
| 280 | }
|
---|
| 281 | old_ptq= ptq;
|
---|
| 282 | return t;
|
---|
| 283 | }
|
---|
| 284 |
|
---|
| 285 | void skip_token(int n)
|
---|
| 286 | /* Remove n tokens from the input queue. One is not allowed to skip unread
|
---|
| 287 | * tokens.
|
---|
| 288 | */
|
---|
| 289 | {
|
---|
| 290 | token_t *junk;
|
---|
| 291 |
|
---|
| 292 | assert(n >= 0);
|
---|
| 293 |
|
---|
| 294 | while (n > 0) {
|
---|
| 295 | assert(tq != nil);
|
---|
| 296 |
|
---|
| 297 | junk= tq;
|
---|
| 298 | tq= tq->next;
|
---|
| 299 | deallocate(junk->name);
|
---|
| 300 | deallocate(junk);
|
---|
| 301 | n--;
|
---|
| 302 | }
|
---|
| 303 | /* Reset the old reference. */
|
---|
| 304 | old_n= 0;
|
---|
| 305 | old_ptq= &tq;
|
---|
| 306 | }
|
---|