| [9] | 1 | /*      tokenize.c - split input into tokens            Author: Kees J. Bot | 
|---|
|  | 2 | *                                                              13 Dec 1993 | 
|---|
|  | 3 | */ | 
|---|
|  | 4 | #define nil 0 | 
|---|
|  | 5 | #include <stdio.h> | 
|---|
|  | 6 | #include <stdarg.h> | 
|---|
|  | 7 | #include <stdlib.h> | 
|---|
|  | 8 | #include <string.h> | 
|---|
|  | 9 | #include <assert.h> | 
|---|
|  | 10 | #include "asmconv.h" | 
|---|
|  | 11 | #include "token.h" | 
|---|
|  | 12 |  | 
|---|
|  | 13 | static FILE *tf; /* Input stream being tokenized; a file opened by tok_init() or stdin. */ | 
|---|
|  | 14 | static char *tfile; /* Current file name used in error messages; private copy owned by set_file(). */ | 
|---|
|  | 15 | static char *orig_tfile; /* Name given to tok_init(); passed to fatal() when reading fails. */ | 
|---|
|  | 16 | static int tcomment; /* Character that starts a comment running to the end of the line. */ | 
|---|
|  | 17 | static int tc; /* Current lookahead character, or EOF; filled in by readtc(). */ | 
|---|
|  | 18 | static long tline; /* Current line number, stamped on every new token. */ | 
|---|
|  | 19 | static token_t *tq; /* Head of the queue of tokens read but not yet skipped. */ | 
|---|
|  | 20 |  | 
|---|
|  | 21 | static void readtc(void) | 
|---|
|  | 22 | /* Read one character from the input file and put it in the global 'tc'. */ | 
|---|
|  | 23 | { | 
|---|
|  | 24 | static int nl= 0; | 
|---|
|  | 25 |  | 
|---|
|  | 26 | if (nl) tline++; | 
|---|
|  | 27 | if ((tc= getc(tf)) == EOF && ferror(tf)) fatal(orig_tfile); | 
|---|
|  | 28 | nl= (tc == '\n'); | 
|---|
|  | 29 | } | 
|---|
|  | 30 |  | 
|---|
|  | 31 | void set_file(char *file, long line) | 
|---|
|  | 32 | /* Set file name and line number, changed by a preprocessor trick. */ | 
|---|
|  | 33 | { | 
|---|
|  | 34 | deallocate(tfile); | 
|---|
|  | 35 | tfile= allocate(nil, (strlen(file) + 1) * sizeof(tfile[0])); | 
|---|
|  | 36 | strcpy(tfile, file); | 
|---|
|  | 37 | tline= line; | 
|---|
|  | 38 | } | 
|---|
|  | 39 |  | 
|---|
|  | 40 | void get_file(char **file, long *line) | 
|---|
|  | 41 | /* Get file name and line number. */ | 
|---|
|  | 42 | { | 
|---|
|  | 43 | *file= tfile; | 
|---|
|  | 44 | *line= tline; | 
|---|
|  | 45 | } | 
|---|
|  | 46 |  | 
|---|
|  | 47 | void parse_err(int err, token_t *t, const char *fmt, ...) | 
|---|
|  | 48 | /* Report a parsing error. */ | 
|---|
|  | 49 | { | 
|---|
|  | 50 | va_list ap; | 
|---|
|  | 51 |  | 
|---|
|  | 52 | fprintf(stderr, "\"%s\", line %ld: ", tfile, | 
|---|
|  | 53 | t == nil ? tline : t->line); | 
|---|
|  | 54 | va_start(ap, fmt); | 
|---|
|  | 55 | vfprintf(stderr, fmt, ap); | 
|---|
|  | 56 | va_end(ap); | 
|---|
|  | 57 | if (err) set_error(); | 
|---|
|  | 58 | } | 
|---|
|  | 59 |  | 
|---|
|  | 60 | void tok_init(char *file, int comment) | 
|---|
|  | 61 | /* Open the file to tokenize and initialize the tokenizer. */ | 
|---|
|  | 62 | { | 
|---|
|  | 63 | if (file == nil) { | 
|---|
|  | 64 | file= "stdin"; | 
|---|
|  | 65 | tf= stdin; | 
|---|
|  | 66 | } else { | 
|---|
|  | 67 | if ((tf= fopen(file, "r")) == nil) fatal(file); | 
|---|
|  | 68 | } | 
|---|
|  | 69 | orig_tfile= file; | 
|---|
|  | 70 | set_file(file, 1); | 
|---|
|  | 71 | readtc(); | 
|---|
|  | 72 | tcomment= comment; | 
|---|
|  | 73 | } | 
|---|
|  | 74 |  | 
|---|
static int isspace(int c)
/* True for every character from NUL up to and including the space, except
 * the newline, which get_token() hands out as a token of its own.
 */
{
	if (!between('\0', c, ' ')) return 0;

	return c != '\n';
}
|---|
|  | 79 |  | 
|---|
|  | 80 | #define iscomment(c)    ((c) == tcomment) /* True if c is the comment character set by tok_init(). */ | 
|---|
|  | 81 |  | 
|---|
static int isidentchar(int c)
/* True if 'c' may be part of a word: a letter, a digit, a dot or an
 * underscore.
 */
{
	if (between('a', c, 'z')) return 1;
	if (between('A', c, 'Z')) return 1;
	if (between('0', c, '9')) return 1;

	return c == '.' || c == '_';
}
|---|
|  | 91 |  | 
|---|
|  | 92 | static token_t *new_token(void) | 
|---|
|  | 93 | { | 
|---|
|  | 94 | token_t *new; | 
|---|
|  | 95 |  | 
|---|
|  | 96 | new= allocate(nil, sizeof(*new)); | 
|---|
|  | 97 | new->next= nil; | 
|---|
|  | 98 | new->line= tline; | 
|---|
|  | 99 | new->name= nil; | 
|---|
|  | 100 | new->symbol= -1; | 
|---|
|  | 101 | return new; | 
|---|
|  | 102 | } | 
|---|
|  | 103 |  | 
|---|
|  | 104 | static token_t *get_word(void) | 
|---|
|  | 105 | /* Read one word, an identifier, a number, a label, or a mnemonic. */ | 
|---|
|  | 106 | { | 
|---|
|  | 107 | token_t *w; | 
|---|
|  | 108 | char *name; | 
|---|
|  | 109 | size_t i, len; | 
|---|
|  | 110 |  | 
|---|
|  | 111 | i= 0; | 
|---|
|  | 112 | len= 16; | 
|---|
|  | 113 | name= allocate(nil, len * sizeof(name[0])); | 
|---|
|  | 114 |  | 
|---|
|  | 115 | while (isidentchar(tc)) { | 
|---|
|  | 116 | name[i++]= tc; | 
|---|
|  | 117 | readtc(); | 
|---|
|  | 118 | if (i == len) name= allocate(name, (len*= 2) * sizeof(name[0])); | 
|---|
|  | 119 | } | 
|---|
|  | 120 | name[i]= 0; | 
|---|
|  | 121 | name= allocate(name, (i+1) * sizeof(name[0])); | 
|---|
|  | 122 | w= new_token(); | 
|---|
|  | 123 | w->type= T_WORD; | 
|---|
|  | 124 | w->name= name; | 
|---|
|  | 125 | w->len= i; | 
|---|
|  | 126 | return w; | 
|---|
|  | 127 | } | 
|---|
|  | 128 |  | 
|---|
|  | 129 | static token_t *get_string(void) | 
|---|
|  | 130 | /* Read a single or double quotes delimited string. */ | 
|---|
|  | 131 | { | 
|---|
|  | 132 | token_t *s; | 
|---|
|  | 133 | int quote; | 
|---|
|  | 134 | char *str; | 
|---|
|  | 135 | size_t i, len; | 
|---|
|  | 136 | int n, j; | 
|---|
|  | 137 | int seen; | 
|---|
|  | 138 |  | 
|---|
|  | 139 | quote= tc; | 
|---|
|  | 140 | readtc(); | 
|---|
|  | 141 |  | 
|---|
|  | 142 | i= 0; | 
|---|
|  | 143 | len= 16; | 
|---|
|  | 144 | str= allocate(nil, len * sizeof(str[0])); | 
|---|
|  | 145 |  | 
|---|
|  | 146 | while (tc != quote && tc != '\n' && tc != EOF) { | 
|---|
|  | 147 | seen= -1; | 
|---|
|  | 148 | if (tc == '\\') { | 
|---|
|  | 149 | readtc(); | 
|---|
|  | 150 | if (tc == '\n' || tc == EOF) break; | 
|---|
|  | 151 |  | 
|---|
|  | 152 | switch (tc) { | 
|---|
|  | 153 | case 'a':       tc= '\a'; break; | 
|---|
|  | 154 | case 'b':       tc= '\b'; break; | 
|---|
|  | 155 | case 'f':       tc= '\f'; break; | 
|---|
|  | 156 | case 'n':       tc= '\n'; break; | 
|---|
|  | 157 | case 'r':       tc= '\r'; break; | 
|---|
|  | 158 | case 't':       tc= '\t'; break; | 
|---|
|  | 159 | case 'v':       tc= '\v'; break; | 
|---|
|  | 160 | case 'x': | 
|---|
|  | 161 | n= 0; | 
|---|
|  | 162 | for (j= 0; j < 3; j++) { | 
|---|
|  | 163 | readtc(); | 
|---|
|  | 164 | if (between('0', tc, '9')) | 
|---|
|  | 165 | tc-= '0' + 0x0; | 
|---|
|  | 166 | else | 
|---|
|  | 167 | if (between('A', tc, 'A')) | 
|---|
|  | 168 | tc-= 'A' + 0xA; | 
|---|
|  | 169 | else | 
|---|
|  | 170 | if (between('a', tc, 'a')) | 
|---|
|  | 171 | tc-= 'a' + 0xa; | 
|---|
|  | 172 | else { | 
|---|
|  | 173 | seen= tc; | 
|---|
|  | 174 | break; | 
|---|
|  | 175 | } | 
|---|
|  | 176 | n= n*0x10 + tc; | 
|---|
|  | 177 | } | 
|---|
|  | 178 | tc= n; | 
|---|
|  | 179 | break; | 
|---|
|  | 180 | default: | 
|---|
|  | 181 | if (!between('0', tc, '9')) break; | 
|---|
|  | 182 | n= 0; | 
|---|
|  | 183 | for (j= 0; j < 3; j++) { | 
|---|
|  | 184 | if (between('0', tc, '9')) | 
|---|
|  | 185 | tc-= '0'; | 
|---|
|  | 186 | else { | 
|---|
|  | 187 | seen= tc; | 
|---|
|  | 188 | break; | 
|---|
|  | 189 | } | 
|---|
|  | 190 | n= n*010 + tc; | 
|---|
|  | 191 | readtc(); | 
|---|
|  | 192 | } | 
|---|
|  | 193 | tc= n; | 
|---|
|  | 194 | } | 
|---|
|  | 195 | } | 
|---|
|  | 196 | str[i++]= tc; | 
|---|
|  | 197 | if (i == len) str= allocate(str, (len*= 2) * sizeof(str[0])); | 
|---|
|  | 198 |  | 
|---|
|  | 199 | if (seen < 0) readtc(); else tc= seen; | 
|---|
|  | 200 | } | 
|---|
|  | 201 |  | 
|---|
|  | 202 | if (tc == quote) { | 
|---|
|  | 203 | readtc(); | 
|---|
|  | 204 | } else { | 
|---|
|  | 205 | parse_err(1, nil, "string contains newline\n"); | 
|---|
|  | 206 | } | 
|---|
|  | 207 | str[i]= 0; | 
|---|
|  | 208 | str= allocate(str, (i+1) * sizeof(str[0])); | 
|---|
|  | 209 | s= new_token(); | 
|---|
|  | 210 | s->type= T_STRING; | 
|---|
|  | 211 | s->name= str; | 
|---|
|  | 212 | s->len= i; | 
|---|
|  | 213 | return s; | 
|---|
|  | 214 | } | 
|---|
|  | 215 |  | 
|---|
|  | 216 | static int old_n= 0;            /* To speed up n, n+1, n+2, ... accesses: index last asked of get_token(), reset to 0 by skip_token(). */ | 
|---|
|  | 217 | static token_t **old_ptq= &tq; /* Link that points at the token last returned by get_token(); reset to &tq by skip_token(). */ | 
|---|
|  | 218 |  | 
|---|
|  | 219 | token_t *get_token(int n) | 
|---|
|  | 220 | /* Return the n-th token on the input queue. */ | 
|---|
|  | 221 | { | 
|---|
|  | 222 | token_t *t, **ptq; | 
|---|
|  | 223 |  | 
|---|
|  | 224 | assert(n >= 0); | 
|---|
|  | 225 |  | 
|---|
|  | 226 | if (0 && n >= old_n) { | 
|---|
|  | 227 | /* Go forward from the previous point. */ | 
|---|
|  | 228 | n-= old_n; | 
|---|
|  | 229 | old_n+= n; | 
|---|
|  | 230 | ptq= old_ptq; | 
|---|
|  | 231 | } else { | 
|---|
|  | 232 | /* Restart from the head of the queue. */ | 
|---|
|  | 233 | old_n= n; | 
|---|
|  | 234 | ptq= &tq; | 
|---|
|  | 235 | } | 
|---|
|  | 236 |  | 
|---|
|  | 237 | for (;;) { | 
|---|
|  | 238 | if ((t= *ptq) == nil) { | 
|---|
|  | 239 | /* Token queue doesn't have element <n>, read a | 
|---|
|  | 240 | * new token from the input stream. | 
|---|
|  | 241 | */ | 
|---|
|  | 242 | while (isspace(tc) || iscomment(tc)) { | 
|---|
|  | 243 | if (iscomment(tc)) { | 
|---|
|  | 244 | while (tc != '\n' && tc != EOF) | 
|---|
|  | 245 | readtc(); | 
|---|
|  | 246 | } else { | 
|---|
|  | 247 | readtc(); | 
|---|
|  | 248 | } | 
|---|
|  | 249 | } | 
|---|
|  | 250 |  | 
|---|
|  | 251 | if (tc == EOF) { | 
|---|
|  | 252 | t= new_token(); | 
|---|
|  | 253 | t->type= T_EOF; | 
|---|
|  | 254 | } else | 
|---|
|  | 255 | if (isidentchar(tc)) { | 
|---|
|  | 256 | t= get_word(); | 
|---|
|  | 257 | } else | 
|---|
|  | 258 | if (tc == '\'' || tc == '"') { | 
|---|
|  | 259 | t= get_string(); | 
|---|
|  | 260 | } else { | 
|---|
|  | 261 | if (tc == '\n') tc= ';'; | 
|---|
|  | 262 | t= new_token(); | 
|---|
|  | 263 | t->type= T_CHAR; | 
|---|
|  | 264 | t->symbol= tc; | 
|---|
|  | 265 | readtc(); | 
|---|
|  | 266 | if (t->symbol == '<' && tc == '<') { | 
|---|
|  | 267 | t->symbol= S_LEFTSHIFT; | 
|---|
|  | 268 | readtc(); | 
|---|
|  | 269 | } else | 
|---|
|  | 270 | if (t->symbol == '>' && tc == '>') { | 
|---|
|  | 271 | t->symbol= S_RIGHTSHIFT; | 
|---|
|  | 272 | readtc(); | 
|---|
|  | 273 | } | 
|---|
|  | 274 | } | 
|---|
|  | 275 | *ptq= t; | 
|---|
|  | 276 | } | 
|---|
|  | 277 | if (n == 0) break; | 
|---|
|  | 278 | n--; | 
|---|
|  | 279 | ptq= &t->next; | 
|---|
|  | 280 | } | 
|---|
|  | 281 | old_ptq= ptq; | 
|---|
|  | 282 | return t; | 
|---|
|  | 283 | } | 
|---|
|  | 284 |  | 
|---|
|  | 285 | void skip_token(int n) | 
|---|
|  | 286 | /* Remove n tokens from the input queue.  One is not allowed to skip unread | 
|---|
|  | 287 | * tokens. | 
|---|
|  | 288 | */ | 
|---|
|  | 289 | { | 
|---|
|  | 290 | token_t *junk; | 
|---|
|  | 291 |  | 
|---|
|  | 292 | assert(n >= 0); | 
|---|
|  | 293 |  | 
|---|
|  | 294 | while (n > 0) { | 
|---|
|  | 295 | assert(tq != nil); | 
|---|
|  | 296 |  | 
|---|
|  | 297 | junk= tq; | 
|---|
|  | 298 | tq= tq->next; | 
|---|
|  | 299 | deallocate(junk->name); | 
|---|
|  | 300 | deallocate(junk); | 
|---|
|  | 301 | n--; | 
|---|
|  | 302 | } | 
|---|
|  | 303 | /* Reset the old reference. */ | 
|---|
|  | 304 | old_n= 0; | 
|---|
|  | 305 | old_ptq= &tq; | 
|---|
|  | 306 | } | 
|---|