source: trunk/minix/commands/i386/asmconv/parse_bas.c@ 15

Last change on this file since 15 was 9, checked in by Mattia Monga, 14 years ago

Minix 3.1.2a

File size: 22.5 KB
Line 
/*	parse_bas.c - parse BCC AS assembly		Author: Kees J. Bot
 *								13 Nov 1994
 */
4#define nil 0
5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8#include <assert.h>
9#include "asmconv.h"
10#include "token.h"
11#include "asm86.h"
12#include "languages.h"
13
14typedef struct mnemonic { /* BAS mnemonics translation table. */
15 char *name;
16 opcode_t opcode;
17 optype_t optype;
18} mnemonic_t;
19
20static mnemonic_t mnemtab[] = { /* This array is sorted. */
21 { ".align", DOT_ALIGN, PSEUDO },
22 { ".ascii", DOT_ASCII, PSEUDO },
23 { ".asciz", DOT_ASCIZ, PSEUDO },
24 { ".assert", DOT_ASSERT, PSEUDO },
25 { ".base", DOT_BASE, PSEUDO },
26 { ".blkb", DOT_SPACE, PSEUDO },
27 { ".bss", DOT_BSS, PSEUDO },
28 { ".byte", DOT_DATA1, PSEUDO },
29 { ".comm", DOT_COMM, PSEUDO },
30 { ".data", DOT_DATA, PSEUDO },
31 { ".define", DOT_DEFINE, PSEUDO },
32 { ".end", DOT_END, PSEUDO },
33 { ".even", DOT_ALIGN, PSEUDO },
34 { ".extern", DOT_EXTERN, PSEUDO },
35 { ".file", DOT_FILE, PSEUDO },
36 { ".globl", DOT_DEFINE, PSEUDO },
37 { ".lcomm", DOT_LCOMM, PSEUDO },
38 { ".line", DOT_LINE, PSEUDO },
39 { ".list", DOT_LIST, PSEUDO },
40 { ".long", DOT_DATA4, PSEUDO },
41 { ".nolist", DOT_NOLIST, PSEUDO },
42 { ".rom", DOT_ROM, PSEUDO },
43 { ".space", DOT_SPACE, PSEUDO },
44 { ".symb", DOT_SYMB, PSEUDO },
45 { ".text", DOT_TEXT, PSEUDO },
46 { ".use16", DOT_USE16, PSEUDO },
47 { ".use32", DOT_USE32, PSEUDO },
48 { ".word", DOT_DATA2, PSEUDO },
49 { ".zerob", DOT_SPACE, PSEUDO },
50 { ".zerow", DOT_SPACE, PSEUDO },
51 { "aaa", AAA, WORD },
52 { "aad", AAD, WORD },
53 { "aam", AAM, WORD },
54 { "aas", AAS, WORD },
55 { "adc", ADC, WORD },
56 { "add", ADD, WORD },
57 { "and", AND, WORD },
58 { "arpl", ARPL, WORD },
59 { "bc", JB, JUMP },
60 { "beq", JE, JUMP },
61 { "bge", JGE, JUMP },
62 { "bgt", JG, JUMP },
63 { "bhi", JA, JUMP },
64 { "bhis", JAE, JUMP },
65 { "ble", JLE, JUMP },
66 { "blo", JB, JUMP },
67 { "blos", JBE, JUMP },
68 { "blt", JL, JUMP },
69 { "bnc", JAE, JUMP },
70 { "bne", JNE, JUMP },
71 { "bound", BOUND, WORD },
72 { "br", JMP, JUMP },
73 { "bsf", BSF, WORD },
74 { "bsr", BSR, WORD },
75 { "bswap", BSWAP, WORD },
76 { "bt", BT, WORD },
77 { "btc", BTC, WORD },
78 { "btr", BTR, WORD },
79 { "bts", BTS, WORD },
80 { "bz", JE, JUMP },
81 { "call", CALL, JUMP },
82 { "callf", CALLF, JUMP },
83 { "cbw", CBW, WORD },
84 { "cdq", CWD, WORD },
85 { "clc", CLC, WORD },
86 { "cld", CLD, WORD },
87 { "cli", CLI, WORD },
88 { "clts", CLTS, WORD },
89 { "cmc", CMC, WORD },
90 { "cmp", CMP, WORD },
91 { "cmps", CMPS, WORD },
92 { "cmpsb", CMPS, BYTE },
93 { "cmpxchg", CMPXCHG, WORD },
94 { "cwd", CWD, WORD },
95 { "cwde", CBW, WORD },
96 { "daa", DAA, WORD },
97 { "das", DAS, WORD },
98 { "dd", DOT_DATA4, PSEUDO },
99 { "dec", DEC, WORD },
100 { "div", DIV, WORD },
101 { "enter", ENTER, WORD },
102 { "export", DOT_DEFINE, PSEUDO },
103 { "f2xm1", F2XM1, WORD },
104 { "fabs", FABS, WORD },
105 { "fadd", FADD, WORD },
106 { "faddd", FADDD, WORD },
107 { "faddp", FADDP, WORD },
108 { "fadds", FADDS, WORD },
109 { "fbld", FBLD, WORD },
110 { "fbstp", FBSTP, WORD },
111 { "fchs", FCHS, WORD },
112 { "fclex", FCLEX, WORD },
113 { "fcomd", FCOMD, WORD },
114 { "fcompd", FCOMPD, WORD },
115 { "fcompp", FCOMPP, WORD },
116 { "fcomps", FCOMPS, WORD },
117 { "fcoms", FCOMS, WORD },
118 { "fcos", FCOS, WORD },
119 { "fdecstp", FDECSTP, WORD },
120 { "fdivd", FDIVD, WORD },
121 { "fdivp", FDIVP, WORD },
122 { "fdivrd", FDIVRD, WORD },
123 { "fdivrp", FDIVRP, WORD },
124 { "fdivrs", FDIVRS, WORD },
125 { "fdivs", FDIVS, WORD },
126 { "ffree", FFREE, WORD },
127 { "fiaddl", FIADDL, WORD },
128 { "fiadds", FIADDS, WORD },
129 { "ficom", FICOM, WORD },
130 { "ficomp", FICOMP, WORD },
131 { "fidivl", FIDIVL, WORD },
132 { "fidivrl", FIDIVRL, WORD },
133 { "fidivrs", FIDIVRS, WORD },
134 { "fidivs", FIDIVS, WORD },
135 { "fildl", FILDL, WORD },
136 { "fildq", FILDQ, WORD },
137 { "filds", FILDS, WORD },
138 { "fimull", FIMULL, WORD },
139 { "fimuls", FIMULS, WORD },
140 { "fincstp", FINCSTP, WORD },
141 { "finit", FINIT, WORD },
142 { "fistl", FISTL, WORD },
143 { "fistp", FISTP, WORD },
144 { "fists", FISTS, WORD },
145 { "fisubl", FISUBL, WORD },
146 { "fisubrl", FISUBRL, WORD },
147 { "fisubrs", FISUBRS, WORD },
148 { "fisubs", FISUBS, WORD },
149 { "fld1", FLD1, WORD },
150 { "fldcw", FLDCW, WORD },
151 { "fldd", FLDD, WORD },
152 { "fldenv", FLDENV, WORD },
153 { "fldl2e", FLDL2E, WORD },
154 { "fldl2t", FLDL2T, WORD },
155 { "fldlg2", FLDLG2, WORD },
156 { "fldln2", FLDLN2, WORD },
157 { "fldpi", FLDPI, WORD },
158 { "flds", FLDS, WORD },
159 { "fldx", FLDX, WORD },
160 { "fldz", FLDZ, WORD },
161 { "fmuld", FMULD, WORD },
162 { "fmulp", FMULP, WORD },
163 { "fmuls", FMULS, WORD },
164 { "fnop", FNOP, WORD },
165 { "fpatan", FPATAN, WORD },
166 { "fprem", FPREM, WORD },
167 { "fprem1", FPREM1, WORD },
168 { "fptan", FPTAN, WORD },
169 { "frndint", FRNDINT, WORD },
170 { "frstor", FRSTOR, WORD },
171 { "fsave", FSAVE, WORD },
172 { "fscale", FSCALE, WORD },
173 { "fsin", FSIN, WORD },
174 { "fsincos", FSINCOS, WORD },
175 { "fsqrt", FSQRT, WORD },
176 { "fstcw", FSTCW, WORD },
177 { "fstd", FSTD, WORD },
178 { "fstenv", FSTENV, WORD },
179 { "fstpd", FSTPD, WORD },
180 { "fstps", FSTPS, WORD },
181 { "fstpx", FSTPX, WORD },
182 { "fsts", FSTS, WORD },
183 { "fstsw", FSTSW, WORD },
184 { "fsubd", FSUBD, WORD },
185 { "fsubp", FSUBP, WORD },
186 { "fsubpr", FSUBPR, WORD },
187 { "fsubrd", FSUBRD, WORD },
188 { "fsubrs", FSUBRS, WORD },
189 { "fsubs", FSUBS, WORD },
190 { "ftst", FTST, WORD },
191 { "fucom", FUCOM, WORD },
192 { "fucomp", FUCOMP, WORD },
193 { "fucompp", FUCOMPP, WORD },
194 { "fxam", FXAM, WORD },
195 { "fxch", FXCH, WORD },
196 { "fxtract", FXTRACT, WORD },
197 { "fyl2x", FYL2X, WORD },
198 { "fyl2xp1", FYL2XP1, WORD },
199 { "hlt", HLT, WORD },
200 { "idiv", IDIV, WORD },
201 { "imul", IMUL, WORD },
202 { "in", IN, WORD },
203 { "inb", IN, BYTE },
204 { "inc", INC, WORD },
205 { "ins", INS, WORD },
206 { "insb", INS, BYTE },
207 { "int", INT, WORD },
208 { "into", INTO, JUMP },
209 { "invd", INVD, WORD },
210 { "invlpg", INVLPG, WORD },
211 { "iret", IRET, JUMP },
212 { "iretd", IRETD, JUMP },
213 { "j", JMP, JUMP },
214 { "ja", JA, JUMP },
215 { "jae", JAE, JUMP },
216 { "jb", JB, JUMP },
217 { "jbe", JBE, JUMP },
218 { "jc", JB, JUMP },
219 { "jcxz", JCXZ, JUMP },
220 { "je", JE, JUMP },
221 { "jecxz", JCXZ, JUMP },
222 { "jeq", JE, JUMP },
223 { "jg", JG, JUMP },
224 { "jge", JGE, JUMP },
225 { "jgt", JG, JUMP },
226 { "jhi", JA, JUMP },
227 { "jhis", JAE, JUMP },
228 { "jl", JL, JUMP },
229 { "jle", JLE, JUMP },
230 { "jlo", JB, JUMP },
231 { "jlos", JBE, JUMP },
232 { "jlt", JL, JUMP },
233 { "jmp", JMP, JUMP },
234 { "jmpf", JMPF, JUMP },
235 { "jna", JBE, JUMP },
236 { "jnae", JB, JUMP },
237 { "jnb", JAE, JUMP },
238 { "jnbe", JA, JUMP },
239 { "jnc", JAE, JUMP },
240 { "jne", JNE, JUMP },
241 { "jng", JLE, JUMP },
242 { "jnge", JL, JUMP },
243 { "jnl", JGE, JUMP },
244 { "jnle", JG, JUMP },
245 { "jno", JNO, JUMP },
246 { "jnp", JNP, JUMP },
247 { "jns", JNS, JUMP },
248 { "jnz", JNE, JUMP },
249 { "jo", JO, JUMP },
250 { "jp", JP, JUMP },
251 { "js", JS, JUMP },
252 { "jz", JE, JUMP },
253 { "lahf", LAHF, WORD },
254 { "lar", LAR, WORD },
255 { "lds", LDS, WORD },
256 { "lea", LEA, WORD },
257 { "leave", LEAVE, WORD },
258 { "les", LES, WORD },
259 { "lfs", LFS, WORD },
260 { "lgdt", LGDT, WORD },
261 { "lgs", LGS, WORD },
262 { "lidt", LIDT, WORD },
263 { "lldt", LLDT, WORD },
264 { "lmsw", LMSW, WORD },
265 { "lock", LOCK, WORD },
266 { "lods", LODS, WORD },
267 { "lodsb", LODS, BYTE },
268 { "loop", LOOP, JUMP },
269 { "loope", LOOPE, JUMP },
270 { "loopne", LOOPNE, JUMP },
271 { "loopnz", LOOPNE, JUMP },
272 { "loopz", LOOPE, JUMP },
273 { "lsl", LSL, WORD },
274 { "lss", LSS, WORD },
275 { "ltr", LTR, WORD },
276 { "mov", MOV, WORD },
277 { "movs", MOVS, WORD },
278 { "movsb", MOVS, BYTE },
279 { "movsx", MOVSX, WORD },
280 { "movzx", MOVZX, WORD },
281 { "mul", MUL, WORD },
282 { "neg", NEG, WORD },
283 { "nop", NOP, WORD },
284 { "not", NOT, WORD },
285 { "or", OR, WORD },
286 { "out", OUT, WORD },
287 { "outb", OUT, BYTE },
288 { "outs", OUTS, WORD },
289 { "outsb", OUTS, BYTE },
290 { "pop", POP, WORD },
291 { "popa", POPA, WORD },
292 { "popad", POPA, WORD },
293 { "popf", POPF, WORD },
294 { "popfd", POPF, WORD },
295 { "push", PUSH, WORD },
296 { "pusha", PUSHA, WORD },
297 { "pushad", PUSHA, WORD },
298 { "pushf", PUSHF, WORD },
299 { "pushfd", PUSHF, WORD },
300 { "rcl", RCL, WORD },
301 { "rcr", RCR, WORD },
302 { "ret", RET, JUMP },
303 { "retf", RETF, JUMP },
304 { "rol", ROL, WORD },
305 { "ror", ROR, WORD },
306 { "sahf", SAHF, WORD },
307 { "sal", SAL, WORD },
308 { "sar", SAR, WORD },
309 { "sbb", SBB, WORD },
310 { "scas", SCAS, WORD },
311 { "seta", SETA, BYTE },
312 { "setae", SETAE, BYTE },
313 { "setb", SETB, BYTE },
314 { "setbe", SETBE, BYTE },
315 { "sete", SETE, BYTE },
316 { "setg", SETG, BYTE },
317 { "setge", SETGE, BYTE },
318 { "setl", SETL, BYTE },
319 { "setna", SETBE, BYTE },
320 { "setnae", SETB, BYTE },
321 { "setnb", SETAE, BYTE },
322 { "setnbe", SETA, BYTE },
323 { "setne", SETNE, BYTE },
324 { "setng", SETLE, BYTE },
325 { "setnge", SETL, BYTE },
326 { "setnl", SETGE, BYTE },
327 { "setnle", SETG, BYTE },
328 { "setno", SETNO, BYTE },
329 { "setnp", SETNP, BYTE },
330 { "setns", SETNS, BYTE },
331 { "seto", SETO, BYTE },
332 { "setp", SETP, BYTE },
333 { "sets", SETS, BYTE },
334 { "setz", SETE, BYTE },
335 { "sgdt", SGDT, WORD },
336 { "shl", SHL, WORD },
337 { "shld", SHLD, WORD },
338 { "shr", SHR, WORD },
339 { "shrd", SHRD, WORD },
340 { "sidt", SIDT, WORD },
341 { "sldt", SLDT, WORD },
342 { "smsw", SMSW, WORD },
343 { "stc", STC, WORD },
344 { "std", STD, WORD },
345 { "sti", STI, WORD },
346 { "stos", STOS, WORD },
347 { "stosb", STOS, BYTE },
348 { "str", STR, WORD },
349 { "sub", SUB, WORD },
350 { "test", TEST, WORD },
351 { "verr", VERR, WORD },
352 { "verw", VERW, WORD },
353 { "wait", WAIT, WORD },
354 { "wbinvd", WBINVD, WORD },
355 { "xadd", XADD, WORD },
356 { "xchg", XCHG, WORD },
357 { "xlat", XLAT, WORD },
358 { "xor", XOR, WORD },
359};
360
void bas_parse_init(char *file)
/* Get ready to parse the BAS assembly file 'file' by starting up the
 * tokenizer on it.
 */
{
	/* Presumably the character that starts a comment in BAS source;
	 * confirm against tok_init().
	 */
	int marker= '!';

	tok_init(file, marker);
}
366
367static void zap(void)
368/* An error, zap the rest of the line. */
369{
370 token_t *t;
371
372 while ((t= get_token(0))->type != T_EOF && t->symbol != ';')
373 skip_token(1);
374}
375
376static mnemonic_t *search_mnem(char *name)
377/* Binary search for a mnemonic. (That's why the table is sorted.) */
378{
379 int low, mid, high;
380 int cmp;
381 mnemonic_t *m;
382
383 low= 0;
384 high= arraysize(mnemtab)-1;
385 while (low <= high) {
386 mid= (low + high) / 2;
387 m= &mnemtab[mid];
388
389 if ((cmp= strcmp(name, m->name)) == 0) return m;
390
391 if (cmp < 0) high= mid-1; else low= mid+1;
392 }
393 return nil;
394}
395
396static expression_t *bas_get_C_expression(int *pn)
397/* Read a "C-like" expression. Note that we don't worry about precedence,
398 * the expression is printed later like it is read. If the target language
399 * does not have all the operators (like ~) then this has to be repaired by
400 * changing the source file. (No problem, you still have one source file
401 * to maintain, not two.)
402 */
403{
404 expression_t *e, *a1, *a2;
405 token_t *t;
406
407 if ((t= get_token(*pn))->symbol == '(') {
408 /* ( expr ): grouping. */
409 (*pn)++;
410 if ((a1= bas_get_C_expression(pn)) == nil) return nil;
411 if (get_token(*pn)->symbol != ')') {
412 parse_err(1, t, "missing )\n");
413 del_expr(a1);
414 return nil;
415 }
416 (*pn)++;
417 e= new_expr();
418 e->operator= '[';
419 e->middle= a1;
420 } else
421 if (t->type == T_WORD || t->type == T_STRING) {
422 /* Label, number, or string. */
423 e= new_expr();
424 e->operator= t->type == T_WORD ? 'W' : 'S';
425 e->name= allocate(nil, (t->len+1) * sizeof(e->name[0]));
426 memcpy(e->name, t->name, t->len+1);
427 e->len= t->len;
428 (*pn)++;
429 } else
430 if (t->symbol == '+' || t->symbol == '-' || t->symbol == '~') {
431 /* Unary operator. */
432 (*pn)++;
433 if ((a1= bas_get_C_expression(pn)) == nil) return nil;
434 e= new_expr();
435 e->operator= t->symbol;
436 e->middle= a1;
437 } else
438 if (t->symbol == '$' && get_token(*pn + 1)->type == T_WORD) {
439 /* A hexadecimal number. */
440 t= get_token(*pn + 1);
441 e= new_expr();
442 e->operator= 'W';
443 e->name= allocate(nil, (t->len+3) * sizeof(e->name[0]));
444 strcpy(e->name, "0x");
445 memcpy(e->name+2, t->name, t->len+1);
446 e->len= t->len+2;
447 (*pn)+= 2;
448 } else {
449 parse_err(1, t, "expression syntax error\n");
450 return nil;
451 }
452
453 switch ((t= get_token(*pn))->symbol) {
454 case '+':
455 case '-':
456 case '*':
457 case '/':
458 case '%':
459 case '&':
460 case '|':
461 case '^':
462 case S_LEFTSHIFT:
463 case S_RIGHTSHIFT:
464 (*pn)++;
465 a1= e;
466 if ((a2= bas_get_C_expression(pn)) == nil) {
467 del_expr(a1);
468 return nil;
469 }
470 e= new_expr();
471 e->operator= t->symbol;
472 e->left= a1;
473 e->right= a2;
474 }
475 return e;
476}
477
478/* We want to know the sizes of the first two operands. */
479static optype_t optypes[2];
480static int op_idx;
481
482static expression_t *bas_get_operand(int *pn)
483/* Get something like: [memory], offset[base+index*scale], or simpler. */
484{
485 expression_t *e, *offset, *base, *index;
486 token_t *t;
487 int c;
488 optype_t optype;
489
490 /* Prefixed by 'byte', 'word' or 'dword'? */
491 if ((t= get_token(*pn))->type == T_WORD && (
492 strcmp(t->name, "byte") == 0
493 || strcmp(t->name, "word") == 0
494 || strcmp(t->name, "dword") == 0)
495 ) {
496 switch (t->name[0]) {
497 case 'b': optype= BYTE; break;
498 case 'w': optype= use16() ? WORD : OWORD; break;
499 case 'd': optype= use32() ? WORD : OWORD; break;
500 }
501 if (op_idx < arraysize(optypes)) optypes[op_idx++]= optype;
502 (*pn)++;
503
504 /* It may even be "byte ptr"... */
505 if ((t= get_token(*pn))->type == T_WORD
506 && strcmp(t->name, "ptr") == 0) {
507 (*pn)++;
508 }
509 }
510
511 /* Is it [memory]? */
512 if (get_token(*pn)->symbol == '['
513 && ((t= get_token(*pn + 1))->type != T_WORD
514 || !isregister(t->name))
515 ) {
516 /* A memory dereference. */
517 (*pn)++;
518 if ((offset= bas_get_C_expression(pn)) == nil) return nil;
519 if (get_token(*pn)->symbol != ']') {
520 parse_err(1, t, "operand syntax error\n");
521 del_expr(offset);
522 return nil;
523 }
524 (*pn)++;
525 e= new_expr();
526 e->operator= '(';
527 e->middle= offset;
528 return e;
529 }
530
531 /* #something? *something? */
532 if ((c= get_token(*pn)->symbol) == '#' || c == '*') {
533 /* '#' and '*' are often used to introduce some constant. */
534 (*pn)++;
535 }
536
537 /* Offset? */
538 if (get_token(*pn)->symbol != '[') {
539 /* There is an offset. */
540 if ((offset= bas_get_C_expression(pn)) == nil) return nil;
541 } else {
542 /* No offset. */
543 offset= nil;
544 }
545
546 /* [base]? [base+? base-? */
547 c= 0;
548 if (get_token(*pn)->symbol == '['
549 && (t= get_token(*pn + 1))->type == T_WORD
550 && isregister(t->name)
551 && ((c= get_token(*pn + 2)->symbol) == ']' || c=='+' || c=='-')
552 ) {
553 /* A base register expression. */
554 base= new_expr();
555 base->operator= 'B';
556 base->name= copystr(t->name);
557 (*pn)+= c == ']' ? 3 : 2;
558 } else {
559 /* No base register expression. */
560 base= nil;
561 }
562
563 /* +offset]? -offset]? */
564 if (offset == nil
565 && (c == '+' || c == '-')
566 && (t= get_token(*pn + 1))->type == T_WORD
567 && !isregister(t->name)
568 ) {
569 (*pn)++;
570 if ((offset= bas_get_C_expression(pn)) == nil) return nil;
571 if (get_token(*pn)->symbol != ']') {
572 parse_err(1, t, "operand syntax error\n");
573 del_expr(offset);
574 del_expr(base);
575 return nil;
576 }
577 (*pn)++;
578 c= 0;
579 }
580
581 /* [index*scale]? +index*scale]? */
582 if (c == '+' || get_token(*pn)->symbol == '[') {
583 /* An index most likely. */
584 token_t *m= nil;
585
586 if (!( /* This must be true: */
587 (t= get_token(*pn + 1))->type == T_WORD
588 && isregister(t->name)
589 && (get_token(*pn + 2)->symbol == ']' || (
590 get_token(*pn + 2)->symbol == '*'
591 && (m= get_token(*pn + 3))->type == T_WORD
592 && strchr("1248", m->name[0]) != nil
593 && m->name[1] == 0
594 && get_token(*pn + 4)->symbol == ']'
595 ))
596 )) {
597 /* Alas it isn't */
598 parse_err(1, t, "operand syntax error\n");
599 del_expr(offset);
600 del_expr(base);
601 return nil;
602 }
603 /* Found an index. */
604 index= new_expr();
605 index->operator= m == nil ? '1' : m->name[0];
606 index->name= copystr(t->name);
607 (*pn)+= (m == nil ? 3 : 5);
608 } else {
609 /* No index. */
610 index= nil;
611 }
612
613 if (base == nil && index == nil) {
614 /* Return a lone offset as is. */
615 e= offset;
616
617 /* Lone registers tell operand size. */
618 if (offset->operator == 'W' && isregister(offset->name)) {
619 switch (isregister(offset->name)) {
620 case 1: optype= BYTE; break;
621 case 2: optype= use16() ? WORD : OWORD; break;
622 case 4: optype= use32() ? WORD : OWORD; break;
623 }
624 if (op_idx < arraysize(optypes))
625 optypes[op_idx++]= optype;
626 }
627 } else {
628 e= new_expr();
629 e->operator= 'O';
630 e->left= offset;
631 e->middle= base;
632 e->right= index;
633 }
634 return e;
635}
636
637static expression_t *bas_get_oplist(int *pn)
638/* Get a comma (or colon for jmpf and callf) separated list of instruction
639 * operands.
640 */
641{
642 expression_t *e, *o1, *o2;
643 token_t *t;
644
645 if ((e= bas_get_operand(pn)) == nil) return nil;
646
647 if ((t= get_token(*pn))->symbol == ',' || t->symbol == ':') {
648 o1= e;
649 (*pn)++;
650 if ((o2= bas_get_oplist(pn)) == nil) {
651 del_expr(o1);
652 return nil;
653 }
654 e= new_expr();
655 e->operator= ',';
656 e->left= o1;
657 e->right= o2;
658 }
659 return e;
660}
661
662static asm86_t *bas_get_statement(void)
663/* Get a pseudo op or machine instruction with arguments. */
664{
665 token_t *t= get_token(0);
666 asm86_t *a;
667 mnemonic_t *m;
668 int n;
669 int prefix_seen;
670
671
672 assert(t->type == T_WORD);
673
674 if (strcmp(t->name, ".sect") == 0) {
675 /* .sect .text etc. Accept only four segment names. */
676 skip_token(1);
677 t= get_token(0);
678 if (t->type != T_WORD || (
679 strcmp(t->name, ".text") != 0
680 && strcmp(t->name, ".rom") != 0
681 && strcmp(t->name, ".data") != 0
682 && strcmp(t->name, ".bss") != 0
683 && strcmp(t->name, ".end") != 0
684 )) {
685 parse_err(1, t, "weird section name to .sect\n");
686 return nil;
687 }
688 }
689 a= new_asm86();
690
691 /* Process instruction prefixes. */
692 for (prefix_seen= 0;; prefix_seen= 1) {
693 if (strcmp(t->name, "rep") == 0
694 || strcmp(t->name, "repe") == 0
695 || strcmp(t->name, "repne") == 0
696 || strcmp(t->name, "repz") == 0
697 || strcmp(t->name, "repnz") == 0
698 ) {
699 if (a->rep != ONCE) {
700 parse_err(1, t,
701 "can't have more than one rep\n");
702 }
703 switch (t->name[3]) {
704 case 0: a->rep= REP; break;
705 case 'e':
706 case 'z': a->rep= REPE; break;
707 case 'n': a->rep= REPNE; break;
708 }
709 } else
710 if (strcmp(t->name, "seg") == 0
711 && get_token(1)->type == T_WORD) {
712 if (a->seg != DEFSEG) {
713 parse_err(1, t,
714 "can't have more than one segment prefix\n");
715 }
716 switch (get_token(1)->name[0]) {
717 case 'c': a->seg= CSEG; break;
718 case 'd': a->seg= DSEG; break;
719 case 'e': a->seg= ESEG; break;
720 case 'f': a->seg= FSEG; break;
721 case 'g': a->seg= GSEG; break;
722 case 's': a->seg= SSEG; break;
723 }
724 skip_token(1);
725 } else
726 if (!prefix_seen) {
727 /* No prefix here, get out! */
728 break;
729 } else {
730 /* No more prefixes, next must be an instruction. */
731 if (t->type != T_WORD
732 || (m= search_mnem(t->name)) == nil
733 || m->optype == PSEUDO
734 ) {
735 parse_err(1, t,
736 "machine instruction expected after instruction prefix\n");
737 del_asm86(a);
738 return nil;
739 }
740 break;
741 }
742
743 /* Skip the prefix and extra newlines. */
744 do {
745 skip_token(1);
746 } while ((t= get_token(0))->symbol == ';');
747 }
748
749 /* All the readahead being done upsets the line counter. */
750 a->line= t->line;
751
752 /* Read a machine instruction or pseudo op. */
753 if ((m= search_mnem(t->name)) == nil) {
754 parse_err(1, t, "unknown instruction '%s'\n", t->name);
755 del_asm86(a);
756 return nil;
757 }
758 a->opcode= m->opcode;
759 a->optype= m->optype;
760 if (a->opcode == CBW || a->opcode == CWD) {
761 a->optype= (strcmp(t->name, "cbw") == 0
762 || strcmp(t->name, "cwd") == 0) == use16() ? WORD : OWORD;
763 }
764 for (op_idx= 0; op_idx < arraysize(optypes); op_idx++)
765 optypes[op_idx]= m->optype;
766 op_idx= 0;
767
768 n= 1;
769 if (get_token(1)->symbol != ';'
770 && (a->args= bas_get_oplist(&n)) == nil) {
771 del_asm86(a);
772 return nil;
773 }
774
775 if (m->optype == WORD) {
776 /* Does one of the operands overide the optype? */
777 for (op_idx= 0; op_idx < arraysize(optypes); op_idx++) {
778 if (optypes[op_idx] != m->optype)
779 a->optype= optypes[op_idx];
780 }
781 }
782
783 if (get_token(n)->symbol != ';') {
784 parse_err(1, t, "garbage at end of instruction\n");
785 del_asm86(a);
786 return nil;
787 }
788 switch (a->opcode) {
789 case DOT_ALIGN:
790 /* Restrict .align to have a single numeric argument, some
791 * assemblers think of the argument as a power of two, so
792 * we need to be able to change the value.
793 */
794 if (strcmp(t->name, ".even") == 0 && a->args == nil) {
795 /* .even becomes .align 2. */
796 expression_t *e;
797 a->args= e= new_expr();
798 e->operator= 'W';
799 e->name= copystr("2");
800 e->len= 2;
801 }
802 if (a->args == nil || a->args->operator != 'W'
803 || !isanumber(a->args->name)) {
804 parse_err(1, t,
805 ".align is restricted to one numeric argument\n");
806 del_asm86(a);
807 return nil;
808 }
809 break;
810 case MOVSX:
811 case MOVZX:
812 /* Types of both operands tell the instruction type. */
813 a->optype= optypes[0];
814 if (optypes[1] == BYTE) {
815 a->opcode= a->opcode == MOVSX ? MOVSXB : MOVZXB;
816 }
817 break;
818 case SAL:
819 case SAR:
820 case SHL:
821 case SHR:
822 case RCL:
823 case RCR:
824 case ROL:
825 case ROR:
826 /* Only the first operand tells the operand size. */
827 a->optype= optypes[0];
828 break;
829 default:;
830 }
831 skip_token(n+1);
832 return a;
833}
834
835asm86_t *bas_get_instruction(void)
836{
837 asm86_t *a= nil;
838 expression_t *e;
839 token_t *t;
840
841 while ((t= get_token(0))->symbol == ';')
842 skip_token(1);
843
844 if (t->type == T_EOF) return nil;
845
846 if (t->symbol == '#') {
847 /* Preprocessor line and file change. */
848
849 if ((t= get_token(1))->type != T_WORD || !isanumber(t->name)
850 || get_token(2)->type != T_STRING
851 ) {
852 parse_err(1, t, "file not preprocessed?\n");
853 zap();
854 } else {
855 set_file(get_token(2)->name,
856 strtol(get_token(1)->name, nil, 0) - 1);
857
858 /* GNU CPP adds extra cruft, simply zap the line. */
859 zap();
860 }
861 a= bas_get_instruction();
862 } else
863 if (t->type == T_WORD && get_token(1)->symbol == ':') {
864 /* A label definition. */
865 a= new_asm86();
866 a->line= t->line;
867 a->opcode= DOT_LABEL;
868 a->optype= PSEUDO;
869 a->args= e= new_expr();
870 e->operator= ':';
871 e->name= copystr(t->name);
872 skip_token(2);
873 } else
874 if (t->type == T_WORD && get_token(1)->symbol == '=') {
875 int n= 2;
876
877 if ((e= bas_get_C_expression(&n)) == nil) {
878 zap();
879 a= bas_get_instruction();
880 } else
881 if (get_token(n)->symbol != ';') {
882 parse_err(1, t, "garbage after assignment\n");
883 zap();
884 a= bas_get_instruction();
885 } else {
886 a= new_asm86();
887 a->line= t->line;
888 a->opcode= DOT_EQU;
889 a->optype= PSEUDO;
890 a->args= new_expr();
891 a->args->operator= '=';
892 a->args->name= copystr(t->name);
893 a->args->middle= e;
894 skip_token(n+1);
895 }
896 } else
897 if (t->type == T_WORD && get_token(1)->type == T_WORD
898 && strcmp(get_token(1)->name, "lcomm") == 0) {
899 /* Local common block definition. */
900 int n= 2;
901
902 if ((e= bas_get_C_expression(&n)) == nil) {
903 zap();
904 a= bas_get_instruction();
905 } else
906 if (get_token(n)->symbol != ';') {
907 parse_err(1, t, "garbage after lcomm\n");
908 zap();
909 a= bas_get_instruction();
910 } else {
911 a= new_asm86();
912 a->line= t->line;
913 a->opcode= DOT_LCOMM;
914 a->optype= PSEUDO;
915 a->args= new_expr();
916 a->args->operator= ',';
917 a->args->right= e;
918 a->args->left= e= new_expr();
919 e->operator= 'W';
920 e->name= copystr(t->name);
921 e->len= strlen(e->name)+1;
922 skip_token(n+1);
923 }
924 } else
925 if (t->type == T_WORD) {
926 if ((a= bas_get_statement()) == nil) {
927 zap();
928 a= bas_get_instruction();
929 }
930 } else {
931 parse_err(1, t, "syntax error\n");
932 zap();
933 a= bas_get_instruction();
934 }
935 if (a->optype == OWORD) {
936 a->optype= WORD;
937 a->oaz|= OPZ;
938 }
939 return a;
940}
Note: See TracBrowser for help on using the repository browser.