source: trunk/minix/commands/i386/asmconv/parse_ack.c@ 9

Last change on this file since 9 was 9, checked in by Mattia Monga, 14 years ago

Minix 3.1.2a

File size: 21.5 KB
Line 
/*	parse_ack.c - parse ACK assembly		Author: Kees J. Bot
 *		      parse NCC assembly			18 Dec 1993
 */
4#define nil 0
5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8#include <assert.h>
9#include "asmconv.h"
10#include "token.h"
11#include "asm86.h"
12#include "languages.h"
13
14typedef struct mnemonic { /* ACK as86 mnemonics translation table. */
15 char *name;
16 opcode_t opcode;
17 optype_t optype;
18} mnemonic_t;
19
20static mnemonic_t mnemtab[] = { /* This array is sorted. */
21 { ".align", DOT_ALIGN, PSEUDO },
22 { ".ascii", DOT_ASCII, PSEUDO },
23 { ".asciz", DOT_ASCIZ, PSEUDO },
24 { ".assert", DOT_ASSERT, PSEUDO },
25 { ".base", DOT_BASE, PSEUDO },
26 { ".bss", DOT_BSS, PSEUDO },
27 { ".comm", DOT_LCOMM, PSEUDO },
28 { ".data", DOT_DATA, PSEUDO },
29 { ".data1", DOT_DATA1, PSEUDO },
30 { ".data2", DOT_DATA2, PSEUDO },
31 { ".data4", DOT_DATA4, PSEUDO },
32 { ".define", DOT_DEFINE, PSEUDO },
33 { ".end", DOT_END, PSEUDO },
34 { ".extern", DOT_EXTERN, PSEUDO },
35 { ".file", DOT_FILE, PSEUDO },
36 { ".line", DOT_LINE, PSEUDO },
37 { ".list", DOT_LIST, PSEUDO },
38 { ".nolist", DOT_NOLIST, PSEUDO },
39 { ".rom", DOT_ROM, PSEUDO },
40 { ".space", DOT_SPACE, PSEUDO },
41 { ".symb", DOT_SYMB, PSEUDO },
42 { ".text", DOT_TEXT, PSEUDO },
43 { ".use16", DOT_USE16, PSEUDO },
44 { ".use32", DOT_USE32, PSEUDO },
45 { "aaa", AAA, WORD },
46 { "aad", AAD, WORD },
47 { "aam", AAM, WORD },
48 { "aas", AAS, WORD },
49 { "adc", ADC, WORD },
50 { "adcb", ADC, BYTE },
51 { "add", ADD, WORD },
52 { "addb", ADD, BYTE },
53 { "and", AND, WORD },
54 { "andb", AND, BYTE },
55 { "arpl", ARPL, WORD },
56 { "bound", BOUND, WORD },
57 { "bsf", BSF, WORD },
58 { "bsr", BSR, WORD },
59 { "bswap", BSWAP, WORD },
60 { "bt", BT, WORD },
61 { "btc", BTC, WORD },
62 { "btr", BTR, WORD },
63 { "bts", BTS, WORD },
64 { "call", CALL, JUMP },
65 { "callf", CALLF, JUMP },
66 { "cbw", CBW, WORD },
67 { "cdq", CWD, WORD },
68 { "clc", CLC, WORD },
69 { "cld", CLD, WORD },
70 { "cli", CLI, WORD },
71 { "clts", CLTS, WORD },
72 { "cmc", CMC, WORD },
73 { "cmp", CMP, WORD },
74 { "cmpb", CMP, BYTE },
75 { "cmps", CMPS, WORD },
76 { "cmpsb", CMPS, BYTE },
77 { "cmpxchg", CMPXCHG, WORD },
78 { "cwd", CWD, WORD },
79 { "cwde", CBW, WORD },
80 { "daa", DAA, WORD },
81 { "das", DAS, WORD },
82 { "dec", DEC, WORD },
83 { "decb", DEC, BYTE },
84 { "div", DIV, WORD },
85 { "divb", DIV, BYTE },
86 { "enter", ENTER, WORD },
87 { "f2xm1", F2XM1, WORD },
88 { "fabs", FABS, WORD },
89 { "fadd", FADD, WORD },
90 { "faddd", FADDD, WORD },
91 { "faddp", FADDP, WORD },
92 { "fadds", FADDS, WORD },
93 { "fbld", FBLD, WORD },
94 { "fbstp", FBSTP, WORD },
95 { "fchs", FCHS, WORD },
96 { "fclex", FCLEX, WORD },
97 { "fcomd", FCOMD, WORD },
98 { "fcompd", FCOMPD, WORD },
99 { "fcompp", FCOMPP, WORD },
100 { "fcomps", FCOMPS, WORD },
101 { "fcoms", FCOMS, WORD },
102 { "fcos", FCOS, WORD },
103 { "fdecstp", FDECSTP, WORD },
104 { "fdivd", FDIVD, WORD },
105 { "fdivp", FDIVP, WORD },
106 { "fdivrd", FDIVRD, WORD },
107 { "fdivrp", FDIVRP, WORD },
108 { "fdivrs", FDIVRS, WORD },
109 { "fdivs", FDIVS, WORD },
110 { "ffree", FFREE, WORD },
111 { "fiaddl", FIADDL, WORD },
112 { "fiadds", FIADDS, WORD },
113 { "ficom", FICOM, WORD },
114 { "ficomp", FICOMP, WORD },
115 { "fidivl", FIDIVL, WORD },
116 { "fidivrl", FIDIVRL, WORD },
117 { "fidivrs", FIDIVRS, WORD },
118 { "fidivs", FIDIVS, WORD },
119 { "fildl", FILDL, WORD },
120 { "fildq", FILDQ, WORD },
121 { "filds", FILDS, WORD },
122 { "fimull", FIMULL, WORD },
123 { "fimuls", FIMULS, WORD },
124 { "fincstp", FINCSTP, WORD },
125 { "finit", FINIT, WORD },
126 { "fistl", FISTL, WORD },
127 { "fistp", FISTP, WORD },
128 { "fists", FISTS, WORD },
129 { "fisubl", FISUBL, WORD },
130 { "fisubrl", FISUBRL, WORD },
131 { "fisubrs", FISUBRS, WORD },
132 { "fisubs", FISUBS, WORD },
133 { "fld1", FLD1, WORD },
134 { "fldcw", FLDCW, WORD },
135 { "fldd", FLDD, WORD },
136 { "fldenv", FLDENV, WORD },
137 { "fldl2e", FLDL2E, WORD },
138 { "fldl2t", FLDL2T, WORD },
139 { "fldlg2", FLDLG2, WORD },
140 { "fldln2", FLDLN2, WORD },
141 { "fldpi", FLDPI, WORD },
142 { "flds", FLDS, WORD },
143 { "fldx", FLDX, WORD },
144 { "fldz", FLDZ, WORD },
145 { "fmuld", FMULD, WORD },
146 { "fmulp", FMULP, WORD },
147 { "fmuls", FMULS, WORD },
148 { "fnop", FNOP, WORD },
149 { "fpatan", FPATAN, WORD },
150 { "fprem", FPREM, WORD },
151 { "fprem1", FPREM1, WORD },
152 { "fptan", FPTAN, WORD },
153 { "frndint", FRNDINT, WORD },
154 { "frstor", FRSTOR, WORD },
155 { "fsave", FSAVE, WORD },
156 { "fscale", FSCALE, WORD },
157 { "fsin", FSIN, WORD },
158 { "fsincos", FSINCOS, WORD },
159 { "fsqrt", FSQRT, WORD },
160 { "fstcw", FSTCW, WORD },
161 { "fstd", FSTD, WORD },
162 { "fstenv", FSTENV, WORD },
163 { "fstpd", FSTPD, WORD },
164 { "fstps", FSTPS, WORD },
165 { "fstpx", FSTPX, WORD },
166 { "fsts", FSTS, WORD },
167 { "fstsw", FSTSW, WORD },
168 { "fsubd", FSUBD, WORD },
169 { "fsubp", FSUBP, WORD },
170 { "fsubpr", FSUBPR, WORD },
171 { "fsubrd", FSUBRD, WORD },
172 { "fsubrs", FSUBRS, WORD },
173 { "fsubs", FSUBS, WORD },
174 { "ftst", FTST, WORD },
175 { "fucom", FUCOM, WORD },
176 { "fucomp", FUCOMP, WORD },
177 { "fucompp", FUCOMPP, WORD },
178 { "fxam", FXAM, WORD },
179 { "fxch", FXCH, WORD },
180 { "fxtract", FXTRACT, WORD },
181 { "fyl2x", FYL2X, WORD },
182 { "fyl2xp1", FYL2XP1, WORD },
183 { "hlt", HLT, WORD },
184 { "idiv", IDIV, WORD },
185 { "idivb", IDIV, BYTE },
186 { "imul", IMUL, WORD },
187 { "imulb", IMUL, BYTE },
188 { "in", IN, WORD },
189 { "inb", IN, BYTE },
190 { "inc", INC, WORD },
191 { "incb", INC, BYTE },
192 { "ins", INS, WORD },
193 { "insb", INS, BYTE },
194 { "int", INT, WORD },
195 { "into", INTO, JUMP },
196 { "invd", INVD, WORD },
197 { "invlpg", INVLPG, WORD },
198 { "iret", IRET, JUMP },
199 { "iretd", IRETD, JUMP },
200 { "ja", JA, JUMP },
201 { "jae", JAE, JUMP },
202 { "jb", JB, JUMP },
203 { "jbe", JBE, JUMP },
204 { "jc", JB, JUMP },
205 { "jcxz", JCXZ, JUMP },
206 { "je", JE, JUMP },
207 { "jecxz", JCXZ, JUMP },
208 { "jg", JG, JUMP },
209 { "jge", JGE, JUMP },
210 { "jl", JL, JUMP },
211 { "jle", JLE, JUMP },
212 { "jmp", JMP, JUMP },
213 { "jmpf", JMPF, JUMP },
214 { "jna", JBE, JUMP },
215 { "jnae", JB, JUMP },
216 { "jnb", JAE, JUMP },
217 { "jnbe", JA, JUMP },
218 { "jnc", JAE, JUMP },
219 { "jne", JNE, JUMP },
220 { "jng", JLE, JUMP },
221 { "jnge", JL, JUMP },
222 { "jnl", JGE, JUMP },
223 { "jnle", JG, JUMP },
224 { "jno", JNO, JUMP },
225 { "jnp", JNP, JUMP },
226 { "jns", JNS, JUMP },
227 { "jnz", JNE, JUMP },
228 { "jo", JO, JUMP },
229 { "jp", JP, JUMP },
230 { "js", JS, JUMP },
231 { "jz", JE, JUMP },
232 { "lahf", LAHF, WORD },
233 { "lar", LAR, WORD },
234 { "lds", LDS, WORD },
235 { "lea", LEA, WORD },
236 { "leave", LEAVE, WORD },
237 { "les", LES, WORD },
238 { "lfs", LFS, WORD },
239 { "lgdt", LGDT, WORD },
240 { "lgs", LGS, WORD },
241 { "lidt", LIDT, WORD },
242 { "lldt", LLDT, WORD },
243 { "lmsw", LMSW, WORD },
244 { "lock", LOCK, WORD },
245 { "lods", LODS, WORD },
246 { "lodsb", LODS, BYTE },
247 { "loop", LOOP, JUMP },
248 { "loope", LOOPE, JUMP },
249 { "loopne", LOOPNE, JUMP },
250 { "loopnz", LOOPNE, JUMP },
251 { "loopz", LOOPE, JUMP },
252 { "lsl", LSL, WORD },
253 { "lss", LSS, WORD },
254 { "ltr", LTR, WORD },
255 { "mov", MOV, WORD },
256 { "movb", MOV, BYTE },
257 { "movs", MOVS, WORD },
258 { "movsb", MOVS, BYTE },
259 { "movsx", MOVSX, WORD },
260 { "movsxb", MOVSXB, WORD },
261 { "movzx", MOVZX, WORD },
262 { "movzxb", MOVZXB, WORD },
263 { "mul", MUL, WORD },
264 { "mulb", MUL, BYTE },
265 { "neg", NEG, WORD },
266 { "negb", NEG, BYTE },
267 { "nop", NOP, WORD },
268 { "not", NOT, WORD },
269 { "notb", NOT, BYTE },
270 { "or", OR, WORD },
271 { "orb", OR, BYTE },
272 { "out", OUT, WORD },
273 { "outb", OUT, BYTE },
274 { "outs", OUTS, WORD },
275 { "outsb", OUTS, BYTE },
276 { "pop", POP, WORD },
277 { "popa", POPA, WORD },
278 { "popad", POPA, WORD },
279 { "popf", POPF, WORD },
280 { "push", PUSH, WORD },
281 { "pusha", PUSHA, WORD },
282 { "pushad", PUSHA, WORD },
283 { "pushf", PUSHF, WORD },
284 { "rcl", RCL, WORD },
285 { "rclb", RCL, BYTE },
286 { "rcr", RCR, WORD },
287 { "rcrb", RCR, BYTE },
288 { "ret", RET, JUMP },
289 { "retf", RETF, JUMP },
290 { "rol", ROL, WORD },
291 { "rolb", ROL, BYTE },
292 { "ror", ROR, WORD },
293 { "rorb", ROR, BYTE },
294 { "sahf", SAHF, WORD },
295 { "sal", SAL, WORD },
296 { "salb", SAL, BYTE },
297 { "sar", SAR, WORD },
298 { "sarb", SAR, BYTE },
299 { "sbb", SBB, WORD },
300 { "sbbb", SBB, BYTE },
301 { "scas", SCAS, WORD },
302 { "scasb", SCAS, BYTE },
303 { "seta", SETA, BYTE },
304 { "setae", SETAE, BYTE },
305 { "setb", SETB, BYTE },
306 { "setbe", SETBE, BYTE },
307 { "sete", SETE, BYTE },
308 { "setg", SETG, BYTE },
309 { "setge", SETGE, BYTE },
310 { "setl", SETL, BYTE },
311 { "setna", SETBE, BYTE },
312 { "setnae", SETB, BYTE },
313 { "setnb", SETAE, BYTE },
314 { "setnbe", SETA, BYTE },
315 { "setne", SETNE, BYTE },
316 { "setng", SETLE, BYTE },
317 { "setnge", SETL, BYTE },
318 { "setnl", SETGE, BYTE },
319 { "setnle", SETG, BYTE },
320 { "setno", SETNO, BYTE },
321 { "setnp", SETNP, BYTE },
322 { "setns", SETNS, BYTE },
323 { "seto", SETO, BYTE },
324 { "setp", SETP, BYTE },
325 { "sets", SETS, BYTE },
326 { "setz", SETE, BYTE },
327 { "sgdt", SGDT, WORD },
328 { "shl", SHL, WORD },
329 { "shlb", SHL, BYTE },
330 { "shld", SHLD, WORD },
331 { "shr", SHR, WORD },
332 { "shrb", SHR, BYTE },
333 { "shrd", SHRD, WORD },
334 { "sidt", SIDT, WORD },
335 { "sldt", SLDT, WORD },
336 { "smsw", SMSW, WORD },
337 { "stc", STC, WORD },
338 { "std", STD, WORD },
339 { "sti", STI, WORD },
340 { "stos", STOS, WORD },
341 { "stosb", STOS, BYTE },
342 { "str", STR, WORD },
343 { "sub", SUB, WORD },
344 { "subb", SUB, BYTE },
345 { "test", TEST, WORD },
346 { "testb", TEST, BYTE },
347 { "verr", VERR, WORD },
348 { "verw", VERW, WORD },
349 { "wait", WAIT, WORD },
350 { "wbinvd", WBINVD, WORD },
351 { "xadd", XADD, WORD },
352 { "xchg", XCHG, WORD },
353 { "xchgb", XCHG, BYTE },
354 { "xlat", XLAT, WORD },
355 { "xor", XOR, WORD },
356 { "xorb", XOR, BYTE },
357};
358
/* Which assembly dialect is being read.  ACK unless ncc_parse_init() has
 * switched the parser to NCC.
 */
enum dialect { ACK, NCC };

static enum dialect dialect = ACK;
360
void ack_parse_init(char *file)
/* Get ready to parse the ACK assembly source in 'file' by starting the
 * tokenizer on it.  NOTE(review): the '!' argument is presumably the
 * comment character of this dialect -- confirm against tok_init().
 */
{
	tok_init(file, '!');
}
366
367void ncc_parse_init(char *file)
368/* Prepare parsing of an ACK Xenix assembly file. See emit_ack.c for comments
369 * on this fine assembly dialect.
370 */
371{
372 dialect= NCC;
373 ack_parse_init(file);
374}
375
376static void zap(void)
377/* An error, zap the rest of the line. */
378{
379 token_t *t;
380
381 while ((t= get_token(0))->type != T_EOF && t->symbol != ';')
382 skip_token(1);
383}
384
385static mnemonic_t *search_mnem(char *name)
386/* Binary search for a mnemonic. (That's why the table is sorted.) */
387{
388 int low, mid, high;
389 int cmp;
390 mnemonic_t *m;
391
392 low= 0;
393 high= arraysize(mnemtab)-1;
394 while (low <= high) {
395 mid= (low + high) / 2;
396 m= &mnemtab[mid];
397
398 if ((cmp= strcmp(name, m->name)) == 0) return m;
399
400 if (cmp < 0) high= mid-1; else low= mid+1;
401 }
402 return nil;
403}
404
405static expression_t *ack_get_C_expression(int *pn)
406/* Read a "C-like" expression. Note that we don't worry about precedence,
407 * the expression is printed later like it is read. If the target language
408 * does not have all the operators (like ~) then this has to be repaired by
409 * changing the source file. (No problem, you still have one source file
410 * to maintain, not two.)
411 */
412{
413 expression_t *e, *a1, *a2;
414 token_t *t;
415
416 if ((t= get_token(*pn))->symbol == '[') {
417 /* [ expr ]: grouping. */
418 (*pn)++;
419 if ((a1= ack_get_C_expression(pn)) == nil) return nil;
420 if (get_token(*pn)->symbol != ']') {
421 parse_err(1, t, "missing ]\n");
422 del_expr(a1);
423 return nil;
424 }
425 (*pn)++;
426 e= new_expr();
427 e->operator= '[';
428 e->middle= a1;
429 } else
430 if (t->type == T_WORD || t->type == T_STRING) {
431 /* Label, number, or string. */
432 e= new_expr();
433 e->operator= t->type == T_WORD ? 'W' : 'S';
434 e->name= allocate(nil, (t->len+1) * sizeof(e->name[0]));
435 memcpy(e->name, t->name, t->len+1);
436 e->len= t->len;
437 (*pn)++;
438 } else
439 if (t->symbol == '+' || t->symbol == '-' || t->symbol == '~') {
440 /* Unary operator. */
441 (*pn)++;
442 if ((a1= ack_get_C_expression(pn)) == nil) return nil;
443 e= new_expr();
444 e->operator= t->symbol;
445 e->middle= a1;
446 } else {
447 parse_err(1, t, "expression syntax error\n");
448 return nil;
449 }
450
451 switch ((t= get_token(*pn))->symbol) {
452 case '+':
453 case '-':
454 case '*':
455 case '/':
456 case '%':
457 case '&':
458 case '|':
459 case '^':
460 case S_LEFTSHIFT:
461 case S_RIGHTSHIFT:
462 (*pn)++;
463 a1= e;
464 if ((a2= ack_get_C_expression(pn)) == nil) {
465 del_expr(a1);
466 return nil;
467 }
468 e= new_expr();
469 e->operator= t->symbol;
470 e->left= a1;
471 e->right= a2;
472 }
473 return e;
474}
475
476static expression_t *ack_get_operand(int *pn, int deref)
477/* Get something like: (memory), offset(base)(index*scale), or simpler. */
478{
479 expression_t *e, *offset, *base, *index;
480 token_t *t;
481 int c;
482
483 /* Is it (memory)? */
484 if (get_token(*pn)->symbol == '('
485 && ((t= get_token(*pn + 1))->type != T_WORD
486 || !isregister(t->name))
487 ) {
488 /* A memory dereference. */
489 (*pn)++;
490 if ((offset= ack_get_C_expression(pn)) == nil) return nil;
491 if (get_token(*pn)->symbol != ')') {
492 parse_err(1, t, "operand syntax error\n");
493 del_expr(offset);
494 return nil;
495 }
496 (*pn)++;
497 e= new_expr();
498 e->operator= '(';
499 e->middle= offset;
500 return e;
501 }
502
503 /* #constant? */
504 if (dialect == NCC && deref
505 && ((c= get_token(*pn)->symbol) == '#' || c == '*')) {
506 /* NCC: mov ax,#constant -> ACK: mov ax,constant */
507 (*pn)++;
508 return ack_get_C_expression(pn);
509 }
510
511 /* @address? */
512 if (dialect == NCC && get_token(*pn)->symbol == '@') {
513 /* NCC: jmp @address -> ACK: jmp (address) */
514 (*pn)++;
515 if ((offset= ack_get_operand(pn, deref)) == nil) return nil;
516 e= new_expr();
517 e->operator= '(';
518 e->middle= offset;
519 return e;
520 }
521
522 /* Offset? */
523 if (get_token(*pn)->symbol != '(') {
524 /* There is an offset. */
525 if ((offset= ack_get_C_expression(pn)) == nil) return nil;
526 } else {
527 /* No offset. */
528 offset= nil;
529 }
530
531 /* (base)? */
532 if (get_token(*pn)->symbol == '('
533 && (t= get_token(*pn + 1))->type == T_WORD
534 && isregister(t->name)
535 && get_token(*pn + 2)->symbol == ')'
536 ) {
537 /* A base register expression. */
538 base= new_expr();
539 base->operator= 'B';
540 base->name= copystr(t->name);
541 (*pn)+= 3;
542 } else {
543 /* No base register expression. */
544 base= nil;
545 }
546
547 /* (index*scale)? */
548 if (get_token(*pn)->symbol == '(') {
549 /* An index most likely. */
550 token_t *m= nil;
551
552 if (!( /* This must be true: */
553 (t= get_token(*pn + 1))->type == T_WORD
554 && isregister(t->name)
555 && (get_token(*pn + 2)->symbol == ')' || (
556 get_token(*pn + 2)->symbol == '*'
557 && (m= get_token(*pn + 3))->type == T_WORD
558 && strchr("1248", m->name[0]) != nil
559 && m->name[1] == 0
560 && get_token(*pn + 4)->symbol == ')'
561 ))
562 )) {
563 /* Alas it isn't */
564 parse_err(1, t, "operand syntax error\n");
565 del_expr(offset);
566 del_expr(base);
567 return nil;
568 }
569 /* Found an index. */
570 index= new_expr();
571 index->operator= m == nil ? '1' : m->name[0];
572 index->name= copystr(t->name);
573 (*pn)+= (m == nil ? 3 : 5);
574 } else {
575 /* No index. */
576 index= nil;
577 }
578
579 if (dialect == NCC && deref && base == nil && index == nil
580 && !(offset != nil && offset->operator == 'W'
581 && isregister(offset->name))
582 ) {
583 /* NCC: mov ax,thing -> ACK mov ax,(thing) */
584 e= new_expr();
585 e->operator= '(';
586 e->middle= offset;
587 return e;
588 }
589
590 if (base == nil && index == nil) {
591 /* Return a lone offset as is. */
592 e= offset;
593 } else {
594 e= new_expr();
595 e->operator= 'O';
596 e->left= offset;
597 e->middle= base;
598 e->right= index;
599 }
600 return e;
601}
602
603static expression_t *ack_get_oplist(int *pn, int deref)
604/* Get a comma (or colon for jmpf and callf) separated list of instruction
605 * operands.
606 */
607{
608 expression_t *e, *o1, *o2;
609 token_t *t;
610
611 if ((e= ack_get_operand(pn, deref)) == nil) return nil;
612
613 if ((t= get_token(*pn))->symbol == ',' || t->symbol == ':') {
614 o1= e;
615 (*pn)++;
616 if ((o2= ack_get_oplist(pn, deref)) == nil) {
617 del_expr(o1);
618 return nil;
619 }
620 e= new_expr();
621 e->operator= ',';
622 e->left= o1;
623 e->right= o2;
624 }
625 return e;
626}
627
628static asm86_t *ack_get_statement(void)
629/* Get a pseudo op or machine instruction with arguments. */
630{
631 token_t *t= get_token(0);
632 asm86_t *a;
633 mnemonic_t *m;
634 int n;
635 int prefix_seen;
636 int oaz_prefix;
637 int deref;
638
639 assert(t->type == T_WORD);
640
641 if (strcmp(t->name, ".sect") == 0) {
642 /* .sect .text etc. Accept only four segment names. */
643 skip_token(1);
644 t= get_token(0);
645 if (t->type != T_WORD || (
646 strcmp(t->name, ".text") != 0
647 && strcmp(t->name, ".rom") != 0
648 && strcmp(t->name, ".data") != 0
649 && strcmp(t->name, ".bss") != 0
650 && strcmp(t->name, ".end") != 0
651 )) {
652 parse_err(1, t, "weird section name to .sect\n");
653 return nil;
654 }
655 }
656 a= new_asm86();
657
658 /* Process instruction prefixes. */
659 oaz_prefix= 0;
660 for (prefix_seen= 0;; prefix_seen= 1) {
661 if (strcmp(t->name, "o16") == 0) {
662 if (use16()) {
663 parse_err(1, t, "o16 in an 8086 section\n");
664 }
665 oaz_prefix|= OPZ;
666 } else
667 if (strcmp(t->name, "o32") == 0) {
668 if (use32()) {
669 parse_err(1, t, "o32 in an 80386 section\n");
670 }
671 oaz_prefix|= OPZ;
672 } else
673 if (strcmp(t->name, "a16") == 0) {
674 if (use16()) {
675 parse_err(1, t, "a16 in an 8086 section\n");
676 }
677 oaz_prefix|= ADZ;
678 } else
679 if (strcmp(t->name, "a32") == 0) {
680 if (use32()) {
681 parse_err(1, t, "a32 in an 80386 section\n");
682 }
683 oaz_prefix|= ADZ;
684 } else
685 if (strcmp(t->name, "rep") == 0
686 || strcmp(t->name, "repe") == 0
687 || strcmp(t->name, "repne") == 0
688 || strcmp(t->name, "repz") == 0
689 || strcmp(t->name, "repnz") == 0
690 ) {
691 if (a->rep != ONCE) {
692 parse_err(1, t,
693 "can't have more than one rep\n");
694 }
695 switch (t->name[3]) {
696 case 0: a->rep= REP; break;
697 case 'e':
698 case 'z': a->rep= REPE; break;
699 case 'n': a->rep= REPNE; break;
700 }
701 } else
702 if (strchr("cdefgs", t->name[0]) != nil
703 && strcmp(t->name+1, "seg") == 0) {
704 if (a->seg != DEFSEG) {
705 parse_err(1, t,
706 "can't have more than one segment prefix\n");
707 }
708 switch (t->name[0]) {
709 case 'c': a->seg= CSEG; break;
710 case 'd': a->seg= DSEG; break;
711 case 'e': a->seg= ESEG; break;
712 case 'f': a->seg= FSEG; break;
713 case 'g': a->seg= GSEG; break;
714 case 's': a->seg= SSEG; break;
715 }
716 } else
717 if (!prefix_seen) {
718 /* No prefix here, get out! */
719 break;
720 } else {
721 /* No more prefixes, next must be an instruction. */
722 if (t->type != T_WORD
723 || (m= search_mnem(t->name)) == nil
724 || m->optype == PSEUDO
725 ) {
726 parse_err(1, t,
727 "machine instruction expected after instruction prefix\n");
728 del_asm86(a);
729 return nil;
730 }
731 if (oaz_prefix != 0 && m->optype != JUMP
732 && m->optype != WORD) {
733 parse_err(1, t,
734 "'%s' can't have an operand size prefix\n", m->name);
735 }
736 break;
737 }
738
739 /* Skip the prefix and extra newlines. */
740 do {
741 skip_token(1);
742 } while ((t= get_token(0))->symbol == ';');
743 }
744
745 /* All the readahead being done upsets the line counter. */
746 a->line= t->line;
747
748 /* Read a machine instruction or pseudo op. */
749 if ((m= search_mnem(t->name)) == nil) {
750 parse_err(1, t, "unknown instruction '%s'\n", t->name);
751 del_asm86(a);
752 return nil;
753 }
754 a->opcode= m->opcode;
755 a->optype= m->optype;
756 a->oaz= oaz_prefix;
757
758 switch (a->opcode) {
759 case IN:
760 case OUT:
761 case INT:
762 deref= 0;
763 break;
764 default:
765 deref= (a->optype >= BYTE);
766 }
767 n= 1;
768 if (get_token(1)->symbol != ';'
769 && (a->args= ack_get_oplist(&n, deref)) == nil) {
770 del_asm86(a);
771 return nil;
772 }
773 if (get_token(n)->symbol != ';') {
774 parse_err(1, t, "garbage at end of instruction\n");
775 del_asm86(a);
776 return nil;
777 }
778 switch (a->opcode) {
779 case DOT_ALIGN:
780 /* Restrict .align to have a single numeric argument, some
781 * assemblers think of the argument as a power of two, so
782 * we need to be able to change the value.
783 */
784 if (a->args == nil || a->args->operator != 'W'
785 || !isanumber(a->args->name)) {
786 parse_err(1, t,
787 ".align is restricted to one numeric argument\n");
788 del_asm86(a);
789 return nil;
790 }
791 break;
792 case JMPF:
793 case CALLF:
794 /* NCC jmpf off,seg -> ACK jmpf seg:off */
795 if (dialect == NCC && a->args != nil
796 && a->args->operator == ',') {
797 expression_t *t;
798
799 t= a->args->left;
800 a->args->left= a->args->right;
801 a->args->right= t;
802 break;
803 }
804 /*FALL THROUGH*/
805 case JMP:
806 case CALL:
807 /* NCC jmp @(reg) -> ACK jmp (reg) */
808 if (dialect == NCC && a->args != nil && (
809 (a->args->operator == '('
810 && a->args->middle != nil
811 && a->args->middle->operator == 'O')
812 || (a->args->operator == 'O'
813 && a->args->left == nil
814 && a->args->middle != nil
815 && a->args->right == nil)
816 )) {
817 expression_t *t;
818
819 t= a->args;
820 a->args= a->args->middle;
821 t->middle= nil;
822 del_expr(t);
823 if (a->args->operator == 'B') a->args->operator= 'W';
824 }
825 break;
826 default:;
827 }
828 skip_token(n+1);
829 return a;
830}
831
832asm86_t *ack_get_instruction(void)
833{
834 asm86_t *a= nil;
835 expression_t *e;
836 token_t *t;
837
838 while ((t= get_token(0))->symbol == ';')
839 skip_token(1);
840
841 if (t->type == T_EOF) return nil;
842
843 if (t->symbol == '#') {
844 /* Preprocessor line and file change. */
845
846 if ((t= get_token(1))->type != T_WORD || !isanumber(t->name)
847 || get_token(2)->type != T_STRING
848 ) {
849 parse_err(1, t, "file not preprocessed?\n");
850 zap();
851 } else {
852 set_file(get_token(2)->name,
853 strtol(get_token(1)->name, nil, 0) - 1);
854
855 /* GNU CPP adds extra cruft, simply zap the line. */
856 zap();
857 }
858 a= ack_get_instruction();
859 } else
860 if (t->type == T_WORD && get_token(1)->symbol == ':') {
861 /* A label definition. */
862 a= new_asm86();
863 a->line= t->line;
864 a->opcode= DOT_LABEL;
865 a->optype= PSEUDO;
866 a->args= e= new_expr();
867 e->operator= ':';
868 e->name= copystr(t->name);
869 skip_token(2);
870 } else
871 if (t->type == T_WORD && get_token(1)->symbol == '=') {
872 int n= 2;
873
874 if ((e= ack_get_C_expression(&n)) == nil) {
875 zap();
876 a= ack_get_instruction();
877 } else
878 if (get_token(n)->symbol != ';') {
879 parse_err(1, t, "garbage after assignment\n");
880 zap();
881 a= ack_get_instruction();
882 } else {
883 a= new_asm86();
884 a->line= t->line;
885 a->opcode= DOT_EQU;
886 a->optype= PSEUDO;
887 a->args= new_expr();
888 a->args->operator= '=';
889 a->args->name= copystr(t->name);
890 a->args->middle= e;
891 skip_token(n+1);
892 }
893 } else
894 if (t->type == T_WORD) {
895 if ((a= ack_get_statement()) == nil) {
896 zap();
897 a= ack_get_instruction();
898 }
899 } else {
900 parse_err(1, t, "syntax error\n");
901 zap();
902 a= ack_get_instruction();
903 }
904 return a;
905}
906
907asm86_t *ncc_get_instruction(void)
908{
909 return ack_get_instruction();
910}
Note: See TracBrowser for help on using the repository browser.