/*	parse_ack.c - parse ACK assembly		Author: Kees J. Bot
 *		      parse NCC assembly			18 Dec 1993
 */
|
---|
4 | #define nil 0
|
---|
5 | #include <stdio.h>
|
---|
6 | #include <stdlib.h>
|
---|
7 | #include <string.h>
|
---|
8 | #include <assert.h>
|
---|
9 | #include "asmconv.h"
|
---|
10 | #include "token.h"
|
---|
11 | #include "asm86.h"
|
---|
12 | #include "languages.h"
|
---|
13 |
|
---|
14 | typedef struct mnemonic { /* ACK as86 mnemonics translation table. */
|
---|
15 | char *name;
|
---|
16 | opcode_t opcode;
|
---|
17 | optype_t optype;
|
---|
18 | } mnemonic_t;
|
---|
19 |
|
---|
20 | static mnemonic_t mnemtab[] = { /* This array is sorted. */
|
---|
21 | { ".align", DOT_ALIGN, PSEUDO },
|
---|
22 | { ".ascii", DOT_ASCII, PSEUDO },
|
---|
23 | { ".asciz", DOT_ASCIZ, PSEUDO },
|
---|
24 | { ".assert", DOT_ASSERT, PSEUDO },
|
---|
25 | { ".base", DOT_BASE, PSEUDO },
|
---|
26 | { ".bss", DOT_BSS, PSEUDO },
|
---|
27 | { ".comm", DOT_LCOMM, PSEUDO },
|
---|
28 | { ".data", DOT_DATA, PSEUDO },
|
---|
29 | { ".data1", DOT_DATA1, PSEUDO },
|
---|
30 | { ".data2", DOT_DATA2, PSEUDO },
|
---|
31 | { ".data4", DOT_DATA4, PSEUDO },
|
---|
32 | { ".define", DOT_DEFINE, PSEUDO },
|
---|
33 | { ".end", DOT_END, PSEUDO },
|
---|
34 | { ".extern", DOT_EXTERN, PSEUDO },
|
---|
35 | { ".file", DOT_FILE, PSEUDO },
|
---|
36 | { ".line", DOT_LINE, PSEUDO },
|
---|
37 | { ".list", DOT_LIST, PSEUDO },
|
---|
38 | { ".nolist", DOT_NOLIST, PSEUDO },
|
---|
39 | { ".rom", DOT_ROM, PSEUDO },
|
---|
40 | { ".space", DOT_SPACE, PSEUDO },
|
---|
41 | { ".symb", DOT_SYMB, PSEUDO },
|
---|
42 | { ".text", DOT_TEXT, PSEUDO },
|
---|
43 | { ".use16", DOT_USE16, PSEUDO },
|
---|
44 | { ".use32", DOT_USE32, PSEUDO },
|
---|
45 | { "aaa", AAA, WORD },
|
---|
46 | { "aad", AAD, WORD },
|
---|
47 | { "aam", AAM, WORD },
|
---|
48 | { "aas", AAS, WORD },
|
---|
49 | { "adc", ADC, WORD },
|
---|
50 | { "adcb", ADC, BYTE },
|
---|
51 | { "add", ADD, WORD },
|
---|
52 | { "addb", ADD, BYTE },
|
---|
53 | { "and", AND, WORD },
|
---|
54 | { "andb", AND, BYTE },
|
---|
55 | { "arpl", ARPL, WORD },
|
---|
56 | { "bound", BOUND, WORD },
|
---|
57 | { "bsf", BSF, WORD },
|
---|
58 | { "bsr", BSR, WORD },
|
---|
59 | { "bswap", BSWAP, WORD },
|
---|
60 | { "bt", BT, WORD },
|
---|
61 | { "btc", BTC, WORD },
|
---|
62 | { "btr", BTR, WORD },
|
---|
63 | { "bts", BTS, WORD },
|
---|
64 | { "call", CALL, JUMP },
|
---|
65 | { "callf", CALLF, JUMP },
|
---|
66 | { "cbw", CBW, WORD },
|
---|
67 | { "cdq", CWD, WORD },
|
---|
68 | { "clc", CLC, WORD },
|
---|
69 | { "cld", CLD, WORD },
|
---|
70 | { "cli", CLI, WORD },
|
---|
71 | { "clts", CLTS, WORD },
|
---|
72 | { "cmc", CMC, WORD },
|
---|
73 | { "cmp", CMP, WORD },
|
---|
74 | { "cmpb", CMP, BYTE },
|
---|
75 | { "cmps", CMPS, WORD },
|
---|
76 | { "cmpsb", CMPS, BYTE },
|
---|
77 | { "cmpxchg", CMPXCHG, WORD },
|
---|
78 | { "cwd", CWD, WORD },
|
---|
79 | { "cwde", CBW, WORD },
|
---|
80 | { "daa", DAA, WORD },
|
---|
81 | { "das", DAS, WORD },
|
---|
82 | { "dec", DEC, WORD },
|
---|
83 | { "decb", DEC, BYTE },
|
---|
84 | { "div", DIV, WORD },
|
---|
85 | { "divb", DIV, BYTE },
|
---|
86 | { "enter", ENTER, WORD },
|
---|
87 | { "f2xm1", F2XM1, WORD },
|
---|
88 | { "fabs", FABS, WORD },
|
---|
89 | { "fadd", FADD, WORD },
|
---|
90 | { "faddd", FADDD, WORD },
|
---|
91 | { "faddp", FADDP, WORD },
|
---|
92 | { "fadds", FADDS, WORD },
|
---|
93 | { "fbld", FBLD, WORD },
|
---|
94 | { "fbstp", FBSTP, WORD },
|
---|
95 | { "fchs", FCHS, WORD },
|
---|
96 | { "fclex", FCLEX, WORD },
|
---|
97 | { "fcomd", FCOMD, WORD },
|
---|
98 | { "fcompd", FCOMPD, WORD },
|
---|
99 | { "fcompp", FCOMPP, WORD },
|
---|
100 | { "fcomps", FCOMPS, WORD },
|
---|
101 | { "fcoms", FCOMS, WORD },
|
---|
102 | { "fcos", FCOS, WORD },
|
---|
103 | { "fdecstp", FDECSTP, WORD },
|
---|
104 | { "fdivd", FDIVD, WORD },
|
---|
105 | { "fdivp", FDIVP, WORD },
|
---|
106 | { "fdivrd", FDIVRD, WORD },
|
---|
107 | { "fdivrp", FDIVRP, WORD },
|
---|
108 | { "fdivrs", FDIVRS, WORD },
|
---|
109 | { "fdivs", FDIVS, WORD },
|
---|
110 | { "ffree", FFREE, WORD },
|
---|
111 | { "fiaddl", FIADDL, WORD },
|
---|
112 | { "fiadds", FIADDS, WORD },
|
---|
113 | { "ficom", FICOM, WORD },
|
---|
114 | { "ficomp", FICOMP, WORD },
|
---|
115 | { "fidivl", FIDIVL, WORD },
|
---|
116 | { "fidivrl", FIDIVRL, WORD },
|
---|
117 | { "fidivrs", FIDIVRS, WORD },
|
---|
118 | { "fidivs", FIDIVS, WORD },
|
---|
119 | { "fildl", FILDL, WORD },
|
---|
120 | { "fildq", FILDQ, WORD },
|
---|
121 | { "filds", FILDS, WORD },
|
---|
122 | { "fimull", FIMULL, WORD },
|
---|
123 | { "fimuls", FIMULS, WORD },
|
---|
124 | { "fincstp", FINCSTP, WORD },
|
---|
125 | { "finit", FINIT, WORD },
|
---|
126 | { "fistl", FISTL, WORD },
|
---|
127 | { "fistp", FISTP, WORD },
|
---|
128 | { "fists", FISTS, WORD },
|
---|
129 | { "fisubl", FISUBL, WORD },
|
---|
130 | { "fisubrl", FISUBRL, WORD },
|
---|
131 | { "fisubrs", FISUBRS, WORD },
|
---|
132 | { "fisubs", FISUBS, WORD },
|
---|
133 | { "fld1", FLD1, WORD },
|
---|
134 | { "fldcw", FLDCW, WORD },
|
---|
135 | { "fldd", FLDD, WORD },
|
---|
136 | { "fldenv", FLDENV, WORD },
|
---|
137 | { "fldl2e", FLDL2E, WORD },
|
---|
138 | { "fldl2t", FLDL2T, WORD },
|
---|
139 | { "fldlg2", FLDLG2, WORD },
|
---|
140 | { "fldln2", FLDLN2, WORD },
|
---|
141 | { "fldpi", FLDPI, WORD },
|
---|
142 | { "flds", FLDS, WORD },
|
---|
143 | { "fldx", FLDX, WORD },
|
---|
144 | { "fldz", FLDZ, WORD },
|
---|
145 | { "fmuld", FMULD, WORD },
|
---|
146 | { "fmulp", FMULP, WORD },
|
---|
147 | { "fmuls", FMULS, WORD },
|
---|
148 | { "fnop", FNOP, WORD },
|
---|
149 | { "fpatan", FPATAN, WORD },
|
---|
150 | { "fprem", FPREM, WORD },
|
---|
151 | { "fprem1", FPREM1, WORD },
|
---|
152 | { "fptan", FPTAN, WORD },
|
---|
153 | { "frndint", FRNDINT, WORD },
|
---|
154 | { "frstor", FRSTOR, WORD },
|
---|
155 | { "fsave", FSAVE, WORD },
|
---|
156 | { "fscale", FSCALE, WORD },
|
---|
157 | { "fsin", FSIN, WORD },
|
---|
158 | { "fsincos", FSINCOS, WORD },
|
---|
159 | { "fsqrt", FSQRT, WORD },
|
---|
160 | { "fstcw", FSTCW, WORD },
|
---|
161 | { "fstd", FSTD, WORD },
|
---|
162 | { "fstenv", FSTENV, WORD },
|
---|
163 | { "fstpd", FSTPD, WORD },
|
---|
164 | { "fstps", FSTPS, WORD },
|
---|
165 | { "fstpx", FSTPX, WORD },
|
---|
166 | { "fsts", FSTS, WORD },
|
---|
167 | { "fstsw", FSTSW, WORD },
|
---|
168 | { "fsubd", FSUBD, WORD },
|
---|
169 | { "fsubp", FSUBP, WORD },
|
---|
170 | { "fsubpr", FSUBPR, WORD },
|
---|
171 | { "fsubrd", FSUBRD, WORD },
|
---|
172 | { "fsubrs", FSUBRS, WORD },
|
---|
173 | { "fsubs", FSUBS, WORD },
|
---|
174 | { "ftst", FTST, WORD },
|
---|
175 | { "fucom", FUCOM, WORD },
|
---|
176 | { "fucomp", FUCOMP, WORD },
|
---|
177 | { "fucompp", FUCOMPP, WORD },
|
---|
178 | { "fxam", FXAM, WORD },
|
---|
179 | { "fxch", FXCH, WORD },
|
---|
180 | { "fxtract", FXTRACT, WORD },
|
---|
181 | { "fyl2x", FYL2X, WORD },
|
---|
182 | { "fyl2xp1", FYL2XP1, WORD },
|
---|
183 | { "hlt", HLT, WORD },
|
---|
184 | { "idiv", IDIV, WORD },
|
---|
185 | { "idivb", IDIV, BYTE },
|
---|
186 | { "imul", IMUL, WORD },
|
---|
187 | { "imulb", IMUL, BYTE },
|
---|
188 | { "in", IN, WORD },
|
---|
189 | { "inb", IN, BYTE },
|
---|
190 | { "inc", INC, WORD },
|
---|
191 | { "incb", INC, BYTE },
|
---|
192 | { "ins", INS, WORD },
|
---|
193 | { "insb", INS, BYTE },
|
---|
194 | { "int", INT, WORD },
|
---|
195 | { "into", INTO, JUMP },
|
---|
196 | { "invd", INVD, WORD },
|
---|
197 | { "invlpg", INVLPG, WORD },
|
---|
198 | { "iret", IRET, JUMP },
|
---|
199 | { "iretd", IRETD, JUMP },
|
---|
200 | { "ja", JA, JUMP },
|
---|
201 | { "jae", JAE, JUMP },
|
---|
202 | { "jb", JB, JUMP },
|
---|
203 | { "jbe", JBE, JUMP },
|
---|
204 | { "jc", JB, JUMP },
|
---|
205 | { "jcxz", JCXZ, JUMP },
|
---|
206 | { "je", JE, JUMP },
|
---|
207 | { "jecxz", JCXZ, JUMP },
|
---|
208 | { "jg", JG, JUMP },
|
---|
209 | { "jge", JGE, JUMP },
|
---|
210 | { "jl", JL, JUMP },
|
---|
211 | { "jle", JLE, JUMP },
|
---|
212 | { "jmp", JMP, JUMP },
|
---|
213 | { "jmpf", JMPF, JUMP },
|
---|
214 | { "jna", JBE, JUMP },
|
---|
215 | { "jnae", JB, JUMP },
|
---|
216 | { "jnb", JAE, JUMP },
|
---|
217 | { "jnbe", JA, JUMP },
|
---|
218 | { "jnc", JAE, JUMP },
|
---|
219 | { "jne", JNE, JUMP },
|
---|
220 | { "jng", JLE, JUMP },
|
---|
221 | { "jnge", JL, JUMP },
|
---|
222 | { "jnl", JGE, JUMP },
|
---|
223 | { "jnle", JG, JUMP },
|
---|
224 | { "jno", JNO, JUMP },
|
---|
225 | { "jnp", JNP, JUMP },
|
---|
226 | { "jns", JNS, JUMP },
|
---|
227 | { "jnz", JNE, JUMP },
|
---|
228 | { "jo", JO, JUMP },
|
---|
229 | { "jp", JP, JUMP },
|
---|
230 | { "js", JS, JUMP },
|
---|
231 | { "jz", JE, JUMP },
|
---|
232 | { "lahf", LAHF, WORD },
|
---|
233 | { "lar", LAR, WORD },
|
---|
234 | { "lds", LDS, WORD },
|
---|
235 | { "lea", LEA, WORD },
|
---|
236 | { "leave", LEAVE, WORD },
|
---|
237 | { "les", LES, WORD },
|
---|
238 | { "lfs", LFS, WORD },
|
---|
239 | { "lgdt", LGDT, WORD },
|
---|
240 | { "lgs", LGS, WORD },
|
---|
241 | { "lidt", LIDT, WORD },
|
---|
242 | { "lldt", LLDT, WORD },
|
---|
243 | { "lmsw", LMSW, WORD },
|
---|
244 | { "lock", LOCK, WORD },
|
---|
245 | { "lods", LODS, WORD },
|
---|
246 | { "lodsb", LODS, BYTE },
|
---|
247 | { "loop", LOOP, JUMP },
|
---|
248 | { "loope", LOOPE, JUMP },
|
---|
249 | { "loopne", LOOPNE, JUMP },
|
---|
250 | { "loopnz", LOOPNE, JUMP },
|
---|
251 | { "loopz", LOOPE, JUMP },
|
---|
252 | { "lsl", LSL, WORD },
|
---|
253 | { "lss", LSS, WORD },
|
---|
254 | { "ltr", LTR, WORD },
|
---|
255 | { "mov", MOV, WORD },
|
---|
256 | { "movb", MOV, BYTE },
|
---|
257 | { "movs", MOVS, WORD },
|
---|
258 | { "movsb", MOVS, BYTE },
|
---|
259 | { "movsx", MOVSX, WORD },
|
---|
260 | { "movsxb", MOVSXB, WORD },
|
---|
261 | { "movzx", MOVZX, WORD },
|
---|
262 | { "movzxb", MOVZXB, WORD },
|
---|
263 | { "mul", MUL, WORD },
|
---|
264 | { "mulb", MUL, BYTE },
|
---|
265 | { "neg", NEG, WORD },
|
---|
266 | { "negb", NEG, BYTE },
|
---|
267 | { "nop", NOP, WORD },
|
---|
268 | { "not", NOT, WORD },
|
---|
269 | { "notb", NOT, BYTE },
|
---|
270 | { "or", OR, WORD },
|
---|
271 | { "orb", OR, BYTE },
|
---|
272 | { "out", OUT, WORD },
|
---|
273 | { "outb", OUT, BYTE },
|
---|
274 | { "outs", OUTS, WORD },
|
---|
275 | { "outsb", OUTS, BYTE },
|
---|
276 | { "pop", POP, WORD },
|
---|
277 | { "popa", POPA, WORD },
|
---|
278 | { "popad", POPA, WORD },
|
---|
279 | { "popf", POPF, WORD },
|
---|
280 | { "push", PUSH, WORD },
|
---|
281 | { "pusha", PUSHA, WORD },
|
---|
282 | { "pushad", PUSHA, WORD },
|
---|
283 | { "pushf", PUSHF, WORD },
|
---|
284 | { "rcl", RCL, WORD },
|
---|
285 | { "rclb", RCL, BYTE },
|
---|
286 | { "rcr", RCR, WORD },
|
---|
287 | { "rcrb", RCR, BYTE },
|
---|
288 | { "ret", RET, JUMP },
|
---|
289 | { "retf", RETF, JUMP },
|
---|
290 | { "rol", ROL, WORD },
|
---|
291 | { "rolb", ROL, BYTE },
|
---|
292 | { "ror", ROR, WORD },
|
---|
293 | { "rorb", ROR, BYTE },
|
---|
294 | { "sahf", SAHF, WORD },
|
---|
295 | { "sal", SAL, WORD },
|
---|
296 | { "salb", SAL, BYTE },
|
---|
297 | { "sar", SAR, WORD },
|
---|
298 | { "sarb", SAR, BYTE },
|
---|
299 | { "sbb", SBB, WORD },
|
---|
300 | { "sbbb", SBB, BYTE },
|
---|
301 | { "scas", SCAS, WORD },
|
---|
302 | { "scasb", SCAS, BYTE },
|
---|
303 | { "seta", SETA, BYTE },
|
---|
304 | { "setae", SETAE, BYTE },
|
---|
305 | { "setb", SETB, BYTE },
|
---|
306 | { "setbe", SETBE, BYTE },
|
---|
307 | { "sete", SETE, BYTE },
|
---|
308 | { "setg", SETG, BYTE },
|
---|
309 | { "setge", SETGE, BYTE },
|
---|
310 | { "setl", SETL, BYTE },
|
---|
311 | { "setna", SETBE, BYTE },
|
---|
312 | { "setnae", SETB, BYTE },
|
---|
313 | { "setnb", SETAE, BYTE },
|
---|
314 | { "setnbe", SETA, BYTE },
|
---|
315 | { "setne", SETNE, BYTE },
|
---|
316 | { "setng", SETLE, BYTE },
|
---|
317 | { "setnge", SETL, BYTE },
|
---|
318 | { "setnl", SETGE, BYTE },
|
---|
319 | { "setnle", SETG, BYTE },
|
---|
320 | { "setno", SETNO, BYTE },
|
---|
321 | { "setnp", SETNP, BYTE },
|
---|
322 | { "setns", SETNS, BYTE },
|
---|
323 | { "seto", SETO, BYTE },
|
---|
324 | { "setp", SETP, BYTE },
|
---|
325 | { "sets", SETS, BYTE },
|
---|
326 | { "setz", SETE, BYTE },
|
---|
327 | { "sgdt", SGDT, WORD },
|
---|
328 | { "shl", SHL, WORD },
|
---|
329 | { "shlb", SHL, BYTE },
|
---|
330 | { "shld", SHLD, WORD },
|
---|
331 | { "shr", SHR, WORD },
|
---|
332 | { "shrb", SHR, BYTE },
|
---|
333 | { "shrd", SHRD, WORD },
|
---|
334 | { "sidt", SIDT, WORD },
|
---|
335 | { "sldt", SLDT, WORD },
|
---|
336 | { "smsw", SMSW, WORD },
|
---|
337 | { "stc", STC, WORD },
|
---|
338 | { "std", STD, WORD },
|
---|
339 | { "sti", STI, WORD },
|
---|
340 | { "stos", STOS, WORD },
|
---|
341 | { "stosb", STOS, BYTE },
|
---|
342 | { "str", STR, WORD },
|
---|
343 | { "sub", SUB, WORD },
|
---|
344 | { "subb", SUB, BYTE },
|
---|
345 | { "test", TEST, WORD },
|
---|
346 | { "testb", TEST, BYTE },
|
---|
347 | { "verr", VERR, WORD },
|
---|
348 | { "verw", VERW, WORD },
|
---|
349 | { "wait", WAIT, WORD },
|
---|
350 | { "wbinvd", WBINVD, WORD },
|
---|
351 | { "xadd", XADD, WORD },
|
---|
352 | { "xchg", XCHG, WORD },
|
---|
353 | { "xchgb", XCHG, BYTE },
|
---|
354 | { "xlat", XLAT, WORD },
|
---|
355 | { "xor", XOR, WORD },
|
---|
356 | { "xorb", XOR, BYTE },
|
---|
357 | };
|
---|
358 |
|
---|
/* The two input dialects this parser understands.  ACK is the default;
 * ncc_parse_init() switches the parser over to NCC.
 */
enum dialect { ACK, NCC };

static enum dialect dialect= ACK;
|
---|
360 |
|
---|
void ack_parse_init(char *file)
/* Get ready to parse the ACK assembly source in 'file' by starting the
 * tokenizer on it.  The '!' is handed to tok_init() as its second argument
 * (NOTE(review): presumably the comment character of the dialect - confirm
 * against tok_init()).
 */
{
	tok_init(file, '!');
}
|
---|
366 |
|
---|
367 | void ncc_parse_init(char *file)
|
---|
368 | /* Prepare parsing of an ACK Xenix assembly file. See emit_ack.c for comments
|
---|
369 | * on this fine assembly dialect.
|
---|
370 | */
|
---|
371 | {
|
---|
372 | dialect= NCC;
|
---|
373 | ack_parse_init(file);
|
---|
374 | }
|
---|
375 |
|
---|
376 | static void zap(void)
|
---|
377 | /* An error, zap the rest of the line. */
|
---|
378 | {
|
---|
379 | token_t *t;
|
---|
380 |
|
---|
381 | while ((t= get_token(0))->type != T_EOF && t->symbol != ';')
|
---|
382 | skip_token(1);
|
---|
383 | }
|
---|
384 |
|
---|
385 | static mnemonic_t *search_mnem(char *name)
|
---|
386 | /* Binary search for a mnemonic. (That's why the table is sorted.) */
|
---|
387 | {
|
---|
388 | int low, mid, high;
|
---|
389 | int cmp;
|
---|
390 | mnemonic_t *m;
|
---|
391 |
|
---|
392 | low= 0;
|
---|
393 | high= arraysize(mnemtab)-1;
|
---|
394 | while (low <= high) {
|
---|
395 | mid= (low + high) / 2;
|
---|
396 | m= &mnemtab[mid];
|
---|
397 |
|
---|
398 | if ((cmp= strcmp(name, m->name)) == 0) return m;
|
---|
399 |
|
---|
400 | if (cmp < 0) high= mid-1; else low= mid+1;
|
---|
401 | }
|
---|
402 | return nil;
|
---|
403 | }
|
---|
404 |
|
---|
405 | static expression_t *ack_get_C_expression(int *pn)
|
---|
406 | /* Read a "C-like" expression. Note that we don't worry about precedence,
|
---|
407 | * the expression is printed later like it is read. If the target language
|
---|
408 | * does not have all the operators (like ~) then this has to be repaired by
|
---|
409 | * changing the source file. (No problem, you still have one source file
|
---|
410 | * to maintain, not two.)
|
---|
411 | */
|
---|
412 | {
|
---|
413 | expression_t *e, *a1, *a2;
|
---|
414 | token_t *t;
|
---|
415 |
|
---|
416 | if ((t= get_token(*pn))->symbol == '[') {
|
---|
417 | /* [ expr ]: grouping. */
|
---|
418 | (*pn)++;
|
---|
419 | if ((a1= ack_get_C_expression(pn)) == nil) return nil;
|
---|
420 | if (get_token(*pn)->symbol != ']') {
|
---|
421 | parse_err(1, t, "missing ]\n");
|
---|
422 | del_expr(a1);
|
---|
423 | return nil;
|
---|
424 | }
|
---|
425 | (*pn)++;
|
---|
426 | e= new_expr();
|
---|
427 | e->operator= '[';
|
---|
428 | e->middle= a1;
|
---|
429 | } else
|
---|
430 | if (t->type == T_WORD || t->type == T_STRING) {
|
---|
431 | /* Label, number, or string. */
|
---|
432 | e= new_expr();
|
---|
433 | e->operator= t->type == T_WORD ? 'W' : 'S';
|
---|
434 | e->name= allocate(nil, (t->len+1) * sizeof(e->name[0]));
|
---|
435 | memcpy(e->name, t->name, t->len+1);
|
---|
436 | e->len= t->len;
|
---|
437 | (*pn)++;
|
---|
438 | } else
|
---|
439 | if (t->symbol == '+' || t->symbol == '-' || t->symbol == '~') {
|
---|
440 | /* Unary operator. */
|
---|
441 | (*pn)++;
|
---|
442 | if ((a1= ack_get_C_expression(pn)) == nil) return nil;
|
---|
443 | e= new_expr();
|
---|
444 | e->operator= t->symbol;
|
---|
445 | e->middle= a1;
|
---|
446 | } else {
|
---|
447 | parse_err(1, t, "expression syntax error\n");
|
---|
448 | return nil;
|
---|
449 | }
|
---|
450 |
|
---|
451 | switch ((t= get_token(*pn))->symbol) {
|
---|
452 | case '+':
|
---|
453 | case '-':
|
---|
454 | case '*':
|
---|
455 | case '/':
|
---|
456 | case '%':
|
---|
457 | case '&':
|
---|
458 | case '|':
|
---|
459 | case '^':
|
---|
460 | case S_LEFTSHIFT:
|
---|
461 | case S_RIGHTSHIFT:
|
---|
462 | (*pn)++;
|
---|
463 | a1= e;
|
---|
464 | if ((a2= ack_get_C_expression(pn)) == nil) {
|
---|
465 | del_expr(a1);
|
---|
466 | return nil;
|
---|
467 | }
|
---|
468 | e= new_expr();
|
---|
469 | e->operator= t->symbol;
|
---|
470 | e->left= a1;
|
---|
471 | e->right= a2;
|
---|
472 | }
|
---|
473 | return e;
|
---|
474 | }
|
---|
475 |
|
---|
476 | static expression_t *ack_get_operand(int *pn, int deref)
|
---|
477 | /* Get something like: (memory), offset(base)(index*scale), or simpler. */
|
---|
478 | {
|
---|
479 | expression_t *e, *offset, *base, *index;
|
---|
480 | token_t *t;
|
---|
481 | int c;
|
---|
482 |
|
---|
483 | /* Is it (memory)? */
|
---|
484 | if (get_token(*pn)->symbol == '('
|
---|
485 | && ((t= get_token(*pn + 1))->type != T_WORD
|
---|
486 | || !isregister(t->name))
|
---|
487 | ) {
|
---|
488 | /* A memory dereference. */
|
---|
489 | (*pn)++;
|
---|
490 | if ((offset= ack_get_C_expression(pn)) == nil) return nil;
|
---|
491 | if (get_token(*pn)->symbol != ')') {
|
---|
492 | parse_err(1, t, "operand syntax error\n");
|
---|
493 | del_expr(offset);
|
---|
494 | return nil;
|
---|
495 | }
|
---|
496 | (*pn)++;
|
---|
497 | e= new_expr();
|
---|
498 | e->operator= '(';
|
---|
499 | e->middle= offset;
|
---|
500 | return e;
|
---|
501 | }
|
---|
502 |
|
---|
503 | /* #constant? */
|
---|
504 | if (dialect == NCC && deref
|
---|
505 | && ((c= get_token(*pn)->symbol) == '#' || c == '*')) {
|
---|
506 | /* NCC: mov ax,#constant -> ACK: mov ax,constant */
|
---|
507 | (*pn)++;
|
---|
508 | return ack_get_C_expression(pn);
|
---|
509 | }
|
---|
510 |
|
---|
511 | /* @address? */
|
---|
512 | if (dialect == NCC && get_token(*pn)->symbol == '@') {
|
---|
513 | /* NCC: jmp @address -> ACK: jmp (address) */
|
---|
514 | (*pn)++;
|
---|
515 | if ((offset= ack_get_operand(pn, deref)) == nil) return nil;
|
---|
516 | e= new_expr();
|
---|
517 | e->operator= '(';
|
---|
518 | e->middle= offset;
|
---|
519 | return e;
|
---|
520 | }
|
---|
521 |
|
---|
522 | /* Offset? */
|
---|
523 | if (get_token(*pn)->symbol != '(') {
|
---|
524 | /* There is an offset. */
|
---|
525 | if ((offset= ack_get_C_expression(pn)) == nil) return nil;
|
---|
526 | } else {
|
---|
527 | /* No offset. */
|
---|
528 | offset= nil;
|
---|
529 | }
|
---|
530 |
|
---|
531 | /* (base)? */
|
---|
532 | if (get_token(*pn)->symbol == '('
|
---|
533 | && (t= get_token(*pn + 1))->type == T_WORD
|
---|
534 | && isregister(t->name)
|
---|
535 | && get_token(*pn + 2)->symbol == ')'
|
---|
536 | ) {
|
---|
537 | /* A base register expression. */
|
---|
538 | base= new_expr();
|
---|
539 | base->operator= 'B';
|
---|
540 | base->name= copystr(t->name);
|
---|
541 | (*pn)+= 3;
|
---|
542 | } else {
|
---|
543 | /* No base register expression. */
|
---|
544 | base= nil;
|
---|
545 | }
|
---|
546 |
|
---|
547 | /* (index*scale)? */
|
---|
548 | if (get_token(*pn)->symbol == '(') {
|
---|
549 | /* An index most likely. */
|
---|
550 | token_t *m= nil;
|
---|
551 |
|
---|
552 | if (!( /* This must be true: */
|
---|
553 | (t= get_token(*pn + 1))->type == T_WORD
|
---|
554 | && isregister(t->name)
|
---|
555 | && (get_token(*pn + 2)->symbol == ')' || (
|
---|
556 | get_token(*pn + 2)->symbol == '*'
|
---|
557 | && (m= get_token(*pn + 3))->type == T_WORD
|
---|
558 | && strchr("1248", m->name[0]) != nil
|
---|
559 | && m->name[1] == 0
|
---|
560 | && get_token(*pn + 4)->symbol == ')'
|
---|
561 | ))
|
---|
562 | )) {
|
---|
563 | /* Alas it isn't */
|
---|
564 | parse_err(1, t, "operand syntax error\n");
|
---|
565 | del_expr(offset);
|
---|
566 | del_expr(base);
|
---|
567 | return nil;
|
---|
568 | }
|
---|
569 | /* Found an index. */
|
---|
570 | index= new_expr();
|
---|
571 | index->operator= m == nil ? '1' : m->name[0];
|
---|
572 | index->name= copystr(t->name);
|
---|
573 | (*pn)+= (m == nil ? 3 : 5);
|
---|
574 | } else {
|
---|
575 | /* No index. */
|
---|
576 | index= nil;
|
---|
577 | }
|
---|
578 |
|
---|
579 | if (dialect == NCC && deref && base == nil && index == nil
|
---|
580 | && !(offset != nil && offset->operator == 'W'
|
---|
581 | && isregister(offset->name))
|
---|
582 | ) {
|
---|
583 | /* NCC: mov ax,thing -> ACK mov ax,(thing) */
|
---|
584 | e= new_expr();
|
---|
585 | e->operator= '(';
|
---|
586 | e->middle= offset;
|
---|
587 | return e;
|
---|
588 | }
|
---|
589 |
|
---|
590 | if (base == nil && index == nil) {
|
---|
591 | /* Return a lone offset as is. */
|
---|
592 | e= offset;
|
---|
593 | } else {
|
---|
594 | e= new_expr();
|
---|
595 | e->operator= 'O';
|
---|
596 | e->left= offset;
|
---|
597 | e->middle= base;
|
---|
598 | e->right= index;
|
---|
599 | }
|
---|
600 | return e;
|
---|
601 | }
|
---|
602 |
|
---|
603 | static expression_t *ack_get_oplist(int *pn, int deref)
|
---|
604 | /* Get a comma (or colon for jmpf and callf) separated list of instruction
|
---|
605 | * operands.
|
---|
606 | */
|
---|
607 | {
|
---|
608 | expression_t *e, *o1, *o2;
|
---|
609 | token_t *t;
|
---|
610 |
|
---|
611 | if ((e= ack_get_operand(pn, deref)) == nil) return nil;
|
---|
612 |
|
---|
613 | if ((t= get_token(*pn))->symbol == ',' || t->symbol == ':') {
|
---|
614 | o1= e;
|
---|
615 | (*pn)++;
|
---|
616 | if ((o2= ack_get_oplist(pn, deref)) == nil) {
|
---|
617 | del_expr(o1);
|
---|
618 | return nil;
|
---|
619 | }
|
---|
620 | e= new_expr();
|
---|
621 | e->operator= ',';
|
---|
622 | e->left= o1;
|
---|
623 | e->right= o2;
|
---|
624 | }
|
---|
625 | return e;
|
---|
626 | }
|
---|
627 |
|
---|
628 | static asm86_t *ack_get_statement(void)
|
---|
629 | /* Get a pseudo op or machine instruction with arguments. */
|
---|
630 | {
|
---|
631 | token_t *t= get_token(0);
|
---|
632 | asm86_t *a;
|
---|
633 | mnemonic_t *m;
|
---|
634 | int n;
|
---|
635 | int prefix_seen;
|
---|
636 | int oaz_prefix;
|
---|
637 | int deref;
|
---|
638 |
|
---|
639 | assert(t->type == T_WORD);
|
---|
640 |
|
---|
641 | if (strcmp(t->name, ".sect") == 0) {
|
---|
642 | /* .sect .text etc. Accept only four segment names. */
|
---|
643 | skip_token(1);
|
---|
644 | t= get_token(0);
|
---|
645 | if (t->type != T_WORD || (
|
---|
646 | strcmp(t->name, ".text") != 0
|
---|
647 | && strcmp(t->name, ".rom") != 0
|
---|
648 | && strcmp(t->name, ".data") != 0
|
---|
649 | && strcmp(t->name, ".bss") != 0
|
---|
650 | && strcmp(t->name, ".end") != 0
|
---|
651 | )) {
|
---|
652 | parse_err(1, t, "weird section name to .sect\n");
|
---|
653 | return nil;
|
---|
654 | }
|
---|
655 | }
|
---|
656 | a= new_asm86();
|
---|
657 |
|
---|
658 | /* Process instruction prefixes. */
|
---|
659 | oaz_prefix= 0;
|
---|
660 | for (prefix_seen= 0;; prefix_seen= 1) {
|
---|
661 | if (strcmp(t->name, "o16") == 0) {
|
---|
662 | if (use16()) {
|
---|
663 | parse_err(1, t, "o16 in an 8086 section\n");
|
---|
664 | }
|
---|
665 | oaz_prefix|= OPZ;
|
---|
666 | } else
|
---|
667 | if (strcmp(t->name, "o32") == 0) {
|
---|
668 | if (use32()) {
|
---|
669 | parse_err(1, t, "o32 in an 80386 section\n");
|
---|
670 | }
|
---|
671 | oaz_prefix|= OPZ;
|
---|
672 | } else
|
---|
673 | if (strcmp(t->name, "a16") == 0) {
|
---|
674 | if (use16()) {
|
---|
675 | parse_err(1, t, "a16 in an 8086 section\n");
|
---|
676 | }
|
---|
677 | oaz_prefix|= ADZ;
|
---|
678 | } else
|
---|
679 | if (strcmp(t->name, "a32") == 0) {
|
---|
680 | if (use32()) {
|
---|
681 | parse_err(1, t, "a32 in an 80386 section\n");
|
---|
682 | }
|
---|
683 | oaz_prefix|= ADZ;
|
---|
684 | } else
|
---|
685 | if (strcmp(t->name, "rep") == 0
|
---|
686 | || strcmp(t->name, "repe") == 0
|
---|
687 | || strcmp(t->name, "repne") == 0
|
---|
688 | || strcmp(t->name, "repz") == 0
|
---|
689 | || strcmp(t->name, "repnz") == 0
|
---|
690 | ) {
|
---|
691 | if (a->rep != ONCE) {
|
---|
692 | parse_err(1, t,
|
---|
693 | "can't have more than one rep\n");
|
---|
694 | }
|
---|
695 | switch (t->name[3]) {
|
---|
696 | case 0: a->rep= REP; break;
|
---|
697 | case 'e':
|
---|
698 | case 'z': a->rep= REPE; break;
|
---|
699 | case 'n': a->rep= REPNE; break;
|
---|
700 | }
|
---|
701 | } else
|
---|
702 | if (strchr("cdefgs", t->name[0]) != nil
|
---|
703 | && strcmp(t->name+1, "seg") == 0) {
|
---|
704 | if (a->seg != DEFSEG) {
|
---|
705 | parse_err(1, t,
|
---|
706 | "can't have more than one segment prefix\n");
|
---|
707 | }
|
---|
708 | switch (t->name[0]) {
|
---|
709 | case 'c': a->seg= CSEG; break;
|
---|
710 | case 'd': a->seg= DSEG; break;
|
---|
711 | case 'e': a->seg= ESEG; break;
|
---|
712 | case 'f': a->seg= FSEG; break;
|
---|
713 | case 'g': a->seg= GSEG; break;
|
---|
714 | case 's': a->seg= SSEG; break;
|
---|
715 | }
|
---|
716 | } else
|
---|
717 | if (!prefix_seen) {
|
---|
718 | /* No prefix here, get out! */
|
---|
719 | break;
|
---|
720 | } else {
|
---|
721 | /* No more prefixes, next must be an instruction. */
|
---|
722 | if (t->type != T_WORD
|
---|
723 | || (m= search_mnem(t->name)) == nil
|
---|
724 | || m->optype == PSEUDO
|
---|
725 | ) {
|
---|
726 | parse_err(1, t,
|
---|
727 | "machine instruction expected after instruction prefix\n");
|
---|
728 | del_asm86(a);
|
---|
729 | return nil;
|
---|
730 | }
|
---|
731 | if (oaz_prefix != 0 && m->optype != JUMP
|
---|
732 | && m->optype != WORD) {
|
---|
733 | parse_err(1, t,
|
---|
734 | "'%s' can't have an operand size prefix\n", m->name);
|
---|
735 | }
|
---|
736 | break;
|
---|
737 | }
|
---|
738 |
|
---|
739 | /* Skip the prefix and extra newlines. */
|
---|
740 | do {
|
---|
741 | skip_token(1);
|
---|
742 | } while ((t= get_token(0))->symbol == ';');
|
---|
743 | }
|
---|
744 |
|
---|
745 | /* All the readahead being done upsets the line counter. */
|
---|
746 | a->line= t->line;
|
---|
747 |
|
---|
748 | /* Read a machine instruction or pseudo op. */
|
---|
749 | if ((m= search_mnem(t->name)) == nil) {
|
---|
750 | parse_err(1, t, "unknown instruction '%s'\n", t->name);
|
---|
751 | del_asm86(a);
|
---|
752 | return nil;
|
---|
753 | }
|
---|
754 | a->opcode= m->opcode;
|
---|
755 | a->optype= m->optype;
|
---|
756 | a->oaz= oaz_prefix;
|
---|
757 |
|
---|
758 | switch (a->opcode) {
|
---|
759 | case IN:
|
---|
760 | case OUT:
|
---|
761 | case INT:
|
---|
762 | deref= 0;
|
---|
763 | break;
|
---|
764 | default:
|
---|
765 | deref= (a->optype >= BYTE);
|
---|
766 | }
|
---|
767 | n= 1;
|
---|
768 | if (get_token(1)->symbol != ';'
|
---|
769 | && (a->args= ack_get_oplist(&n, deref)) == nil) {
|
---|
770 | del_asm86(a);
|
---|
771 | return nil;
|
---|
772 | }
|
---|
773 | if (get_token(n)->symbol != ';') {
|
---|
774 | parse_err(1, t, "garbage at end of instruction\n");
|
---|
775 | del_asm86(a);
|
---|
776 | return nil;
|
---|
777 | }
|
---|
778 | switch (a->opcode) {
|
---|
779 | case DOT_ALIGN:
|
---|
780 | /* Restrict .align to have a single numeric argument, some
|
---|
781 | * assemblers think of the argument as a power of two, so
|
---|
782 | * we need to be able to change the value.
|
---|
783 | */
|
---|
784 | if (a->args == nil || a->args->operator != 'W'
|
---|
785 | || !isanumber(a->args->name)) {
|
---|
786 | parse_err(1, t,
|
---|
787 | ".align is restricted to one numeric argument\n");
|
---|
788 | del_asm86(a);
|
---|
789 | return nil;
|
---|
790 | }
|
---|
791 | break;
|
---|
792 | case JMPF:
|
---|
793 | case CALLF:
|
---|
794 | /* NCC jmpf off,seg -> ACK jmpf seg:off */
|
---|
795 | if (dialect == NCC && a->args != nil
|
---|
796 | && a->args->operator == ',') {
|
---|
797 | expression_t *t;
|
---|
798 |
|
---|
799 | t= a->args->left;
|
---|
800 | a->args->left= a->args->right;
|
---|
801 | a->args->right= t;
|
---|
802 | break;
|
---|
803 | }
|
---|
804 | /*FALL THROUGH*/
|
---|
805 | case JMP:
|
---|
806 | case CALL:
|
---|
807 | /* NCC jmp @(reg) -> ACK jmp (reg) */
|
---|
808 | if (dialect == NCC && a->args != nil && (
|
---|
809 | (a->args->operator == '('
|
---|
810 | && a->args->middle != nil
|
---|
811 | && a->args->middle->operator == 'O')
|
---|
812 | || (a->args->operator == 'O'
|
---|
813 | && a->args->left == nil
|
---|
814 | && a->args->middle != nil
|
---|
815 | && a->args->right == nil)
|
---|
816 | )) {
|
---|
817 | expression_t *t;
|
---|
818 |
|
---|
819 | t= a->args;
|
---|
820 | a->args= a->args->middle;
|
---|
821 | t->middle= nil;
|
---|
822 | del_expr(t);
|
---|
823 | if (a->args->operator == 'B') a->args->operator= 'W';
|
---|
824 | }
|
---|
825 | break;
|
---|
826 | default:;
|
---|
827 | }
|
---|
828 | skip_token(n+1);
|
---|
829 | return a;
|
---|
830 | }
|
---|
831 |
|
---|
832 | asm86_t *ack_get_instruction(void)
|
---|
833 | {
|
---|
834 | asm86_t *a= nil;
|
---|
835 | expression_t *e;
|
---|
836 | token_t *t;
|
---|
837 |
|
---|
838 | while ((t= get_token(0))->symbol == ';')
|
---|
839 | skip_token(1);
|
---|
840 |
|
---|
841 | if (t->type == T_EOF) return nil;
|
---|
842 |
|
---|
843 | if (t->symbol == '#') {
|
---|
844 | /* Preprocessor line and file change. */
|
---|
845 |
|
---|
846 | if ((t= get_token(1))->type != T_WORD || !isanumber(t->name)
|
---|
847 | || get_token(2)->type != T_STRING
|
---|
848 | ) {
|
---|
849 | parse_err(1, t, "file not preprocessed?\n");
|
---|
850 | zap();
|
---|
851 | } else {
|
---|
852 | set_file(get_token(2)->name,
|
---|
853 | strtol(get_token(1)->name, nil, 0) - 1);
|
---|
854 |
|
---|
855 | /* GNU CPP adds extra cruft, simply zap the line. */
|
---|
856 | zap();
|
---|
857 | }
|
---|
858 | a= ack_get_instruction();
|
---|
859 | } else
|
---|
860 | if (t->type == T_WORD && get_token(1)->symbol == ':') {
|
---|
861 | /* A label definition. */
|
---|
862 | a= new_asm86();
|
---|
863 | a->line= t->line;
|
---|
864 | a->opcode= DOT_LABEL;
|
---|
865 | a->optype= PSEUDO;
|
---|
866 | a->args= e= new_expr();
|
---|
867 | e->operator= ':';
|
---|
868 | e->name= copystr(t->name);
|
---|
869 | skip_token(2);
|
---|
870 | } else
|
---|
871 | if (t->type == T_WORD && get_token(1)->symbol == '=') {
|
---|
872 | int n= 2;
|
---|
873 |
|
---|
874 | if ((e= ack_get_C_expression(&n)) == nil) {
|
---|
875 | zap();
|
---|
876 | a= ack_get_instruction();
|
---|
877 | } else
|
---|
878 | if (get_token(n)->symbol != ';') {
|
---|
879 | parse_err(1, t, "garbage after assignment\n");
|
---|
880 | zap();
|
---|
881 | a= ack_get_instruction();
|
---|
882 | } else {
|
---|
883 | a= new_asm86();
|
---|
884 | a->line= t->line;
|
---|
885 | a->opcode= DOT_EQU;
|
---|
886 | a->optype= PSEUDO;
|
---|
887 | a->args= new_expr();
|
---|
888 | a->args->operator= '=';
|
---|
889 | a->args->name= copystr(t->name);
|
---|
890 | a->args->middle= e;
|
---|
891 | skip_token(n+1);
|
---|
892 | }
|
---|
893 | } else
|
---|
894 | if (t->type == T_WORD) {
|
---|
895 | if ((a= ack_get_statement()) == nil) {
|
---|
896 | zap();
|
---|
897 | a= ack_get_instruction();
|
---|
898 | }
|
---|
899 | } else {
|
---|
900 | parse_err(1, t, "syntax error\n");
|
---|
901 | zap();
|
---|
902 | a= ack_get_instruction();
|
---|
903 | }
|
---|
904 | return a;
|
---|
905 | }
|
---|
906 |
|
---|
907 | asm86_t *ncc_get_instruction(void)
|
---|
908 | {
|
---|
909 | return ack_get_instruction();
|
---|
910 | }
|
---|