/*	parse_bas.c - parse BCC AS assembly		Author: Kees J. Bot
 *								13 Nov 1994
 */
|
---|
4 | #define nil 0
|
---|
5 | #include <stdio.h>
|
---|
6 | #include <stdlib.h>
|
---|
7 | #include <string.h>
|
---|
8 | #include <assert.h>
|
---|
9 | #include "asmconv.h"
|
---|
10 | #include "token.h"
|
---|
11 | #include "asm86.h"
|
---|
12 | #include "languages.h"
|
---|
13 |
|
---|
14 | typedef struct mnemonic { /* BAS mnemonics translation table. */
|
---|
15 | char *name;
|
---|
16 | opcode_t opcode;
|
---|
17 | optype_t optype;
|
---|
18 | } mnemonic_t;
|
---|
19 |
|
---|
20 | static mnemonic_t mnemtab[] = { /* This array is sorted. */
|
---|
21 | { ".align", DOT_ALIGN, PSEUDO },
|
---|
22 | { ".ascii", DOT_ASCII, PSEUDO },
|
---|
23 | { ".asciz", DOT_ASCIZ, PSEUDO },
|
---|
24 | { ".assert", DOT_ASSERT, PSEUDO },
|
---|
25 | { ".base", DOT_BASE, PSEUDO },
|
---|
26 | { ".blkb", DOT_SPACE, PSEUDO },
|
---|
27 | { ".bss", DOT_BSS, PSEUDO },
|
---|
28 | { ".byte", DOT_DATA1, PSEUDO },
|
---|
29 | { ".comm", DOT_COMM, PSEUDO },
|
---|
30 | { ".data", DOT_DATA, PSEUDO },
|
---|
31 | { ".define", DOT_DEFINE, PSEUDO },
|
---|
32 | { ".end", DOT_END, PSEUDO },
|
---|
33 | { ".even", DOT_ALIGN, PSEUDO },
|
---|
34 | { ".extern", DOT_EXTERN, PSEUDO },
|
---|
35 | { ".file", DOT_FILE, PSEUDO },
|
---|
36 | { ".globl", DOT_DEFINE, PSEUDO },
|
---|
37 | { ".lcomm", DOT_LCOMM, PSEUDO },
|
---|
38 | { ".line", DOT_LINE, PSEUDO },
|
---|
39 | { ".list", DOT_LIST, PSEUDO },
|
---|
40 | { ".long", DOT_DATA4, PSEUDO },
|
---|
41 | { ".nolist", DOT_NOLIST, PSEUDO },
|
---|
42 | { ".rom", DOT_ROM, PSEUDO },
|
---|
43 | { ".space", DOT_SPACE, PSEUDO },
|
---|
44 | { ".symb", DOT_SYMB, PSEUDO },
|
---|
45 | { ".text", DOT_TEXT, PSEUDO },
|
---|
46 | { ".use16", DOT_USE16, PSEUDO },
|
---|
47 | { ".use32", DOT_USE32, PSEUDO },
|
---|
48 | { ".word", DOT_DATA2, PSEUDO },
|
---|
49 | { ".zerob", DOT_SPACE, PSEUDO },
|
---|
50 | { ".zerow", DOT_SPACE, PSEUDO },
|
---|
51 | { "aaa", AAA, WORD },
|
---|
52 | { "aad", AAD, WORD },
|
---|
53 | { "aam", AAM, WORD },
|
---|
54 | { "aas", AAS, WORD },
|
---|
55 | { "adc", ADC, WORD },
|
---|
56 | { "add", ADD, WORD },
|
---|
57 | { "and", AND, WORD },
|
---|
58 | { "arpl", ARPL, WORD },
|
---|
59 | { "bc", JB, JUMP },
|
---|
60 | { "beq", JE, JUMP },
|
---|
61 | { "bge", JGE, JUMP },
|
---|
62 | { "bgt", JG, JUMP },
|
---|
63 | { "bhi", JA, JUMP },
|
---|
64 | { "bhis", JAE, JUMP },
|
---|
65 | { "ble", JLE, JUMP },
|
---|
66 | { "blo", JB, JUMP },
|
---|
67 | { "blos", JBE, JUMP },
|
---|
68 | { "blt", JL, JUMP },
|
---|
69 | { "bnc", JAE, JUMP },
|
---|
70 | { "bne", JNE, JUMP },
|
---|
71 | { "bound", BOUND, WORD },
|
---|
72 | { "br", JMP, JUMP },
|
---|
73 | { "bsf", BSF, WORD },
|
---|
74 | { "bsr", BSR, WORD },
|
---|
75 | { "bswap", BSWAP, WORD },
|
---|
76 | { "bt", BT, WORD },
|
---|
77 | { "btc", BTC, WORD },
|
---|
78 | { "btr", BTR, WORD },
|
---|
79 | { "bts", BTS, WORD },
|
---|
80 | { "bz", JE, JUMP },
|
---|
81 | { "call", CALL, JUMP },
|
---|
82 | { "callf", CALLF, JUMP },
|
---|
83 | { "cbw", CBW, WORD },
|
---|
84 | { "cdq", CWD, WORD },
|
---|
85 | { "clc", CLC, WORD },
|
---|
86 | { "cld", CLD, WORD },
|
---|
87 | { "cli", CLI, WORD },
|
---|
88 | { "clts", CLTS, WORD },
|
---|
89 | { "cmc", CMC, WORD },
|
---|
90 | { "cmp", CMP, WORD },
|
---|
91 | { "cmps", CMPS, WORD },
|
---|
92 | { "cmpsb", CMPS, BYTE },
|
---|
93 | { "cmpxchg", CMPXCHG, WORD },
|
---|
94 | { "cwd", CWD, WORD },
|
---|
95 | { "cwde", CBW, WORD },
|
---|
96 | { "daa", DAA, WORD },
|
---|
97 | { "das", DAS, WORD },
|
---|
98 | { "dd", DOT_DATA4, PSEUDO },
|
---|
99 | { "dec", DEC, WORD },
|
---|
100 | { "div", DIV, WORD },
|
---|
101 | { "enter", ENTER, WORD },
|
---|
102 | { "export", DOT_DEFINE, PSEUDO },
|
---|
103 | { "f2xm1", F2XM1, WORD },
|
---|
104 | { "fabs", FABS, WORD },
|
---|
105 | { "fadd", FADD, WORD },
|
---|
106 | { "faddd", FADDD, WORD },
|
---|
107 | { "faddp", FADDP, WORD },
|
---|
108 | { "fadds", FADDS, WORD },
|
---|
109 | { "fbld", FBLD, WORD },
|
---|
110 | { "fbstp", FBSTP, WORD },
|
---|
111 | { "fchs", FCHS, WORD },
|
---|
112 | { "fclex", FCLEX, WORD },
|
---|
113 | { "fcomd", FCOMD, WORD },
|
---|
114 | { "fcompd", FCOMPD, WORD },
|
---|
115 | { "fcompp", FCOMPP, WORD },
|
---|
116 | { "fcomps", FCOMPS, WORD },
|
---|
117 | { "fcoms", FCOMS, WORD },
|
---|
118 | { "fcos", FCOS, WORD },
|
---|
119 | { "fdecstp", FDECSTP, WORD },
|
---|
120 | { "fdivd", FDIVD, WORD },
|
---|
121 | { "fdivp", FDIVP, WORD },
|
---|
122 | { "fdivrd", FDIVRD, WORD },
|
---|
123 | { "fdivrp", FDIVRP, WORD },
|
---|
124 | { "fdivrs", FDIVRS, WORD },
|
---|
125 | { "fdivs", FDIVS, WORD },
|
---|
126 | { "ffree", FFREE, WORD },
|
---|
127 | { "fiaddl", FIADDL, WORD },
|
---|
128 | { "fiadds", FIADDS, WORD },
|
---|
129 | { "ficom", FICOM, WORD },
|
---|
130 | { "ficomp", FICOMP, WORD },
|
---|
131 | { "fidivl", FIDIVL, WORD },
|
---|
132 | { "fidivrl", FIDIVRL, WORD },
|
---|
133 | { "fidivrs", FIDIVRS, WORD },
|
---|
134 | { "fidivs", FIDIVS, WORD },
|
---|
135 | { "fildl", FILDL, WORD },
|
---|
136 | { "fildq", FILDQ, WORD },
|
---|
137 | { "filds", FILDS, WORD },
|
---|
138 | { "fimull", FIMULL, WORD },
|
---|
139 | { "fimuls", FIMULS, WORD },
|
---|
140 | { "fincstp", FINCSTP, WORD },
|
---|
141 | { "finit", FINIT, WORD },
|
---|
142 | { "fistl", FISTL, WORD },
|
---|
143 | { "fistp", FISTP, WORD },
|
---|
144 | { "fists", FISTS, WORD },
|
---|
145 | { "fisubl", FISUBL, WORD },
|
---|
146 | { "fisubrl", FISUBRL, WORD },
|
---|
147 | { "fisubrs", FISUBRS, WORD },
|
---|
148 | { "fisubs", FISUBS, WORD },
|
---|
149 | { "fld1", FLD1, WORD },
|
---|
150 | { "fldcw", FLDCW, WORD },
|
---|
151 | { "fldd", FLDD, WORD },
|
---|
152 | { "fldenv", FLDENV, WORD },
|
---|
153 | { "fldl2e", FLDL2E, WORD },
|
---|
154 | { "fldl2t", FLDL2T, WORD },
|
---|
155 | { "fldlg2", FLDLG2, WORD },
|
---|
156 | { "fldln2", FLDLN2, WORD },
|
---|
157 | { "fldpi", FLDPI, WORD },
|
---|
158 | { "flds", FLDS, WORD },
|
---|
159 | { "fldx", FLDX, WORD },
|
---|
160 | { "fldz", FLDZ, WORD },
|
---|
161 | { "fmuld", FMULD, WORD },
|
---|
162 | { "fmulp", FMULP, WORD },
|
---|
163 | { "fmuls", FMULS, WORD },
|
---|
164 | { "fnop", FNOP, WORD },
|
---|
165 | { "fpatan", FPATAN, WORD },
|
---|
166 | { "fprem", FPREM, WORD },
|
---|
167 | { "fprem1", FPREM1, WORD },
|
---|
168 | { "fptan", FPTAN, WORD },
|
---|
169 | { "frndint", FRNDINT, WORD },
|
---|
170 | { "frstor", FRSTOR, WORD },
|
---|
171 | { "fsave", FSAVE, WORD },
|
---|
172 | { "fscale", FSCALE, WORD },
|
---|
173 | { "fsin", FSIN, WORD },
|
---|
174 | { "fsincos", FSINCOS, WORD },
|
---|
175 | { "fsqrt", FSQRT, WORD },
|
---|
176 | { "fstcw", FSTCW, WORD },
|
---|
177 | { "fstd", FSTD, WORD },
|
---|
178 | { "fstenv", FSTENV, WORD },
|
---|
179 | { "fstpd", FSTPD, WORD },
|
---|
180 | { "fstps", FSTPS, WORD },
|
---|
181 | { "fstpx", FSTPX, WORD },
|
---|
182 | { "fsts", FSTS, WORD },
|
---|
183 | { "fstsw", FSTSW, WORD },
|
---|
184 | { "fsubd", FSUBD, WORD },
|
---|
185 | { "fsubp", FSUBP, WORD },
|
---|
186 | { "fsubpr", FSUBPR, WORD },
|
---|
187 | { "fsubrd", FSUBRD, WORD },
|
---|
188 | { "fsubrs", FSUBRS, WORD },
|
---|
189 | { "fsubs", FSUBS, WORD },
|
---|
190 | { "ftst", FTST, WORD },
|
---|
191 | { "fucom", FUCOM, WORD },
|
---|
192 | { "fucomp", FUCOMP, WORD },
|
---|
193 | { "fucompp", FUCOMPP, WORD },
|
---|
194 | { "fxam", FXAM, WORD },
|
---|
195 | { "fxch", FXCH, WORD },
|
---|
196 | { "fxtract", FXTRACT, WORD },
|
---|
197 | { "fyl2x", FYL2X, WORD },
|
---|
198 | { "fyl2xp1", FYL2XP1, WORD },
|
---|
199 | { "hlt", HLT, WORD },
|
---|
200 | { "idiv", IDIV, WORD },
|
---|
201 | { "imul", IMUL, WORD },
|
---|
202 | { "in", IN, WORD },
|
---|
203 | { "inb", IN, BYTE },
|
---|
204 | { "inc", INC, WORD },
|
---|
205 | { "ins", INS, WORD },
|
---|
206 | { "insb", INS, BYTE },
|
---|
207 | { "int", INT, WORD },
|
---|
208 | { "into", INTO, JUMP },
|
---|
209 | { "invd", INVD, WORD },
|
---|
210 | { "invlpg", INVLPG, WORD },
|
---|
211 | { "iret", IRET, JUMP },
|
---|
212 | { "iretd", IRETD, JUMP },
|
---|
213 | { "j", JMP, JUMP },
|
---|
214 | { "ja", JA, JUMP },
|
---|
215 | { "jae", JAE, JUMP },
|
---|
216 | { "jb", JB, JUMP },
|
---|
217 | { "jbe", JBE, JUMP },
|
---|
218 | { "jc", JB, JUMP },
|
---|
219 | { "jcxz", JCXZ, JUMP },
|
---|
220 | { "je", JE, JUMP },
|
---|
221 | { "jecxz", JCXZ, JUMP },
|
---|
222 | { "jeq", JE, JUMP },
|
---|
223 | { "jg", JG, JUMP },
|
---|
224 | { "jge", JGE, JUMP },
|
---|
225 | { "jgt", JG, JUMP },
|
---|
226 | { "jhi", JA, JUMP },
|
---|
227 | { "jhis", JAE, JUMP },
|
---|
228 | { "jl", JL, JUMP },
|
---|
229 | { "jle", JLE, JUMP },
|
---|
230 | { "jlo", JB, JUMP },
|
---|
231 | { "jlos", JBE, JUMP },
|
---|
232 | { "jlt", JL, JUMP },
|
---|
233 | { "jmp", JMP, JUMP },
|
---|
234 | { "jmpf", JMPF, JUMP },
|
---|
235 | { "jna", JBE, JUMP },
|
---|
236 | { "jnae", JB, JUMP },
|
---|
237 | { "jnb", JAE, JUMP },
|
---|
238 | { "jnbe", JA, JUMP },
|
---|
239 | { "jnc", JAE, JUMP },
|
---|
240 | { "jne", JNE, JUMP },
|
---|
241 | { "jng", JLE, JUMP },
|
---|
242 | { "jnge", JL, JUMP },
|
---|
243 | { "jnl", JGE, JUMP },
|
---|
244 | { "jnle", JG, JUMP },
|
---|
245 | { "jno", JNO, JUMP },
|
---|
246 | { "jnp", JNP, JUMP },
|
---|
247 | { "jns", JNS, JUMP },
|
---|
248 | { "jnz", JNE, JUMP },
|
---|
249 | { "jo", JO, JUMP },
|
---|
250 | { "jp", JP, JUMP },
|
---|
251 | { "js", JS, JUMP },
|
---|
252 | { "jz", JE, JUMP },
|
---|
253 | { "lahf", LAHF, WORD },
|
---|
254 | { "lar", LAR, WORD },
|
---|
255 | { "lds", LDS, WORD },
|
---|
256 | { "lea", LEA, WORD },
|
---|
257 | { "leave", LEAVE, WORD },
|
---|
258 | { "les", LES, WORD },
|
---|
259 | { "lfs", LFS, WORD },
|
---|
260 | { "lgdt", LGDT, WORD },
|
---|
261 | { "lgs", LGS, WORD },
|
---|
262 | { "lidt", LIDT, WORD },
|
---|
263 | { "lldt", LLDT, WORD },
|
---|
264 | { "lmsw", LMSW, WORD },
|
---|
265 | { "lock", LOCK, WORD },
|
---|
266 | { "lods", LODS, WORD },
|
---|
267 | { "lodsb", LODS, BYTE },
|
---|
268 | { "loop", LOOP, JUMP },
|
---|
269 | { "loope", LOOPE, JUMP },
|
---|
270 | { "loopne", LOOPNE, JUMP },
|
---|
271 | { "loopnz", LOOPNE, JUMP },
|
---|
272 | { "loopz", LOOPE, JUMP },
|
---|
273 | { "lsl", LSL, WORD },
|
---|
274 | { "lss", LSS, WORD },
|
---|
275 | { "ltr", LTR, WORD },
|
---|
276 | { "mov", MOV, WORD },
|
---|
277 | { "movs", MOVS, WORD },
|
---|
278 | { "movsb", MOVS, BYTE },
|
---|
279 | { "movsx", MOVSX, WORD },
|
---|
280 | { "movzx", MOVZX, WORD },
|
---|
281 | { "mul", MUL, WORD },
|
---|
282 | { "neg", NEG, WORD },
|
---|
283 | { "nop", NOP, WORD },
|
---|
284 | { "not", NOT, WORD },
|
---|
285 | { "or", OR, WORD },
|
---|
286 | { "out", OUT, WORD },
|
---|
287 | { "outb", OUT, BYTE },
|
---|
288 | { "outs", OUTS, WORD },
|
---|
289 | { "outsb", OUTS, BYTE },
|
---|
290 | { "pop", POP, WORD },
|
---|
291 | { "popa", POPA, WORD },
|
---|
292 | { "popad", POPA, WORD },
|
---|
293 | { "popf", POPF, WORD },
|
---|
294 | { "popfd", POPF, WORD },
|
---|
295 | { "push", PUSH, WORD },
|
---|
296 | { "pusha", PUSHA, WORD },
|
---|
297 | { "pushad", PUSHA, WORD },
|
---|
298 | { "pushf", PUSHF, WORD },
|
---|
299 | { "pushfd", PUSHF, WORD },
|
---|
300 | { "rcl", RCL, WORD },
|
---|
301 | { "rcr", RCR, WORD },
|
---|
302 | { "ret", RET, JUMP },
|
---|
303 | { "retf", RETF, JUMP },
|
---|
304 | { "rol", ROL, WORD },
|
---|
305 | { "ror", ROR, WORD },
|
---|
306 | { "sahf", SAHF, WORD },
|
---|
307 | { "sal", SAL, WORD },
|
---|
308 | { "sar", SAR, WORD },
|
---|
309 | { "sbb", SBB, WORD },
|
---|
310 | { "scas", SCAS, WORD },
|
---|
311 | { "seta", SETA, BYTE },
|
---|
312 | { "setae", SETAE, BYTE },
|
---|
313 | { "setb", SETB, BYTE },
|
---|
314 | { "setbe", SETBE, BYTE },
|
---|
315 | { "sete", SETE, BYTE },
|
---|
316 | { "setg", SETG, BYTE },
|
---|
317 | { "setge", SETGE, BYTE },
|
---|
318 | { "setl", SETL, BYTE },
|
---|
319 | { "setna", SETBE, BYTE },
|
---|
320 | { "setnae", SETB, BYTE },
|
---|
321 | { "setnb", SETAE, BYTE },
|
---|
322 | { "setnbe", SETA, BYTE },
|
---|
323 | { "setne", SETNE, BYTE },
|
---|
324 | { "setng", SETLE, BYTE },
|
---|
325 | { "setnge", SETL, BYTE },
|
---|
326 | { "setnl", SETGE, BYTE },
|
---|
327 | { "setnle", SETG, BYTE },
|
---|
328 | { "setno", SETNO, BYTE },
|
---|
329 | { "setnp", SETNP, BYTE },
|
---|
330 | { "setns", SETNS, BYTE },
|
---|
331 | { "seto", SETO, BYTE },
|
---|
332 | { "setp", SETP, BYTE },
|
---|
333 | { "sets", SETS, BYTE },
|
---|
334 | { "setz", SETE, BYTE },
|
---|
335 | { "sgdt", SGDT, WORD },
|
---|
336 | { "shl", SHL, WORD },
|
---|
337 | { "shld", SHLD, WORD },
|
---|
338 | { "shr", SHR, WORD },
|
---|
339 | { "shrd", SHRD, WORD },
|
---|
340 | { "sidt", SIDT, WORD },
|
---|
341 | { "sldt", SLDT, WORD },
|
---|
342 | { "smsw", SMSW, WORD },
|
---|
343 | { "stc", STC, WORD },
|
---|
344 | { "std", STD, WORD },
|
---|
345 | { "sti", STI, WORD },
|
---|
346 | { "stos", STOS, WORD },
|
---|
347 | { "stosb", STOS, BYTE },
|
---|
348 | { "str", STR, WORD },
|
---|
349 | { "sub", SUB, WORD },
|
---|
350 | { "test", TEST, WORD },
|
---|
351 | { "verr", VERR, WORD },
|
---|
352 | { "verw", VERW, WORD },
|
---|
353 | { "wait", WAIT, WORD },
|
---|
354 | { "wbinvd", WBINVD, WORD },
|
---|
355 | { "xadd", XADD, WORD },
|
---|
356 | { "xchg", XCHG, WORD },
|
---|
357 | { "xlat", XLAT, WORD },
|
---|
358 | { "xor", XOR, WORD },
|
---|
359 | };
|
---|
360 |
|
---|
void bas_parse_init(char *file)
/* Set up the tokenizer for parsing the BAS assembly file 'file'. */
{
	/* The '!' is passed on to tok_init; presumably it is the character
	 * that starts a comment in BAS source -- confirm against tok_init.
	 */
	tok_init(file, '!');
}
|
---|
366 |
|
---|
367 | static void zap(void)
|
---|
368 | /* An error, zap the rest of the line. */
|
---|
369 | {
|
---|
370 | token_t *t;
|
---|
371 |
|
---|
372 | while ((t= get_token(0))->type != T_EOF && t->symbol != ';')
|
---|
373 | skip_token(1);
|
---|
374 | }
|
---|
375 |
|
---|
376 | static mnemonic_t *search_mnem(char *name)
|
---|
377 | /* Binary search for a mnemonic. (That's why the table is sorted.) */
|
---|
378 | {
|
---|
379 | int low, mid, high;
|
---|
380 | int cmp;
|
---|
381 | mnemonic_t *m;
|
---|
382 |
|
---|
383 | low= 0;
|
---|
384 | high= arraysize(mnemtab)-1;
|
---|
385 | while (low <= high) {
|
---|
386 | mid= (low + high) / 2;
|
---|
387 | m= &mnemtab[mid];
|
---|
388 |
|
---|
389 | if ((cmp= strcmp(name, m->name)) == 0) return m;
|
---|
390 |
|
---|
391 | if (cmp < 0) high= mid-1; else low= mid+1;
|
---|
392 | }
|
---|
393 | return nil;
|
---|
394 | }
|
---|
395 |
|
---|
396 | static expression_t *bas_get_C_expression(int *pn)
|
---|
397 | /* Read a "C-like" expression. Note that we don't worry about precedence,
|
---|
398 | * the expression is printed later like it is read. If the target language
|
---|
399 | * does not have all the operators (like ~) then this has to be repaired by
|
---|
400 | * changing the source file. (No problem, you still have one source file
|
---|
401 | * to maintain, not two.)
|
---|
402 | */
|
---|
403 | {
|
---|
404 | expression_t *e, *a1, *a2;
|
---|
405 | token_t *t;
|
---|
406 |
|
---|
407 | if ((t= get_token(*pn))->symbol == '(') {
|
---|
408 | /* ( expr ): grouping. */
|
---|
409 | (*pn)++;
|
---|
410 | if ((a1= bas_get_C_expression(pn)) == nil) return nil;
|
---|
411 | if (get_token(*pn)->symbol != ')') {
|
---|
412 | parse_err(1, t, "missing )\n");
|
---|
413 | del_expr(a1);
|
---|
414 | return nil;
|
---|
415 | }
|
---|
416 | (*pn)++;
|
---|
417 | e= new_expr();
|
---|
418 | e->operator= '[';
|
---|
419 | e->middle= a1;
|
---|
420 | } else
|
---|
421 | if (t->type == T_WORD || t->type == T_STRING) {
|
---|
422 | /* Label, number, or string. */
|
---|
423 | e= new_expr();
|
---|
424 | e->operator= t->type == T_WORD ? 'W' : 'S';
|
---|
425 | e->name= allocate(nil, (t->len+1) * sizeof(e->name[0]));
|
---|
426 | memcpy(e->name, t->name, t->len+1);
|
---|
427 | e->len= t->len;
|
---|
428 | (*pn)++;
|
---|
429 | } else
|
---|
430 | if (t->symbol == '+' || t->symbol == '-' || t->symbol == '~') {
|
---|
431 | /* Unary operator. */
|
---|
432 | (*pn)++;
|
---|
433 | if ((a1= bas_get_C_expression(pn)) == nil) return nil;
|
---|
434 | e= new_expr();
|
---|
435 | e->operator= t->symbol;
|
---|
436 | e->middle= a1;
|
---|
437 | } else
|
---|
438 | if (t->symbol == '$' && get_token(*pn + 1)->type == T_WORD) {
|
---|
439 | /* A hexadecimal number. */
|
---|
440 | t= get_token(*pn + 1);
|
---|
441 | e= new_expr();
|
---|
442 | e->operator= 'W';
|
---|
443 | e->name= allocate(nil, (t->len+3) * sizeof(e->name[0]));
|
---|
444 | strcpy(e->name, "0x");
|
---|
445 | memcpy(e->name+2, t->name, t->len+1);
|
---|
446 | e->len= t->len+2;
|
---|
447 | (*pn)+= 2;
|
---|
448 | } else {
|
---|
449 | parse_err(1, t, "expression syntax error\n");
|
---|
450 | return nil;
|
---|
451 | }
|
---|
452 |
|
---|
453 | switch ((t= get_token(*pn))->symbol) {
|
---|
454 | case '+':
|
---|
455 | case '-':
|
---|
456 | case '*':
|
---|
457 | case '/':
|
---|
458 | case '%':
|
---|
459 | case '&':
|
---|
460 | case '|':
|
---|
461 | case '^':
|
---|
462 | case S_LEFTSHIFT:
|
---|
463 | case S_RIGHTSHIFT:
|
---|
464 | (*pn)++;
|
---|
465 | a1= e;
|
---|
466 | if ((a2= bas_get_C_expression(pn)) == nil) {
|
---|
467 | del_expr(a1);
|
---|
468 | return nil;
|
---|
469 | }
|
---|
470 | e= new_expr();
|
---|
471 | e->operator= t->symbol;
|
---|
472 | e->left= a1;
|
---|
473 | e->right= a2;
|
---|
474 | }
|
---|
475 | return e;
|
---|
476 | }
|
---|
477 |
|
---|
478 | /* We want to know the sizes of the first two operands. */
|
---|
479 | static optype_t optypes[2];
|
---|
480 | static int op_idx;
|
---|
481 |
|
---|
482 | static expression_t *bas_get_operand(int *pn)
|
---|
483 | /* Get something like: [memory], offset[base+index*scale], or simpler. */
|
---|
484 | {
|
---|
485 | expression_t *e, *offset, *base, *index;
|
---|
486 | token_t *t;
|
---|
487 | int c;
|
---|
488 | optype_t optype;
|
---|
489 |
|
---|
490 | /* Prefixed by 'byte', 'word' or 'dword'? */
|
---|
491 | if ((t= get_token(*pn))->type == T_WORD && (
|
---|
492 | strcmp(t->name, "byte") == 0
|
---|
493 | || strcmp(t->name, "word") == 0
|
---|
494 | || strcmp(t->name, "dword") == 0)
|
---|
495 | ) {
|
---|
496 | switch (t->name[0]) {
|
---|
497 | case 'b': optype= BYTE; break;
|
---|
498 | case 'w': optype= use16() ? WORD : OWORD; break;
|
---|
499 | case 'd': optype= use32() ? WORD : OWORD; break;
|
---|
500 | }
|
---|
501 | if (op_idx < arraysize(optypes)) optypes[op_idx++]= optype;
|
---|
502 | (*pn)++;
|
---|
503 |
|
---|
504 | /* It may even be "byte ptr"... */
|
---|
505 | if ((t= get_token(*pn))->type == T_WORD
|
---|
506 | && strcmp(t->name, "ptr") == 0) {
|
---|
507 | (*pn)++;
|
---|
508 | }
|
---|
509 | }
|
---|
510 |
|
---|
511 | /* Is it [memory]? */
|
---|
512 | if (get_token(*pn)->symbol == '['
|
---|
513 | && ((t= get_token(*pn + 1))->type != T_WORD
|
---|
514 | || !isregister(t->name))
|
---|
515 | ) {
|
---|
516 | /* A memory dereference. */
|
---|
517 | (*pn)++;
|
---|
518 | if ((offset= bas_get_C_expression(pn)) == nil) return nil;
|
---|
519 | if (get_token(*pn)->symbol != ']') {
|
---|
520 | parse_err(1, t, "operand syntax error\n");
|
---|
521 | del_expr(offset);
|
---|
522 | return nil;
|
---|
523 | }
|
---|
524 | (*pn)++;
|
---|
525 | e= new_expr();
|
---|
526 | e->operator= '(';
|
---|
527 | e->middle= offset;
|
---|
528 | return e;
|
---|
529 | }
|
---|
530 |
|
---|
531 | /* #something? *something? */
|
---|
532 | if ((c= get_token(*pn)->symbol) == '#' || c == '*') {
|
---|
533 | /* '#' and '*' are often used to introduce some constant. */
|
---|
534 | (*pn)++;
|
---|
535 | }
|
---|
536 |
|
---|
537 | /* Offset? */
|
---|
538 | if (get_token(*pn)->symbol != '[') {
|
---|
539 | /* There is an offset. */
|
---|
540 | if ((offset= bas_get_C_expression(pn)) == nil) return nil;
|
---|
541 | } else {
|
---|
542 | /* No offset. */
|
---|
543 | offset= nil;
|
---|
544 | }
|
---|
545 |
|
---|
546 | /* [base]? [base+? base-? */
|
---|
547 | c= 0;
|
---|
548 | if (get_token(*pn)->symbol == '['
|
---|
549 | && (t= get_token(*pn + 1))->type == T_WORD
|
---|
550 | && isregister(t->name)
|
---|
551 | && ((c= get_token(*pn + 2)->symbol) == ']' || c=='+' || c=='-')
|
---|
552 | ) {
|
---|
553 | /* A base register expression. */
|
---|
554 | base= new_expr();
|
---|
555 | base->operator= 'B';
|
---|
556 | base->name= copystr(t->name);
|
---|
557 | (*pn)+= c == ']' ? 3 : 2;
|
---|
558 | } else {
|
---|
559 | /* No base register expression. */
|
---|
560 | base= nil;
|
---|
561 | }
|
---|
562 |
|
---|
563 | /* +offset]? -offset]? */
|
---|
564 | if (offset == nil
|
---|
565 | && (c == '+' || c == '-')
|
---|
566 | && (t= get_token(*pn + 1))->type == T_WORD
|
---|
567 | && !isregister(t->name)
|
---|
568 | ) {
|
---|
569 | (*pn)++;
|
---|
570 | if ((offset= bas_get_C_expression(pn)) == nil) return nil;
|
---|
571 | if (get_token(*pn)->symbol != ']') {
|
---|
572 | parse_err(1, t, "operand syntax error\n");
|
---|
573 | del_expr(offset);
|
---|
574 | del_expr(base);
|
---|
575 | return nil;
|
---|
576 | }
|
---|
577 | (*pn)++;
|
---|
578 | c= 0;
|
---|
579 | }
|
---|
580 |
|
---|
581 | /* [index*scale]? +index*scale]? */
|
---|
582 | if (c == '+' || get_token(*pn)->symbol == '[') {
|
---|
583 | /* An index most likely. */
|
---|
584 | token_t *m= nil;
|
---|
585 |
|
---|
586 | if (!( /* This must be true: */
|
---|
587 | (t= get_token(*pn + 1))->type == T_WORD
|
---|
588 | && isregister(t->name)
|
---|
589 | && (get_token(*pn + 2)->symbol == ']' || (
|
---|
590 | get_token(*pn + 2)->symbol == '*'
|
---|
591 | && (m= get_token(*pn + 3))->type == T_WORD
|
---|
592 | && strchr("1248", m->name[0]) != nil
|
---|
593 | && m->name[1] == 0
|
---|
594 | && get_token(*pn + 4)->symbol == ']'
|
---|
595 | ))
|
---|
596 | )) {
|
---|
597 | /* Alas it isn't */
|
---|
598 | parse_err(1, t, "operand syntax error\n");
|
---|
599 | del_expr(offset);
|
---|
600 | del_expr(base);
|
---|
601 | return nil;
|
---|
602 | }
|
---|
603 | /* Found an index. */
|
---|
604 | index= new_expr();
|
---|
605 | index->operator= m == nil ? '1' : m->name[0];
|
---|
606 | index->name= copystr(t->name);
|
---|
607 | (*pn)+= (m == nil ? 3 : 5);
|
---|
608 | } else {
|
---|
609 | /* No index. */
|
---|
610 | index= nil;
|
---|
611 | }
|
---|
612 |
|
---|
613 | if (base == nil && index == nil) {
|
---|
614 | /* Return a lone offset as is. */
|
---|
615 | e= offset;
|
---|
616 |
|
---|
617 | /* Lone registers tell operand size. */
|
---|
618 | if (offset->operator == 'W' && isregister(offset->name)) {
|
---|
619 | switch (isregister(offset->name)) {
|
---|
620 | case 1: optype= BYTE; break;
|
---|
621 | case 2: optype= use16() ? WORD : OWORD; break;
|
---|
622 | case 4: optype= use32() ? WORD : OWORD; break;
|
---|
623 | }
|
---|
624 | if (op_idx < arraysize(optypes))
|
---|
625 | optypes[op_idx++]= optype;
|
---|
626 | }
|
---|
627 | } else {
|
---|
628 | e= new_expr();
|
---|
629 | e->operator= 'O';
|
---|
630 | e->left= offset;
|
---|
631 | e->middle= base;
|
---|
632 | e->right= index;
|
---|
633 | }
|
---|
634 | return e;
|
---|
635 | }
|
---|
636 |
|
---|
637 | static expression_t *bas_get_oplist(int *pn)
|
---|
638 | /* Get a comma (or colon for jmpf and callf) separated list of instruction
|
---|
639 | * operands.
|
---|
640 | */
|
---|
641 | {
|
---|
642 | expression_t *e, *o1, *o2;
|
---|
643 | token_t *t;
|
---|
644 |
|
---|
645 | if ((e= bas_get_operand(pn)) == nil) return nil;
|
---|
646 |
|
---|
647 | if ((t= get_token(*pn))->symbol == ',' || t->symbol == ':') {
|
---|
648 | o1= e;
|
---|
649 | (*pn)++;
|
---|
650 | if ((o2= bas_get_oplist(pn)) == nil) {
|
---|
651 | del_expr(o1);
|
---|
652 | return nil;
|
---|
653 | }
|
---|
654 | e= new_expr();
|
---|
655 | e->operator= ',';
|
---|
656 | e->left= o1;
|
---|
657 | e->right= o2;
|
---|
658 | }
|
---|
659 | return e;
|
---|
660 | }
|
---|
661 |
|
---|
662 | static asm86_t *bas_get_statement(void)
|
---|
663 | /* Get a pseudo op or machine instruction with arguments. */
|
---|
664 | {
|
---|
665 | token_t *t= get_token(0);
|
---|
666 | asm86_t *a;
|
---|
667 | mnemonic_t *m;
|
---|
668 | int n;
|
---|
669 | int prefix_seen;
|
---|
670 |
|
---|
671 |
|
---|
672 | assert(t->type == T_WORD);
|
---|
673 |
|
---|
674 | if (strcmp(t->name, ".sect") == 0) {
|
---|
675 | /* .sect .text etc. Accept only four segment names. */
|
---|
676 | skip_token(1);
|
---|
677 | t= get_token(0);
|
---|
678 | if (t->type != T_WORD || (
|
---|
679 | strcmp(t->name, ".text") != 0
|
---|
680 | && strcmp(t->name, ".rom") != 0
|
---|
681 | && strcmp(t->name, ".data") != 0
|
---|
682 | && strcmp(t->name, ".bss") != 0
|
---|
683 | && strcmp(t->name, ".end") != 0
|
---|
684 | )) {
|
---|
685 | parse_err(1, t, "weird section name to .sect\n");
|
---|
686 | return nil;
|
---|
687 | }
|
---|
688 | }
|
---|
689 | a= new_asm86();
|
---|
690 |
|
---|
691 | /* Process instruction prefixes. */
|
---|
692 | for (prefix_seen= 0;; prefix_seen= 1) {
|
---|
693 | if (strcmp(t->name, "rep") == 0
|
---|
694 | || strcmp(t->name, "repe") == 0
|
---|
695 | || strcmp(t->name, "repne") == 0
|
---|
696 | || strcmp(t->name, "repz") == 0
|
---|
697 | || strcmp(t->name, "repnz") == 0
|
---|
698 | ) {
|
---|
699 | if (a->rep != ONCE) {
|
---|
700 | parse_err(1, t,
|
---|
701 | "can't have more than one rep\n");
|
---|
702 | }
|
---|
703 | switch (t->name[3]) {
|
---|
704 | case 0: a->rep= REP; break;
|
---|
705 | case 'e':
|
---|
706 | case 'z': a->rep= REPE; break;
|
---|
707 | case 'n': a->rep= REPNE; break;
|
---|
708 | }
|
---|
709 | } else
|
---|
710 | if (strcmp(t->name, "seg") == 0
|
---|
711 | && get_token(1)->type == T_WORD) {
|
---|
712 | if (a->seg != DEFSEG) {
|
---|
713 | parse_err(1, t,
|
---|
714 | "can't have more than one segment prefix\n");
|
---|
715 | }
|
---|
716 | switch (get_token(1)->name[0]) {
|
---|
717 | case 'c': a->seg= CSEG; break;
|
---|
718 | case 'd': a->seg= DSEG; break;
|
---|
719 | case 'e': a->seg= ESEG; break;
|
---|
720 | case 'f': a->seg= FSEG; break;
|
---|
721 | case 'g': a->seg= GSEG; break;
|
---|
722 | case 's': a->seg= SSEG; break;
|
---|
723 | }
|
---|
724 | skip_token(1);
|
---|
725 | } else
|
---|
726 | if (!prefix_seen) {
|
---|
727 | /* No prefix here, get out! */
|
---|
728 | break;
|
---|
729 | } else {
|
---|
730 | /* No more prefixes, next must be an instruction. */
|
---|
731 | if (t->type != T_WORD
|
---|
732 | || (m= search_mnem(t->name)) == nil
|
---|
733 | || m->optype == PSEUDO
|
---|
734 | ) {
|
---|
735 | parse_err(1, t,
|
---|
736 | "machine instruction expected after instruction prefix\n");
|
---|
737 | del_asm86(a);
|
---|
738 | return nil;
|
---|
739 | }
|
---|
740 | break;
|
---|
741 | }
|
---|
742 |
|
---|
743 | /* Skip the prefix and extra newlines. */
|
---|
744 | do {
|
---|
745 | skip_token(1);
|
---|
746 | } while ((t= get_token(0))->symbol == ';');
|
---|
747 | }
|
---|
748 |
|
---|
749 | /* All the readahead being done upsets the line counter. */
|
---|
750 | a->line= t->line;
|
---|
751 |
|
---|
752 | /* Read a machine instruction or pseudo op. */
|
---|
753 | if ((m= search_mnem(t->name)) == nil) {
|
---|
754 | parse_err(1, t, "unknown instruction '%s'\n", t->name);
|
---|
755 | del_asm86(a);
|
---|
756 | return nil;
|
---|
757 | }
|
---|
758 | a->opcode= m->opcode;
|
---|
759 | a->optype= m->optype;
|
---|
760 | if (a->opcode == CBW || a->opcode == CWD) {
|
---|
761 | a->optype= (strcmp(t->name, "cbw") == 0
|
---|
762 | || strcmp(t->name, "cwd") == 0) == use16() ? WORD : OWORD;
|
---|
763 | }
|
---|
764 | for (op_idx= 0; op_idx < arraysize(optypes); op_idx++)
|
---|
765 | optypes[op_idx]= m->optype;
|
---|
766 | op_idx= 0;
|
---|
767 |
|
---|
768 | n= 1;
|
---|
769 | if (get_token(1)->symbol != ';'
|
---|
770 | && (a->args= bas_get_oplist(&n)) == nil) {
|
---|
771 | del_asm86(a);
|
---|
772 | return nil;
|
---|
773 | }
|
---|
774 |
|
---|
775 | if (m->optype == WORD) {
|
---|
776 | /* Does one of the operands overide the optype? */
|
---|
777 | for (op_idx= 0; op_idx < arraysize(optypes); op_idx++) {
|
---|
778 | if (optypes[op_idx] != m->optype)
|
---|
779 | a->optype= optypes[op_idx];
|
---|
780 | }
|
---|
781 | }
|
---|
782 |
|
---|
783 | if (get_token(n)->symbol != ';') {
|
---|
784 | parse_err(1, t, "garbage at end of instruction\n");
|
---|
785 | del_asm86(a);
|
---|
786 | return nil;
|
---|
787 | }
|
---|
788 | switch (a->opcode) {
|
---|
789 | case DOT_ALIGN:
|
---|
790 | /* Restrict .align to have a single numeric argument, some
|
---|
791 | * assemblers think of the argument as a power of two, so
|
---|
792 | * we need to be able to change the value.
|
---|
793 | */
|
---|
794 | if (strcmp(t->name, ".even") == 0 && a->args == nil) {
|
---|
795 | /* .even becomes .align 2. */
|
---|
796 | expression_t *e;
|
---|
797 | a->args= e= new_expr();
|
---|
798 | e->operator= 'W';
|
---|
799 | e->name= copystr("2");
|
---|
800 | e->len= 2;
|
---|
801 | }
|
---|
802 | if (a->args == nil || a->args->operator != 'W'
|
---|
803 | || !isanumber(a->args->name)) {
|
---|
804 | parse_err(1, t,
|
---|
805 | ".align is restricted to one numeric argument\n");
|
---|
806 | del_asm86(a);
|
---|
807 | return nil;
|
---|
808 | }
|
---|
809 | break;
|
---|
810 | case MOVSX:
|
---|
811 | case MOVZX:
|
---|
812 | /* Types of both operands tell the instruction type. */
|
---|
813 | a->optype= optypes[0];
|
---|
814 | if (optypes[1] == BYTE) {
|
---|
815 | a->opcode= a->opcode == MOVSX ? MOVSXB : MOVZXB;
|
---|
816 | }
|
---|
817 | break;
|
---|
818 | case SAL:
|
---|
819 | case SAR:
|
---|
820 | case SHL:
|
---|
821 | case SHR:
|
---|
822 | case RCL:
|
---|
823 | case RCR:
|
---|
824 | case ROL:
|
---|
825 | case ROR:
|
---|
826 | /* Only the first operand tells the operand size. */
|
---|
827 | a->optype= optypes[0];
|
---|
828 | break;
|
---|
829 | default:;
|
---|
830 | }
|
---|
831 | skip_token(n+1);
|
---|
832 | return a;
|
---|
833 | }
|
---|
834 |
|
---|
835 | asm86_t *bas_get_instruction(void)
|
---|
836 | {
|
---|
837 | asm86_t *a= nil;
|
---|
838 | expression_t *e;
|
---|
839 | token_t *t;
|
---|
840 |
|
---|
841 | while ((t= get_token(0))->symbol == ';')
|
---|
842 | skip_token(1);
|
---|
843 |
|
---|
844 | if (t->type == T_EOF) return nil;
|
---|
845 |
|
---|
846 | if (t->symbol == '#') {
|
---|
847 | /* Preprocessor line and file change. */
|
---|
848 |
|
---|
849 | if ((t= get_token(1))->type != T_WORD || !isanumber(t->name)
|
---|
850 | || get_token(2)->type != T_STRING
|
---|
851 | ) {
|
---|
852 | parse_err(1, t, "file not preprocessed?\n");
|
---|
853 | zap();
|
---|
854 | } else {
|
---|
855 | set_file(get_token(2)->name,
|
---|
856 | strtol(get_token(1)->name, nil, 0) - 1);
|
---|
857 |
|
---|
858 | /* GNU CPP adds extra cruft, simply zap the line. */
|
---|
859 | zap();
|
---|
860 | }
|
---|
861 | a= bas_get_instruction();
|
---|
862 | } else
|
---|
863 | if (t->type == T_WORD && get_token(1)->symbol == ':') {
|
---|
864 | /* A label definition. */
|
---|
865 | a= new_asm86();
|
---|
866 | a->line= t->line;
|
---|
867 | a->opcode= DOT_LABEL;
|
---|
868 | a->optype= PSEUDO;
|
---|
869 | a->args= e= new_expr();
|
---|
870 | e->operator= ':';
|
---|
871 | e->name= copystr(t->name);
|
---|
872 | skip_token(2);
|
---|
873 | } else
|
---|
874 | if (t->type == T_WORD && get_token(1)->symbol == '=') {
|
---|
875 | int n= 2;
|
---|
876 |
|
---|
877 | if ((e= bas_get_C_expression(&n)) == nil) {
|
---|
878 | zap();
|
---|
879 | a= bas_get_instruction();
|
---|
880 | } else
|
---|
881 | if (get_token(n)->symbol != ';') {
|
---|
882 | parse_err(1, t, "garbage after assignment\n");
|
---|
883 | zap();
|
---|
884 | a= bas_get_instruction();
|
---|
885 | } else {
|
---|
886 | a= new_asm86();
|
---|
887 | a->line= t->line;
|
---|
888 | a->opcode= DOT_EQU;
|
---|
889 | a->optype= PSEUDO;
|
---|
890 | a->args= new_expr();
|
---|
891 | a->args->operator= '=';
|
---|
892 | a->args->name= copystr(t->name);
|
---|
893 | a->args->middle= e;
|
---|
894 | skip_token(n+1);
|
---|
895 | }
|
---|
896 | } else
|
---|
897 | if (t->type == T_WORD && get_token(1)->type == T_WORD
|
---|
898 | && strcmp(get_token(1)->name, "lcomm") == 0) {
|
---|
899 | /* Local common block definition. */
|
---|
900 | int n= 2;
|
---|
901 |
|
---|
902 | if ((e= bas_get_C_expression(&n)) == nil) {
|
---|
903 | zap();
|
---|
904 | a= bas_get_instruction();
|
---|
905 | } else
|
---|
906 | if (get_token(n)->symbol != ';') {
|
---|
907 | parse_err(1, t, "garbage after lcomm\n");
|
---|
908 | zap();
|
---|
909 | a= bas_get_instruction();
|
---|
910 | } else {
|
---|
911 | a= new_asm86();
|
---|
912 | a->line= t->line;
|
---|
913 | a->opcode= DOT_LCOMM;
|
---|
914 | a->optype= PSEUDO;
|
---|
915 | a->args= new_expr();
|
---|
916 | a->args->operator= ',';
|
---|
917 | a->args->right= e;
|
---|
918 | a->args->left= e= new_expr();
|
---|
919 | e->operator= 'W';
|
---|
920 | e->name= copystr(t->name);
|
---|
921 | e->len= strlen(e->name)+1;
|
---|
922 | skip_token(n+1);
|
---|
923 | }
|
---|
924 | } else
|
---|
925 | if (t->type == T_WORD) {
|
---|
926 | if ((a= bas_get_statement()) == nil) {
|
---|
927 | zap();
|
---|
928 | a= bas_get_instruction();
|
---|
929 | }
|
---|
930 | } else {
|
---|
931 | parse_err(1, t, "syntax error\n");
|
---|
932 | zap();
|
---|
933 | a= bas_get_instruction();
|
---|
934 | }
|
---|
935 | if (a->optype == OWORD) {
|
---|
936 | a->optype= WORD;
|
---|
937 | a->oaz|= OPZ;
|
---|
938 | }
|
---|
939 | return a;
|
---|
940 | }
|
---|