Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: trunk/minix/commands/i386/asmconv/parse_bas.c@ 10

Last change on this file since 10 was 9, checked in by Mattia Monga, 15 years ago
Minix 3.1.2a
File size: 22.5 KB

Line
1	/* parse_bas.c - parse BCC AS assembly Author: Kees J. Bot
2	* 13 Nov 1994
3	*/
4	#define nil 0
5	#include <stdio.h>
6	#include <stdlib.h>
7	#include <string.h>
8	#include <assert.h>
9	#include "asmconv.h"
10	#include "token.h"
11	#include "asm86.h"
12	#include "languages.h"
13
14	typedef struct mnemonic { /* BAS mnemonics translation table. */
15	char *name;
16	opcode_t opcode;
17	optype_t optype;
18	} mnemonic_t;
19
20	static mnemonic_t mnemtab[] = { /* This array is sorted. */
21	{ ".align", DOT_ALIGN, PSEUDO },
22	{ ".ascii", DOT_ASCII, PSEUDO },
23	{ ".asciz", DOT_ASCIZ, PSEUDO },
24	{ ".assert", DOT_ASSERT, PSEUDO },
25	{ ".base", DOT_BASE, PSEUDO },
26	{ ".blkb", DOT_SPACE, PSEUDO },
27	{ ".bss", DOT_BSS, PSEUDO },
28	{ ".byte", DOT_DATA1, PSEUDO },
29	{ ".comm", DOT_COMM, PSEUDO },
30	{ ".data", DOT_DATA, PSEUDO },
31	{ ".define", DOT_DEFINE, PSEUDO },
32	{ ".end", DOT_END, PSEUDO },
33	{ ".even", DOT_ALIGN, PSEUDO },
34	{ ".extern", DOT_EXTERN, PSEUDO },
35	{ ".file", DOT_FILE, PSEUDO },
36	{ ".globl", DOT_DEFINE, PSEUDO },
37	{ ".lcomm", DOT_LCOMM, PSEUDO },
38	{ ".line", DOT_LINE, PSEUDO },
39	{ ".list", DOT_LIST, PSEUDO },
40	{ ".long", DOT_DATA4, PSEUDO },
41	{ ".nolist", DOT_NOLIST, PSEUDO },
42	{ ".rom", DOT_ROM, PSEUDO },
43	{ ".space", DOT_SPACE, PSEUDO },
44	{ ".symb", DOT_SYMB, PSEUDO },
45	{ ".text", DOT_TEXT, PSEUDO },
46	{ ".use16", DOT_USE16, PSEUDO },
47	{ ".use32", DOT_USE32, PSEUDO },
48	{ ".word", DOT_DATA2, PSEUDO },
49	{ ".zerob", DOT_SPACE, PSEUDO },
50	{ ".zerow", DOT_SPACE, PSEUDO },
51	{ "aaa", AAA, WORD },
52	{ "aad", AAD, WORD },
53	{ "aam", AAM, WORD },
54	{ "aas", AAS, WORD },
55	{ "adc", ADC, WORD },
56	{ "add", ADD, WORD },
57	{ "and", AND, WORD },
58	{ "arpl", ARPL, WORD },
59	{ "bc", JB, JUMP },
60	{ "beq", JE, JUMP },
61	{ "bge", JGE, JUMP },
62	{ "bgt", JG, JUMP },
63	{ "bhi", JA, JUMP },
64	{ "bhis", JAE, JUMP },
65	{ "ble", JLE, JUMP },
66	{ "blo", JB, JUMP },
67	{ "blos", JBE, JUMP },
68	{ "blt", JL, JUMP },
69	{ "bnc", JAE, JUMP },
70	{ "bne", JNE, JUMP },
71	{ "bound", BOUND, WORD },
72	{ "br", JMP, JUMP },
73	{ "bsf", BSF, WORD },
74	{ "bsr", BSR, WORD },
75	{ "bswap", BSWAP, WORD },
76	{ "bt", BT, WORD },
77	{ "btc", BTC, WORD },
78	{ "btr", BTR, WORD },
79	{ "bts", BTS, WORD },
80	{ "bz", JE, JUMP },
81	{ "call", CALL, JUMP },
82	{ "callf", CALLF, JUMP },
83	{ "cbw", CBW, WORD },
84	{ "cdq", CWD, WORD },
85	{ "clc", CLC, WORD },
86	{ "cld", CLD, WORD },
87	{ "cli", CLI, WORD },
88	{ "clts", CLTS, WORD },
89	{ "cmc", CMC, WORD },
90	{ "cmp", CMP, WORD },
91	{ "cmps", CMPS, WORD },
92	{ "cmpsb", CMPS, BYTE },
93	{ "cmpxchg", CMPXCHG, WORD },
94	{ "cwd", CWD, WORD },
95	{ "cwde", CBW, WORD },
96	{ "daa", DAA, WORD },
97	{ "das", DAS, WORD },
98	{ "dd", DOT_DATA4, PSEUDO },
99	{ "dec", DEC, WORD },
100	{ "div", DIV, WORD },
101	{ "enter", ENTER, WORD },
102	{ "export", DOT_DEFINE, PSEUDO },
103	{ "f2xm1", F2XM1, WORD },
104	{ "fabs", FABS, WORD },
105	{ "fadd", FADD, WORD },
106	{ "faddd", FADDD, WORD },
107	{ "faddp", FADDP, WORD },
108	{ "fadds", FADDS, WORD },
109	{ "fbld", FBLD, WORD },
110	{ "fbstp", FBSTP, WORD },
111	{ "fchs", FCHS, WORD },
112	{ "fclex", FCLEX, WORD },
113	{ "fcomd", FCOMD, WORD },
114	{ "fcompd", FCOMPD, WORD },
115	{ "fcompp", FCOMPP, WORD },
116	{ "fcomps", FCOMPS, WORD },
117	{ "fcoms", FCOMS, WORD },
118	{ "fcos", FCOS, WORD },
119	{ "fdecstp", FDECSTP, WORD },
120	{ "fdivd", FDIVD, WORD },
121	{ "fdivp", FDIVP, WORD },
122	{ "fdivrd", FDIVRD, WORD },
123	{ "fdivrp", FDIVRP, WORD },
124	{ "fdivrs", FDIVRS, WORD },
125	{ "fdivs", FDIVS, WORD },
126	{ "ffree", FFREE, WORD },
127	{ "fiaddl", FIADDL, WORD },
128	{ "fiadds", FIADDS, WORD },
129	{ "ficom", FICOM, WORD },
130	{ "ficomp", FICOMP, WORD },
131	{ "fidivl", FIDIVL, WORD },
132	{ "fidivrl", FIDIVRL, WORD },
133	{ "fidivrs", FIDIVRS, WORD },
134	{ "fidivs", FIDIVS, WORD },
135	{ "fildl", FILDL, WORD },
136	{ "fildq", FILDQ, WORD },
137	{ "filds", FILDS, WORD },
138	{ "fimull", FIMULL, WORD },
139	{ "fimuls", FIMULS, WORD },
140	{ "fincstp", FINCSTP, WORD },
141	{ "finit", FINIT, WORD },
142	{ "fistl", FISTL, WORD },
143	{ "fistp", FISTP, WORD },
144	{ "fists", FISTS, WORD },
145	{ "fisubl", FISUBL, WORD },
146	{ "fisubrl", FISUBRL, WORD },
147	{ "fisubrs", FISUBRS, WORD },
148	{ "fisubs", FISUBS, WORD },
149	{ "fld1", FLD1, WORD },
150	{ "fldcw", FLDCW, WORD },
151	{ "fldd", FLDD, WORD },
152	{ "fldenv", FLDENV, WORD },
153	{ "fldl2e", FLDL2E, WORD },
154	{ "fldl2t", FLDL2T, WORD },
155	{ "fldlg2", FLDLG2, WORD },
156	{ "fldln2", FLDLN2, WORD },
157	{ "fldpi", FLDPI, WORD },
158	{ "flds", FLDS, WORD },
159	{ "fldx", FLDX, WORD },
160	{ "fldz", FLDZ, WORD },
161	{ "fmuld", FMULD, WORD },
162	{ "fmulp", FMULP, WORD },
163	{ "fmuls", FMULS, WORD },
164	{ "fnop", FNOP, WORD },
165	{ "fpatan", FPATAN, WORD },
166	{ "fprem", FPREM, WORD },
167	{ "fprem1", FPREM1, WORD },
168	{ "fptan", FPTAN, WORD },
169	{ "frndint", FRNDINT, WORD },
170	{ "frstor", FRSTOR, WORD },
171	{ "fsave", FSAVE, WORD },
172	{ "fscale", FSCALE, WORD },
173	{ "fsin", FSIN, WORD },
174	{ "fsincos", FSINCOS, WORD },
175	{ "fsqrt", FSQRT, WORD },
176	{ "fstcw", FSTCW, WORD },
177	{ "fstd", FSTD, WORD },
178	{ "fstenv", FSTENV, WORD },
179	{ "fstpd", FSTPD, WORD },
180	{ "fstps", FSTPS, WORD },
181	{ "fstpx", FSTPX, WORD },
182	{ "fsts", FSTS, WORD },
183	{ "fstsw", FSTSW, WORD },
184	{ "fsubd", FSUBD, WORD },
185	{ "fsubp", FSUBP, WORD },
186	{ "fsubpr", FSUBPR, WORD },
187	{ "fsubrd", FSUBRD, WORD },
188	{ "fsubrs", FSUBRS, WORD },
189	{ "fsubs", FSUBS, WORD },
190	{ "ftst", FTST, WORD },
191	{ "fucom", FUCOM, WORD },
192	{ "fucomp", FUCOMP, WORD },
193	{ "fucompp", FUCOMPP, WORD },
194	{ "fxam", FXAM, WORD },
195	{ "fxch", FXCH, WORD },
196	{ "fxtract", FXTRACT, WORD },
197	{ "fyl2x", FYL2X, WORD },
198	{ "fyl2xp1", FYL2XP1, WORD },
199	{ "hlt", HLT, WORD },
200	{ "idiv", IDIV, WORD },
201	{ "imul", IMUL, WORD },
202	{ "in", IN, WORD },
203	{ "inb", IN, BYTE },
204	{ "inc", INC, WORD },
205	{ "ins", INS, WORD },
206	{ "insb", INS, BYTE },
207	{ "int", INT, WORD },
208	{ "into", INTO, JUMP },
209	{ "invd", INVD, WORD },
210	{ "invlpg", INVLPG, WORD },
211	{ "iret", IRET, JUMP },
212	{ "iretd", IRETD, JUMP },
213	{ "j", JMP, JUMP },
214	{ "ja", JA, JUMP },
215	{ "jae", JAE, JUMP },
216	{ "jb", JB, JUMP },
217	{ "jbe", JBE, JUMP },
218	{ "jc", JB, JUMP },
219	{ "jcxz", JCXZ, JUMP },
220	{ "je", JE, JUMP },
221	{ "jecxz", JCXZ, JUMP },
222	{ "jeq", JE, JUMP },
223	{ "jg", JG, JUMP },
224	{ "jge", JGE, JUMP },
225	{ "jgt", JG, JUMP },
226	{ "jhi", JA, JUMP },
227	{ "jhis", JAE, JUMP },
228	{ "jl", JL, JUMP },
229	{ "jle", JLE, JUMP },
230	{ "jlo", JB, JUMP },
231	{ "jlos", JBE, JUMP },
232	{ "jlt", JL, JUMP },
233	{ "jmp", JMP, JUMP },
234	{ "jmpf", JMPF, JUMP },
235	{ "jna", JBE, JUMP },
236	{ "jnae", JB, JUMP },
237	{ "jnb", JAE, JUMP },
238	{ "jnbe", JA, JUMP },
239	{ "jnc", JAE, JUMP },
240	{ "jne", JNE, JUMP },
241	{ "jng", JLE, JUMP },
242	{ "jnge", JL, JUMP },
243	{ "jnl", JGE, JUMP },
244	{ "jnle", JG, JUMP },
245	{ "jno", JNO, JUMP },
246	{ "jnp", JNP, JUMP },
247	{ "jns", JNS, JUMP },
248	{ "jnz", JNE, JUMP },
249	{ "jo", JO, JUMP },
250	{ "jp", JP, JUMP },
251	{ "js", JS, JUMP },
252	{ "jz", JE, JUMP },
253	{ "lahf", LAHF, WORD },
254	{ "lar", LAR, WORD },
255	{ "lds", LDS, WORD },
256	{ "lea", LEA, WORD },
257	{ "leave", LEAVE, WORD },
258	{ "les", LES, WORD },
259	{ "lfs", LFS, WORD },
260	{ "lgdt", LGDT, WORD },
261	{ "lgs", LGS, WORD },
262	{ "lidt", LIDT, WORD },
263	{ "lldt", LLDT, WORD },
264	{ "lmsw", LMSW, WORD },
265	{ "lock", LOCK, WORD },
266	{ "lods", LODS, WORD },
267	{ "lodsb", LODS, BYTE },
268	{ "loop", LOOP, JUMP },
269	{ "loope", LOOPE, JUMP },
270	{ "loopne", LOOPNE, JUMP },
271	{ "loopnz", LOOPNE, JUMP },
272	{ "loopz", LOOPE, JUMP },
273	{ "lsl", LSL, WORD },
274	{ "lss", LSS, WORD },
275	{ "ltr", LTR, WORD },
276	{ "mov", MOV, WORD },
277	{ "movs", MOVS, WORD },
278	{ "movsb", MOVS, BYTE },
279	{ "movsx", MOVSX, WORD },
280	{ "movzx", MOVZX, WORD },
281	{ "mul", MUL, WORD },
282	{ "neg", NEG, WORD },
283	{ "nop", NOP, WORD },
284	{ "not", NOT, WORD },
285	{ "or", OR, WORD },
286	{ "out", OUT, WORD },
287	{ "outb", OUT, BYTE },
288	{ "outs", OUTS, WORD },
289	{ "outsb", OUTS, BYTE },
290	{ "pop", POP, WORD },
291	{ "popa", POPA, WORD },
292	{ "popad", POPA, WORD },
293	{ "popf", POPF, WORD },
294	{ "popfd", POPF, WORD },
295	{ "push", PUSH, WORD },
296	{ "pusha", PUSHA, WORD },
297	{ "pushad", PUSHA, WORD },
298	{ "pushf", PUSHF, WORD },
299	{ "pushfd", PUSHF, WORD },
300	{ "rcl", RCL, WORD },
301	{ "rcr", RCR, WORD },
302	{ "ret", RET, JUMP },
303	{ "retf", RETF, JUMP },
304	{ "rol", ROL, WORD },
305	{ "ror", ROR, WORD },
306	{ "sahf", SAHF, WORD },
307	{ "sal", SAL, WORD },
308	{ "sar", SAR, WORD },
309	{ "sbb", SBB, WORD },
310	{ "scas", SCAS, WORD },
311	{ "seta", SETA, BYTE },
312	{ "setae", SETAE, BYTE },
313	{ "setb", SETB, BYTE },
314	{ "setbe", SETBE, BYTE },
315	{ "sete", SETE, BYTE },
316	{ "setg", SETG, BYTE },
317	{ "setge", SETGE, BYTE },
318	{ "setl", SETL, BYTE },
319	{ "setna", SETBE, BYTE },
320	{ "setnae", SETB, BYTE },
321	{ "setnb", SETAE, BYTE },
322	{ "setnbe", SETA, BYTE },
323	{ "setne", SETNE, BYTE },
324	{ "setng", SETLE, BYTE },
325	{ "setnge", SETL, BYTE },
326	{ "setnl", SETGE, BYTE },
327	{ "setnle", SETG, BYTE },
328	{ "setno", SETNO, BYTE },
329	{ "setnp", SETNP, BYTE },
330	{ "setns", SETNS, BYTE },
331	{ "seto", SETO, BYTE },
332	{ "setp", SETP, BYTE },
333	{ "sets", SETS, BYTE },
334	{ "setz", SETE, BYTE },
335	{ "sgdt", SGDT, WORD },
336	{ "shl", SHL, WORD },
337	{ "shld", SHLD, WORD },
338	{ "shr", SHR, WORD },
339	{ "shrd", SHRD, WORD },
340	{ "sidt", SIDT, WORD },
341	{ "sldt", SLDT, WORD },
342	{ "smsw", SMSW, WORD },
343	{ "stc", STC, WORD },
344	{ "std", STD, WORD },
345	{ "sti", STI, WORD },
346	{ "stos", STOS, WORD },
347	{ "stosb", STOS, BYTE },
348	{ "str", STR, WORD },
349	{ "sub", SUB, WORD },
350	{ "test", TEST, WORD },
351	{ "verr", VERR, WORD },
352	{ "verw", VERW, WORD },
353	{ "wait", WAIT, WORD },
354	{ "wbinvd", WBINVD, WORD },
355	{ "xadd", XADD, WORD },
356	{ "xchg", XCHG, WORD },
357	{ "xlat", XLAT, WORD },
358	{ "xor", XOR, WORD },
359	};
360
void bas_parse_init(char *file)
/* Set up the tokenizer for reading the BAS assembly source 'file'.  The
 * '!' handed to tok_init() is presumably the BAS comment character
 * (NOTE(review): confirm against tok_init).
 */
{
	char *bas_source= file;

	tok_init(bas_source, '!');
}
366
367	static void zap(void)
368	/* An error, zap the rest of the line. */
369	{
370	token_t *t;
371
372	while ((t= get_token(0))->type != T_EOF && t->symbol != ';')
373	skip_token(1);
374	}
375
376	static mnemonic_t search_mnem(char name)
377	/* Binary search for a mnemonic. (That's why the table is sorted.) */
378	{
379	int low, mid, high;
380	int cmp;
381	mnemonic_t *m;
382
383	low= 0;
384	high= arraysize(mnemtab)-1;
385	while (low <= high) {
386	mid= (low + high) / 2;
387	m= &mnemtab[mid];
388
389	if ((cmp= strcmp(name, m->name)) == 0) return m;
390
391	if (cmp < 0) high= mid-1; else low= mid+1;
392	}
393	return nil;
394	}
395
396	static expression_t bas_get_C_expression(int pn)
397	/* Read a "C-like" expression. Note that we don't worry about precedence,
398	* the expression is printed later like it is read. If the target language
399	* does not have all the operators (like ~) then this has to be repaired by
400	* changing the source file. (No problem, you still have one source file
401	* to maintain, not two.)
402	*/
403	{
404	expression_t e, a1, *a2;
405	token_t *t;
406
407	if ((t= get_token(*pn))->symbol == '(') {
408	/* ( expr ): grouping. */
409	(*pn)++;
410	if ((a1= bas_get_C_expression(pn)) == nil) return nil;
411	if (get_token(*pn)->symbol != ')') {
412	parse_err(1, t, "missing )\n");
413	del_expr(a1);
414	return nil;
415	}
416	(*pn)++;
417	e= new_expr();
418	e->operator= '[';
419	e->middle= a1;
420	} else
421	if (t->type == T_WORD \|\| t->type == T_STRING) {
422	/* Label, number, or string. */
423	e= new_expr();
424	e->operator= t->type == T_WORD ? 'W' : 'S';
425	e->name= allocate(nil, (t->len+1) * sizeof(e->name[0]));
426	memcpy(e->name, t->name, t->len+1);
427	e->len= t->len;
428	(*pn)++;
429	} else
430	if (t->symbol == '+' \|\| t->symbol == '-' \|\| t->symbol == '~') {
431	/* Unary operator. */
432	(*pn)++;
433	if ((a1= bas_get_C_expression(pn)) == nil) return nil;
434	e= new_expr();
435	e->operator= t->symbol;
436	e->middle= a1;
437	} else
438	if (t->symbol == '$' && get_token(*pn + 1)->type == T_WORD) {
439	/* A hexadecimal number. */
440	t= get_token(*pn + 1);
441	e= new_expr();
442	e->operator= 'W';
443	e->name= allocate(nil, (t->len+3) * sizeof(e->name[0]));
444	strcpy(e->name, "0x");
445	memcpy(e->name+2, t->name, t->len+1);
446	e->len= t->len+2;
447	(*pn)+= 2;
448	} else {
449	parse_err(1, t, "expression syntax error\n");
450	return nil;
451	}
452
453	switch ((t= get_token(*pn))->symbol) {
454	case '+':
455	case '-':
456	case '*':
457	case '/':
458	case '%':
459	case '&':
460	case '\|':
461	case '^':
462	case S_LEFTSHIFT:
463	case S_RIGHTSHIFT:
464	(*pn)++;
465	a1= e;
466	if ((a2= bas_get_C_expression(pn)) == nil) {
467	del_expr(a1);
468	return nil;
469	}
470	e= new_expr();
471	e->operator= t->symbol;
472	e->left= a1;
473	e->right= a2;
474	}
475	return e;
476	}
477
478	/* We want to know the sizes of the first two operands. */
479	static optype_t optypes[2];
480	static int op_idx;
481
482	static expression_t bas_get_operand(int pn)
483	/* Get something like: [memory], offset[base+indexscale], or simpler. /
484	{
485	expression_t e, offset, base, index;
486	token_t *t;
487	int c;
488	optype_t optype;
489
490	/* Prefixed by 'byte', 'word' or 'dword'? */
491	if ((t= get_token(*pn))->type == T_WORD && (
492	strcmp(t->name, "byte") == 0
493	\|\| strcmp(t->name, "word") == 0
494	\|\| strcmp(t->name, "dword") == 0)
495	) {
496	switch (t->name[0]) {
497	case 'b': optype= BYTE; break;
498	case 'w': optype= use16() ? WORD : OWORD; break;
499	case 'd': optype= use32() ? WORD : OWORD; break;
500	}
501	if (op_idx < arraysize(optypes)) optypes[op_idx++]= optype;
502	(*pn)++;
503
504	/* It may even be "byte ptr"... */
505	if ((t= get_token(*pn))->type == T_WORD
506	&& strcmp(t->name, "ptr") == 0) {
507	(*pn)++;
508	}
509	}
510
511	/* Is it [memory]? */
512	if (get_token(*pn)->symbol == '['
513	&& ((t= get_token(*pn + 1))->type != T_WORD
514	\|\| !isregister(t->name))
515	) {
516	/* A memory dereference. */
517	(*pn)++;
518	if ((offset= bas_get_C_expression(pn)) == nil) return nil;
519	if (get_token(*pn)->symbol != ']') {
520	parse_err(1, t, "operand syntax error\n");
521	del_expr(offset);
522	return nil;
523	}
524	(*pn)++;
525	e= new_expr();
526	e->operator= '(';
527	e->middle= offset;
528	return e;
529	}
530
531	/* #something? something? /
532	if ((c= get_token(pn)->symbol) == '#' \|\| c == '') {
533	/* '#' and '' are often used to introduce some constant. /
534	(*pn)++;
535	}
536
537	/* Offset? */
538	if (get_token(*pn)->symbol != '[') {
539	/* There is an offset. */
540	if ((offset= bas_get_C_expression(pn)) == nil) return nil;
541	} else {
542	/* No offset. */
543	offset= nil;
544	}
545
546	/* [base]? [base+? base-? */
547	c= 0;
548	if (get_token(*pn)->symbol == '['
549	&& (t= get_token(*pn + 1))->type == T_WORD
550	&& isregister(t->name)
551	&& ((c= get_token(*pn + 2)->symbol) == ']' \|\| c=='+' \|\| c=='-')
552	) {
553	/* A base register expression. */
554	base= new_expr();
555	base->operator= 'B';
556	base->name= copystr(t->name);
557	(*pn)+= c == ']' ? 3 : 2;
558	} else {
559	/* No base register expression. */
560	base= nil;
561	}
562
563	/* +offset]? -offset]? */
564	if (offset == nil
565	&& (c == '+' \|\| c == '-')
566	&& (t= get_token(*pn + 1))->type == T_WORD
567	&& !isregister(t->name)
568	) {
569	(*pn)++;
570	if ((offset= bas_get_C_expression(pn)) == nil) return nil;
571	if (get_token(*pn)->symbol != ']') {
572	parse_err(1, t, "operand syntax error\n");
573	del_expr(offset);
574	del_expr(base);
575	return nil;
576	}
577	(*pn)++;
578	c= 0;
579	}
580
581	/* [indexscale]? +indexscale]? */
582	if (c == '+' \|\| get_token(*pn)->symbol == '[') {
583	/* An index most likely. */
584	token_t *m= nil;
585
586	if (!( /* This must be true: */
587	(t= get_token(*pn + 1))->type == T_WORD
588	&& isregister(t->name)
589	&& (get_token(*pn + 2)->symbol == ']' \|\| (
590	get_token(pn + 2)->symbol == ''
591	&& (m= get_token(*pn + 3))->type == T_WORD
592	&& strchr("1248", m->name[0]) != nil
593	&& m->name[1] == 0
594	&& get_token(*pn + 4)->symbol == ']'
595	))
596	)) {
597	/* Alas it isn't */
598	parse_err(1, t, "operand syntax error\n");
599	del_expr(offset);
600	del_expr(base);
601	return nil;
602	}
603	/* Found an index. */
604	index= new_expr();
605	index->operator= m == nil ? '1' : m->name[0];
606	index->name= copystr(t->name);
607	(*pn)+= (m == nil ? 3 : 5);
608	} else {
609	/* No index. */
610	index= nil;
611	}
612
613	if (base == nil && index == nil) {
614	/* Return a lone offset as is. */
615	e= offset;
616
617	/* Lone registers tell operand size. */
618	if (offset->operator == 'W' && isregister(offset->name)) {
619	switch (isregister(offset->name)) {
620	case 1: optype= BYTE; break;
621	case 2: optype= use16() ? WORD : OWORD; break;
622	case 4: optype= use32() ? WORD : OWORD; break;
623	}
624	if (op_idx < arraysize(optypes))
625	optypes[op_idx++]= optype;
626	}
627	} else {
628	e= new_expr();
629	e->operator= 'O';
630	e->left= offset;
631	e->middle= base;
632	e->right= index;
633	}
634	return e;
635	}
636
637	static expression_t bas_get_oplist(int pn)
638	/* Get a comma (or colon for jmpf and callf) separated list of instruction
639	* operands.
640	*/
641	{
642	expression_t e, o1, *o2;
643	token_t *t;
644
645	if ((e= bas_get_operand(pn)) == nil) return nil;
646
647	if ((t= get_token(*pn))->symbol == ',' \|\| t->symbol == ':') {
648	o1= e;
649	(*pn)++;
650	if ((o2= bas_get_oplist(pn)) == nil) {
651	del_expr(o1);
652	return nil;
653	}
654	e= new_expr();
655	e->operator= ',';
656	e->left= o1;
657	e->right= o2;
658	}
659	return e;
660	}
661
662	static asm86_t *bas_get_statement(void)
663	/* Get a pseudo op or machine instruction with arguments. */
664	{
665	token_t *t= get_token(0);
666	asm86_t *a;
667	mnemonic_t *m;
668	int n;
669	int prefix_seen;
670
671
672	assert(t->type == T_WORD);
673
674	if (strcmp(t->name, ".sect") == 0) {
675	/* .sect .text etc. Accept only four segment names. */
676	skip_token(1);
677	t= get_token(0);
678	if (t->type != T_WORD \|\| (
679	strcmp(t->name, ".text") != 0
680	&& strcmp(t->name, ".rom") != 0
681	&& strcmp(t->name, ".data") != 0
682	&& strcmp(t->name, ".bss") != 0
683	&& strcmp(t->name, ".end") != 0
684	)) {
685	parse_err(1, t, "weird section name to .sect\n");
686	return nil;
687	}
688	}
689	a= new_asm86();
690
691	/* Process instruction prefixes. */
692	for (prefix_seen= 0;; prefix_seen= 1) {
693	if (strcmp(t->name, "rep") == 0
694	\|\| strcmp(t->name, "repe") == 0
695	\|\| strcmp(t->name, "repne") == 0
696	\|\| strcmp(t->name, "repz") == 0
697	\|\| strcmp(t->name, "repnz") == 0
698	) {
699	if (a->rep != ONCE) {
700	parse_err(1, t,
701	"can't have more than one rep\n");
702	}
703	switch (t->name[3]) {
704	case 0: a->rep= REP; break;
705	case 'e':
706	case 'z': a->rep= REPE; break;
707	case 'n': a->rep= REPNE; break;
708	}
709	} else
710	if (strcmp(t->name, "seg") == 0
711	&& get_token(1)->type == T_WORD) {
712	if (a->seg != DEFSEG) {
713	parse_err(1, t,
714	"can't have more than one segment prefix\n");
715	}
716	switch (get_token(1)->name[0]) {
717	case 'c': a->seg= CSEG; break;
718	case 'd': a->seg= DSEG; break;
719	case 'e': a->seg= ESEG; break;
720	case 'f': a->seg= FSEG; break;
721	case 'g': a->seg= GSEG; break;
722	case 's': a->seg= SSEG; break;
723	}
724	skip_token(1);
725	} else
726	if (!prefix_seen) {
727	/* No prefix here, get out! */
728	break;
729	} else {
730	/* No more prefixes, next must be an instruction. */
731	if (t->type != T_WORD
732	\|\| (m= search_mnem(t->name)) == nil
733	\|\| m->optype == PSEUDO
734	) {
735	parse_err(1, t,
736	"machine instruction expected after instruction prefix\n");
737	del_asm86(a);
738	return nil;
739	}
740	break;
741	}
742
743	/* Skip the prefix and extra newlines. */
744	do {
745	skip_token(1);
746	} while ((t= get_token(0))->symbol == ';');
747	}
748
749	/* All the readahead being done upsets the line counter. */
750	a->line= t->line;
751
752	/* Read a machine instruction or pseudo op. */
753	if ((m= search_mnem(t->name)) == nil) {
754	parse_err(1, t, "unknown instruction '%s'\n", t->name);
755	del_asm86(a);
756	return nil;
757	}
758	a->opcode= m->opcode;
759	a->optype= m->optype;
760	if (a->opcode == CBW \|\| a->opcode == CWD) {
761	a->optype= (strcmp(t->name, "cbw") == 0
762	\|\| strcmp(t->name, "cwd") == 0) == use16() ? WORD : OWORD;
763	}
764	for (op_idx= 0; op_idx < arraysize(optypes); op_idx++)
765	optypes[op_idx]= m->optype;
766	op_idx= 0;
767
768	n= 1;
769	if (get_token(1)->symbol != ';'
770	&& (a->args= bas_get_oplist(&n)) == nil) {
771	del_asm86(a);
772	return nil;
773	}
774
775	if (m->optype == WORD) {
776	/* Does one of the operands overide the optype? */
777	for (op_idx= 0; op_idx < arraysize(optypes); op_idx++) {
778	if (optypes[op_idx] != m->optype)
779	a->optype= optypes[op_idx];
780	}
781	}
782
783	if (get_token(n)->symbol != ';') {
784	parse_err(1, t, "garbage at end of instruction\n");
785	del_asm86(a);
786	return nil;
787	}
788	switch (a->opcode) {
789	case DOT_ALIGN:
790	/* Restrict .align to have a single numeric argument, some
791	* assemblers think of the argument as a power of two, so
792	* we need to be able to change the value.
793	*/
794	if (strcmp(t->name, ".even") == 0 && a->args == nil) {
795	/* .even becomes .align 2. */
796	expression_t *e;
797	a->args= e= new_expr();
798	e->operator= 'W';
799	e->name= copystr("2");
800	e->len= 2;
801	}
802	if (a->args == nil \|\| a->args->operator != 'W'
803	\|\| !isanumber(a->args->name)) {
804	parse_err(1, t,
805	".align is restricted to one numeric argument\n");
806	del_asm86(a);
807	return nil;
808	}
809	break;
810	case MOVSX:
811	case MOVZX:
812	/* Types of both operands tell the instruction type. */
813	a->optype= optypes[0];
814	if (optypes[1] == BYTE) {
815	a->opcode= a->opcode == MOVSX ? MOVSXB : MOVZXB;
816	}
817	break;
818	case SAL:
819	case SAR:
820	case SHL:
821	case SHR:
822	case RCL:
823	case RCR:
824	case ROL:
825	case ROR:
826	/* Only the first operand tells the operand size. */
827	a->optype= optypes[0];
828	break;
829	default:;
830	}
831	skip_token(n+1);
832	return a;
833	}
834
835	asm86_t *bas_get_instruction(void)
836	{
837	asm86_t *a= nil;
838	expression_t *e;
839	token_t *t;
840
841	while ((t= get_token(0))->symbol == ';')
842	skip_token(1);
843
844	if (t->type == T_EOF) return nil;
845
846	if (t->symbol == '#') {
847	/* Preprocessor line and file change. */
848
849	if ((t= get_token(1))->type != T_WORD \|\| !isanumber(t->name)
850	\|\| get_token(2)->type != T_STRING
851	) {
852	parse_err(1, t, "file not preprocessed?\n");
853	zap();
854	} else {
855	set_file(get_token(2)->name,
856	strtol(get_token(1)->name, nil, 0) - 1);
857
858	/* GNU CPP adds extra cruft, simply zap the line. */
859	zap();
860	}
861	a= bas_get_instruction();
862	} else
863	if (t->type == T_WORD && get_token(1)->symbol == ':') {
864	/* A label definition. */
865	a= new_asm86();
866	a->line= t->line;
867	a->opcode= DOT_LABEL;
868	a->optype= PSEUDO;
869	a->args= e= new_expr();
870	e->operator= ':';
871	e->name= copystr(t->name);
872	skip_token(2);
873	} else
874	if (t->type == T_WORD && get_token(1)->symbol == '=') {
875	int n= 2;
876
877	if ((e= bas_get_C_expression(&n)) == nil) {
878	zap();
879	a= bas_get_instruction();
880	} else
881	if (get_token(n)->symbol != ';') {
882	parse_err(1, t, "garbage after assignment\n");
883	zap();
884	a= bas_get_instruction();
885	} else {
886	a= new_asm86();
887	a->line= t->line;
888	a->opcode= DOT_EQU;
889	a->optype= PSEUDO;
890	a->args= new_expr();
891	a->args->operator= '=';
892	a->args->name= copystr(t->name);
893	a->args->middle= e;
894	skip_token(n+1);
895	}
896	} else
897	if (t->type == T_WORD && get_token(1)->type == T_WORD
898	&& strcmp(get_token(1)->name, "lcomm") == 0) {
899	/* Local common block definition. */
900	int n= 2;
901
902	if ((e= bas_get_C_expression(&n)) == nil) {
903	zap();
904	a= bas_get_instruction();
905	} else
906	if (get_token(n)->symbol != ';') {
907	parse_err(1, t, "garbage after lcomm\n");
908	zap();
909	a= bas_get_instruction();
910	} else {
911	a= new_asm86();
912	a->line= t->line;
913	a->opcode= DOT_LCOMM;
914	a->optype= PSEUDO;
915	a->args= new_expr();
916	a->args->operator= ',';
917	a->args->right= e;
918	a->args->left= e= new_expr();
919	e->operator= 'W';
920	e->name= copystr(t->name);
921	e->len= strlen(e->name)+1;
922	skip_token(n+1);
923	}
924	} else
925	if (t->type == T_WORD) {
926	if ((a= bas_get_statement()) == nil) {
927	zap();
928	a= bas_get_instruction();
929	}
930	} else {
931	parse_err(1, t, "syntax error\n");
932	zap();
933	a= bas_get_instruction();
934	}
935	if (a->optype == OWORD) {
936	a->optype= WORD;
937	a->oaz\|= OPZ;
938	}
939	return a;
940	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: