Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: trunk/minix/commands/i386/asmconv/parse_bas.c@ 9

Last change on this file since 9 was 9, checked in by Mattia Monga, 15 years ago
Minix 3.1.2a
File size: 22.5 KB

Rev	Line
[9]	1	/* parse_bas.c - parse BCC AS assembly Author: Kees J. Bot
	2	* 13 Nov 1994
	3	*/
	4	#define nil 0
	5	#include <stdio.h>
	6	#include <stdlib.h>
	7	#include <string.h>
	8	#include <assert.h>
	9	#include "asmconv.h"
	10	#include "token.h"
	11	#include "asm86.h"
	12	#include "languages.h"
	13
	14	typedef struct mnemonic { /* BAS mnemonics translation table. */
	15	char *name;
	16	opcode_t opcode;
	17	optype_t optype;
	18	} mnemonic_t;
	19
	20	static mnemonic_t mnemtab[] = { /* This array is sorted. */
	21	{ ".align", DOT_ALIGN, PSEUDO },
	22	{ ".ascii", DOT_ASCII, PSEUDO },
	23	{ ".asciz", DOT_ASCIZ, PSEUDO },
	24	{ ".assert", DOT_ASSERT, PSEUDO },
	25	{ ".base", DOT_BASE, PSEUDO },
	26	{ ".blkb", DOT_SPACE, PSEUDO },
	27	{ ".bss", DOT_BSS, PSEUDO },
	28	{ ".byte", DOT_DATA1, PSEUDO },
	29	{ ".comm", DOT_COMM, PSEUDO },
	30	{ ".data", DOT_DATA, PSEUDO },
	31	{ ".define", DOT_DEFINE, PSEUDO },
	32	{ ".end", DOT_END, PSEUDO },
	33	{ ".even", DOT_ALIGN, PSEUDO },
	34	{ ".extern", DOT_EXTERN, PSEUDO },
	35	{ ".file", DOT_FILE, PSEUDO },
	36	{ ".globl", DOT_DEFINE, PSEUDO },
	37	{ ".lcomm", DOT_LCOMM, PSEUDO },
	38	{ ".line", DOT_LINE, PSEUDO },
	39	{ ".list", DOT_LIST, PSEUDO },
	40	{ ".long", DOT_DATA4, PSEUDO },
	41	{ ".nolist", DOT_NOLIST, PSEUDO },
	42	{ ".rom", DOT_ROM, PSEUDO },
	43	{ ".space", DOT_SPACE, PSEUDO },
	44	{ ".symb", DOT_SYMB, PSEUDO },
	45	{ ".text", DOT_TEXT, PSEUDO },
	46	{ ".use16", DOT_USE16, PSEUDO },
	47	{ ".use32", DOT_USE32, PSEUDO },
	48	{ ".word", DOT_DATA2, PSEUDO },
	49	{ ".zerob", DOT_SPACE, PSEUDO },
	50	{ ".zerow", DOT_SPACE, PSEUDO },
	51	{ "aaa", AAA, WORD },
	52	{ "aad", AAD, WORD },
	53	{ "aam", AAM, WORD },
	54	{ "aas", AAS, WORD },
	55	{ "adc", ADC, WORD },
	56	{ "add", ADD, WORD },
	57	{ "and", AND, WORD },
	58	{ "arpl", ARPL, WORD },
	59	{ "bc", JB, JUMP },
	60	{ "beq", JE, JUMP },
	61	{ "bge", JGE, JUMP },
	62	{ "bgt", JG, JUMP },
	63	{ "bhi", JA, JUMP },
	64	{ "bhis", JAE, JUMP },
	65	{ "ble", JLE, JUMP },
	66	{ "blo", JB, JUMP },
	67	{ "blos", JBE, JUMP },
	68	{ "blt", JL, JUMP },
	69	{ "bnc", JAE, JUMP },
	70	{ "bne", JNE, JUMP },
	71	{ "bound", BOUND, WORD },
	72	{ "br", JMP, JUMP },
	73	{ "bsf", BSF, WORD },
	74	{ "bsr", BSR, WORD },
	75	{ "bswap", BSWAP, WORD },
	76	{ "bt", BT, WORD },
	77	{ "btc", BTC, WORD },
	78	{ "btr", BTR, WORD },
	79	{ "bts", BTS, WORD },
	80	{ "bz", JE, JUMP },
	81	{ "call", CALL, JUMP },
	82	{ "callf", CALLF, JUMP },
	83	{ "cbw", CBW, WORD },
	84	{ "cdq", CWD, WORD },
	85	{ "clc", CLC, WORD },
	86	{ "cld", CLD, WORD },
	87	{ "cli", CLI, WORD },
	88	{ "clts", CLTS, WORD },
	89	{ "cmc", CMC, WORD },
	90	{ "cmp", CMP, WORD },
	91	{ "cmps", CMPS, WORD },
	92	{ "cmpsb", CMPS, BYTE },
	93	{ "cmpxchg", CMPXCHG, WORD },
	94	{ "cwd", CWD, WORD },
	95	{ "cwde", CBW, WORD },
	96	{ "daa", DAA, WORD },
	97	{ "das", DAS, WORD },
	98	{ "dd", DOT_DATA4, PSEUDO },
	99	{ "dec", DEC, WORD },
	100	{ "div", DIV, WORD },
	101	{ "enter", ENTER, WORD },
	102	{ "export", DOT_DEFINE, PSEUDO },
	103	{ "f2xm1", F2XM1, WORD },
	104	{ "fabs", FABS, WORD },
	105	{ "fadd", FADD, WORD },
	106	{ "faddd", FADDD, WORD },
	107	{ "faddp", FADDP, WORD },
	108	{ "fadds", FADDS, WORD },
	109	{ "fbld", FBLD, WORD },
	110	{ "fbstp", FBSTP, WORD },
	111	{ "fchs", FCHS, WORD },
	112	{ "fclex", FCLEX, WORD },
	113	{ "fcomd", FCOMD, WORD },
	114	{ "fcompd", FCOMPD, WORD },
	115	{ "fcompp", FCOMPP, WORD },
	116	{ "fcomps", FCOMPS, WORD },
	117	{ "fcoms", FCOMS, WORD },
	118	{ "fcos", FCOS, WORD },
	119	{ "fdecstp", FDECSTP, WORD },
	120	{ "fdivd", FDIVD, WORD },
	121	{ "fdivp", FDIVP, WORD },
	122	{ "fdivrd", FDIVRD, WORD },
	123	{ "fdivrp", FDIVRP, WORD },
	124	{ "fdivrs", FDIVRS, WORD },
	125	{ "fdivs", FDIVS, WORD },
	126	{ "ffree", FFREE, WORD },
	127	{ "fiaddl", FIADDL, WORD },
	128	{ "fiadds", FIADDS, WORD },
	129	{ "ficom", FICOM, WORD },
	130	{ "ficomp", FICOMP, WORD },
	131	{ "fidivl", FIDIVL, WORD },
	132	{ "fidivrl", FIDIVRL, WORD },
	133	{ "fidivrs", FIDIVRS, WORD },
	134	{ "fidivs", FIDIVS, WORD },
	135	{ "fildl", FILDL, WORD },
	136	{ "fildq", FILDQ, WORD },
	137	{ "filds", FILDS, WORD },
	138	{ "fimull", FIMULL, WORD },
	139	{ "fimuls", FIMULS, WORD },
	140	{ "fincstp", FINCSTP, WORD },
	141	{ "finit", FINIT, WORD },
	142	{ "fistl", FISTL, WORD },
	143	{ "fistp", FISTP, WORD },
	144	{ "fists", FISTS, WORD },
	145	{ "fisubl", FISUBL, WORD },
	146	{ "fisubrl", FISUBRL, WORD },
	147	{ "fisubrs", FISUBRS, WORD },
	148	{ "fisubs", FISUBS, WORD },
	149	{ "fld1", FLD1, WORD },
	150	{ "fldcw", FLDCW, WORD },
	151	{ "fldd", FLDD, WORD },
	152	{ "fldenv", FLDENV, WORD },
	153	{ "fldl2e", FLDL2E, WORD },
	154	{ "fldl2t", FLDL2T, WORD },
	155	{ "fldlg2", FLDLG2, WORD },
	156	{ "fldln2", FLDLN2, WORD },
	157	{ "fldpi", FLDPI, WORD },
	158	{ "flds", FLDS, WORD },
	159	{ "fldx", FLDX, WORD },
	160	{ "fldz", FLDZ, WORD },
	161	{ "fmuld", FMULD, WORD },
	162	{ "fmulp", FMULP, WORD },
	163	{ "fmuls", FMULS, WORD },
	164	{ "fnop", FNOP, WORD },
	165	{ "fpatan", FPATAN, WORD },
	166	{ "fprem", FPREM, WORD },
	167	{ "fprem1", FPREM1, WORD },
	168	{ "fptan", FPTAN, WORD },
	169	{ "frndint", FRNDINT, WORD },
	170	{ "frstor", FRSTOR, WORD },
	171	{ "fsave", FSAVE, WORD },
	172	{ "fscale", FSCALE, WORD },
	173	{ "fsin", FSIN, WORD },
	174	{ "fsincos", FSINCOS, WORD },
	175	{ "fsqrt", FSQRT, WORD },
	176	{ "fstcw", FSTCW, WORD },
	177	{ "fstd", FSTD, WORD },
	178	{ "fstenv", FSTENV, WORD },
	179	{ "fstpd", FSTPD, WORD },
	180	{ "fstps", FSTPS, WORD },
	181	{ "fstpx", FSTPX, WORD },
	182	{ "fsts", FSTS, WORD },
	183	{ "fstsw", FSTSW, WORD },
	184	{ "fsubd", FSUBD, WORD },
	185	{ "fsubp", FSUBP, WORD },
	186	{ "fsubpr", FSUBPR, WORD },
	187	{ "fsubrd", FSUBRD, WORD },
	188	{ "fsubrs", FSUBRS, WORD },
	189	{ "fsubs", FSUBS, WORD },
	190	{ "ftst", FTST, WORD },
	191	{ "fucom", FUCOM, WORD },
	192	{ "fucomp", FUCOMP, WORD },
	193	{ "fucompp", FUCOMPP, WORD },
	194	{ "fxam", FXAM, WORD },
	195	{ "fxch", FXCH, WORD },
	196	{ "fxtract", FXTRACT, WORD },
	197	{ "fyl2x", FYL2X, WORD },
	198	{ "fyl2xp1", FYL2XP1, WORD },
	199	{ "hlt", HLT, WORD },
	200	{ "idiv", IDIV, WORD },
	201	{ "imul", IMUL, WORD },
	202	{ "in", IN, WORD },
	203	{ "inb", IN, BYTE },
	204	{ "inc", INC, WORD },
	205	{ "ins", INS, WORD },
	206	{ "insb", INS, BYTE },
	207	{ "int", INT, WORD },
	208	{ "into", INTO, JUMP },
	209	{ "invd", INVD, WORD },
	210	{ "invlpg", INVLPG, WORD },
	211	{ "iret", IRET, JUMP },
	212	{ "iretd", IRETD, JUMP },
	213	{ "j", JMP, JUMP },
	214	{ "ja", JA, JUMP },
	215	{ "jae", JAE, JUMP },
	216	{ "jb", JB, JUMP },
	217	{ "jbe", JBE, JUMP },
	218	{ "jc", JB, JUMP },
	219	{ "jcxz", JCXZ, JUMP },
	220	{ "je", JE, JUMP },
	221	{ "jecxz", JCXZ, JUMP },
	222	{ "jeq", JE, JUMP },
	223	{ "jg", JG, JUMP },
	224	{ "jge", JGE, JUMP },
	225	{ "jgt", JG, JUMP },
	226	{ "jhi", JA, JUMP },
	227	{ "jhis", JAE, JUMP },
	228	{ "jl", JL, JUMP },
	229	{ "jle", JLE, JUMP },
	230	{ "jlo", JB, JUMP },
	231	{ "jlos", JBE, JUMP },
	232	{ "jlt", JL, JUMP },
	233	{ "jmp", JMP, JUMP },
	234	{ "jmpf", JMPF, JUMP },
	235	{ "jna", JBE, JUMP },
	236	{ "jnae", JB, JUMP },
	237	{ "jnb", JAE, JUMP },
	238	{ "jnbe", JA, JUMP },
	239	{ "jnc", JAE, JUMP },
	240	{ "jne", JNE, JUMP },
	241	{ "jng", JLE, JUMP },
	242	{ "jnge", JL, JUMP },
	243	{ "jnl", JGE, JUMP },
	244	{ "jnle", JG, JUMP },
	245	{ "jno", JNO, JUMP },
	246	{ "jnp", JNP, JUMP },
	247	{ "jns", JNS, JUMP },
	248	{ "jnz", JNE, JUMP },
	249	{ "jo", JO, JUMP },
	250	{ "jp", JP, JUMP },
	251	{ "js", JS, JUMP },
	252	{ "jz", JE, JUMP },
	253	{ "lahf", LAHF, WORD },
	254	{ "lar", LAR, WORD },
	255	{ "lds", LDS, WORD },
	256	{ "lea", LEA, WORD },
	257	{ "leave", LEAVE, WORD },
	258	{ "les", LES, WORD },
	259	{ "lfs", LFS, WORD },
	260	{ "lgdt", LGDT, WORD },
	261	{ "lgs", LGS, WORD },
	262	{ "lidt", LIDT, WORD },
	263	{ "lldt", LLDT, WORD },
	264	{ "lmsw", LMSW, WORD },
	265	{ "lock", LOCK, WORD },
	266	{ "lods", LODS, WORD },
	267	{ "lodsb", LODS, BYTE },
	268	{ "loop", LOOP, JUMP },
	269	{ "loope", LOOPE, JUMP },
	270	{ "loopne", LOOPNE, JUMP },
	271	{ "loopnz", LOOPNE, JUMP },
	272	{ "loopz", LOOPE, JUMP },
	273	{ "lsl", LSL, WORD },
	274	{ "lss", LSS, WORD },
	275	{ "ltr", LTR, WORD },
	276	{ "mov", MOV, WORD },
	277	{ "movs", MOVS, WORD },
	278	{ "movsb", MOVS, BYTE },
	279	{ "movsx", MOVSX, WORD },
	280	{ "movzx", MOVZX, WORD },
	281	{ "mul", MUL, WORD },
	282	{ "neg", NEG, WORD },
	283	{ "nop", NOP, WORD },
	284	{ "not", NOT, WORD },
	285	{ "or", OR, WORD },
	286	{ "out", OUT, WORD },
	287	{ "outb", OUT, BYTE },
	288	{ "outs", OUTS, WORD },
	289	{ "outsb", OUTS, BYTE },
	290	{ "pop", POP, WORD },
	291	{ "popa", POPA, WORD },
	292	{ "popad", POPA, WORD },
	293	{ "popf", POPF, WORD },
	294	{ "popfd", POPF, WORD },
	295	{ "push", PUSH, WORD },
	296	{ "pusha", PUSHA, WORD },
	297	{ "pushad", PUSHA, WORD },
	298	{ "pushf", PUSHF, WORD },
	299	{ "pushfd", PUSHF, WORD },
	300	{ "rcl", RCL, WORD },
	301	{ "rcr", RCR, WORD },
	302	{ "ret", RET, JUMP },
	303	{ "retf", RETF, JUMP },
	304	{ "rol", ROL, WORD },
	305	{ "ror", ROR, WORD },
	306	{ "sahf", SAHF, WORD },
	307	{ "sal", SAL, WORD },
	308	{ "sar", SAR, WORD },
	309	{ "sbb", SBB, WORD },
	310	{ "scas", SCAS, WORD },
	311	{ "seta", SETA, BYTE },
	312	{ "setae", SETAE, BYTE },
	313	{ "setb", SETB, BYTE },
	314	{ "setbe", SETBE, BYTE },
	315	{ "sete", SETE, BYTE },
	316	{ "setg", SETG, BYTE },
	317	{ "setge", SETGE, BYTE },
	318	{ "setl", SETL, BYTE },
	319	{ "setna", SETBE, BYTE },
	320	{ "setnae", SETB, BYTE },
	321	{ "setnb", SETAE, BYTE },
	322	{ "setnbe", SETA, BYTE },
	323	{ "setne", SETNE, BYTE },
	324	{ "setng", SETLE, BYTE },
	325	{ "setnge", SETL, BYTE },
	326	{ "setnl", SETGE, BYTE },
	327	{ "setnle", SETG, BYTE },
	328	{ "setno", SETNO, BYTE },
	329	{ "setnp", SETNP, BYTE },
	330	{ "setns", SETNS, BYTE },
	331	{ "seto", SETO, BYTE },
	332	{ "setp", SETP, BYTE },
	333	{ "sets", SETS, BYTE },
	334	{ "setz", SETE, BYTE },
	335	{ "sgdt", SGDT, WORD },
	336	{ "shl", SHL, WORD },
	337	{ "shld", SHLD, WORD },
	338	{ "shr", SHR, WORD },
	339	{ "shrd", SHRD, WORD },
	340	{ "sidt", SIDT, WORD },
	341	{ "sldt", SLDT, WORD },
	342	{ "smsw", SMSW, WORD },
	343	{ "stc", STC, WORD },
	344	{ "std", STD, WORD },
	345	{ "sti", STI, WORD },
	346	{ "stos", STOS, WORD },
	347	{ "stosb", STOS, BYTE },
	348	{ "str", STR, WORD },
	349	{ "sub", SUB, WORD },
	350	{ "test", TEST, WORD },
	351	{ "verr", VERR, WORD },
	352	{ "verw", VERW, WORD },
	353	{ "wait", WAIT, WORD },
	354	{ "wbinvd", WBINVD, WORD },
	355	{ "xadd", XADD, WORD },
	356	{ "xchg", XCHG, WORD },
	357	{ "xlat", XLAT, WORD },
	358	{ "xor", XOR, WORD },
	359	};
	360
	void bas_parse_init(char *file)
	/* Prepare parsing of a BAS assembly file: hand the file name to the
	 * tokenizer together with '!' (presumably the BAS comment character;
	 * confirm against tok_init()).
	 */
	{
		enum { BAS_TOK_CHAR = '!' };

		tok_init(file, BAS_TOK_CHAR);
	}
	366
	367	static void zap(void)
	368	/* An error, zap the rest of the line. */
	369	{
	370	token_t *t;
	371
	372	while ((t= get_token(0))->type != T_EOF && t->symbol != ';')
	373	skip_token(1);
	374	}
	375
	376	static mnemonic_t search_mnem(char name)
	377	/* Binary search for a mnemonic. (That's why the table is sorted.) */
	378	{
	379	int low, mid, high;
	380	int cmp;
	381	mnemonic_t *m;
	382
	383	low= 0;
	384	high= arraysize(mnemtab)-1;
	385	while (low <= high) {
	386	mid= (low + high) / 2;
	387	m= &mnemtab[mid];
	388
	389	if ((cmp= strcmp(name, m->name)) == 0) return m;
	390
	391	if (cmp < 0) high= mid-1; else low= mid+1;
	392	}
	393	return nil;
	394	}
	395
	396	static expression_t bas_get_C_expression(int pn)
	397	/* Read a "C-like" expression. Note that we don't worry about precedence,
	398	* the expression is printed later like it is read. If the target language
	399	* does not have all the operators (like ~) then this has to be repaired by
	400	* changing the source file. (No problem, you still have one source file
	401	* to maintain, not two.)
	402	*/
	403	{
	404	expression_t e, a1, *a2;
	405	token_t *t;
	406
	407	if ((t= get_token(*pn))->symbol == '(') {
	408	/* ( expr ): grouping. */
	409	(*pn)++;
	410	if ((a1= bas_get_C_expression(pn)) == nil) return nil;
	411	if (get_token(*pn)->symbol != ')') {
	412	parse_err(1, t, "missing )\n");
	413	del_expr(a1);
	414	return nil;
	415	}
	416	(*pn)++;
	417	e= new_expr();
	418	e->operator= '[';
	419	e->middle= a1;
	420	} else
	421	if (t->type == T_WORD \|\| t->type == T_STRING) {
	422	/* Label, number, or string. */
	423	e= new_expr();
	424	e->operator= t->type == T_WORD ? 'W' : 'S';
	425	e->name= allocate(nil, (t->len+1) * sizeof(e->name[0]));
	426	memcpy(e->name, t->name, t->len+1);
	427	e->len= t->len;
	428	(*pn)++;
	429	} else
	430	if (t->symbol == '+' \|\| t->symbol == '-' \|\| t->symbol == '~') {
	431	/* Unary operator. */
	432	(*pn)++;
	433	if ((a1= bas_get_C_expression(pn)) == nil) return nil;
	434	e= new_expr();
	435	e->operator= t->symbol;
	436	e->middle= a1;
	437	} else
	438	if (t->symbol == '$' && get_token(*pn + 1)->type == T_WORD) {
	439	/* A hexadecimal number. */
	440	t= get_token(*pn + 1);
	441	e= new_expr();
	442	e->operator= 'W';
	443	e->name= allocate(nil, (t->len+3) * sizeof(e->name[0]));
	444	strcpy(e->name, "0x");
	445	memcpy(e->name+2, t->name, t->len+1);
	446	e->len= t->len+2;
	447	(*pn)+= 2;
	448	} else {
	449	parse_err(1, t, "expression syntax error\n");
	450	return nil;
	451	}
	452
	453	switch ((t= get_token(*pn))->symbol) {
	454	case '+':
	455	case '-':
	456	case '*':
	457	case '/':
	458	case '%':
	459	case '&':
	460	case '\|':
	461	case '^':
	462	case S_LEFTSHIFT:
	463	case S_RIGHTSHIFT:
	464	(*pn)++;
	465	a1= e;
	466	if ((a2= bas_get_C_expression(pn)) == nil) {
	467	del_expr(a1);
	468	return nil;
	469	}
	470	e= new_expr();
	471	e->operator= t->symbol;
	472	e->left= a1;
	473	e->right= a2;
	474	}
	475	return e;
	476	}
	477
	478	/* We want to know the sizes of the first two operands. */
	479	static optype_t optypes[2];
	480	static int op_idx;
	481
	482	static expression_t bas_get_operand(int pn)
	483	/* Get something like: [memory], offset[base+indexscale], or simpler. /
	484	{
	485	expression_t e, offset, base, index;
	486	token_t *t;
	487	int c;
	488	optype_t optype;
	489
	490	/* Prefixed by 'byte', 'word' or 'dword'? */
	491	if ((t= get_token(*pn))->type == T_WORD && (
	492	strcmp(t->name, "byte") == 0
	493	\|\| strcmp(t->name, "word") == 0
	494	\|\| strcmp(t->name, "dword") == 0)
	495	) {
	496	switch (t->name[0]) {
	497	case 'b': optype= BYTE; break;
	498	case 'w': optype= use16() ? WORD : OWORD; break;
	499	case 'd': optype= use32() ? WORD : OWORD; break;
	500	}
	501	if (op_idx < arraysize(optypes)) optypes[op_idx++]= optype;
	502	(*pn)++;
	503
	504	/* It may even be "byte ptr"... */
	505	if ((t= get_token(*pn))->type == T_WORD
	506	&& strcmp(t->name, "ptr") == 0) {
	507	(*pn)++;
	508	}
	509	}
	510
	511	/* Is it [memory]? */
	512	if (get_token(*pn)->symbol == '['
	513	&& ((t= get_token(*pn + 1))->type != T_WORD
	514	\|\| !isregister(t->name))
	515	) {
	516	/* A memory dereference. */
	517	(*pn)++;
	518	if ((offset= bas_get_C_expression(pn)) == nil) return nil;
	519	if (get_token(*pn)->symbol != ']') {
	520	parse_err(1, t, "operand syntax error\n");
	521	del_expr(offset);
	522	return nil;
	523	}
	524	(*pn)++;
	525	e= new_expr();
	526	e->operator= '(';
	527	e->middle= offset;
	528	return e;
	529	}
	530
	531	/* #something? something? /
	532	if ((c= get_token(pn)->symbol) == '#' \|\| c == '') {
	533	/* '#' and '' are often used to introduce some constant. /
	534	(*pn)++;
	535	}
	536
	537	/* Offset? */
	538	if (get_token(*pn)->symbol != '[') {
	539	/* There is an offset. */
	540	if ((offset= bas_get_C_expression(pn)) == nil) return nil;
	541	} else {
	542	/* No offset. */
	543	offset= nil;
	544	}
	545
	546	/* [base]? [base+? base-? */
	547	c= 0;
	548	if (get_token(*pn)->symbol == '['
	549	&& (t= get_token(*pn + 1))->type == T_WORD
	550	&& isregister(t->name)
	551	&& ((c= get_token(*pn + 2)->symbol) == ']' \|\| c=='+' \|\| c=='-')
	552	) {
	553	/* A base register expression. */
	554	base= new_expr();
	555	base->operator= 'B';
	556	base->name= copystr(t->name);
	557	(*pn)+= c == ']' ? 3 : 2;
	558	} else {
	559	/* No base register expression. */
	560	base= nil;
	561	}
	562
	563	/* +offset]? -offset]? */
	564	if (offset == nil
	565	&& (c == '+' \|\| c == '-')
	566	&& (t= get_token(*pn + 1))->type == T_WORD
	567	&& !isregister(t->name)
	568	) {
	569	(*pn)++;
	570	if ((offset= bas_get_C_expression(pn)) == nil) return nil;
	571	if (get_token(*pn)->symbol != ']') {
	572	parse_err(1, t, "operand syntax error\n");
	573	del_expr(offset);
	574	del_expr(base);
	575	return nil;
	576	}
	577	(*pn)++;
	578	c= 0;
	579	}
	580
	581	/* [indexscale]? +indexscale]? */
	582	if (c == '+' \|\| get_token(*pn)->symbol == '[') {
	583	/* An index most likely. */
	584	token_t *m= nil;
	585
	586	if (!( /* This must be true: */
	587	(t= get_token(*pn + 1))->type == T_WORD
	588	&& isregister(t->name)
	589	&& (get_token(*pn + 2)->symbol == ']' \|\| (
	590	get_token(pn + 2)->symbol == ''
	591	&& (m= get_token(*pn + 3))->type == T_WORD
	592	&& strchr("1248", m->name[0]) != nil
	593	&& m->name[1] == 0
	594	&& get_token(*pn + 4)->symbol == ']'
	595	))
	596	)) {
	597	/* Alas it isn't */
	598	parse_err(1, t, "operand syntax error\n");
	599	del_expr(offset);
	600	del_expr(base);
	601	return nil;
	602	}
	603	/* Found an index. */
	604	index= new_expr();
	605	index->operator= m == nil ? '1' : m->name[0];
	606	index->name= copystr(t->name);
	607	(*pn)+= (m == nil ? 3 : 5);
	608	} else {
	609	/* No index. */
	610	index= nil;
	611	}
	612
	613	if (base == nil && index == nil) {
	614	/* Return a lone offset as is. */
	615	e= offset;
	616
	617	/* Lone registers tell operand size. */
	618	if (offset->operator == 'W' && isregister(offset->name)) {
	619	switch (isregister(offset->name)) {
	620	case 1: optype= BYTE; break;
	621	case 2: optype= use16() ? WORD : OWORD; break;
	622	case 4: optype= use32() ? WORD : OWORD; break;
	623	}
	624	if (op_idx < arraysize(optypes))
	625	optypes[op_idx++]= optype;
	626	}
	627	} else {
	628	e= new_expr();
	629	e->operator= 'O';
	630	e->left= offset;
	631	e->middle= base;
	632	e->right= index;
	633	}
	634	return e;
	635	}
	636
	637	static expression_t bas_get_oplist(int pn)
	638	/* Get a comma (or colon for jmpf and callf) separated list of instruction
	639	* operands.
	640	*/
	641	{
	642	expression_t e, o1, *o2;
	643	token_t *t;
	644
	645	if ((e= bas_get_operand(pn)) == nil) return nil;
	646
	647	if ((t= get_token(*pn))->symbol == ',' \|\| t->symbol == ':') {
	648	o1= e;
	649	(*pn)++;
	650	if ((o2= bas_get_oplist(pn)) == nil) {
	651	del_expr(o1);
	652	return nil;
	653	}
	654	e= new_expr();
	655	e->operator= ',';
	656	e->left= o1;
	657	e->right= o2;
	658	}
	659	return e;
	660	}
	661
	662	static asm86_t *bas_get_statement(void)
	663	/* Get a pseudo op or machine instruction with arguments. */
	664	{
	665	token_t *t= get_token(0);
	666	asm86_t *a;
	667	mnemonic_t *m;
	668	int n;
	669	int prefix_seen;
	670
	671
	672	assert(t->type == T_WORD);
	673
	674	if (strcmp(t->name, ".sect") == 0) {
	675	/* .sect .text etc. Accept only four segment names. */
	676	skip_token(1);
	677	t= get_token(0);
	678	if (t->type != T_WORD \|\| (
	679	strcmp(t->name, ".text") != 0
	680	&& strcmp(t->name, ".rom") != 0
	681	&& strcmp(t->name, ".data") != 0
	682	&& strcmp(t->name, ".bss") != 0
	683	&& strcmp(t->name, ".end") != 0
	684	)) {
	685	parse_err(1, t, "weird section name to .sect\n");
	686	return nil;
	687	}
	688	}
	689	a= new_asm86();
	690
	691	/* Process instruction prefixes. */
	692	for (prefix_seen= 0;; prefix_seen= 1) {
	693	if (strcmp(t->name, "rep") == 0
	694	\|\| strcmp(t->name, "repe") == 0
	695	\|\| strcmp(t->name, "repne") == 0
	696	\|\| strcmp(t->name, "repz") == 0
	697	\|\| strcmp(t->name, "repnz") == 0
	698	) {
	699	if (a->rep != ONCE) {
	700	parse_err(1, t,
	701	"can't have more than one rep\n");
	702	}
	703	switch (t->name[3]) {
	704	case 0: a->rep= REP; break;
	705	case 'e':
	706	case 'z': a->rep= REPE; break;
	707	case 'n': a->rep= REPNE; break;
	708	}
	709	} else
	710	if (strcmp(t->name, "seg") == 0
	711	&& get_token(1)->type == T_WORD) {
	712	if (a->seg != DEFSEG) {
	713	parse_err(1, t,
	714	"can't have more than one segment prefix\n");
	715	}
	716	switch (get_token(1)->name[0]) {
	717	case 'c': a->seg= CSEG; break;
	718	case 'd': a->seg= DSEG; break;
	719	case 'e': a->seg= ESEG; break;
	720	case 'f': a->seg= FSEG; break;
	721	case 'g': a->seg= GSEG; break;
	722	case 's': a->seg= SSEG; break;
	723	}
	724	skip_token(1);
	725	} else
	726	if (!prefix_seen) {
	727	/* No prefix here, get out! */
	728	break;
	729	} else {
	730	/* No more prefixes, next must be an instruction. */
	731	if (t->type != T_WORD
	732	\|\| (m= search_mnem(t->name)) == nil
	733	\|\| m->optype == PSEUDO
	734	) {
	735	parse_err(1, t,
	736	"machine instruction expected after instruction prefix\n");
	737	del_asm86(a);
	738	return nil;
	739	}
	740	break;
	741	}
	742
	743	/* Skip the prefix and extra newlines. */
	744	do {
	745	skip_token(1);
	746	} while ((t= get_token(0))->symbol == ';');
	747	}
	748
	749	/* All the readahead being done upsets the line counter. */
	750	a->line= t->line;
	751
	752	/* Read a machine instruction or pseudo op. */
	753	if ((m= search_mnem(t->name)) == nil) {
	754	parse_err(1, t, "unknown instruction '%s'\n", t->name);
	755	del_asm86(a);
	756	return nil;
	757	}
	758	a->opcode= m->opcode;
	759	a->optype= m->optype;
	760	if (a->opcode == CBW \|\| a->opcode == CWD) {
	761	a->optype= (strcmp(t->name, "cbw") == 0
	762	\|\| strcmp(t->name, "cwd") == 0) == use16() ? WORD : OWORD;
	763	}
	764	for (op_idx= 0; op_idx < arraysize(optypes); op_idx++)
	765	optypes[op_idx]= m->optype;
	766	op_idx= 0;
	767
	768	n= 1;
	769	if (get_token(1)->symbol != ';'
	770	&& (a->args= bas_get_oplist(&n)) == nil) {
	771	del_asm86(a);
	772	return nil;
	773	}
	774
	775	if (m->optype == WORD) {
	776	/* Does one of the operands overide the optype? */
	777	for (op_idx= 0; op_idx < arraysize(optypes); op_idx++) {
	778	if (optypes[op_idx] != m->optype)
	779	a->optype= optypes[op_idx];
	780	}
	781	}
	782
	783	if (get_token(n)->symbol != ';') {
	784	parse_err(1, t, "garbage at end of instruction\n");
	785	del_asm86(a);
	786	return nil;
	787	}
	788	switch (a->opcode) {
	789	case DOT_ALIGN:
	790	/* Restrict .align to have a single numeric argument, some
	791	* assemblers think of the argument as a power of two, so
	792	* we need to be able to change the value.
	793	*/
	794	if (strcmp(t->name, ".even") == 0 && a->args == nil) {
	795	/* .even becomes .align 2. */
	796	expression_t *e;
	797	a->args= e= new_expr();
	798	e->operator= 'W';
	799	e->name= copystr("2");
	800	e->len= 2;
	801	}
	802	if (a->args == nil \|\| a->args->operator != 'W'
	803	\|\| !isanumber(a->args->name)) {
	804	parse_err(1, t,
	805	".align is restricted to one numeric argument\n");
	806	del_asm86(a);
	807	return nil;
	808	}
	809	break;
	810	case MOVSX:
	811	case MOVZX:
	812	/* Types of both operands tell the instruction type. */
	813	a->optype= optypes[0];
	814	if (optypes[1] == BYTE) {
	815	a->opcode= a->opcode == MOVSX ? MOVSXB : MOVZXB;
	816	}
	817	break;
	818	case SAL:
	819	case SAR:
	820	case SHL:
	821	case SHR:
	822	case RCL:
	823	case RCR:
	824	case ROL:
	825	case ROR:
	826	/* Only the first operand tells the operand size. */
	827	a->optype= optypes[0];
	828	break;
	829	default:;
	830	}
	831	skip_token(n+1);
	832	return a;
	833	}
	834
	835	asm86_t *bas_get_instruction(void)
	836	{
	837	asm86_t *a= nil;
	838	expression_t *e;
	839	token_t *t;
	840
	841	while ((t= get_token(0))->symbol == ';')
	842	skip_token(1);
	843
	844	if (t->type == T_EOF) return nil;
	845
	846	if (t->symbol == '#') {
	847	/* Preprocessor line and file change. */
	848
	849	if ((t= get_token(1))->type != T_WORD \|\| !isanumber(t->name)
	850	\|\| get_token(2)->type != T_STRING
	851	) {
	852	parse_err(1, t, "file not preprocessed?\n");
	853	zap();
	854	} else {
	855	set_file(get_token(2)->name,
	856	strtol(get_token(1)->name, nil, 0) - 1);
	857
	858	/* GNU CPP adds extra cruft, simply zap the line. */
	859	zap();
	860	}
	861	a= bas_get_instruction();
	862	} else
	863	if (t->type == T_WORD && get_token(1)->symbol == ':') {
	864	/* A label definition. */
	865	a= new_asm86();
	866	a->line= t->line;
	867	a->opcode= DOT_LABEL;
	868	a->optype= PSEUDO;
	869	a->args= e= new_expr();
	870	e->operator= ':';
	871	e->name= copystr(t->name);
	872	skip_token(2);
	873	} else
	874	if (t->type == T_WORD && get_token(1)->symbol == '=') {
	875	int n= 2;
	876
	877	if ((e= bas_get_C_expression(&n)) == nil) {
	878	zap();
	879	a= bas_get_instruction();
	880	} else
	881	if (get_token(n)->symbol != ';') {
	882	parse_err(1, t, "garbage after assignment\n");
	883	zap();
	884	a= bas_get_instruction();
	885	} else {
	886	a= new_asm86();
	887	a->line= t->line;
	888	a->opcode= DOT_EQU;
	889	a->optype= PSEUDO;
	890	a->args= new_expr();
	891	a->args->operator= '=';
	892	a->args->name= copystr(t->name);
	893	a->args->middle= e;
	894	skip_token(n+1);
	895	}
	896	} else
	897	if (t->type == T_WORD && get_token(1)->type == T_WORD
	898	&& strcmp(get_token(1)->name, "lcomm") == 0) {
	899	/* Local common block definition. */
	900	int n= 2;
	901
	902	if ((e= bas_get_C_expression(&n)) == nil) {
	903	zap();
	904	a= bas_get_instruction();
	905	} else
	906	if (get_token(n)->symbol != ';') {
	907	parse_err(1, t, "garbage after lcomm\n");
	908	zap();
	909	a= bas_get_instruction();
	910	} else {
	911	a= new_asm86();
	912	a->line= t->line;
	913	a->opcode= DOT_LCOMM;
	914	a->optype= PSEUDO;
	915	a->args= new_expr();
	916	a->args->operator= ',';
	917	a->args->right= e;
	918	a->args->left= e= new_expr();
	919	e->operator= 'W';
	920	e->name= copystr(t->name);
	921	e->len= strlen(e->name)+1;
	922	skip_token(n+1);
	923	}
	924	} else
	925	if (t->type == T_WORD) {
	926	if ((a= bas_get_statement()) == nil) {
	927	zap();
	928	a= bas_get_instruction();
	929	}
	930	} else {
	931	parse_err(1, t, "syntax error\n");
	932	zap();
	933	a= bas_get_instruction();
	934	}
	935	if (a->optype == OWORD) {
	936	a->optype= WORD;
	937	a->oaz\|= OPZ;
	938	}
	939	return a;
	940	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: