-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathz80_tokenize.cpp
385 lines (352 loc) · 14.3 KB
/
z80_tokenize.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
/***
* Z80 Tokenizer
***/
#include "z80_assembler.h"
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
// Forward declarations of the symbol-table helpers defined further down in this file.
uint16_t CalcHash( const char *name ); // 16-bit hash of a NUL-terminated name
SymbolP FindSymbol( const char *name ); // look a name up, creating an entry if it is missing
void InitSymTab( void ); // clear the table, then enter every built-in keyword
// clang-format off
// One row of a built-in keyword table. InitSymTab() turns every row into a
// symbol: `id` becomes the symbol type and `p` is merged into the upper half
// of the symbol value.
typedef struct ShortSym {
    int16_t     id; // token id of the keyword
    const char *s;  // spelling of the keyword
    uint16_t    p;  // additional parameter word
} ShortSym;
// Assembler directives (pseudo opcodes). The long and the short spelling of a
// directive share one id; no directive carries a parameter. The hex values in
// the trailing comments are the ids as annotated by the original author.
static const ShortSym Pseudo[] = {
    { DEFB,  "DEFB",  0 }, // 0x100
    { DEFB,  "DB",    0 },
    { DEFM,  "DEFM",  0 }, // 0x101
    { DEFM,  "DM",    0 },
    { DEFS,  "DEFS",  0 }, // 0x102
    { DEFS,  "DS",    0 },
    { DEFW,  "DEFW",  0 }, // 0x103
    { DEFW,  "DW",    0 },
    { END,   "END",   0 }, // 0x104
    { EQU,   "EQU",   0 }, // 0x105
    { ORG,   "ORG",   0 }, // 0x106
    { IF,    "IF",    0 }, // 0x107
    { ENDIF, "ENDIF", 0 }, // 0x108
    { ELSE,  "ELSE",  0 }, // 0x109
    { PRINT, "PRINT", 0 }, // 0x10A
    { FILL,  "FILL",  0 }  // 0x10B
};
// Z80 instruction mnemonics.
// Each entry is { id, mnemonic, p }. The id is 0x200 plus one of the type codes
// listed below; it selects how the operands of the instruction are parsed.
// NOTE(review): p looks like one or two packed opcode bytes, but how it is
// interpreted per type is decided by code outside this file - confirm there.
// type: (+ 0x200)
// 0x00 : IN,OUT
// 0x01 : 1 byte opcode, no parameter
// 0x02 : 2 byte opcode, no parameter
// 0x03 : 2 byte opcode, (HL) required
// 0x04 : 1.parameter = bit number, 2.parameter = <ea> (BIT,RES,SET)
// 0x05 : IM (one parameter: 0,1,2)
// 0x06 : ADD,ADC,SUB,SBC,AND,XOR,OR,CP
// 0x07 : INC, DEC, like 0x06 with absolute address
// 0x08 : JP, CALL, JR (Warning! Different <ea>!)
// 0x09 : RET (c or nothing)
// 0x0A : RST (00,08,10,18,20,28,30,38)
// 0x0B : DJNZ
// 0x0C : EX: (SP),dreg or DE,HL or AF,AF'
// 0x0D : LD
// 0x0E : PUSH, POP: dreg
// 0x0F : RR,RL,RRC,RLC,SRA,SLA,SLL,SRL
static const ShortSym Opcodes[] = {
{ 0x206,"ADC",0x88CE }, { 0x206,"ADD",0x80C6 }, { 0x206,"AND",0xA0E6 },
{ 0x204,"BIT",0xCB40 }, { 0x208,"CALL",0xC4CD },{ 0x201,"CCF",0x3F00 },
{ 0x206,"CP",0xB8FE }, { 0x202,"CPD",0xEDA9 }, { 0x202,"CPDR",0xEDB9 },
{ 0x202,"CPI",0xEDA1 }, { 0x202,"CPIR",0xEDB1 },{ 0x201,"CPL",0x2F00 },
{ 0x201,"DAA",0x2700 }, { 0x207,"DEC",0x0500 }, { 0x201,"DI",0xF300 },
{ 0x20B,"DJNZ",0x1000 },{ 0x201,"EI",0xFB00 }, { 0x20C,"EX",0xE3EB },
{ 0x201,"EXX",0xD900 }, { 0x201,"HALT",0x7600 },{ 0x205,"IM",0xED46 },
{ 0x200,"IN",0x40DB }, { 0x207,"INC",0x0400 }, { 0x202,"IND",0xEDAA },
{ 0x202,"INDR",0xEDBA },{ 0x202,"INI",0xEDA2 }, { 0x202,"INIR",0xEDB2 },
{ 0x208,"JP",0xC2C3 }, { 0x208,"JR",0x2018 }, { 0x20D,"LD",0x0000 },
{ 0x202,"LDD",0xEDA8 }, { 0x202,"LDDR",0xEDB8 },{ 0x202,"LDI",0xEDA0 },
{ 0x202,"LDIR",0xEDB0 },{ 0x202,"NEG",0xED44 }, { 0x201,"NOP",0x0000 },
{ 0x206,"OR",0xB0F6 }, { 0x202,"OTDR",0xEDBB },{ 0x202,"OTIR",0xEDB3 },
{ 0x200,"OUT",0x41D3 }, { 0x202,"OUTD",0xEDAB },{ 0x202,"OUTI",0xEDA3 },
{ 0x20E,"POP",0xC1E1 }, { 0x20E,"PUSH",0xC5E5 },{ 0x204,"RES",0xCB80 },
{ 0x209,"RET",0xC0C9 }, { 0x202,"RETI",0xED4D },{ 0x202,"RETN",0xED45 },
{ 0x20F,"RL",0x1016 }, { 0x201,"RLA",0x1700 }, { 0x20F,"RLC",0x0016 },
{ 0x201,"RLCA",0x0700 },{ 0x203,"RLD",0xED6F }, { 0x20F,"RR",0x181E },
{ 0x201,"RRA",0x1F00 }, { 0x20F,"RRC",0x080E }, { 0x201,"RRCA",0x0F00 },
{ 0x203,"RRD",0xED67 }, { 0x20A,"RST",0xC700 }, { 0x206,"SBC",0x98DE },
{ 0x201,"SCF",0x3700 }, { 0x204,"SET",0xCBC0 }, { 0x20F,"SLA",0x2026 },
{ 0x20F,"SLL",0x3036 }, { 0x20F,"SRA",0x282E }, { 0x20F,"SRL",0x383E },
{ 0x206,"SUB",0x90D6 }, { 0x206,"XOR",0xA8EE }
};
// Register names. The id is 0x300 plus a register code:
//   00..07 : B,C,D,E,H,L,(HL),A   ((HL) has no entry in this table)
//   10..13 : BC,DE,HL,SP
//   23     : AF
//   30..31 : IX,IY
//   40..41 : R,I
//   54..55 : HX,X
//   64..65 : HY,Y
// No register carries a parameter.
static const ShortSym Register[] = {
    { 0x307, "A",  0 },
    { 0x323, "AF", 0 },
    { 0x300, "B",  0 },
    { 0x310, "BC", 0 },
    { 0x301, "C",  0 },
    { 0x302, "D",  0 },
    { 0x311, "DE", 0 },
    { 0x303, "E",  0 },
    { 0x304, "H",  0 },
    { 0x312, "HL", 0 },
    { 0x341, "I",  0 },
    { 0x330, "IX", 0 },
    { 0x331, "IY", 0 },
    { 0x305, "L",  0 },
    { 0x340, "R",  0 },
    { 0x313, "SP", 0 },
    { 0x355, "X",  0 },
    { 0x354, "HX", 0 },
    { 0x365, "Y",  0 },
    { 0x364, "HY", 0 }
};
// Condition codes; the id is 0x400 plus the condition number.
// Condition "C" (id 0x403) is left out on purpose: it is spelled exactly like
// register C, which already has an entry in the Register table.
static const ShortSym Conditions[] = {
    { 0x407, "M",  0 },
    { 0x402, "NC", 0 },
    { 0x400, "NZ", 0 },
    { 0x406, "P",  0 },
    { 0x405, "PE", 0 },
    { 0x404, "PO", 0 },
    { 0x401, "Z",  0 }
};
// Descriptor of one built-in keyword table, as walked by InitSymTab().
typedef struct TokenTable {
    const ShortSym *table;     // first entry of the table; a null pointer ends the descriptor list
    int16_t         tablesize; // number of entries in the table
} TokenTable;
// All built-in keyword tables with their entry counts. The final entry has a
// null table pointer and stops the walk in InitSymTab().
static const TokenTable Token[] = {
    { Pseudo,     sizeof( Pseudo ) / sizeof( Pseudo[ 0 ] ) },
    { Opcodes,    sizeof( Opcodes ) / sizeof( Opcodes[ 0 ] ) },
    { Register,   sizeof( Register ) / sizeof( Register[ 0 ] ) },
    { Conditions, sizeof( Conditions ) / sizeof( Conditions[ 0 ] ) },
    { nullptr,    0 }
};
// clang-format on
// Output buffer of TokenizeLine(): one entry per token of the current line,
// followed by a terminating entry of type ILLEGAL with value 0.
Command Cmd[ 80 ]; // a tokenized line
// Hash buckets of the symbol table. FindSymbol() selects a bucket with the low
// byte of CalcHash(name) and chains the entries of a bucket through `next`.
SymbolP SymTab[ 256 ]; // symbol table (split with the hash byte)
/***
 * Compute a simple 16-bit hash of a NUL-terminated string.
 *
 * For every character the running hash is shifted left by four bits, the
 * character (as an unsigned byte) is added, and the top nibble of the result
 * is folded back into the low bits with XOR. The empty string hashes to 0.
 ***/
uint16_t CalcHash( const char *name ) {
    uint16_t hash = 0;
    const char *p;

    for ( p = name; *p != 0; ++p ) {
        const uint8_t ch = (uint8_t)*p;
        hash = (uint16_t)( ( hash << 4 ) + ch );
        // XOR with a zero top nibble is a no-op, so no test is needed
        hash ^= (uint16_t)( hash >> 12 );
    }
    return hash;
}
/***
 * Search for a symbol; create an empty one if it does not exist yet.
 *
 * name:    NUL-terminated symbol name (compared case-sensitively).
 * returns: the existing or the newly created symbol, or nullptr when no
 *          memory is left or when the name does not fit into Symbol::name.
 *
 * A new symbol is zero-initialised apart from its hash and name and is put
 * at the head of its hash bucket in SymTab.
 ***/
SymbolP FindSymbol( const char *name ) {
    const uint16_t hash = CalcHash( name ); // hash value for the name
    const uint8_t hashb = (uint8_t)hash;    // its low byte selects the bucket
    SymbolP s;

    for ( s = SymTab[ hashb ]; s; s = s->next ) {
        // compare the cheap hash first, the string only on a hash match
        if ( s->hash == hash && !strcmp( s->name, name ) )
            return s;
    }

    // The name is copied into storage embedded in the Symbol, so a name that
    // does not fit (including its NUL) must be refused instead of overflowing
    // the allocation. sizeof does not evaluate its operand, so using the null
    // `s` here is fine.
    // NOTE(review): assumes Symbol::name is a char array - the original
    // strcpy() into freshly zeroed storage only works in that case.
    if ( strlen( name ) >= sizeof( s->name ) )
        return nullptr;

    s = (SymbolP)calloc( 1, sizeof( Symbol ) ); // zero-filled symbol
    if ( !s )
        return nullptr; // not enough memory

    s->hash = hash;
    strcpy( s->name, name ); // fits, checked above
    s->next = SymTab[ hashb ];
    SymTab[ hashb ] = s; // link the symbol into the list
    return s;
}
/***
* initialize the symbol table
***/
void InitSymTab( void ) {
int16_t i;
SymbolP s;
const TokenTable *t;
for ( i = 0; i < 256; i++ )
SymTab[ i ] = nullptr; // reset all entries
for ( t = Token; t->table; t++ ) { // check all token tables
for ( i = 0; i < t->tablesize; i++ ) { // and all tokens for a single table
s = FindSymbol( t->table[ i ].s ); // add all opcodes to the symbol table
s->type = t->table[ i ].id; // ID (<> 0!)
s->val = ( (int32_t)t->table[ i ].p << 16 ) | s->type; // merge parameter and id
}
}
}
// Is this an alphanumeric character _or_ an underline, which is a valid symbol
// character? Returns 1 if so, 0 otherwise.
// The argument is converted to unsigned char first: passing a negative char
// value to isalnum() is undefined behaviour.
int isalnum_( char c ) { return isalnum( (unsigned char)c ) || c == '_'; }
/***
* tokenize a single line
***/
void TokenizeLine( char *sp ) {
char *tp, *sp2;
char c;
char stemp[ MAXLINELENGTH ];
char maxc;
int16_t base; // binary, decimal or hex
bool dollar; // token starts with $
bool dot;
Type typ;
long val;
char AktUpLine[ MAXLINELENGTH ];
char *AktLine = sp; // remember the beginning of the line
CommandP cp = Cmd; // ptr to the command buffer
sp2 = AktUpLine;
while ( ( *sp2++ = toupper( *sp++ ) ) )
; // convert to capital letters
sp = AktUpLine;
while ( 1 ) { // parse the whole string
while ( ( isspace( c = *sp++ ) ) )
; // ignore spaces
if ( c == ';' )
break; // a comment => ignore the rest of the line
if ( c == 0 )
break; // end of line => done
tp = sp - 1; // pointer to current token
typ = ILLEGAL; // default: an illegal type
base = 0;
dot = false; // pseudo opcodes can start with '.'
dollar = false; // $ = PC
if ( c == '.' ) {
c = *sp++;
dot = true;
} else if ( c == '$' ) { // PC or the beginning of a hex number
if ( isalnum( *sp ) && *sp <= 'F' ) {
base = 16;
c = *sp++;
} else
dollar = true;
} else if ( !strncmp( tp, "0X", 2 ) && isxdigit( tp[ 2 ] ) ) {
sp++; // skip 'X'
c = *sp++; // 1st hex digit
base = 16;
}
if ( dollar ) {
typ = NUM;
val = PC;
} else if ( isalnum_( c ) ) { // A…Z, a…z, 0-9
sp2 = stemp; // ptr to the beginning
maxc = 0; // highest ASCII character
do {
*sp2++ = c;
if ( isalnum_( *sp ) ) { // not the last character?
if ( c > maxc )
maxc = c; // remember the highest ASCII character
} else { // last character
if ( base == 16 ) {
base = ( maxc <= 'F' && c <= 'F' ) ? 16 : 0; // invalid hex digits?
} else if ( stemp + 1 != sp2 ) { // at least one character
if ( isdigit( tp[ 0 ] ) && c == 'H' && maxc <= 'F' )
base = 16; // starts with digit and ends with 'H': hex number
else if ( c == 'D' && maxc <= '9' )
base = 10; // 'D' after a number: decimal number
else if ( c == 'B' && maxc <= '1' )
base = 2; // 'B' after a number: binary number
if ( base > 0 )
--sp2;
}
if ( !base && c >= '0' && c <= '9' && maxc <= '9' )
base = 10;
}
c = *sp++; // get the next character
} while ( isalnum_( c ) );
sp--;
*sp2 = 0;
if ( base > 0 ) { // a valid number?
sp2 = stemp;
val = 0;
while ( ( c = *sp2++ ) != 0 ) { // read the value
val *= base; // multiply with the number base
val += ( c <= '9' ) ? c - '0' : c - 'A' + 10;
}
typ = NUM; // type: a number
} else {
// first character not a digit or token doesn't start with "$" or "0X"?
if ( *stemp >= 'A' && tp[ 0 ] != '$' && strncmp( tp, "0X", 2 ) ) {
SymbolP sym = FindSymbol( stemp ); // an opcode or a symbol
if ( !sym )
break; // error (out of memory)
if ( !sym->type ) { // typ = symbol?
if ( dot )
Error( "symbols can't start with '.'" );
typ = SYMBOL;
val = (long)sym; // value = address of the symbol ptr
if ( !sym->first ) { // symbol already exists?
sym->first = true; // no, then implicitly define it
sym->defined = false; // symbol value not defined
}
} else {
typ = OPCODE; // an opcode
val = sym->val; // parameter, ID
if ( dot && ( val < 0x100 || val >= 0x200 ) ) // only pseudo opcodes
Error( "opcodes can't start with '.'" );
}
} else
Error( "symbols can't start with '$' or digits" );
}
} else {
typ = OPCODE;
switch ( c ) {
case '>':
if ( *sp == '>' ) {
val = 0x120; // >> recognized
sp++;
}
break;
case '<':
if ( *sp == '<' ) {
val = 0x121; // << recognized
sp++;
}
break;
case '=': // = matches EQU
val = EQU;
break;
case '\'': // an ASCII character with '.'
val = AktLine[ sp - AktUpLine ]; // not capitalized ASCII character
if ( ( !val ) || ( sp[ 1 ] != '\'' ) ) {
val = '\'';
} else {
sp++;
typ = NUM; // typ: a number
if ( *sp++ != '\'' )
break;
}
break;
case '\"': // an ASCII string with "..."
sp2 = sp;
base = sp - AktUpLine; // offset to the line
while ( *sp2++ != '\"' )
; // search for the end of the string
sp2 = (char *)malloc( sp2 - sp ); // allocate a buffer for the string
val = (long)sp2;
if ( !sp2 )
break;
else {
while ( *sp++ != '\"' ) // end of the string found?
*sp2++ = AktLine[ base++ ]; // copy characters
*sp2 = 0;
}
typ = STRING; // type: a string
break;
default:
val = c;
}
}
cp->typ = typ;
cp->val = val; // copy into the command buffer
cp++;
if ( verboseMode >= 3 )
switch ( typ ) {
case ILLEGAL:
MSG( 3, "ILLEGAL\n" );
break;
case NUM:
MSG( 3, "NUM: %lX\n", val );
break;
case OPCODE:
if ( val < 0x100 )
MSG( 3, "OPCODE: '%c'\n", val );
else
MSG( 3, "OPCODE: %lX\n", val );
break;
case SYMBOL:
MSG( 3, "SYMBOL: %s\n", val );
break;
case STRING:
MSG( 3, "STRING: \"%s\"\n", (char *)val );
break;
}
}
cp->typ = ILLEGAL;
cp->val = 0; // terminate the command buffer
}