-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer.ts
140 lines (129 loc) · 3.7 KB
/
lexer.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// simplified from acorn (MIT license)
/**
 * Reports whether a UTF-16 code unit is a line terminator.
 *
 * Recognised values are LF (10), CR (13), LINE SEPARATOR (U+2028) and
 * PARAGRAPH SEPARATOR (U+2029); every other value yields `false`.
 */
function isNewLine(code: number) {
  switch (code) {
    case 10: // "\n"
    case 13: // "\r"
    case 0x2028:
    case 0x2029:
      return true;
    default:
      return false;
  }
}
/**
 * Converts a Unicode code point into a JavaScript string.
 *
 * Values up to 0xFFFF become a single UTF-16 code unit. Larger values are
 * split into a high/low surrogate pair after subtracting the 0x10000 offset.
 */
function codePointToString(ch: number) {
  const fitsInOneUnit = ch <= 0xffff;
  if (!fitsInOneUnit) {
    const offset = ch - 0x10000;
    const highSurrogate = 0xd800 + (offset >> 10); // top 10 bits
    const lowSurrogate = 0xdc00 + (offset & 0x03ff); // bottom 10 bits
    return String.fromCharCode(highSurrogate, lowSurrogate);
  }
  return String.fromCharCode(ch);
}
/**
 * Minimal lexer for single- or double-quoted string literals with
 * JavaScript-style escape sequences.
 *
 * The instance is stateful: `readString` stores the text in `input` and
 * leaves `pos` just past the closing quote on success, so a caller can read
 * `pos` to learn where the literal ended. All methods signal a malformed
 * literal by returning `null` rather than throwing.
 */
export class Lexer {
  /** Text most recently handed to `readString`. */
  input = "";
  /** Current read offset (in UTF-16 code units) into `input`. */
  pos = 0;

  /**
   * Reads the quoted string literal that starts at `input[pos]`.
   *
   * @param input Text to read from.
   * @param pos Offset of the opening quote (`"` or `'`).
   * @returns The decoded string contents, or `null` when `pos` is at or past
   *   the end of `input`, the character at `pos` is not a quote, the literal
   *   is unterminated, it contains an unescaped line terminator, or it
   *   contains an invalid escape sequence.
   */
  readString(input: string, pos: number): string | null {
    if (pos >= input.length) return null;
    this.input = input;
    this.pos = pos;
    const quote = this.input.charCodeAt(pos);
    // 34 = '"', 39 = "'"
    if (quote !== 34 && quote !== 39) return null;
    let out = "";
    // Plain runs are copied in chunks; chunkStart marks the first character
    // that has not yet been appended to `out`.
    let chunkStart = ++this.pos;
    while (true) {
      if (this.pos >= this.input.length) return null; // unterminated literal
      const ch = this.input.charCodeAt(this.pos);
      if (ch === quote) break;
      if (ch === 92) {
        // Backslash: flush the pending chunk, then decode the escape.
        out += this.input.slice(chunkStart, this.pos);
        const escaped = this.readEscapedChar();
        if (escaped === null) return null;
        out += escaped;
        chunkStart = this.pos;
      } else {
        // NOTE(review): ES2019 permits raw U+2028/U+2029 inside string
        // literals; this lexer rejects them — confirm that is intended.
        if (isNewLine(ch)) return null;
        ++this.pos;
      }
    }
    // Append the final chunk and step over the closing quote.
    out += this.input.slice(chunkStart, this.pos++);
    return out;
  }

  /**
   * Decodes one escape sequence. On entry `pos` must point at the backslash;
   * on return `pos` is just past the consumed sequence.
   *
   * @returns The replacement text (the empty string for a line
   *   continuation), or `null` for an invalid escape: malformed `\x` / `\u`,
   *   `\8`, `\9`, or any octal escape other than a lone `\0` that is not
   *   followed by `8` or `9`.
   */
  readEscapedChar(): string | null {
    // Step over the backslash to the selector character, then past it.
    const ch = this.input.charCodeAt(++this.pos);
    ++this.pos;
    switch (ch) {
      case 110: // n
        return "\n";
      case 114: // r
        return "\r";
      case 120: {
        // x: exactly two hex digits
        const code = this.readHexChar(2);
        return code === null ? null : String.fromCharCode(code);
      }
      case 117: {
        // u: \uXXXX or \u{X...}
        const code = this.readCodePoint();
        return code === null ? null : codePointToString(code);
      }
      case 116: // t
        return "\t";
      case 98: // b
        return "\b";
      case 118: // v
        return "\u000b";
      case 102: // f
        return "\f";
      case 13: // CR: line continuation; treat CR LF as a single terminator
        if (this.input.charCodeAt(this.pos) === 10) {
          ++this.pos;
        }
        return "";
      case 10: // LF: line continuation
        return "";
      case 56: // 8
      case 57: // 9
        return null;
      default: {
        if (ch >= 48 && ch <= 55) {
          // Octal digit 0-7: gather up to three octal digits starting at the
          // selector character.
          const match = this.input
            .slice(this.pos - 1, this.pos + 2)
            .match(/^[0-7]+/);
          if (match === null) return null;
          let octalStr = match[0];
          let octal = parseInt(octalStr, 8);
          if (octal > 255) {
            // Too large for one byte: drop the last digit.
            octalStr = octalStr.slice(0, -1);
            octal = parseInt(octalStr, 8);
          }
          this.pos += octalStr.length - 1;
          const next = this.input.charCodeAt(this.pos);
          // Only a bare "\0" not followed by 8 or 9 is accepted.
          if (octalStr !== "0" || next === 56 || next === 57) return null;
          return String.fromCharCode(octal);
        }
        // Backslash before U+2028 / U+2029 is also a line continuation.
        if (isNewLine(ch)) return "";
        // Any other escaped code unit stands for itself (e.g. \" or \\).
        return String.fromCharCode(ch);
      }
    }
  }

  /**
   * Reads exactly `len` digits of the given radix starting at `pos`,
   * advancing `pos` past every digit consumed.
   *
   * @param radix Numeric base; a character whose digit value is `>= radix`
   *   stops the scan.
   * @param len Required number of digits.
   * @returns The parsed value, or `null` when fewer than `len` digits were
   *   available or `len` is not positive (no digit is consumed in that case).
   */
  readInt(radix: number, len: number): number | null {
    const start = this.pos;
    let total = 0;
    for (let i = 0; i < len; ++i, ++this.pos) {
      const code = this.input.charCodeAt(this.pos);
      let val: number;
      if (code >= 97) {
        val = code - 97 + 10; // 'a'...
      } else if (code >= 65) {
        val = code - 65 + 10; // 'A'...
      } else if (code >= 48 && code <= 57) {
        val = code - 48; // '0'-'9'
      } else {
        val = Infinity; // not a digit (also covers reading past the end)
      }
      if (val >= radix) break;
      total = total * radix + val;
    }
    const consumed = this.pos - start;
    if (consumed === 0 || consumed !== len) return null;
    return total;
  }

  /**
   * Reads exactly `len` hexadecimal digits at `pos`.
   *
   * @returns The parsed value, or `null` if `readInt` fails.
   */
  readHexChar(len: number): number | null {
    return this.readInt(16, len);
  }

  /**
   * Reads the payload of a `\u` escape, with `pos` just past the `u`:
   * either four hex digits, or `{` + one or more hex digits + `}`.
   *
   * @returns The code point, or `null` when the digits are malformed, the
   *   closing `}` is missing, or a braced value exceeds 0x10FFFF.
   */
  readCodePoint(): number | null {
    // 123 = '{'
    if (this.input.charCodeAt(this.pos) !== 123) {
      return this.readHexChar(4);
    }
    ++this.pos;
    // When no '}' exists, indexOf yields -1 and the resulting negative
    // length makes readHexChar return null.
    const digitCount = this.input.indexOf("}", this.pos) - this.pos;
    const code = this.readHexChar(digitCount);
    ++this.pos; // step over '}'
    if (code !== null && code > 0x10ffff) return null;
    return code;
  }
}