// Ungronning is the reverse of gronning: turn statements
// back into JSON. The expected input grammar is:
//
// Input ::= '--'* Statement (Statement | '--')*
// Statement ::= Path Space* "=" Space* Value ";" "\n"
// Path ::= (BareWord) ("." BareWord | ("[" Key "]"))*
// Value ::= String | Number | "true" | "false" | "null" | "[]" | "{}"
// BareWord ::= (UnicodeLu | UnicodeLl | UnicodeLm | UnicodeLo | UnicodeNl | '$' | '_') (UnicodeLu | UnicodeLl | UnicodeLm | UnicodeLo | UnicodeNl | UnicodeMn | UnicodeMc | UnicodeNd | UnicodePc | '$' | '_')*
// Key ::= [0-9]+ | String
// String ::= '"' (UnescapedRune | ("\" (["\/bfnrt] | ('u' Hex))))* '"'
// UnescapedRune ::= [^#x0-#x1f"\]
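//
// For example, the statement `json.foo[0] = "bar";` consists of the
// Path `json.foo[0]` (a BareWord, a "." BareWord, and a numeric Key)
// followed by the String value `"bar"`.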
package main

import (
	"encoding/json"
	"fmt"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/pkg/errors"
)

// errRecoverable is an error type to represent errors that
// can be recovered from; e.g. an empty line in the input
type errRecoverable struct {
	msg string
}

func (e errRecoverable) Error() string {
	return e.msg
}

// A lexer holds the state for lexing statements
type lexer struct {
	text       string  // The raw input text
	pos        int     // The current byte offset in the text
	width      int     // The width of the current rune in bytes
	cur        rune    // The rune at the current position
	prev       rune    // The rune at the previous position
	tokens     []token // The tokens that have been emitted
	tokenStart int     // The starting position of the current token
}

// newLexer returns a new lexer for the provided input string
func newLexer(text string) *lexer {
	return &lexer{
		text:       text,
		pos:        0,
		tokenStart: 0,
		tokens:     make([]token, 0),
	}
}

// lex runs the lexer and returns the lexed statement
func (l *lexer) lex() statement {
	for lexfn := lexStatement; lexfn != nil; {
		lexfn = lexfn(l)
	}
	return l.tokens
}

// next gets the next rune in the input and updates the lexer state
func (l *lexer) next() rune {
	r, w := utf8.DecodeRuneInString(l.text[l.pos:])
	l.pos += w
	l.width = w
	l.prev = l.cur
	l.cur = r
	return r
}

// backup moves the lexer back one rune;
// can only be used once per call of next()
func (l *lexer) backup() {
	l.pos -= l.width
}

// peek returns the next rune in the input
// without moving the internal pointer
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// ignore skips the current token
func (l *lexer) ignore() {
	l.tokenStart = l.pos
}

// emit adds the current token to the token slice and
// moves the tokenStart pointer to the current position
func (l *lexer) emit(typ tokenTyp) {
	t := token{
		text: l.text[l.tokenStart:l.pos],
		typ:  typ,
	}
	l.tokenStart = l.pos
	l.tokens = append(l.tokens, t)
}

// accept moves the pointer if the next rune is in
// the set of valid runes
func (l *lexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}
	l.backup()
	return false
}

// acceptRun continually accepts runes from the
// set of valid runes
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// a runeCheck is a function that determines if a rune is valid
// or not so that we can do complex checks against runes
type runeCheck func(rune) bool

// acceptFunc accepts a rune if the provided runeCheck
// function returns true
func (l *lexer) acceptFunc(fn runeCheck) bool {
	if fn(l.next()) {
		return true
	}
	l.backup()
	return false
}

// acceptRunFunc continually accepts runes for as long
// as the runeCheck function returns true
func (l *lexer) acceptRunFunc(fn runeCheck) {
	for fn(l.next()) {
	}
	l.backup()
}

// acceptUntil accepts runes until it hits a delimiter
// rune contained in the provided string
func (l *lexer) acceptUntil(delims string) {
	for !strings.ContainsRune(delims, l.next()) {
		if l.cur == utf8.RuneError {
			return
		}
	}
	l.backup()
}

// acceptUntilUnescaped accepts runes until it hits a delimiter
// rune contained in the provided string, unless that rune was
// escaped with a backslash
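// For example, with remaining input `foo\"bar"` and delims `"`, the
// escaped quote is consumed and the lexer stops at the final quote.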
func (l *lexer) acceptUntilUnescaped(delims string) {
	// Read until we hit an unescaped delimiter or the end of the input
	inEscape := false
	for {
		r := l.next()
		if r == '\\' && !inEscape {
			inEscape = true
			continue
		}
		if strings.ContainsRune(delims, r) && !inEscape {
			l.backup()
			return
		}
		if l.cur == utf8.RuneError {
			return
		}
		inEscape = false
	}
}

// a lexFn accepts a lexer, performs some action on it and
// then returns an appropriate lexFn for the next stage
type lexFn func(*lexer) lexFn

// lexStatement is the highest level lexFn. Its only job
// is to determine which more specific lexFn to use
func lexStatement(l *lexer) lexFn {
	r := l.peek()

	switch {
	case r == '.' || validFirstRune(r):
		return lexBareWord
	case r == '[':
		return lexBraces
	case r == ' ', r == '=':
		return lexValue
	case r == '-':
		// grep -A etc can add '--' lines to output;
		// we'll save the text but not actually do
		// anything with them
		return lexIgnore
	case r == utf8.RuneError:
		return nil
	default:
		l.emit(typError)
		return nil
	}
}

// lexBareWord lexes for bare identifiers.
// E.g: the 'foo' in 'foo.bar' or 'foo[0]' is a bare identifier
func lexBareWord(l *lexer) lexFn {
	if l.accept(".") {
		l.emit(typDot)
	}
	if !l.acceptFunc(validFirstRune) {
		l.emit(typError)
		return nil
	}
	l.acceptRunFunc(validSecondaryRune)
	l.emit(typBare)
	return lexStatement
}

// lexBraces lexes keys contained within square braces
func lexBraces(l *lexer) lexFn {
	l.accept("[")
	l.emit(typLBrace)
	switch {
	case unicode.IsNumber(l.peek()):
		return lexNumericKey
	case l.peek() == '"':
		return lexQuotedKey
	default:
		l.emit(typError)
		return nil
	}
}

// lexNumericKey lexes numeric keys between square braces
func lexNumericKey(l *lexer) lexFn {
	l.accept("[")
	l.ignore()
	l.acceptRunFunc(unicode.IsNumber)
	l.emit(typNumericKey)
	if l.accept("]") {
		l.emit(typRBrace)
	} else {
		l.emit(typError)
		return nil
	}
	l.ignore()
	return lexStatement
}

// lexQuotedKey lexes quoted keys between square braces
func lexQuotedKey(l *lexer) lexFn {
	l.accept("[")
	l.ignore()
	l.accept(`"`)
	l.acceptUntilUnescaped(`"`)
	l.accept(`"`)
	l.emit(typQuotedKey)
	if l.accept("]") {
		l.emit(typRBrace)
	} else {
		l.emit(typError)
		return nil
	}
	l.ignore()
	return lexStatement
}

// lexValue lexes a value at the end of a statement
func lexValue(l *lexer) lexFn {
	l.acceptRun(" ")
	l.ignore()
	if l.accept("=") {
		l.emit(typEquals)
	} else {
		return nil
	}
	l.acceptRun(" ")
	l.ignore()

	switch {
	case l.accept(`"`):
		l.acceptUntilUnescaped(`"`)
		l.accept(`"`)
		l.emit(typString)
	case l.accept("t"):
		l.acceptRun("rue")
		l.emit(typTrue)
	case l.accept("f"):
		l.acceptRun("alse")
		l.emit(typFalse)
	case l.accept("n"):
		l.acceptRun("ul")
		l.emit(typNull)
	case l.accept("["):
		l.accept("]")
		l.emit(typEmptyArray)
	case l.accept("{"):
		l.accept("}")
		l.emit(typEmptyObject)
	default:
		// Assume number
		l.acceptUntil(";")
		l.emit(typNumber)
	}

	l.acceptRun(" ")
	l.ignore()
	if l.accept(";") {
		l.emit(typSemi)
	}

	// The value should always be the last thing
	// in the statement
	return nil
}

// lexIgnore accepts runes until the end of the input
// and emits them as a typIgnored token
func lexIgnore(l *lexer) lexFn {
	l.acceptRunFunc(func(r rune) bool {
		return r != utf8.RuneError
	})
	l.emit(typIgnored)
	return nil
}
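
// lexExample is an illustrative usage sketch added for this write-up; it
// is not part of the upstream gron source. It shows how the lexFn state
// machine tokenizes one statement, assuming the token and tokenTyp types
// declared elsewhere in this package.
func lexExample() {
	// Lexing `json.foo[0] = "bar";` emits the token texts:
	// `json`, `.`, `foo`, `[`, `0`, `]`, `=`, `"bar"`, `;`
	for _, t := range newLexer(`json.foo[0] = "bar";`).lex() {
		fmt.Printf("%q\n", t.text)
	}
}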

// ungronTokens turns a slice of tokens into an actual datastructure
func ungronTokens(ts []token) (interface{}, error) {
	if len(ts) == 0 {
		return nil, errRecoverable{"empty input"}
	}
	if ts[0].typ == typIgnored {
		return nil, errRecoverable{"ignored token"}
	}
	if ts[len(ts)-1].typ == typError {
		return nil, errors.New("invalid statement")
	}

	// The last token should be typSemi, so we need to check
	// that the second to last token is a value rather than
	// the last one
	if len(ts) > 1 && !ts[len(ts)-2].isValue() {
		return nil, errors.New("statement has no value")
	}

	t := ts[0]
	switch {
	case t.isPunct():
		// Skip the token
		val, err := ungronTokens(ts[1:])
		if err != nil {
			return nil, err
		}
		return val, nil

	case t.isValue():
		var val interface{}
		d := json.NewDecoder(strings.NewReader(t.text))
		d.UseNumber()
		err := d.Decode(&val)
		if err != nil {
			return nil, fmt.Errorf("invalid value `%s`", t.text)
		}
		return val, nil

	case t.typ == typBare:
		val, err := ungronTokens(ts[1:])
		if err != nil {
			return nil, err
		}
		out := make(map[string]interface{})
		out[t.text] = val
		return out, nil

	case t.typ == typQuotedKey:
		val, err := ungronTokens(ts[1:])
		if err != nil {
			return nil, err
		}
		key := ""
		err = json.Unmarshal([]byte(t.text), &key)
		if err != nil {
			return nil, fmt.Errorf("invalid quoted key `%s`", t.text)
		}
		out := make(map[string]interface{})
		out[key] = val
		return out, nil

	case t.typ == typNumericKey:
		key, err := strconv.Atoi(t.text)
		if err != nil {
			return nil, fmt.Errorf("invalid integer key `%s`", t.text)
		}
		val, err := ungronTokens(ts[1:])
		if err != nil {
			return nil, err
		}
		// There need to be at least key+1 slots in the array
		out := make([]interface{}, key+1)
		out[key] = val
		return out, nil

	default:
		return nil, fmt.Errorf("unexpected token `%s`", t.text)
	}
}
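
// ungronExample is an illustrative sketch added for this write-up; it is
// not part of the upstream gron source. It shows a lexed statement being
// turned back into a nested value: for `json.foo[0] = "bar";` the result
// is equivalent to
//
//	map[string]interface{}{
//		"json": map[string]interface{}{
//			"foo": []interface{}{"bar"},
//		},
//	}
func ungronExample() (interface{}, error) {
	return ungronTokens(newLexer(`json.foo[0] = "bar";`).lex())
}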

// recursiveMerge merges maps and slices, or returns b for scalars
func recursiveMerge(a, b interface{}) (interface{}, error) {
	switch a.(type) {
	case map[string]interface{}:
		bMap, ok := b.(map[string]interface{})
		if !ok {
			return nil, fmt.Errorf("cannot merge object with non-object")
		}
		return recursiveMapMerge(a.(map[string]interface{}), bMap)

	case []interface{}:
		bSlice, ok := b.([]interface{})
		if !ok {
			return nil, fmt.Errorf("cannot merge array with non-array")
		}
		return recursiveSliceMerge(a.([]interface{}), bSlice)

	case string, int, float64, json.Number, bool, nil:
		// json.Number is included because ungronTokens decodes
		// values with UseNumber. Scalars can't be merged, so the
		// second one wins
		return b, nil

	default:
		return nil, fmt.Errorf("unexpected data type for merge")
	}
}

// recursiveMapMerge recursively merges map[string]interface{} values
func recursiveMapMerge(a, b map[string]interface{}) (map[string]interface{}, error) {
	// Merge keys from b into a
	for k, v := range b {
		_, exists := a[k]
		if !exists {
			// Doesn't exist in a, just add it in
			a[k] = v
		} else {
			// Does exist, merge the values
			merged, err := recursiveMerge(a[k], b[k])
			if err != nil {
				return nil, err
			}
			a[k] = merged
		}
	}
	return a, nil
}

// recursiveSliceMerge recursively merges []interface{} values
func recursiveSliceMerge(a, b []interface{}) ([]interface{}, error) {
	// We need a new slice with the length of whichever
	// slice is bigger
	outLen := len(a)
	if len(b) > outLen {
		outLen = len(b)
	}
	out := make([]interface{}, outLen)

	// Copy the values from 'a' into the output slice
	copy(out, a)

	// Add the values from 'b'; merging existing keys
	for k, v := range b {
		if out[k] == nil {
			out[k] = v
		} else if v != nil {
			merged, err := recursiveMerge(out[k], b[k])
			if err != nil {
				return nil, err
			}
			out[k] = merged
		}
	}
	return out, nil
}
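
// mergeExample is an illustrative sketch added for this write-up; it is
// not part of the upstream gron source. It ties the pieces together: two
// statements are lexed, ungronned, and merged into one value, here
// equivalent to {"json": {"b": "x", "c": "y"}}.
func mergeExample() (interface{}, error) {
	a, err := ungronTokens(newLexer(`json.b = "x";`).lex())
	if err != nil {
		return nil, err
	}
	b, err := ungronTokens(newLexer(`json.c = "y";`).lex())
	if err != nil {
		return nil, err
	}
	return recursiveMerge(a, b)
}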