
Commit

Unicode processing fixes.
James.Hester committed Mar 31, 2021
1 parent bee1953 commit 7acf788
Showing 5 changed files with 12 additions and 5 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "Lerche"
uuid = "d42ef402-04e6-4356-9f73-091573ea58dc"
authors = ["James.Hester <[email protected]>"]
version = "0.4.0"
version = "0.4.1"

[deps]
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
4 changes: 2 additions & 2 deletions src/lexer.jl
@@ -314,7 +314,7 @@ end

next_token(tl::TraditionalLexer,lex_state) = begin
line_ctr = lex_state.line_ctr
#println("Pos: $(line_ctr.char_pos), remaining text\n"*lex_state.text[line_ctr.char_pos:end]*"EOF")
#println("Pos: $(line_ctr.char_pos) out of $(lex_state.text_length), remaining text\n"*lex_state.text[line_ctr.char_pos:end]*"EOF")
while line_ctr.char_pos <= lex_state.text_length
res = match(tl,lex_state.text,line_ctr.char_pos)
if res === nothing
@@ -360,7 +360,7 @@ mutable struct LexerState
end

LexerState(text,line_ctr;last_token=nothing) = begin
-LexerState(text,line_ctr,last_token,length(text))
+LexerState(text,line_ctr,last_token,ncodeunits(text))
end

struct ContextualLexer <: Lexer
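
A note on the change above: the lexer loop compares line_ctr.char_pos against text_length and passes char_pos to regex matching as a byte offset, so the bound stored in LexerState must be a byte count (ncodeunits), not a character count (length). For ASCII input the two coincide, which is why the bug only surfaced on Unicode text. A minimal standalone sketch in plain Julia (the scan helper and its regex are illustrative only, not part of Lerche's API):

    text = "♫♪♫"            # three characters, nine UTF-8 bytes

    length(text)             # 3 -- character count
    ncodeunits(text)         # 9 -- byte count; valid byte indices run 1:9

    # Byte-position scan in the spirit of next_token (illustrative):
    function scan(text)
        pos = 1
        while pos <= ncodeunits(text)       # a length(text) bound would stop after byte 3
            m = match(r"♫♪|♫", text, pos)   # regex matching starts at a byte offset
            m === nothing && break
            println("matched ", m.match, " at byte ", m.offset)
            pos = m.offset + ncodeunits(m.match)  # advance in bytes past the match
        end
    end

    scan(text)   # matched ♫♪ at byte 1; matched ♫ at byte 7
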
2 changes: 1 addition & 1 deletion src/load_grammar.jl
@@ -482,7 +482,7 @@ _literal_to_pattern(literal) = begin
end
v = v[1:flag_start-1]
@assert v[1] == v[end] && occursin(v[1], "\"/")
-x = v[2:end-1] #drop delimiters
+x = v[2:prevind(v,end,1)] #drop delimiters
### TODO fix escaping
s = _fix_escaping(x)
if literal.type_ == "STRING"
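
A note on the change above: v[2:end-1] computes end-1 by integer arithmetic on lastindex(v), and when the character just before the closing delimiter is multi-byte that index lands inside the character, so the slice throws a StringIndexError. prevind(v, end, 1) steps back by one character instead of one byte. A small illustrative example in plain Julia (the literal is made up for demonstration):

    v = "\"♫♪\""                       # a quoted terminal as it might appear in grammar text

    lastindex(v)                        # 8 -- byte index of the closing quote
    # v[2:lastindex(v)-1]               # v[2:7]; byte 7 is inside ♪, so this throws StringIndexError

    v[2:prevind(v, lastindex(v), 1)]    # "♫♪" -- prevind returns the start of the previous character
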
2 changes: 1 addition & 1 deletion src/parsers/lalr_parser.jl
@@ -84,7 +84,7 @@ feed_token!(ps::ParserState,token;is_end=false) = begin
arg = nothing #create outside scope of try
action = nothing # ditto
state = first(state_stack)
#println("State $state, current token $token")
#println("State $state, current token $(token.type_) ($token)")
try
#println("Possibles: $(keys(states[state]))\n")
action,arg = states[state][token.type_]
7 changes: 7 additions & 0 deletions test/test_parser.jl
@@ -113,6 +113,13 @@ make_parser_test(lexer,parser) = begin
@test x.data == "start" && x.children == ["12", "2"]
end

@testset "Test unicode" begin
g = make_lark("""start: B A
B: "♫♪"
A: "♫" """)
x = Lerche.parse(g,"♫♪♫")
end

@testset "EBNF stack depth" begin
g = make_lark("""start: a+
a : "a" """)
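
A note on the new test: "♫♪♫" is three characters but nine bytes, so the terminal B ("♫♪") ends at byte 6 and A ("♫") starts at byte 7; any position arithmetic done in characters rather than bytes misplaces that boundary. Plain Julia shows the layout (illustrative, independent of the test harness):

    s = "♫♪♫"

    collect(eachindex(s))   # [1, 4, 7] -- byte indices where each character starts
    ncodeunits(s)           # 9 -- total bytes, versus length(s) == 3 characters
    s[7:end]                # "♫" -- the input for terminal A begins at byte 7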
