diff --git a/Project.toml b/Project.toml index 10e8fa0..e013e38 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Lerche" uuid = "d42ef402-04e6-4356-9f73-091573ea58dc" authors = ["James.Hester "] -version = "0.4.0" +version = "0.4.1" [deps] DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" diff --git a/src/lexer.jl b/src/lexer.jl index f0f8c91..ed537bc 100644 --- a/src/lexer.jl +++ b/src/lexer.jl @@ -314,7 +314,7 @@ end next_token(tl::TraditionalLexer,lex_state) = begin line_ctr = lex_state.line_ctr - #println("Pos: $(line_ctr.char_pos), remaining text\n"*lex_state.text[line_ctr.char_pos:end]*"EOF") + #println("Pos: $(line_ctr.char_pos) out of $(lex_state.text_length), remaining text\n"*lex_state.text[line_ctr.char_pos:end]*"EOF") while line_ctr.char_pos <= lex_state.text_length res = match(tl,lex_state.text,line_ctr.char_pos) if res === nothing @@ -360,7 +360,7 @@ mutable struct LexerState end LexerState(text,line_ctr;last_token=nothing) = begin - LexerState(text,line_ctr,last_token,length(text)) + LexerState(text,line_ctr,last_token,ncodeunits(text)) end struct ContextualLexer <: Lexer diff --git a/src/load_grammar.jl b/src/load_grammar.jl index b0463fb..8aeca10 100644 --- a/src/load_grammar.jl +++ b/src/load_grammar.jl @@ -482,7 +482,7 @@ _literal_to_pattern(literal) = begin end v = v[1:flag_start-1] @assert v[1] == v[end] && occursin(v[1], "\"/") - x = v[2:end-1] #drop delimiters + x = v[2:prevind(v,end,1)] #drop delimiters ### TODO fix escaping s = _fix_escaping(x) if literal.type_ == "STRING" diff --git a/src/parsers/lalr_parser.jl b/src/parsers/lalr_parser.jl index 0a85b42..55bb6fb 100644 --- a/src/parsers/lalr_parser.jl +++ b/src/parsers/lalr_parser.jl @@ -84,7 +84,7 @@ feed_token!(ps::ParserState,token;is_end=false) = begin arg = nothing #create outside scope of try action = nothing # ditto state = first(state_stack) - #println("State $state, current token $token") + #println("State $state, current token $(token.type_) ($token)") try #println("Possibles: $(keys(states[state]))\n") action,arg = states[state][token.type_] diff --git a/test/test_parser.jl b/test/test_parser.jl index 5d1d0ff..f27c6be 100644 --- a/test/test_parser.jl +++ b/test/test_parser.jl @@ -113,6 +113,13 @@ make_parser_test(lexer,parser) = begin @test x.data == "start" && x.children == ["12", "2"] end + @testset "Test unicode" begin + g = make_lark("""start: B A + B: "♫♪" + A: "♫" """) + x = Lerche.parse(g,"♫♪♫") + end + @testset "EBNF stack depth" begin g = make_lark("""start: a+ a : "a" """)