From 82bb315ef92780ab7bf8469a3259c2c5ea767f84 Mon Sep 17 00:00:00 2001 From: David Hagen Date: Sun, 11 Jun 2023 18:10:49 -0400 Subject: [PATCH] Remove context-specific parse (#78) --- src/parsita/metaclasses.py | 4 -- src/parsita/options.py | 24 +-------- src/parsita/parsers/__init__.py | 2 +- src/parsita/parsers/_base.py | 92 ++++++++++++++++----------------- tests/test_basic.py | 70 ++++++++++++++----------- 5 files changed, 85 insertions(+), 107 deletions(-) diff --git a/src/parsita/metaclasses.py b/src/parsita/metaclasses.py index d2f4941..fcbd559 100644 --- a/src/parsita/metaclasses.py +++ b/src/parsita/metaclasses.py @@ -74,11 +74,9 @@ class GeneralParsersMeta(type): def __prepare__(mcs, name, bases, **_): # noqa: N804 old_options = { "handle_literal": options.handle_literal, - "parse_method": options.parse_method, } options.handle_literal = options.wrap_literal - options.parse_method = options.basic_parse return ParsersDict(old_options) @@ -120,7 +118,6 @@ def __prepare__(mcs, name, bases, whitespace: str = options.default_whitespace): old_options = { "whitespace": options.whitespace, "handle_literal": options.handle_literal, - "parse_method": options.parse_method, } # Store whitespace in global location so regex parsers can see it @@ -133,7 +130,6 @@ def __prepare__(mcs, name, bases, whitespace: str = options.default_whitespace): options.whitespace = RegexParser(whitespace) options.handle_literal = options.default_handle_literal - options.parse_method = options.default_parse_method return ParsersDict(old_options) diff --git a/src/parsita/options.py b/src/parsita/options.py index f7a25b2..f8622a5 100644 --- a/src/parsita/options.py +++ b/src/parsita/options.py @@ -4,14 +4,11 @@ "default_handle_literal", "wrap_literal", "handle_literal", - "default_parse_method", - "basic_parse", - "parse_method", ] import re from typing import Any, Sequence -from .state import Input, Output, Result, SequenceReader, StringReader +from .state import Input # Global mutable 
state @@ -33,22 +30,3 @@ def wrap_literal(literal: Sequence[Input]): handle_literal = default_handle_literal - - -def default_parse_method(self, source: str) -> Result[Output]: - from .parsers import completely_parse_reader - - reader = StringReader(source) - - return completely_parse_reader(self, reader) - - -def basic_parse(self, source: Sequence[Input]) -> Result[Output]: - from .parsers import completely_parse_reader - - reader = SequenceReader(source) - - return completely_parse_reader(self, reader) - - -parse_method = default_parse_method diff --git a/src/parsita/parsers/__init__.py b/src/parsita/parsers/__init__.py index cce412a..d137862 100644 --- a/src/parsita/parsers/__init__.py +++ b/src/parsita/parsers/__init__.py @@ -1,6 +1,6 @@ from ._alternative import FirstAlternativeParser, LongestAlternativeParser, first, longest from ._any import AnyParser, any1 -from ._base import Parser, completely_parse_reader +from ._base import Parser from ._conversion import ConversionParser, TransformationParser from ._debug import DebugParser, debug from ._end_of_source import EndOfSourceParser, eof diff --git a/src/parsita/parsers/_base.py b/src/parsita/parsers/_base.py index 740b666..b4e0947 100644 --- a/src/parsita/parsers/_base.py +++ b/src/parsita/parsers/_base.py @@ -1,12 +1,23 @@ from __future__ import annotations -__all__ = ["Parser", "completely_parse_reader"] +__all__ = ["Parser"] -from types import MethodType -from typing import Any, Generic, List, Optional, Sequence +from typing import Any, Generic, List, Optional, Sequence, Union from .. import options -from ..state import Continue, Failure, Input, Output, ParseError, Reader, Result, State, Success +from ..state import ( + Continue, + Failure, + Input, + Output, + ParseError, + Reader, + Result, + SequenceReader, + State, + StringReader, + Success, +) # Singleton indicating that no result is yet in the memo missing = object() @@ -46,9 +57,6 @@ class Parser(Generic[Input, Output]): name. 
""" - def __init__(self): - self.parse = MethodType(options.parse_method, self) - def cached_consume(self, state: State[Input], reader: Reader[Input]) -> Optional[Continue[Input, Output]]: """Match this parser at the given location. @@ -103,15 +111,9 @@ def consume(self, state: State[Input], reader: Reader[Input]) -> Optional[Contin """ raise NotImplementedError() - def parse(self, source: Sequence[Input]) -> Result[Output]: + def parse(self, source: Union[Sequence[Input], Reader]) -> Result[Output]: """Abstract method for completely parsing a source. - While ``parse`` is a method on every parser for convenience, it - is really a function of the context. It is the duty of the context - to set the correct ``Reader`` to use and to handle whitespace - not handled by the parsers themselves. This method is pulled from the - context when the parser is initialized. - Args: source: What will be parsed. @@ -122,8 +124,35 @@ def parse(self, source: Sequence[Input]) -> Result[Output]: ``Failure``. If the parser succeeded but the source was not completely consumed, a ``Failure`` with a message indicating this is returned. + + If a ``Reader`` is passed in, it is used directly. Otherwise, the source + is converted to an appropriate ``Reader``. If the source is ``str``, a + ``StringReader`` is used. Otherwise, a ``SequenceReader`` is used. 
""" - raise NotImplementedError() + from ._end_of_source import eof + + if isinstance(source, Reader): + reader = source + elif isinstance(source, str): + reader = StringReader(source, 0) + else: + reader = SequenceReader(source) + + state: State[Input] = State() + + status = (self << eof).cached_consume(state, reader) + + if isinstance(status, Continue): + return Success(status.value) + else: + used = set() + unique_expected = [] + for expected in state.expected: + if expected not in used: + used.add(expected) + unique_expected.append(expected) + + return Failure(ParseError(state.farthest, unique_expected)) name: Optional[str] = None @@ -209,36 +238,3 @@ def __ge__(self, other) -> Parser: from ._conversion import TransformationParser return TransformationParser(self, other) - - -def completely_parse_reader(parser: Parser[Input, Output], reader: Reader[Input]) -> Result[Output]: - """Consume reader and return Success only on complete consumption. - - This is a helper function for ``parse`` methods, which return ``Success`` - when the input is completely consumed and ``Failure`` with an appropriate - message otherwise. - - Args: - parser: The parser doing the consuming - reader: The input being consumed - - Returns: - A Returns ``Result`` containing either the successfully parsed value or - an error from the farthest parsed point in the input. 
- """ - from ._end_of_source import eof - - state: State[Input] = State() - status = (parser << eof).cached_consume(state, reader) - - if isinstance(status, Continue): - return Success(status.value) - else: - used = set() - unique_expected = [] - for expected in state.expected: - if expected not in used: - used.add(expected) - unique_expected.append(expected) - - return Failure(ParseError(state.farthest, unique_expected)) diff --git a/tests/test_basic.py b/tests/test_basic.py index ef71dfe..6a92535 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -6,6 +6,7 @@ ParseError, RecursionError, SequenceReader, + StringReader, Success, any1, eof, @@ -32,9 +33,9 @@ class TestParsers(GeneralParsers): assert TestParsers.a.parse("a") == Success("a") assert TestParsers.ab.parse("ab") == Success("ab") - assert TestParsers.ab.parse("abb") == Failure(ParseError(SequenceReader("abb", 2), ["end of source"])) - assert TestParsers.ab.parse("ca") == Failure(ParseError(SequenceReader("ca", 0), ["a"])) - assert TestParsers.ab.parse("ac") == Failure(ParseError(SequenceReader("ac", 1), ["b"])) + assert TestParsers.ab.parse("abb") == Failure(ParseError(StringReader("abb", 2), ["end of source"])) + assert TestParsers.ab.parse("ca") == Failure(ParseError(StringReader("ca", 0), ["a"])) + assert TestParsers.ab.parse("ac") == Failure(ParseError(StringReader("ac", 1), ["b"])) assert str(TestParsers.a) == "a = 'a'" assert str(TestParsers.ab) == "ab = 'ab'" @@ -64,8 +65,8 @@ class TestParsers(GeneralParsers): assert TestParsers.a.parse("a") == Success("a") assert TestParsers.a.parse("A") == Success("A") assert TestParsers.d.parse("2") == Success("2") - assert TestParsers.d.parse("23") == Failure(ParseError(SequenceReader("23", 1), ["end of source"])) - assert TestParsers.d.parse("a") == Failure(ParseError(SequenceReader("a", 0), ["digit"])) + assert TestParsers.d.parse("23") == Failure(ParseError(StringReader("23", 1), ["end of source"])) + assert TestParsers.d.parse("a") == 
Failure(ParseError(StringReader("a", 0), ["digit"])) assert str(TestParsers.a) == "a = pred(any1, letter A)" @@ -75,7 +76,7 @@ class TestParsers(GeneralParsers): b = lit("b") assert TestParsers.a.parse("b") == Success("b") - assert TestParsers.a.parse("ab") == Failure(ParseError(SequenceReader("ab", 0), ["b"])) + assert TestParsers.a.parse("ab") == Failure(ParseError(StringReader("ab", 0), ["b"])) def test_forward_expression(): @@ -134,7 +135,7 @@ class TestParsers(GeneralParsers): b = opt(a) assert TestParsers.b.parse("a") == Success(["a"]) - assert TestParsers.b.parse("c") == Failure(ParseError(SequenceReader("c", 0), ["a", "end of source"])) + assert TestParsers.b.parse("c") == Failure(ParseError(StringReader("c", 0), ["a", "end of source"])) assert str(TestParsers.b) == "b = opt(a)" @@ -144,7 +145,7 @@ class TestParsers(GeneralParsers): b = opt(a) assert TestParsers.b.parse("ab") == Success(["ab"]) - assert TestParsers.b.parse("ac") == Failure(ParseError(SequenceReader("ac", 1), ["b"])) + assert TestParsers.b.parse("ac") == Failure(ParseError(StringReader("ac", 1), ["b"])) assert str(TestParsers.b) == "b = opt(a)" @@ -153,7 +154,7 @@ class TestParsers(GeneralParsers): b = opt("ab") assert TestParsers.b.parse("ab") == Success(["ab"]) - assert TestParsers.b.parse("ac") == Failure(ParseError(SequenceReader("ac", 1), ["b"])) + assert TestParsers.b.parse("ac") == Failure(ParseError(StringReader("ac", 1), ["b"])) assert str(TestParsers.b) == "b = opt('ab')" @@ -167,9 +168,9 @@ class TestParsers(GeneralParsers): assert TestParsers.ab.parse("a") == Success("a") assert TestParsers.ab.parse("b") == Success("b") - assert TestParsers.ab.parse("c") == Failure(ParseError(SequenceReader("c", 0), ["a", "b"])) + assert TestParsers.ab.parse("c") == Failure(ParseError(StringReader("c", 0), ["a", "b"])) assert TestParsers.bc.parse("cd") == Success("cd") - assert TestParsers.bc.parse("ce") == Failure(ParseError(SequenceReader("ce", 1), ["d"])) + assert TestParsers.bc.parse("ce") == 
Failure(ParseError(StringReader("ce", 1), ["d"])) assert str(TestParsers.bc) == "bc = b | c" @@ -186,8 +187,8 @@ class TestParsers(GeneralParsers): for parser in [TestParsers.back, TestParsers.front, TestParsers.both]: assert parser.parse("aaaa") == Success("aaaa") assert parser.parse("cc") == Success("cc") - assert parser.parse("bbc") == Failure(ParseError(SequenceReader("bbc", 2), ["b"])) - assert parser.parse("bbba") == Failure(ParseError(SequenceReader("bbba", 3), ["end of source"])) + assert parser.parse("bbc") == Failure(ParseError(StringReader("bbc", 2), ["b"])) + assert parser.parse("bbba") == Failure(ParseError(StringReader("bbba", 3), ["end of source"])) str(TestParsers.back), "back = a | b | c | d" str(TestParsers.front), "front = a | b | c | d" @@ -208,7 +209,7 @@ class TestParsers(GeneralParsers): ac = a & "c" either = ab | ac - assert TestParsers.either.parse("cc") == Failure(ParseError(SequenceReader("cc", 0), ["a"])) + assert TestParsers.either.parse("cc") == Failure(ParseError(StringReader("cc", 0), ["a"])) def test_first(): @@ -231,8 +232,8 @@ class TestParsers(GeneralParsers): assert TestParsers.ab.parse("ab") == Success(["a", "b"]) assert TestParsers.bc.parse("bcd") == Success(["b", "cd"]) assert TestParsers.abc.parse("abcd") == Success(["a", "b", "cd"]) - assert TestParsers.abc.parse("abc") == Failure(ParseError(SequenceReader("abc", 3), ["d"])) - assert TestParsers.abc.parse("abf") == Failure(ParseError(SequenceReader("abf", 2), ["c"])) + assert TestParsers.abc.parse("abc") == Failure(ParseError(StringReader("abc", 3), ["d"])) + assert TestParsers.abc.parse("abf") == Failure(ParseError(StringReader("abf", 2), ["c"])) assert str(TestParsers.abc) == "abc = a & b & c" @@ -259,7 +260,7 @@ class TestParsers(GeneralParsers): assert TestParsers.ab.parse("ab") == Success("a") assert TestParsers.ac.parse("ac") == Success("a") - assert TestParsers.ac.parse("aa") == Failure(ParseError(SequenceReader("aa", 1), ["c"])) + assert TestParsers.ac.parse("aa") 
== Failure(ParseError(StringReader("aa", 1), ["c"])) assert str(TestParsers.ac) == "ac = a << c" @@ -285,12 +286,12 @@ class TestParsers(GeneralParsers): assert TestParsers.bs.parse("bbbb") == Success(["b", "b", "b", "b"]) assert TestParsers.bs.parse("b") == Success(["b"]) - assert TestParsers.bs.parse("") == Failure(ParseError(SequenceReader("", 0), ["b"])) - assert TestParsers.bs.parse("bbbc") == Failure(ParseError(SequenceReader("bbbc", 3), ["b", "end of source"])) + assert TestParsers.bs.parse("") == Failure(ParseError(StringReader("", 0), ["b"])) + assert TestParsers.bs.parse("bbbc") == Failure(ParseError(StringReader("bbbc", 3), ["b", "end of source"])) assert TestParsers.cs.parse("ccc") == Success(["c", "c", "c"]) assert TestParsers.cs.parse("c") == Success(["c"]) assert TestParsers.cs.parse("") == Success([]) - assert TestParsers.cs.parse("cccb") == Failure(ParseError(SequenceReader("cccb", 3), ["c", "end of source"])) + assert TestParsers.cs.parse("cccb") == Failure(ParseError(StringReader("cccb", 3), ["c", "end of source"])) assert str(TestParsers.bs) == "bs = rep1('b')" assert str(TestParsers.cs) == "cs = rep('c')" @@ -311,12 +312,12 @@ class TestParsers(GeneralParsers): assert TestParsers.bf.parse("bfbf") == Success(["bf", "bf"]) assert TestParsers.bf.parse("bf") == Success(["bf"]) - assert TestParsers.bf.parse("") == Failure(ParseError(SequenceReader("", 0), ["b"])) - assert TestParsers.bf.parse("bfbc") == Failure(ParseError(SequenceReader("bfbc", 3), ["f"])) + assert TestParsers.bf.parse("") == Failure(ParseError(StringReader("", 0), ["b"])) + assert TestParsers.bf.parse("bfbc") == Failure(ParseError(StringReader("bfbc", 3), ["f"])) assert TestParsers.cf.parse("cfcfcf") == Success(["cf", "cf", "cf"]) assert TestParsers.cf.parse("cf") == Success(["cf"]) assert TestParsers.cf.parse("") == Success([]) - assert TestParsers.cf.parse("cfcb") == Failure(ParseError(SequenceReader("cfcb", 3), ["f"])) + assert TestParsers.cf.parse("cfcb") == 
Failure(ParseError(StringReader("cfcb", 3), ["f"])) assert str(TestParsers.bf) == "bf = rep1('bf')" assert str(TestParsers.cf) == "cf = rep('cf')" @@ -328,7 +329,7 @@ class TestParsers(GeneralParsers): assert TestParsers.bs.parse("b,b,b") == Success(["b", "b", "b"]) assert TestParsers.bs.parse("b") == Success(["b"]) - assert TestParsers.bs.parse("") == Failure(ParseError(SequenceReader("", 0), ["b"])) + assert TestParsers.bs.parse("") == Failure(ParseError(StringReader("", 0), ["b"])) assert TestParsers.cs.parse("c,c,c") == Success(["c", "c", "c"]) assert TestParsers.cs.parse("c") == Success(["c"]) assert TestParsers.cs.parse("") == Success([]) @@ -343,7 +344,7 @@ class TestParsers(GeneralParsers): assert TestParsers.bs.parse("b,bb") == Success(["b", "b", "b"]) assert TestParsers.bs.parse("b") == Success(["b"]) - assert TestParsers.bs.parse("") == Failure(ParseError(SequenceReader("", 0), ["b"])) + assert TestParsers.bs.parse("") == Failure(ParseError(StringReader("", 0), ["b"])) assert TestParsers.cs.parse("cc,c") == Success(["c", "c", "c"]) assert TestParsers.cs.parse("c") == Success(["c"]) assert TestParsers.cs.parse("") == Success([]) @@ -373,7 +374,7 @@ class TestParsers(GeneralParsers): # Recursion happens in middle of stream for parser in (TestParsers.bad_rep, TestParsers.bad_rep1, TestParsers.bad_repsep, TestParsers.bad_rep1sep): with pytest.raises(RecursionError) as actual: - parser.parse("aab") + parser.parse(SequenceReader("aab")) assert actual.value == RecursionError(parser, SequenceReader("aab", 2)) assert str(actual.value) == ( f"Infinite recursion detected in {parser!r}; " @@ -383,7 +384,7 @@ class TestParsers(GeneralParsers): # Recursion happens at end of stream for parser in (TestParsers.bad_rep, TestParsers.bad_rep1, TestParsers.bad_repsep, TestParsers.bad_rep1sep): with pytest.raises(RecursionError) as actual: - parser.parse("aa") + parser.parse(SequenceReader("aa")) assert actual.value == RecursionError(parser, SequenceReader("aa", 2)) assert 
str(actual.value) == ( f"Infinite recursion detected in {parser!r}; " @@ -450,7 +451,7 @@ class TestParsers(GeneralParsers): assert TestParsers.bba.parse("bba") == Success(["b", "b", "a"]) assert TestParsers.bba.parse("a") == Success(["a"]) - assert TestParsers.bba.parse("ab") == Failure(ParseError(SequenceReader("ab", 1), ["end of source"])) + assert TestParsers.bba.parse("ab") == Failure(ParseError(StringReader("ab", 1), ["end of source"])) assert str(TestParsers.end_a) == "end_a = 'a' << eof" @@ -461,7 +462,7 @@ class TestParsers(GeneralParsers): assert TestParsers.aaa.parse("aabb") == Success([["a", "a"], 1, ["b", "b"]]) assert TestParsers.aaa.parse("") == Success([[], 1, []]) - assert TestParsers.bbb.parse("aabb") == Failure(ParseError(SequenceReader("aabb", 2), ["something else"])) + assert TestParsers.bbb.parse("aabb") == Failure(ParseError(StringReader("aabb", 2), ["something else"])) assert str(TestParsers.aaa) == "aaa = rep('a') & success(1) & rep('b')" assert str(TestParsers.bbb) == "bbb = 'aa' & failure('something else') & 'bb'" @@ -484,7 +485,7 @@ class TestParser(GeneralParsers): no_termination_content = f"""{block_start}{ambiguous_content}""" result_3 = TestParser.ambiguous.parse(no_termination_content) - assert result_3 == Failure(ParseError(SequenceReader(no_termination_content, 7), ["b"])) + assert result_3 == Failure(ParseError(StringReader(no_termination_content, 7), ["b"])) def test_any(): @@ -492,10 +493,17 @@ class TestParsers(GeneralParsers): any2 = any1 & any1 assert TestParsers.any2.parse("ab") == Success(["a", "b"]) - assert TestParsers.any2.parse("a") == Failure(ParseError(SequenceReader("a", 1), ["anything"])) + assert TestParsers.any2.parse("a") == Failure(ParseError(StringReader("a", 1), ["anything"])) assert str(TestParsers.any2) == "any2 = any1 & any1" +def test_sequence_reader(): + class TestParsers(GeneralParsers): + uhh = lit([1, 2]) + + assert TestParsers.uhh.parse([1, 2]) == Success([1, 2]) + + def test_nested_class(): class 
TestOuter(GeneralParsers): start = "%"