Fix a few issues in dragonfly/parser and its tests

- Fix the UnicodeDecodeError that occurred when unencoded Unicode strings were used in the String or CharacterSeries parser classes (issue t4ngo#12)
- Make the doctests pass by commenting out the `assert isinstance(...)` checks in the decode_* methods
- Test Unicode strings in the parser tests
- Fix some formatting and style issues in the test file
Versatilus · Apr 29, 2018 · 9fe1d04 · 9fe1d04
1 parent 69d29bf
commit 9fe1d04
Show file tree

Hide file tree

Showing 2 changed files with 52 additions and 18 deletions.
diff --git a/dragonfly/parser.py b/dragonfly/parser.py
@@ -202,19 +202,19 @@ def __init__(self, depth, actor, begin):
             self.value = None; self.end = None
 
     def decode_attempt(self, element):
-        assert isinstance(element, ParserElementBase)
+        # assert isinstance(element, ParserElementBase)
         self._depth += 1
         self._stack.append(State._Frame(self._depth, element, self._index))
         self._log_step(element, "attempt")
 
     def decode_retry(self, element):
-        assert isinstance(element, ParserElementBase)
+        # assert isinstance(element, ParserElementBase)
         frame = self._get_frame_from_actor(element)
         self._depth = frame.depth
         self._log_step(element, "retry")
 
     def decode_rollback(self, element):
-        assert isinstance(element, ParserElementBase)
+        # assert isinstance(element, ParserElementBase)
         frame = self._get_frame_from_depth()
         if not frame or frame.actor != element:
             raise grammar_.GrammarError("Parser decoding stack broken")
@@ -226,7 +226,7 @@ def decode_rollback(self, element):
         self._log_step(element, "rollback")
 
     def decode_success(self, element, value=None):
-        assert isinstance(element, ParserElementBase)
+        # assert isinstance(element, ParserElementBase)
         self._log_step(element, "success")
         frame = self._get_frame_from_depth()
         if not frame or frame.actor != element:
@@ -236,7 +236,7 @@ def decode_success(self, element, value=None):
         self._depth -= 1
 
     def decode_failure(self, element):
-        assert isinstance(element, ParserElementBase)
+        # assert isinstance(element, ParserElementBase)
         frame = self._stack.pop()
         self._index = frame.begin
         self._depth = frame.depth
@@ -652,7 +652,10 @@ class String(ParserElementBase):
 
     def __init__(self, string, name=None):
         ParserElementBase.__init__(self, name)
-        self._string = string
+        if isinstance(string, unicode):
+            self._string = string.encode("utf-8")
+        else:
+            self._string = string
 
     #-----------------------------------------------------------------------
     # Methods for runtime introspection.
@@ -695,17 +698,27 @@ def __str__(self):
     #-----------------------------------------------------------------------
     # Methods for runtime recognition processing.
 
+    def char_matches(self, c):
+        if isinstance(c, unicode):
+            c = c.encode("utf-8")
+        if self._set:
+            return c in self._set
+        else:
+            return False
+
     def parse(self, state):
         state.decode_attempt(self)
 
         # Gobble as many valid characters as possible.
         count = 0
         if self._exclude:
-            while not state.finished() and state.peek(1) not in self._set:
+            while (not state.finished() and not
+                    self.char_matches(state.peek(1))):
                 state.next(1)
                 count += 1
         else:
-            while not state.finished() and state.peek(1) in self._set:
+            while (not state.finished() and
+                    self.char_matches(state.peek(1))):
                 state.next(1)
                 count += 1
 

diff --git a/dragonfly/test/test_parser.py b/dragonfly/test/test_parser.py
@@ -20,14 +20,13 @@
 
 
 import unittest
-import time
 import string
 
 from dragonfly import parser
 
-
 #===========================================================================
 
+
 class TestParsers(unittest.TestCase):
 
     def setUp(self):
@@ -36,32 +35,54 @@ def setUp(self):
     def test_character_series(self):
         """ Test CharacterSeries parser class. """
 
+        # Test with ascii characters
         self._test_multiple(
             parser.CharacterSeries(string.letters),
             [
                 ("abc", ["abc"]),
             ],
-            must_finish = False)
+            must_finish=False)
+
+        # Test with Unicode characters
+        self._test_multiple(
+            parser.Letters(),
+            [
+                (u"abc", [u"abc"]),
+            ],
+            must_finish=False
+        )
 
     def test_repetition(self):
         """ Test repetition parser class. """
-        word = parser.CharacterSeries(string.letters)
-        whitespace = parser.CharacterSeries(string.whitespace)
+        word = parser.Letters()
+        whitespace = parser.Whitespace()
         p = parser.Repetition(parser.Alternative((word, whitespace)))
+
+        # Test with ascii letters
         input_output = (
-            ("abc", ["abc"] ),
+            ("abc", ["abc"]),
             ("abc abc", ["abc", " ", "abc"]),
-            ("abc abc\t\t\n   cba", ["abc", " ", "abc", "\t\t\n   ", "cba"]),
+            ("abc abc\t\t\n   cba", ["abc", " ", "abc", "\t\t\n   ",
+                                     "cba"]),
             )
         self._test_single(p, input_output)
 
+        # Test with Unicode characters
+        input_output = (
+            (u"abc", [u"abc"]),
+            (u"abc abc", [u"abc", u" ", u"abc"]),
+            (u"abc abc\t\t\n   cba", [
+                u"abc", " ", u"abc", u"\t\t\n   ", u"cba"]),
+        )
+        self._test_single(p, input_output)
+
     def test_optional_greedy(self):
         """ Test greedy setting of optional parser class. """
         input = "abc"
         p = parser.Sequence([
             parser.Sequence([
                 parser.String("a"),
-                parser.Optional(parser.String("b"), greedy = False)
+                parser.Optional(parser.String("b"), greedy=False)
                 ]),
             parser.Sequence([
                 parser.Optional(parser.String("b")),
@@ -76,7 +97,7 @@ def test_optional_greedy(self):
         generator.next()
         root = state.build_parse_tree()
         self.assertEqual(root.value(), expected_output_1)
-    
+
         generator.next()
         root = state.build_parse_tree()
         self.assertEqual(root.value(), expected_output_2)
@@ -87,7 +108,7 @@ def _test_single(self, parser_element, input_output):
             result = p.parse(input)
             self.assertEqual(result, output)
 
-    def _test_multiple(self, parser_element, input_outputs, must_finish = True):
+    def _test_multiple(self, parser_element, input_outputs, must_finish=True):
         p = parser.Parser(parser_element)
         for input, outputs in input_outputs:
             results = p.parse_multiple(input, must_finish)