Skip to content

Commit

Permalink
Fix a few issues in dragonfly/parser and its tests
Browse files (browse the repository at this point in the history)
- Fix UnicodeDecodeError that occurred when unencoded Unicode
  strings were used in the String or CharacterSeries parser
  classes (issue t4ngo#12)
- Make doctests pass by commenting out the isinstance assertions
- Test Unicode strings in parser tests
- Fix some formatting and style issues in test file
  • Loading branch information
drmfinlay committed Apr 29, 2018
1 parent 69d29bf commit 9fe1d04
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 18 deletions.
29 changes: 21 additions & 8 deletions dragonfly/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,19 +202,19 @@ def __init__(self, depth, actor, begin):
self.value = None; self.end = None

def decode_attempt(self, element):
assert isinstance(element, ParserElementBase)
# assert isinstance(element, ParserElementBase)
self._depth += 1
self._stack.append(State._Frame(self._depth, element, self._index))
self._log_step(element, "attempt")

def decode_retry(self, element):
assert isinstance(element, ParserElementBase)
# assert isinstance(element, ParserElementBase)
frame = self._get_frame_from_actor(element)
self._depth = frame.depth
self._log_step(element, "retry")

def decode_rollback(self, element):
assert isinstance(element, ParserElementBase)
# assert isinstance(element, ParserElementBase)
frame = self._get_frame_from_depth()
if not frame or frame.actor != element:
raise grammar_.GrammarError("Parser decoding stack broken")
Expand All @@ -226,7 +226,7 @@ def decode_rollback(self, element):
self._log_step(element, "rollback")

def decode_success(self, element, value=None):
assert isinstance(element, ParserElementBase)
# assert isinstance(element, ParserElementBase)
self._log_step(element, "success")
frame = self._get_frame_from_depth()
if not frame or frame.actor != element:
Expand All @@ -236,7 +236,7 @@ def decode_success(self, element, value=None):
self._depth -= 1

def decode_failure(self, element):
assert isinstance(element, ParserElementBase)
# assert isinstance(element, ParserElementBase)
frame = self._stack.pop()
self._index = frame.begin
self._depth = frame.depth
Expand Down Expand Up @@ -652,7 +652,10 @@ class String(ParserElementBase):

def __init__(self, string, name=None):
ParserElementBase.__init__(self, name)
self._string = string
if isinstance(string, unicode):
self._string = string.encode("utf-8")
else:
self._string = string

#-----------------------------------------------------------------------
# Methods for runtime introspection.
Expand Down Expand Up @@ -695,17 +698,27 @@ def __str__(self):
#-----------------------------------------------------------------------
# Methods for runtime recognition processing.

def char_matches(self, c):
if isinstance(c, unicode):
c = c.encode("utf-8")
if self._set:
return c in self._set
else:
return False

def parse(self, state):
state.decode_attempt(self)

# Gobble as many valid characters as possible.
count = 0
if self._exclude:
while not state.finished() and state.peek(1) not in self._set:
while (not state.finished() and not
self.char_matches(state.peek(1))):
state.next(1)
count += 1
else:
while not state.finished() and state.peek(1) in self._set:
while (not state.finished() and
self.char_matches(state.peek(1))):
state.next(1)
count += 1

Expand Down
41 changes: 31 additions & 10 deletions dragonfly/test/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,13 @@


import unittest
import time
import string

from dragonfly import parser


#===========================================================================


class TestParsers(unittest.TestCase):

def setUp(self):
Expand All @@ -36,32 +35,54 @@ def setUp(self):
def test_character_series(self):
""" Test CharacterSeries parser class. """

# Test with ascii characters
self._test_multiple(
parser.CharacterSeries(string.letters),
[
("abc", ["abc"]),
],
must_finish = False)
must_finish=False)

# Test with Unicode characters
self._test_multiple(
parser.Letters(),
[
(u"abc", [u"abc"]),
],
must_finish=False
)

def test_repetition(self):
""" Test repetition parser class. """
word = parser.CharacterSeries(string.letters)
whitespace = parser.CharacterSeries(string.whitespace)
word = parser.Letters()
whitespace = parser.Whitespace()
p = parser.Repetition(parser.Alternative((word, whitespace)))

# Test with ascii letters
input_output = (
("abc", ["abc"] ),
("abc", ["abc"]),
("abc abc", ["abc", " ", "abc"]),
("abc abc\t\t\n cba", ["abc", " ", "abc", "\t\t\n ", "cba"]),
("abc abc\t\t\n cba", ["abc", " ", "abc", "\t\t\n ",
"cba"]),
)
self._test_single(p, input_output)

# Test with Unicode characters
input_output = (
(u"abc", [u"abc"]),
(u"abc abc", [u"abc", u" ", u"abc"]),
(u"abc abc\t\t\n cba", [
u"abc", " ", u"abc", u"\t\t\n ", u"cba"]),
)
self._test_single(p, input_output)

def test_optional_greedy(self):
""" Test greedy setting of optional parser class. """
input = "abc"
p = parser.Sequence([
parser.Sequence([
parser.String("a"),
parser.Optional(parser.String("b"), greedy = False)
parser.Optional(parser.String("b"), greedy=False)
]),
parser.Sequence([
parser.Optional(parser.String("b")),
Expand All @@ -76,7 +97,7 @@ def test_optional_greedy(self):
generator.next()
root = state.build_parse_tree()
self.assertEqual(root.value(), expected_output_1)

generator.next()
root = state.build_parse_tree()
self.assertEqual(root.value(), expected_output_2)
Expand All @@ -87,7 +108,7 @@ def _test_single(self, parser_element, input_output):
result = p.parse(input)
self.assertEqual(result, output)

def _test_multiple(self, parser_element, input_outputs, must_finish = True):
def _test_multiple(self, parser_element, input_outputs, must_finish=True):
p = parser.Parser(parser_element)
for input, outputs in input_outputs:
results = p.parse_multiple(input, must_finish)
Expand Down

0 comments on commit 9fe1d04

Please sign in to comment.