From e9c31706845ffb12429d0f656f25ed09bf9518f9 Mon Sep 17 00:00:00 2001 From: MoSheikh Date: Wed, 6 Nov 2024 15:09:00 -0600 Subject: [PATCH] Add support for boolean expressions and quoted columns (#1286) * Add support for boolean expressions and quoted columns * Add AlwaysTrue & AlwaysFalse support plus tests * Add test for quoted column * Remove commented code --------- Co-authored-by: Mohammad Sheikh --- pyiceberg/expressions/parser.py | 49 +++++++++++++++++++++++--------- tests/expressions/test_parser.py | 18 ++++++++++-- 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py index 61aa1647df..dcd8dceb2c 100644 --- a/pyiceberg/expressions/parser.py +++ b/pyiceberg/expressions/parser.py @@ -21,6 +21,7 @@ CaselessKeyword, DelimitedList, Group, + MatchFirst, ParserElement, ParseResults, Suppress, @@ -57,6 +58,7 @@ StartsWith, ) from pyiceberg.expressions.literals import ( + BooleanLiteral, DecimalLiteral, Literal, LongLiteral, @@ -77,7 +79,9 @@ NAN = CaselessKeyword("nan") LIKE = CaselessKeyword("like") -identifier = Word(alphas, alphanums + "_$").set_results_name("identifier") +unquoted_identifier = Word(alphas, alphanums + "_$") +quoted_identifier = Suppress('"') + unquoted_identifier + Suppress('"') +identifier = MatchFirst([unquoted_identifier, quoted_identifier]).set_results_name("identifier") column = DelimitedList(identifier, delim=".", combine=False).set_results_name("column") like_regex = r"(?P(?(? Reference: string = sgl_quoted_string.set_results_name("raw_quoted_string") decimal = common.real().set_results_name("decimal") integer = common.signed_integer().set_results_name("integer") -literal = Group(string | decimal | integer).set_results_name("literal") -literal_set = Group(DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer)).set_results_name("literal_set") +literal = Group(string | decimal | integer | boolean).set_results_name("literal") +literal_set = Group( + DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer) | DelimitedList(boolean) +).set_results_name("literal_set") @boolean.set_parse_action -def _(result: ParseResults) -> BooleanExpression: +def _(result: ParseResults) -> Literal[bool]: if strtobool(result.boolean): - return AlwaysTrue() + return BooleanLiteral(True) else: - return AlwaysFalse() + return BooleanLiteral(False) @string.set_parse_action @@ -265,14 +271,29 @@ def handle_or(result: ParseResults) -> Or: return Or(*result[0]) -boolean_expression = infix_notation( - predicate, - [ - (Suppress(NOT), 1, opAssoc.RIGHT, handle_not), - (Suppress(AND), 2, opAssoc.LEFT, handle_and), - (Suppress(OR), 2, opAssoc.LEFT, handle_or), - ], -).set_name("expr") +def handle_always_expression(result: ParseResults) -> BooleanExpression: + # If the entire result is "true" or "false", return AlwaysTrue or AlwaysFalse + expr = result[0] + if isinstance(expr, BooleanLiteral): + if expr.value: + return AlwaysTrue() + else: + return AlwaysFalse() + return result[0] + + +boolean_expression = ( + infix_notation( + predicate, + [ + (Suppress(NOT), 1, opAssoc.RIGHT, handle_not), + (Suppress(AND), 2, opAssoc.LEFT, handle_and), + (Suppress(OR), 2, opAssoc.LEFT, handle_or), + ], + ) + .set_name("expr") + .add_parse_action(handle_always_expression) +) def parse(expr: str) -> BooleanExpression: diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py index 0bccc9b80f..6096b10fd4 100644 --- a/tests/expressions/test_parser.py +++ b/tests/expressions/test_parser.py @@ -41,14 +41,28 @@ ) -def test_true() -> None: +def test_always_true() -> None: assert AlwaysTrue() == parser.parse("true") -def test_false() -> None: +def test_always_false() -> None: assert AlwaysFalse() == parser.parse("false") +def test_quoted_column() -> None: + assert EqualTo("foo", True) == parser.parse('"foo" = TRUE') + + +def test_equals_true() -> None: + assert EqualTo("foo", True) == parser.parse("foo = true") + assert EqualTo("foo", True) == parser.parse("foo == TRUE") + + +def test_equals_false() -> None: + assert EqualTo("foo", False) == parser.parse("foo = false") + assert EqualTo("foo", False) == parser.parse("foo == FALSE") + + def test_is_null() -> None: assert IsNull("foo") == parser.parse("foo is null") assert IsNull("foo") == parser.parse("foo IS NULL")