Skip to content

Commit

Permalink
Merge pull request #277 from dmaresma/fix/issue_276_and_add_pattern
Browse files Browse the repository at this point in the history
Fix issue #276 and improve Snowflake external table support with the PATTERN keyword
  • Loading branch information
xnuinside authored Aug 9, 2024
2 parents 363d27a + 80dc236 commit 46af2ff
Show file tree
Hide file tree
Showing 8 changed files with 539 additions and 54,644 deletions.
12 changes: 10 additions & 2 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
**v1.5.3**
### Improvements
#### Snowflake
1. In Snowflake, add `pattern` token for the external table statement, and improve location rendering

### Fixes

1. In Snowflake, fix unexpected behaviour when a file_format name is given - https://github.com/xnuinside/simple-ddl-parser/issues/273
2. In Snowflake, fix unexpected error when the STRIP_OUTER_ARRAY property appears in a file_format statement - https://github.com/xnuinside/simple-ddl-parser/issues/276

**v1.5.2**
### Improvements
#### MySQL
1. Added support for COLLATE - https://github.com/xnuinside/simple-ddl-parser/pull/266/files

### Fixes

1. In Snowflake Fix unexpected behaviour when file_format name given - https://github.com/xnuinside/simple-ddl-parser/issues/273

**v1.5.1**
### Improvements
Expand Down
2 changes: 1 addition & 1 deletion simple_ddl_parser/ddl_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def tokens_not_columns_names(self, t: LexToken) -> LexToken:
t_tag = self.parse_tags_symbols(t)
if t_tag:
return t_tag
if "ARRAY" in t.value:
if t.value.startswith("ARRAY"):
t.type = "ARRAY"
return t
elif self.lexer.is_like:
Expand Down
3 changes: 2 additions & 1 deletion simple_ddl_parser/dialects/hql.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ class HQL:
def p_expression_location(self, p: List) -> None:
    """expr : expr LOCATION STRING
    | expr LOCATION DQ_STRING
    | expr LOCATION multi_id_or_string
    """
    # NOTE: the docstring above is a PLY/yacc grammar rule — it is consumed
    # by the parser generator and must not be reworded.
    # Propagate the accumulated expression dict and attach the location
    # value, which is always the last symbol of the production.
    expr = p[1]
    expr["location"] = list(p)[-1]
    p[0] = expr
Expand Down
10 changes: 8 additions & 2 deletions simple_ddl_parser/dialects/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ def p_multi_id_or_string(self, p: List) -> None:
p[0] = p[1]
p[0].append(p_list[-1])
else:
value = " ".join(p_list[1:])
p[0] = value
totrim = " ".join(p_list[1:])
p[0] = totrim.replace(' = ', '=').replace('= ', '')

def p_fmt_equals(self, p: List) -> None:
"""fmt_equals : id LP multi_id_or_string RP
Expand Down Expand Up @@ -210,6 +210,12 @@ def p_expression_auto_refresh(self, p: List) -> None:
p_list = remove_par(list(p))
p[0]["auto_refresh"] = p_list[-1]

def p_expression_pattern(self, p: List) -> None:
    """expr : expr PATTERN table_property_equals"""
    # NOTE: the docstring above is a PLY/yacc grammar rule — do not reword it.
    # Carry the expression dict forward and record the PATTERN value
    # (last production symbol, with surrounding parentheses stripped).
    expr = p[1]
    symbols = remove_par(list(p))
    expr["pattern"] = symbols[-1]
    p[0] = expr

def p_as_virtual(self, p: List):
"""as_virtual : AS LP id LP id LP pid RP COMMA pid RP RP
| AS LP id LP pid RP RP
Expand Down
3 changes: 1 addition & 2 deletions simple_ddl_parser/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,5 @@


class SimpleDDLParserException(Exception):
    """Base exception for the simple ddl parser library.

    All library-specific errors should derive from this class so callers
    can catch every parser failure with a single except clause.
    """
    # A docstring is a sufficient class body; the redundant `pass` was removed.
55,119 changes: 489 additions & 54,630 deletions simple_ddl_parser/parsetab.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions simple_ddl_parser/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
"DATA_RETENTION_TIME_IN_DAYS",
"MAX_DATA_EXTENSION_TIME_IN_DAYS",
"CHANGE_TRACKING",
"PATTERN",
"AUTO_REFRESH",
"FILE_FORMAT",
"TABLE_FORMAT",
Expand Down
33 changes: 27 additions & 6 deletions tests/dialects/test_snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,16 +827,19 @@ def test_order_sequence():

def test_virtual_column_ext_table():
ddl = """
create or replace external table if not exists TABLE_DATA_SRC.EXT_PAYLOAD_MANIFEST_WEB (
create external table if not exists TABLE_DATA_SRC.EXT_PAYLOAD_MANIFEST_WEB (
"type" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 1), '=', 2 )),
"year" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 2), '=', 2)),
"month" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 3), '=', 2)),
"day" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 4), '=', 2)),
"cast_YEAR" VARCHAR(200) AS (GET(VALUE,'c1')::string),
"path" VARCHAR(255) AS (METADATA$FILENAME)
)
partition by ("type", "year", "month", "day", "path")
location=@ADL_Azure_Storage_Account_Container_Name/
location=@ADL_Azure_Storage_Account_Container_Name/year=2023/month=08/
auto_refresh=false
pattern='*.csv'
file_format = (TYPE = JSON NULL_IF = () STRIP_OUTER_ARRAY = TRUE )
;
"""
result_ext_table = DDLParser(ddl, normalize_names=True, debug=True).run(
Expand Down Expand Up @@ -901,6 +904,19 @@ def test_virtual_column_ext_table():
"as": "SPLIT_PART(SPLIT_PART(METADATA$FILENAME,'/',4),'=',2)"
},
},
{
"name": "cast_YEAR",
"type": "VARCHAR",
"size": 200,
"references": None,
"unique": False,
"nullable": True,
"default": None,
"check": None,
"generated": {
"as": "GET(VALUE,'c1') ::string"
},
},
{
"name": "path",
"type": "VARCHAR",
Expand All @@ -924,12 +940,17 @@ def test_virtual_column_ext_table():
"schema": "TABLE_DATA_SRC",
"table_name": "EXT_PAYLOAD_MANIFEST_WEB",
"tablespace": None,
"replace": True,
"external": True,
"if_not_exists": True,
"location": "@ADL_Azure_Storage_Account_Container_Name/",
"location": "@ADL_Azure_Storage_Account_Container_Name/year=2023/month=08/",
"table_properties": {
"auto_refresh": False,
"pattern": "'*.csv'",
"file_format" : {
"TYPE" : "JSON",
"NULL_IF": "()",
"STRIP_OUTER_ARRAY" : "TRUE",
}
},
}
]
Expand All @@ -943,7 +964,7 @@ def test_virtual_column_table():
id bigint,
derived bigint as (id * 10)
)
location = @Database.Schema.ADL_Azure_Storage_Account_Container_Name/entity
location = @ADL_Azure_Storage_Account_Container_Name/entity
auto_refresh = false
file_format = (TYPE=JSON NULL_IF=('field') DATE_FORMAT=AUTO TRIM_SPACE=TRUE)
stage_file_format = (TYPE=JSON NULL_IF=())
Expand Down Expand Up @@ -991,7 +1012,7 @@ def test_virtual_column_table():
"tablespace": None,
"replace": True,
"if_not_exists": True,
"location": "ADL_Azure_Storage_Account_Container_Name/entity",
"location": "@ADL_Azure_Storage_Account_Container_Name/entity",
"table_properties": {
"auto_refresh": False,
"file_format": {
Expand Down

0 comments on commit 46af2ff

Please sign in to comment.