Skip to content

Commit

Permalink
release 1.6.0 with refactoring & adding new tokens, adding Athena
Browse files Browse the repository at this point in the history
  • Loading branch information
xnuinside committed Aug 11, 2024
1 parent 548042a commit 0794199
Show file tree
Hide file tree
Showing 27 changed files with 63,858 additions and 678 deletions.
31 changes: 30 additions & 1 deletion CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,37 @@
**v1.6.0**
### IMPORTANT:
In this version there are some output changes & fixes that can break your code.
1. Now all arguments inside brackets are parsed as separate strings in the list.
For example:
`file_format = (TYPE=JSON NULL_IF=('field'))` was previously parsed as 'NULL_IF': "('field')",
now it will be: 'NULL_IF': ["'field'"],

2. Added separate tokens for EQ `=` and IN (previously they were also parsed as IDs) - for internal info, for contributors.

3. Some check statements in columns are now parsed correctly; IN statements are also parsed as normal lists.
So this statement include_exclude_ind CHAR(1) NOT NULL CONSTRAINT chk_metalistcombo_logicalopr
CHECK (include_exclude_ind IN ('I', 'E')),


will produce this output:

{'check': {'constraint_name': 'chk_metalistcombo_logicalopr',
'statement': {'in_statement': {'in': ["'I'", "'E'"],
'name': 'include_exclude_ind'}}},


### Fixes
1. The word DEFAULT no longer ends up under the key 'default' (previously it did in some cases)

### New Features
1. Added Athena output mode and initial support - https://github.com/datacontract/datacontract-cli/issues/332


**v1.5.4**
### Improvements
#### Snowflake :
1. In Snowflake add `pattern` token for external table statement, and improve location rendering
2.


**v1.5.3**
### Fixes
Expand Down
31 changes: 30 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -489,11 +489,40 @@ for help with debugging & testing support for BigQuery dialect DDLs:
* https://github.com/kalyan939

## Changelog
**v1.6.0**
### IMPORTANT:
In this version there are some output changes & fixes that can break your code.
1. Now all arguments inside brackets are parsed as separate strings in the list.
For example:
`file_format = (TYPE=JSON NULL_IF=('field'))` was previously parsed as 'NULL_IF': "('field')",
now it will be: 'NULL_IF': ["'field'"],

2. Added separate tokens for EQ `=` and IN (previously they were also parsed as IDs) - for internal info, for contributors.

3. Some check statements in columns are now parsed correctly; IN statements are also parsed as normal lists.
So this statement include_exclude_ind CHAR(1) NOT NULL CONSTRAINT chk_metalistcombo_logicalopr
CHECK (include_exclude_ind IN ('I', 'E')),


will produce this output:

{'check': {'constraint_name': 'chk_metalistcombo_logicalopr',
'statement': {'in_statement': {'in': ["'I'", "'E'"],
'name': 'include_exclude_ind'}}},


### Fixes
1. The word DEFAULT no longer ends up under the key 'default' (previously it did in some cases)

### New Features
1. Added Athena output mode and initial support - https://github.com/datacontract/datacontract-cli/issues/332


**v1.5.4**
### Improvements
#### Snowflake :
1. In Snowflake add `pattern` token for external table statement, and improve location rendering
2.


**v1.5.3**
### Fixes
Expand Down
41 changes: 40 additions & 1 deletion docs/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,46 @@ for help with debugging & testing support for BigQuery dialect DDLs:
Changelog
---------

**v1.6.0**

IMPORTANT:
^^^^^^^^^^

In this version there are some output changes & fixes that can break your code.


#.
Now all arguments inside brackets are parsed as separate strings in the list.
For example:
``file_format = (TYPE=JSON NULL_IF=('field'))`` was previously parsed as 'NULL_IF': "('field')",
now it will be: 'NULL_IF': ["'field'"],

#.
Added separate tokens for EQ ``=`` and IN (previously they were also parsed as IDs) - for internal info, for contributors.

#.
Some check statements in columns are now parsed correctly; IN statements are also parsed as normal lists.
So this statement include_exclude_ind CHAR(1) NOT NULL CONSTRAINT chk_metalistcombo_logicalopr
CHECK (include_exclude_ind IN ('I', 'E')),

will produce this output:

{'check': {'constraint_name': 'chk_metalistcombo_logicalopr',
'statement': {'in_statement': {'in': ["'I'", "'E'"],
'name': 'include_exclude_ind'}}},

Fixes
^^^^^


#. The word DEFAULT no longer ends up under the key 'default' (previously it did in some cases)

New Features
^^^^^^^^^^^^


#. Added Athena output mode and initial support - https://github.com/datacontract/datacontract-cli/issues/332

**v1.5.4**

Improvements
Expand All @@ -565,7 +605,6 @@ Snowflake :


#. In Snowflake add ``pattern`` token for external table statement, and improve location rendering
2.

**v1.5.3**

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "simple-ddl-parser"
version = "1.5.4"
version = "1.6.0"
description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
authors = ["Iuliia Volkova <[email protected]>"]
license = "MIT"
Expand Down
19 changes: 16 additions & 3 deletions simple_ddl_parser/ddl_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
HQL,
MSSQL,
PSQL,
Athena,
BaseSQL,
BigQuery,
IBMDb2,
Expand Down Expand Up @@ -37,6 +38,7 @@ class Dialects(
BigQuery,
IBMDb2,
PSQL,
Athena,
):
pass

Expand Down Expand Up @@ -115,8 +117,13 @@ def set_lexer_tags(self, t: LexToken) -> None:
elif t.type == "CHECK":
self.lexer.check = True

def t_EQ(self, t: LexToken) -> LexToken:
    r"(=)+"
    # NOTE(review): the raw string above looks like the lexer's match pattern
    # for this token (one or more "=") rather than prose documentation, so it
    # is kept byte-for-byte — confirm against the lexer library in use.
    # Tag the token as EQ, then hand it to set_last_token and return whatever
    # that helper gives back.
    t.type = "EQ"
    recorded = self.set_last_token(t)
    return recorded

def t_DOT(self, t: LexToken) -> LexToken:
r"\."
r"(\.)+"
t.type = "DOT"
return self.set_last_token(t)

Expand Down Expand Up @@ -154,14 +161,18 @@ def is_creation_name(self, t: LexToken) -> bool:
"TYPE",
"DOMAIN",
"TABLESPACE",
"INDEX",
"CONSTRAINT",
"EXISTS",
]
return (
t.value not in skip_id_tokens
and t.value.upper() not in ["IF"]
and self.lexer.last_token in exceptional_keys
and (
self.lexer.last_token in exceptional_keys
or (
self.lexer.last_token == "INDEX" and self.lexer.is_table is not True
)
)
and not self.exceptional_cases(t.value.upper())
)

Expand All @@ -188,6 +199,8 @@ def t_AUTOINCREMENT(self, t: LexToken):

def t_ID(self, t: LexToken):
r"([0-9]+[.][0-9]*([e][+-]?[0-9]+)?|[0-9]\.[0-9])\w|([a-zA-Z_,0-9:><\/\\\=\-\+\~\%$@#\|&?;*\()!{}\[\]\`\[\]]+)"
if len(t.value) > 1 and t.value.endswith(","):
t.value = t.value[:-1]
t.type = tok.symbol_tokens.get(t.value, "ID")

if t.type == "LP":
Expand Down
2 changes: 2 additions & 0 deletions simple_ddl_parser/dialects/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from simple_ddl_parser.dialects.athena import Athena
from simple_ddl_parser.dialects.bigquery import BigQuery
from simple_ddl_parser.dialects.hql import HQL
from simple_ddl_parser.dialects.ibm import IBMDb2
Expand All @@ -22,4 +23,5 @@
"IBMDb2",
"BaseSQL",
"PSQL",
"Athena",
]
11 changes: 11 additions & 0 deletions simple_ddl_parser/dialects/athena.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from typing import List


class Athena:
    # Athena-specific parser rules; this class is listed among the bases of
    # the combined ``Dialects`` class in ddl_parser.py.

    def p_escaped_by(self, p: List) -> None:
        """expr : expr ESCAPED BY STRING_BASE"""
        # NOTE(review): the docstring above has the shape of a parser grammar
        # production (like the other p_* rules), so it is kept verbatim and
        # the explanation lives in comments instead — confirm.
        #
        # Result of the rule is the incoming expr dict (p[1]) with an extra
        # "escaped_by" key holding the last symbol of the production.
        p[0] = p[1]
        # Read the last symbol through a list copy, exactly as the original
        # did, instead of indexing ``p`` with a negative index directly.
        escape_char = list(p)[-1]
        # A value containing a doubled backslash is collapsed to a single
        # backslash character; anything else is stored unchanged.
        if "\\\\" in escape_char:
            escape_char = "\\"
        p[0]["escaped_by"] = escape_char
5 changes: 3 additions & 2 deletions simple_ddl_parser/dialects/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ def p_multiple_options(self, p):
p[0] = p[1]

def p_options(self, p):
"""options : OPTIONS LP id_equals RP"""
"""options : OPTIONS LP multi_id_equals RP"""
p_list = list(p)
if not isinstance(p[1], dict):
p[0] = {"options": p[3]}
options = [{key: value} for key, value in p[3].items()]
p[0] = {"options": options}
else:
p[0] = p[1]
if len(p) == 4:
Expand Down
22 changes: 16 additions & 6 deletions simple_ddl_parser/dialects/hql.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,22 @@

class HQL:
def p_expression_location(self, p: List) -> None:
"""expr : expr LOCATION STRING
"""expr : expr LOCATION EQ STRING
| expr LOCATION EQ DQ_STRING
| expr LOCATION EQ multi_id_or_string
| expr LOCATION DQ_STRING
| expr LOCATION STRING
| expr LOCATION multi_id_or_string
| expr LOCATION EQ ID EQ ID EQ ID
"""
# last expr for sample like location=@ADL_Azure_Storage_Account_Container_Name/year=2023/month=08/
p[0] = p[1]
p_list = list(p)
p[0]["location"] = p_list[-1]
if len(p_list) == 9:
location = "".join(p_list[4:])
else:
location = p_list[-1]
p[0]["location"] = location

def p_expression_clustered(self, p: List) -> None:
"""expr : expr ID ON LP pid RP
Expand Down Expand Up @@ -73,10 +82,10 @@ def p_multi_assignments(self, p: List) -> None:
p[0].update(p_list[-1])

def p_assignment(self, p: List) -> None:
"""assignment : id id id
| STRING id STRING
| id id STRING
| STRING id id
"""assignment : id EQ id
| STRING EQ STRING
| id EQ STRING
| STRING EQ id
| STRING id"""
p_list = remove_par(list(p))
if "state" in self.lexer.__dict__:
Expand Down Expand Up @@ -142,6 +151,7 @@ def p_expression_partitioned_by_hql(self, p: List) -> None:
"""expr : expr PARTITIONED BY pid_with_type
| expr PARTITIONED BY LP pid RP
| expr PARTITIONED BY LP multiple_funct RP
| expr PARTITIONED BY funct
"""
p[0] = p[1]
p_list = remove_par(list(p))
Expand Down
5 changes: 2 additions & 3 deletions simple_ddl_parser/dialects/ibm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@

class IBMDb2:
def p_expr_index_in(self, p: List) -> None:
"""expr : expr INDEX id id"""
"""expr : expr INDEX IN id"""
p_list = list(p)
if p_list[-2].upper() == "IN":
p[1].update({"index_in": p_list[-1]})
p[1].update({"index_in": p_list[-1]})
p[0] = p[1]
6 changes: 3 additions & 3 deletions simple_ddl_parser/dialects/mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ def p_with(self, p: List) -> None:
p[0]["with"]["properties"] = p_list[-1]["properties"]

def p_equals(self, p: List) -> None:
"""equals : id id id
| id id ON
| id id id DOT id
"""equals : id EQ id
| id EQ ON
| id EQ dot_id
"""
p_list = list(p)
if "." in p_list:
Expand Down
8 changes: 7 additions & 1 deletion simple_ddl_parser/dialects/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,17 @@

class MySQL:
def p_engine(self, p: List) -> None:
"""expr : expr ENGINE id id"""
"""expr : expr ENGINE EQ id"""
p_list = list(p)
p[0] = p[1]
p[0]["engine"] = p_list[-1]

def p_db_properties(self, p: List) -> None:
    """expr : expr id EQ id_or_string"""
    # NOTE(review): the docstring above has the shape of a parser grammar
    # production, so it is kept verbatim — confirm.
    #
    # Stores a generic ``name = value`` property on the expr dict: the key is
    # the second symbol (p[2]) and the value is the last symbol, read through
    # a list copy of ``p`` as the original did.
    *_, prop_value = list(p)
    prop_name = p[2]
    p[0] = p[1]
    p[0][prop_name] = prop_value

def p_on_update(self, p: List) -> None:
"""on_update : ON UPDATE id
| ON UPDATE STRING
Expand Down
1 change: 1 addition & 0 deletions simple_ddl_parser/dialects/redshift.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def p_encode(self, p: List) -> None:
def p_expression_diststyle(self, p: List) -> None:
"""expr : expr id id
| expr id KEY
| expr IN id
"""
p_list = list(p)
if p_list[-2] == "IN":
Expand Down
Loading

0 comments on commit 0794199

Please sign in to comment.