Skip to content

Commit

Permalink
Add binary type (#140)
Browse files Browse the repository at this point in the history
* Add binary type

* Update project files

* Update schema version to v1.4.0
  • Loading branch information
calumabarnett authored Dec 3, 2020
1 parent 723fd19 commit e0fd72b
Show file tree
Hide file tree
Showing 11 changed files with 23 additions and 7 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## v7.4.0

### Change
- Add support for `binary` type

## v7.3.0

### Change
- Set the default AWS Glue version to 2.0

## v7.2.0

### Change
Expand Down
2 changes: 1 addition & 1 deletion etl_manager/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
)

_web_link_to_table_json_schema = (
"https://moj-analytical-services.github.io/metadata_schema/table/v1.3.0.json"
"https://moj-analytical-services.github.io/metadata_schema/table/v1.4.0.json"
)

try:
Expand Down
4 changes: 4 additions & 0 deletions etl_manager/specs/glue_spark_dict.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
"glue":"timestamp",
"spark":"TimestampType"
},
"binary": {
"glue":"binary",
"spark":"BinaryType"
},
"boolean":{
"glue":"boolean",
"spark":"BooleanType"
Expand Down
1 change: 1 addition & 0 deletions etl_manager/specs/table_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"decimal",
"date",
"datetime",
"binary",
"boolean",
"struct",
"array"
Expand Down
2 changes: 1 addition & 1 deletion etl_manager/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def _unnest_github_zipfile_and_return_new_zip_path(zip_path):
# Note the recursive (?R). We only allow 'character' (the agnostic type) in a non-complex type, but must allow string within complex types.
# The user will still get an error from the schema for string as a non-complex type.
COL_TYPE_REGEX = regex.compile(
r"(character|int|long|float|double|date|datetime|boolean|decimal\(\d+,\d+\)|struct<(([a-zA-Z_]+):((?R)(,?)))+>|array<(?R)>)"
r"(character|int|long|float|double|date|datetime|boolean|binary|decimal\(\d+,\d+\)|struct<(([a-zA-Z_]+):((?R)(,?)))+>|array<(?R)>)"
)


Expand Down
2 changes: 1 addition & 1 deletion example/meta_data/db1/employees.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"$schema": "https://moj-analytical-services.github.io/metadata_schema/table/v1.3.0.json",
"$schema": "https://moj-analytical-services.github.io/metadata_schema/table/v1.4.0.json",
"name": "employees",
"description": "table containing employee information",
"data_format": "parquet",
Expand Down
2 changes: 1 addition & 1 deletion example/meta_data/db1/pay.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"$schema": "https://moj-analytical-services.github.io/metadata_schema/table/v1.3.0.json",
"$schema": "https://moj-analytical-services.github.io/metadata_schema/table/v1.4.0.json",
"name": "pay",
"description": "Check glue specific works",
"data_format": "csv",
Expand Down
2 changes: 1 addition & 1 deletion example/meta_data/db1/teams.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"$schema": "https://moj-analytical-services.github.io/metadata_schema/table/v1.3.0.json",
"$schema": "https://moj-analytical-services.github.io/metadata_schema/table/v1.4.0.json",
"name": "teams",
"description": "month snapshot of which employee was working in what team",
"data_format": "parquet",
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool]
[tool.poetry]
name = "etl_manager"
version = "7.3.0"
version = "7.4.0"
description = "A python package to manage etl processes on AWS"
license = "MIT"
authors = ["Karik Isichei <[email protected]>"]
Expand Down
2 changes: 1 addition & 1 deletion tests/data/data_types/test_table.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"$schema": "https://moj-analytical-services.github.io/metadata_schema/table/v1.3.0.json",
"$schema": "https://moj-analytical-services.github.io/metadata_schema/table/v1.4.0.json",
"name": "test_table",
"description": "table containing columns with each of the data types for testing database creation",
"data_format": "json",
Expand Down
1 change: 1 addition & 0 deletions tests/test_column_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def test_create_tables_using_etl_manager_api(self, mock_client_create_table):
("decimal(38,0)", True),
("date", True),
("datetime", True),
("binary", True),
("boolean", True),
("struct", False),
("array", False),
Expand Down

0 comments on commit e0fd72b

Please sign in to comment.