From 8c05fd816b4826002be64c8be16f170b1a0d6181 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 11:57:06 -0400 Subject: [PATCH 01/20] =?UTF-8?q?Rename=20types.py=20=E2=86=92=5Ftypes.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Necessary because it breaks some namespace imports in SQLAlchemy "types.py" is a pretty common package name. Other dialects appear to do the same underscore prefix approach Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/__init__.py | 2 +- src/databricks/sqlalchemy/{types.py => _types.py} | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) rename src/databricks/sqlalchemy/{types.py => _types.py} (91%) diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py index d1d4782d..6eb9d7a2 100644 --- a/src/databricks/sqlalchemy/__init__.py +++ b/src/databricks/sqlalchemy/__init__.py @@ -13,7 +13,7 @@ from databricks import sql # This import is required to process our @compiles decorators -import databricks.sqlalchemy.types +import databricks.sqlalchemy._types from databricks.sqlalchemy.base import ( diff --git a/src/databricks/sqlalchemy/types.py b/src/databricks/sqlalchemy/_types.py similarity index 91% rename from src/databricks/sqlalchemy/types.py rename to src/databricks/sqlalchemy/_types.py index 4b10fc6f..032fa3f3 100644 --- a/src/databricks/sqlalchemy/types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -1,6 +1,7 @@ import sqlalchemy from sqlalchemy.ext.compiler import compiles +import databricks.sql @compiles(sqlalchemy.types.Enum, "databricks") @compiles(sqlalchemy.types.String, "databricks") @@ -78,3 +79,15 @@ def compile_array_databricks(type_, compiler, **kw): inner = compiler.process(type_.item_type, **kw) return f"ARRAY<{inner}>" + + +class DatabricksBinaryType(sqlalchemy.types.TypeDecorator): + """ + """ + + impl = sqlalchemy.types.BINARY + + cache_ok = True + + def process_bind_param(self, value, dialect): + return databricks.sql.BINARY(value) From 68159352cfd7284bb3a1157fb971e31b60727ea6 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 13:25:58 -0400 Subject: [PATCH 02/20] Mark BINARY sqlalchemy tests as skip because connector doesn't support them Neither inline nor bound parameter implementations can handle BINARY test_suite.py::BinaryTest_databricks+databricks::test_binary_roundtrip[7\xe7\x9f] SKIPPED (pysql doesn't support binding of BINARY type parameters) test_suite.py::BinaryTest_databricks+databricks::test_binary_roundtrip[this is binary] SKIPPED (pysql doesn't support binding of BINARY type parameters) Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 7a840404..10452bf9 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -24,20 +24,9 @@ # See further: https://github.com/sqlalchemy/sqlalchemy/blob/rel_1_4_48/README.dialects.rst +@pytest.mark.skip(reason="pysql doesn't support binding of BINARY type parameters") class BinaryTest(BinaryTest): - @pytest.mark.skip(reason="Binary type is not implemented.") - def test_binary_roundtrip(self): - """ - Exception: - sqlalchemy.exc.StatementError: (builtins.AttributeError) module 'databricks.sql' has no attribute 'Binary' - """ - - @pytest.mark.skip(reason="Binary type is not implemented.") - def test_pickle_roundtrip(self): - """ - Exception: - sqlalchemy.exc.StatementError: (builtins.AttributeError) module 'databricks.sql' has no attribute 'Binary' - """ + pass class DateHistoricTest(DateHistoricTest): From 75368a50e609dfaf36c315644bd15c3d693a2172 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 13:34:27 -0400 Subject: [PATCH 03/20] Stop skipping DateHistoric tests This test requires date_historic and datetime_literals requirements be marked open. test_suite.py::DateHistoricTest_databricks+databricks::test_literal PASSED test_suite.py::DateHistoricTest_databricks+databricks::test_null PASSED test_suite.py::DateHistoricTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::DateHistoricTest_databricks+databricks::test_round_trip PASSED test_suite.py::DateHistoricTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::DateHistoricTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 19 ++++++++++++++++++- src/databricks/sqlalchemy/test/test_suite.py | 20 -------------------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 7da46005..37a7a979 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -31,4 +31,21 @@ def __some_example_requirement(self): class Requirements(sqlalchemy.testing.requirements.SuiteRequirements): - pass + + @property + def date_historic(self): + """target dialect supports representation of Python + datetime.datetime() objects with historic (pre 1970) values.""" + + return sqlalchemy.testing.exclusions.open() + + @property + def datetime_literals(self): + """target dialect supports rendering of a date, time, or datetime as a + literal string, e.g. via the TypeEngine.literal_processor() method. + + """ + + return sqlalchemy.testing.exclusions.open() + + diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 10452bf9..0f5f9d27 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -29,26 +29,6 @@ class BinaryTest(BinaryTest): pass -class DateHistoricTest(DateHistoricTest): - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.date(1727, 4, 1)" with datatype DATE - """ - - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." - ) - def test_select_direct(self): - """ - Exception: - AssertionError: '1727-04-01' != datetime.date(1727, 4, 1) - """ - - class DateTest(DateTest): @pytest.mark.skip( reason="Date type implementation needs work. Cannot render literal values." From da9b9f24ea3c0a09328e25c8f0a22cbf3dc943ca Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 13:38:28 -0400 Subject: [PATCH 04/20] Stop skipping DateTest tests No changes to requirements.py were needed test_suite.py::DateTest_databricks+databricks::test_literal PASSED test_suite.py::DateTest_databricks+databricks::test_null PASSED test_suite.py::DateTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::DateTest_databricks+databricks::test_round_trip PASSED test_suite.py::DateTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::DateTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 0f5f9d27..b5865f6e 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -29,26 +29,6 @@ class BinaryTest(BinaryTest): pass -class DateTest(DateTest): - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.date(2012, 10, 15)" with datatype DATE - """ - - @pytest.mark.skip( - reason="Date type implementation needs work. Cannot render literal values." - ) - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15' != datetime.date(2012, 10, 15) - """ - - class DateTimeHistoricTest(DateTimeHistoricTest): @pytest.mark.skip(reason="Date type implementation needs work") def test_literal(self): From b7afd1b56257377e6249cfbeb2d0315129a42110 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 20:31:42 -0400 Subject: [PATCH 05/20] Stop skipping DateTimeHistoric tests This is the first case where implement our own process_result_value method. For this to work, we need to update the dialect.colspecs map so that SQLAlchemy knows to use our custom implementation rather than its default test_suite.py::DateTimeHistoricTest_databricks+databricks::test_literal PASSED test_suite.py::DateTimeHistoricTest_databricks+databricks::test_null PASSED test_suite.py::DateTimeHistoricTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::DateTimeHistoricTest_databricks+databricks::test_round_trip PASSED test_suite.py::DateTimeHistoricTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::DateTimeHistoricTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/__init__.py | 6 +++- src/databricks/sqlalchemy/_types.py | 23 +++++++++----- src/databricks/sqlalchemy/requirements.py | 7 +++++ src/databricks/sqlalchemy/test/test_suite.py | 30 ------------------- .../sqlalchemy/test_local/test_types.py | 4 +-- 5 files changed, 30 insertions(+), 40 deletions(-) diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py index 6eb9d7a2..6d61b5eb 100644 --- a/src/databricks/sqlalchemy/__init__.py +++ b/src/databricks/sqlalchemy/__init__.py @@ -13,7 +13,7 @@ from databricks import sql # This import is required to process our @compiles decorators -import databricks.sqlalchemy._types +import databricks.sqlalchemy._types as dialect_type_impl from databricks.sqlalchemy.base import ( @@ -48,6 +48,10 @@ class DatabricksDialect(default.DefaultDialect): non_native_boolean_check_constraint: bool = False paramstyle: str = "named" + colspecs = { + sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType + } + @classmethod def dbapi(cls): return sql diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index 032fa3f3..163f1ca2 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -1,7 +1,9 @@ import sqlalchemy from sqlalchemy.ext.compiler import compiles -import databricks.sql +from typing import Union + +from datetime import datetime @compiles(sqlalchemy.types.Enum, "databricks") @compiles(sqlalchemy.types.String, "databricks") @@ -60,7 +62,7 @@ def compile_datetime_databricks(type_, compiler, **kw): """ We need to override the default DateTime compilation rendering because Databricks uses "TIMESTAMP" instead of "DATETIME" """ - return "TIMESTAMP" + return "TIMESTAMP_NTZ" @compiles(sqlalchemy.types.ARRAY, "databricks") @@ -81,13 +83,20 @@ def compile_array_databricks(type_, compiler, **kw): return f"ARRAY<{inner}>" -class DatabricksBinaryType(sqlalchemy.types.TypeDecorator): - """ +class DatabricksDateTimeNoTimezoneType(sqlalchemy.types.TypeDecorator): + """The decimal that pysql creates when it receives the contents of a TIMESTAMP_NTZ + includes a timezone of 'Etc/UTC'. But since SQLAlchemy's test suite assumes that + the sqlalchemy.types.DateTime type will return a datetime.datetime _without_ any + timezone set, we need to strip the timezone off the value received from pysql. + + It's not clear if DBR sends a timezone to pysql or if pysql is adding it. This could be a bug. """ - impl = sqlalchemy.types.BINARY + impl = sqlalchemy.types.DateTime cache_ok = True - def process_bind_param(self, value, dialect): - return databricks.sql.BINARY(value) + def process_result_value(self, value: Union[None, datetime], dialect): + if value is None: + return None + return value.replace(tzinfo=None) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 37a7a979..c2f0de02 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -39,6 +39,13 @@ def date_historic(self): return sqlalchemy.testing.exclusions.open() + @property + def datetime_historic(self): + """target dialect supports representation of Python + datetime.datetime() objects with historic (pre 1970) values.""" + + return sqlalchemy.testing.exclusions.open() + @property def datetime_literals(self): """target dialect supports rendering of a date, time, or datetime as a diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index b5865f6e..5e62e9a7 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -29,36 +29,6 @@ class BinaryTest(BinaryTest): pass -class DateTimeHistoricTest(DateTimeHistoricTest): - @pytest.mark.skip(reason="Date type implementation needs work") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(1850, 11, 10, 11, 52, 35)" with datatype DATETIME - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(1850, 11, 10, 11, 52, 35, tzinfo=),) != (datetime.datetime(1850, 11, 10, 11, 52, 35),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(1850, 11, 10, 11, 52, 35, tzinfo=),) != (datetime.datetime(1850, 11, 10, 11, 52, 35),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_select_direct(self): - """ - Exception: - AssertionError: '1850-11-10 11:52:35.000000' != datetime.datetime(1850, 11, 10, 11, 52, 35) - """ - - class DateTimeMicrosecondsTest(DateTimeMicrosecondsTest): @pytest.mark.skip(reason="Date type implementation needs work") def test_literal(self): diff --git a/src/databricks/sqlalchemy/test_local/test_types.py b/src/databricks/sqlalchemy/test_local/test_types.py index 91f11e17..f7423f69 100644 --- a/src/databricks/sqlalchemy/test_local/test_types.py +++ b/src/databricks/sqlalchemy/test_local/test_types.py @@ -36,12 +36,12 @@ class DatabricksDataType(enum.Enum): sqlalchemy.types.LargeBinary: DatabricksDataType.BINARY, sqlalchemy.types.Boolean: DatabricksDataType.BOOLEAN, sqlalchemy.types.Date: DatabricksDataType.DATE, - sqlalchemy.types.DateTime: DatabricksDataType.TIMESTAMP, + sqlalchemy.types.DateTime: DatabricksDataType.TIMESTAMP_NTZ, sqlalchemy.types.Double: DatabricksDataType.DOUBLE, sqlalchemy.types.Enum: DatabricksDataType.STRING, sqlalchemy.types.Float: DatabricksDataType.FLOAT, sqlalchemy.types.Integer: DatabricksDataType.INT, - sqlalchemy.types.Interval: DatabricksDataType.TIMESTAMP, + sqlalchemy.types.Interval: DatabricksDataType.TIMESTAMP_NTZ, sqlalchemy.types.Numeric: DatabricksDataType.DECIMAL, sqlalchemy.types.PickleType: DatabricksDataType.BINARY, sqlalchemy.types.SmallInteger: DatabricksDataType.SMALLINT, From f4a59f51631c02f88dcc2f65689640ce388b4560 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 20:35:26 -0400 Subject: [PATCH 06/20] Stop skipping DateTimeTest These were fixed by updating our @compiles directives and dialect.colspecs test_suite.py::DateTimeTest_databricks+databricks::test_literal PASSED test_suite.py::DateTimeTest_databricks+databricks::test_null PASSED test_suite.py::DateTimeTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::DateTimeTest_databricks+databricks::test_round_trip PASSED test_suite.py::DateTimeTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::DateTimeTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 30 -------------------- 1 file changed, 30 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 5e62e9a7..95d32329 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -59,36 +59,6 @@ def test_select_direct(self): """ -class DateTimeTest(DateTimeTest): - @pytest.mark.skip(reason="Date type implementation needs work") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(2012, 10, 15, 12, 57, 18)" with datatype DATETIME - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000000' != datetime.datetime(2012, 10, 15, 12, 57, 18) - """ - - class FetchLimitOffsetTest(FetchLimitOffsetTest): @pytest.mark.skip( reason="Dialect should advertise which offset rules Databricks supports. Offset handling needs work." From c817509969725bb4b988b8a98c65a86f26e5c77b Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 21:16:10 -0400 Subject: [PATCH 07/20] Stop skipping TimeTest This fix follows the same pattern used to make DateTimeHistoricTest work test_suite.py::TimeTest_databricks+databricks::test_literal PASSED test_suite.py::TimeTest_databricks+databricks::test_null PASSED test_suite.py::TimeTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::TimeTest_databricks+databricks::test_round_trip PASSED test_suite.py::TimeTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::TimeTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/__init__.py | 3 +- src/databricks/sqlalchemy/_types.py | 32 +++++++++++++ src/databricks/sqlalchemy/test/test_suite.py | 47 -------------------- 3 files changed, 34 insertions(+), 48 deletions(-) diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py index 6d61b5eb..7bb90c74 100644 --- a/src/databricks/sqlalchemy/__init__.py +++ b/src/databricks/sqlalchemy/__init__.py @@ -49,7 +49,8 @@ class DatabricksDialect(default.DefaultDialect): paramstyle: str = "named" colspecs = { - sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType + sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType, + sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType, } @classmethod diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index 163f1ca2..2c32e0da 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -100,3 +100,35 @@ def process_result_value(self, value: Union[None, datetime], dialect): if value is None: return None return value.replace(tzinfo=None) + +class DatabricksTimeType(sqlalchemy.types.TypeDecorator): + """Databricks has no native TIME type. So we store it as a string. + """ + + impl = sqlalchemy.types.Time + cache_ok = True + + + def process_bind_param(self, value: Union[datetime.time, None], dialect) -> str: + """Values sent to the database are converted to %:H:%M:%S strings. + """ + if value is None: + return None + return value.strftime("%H:%M:%S") + + def process_literal_param(self, value, dialect) -> datetime.time: + """It's not clear to me why this is necessary. Without it, SQLAlchemy's Timetest:test_literal fails + because the string literal renderer receives a str() object and calls .isoformat() on it. + + Whereas this method receives a datetime.time() object which is subsequently passed to that + same renderer. And that works. + """ + return value + + + def process_result_value(self, value: Union[None, str], dialect) -> Union[datetime.time, None]: + """Values received from the database are parsed into datetime.time() objects + """ + if value is None: + return None + return datetime.strptime(value, "%H:%M:%S").time() diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 95d32329..ecfbfbde 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -355,53 +355,6 @@ def test_select_direct(self): """ -class TimeTest(TimeTest): - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.time(12, 57, 18)" with datatype TIME - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_null_bound_comparison(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_round_trip(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_round_trip_decorated(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Dialect cannot write literal values." - ) - def test_select_direct(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18 - """ - - class TimestampMicrosecondsTest(TimestampMicrosecondsTest): @pytest.mark.skip( reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." From d3a76a493f9cdf324e1b3476cc699897c6bb0472 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 21:18:20 -0400 Subject: [PATCH 08/20] Stop skipping DateTimeCoercedToDateTimeTest The fixes for DateTimes in the past few commits have enabled these tests to pass. test_suite.py::DateTimeCoercedToDateTimeTest_databricks+databricks::test_literal PASSED test_suite.py::DateTimeCoercedToDateTimeTest_databricks+databricks::test_null PASSED test_suite.py::DateTimeCoercedToDateTimeTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::DateTimeCoercedToDateTimeTest_databricks+databricks::test_round_trip PASSED test_suite.py::DateTimeCoercedToDateTimeTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::DateTimeCoercedToDateTimeTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 52 -------------------- 1 file changed, 52 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index ecfbfbde..bbbcc019 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -393,58 +393,6 @@ def test_select_direct(self): """ -class DateTimeCoercedToDateTimeTest(DateTimeCoercedToDateTimeTest): - @pytest.mark.skip( - reason="Date type implementation needs work. Literal values not coerced properly." - ) - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000000' != datetime.datetime(2012, 10, 15, 12, 57, 18) - assert '2012-10-15 12:57:18.000000' == datetime.datetime(2012, 10, 15, 12, 57, 18) - """ - - @pytest.mark.skip(reason="Forthcoming deprecated feature.") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_null(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_null_bound_comparison(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_round_trip(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - @pytest.mark.skip(reason="urllib3 is complaining") - def test_round_trip_decorated(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - - """ - - class ExceptionTest(ExceptionTest): @pytest.mark.skip(reason="Databricks may not support this method.") def test_integrity_error(self): From d4afb6080114bd6652c13bec7a9502a10f352d20 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 21:20:58 -0400 Subject: [PATCH 09/20] Stop skipping TimestampMicrosecondsTest I needed to add an entry to requirements.py otherwise these tests were skipped. test_suite.py::TimestampMicrosecondsTest_databricks+databricks::test_literal PASSED test_suite.py::TimestampMicrosecondsTest_databricks+databricks::test_null PASSED test_suite.py::TimestampMicrosecondsTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::TimestampMicrosecondsTest_databricks+databricks::test_round_trip PASSED test_suite.py::TimestampMicrosecondsTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::TimestampMicrosecondsTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 8 +++++ src/databricks/sqlalchemy/test/test_suite.py | 35 -------------------- 2 files changed, 8 insertions(+), 35 deletions(-) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index c2f0de02..cc76a47e 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -54,5 +54,13 @@ def datetime_literals(self): """ return sqlalchemy.testing.exclusions.open() + + @property + def timestamp_microseconds(self): + """target dialect supports representation of Python + datetime.datetime() with microsecond objects but only + if TIMESTAMP is used.""" + + return sqlalchemy.testing.exclusions.open() diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index bbbcc019..38798abd 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -355,42 +355,7 @@ def test_select_direct(self): """ -class TimestampMicrosecondsTest(TimestampMicrosecondsTest): - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(2012, 10, 15, 12, 57, 18, 396)" with datatype TIMESTAMP - """ - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Timezone not preserved. Cannot render literal values." - ) - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000396' != datetime.datetime(2012, 10, 15, 12, 57, 18, 396) - """ class ExceptionTest(ExceptionTest): From ab8e28fedaf7a1639db7bd64ecdfc12232827c1d Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Fri, 6 Oct 2023 21:22:19 -0400 Subject: [PATCH 10/20] Stop skipping DateTimeMicrosecondsTest Changes in earlier commits around DateTime types have enabled these to pass. test_suite.py::DateTimeMicrosecondsTest_databricks+databricks::test_literal PASSED test_suite.py::DateTimeMicrosecondsTest_databricks+databricks::test_null PASSED test_suite.py::DateTimeMicrosecondsTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::DateTimeMicrosecondsTest_databricks+databricks::test_round_trip PASSED test_suite.py::DateTimeMicrosecondsTest_databricks+databricks::test_round_trip_decorated PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 30 -------------------- 1 file changed, 30 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 38798abd..f925be25 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -29,36 +29,6 @@ class BinaryTest(BinaryTest): pass -class DateTimeMicrosecondsTest(DateTimeMicrosecondsTest): - @pytest.mark.skip(reason="Date type implementation needs work") - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.datetime(2012, 10, 15, 12, 57, 18, 396)" with datatype DATETIME - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_round_trip_decorated(self): - """ - Exception: - AssertionError: (datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=),) != (datetime.datetime(2012, 10, 15, 12, 57, 18, 396),) - """ - - @pytest.mark.skip(reason="Date type implementation needs work") - def test_select_direct(self): - """ - Exception: - AssertionError: '2012-10-15 12:57:18.000396' != datetime.datetime(2012, 10, 15, 12, 57, 18, 396) - """ - - class FetchLimitOffsetTest(FetchLimitOffsetTest): @pytest.mark.skip( reason="Dialect should advertise which offset rules Databricks supports. Offset handling needs work." From af1a49e85bf505e1f10f94c8a28b036cacac7f7f Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 16:13:21 -0400 Subject: [PATCH 11/20] Stop skipping StringTest I've found that test_dont_truncate_rightside test is flaky because sometimes DBR returns the correct data but in the wrong order. It's supposed to check that a query returns ["AB", "BC"] but sometimes it returns ["BC", "AB"] which is the right data in the wrong order. Python list comparison doesn't evaluate that ["AB", "BC"] == ["BC", "AB"]. We could reimplement the test using collections.Counter in stdlib or check with DBR team about why this order is sometimes different. Signed-off-by: Jesse Whitehouse --- Also, we had to break with SQLAlchemy's advice to never implement the TypeDecorator.literal_processor method because otherwise our strings end up double-escaped and raise a syntax error. test_suite.py::StringTest_databricks+databricks::test_concatenate_binary PASSED test_suite.py::StringTest_databricks+databricks::test_concatenate_clauselist PASSED test_suite.py::StringTest_databricks+databricks::test_dont_truncate_rightside[%B%-expected0] PASSED test_suite.py::StringTest_databricks+databricks::test_dont_truncate_rightside[A%C%Z-expected2] PASSED test_suite.py::StringTest_databricks+databricks::test_dont_truncate_rightside[A%C-expected1] PASSED test_suite.py::StringTest_databricks+databricks::test_literal PASSED test_suite.py::StringTest_databricks+databricks::test_literal_backslashes PASSED test_suite.py::StringTest_databricks+databricks::test_literal_non_ascii PASSED test_suite.py::StringTest_databricks+databricks::test_literal_quoting PASSED test_suite.py::StringTest_databricks+databricks::test_nolength_string PASSED --- src/databricks/sqlalchemy/__init__.py | 1 + src/databricks/sqlalchemy/_types.py | 62 ++++++++++++++++++++ src/databricks/sqlalchemy/test/test_suite.py | 19 ------ 3 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py index 7bb90c74..a369a7d0 100644 --- a/src/databricks/sqlalchemy/__init__.py +++ b/src/databricks/sqlalchemy/__init__.py @@ -51,6 +51,7 @@ class DatabricksDialect(default.DefaultDialect): colspecs = { sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType, sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType, + sqlalchemy.types.String: dialect_type_impl.DatabricksStringType } @classmethod diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index 2c32e0da..a9d72be2 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -5,6 +5,9 @@ from datetime import datetime + +from databricks.sql.utils import ParamEscaper + @compiles(sqlalchemy.types.Enum, "databricks") @compiles(sqlalchemy.types.String, "databricks") @compiles(sqlalchemy.types.Text, "databricks") @@ -132,3 +135,62 @@ def process_result_value(self, value: Union[None, str], dialect) -> Union[dateti if value is None: return None return datetime.strptime(value, "%H:%M:%S").time() + +class DatabricksStringType(sqlalchemy.types.TypeDecorator): + """We have to implement our own String() type because SQLAlchemy's default implementation + wants to escape single-quotes with a doubled single-quote. Databricks uses a backslash for + escaping of literal strings. And SQLAlchemy's default escaping breaks Databricks SQL. + """ + + impl = sqlalchemy.types.String + cache_ok = True + pe = ParamEscaper() + + def process_literal_param(self, value, dialect) -> str: + """SQLAlchemy's default string escaping for backslashes doesn't work for databricks. The logic here + implements the same logic as our legacy inline escaping logic. + """ + + return self.pe.escape_string(value) + + def literal_processor(self, dialect): + """We manually override this method to prevent further processing of the string literal beyond + what happens in the process_literal_param() method. + + The SQLAlchemy docs _specifically_ say to not override this method. + + It appears that any processing that happens from TypeEngine.process_literal_param happens _before_ + and _in addition to_ whatever the class's impl.literal_processor() method does. The String.literal_processor() + method performs a string replacement that doubles any single-quote in the contained string. This raises a syntax + error in Databricks. And it's not necessary because ParamEscaper() already implements all the escaping we need. + + We should consider opening an issue on the SQLAlchemy project to see if I'm using it wrong. + + See type_api.py::TypeEngine.literal_processor: + + ```python + def process(value: Any) -> str: + return fixed_impl_processor( + fixed_process_literal_param(value, dialect) + ) + ``` + + That call to fixed_impl_processor wraps the result of fixed_process_literal_param (which is the + process_literal_param defined in our Databricks dialect) + + https://docs.sqlalchemy.org/en/20/core/custom_types.html#sqlalchemy.types.TypeDecorator.literal_processor + """ + def process(value): + """This is a copy of the default String.literal_processor() method but stripping away + its double-escaping behaviour for single-quotes. + """ + + _step1 = self.process_literal_param(value, dialect="databricks") + if dialect.identifier_preparer._double_percents: + _step2 = _step1.replace("%", "%%") + else: + _step2 = _step1 + + return "%s" % _step2 + + return process \ No newline at end of file diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index f925be25..86f28f83 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -236,25 +236,6 @@ def test_row_w_scalar_select(self): """ -class StringTest(StringTest): - @pytest.mark.skip( - reason="String implementation needs work. Quote escaping is inconsistent between read/write." - ) - def test_literal_backslashes(self): - """ - Exception: - AssertionError: assert 'backslash one backslash two \\ end' in ['backslash one \\ backslash two \\\\ end'] - """ - - @pytest.mark.skip( - reason="String implementation needs work. Quote escaping is inconsistent between read/write." - ) - def test_literal_quoting(self): - """ - Exception: - assert 'some text hey "hi there" thats text' in ['some \'text\' hey "hi there" that\'s text'] - """ - class TextTest(TextTest): """Fixing StringTest should fix these failures also.""" From 53eef60765997285165e1cfed2d29a3b14375faa Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 16:15:50 -0400 Subject: [PATCH 12/20] Stop skipping TextTest The fixes from fixing StringTest also fixed these tests test_suite.py::TextTest_databricks+databricks::test_literal PASSED test_suite.py::TextTest_databricks+databricks::test_literal_backslashes PASSED test_suite.py::TextTest_databricks+databricks::test_literal_non_ascii PASSED test_suite.py::TextTest_databricks+databricks::test_literal_percentsigns PASSED test_suite.py::TextTest_databricks+databricks::test_literal_quoting PASSED test_suite.py::TextTest_databricks+databricks::test_text_empty_strings PASSED test_suite.py::TextTest_databricks+databricks::test_text_null_strings PASSED test_suite.py::TextTest_databricks+databricks::test_text_roundtrip PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 22 -------------------- 1 file changed, 22 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 86f28f83..9ccac6d7 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -237,28 +237,6 @@ def test_row_w_scalar_select(self): -class TextTest(TextTest): - """Fixing StringTest should fix these failures also.""" - - @pytest.mark.skip( - reason="String implementation needs work. See comments from StringTest." - ) - def test_literal_backslashes(self): - """ - Exception: - AssertionError: assert 'backslash one backslash two \\ end' in ['backslash one \\ backslash two \\\\ end'] - """ - - @pytest.mark.skip( - reason="String implementation needs work. See comments from StringTest." - ) - def test_literal_quoting(self): - """ - Exception: - assert 'some text hey "hi there" thats text' in ['some \'text\' hey "hi there" that\'s text'] - """ - - class TimeMicrosecondsTest(TimeMicrosecondsTest): @pytest.mark.skip( reason="Time type implementation needs work. Microseconds are not handled at all." From a56b0836da22b35ff07f69355a4795dae7183f19 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 16:34:36 -0400 Subject: [PATCH 13/20] Stop skipping TimeMicrosecondsTest I had to update our custom Time() override to cope with both non-microseconds timestrings and microseconds time strings. So I reran the TimeTest() as well to confirm both types still work. test_suite.py::TimeMicrosecondsTest_databricks+databricks::test_literal PASSED test_suite.py::TimeMicrosecondsTest_databricks+databricks::test_null PASSED test_suite.py::TimeMicrosecondsTest_databricks+databricks::test_null_bound_comparison PASSED test_suite.py::TimeMicrosecondsTest_databricks+databricks::test_round_trip PASSED test_suite.py::TimeMicrosecondsTest_databricks+databricks::test_round_trip_decorated PASSED test_suite.py::TimeMicrosecondsTest_databricks+databricks::test_select_direct PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/_types.py | 19 +++++++- src/databricks/sqlalchemy/requirements.py | 15 ++++++ src/databricks/sqlalchemy/test/test_suite.py | 50 -------------------- 3 files changed, 32 insertions(+), 52 deletions(-) diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index a9d72be2..b4ddf6d0 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -111,13 +111,15 @@ class DatabricksTimeType(sqlalchemy.types.TypeDecorator): impl = sqlalchemy.types.Time cache_ok = True + TIME_WITH_MICROSECONDS_FMT = "%H:%M:%S.%f" + TIME_NO_MICROSECONDS_FMT = "%H:%M:%S" def process_bind_param(self, value: Union[datetime.time, None], dialect) -> str: """Values sent to the database are converted to %:H:%M:%S strings. """ if value is None: return None - return value.strftime("%H:%M:%S") + return value.strftime(self.TIME_WITH_MICROSECONDS_FMT) def process_literal_param(self, value, dialect) -> datetime.time: """It's not clear to me why this is necessary. Without it, SQLAlchemy's Timetest:test_literal fails @@ -125,6 +127,12 @@ def process_literal_param(self, value, dialect) -> datetime.time: Whereas this method receives a datetime.time() object which is subsequently passed to that same renderer. And that works. + + UPDATE: After coping with the literal_processor override in DatabricksStringType, I suspect a similar + mechanism is at play. Two different processors are are called in sequence. This is likely a byproduct + of Databricks not having a true TIME type. I think the string representation of Time() types is + somehow affecting the literal rendering process. But as long as this passes the tests, I'm not + worried about it. """ return value @@ -134,7 +142,14 @@ def process_result_value(self, value: Union[None, str], dialect) -> Union[dateti """ if value is None: return None - return datetime.strptime(value, "%H:%M:%S").time() + + try: + _parsed = datetime.strptime(value, self.TIME_WITH_MICROSECONDS_FMT) + except ValueError: + # If the string doesn't have microseconds, try parsing it without them + _parsed = datetime.strptime(value, self.TIME_NO_MICROSECONDS_FMT) + + return _parsed.time() class DatabricksStringType(sqlalchemy.types.TypeDecorator): """We have to implement our own String() type because SQLAlchemy's default implementation diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index cc76a47e..ba4149bc 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -62,5 +62,20 @@ def timestamp_microseconds(self): if TIMESTAMP is used.""" return sqlalchemy.testing.exclusions.open() + + @property + def time_microseconds(self): + """target dialect supports representation of Python + datetime.time() with microsecond objects. + + This requirement declaration isn't needed but I've included it here for completeness. + Since Databricks doesn't have a TIME type, SQLAlchemy will compile Time() columns + as STRING Databricks data types. And we use a custom time type to render those strings + between str() and time.time() representations. Therefore we can store _any_ precision + that SQLAlchemy needs. The time_microseconds requirement defaults to ON for all dialects + except mssql, mysql, mariadb, and oracle. + """ + + return sqlalchemy.testing.exclusions.open() diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 9ccac6d7..cf5814b7 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -237,56 +237,6 @@ def test_row_w_scalar_select(self): -class TimeMicrosecondsTest(TimeMicrosecondsTest): - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_literal(self): - """ - Exception: - sqlalchemy.exc.CompileError: No literal value renderer is available for literal value "datetime.time(12, 57, 18, 396)" with datatype TIME - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_null_bound_comparison(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_round_trip(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_round_trip_decorated(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - @pytest.mark.skip( - reason="Time type implementation needs work. Microseconds are not handled at all." - ) - def test_select_direct(self): - """ - Exception: - sqlalchemy.exc.ProgrammingError: (databricks.sql.exc.ProgrammingError) Unsupported object 12:57:18.000396 - """ - - - - - class ExceptionTest(ExceptionTest): @pytest.mark.skip(reason="Databricks may not support this method.") def test_integrity_error(self): From aa1cae3ebf9694b600dd4d718892afae5384bfbc Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 17:06:54 -0400 Subject: [PATCH 14/20] Stop skipping NumericTest I added exclusions to requirements.py for features which Databricks doesn't natively support. These tests apply to the generic (camelcase) Numeric() type. Therefore the tests are applicable to the DECIMAL Databricks data type. Some of these features _will_ work for Databricks' FLOAT type. Those tests marked as skipped in the below listing are skipped because of the exclusions called out in requirements.py test_suite.py::NumericTest_databricks+databricks::test_decimal_coerce_round_trip PASSED test_suite.py::NumericTest_databricks+databricks::test_decimal_coerce_round_trip_w_cast PASSED test_suite.py::NumericTest_databricks+databricks::test_enotation_decimal SKIPPED ('test/test_suite.py::NumericTest_databricks+databricks::test_enotation_decimal (call)' : marked as skip) test_suite.py::NumericTest_databricks+databricks::test_enotation_decimal_large SKIPPED ('test/test_suite.py::NumericTest_databricks+databricks::test_enotation_decimal_large (call)' : marked as skip) test_suite.py::NumericTest_databricks+databricks::test_float_as_decimal PASSED test_suite.py::NumericTest_databricks+databricks::test_float_as_float PASSED test_suite.py::NumericTest_databricks+databricks::test_float_coerce_round_trip SKIPPED ('test/test_suite.py::NumericTest_databricks+databricks::test_float_coerce_round_trip (call)' : marked as skip) test_suite.py::NumericTest_databricks+databricks::test_float_custom_scale SKIPPED ('test/test_suite.py::NumericTest_databricks+databricks::test_float_custom_scale (call)' : marked as skip) test_suite.py::NumericTest_databricks+databricks::test_float_is_not_numeric[Double] PASSED test_suite.py::NumericTest_databricks+databricks::test_float_is_not_numeric[Float] PASSED test_suite.py::NumericTest_databricks+databricks::test_infinity_floats PASSED test_suite.py::NumericTest_databricks+databricks::test_many_significant_digits SKIPPED ('test/test_suite.py::NumericTest_databricks+databricks::test_many_significant_digits (call)' : marked as skip) test_suite.py::NumericTest_databricks+databricks::test_numeric_as_decimal PASSED test_suite.py::NumericTest_databricks+databricks::test_numeric_as_float PASSED test_suite.py::NumericTest_databricks+databricks::test_numeric_no_decimal PASSED test_suite.py::NumericTest_databricks+databricks::test_numeric_null_as_decimal PASSED test_suite.py::NumericTest_databricks+databricks::test_numeric_null_as_float PASSED test_suite.py::NumericTest_databricks+databricks::test_precision_decimal PASSED test_suite.py::NumericTest_databricks+databricks::test_render_literal_float PASSED test_suite.py::NumericTest_databricks+databricks::test_render_literal_numeric PASSED test_suite.py::NumericTest_databricks+databricks::test_render_literal_numeric_asfloat PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 51 ++++++++++++++ src/databricks/sqlalchemy/test/test_suite.py | 72 -------------------- 2 files changed, 51 insertions(+), 72 deletions(-) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index ba4149bc..e12041f4 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -77,5 +77,56 @@ def time_microseconds(self): """ return sqlalchemy.testing.exclusions.open() + + @property + def precision_generic_float_type(self): + """target backend will return native floating point numbers with at + least seven decimal places when using the generic Float type. + + Databricks sometimes only returns six digits of precision for the generic Float type + """ + return sqlalchemy.testing.exclusions.closed() + + @property + def literal_float_coercion(self): + """target backend will return the exact float value 15.7563 + with only four significant digits from this statement: + + SELECT :param + + where :param is the Python float 15.7563 + + i.e. it does not return 15.75629997253418 + + Without additional work, Databricks returns 15.75629997253418 + This is a potential area where we could override the Float literal processor. + Will leave to a PM to decide if we should do so. + """ + return sqlalchemy.testing.exclusions.closed() + + @property + def precision_numerics_enotation_large(self): + """target backend supports Decimal() objects using E notation + to represent very large values. + + Databricks supports E notation for FLOAT data types but not for DECIMAL types, + which is the underlying data type SQLAlchemy uses for Numeric() types. + + """ + return sqlalchemy.testing.exclusions.closed() + + @property + def infinity_floats(self): + """The Float type can persist and load float('inf'), float('-inf').""" + + return sqlalchemy.testing.exclusions.open() + + @property + def precision_numerics_retains_significant_digits(self): + """A precision numeric type will return empty significant digits, + i.e. a value such as 10.000 will come back in Decimal form with + the .000 maintained.""" + + return sqlalchemy.testing.exclusions.open() diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index cf5814b7..16b9ead8 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -151,78 +151,6 @@ def test_long_convention_name(self): """ -class NumericTest(NumericTest): - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_decimal_coerce_round_trip_w_cast(self): - """ - Exception: - AssertionError: Decimal('16') != Decimal('15.7563') - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_enotation_decimal(self): - """ - Exception: - AssertionError: {Decimal('0'), Decimal('1')} != {Decimal('0.70000000000696'), Decimal('1E-7'), Decimal('0.00001'), Decimal('6.96E-12'), Decimal('0.001'), Decimal('5.940696E-8'), Decimal('0.01000005940696'), Decimal('1E-8'), Decimal('0.01'), Decimal('0.000001'), Decimal('0.0001'), Decimal('6.96E-10')} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_enotation_decimal_large(self): - """ - Exception: - sqlalchemy.exc.DatabaseError: (databricks.sql.exc.ServerOperationError) [CAST_OVERFLOW_IN_TABLE_INSERT] Fail to insert a value of "DOUBLE" type into the "DECIMAL(10,0)" type column `x` due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead. - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_float_custom_scale(self): - """ - Exception: - AssertionError: {Decimal('15.7563829')} != {Decimal('15.7563827')} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_many_significant_digits(self): - """ - Exception: - sqlalchemy.exc.DatabaseError: (databricks.sql.exc.ServerOperationError) [CAST_OVERFLOW_IN_TABLE_INSERT] Fail to insert a value of "DECIMAL(22,2)" type into the "DECIMAL(10,0)" type column `x` due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead. - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_numeric_as_decimal(self): - """ - Exception: - AssertionError: {Decimal('16')} != {Decimal('15.7563')} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_numeric_as_float(self): - """ - Exception: - AssertionError: {16.0} != {15.7563} - """ - - @pytest.mark.skip( - reason="Numeric implementation needs work. Rounding looks to be incorrect." - ) - def test_precision_decimal(self): - """ - Exception: - AssertionError: {Decimal('0'), Decimal('900'), Decimal('54')} != {Decimal('0.004354'), Decimal('900.0'), Decimal('54.234246451650')} - """ class RowFetchTest(RowFetchTest): From 4671ff90debae95c620db1ec25ad31fc60d88056 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 17:08:53 -0400 Subject: [PATCH 15/20] Stop skipping Boolean test These tests appear to be fixed by fixing our @compiles decorators test_suite.py::BooleanTest_databricks+databricks::test_null PASSED test_suite.py::BooleanTest_databricks+databricks::test_render_literal_bool PASSED test_suite.py::BooleanTest_databricks+databricks::test_round_trip PASSED test_suite.py::BooleanTest_databricks+databricks::test_whereclause PASSED Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/test/test_suite.py | 35 -------------------- 1 file changed, 35 deletions(-) diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 16b9ead8..2afea9dc 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -298,41 +298,6 @@ def test_numeric_reflection(self): """ -class BooleanTest(BooleanTest): - @pytest.mark.skip(reason="Boolean type needs work.") - def test_null(self): - """ - This failure appears to infrastructure based. Should attempt a re-run. - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - """ - pass - - @pytest.mark.skip(reason="Boolean type needs work.") - def test_render_literal_bool(self): - """ - Exception: - sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9) - _ ERROR at setup of BooleanTest_databricks+databricks.test_render_literal_bool _ - """ - pass - - @pytest.mark.skip(reason="Boolean type needs work.") - def test_round_trip(self): - """ - Exception: - urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - """ - pass - - @pytest.mark.skip(reason="Boolean type needs work.") - def test_whereclause(self): - """ - Exception: - sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9) - """ - pass - class DifficultParametersTest(DifficultParametersTest): @pytest.mark.skip(reason="Error during execution. Requires investigation.") From 77221c2f60ec5327a2662284be67d4173004d2e3 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 17:10:31 -0400 Subject: [PATCH 16/20] Fix: must add array_type exclusion to this file else the entire test suite fails. This appears to be a programming error in SQLAlchemy Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index e12041f4..c6877def 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -129,4 +129,9 @@ def precision_numerics_retains_significant_digits(self): return sqlalchemy.testing.exclusions.open() + @property + def array_type(self): + """While Databricks does support ARRAY types, pysql cannot bind them. So + we cannot use them with SQLAlchemy""" + return sqlalchemy.testing.exclusions.closed() From bc0495dfca0785935a7f9357a0cbb339eb0f1a78 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 17:10:44 -0400 Subject: [PATCH 17/20] Black all files changed in this PR Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/__init__.py | 3 +- src/databricks/sqlalchemy/_types.py | 48 ++++++++++---------- src/databricks/sqlalchemy/requirements.py | 25 +++++----- src/databricks/sqlalchemy/test/test_suite.py | 4 -- 4 files changed, 38 insertions(+), 42 deletions(-) diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py index a369a7d0..95b6c516 100644 --- a/src/databricks/sqlalchemy/__init__.py +++ b/src/databricks/sqlalchemy/__init__.py @@ -51,7 +51,7 @@ class DatabricksDialect(default.DefaultDialect): colspecs = { sqlalchemy.types.DateTime: dialect_type_impl.DatabricksDateTimeNoTimezoneType, sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType, - sqlalchemy.types.String: dialect_type_impl.DatabricksStringType + sqlalchemy.types.String: dialect_type_impl.DatabricksStringType, } @classmethod @@ -136,7 +136,6 @@ def get_columns(self, connection, table_name, schema=None, **kwargs): columns = [] for col in resp: - # Taken from PyHive. This removes added type info from decimals and maps _col_type = re.search(r"^\w+", col.TYPE_NAME).group(0) this_column = { diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index b4ddf6d0..3fdfef74 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -8,6 +8,7 @@ from databricks.sql.utils import ParamEscaper + @compiles(sqlalchemy.types.Enum, "databricks") @compiles(sqlalchemy.types.String, "databricks") @compiles(sqlalchemy.types.Text, "databricks") @@ -88,11 +89,11 @@ def compile_array_databricks(type_, compiler, **kw): class DatabricksDateTimeNoTimezoneType(sqlalchemy.types.TypeDecorator): """The decimal that pysql creates when it receives the contents of a TIMESTAMP_NTZ - includes a timezone of 'Etc/UTC'. But since SQLAlchemy's test suite assumes that - the sqlalchemy.types.DateTime type will return a datetime.datetime _without_ any - timezone set, we need to strip the timezone off the value received from pysql. + includes a timezone of 'Etc/UTC'. But since SQLAlchemy's test suite assumes that + the sqlalchemy.types.DateTime type will return a datetime.datetime _without_ any + timezone set, we need to strip the timezone off the value received from pysql. - It's not clear if DBR sends a timezone to pysql or if pysql is adding it. This could be a bug. + It's not clear if DBR sends a timezone to pysql or if pysql is adding it. This could be a bug. """ impl = sqlalchemy.types.DateTime @@ -103,10 +104,10 @@ def process_result_value(self, value: Union[None, datetime], dialect): if value is None: return None return value.replace(tzinfo=None) - + + class DatabricksTimeType(sqlalchemy.types.TypeDecorator): - """Databricks has no native TIME type. So we store it as a string. - """ + """Databricks has no native TIME type. So we store it as a string.""" impl = sqlalchemy.types.Time cache_ok = True @@ -115,12 +116,11 @@ class DatabricksTimeType(sqlalchemy.types.TypeDecorator): TIME_NO_MICROSECONDS_FMT = "%H:%M:%S" def process_bind_param(self, value: Union[datetime.time, None], dialect) -> str: - """Values sent to the database are converted to %:H:%M:%S strings. - """ + """Values sent to the database are converted to %:H:%M:%S strings.""" if value is None: return None return value.strftime(self.TIME_WITH_MICROSECONDS_FMT) - + def process_literal_param(self, value, dialect) -> datetime.time: """It's not clear to me why this is necessary. Without it, SQLAlchemy's Timetest:test_literal fails because the string literal renderer receives a str() object and calls .isoformat() on it. @@ -136,21 +136,22 @@ def process_literal_param(self, value, dialect) -> datetime.time: """ return value - - def process_result_value(self, value: Union[None, str], dialect) -> Union[datetime.time, None]: - """Values received from the database are parsed into datetime.time() objects - """ + def process_result_value( + self, value: Union[None, str], dialect + ) -> Union[datetime.time, None]: + """Values received from the database are parsed into datetime.time() objects""" if value is None: return None - + try: _parsed = datetime.strptime(value, self.TIME_WITH_MICROSECONDS_FMT) except ValueError: # If the string doesn't have microseconds, try parsing it without them _parsed = datetime.strptime(value, self.TIME_NO_MICROSECONDS_FMT) - + return _parsed.time() - + + class DatabricksStringType(sqlalchemy.types.TypeDecorator): """We have to implement our own String() type because SQLAlchemy's default implementation wants to escape single-quotes with a doubled single-quote. Databricks uses a backslash for @@ -160,18 +161,18 @@ class DatabricksStringType(sqlalchemy.types.TypeDecorator): impl = sqlalchemy.types.String cache_ok = True pe = ParamEscaper() - + def process_literal_param(self, value, dialect) -> str: """SQLAlchemy's default string escaping for backslashes doesn't work for databricks. The logic here implements the same logic as our legacy inline escaping logic. """ return self.pe.escape_string(value) - + def literal_processor(self, dialect): """We manually override this method to prevent further processing of the string literal beyond what happens in the process_literal_param() method. - + The SQLAlchemy docs _specifically_ say to not override this method. It appears that any processing that happens from TypeEngine.process_literal_param happens _before_ @@ -180,7 +181,7 @@ def literal_processor(self, dialect): error in Databricks. And it's not necessary because ParamEscaper() already implements all the escaping we need. We should consider opening an issue on the SQLAlchemy project to see if I'm using it wrong. - + See type_api.py::TypeEngine.literal_processor: ```python @@ -192,9 +193,10 @@ def process(value: Any) -> str: That call to fixed_impl_processor wraps the result of fixed_process_literal_param (which is the process_literal_param defined in our Databricks dialect) - + https://docs.sqlalchemy.org/en/20/core/custom_types.html#sqlalchemy.types.TypeDecorator.literal_processor """ + def process(value): """This is a copy of the default String.literal_processor() method but stripping away its double-escaping behaviour for single-quotes. @@ -208,4 +210,4 @@ def process(value): return "%s" % _step2 - return process \ No newline at end of file + return process diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index c6877def..9ae07572 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -31,21 +31,20 @@ def __some_example_requirement(self): class Requirements(sqlalchemy.testing.requirements.SuiteRequirements): - @property def date_historic(self): """target dialect supports representation of Python datetime.datetime() objects with historic (pre 1970) values.""" return sqlalchemy.testing.exclusions.open() - + @property def datetime_historic(self): """target dialect supports representation of Python datetime.datetime() objects with historic (pre 1970) values.""" return sqlalchemy.testing.exclusions.open() - + @property def datetime_literals(self): """target dialect supports rendering of a date, time, or datetime as a @@ -54,7 +53,7 @@ def datetime_literals(self): """ return sqlalchemy.testing.exclusions.open() - + @property def timestamp_microseconds(self): """target dialect supports representation of Python @@ -62,22 +61,22 @@ def timestamp_microseconds(self): if TIMESTAMP is used.""" return sqlalchemy.testing.exclusions.open() - + @property def time_microseconds(self): """target dialect supports representation of Python datetime.time() with microsecond objects. - + This requirement declaration isn't needed but I've included it here for completeness. Since Databricks doesn't have a TIME type, SQLAlchemy will compile Time() columns as STRING Databricks data types. And we use a custom time type to render those strings between str() and time.time() representations. Therefore we can store _any_ precision that SQLAlchemy needs. The time_microseconds requirement defaults to ON for all dialects - except mssql, mysql, mariadb, and oracle. + except mssql, mysql, mariadb, and oracle. """ return sqlalchemy.testing.exclusions.open() - + @property def precision_generic_float_type(self): """target backend will return native floating point numbers with at @@ -86,7 +85,7 @@ def precision_generic_float_type(self): Databricks sometimes only returns six digits of precision for the generic Float type """ return sqlalchemy.testing.exclusions.closed() - + @property def literal_float_coercion(self): """target backend will return the exact float value 15.7563 @@ -103,24 +102,24 @@ def literal_float_coercion(self): Will leave to a PM to decide if we should do so. """ return sqlalchemy.testing.exclusions.closed() - + @property def precision_numerics_enotation_large(self): """target backend supports Decimal() objects using E notation to represent very large values. - + Databricks supports E notation for FLOAT data types but not for DECIMAL types, which is the underlying data type SQLAlchemy uses for Numeric() types. """ return sqlalchemy.testing.exclusions.closed() - + @property def infinity_floats(self): """The Float type can persist and load float('inf'), float('-inf').""" return sqlalchemy.testing.exclusions.open() - + @property def precision_numerics_retains_significant_digits(self): """A precision numeric type will return empty significant digits, diff --git a/src/databricks/sqlalchemy/test/test_suite.py b/src/databricks/sqlalchemy/test/test_suite.py index 2afea9dc..055720ac 100644 --- a/src/databricks/sqlalchemy/test/test_suite.py +++ b/src/databricks/sqlalchemy/test/test_suite.py @@ -151,8 +151,6 @@ def test_long_convention_name(self): """ - - class RowFetchTest(RowFetchTest): @pytest.mark.skip( reason="Date type implementation needs work. Timezone information not preserved." @@ -164,7 +162,6 @@ def test_row_w_scalar_select(self): """ - class ExceptionTest(ExceptionTest): @pytest.mark.skip(reason="Databricks may not support this method.") def test_integrity_error(self): @@ -298,7 +295,6 @@ def test_numeric_reflection(self): """ - class DifficultParametersTest(DifficultParametersTest): @pytest.mark.skip(reason="Error during execution. Requires investigation.") def test_round_trip_same_named_column(self): From e2330580385174a1cb6268fbf356bed24ff82756 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 17:13:52 -0400 Subject: [PATCH 18/20] Update test running instructions Signed-off-by: Jesse Whitehouse --- CONTRIBUTING.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4efaba0c..f4ec73e7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -148,16 +148,14 @@ The suites marked `[not documented]` require additional configuration which will SQLAlchemy provides reusable tests for testing dialect implementations. -To run these tests, assuming the environment variables needed for e2e tests are set, do the following: - ``` -cd src/databricks/sqlalchemy -poetry run python -m pytest test/sqlalchemy_dialect_compliance.py --dburi \ +poetry shell +cd src/databricks/sqlalchemy/test +python -m pytest test_suite.py --dburi \ "databricks://token:$access_token@$host?http_path=$http_path&catalog=$catalog&schema=$schema" ``` -Some of these of these tests fail currently. We're working on getting -relavent tests passing and others skipped. +Some of these of these tests fail currently. We're working on getting relevant tests passing and others skipped. ### Code formatting From 214523bdec4b657dd90a05885497ac3f266e202a Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 17:30:54 -0400 Subject: [PATCH 19/20] Remove outdated comment now that we better understand how to use this file Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/requirements.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 9ae07572..0e3d2ffb 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -1,20 +1,4 @@ """ -This module is supposedly used by the compliance tests to control which tests are run based on database capabilities. -However, based on some experimentation that does not appear to be consistently the case. Until we better understand -when these requirements are and are not implemented, we prefer to manually capture the exact nature of the failures -and errors. - -Once we better understand how to use requirements.py, an example exclusion will look like this: - - import sqlalchemy.testing.requirements - import sqlalchemy.testing.exclusions - - class Requirements(sqlalchemy.testing.requirements.SuiteRequirements): - @property - def __some_example_requirement(self): - return sqlalchemy.testing.exclusions.closed - - The complete list of requirements is provided by SQLAlchemy here: https://github.com/sqlalchemy/sqlalchemy/blob/main/lib/sqlalchemy/testing/requirements.py @@ -23,13 +7,6 @@ def __some_example_requirement(self): import sqlalchemy.testing.requirements import sqlalchemy.testing.exclusions -import logging - -logger = logging.getLogger(__name__) - -logger.warning("requirements.py is not currently employed by Databricks dialect") - - class Requirements(sqlalchemy.testing.requirements.SuiteRequirements): @property def date_historic(self): From 7b7fe8259b3ff9138651919bf272037de234474b Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Sun, 8 Oct 2023 18:26:57 -0400 Subject: [PATCH 20/20] DRAFT: This scaffolds in the code for running parallel dialect compliance tests. Setting this up isn't documented anywhere and the implementation here doesn't actually work because the test runner keeps trying to create tables within the catalog.schema in the connection string. Need to research more about why this happens. But wanted to stash this code for now as we'll want to address this for faster compliance test runs in the future. Signed-off-by: Jesse Whitehouse --- src/databricks/sqlalchemy/__init__.py | 7 +++++++ src/databricks/sqlalchemy/provision.py | 12 ++++++++++++ src/databricks/sqlalchemy/setup.cfg | 3 +++ 3 files changed, 22 insertions(+) create mode 100644 src/databricks/sqlalchemy/provision.py diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py index 95b6c516..f701d8a8 100644 --- a/src/databricks/sqlalchemy/__init__.py +++ b/src/databricks/sqlalchemy/__init__.py @@ -282,6 +282,13 @@ def get_schema_names(self, connection, **kw): # TODO: replace with call to cursor.schemas() once its performance matches raw SQL return [row[0] for row in connection.execute("SHOW SCHEMAS")] + + @classmethod + def load_provisioning(cls): + try: + __import__("databricks.sqlalchemy.provision") + except ImportError: + pass @event.listens_for(Engine, "do_connect") diff --git a/src/databricks/sqlalchemy/provision.py b/src/databricks/sqlalchemy/provision.py new file mode 100644 index 00000000..55e9f3a0 --- /dev/null +++ b/src/databricks/sqlalchemy/provision.py @@ -0,0 +1,12 @@ +from sqlalchemy.testing.provision import create_db, drop_db + +@create_db.for_db("databricks") +def _databricks_create_db(cfg, eng, ident): + with eng.begin() as conn: + create_string = "CREATE SCHEMA `main`.`%s`" % ident + conn.exec_driver_sql(create_string) + +@drop_db.for_db("databricks") +def _databricks_drop_db(cfg, eng, ident): + with eng.begin() as conn: + conn.exec_driver_sql("DROP SCHEMA `main`.`%s`" % ident) \ No newline at end of file diff --git a/src/databricks/sqlalchemy/setup.cfg b/src/databricks/sqlalchemy/setup.cfg index ab89d17d..81c7095e 100644 --- a/src/databricks/sqlalchemy/setup.cfg +++ b/src/databricks/sqlalchemy/setup.cfg @@ -2,3 +2,6 @@ [sqla_testing] requirement_cls=databricks.sqlalchemy.requirements:Requirements profile_file=profiles.txt + +[db] +databricks= \ No newline at end of file