From 8fe999224e5d6b5403c8cd2fc81c0066314e90f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andre=20Anast=C3=A1cio?= <andreluisanastacio@gmail.com>
Date: Wed, 15 Jan 2025 17:06:36 -0300
Subject: [PATCH] Rewrite statistics integration test to create its own table

---
 dev/provision.py                              | 24 ------------
 tests/conftest.py                             |  4 +-
 .../integration/test_statistics_operations.py | 37 +++++++++++++++----
 3 files changed, 31 insertions(+), 34 deletions(-)

diff --git a/dev/provision.py b/dev/provision.py
index a4dd213e7d..b358da6593 100644
--- a/dev/provision.py
+++ b/dev/provision.py
@@ -401,27 +401,3 @@
     )
     spark.sql(f"ALTER TABLE {catalog_name}.default.test_empty_scan_ordered_str WRITE ORDERED BY id")
     spark.sql(f"INSERT INTO {catalog_name}.default.test_empty_scan_ordered_str VALUES 'a', 'c'")
-
-    spark.sql(
-        f"""
-        CREATE OR REPLACE TABLE {catalog_name}.default.test_table_statistics_operations (
-            number integer
-        )
-        USING iceberg
-        TBLPROPERTIES (
-            'format-version'='2'
-        );
-        """
-    )
-    spark.sql(
-        f"""
-        INSERT INTO {catalog_name}.default.test_table_statistics_operations
-        VALUES (1)
-        """
-    )
-    spark.sql(
-        f"""
-        INSERT INTO {catalog_name}.default.test_table_statistics_operations
-        VALUES (2)
-        """
-    )
diff --git a/tests/conftest.py b/tests/conftest.py
index 453c62cbe7..c8dde01563 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1010,7 +1010,7 @@ def generate_snapshot(
             "file-footer-size-in-bytes": 42,
             "blob-metadata": [
                 {
-                    "type": "ndv",
+                    "type": "apache-datasketches-theta-v1",
                     "snapshot-id": 3051729675574597004,
                     "sequence-number": 1,
                     "fields": [1],
@@ -1024,7 +1024,7 @@ def generate_snapshot(
             "file-footer-size-in-bytes": 42,
             "blob-metadata": [
                 {
-                    "type": "ndv",
+                    "type": "deletion-vector-v1",
                     "snapshot-id": 3055729675574597004,
                     "sequence-number": 1,
                     "fields": [1],
diff --git a/tests/integration/test_statistics_operations.py b/tests/integration/test_statistics_operations.py
index de9a8e691f..361bfebb63 100644
--- a/tests/integration/test_statistics_operations.py
+++ b/tests/integration/test_statistics_operations.py
@@ -14,24 +14,45 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+from typing import TYPE_CHECKING
+
 import pytest
 
-from pyiceberg.catalog import Catalog
+from pyiceberg.exceptions import NoSuchTableError
 from pyiceberg.table.statistics import BlobMetadata, StatisticsFile
 
+if TYPE_CHECKING:
+    import pyarrow as pa
+
+    from pyiceberg.catalog import Catalog
+    from pyiceberg.schema import Schema
+    from pyiceberg.table import Table
+
+
+def _create_table_with_schema(catalog: "Catalog", schema: "Schema") -> "Table":
+    """Drop the statistics test table if it exists, then create it anew with ``schema``."""
+    identifier = "default.test_table_statistics_operations"
+    try:
+        catalog.drop_table(identifier)
+    except NoSuchTableError:
+        pass  # no such table to drop; creation below proceeds regardless
+    return catalog.create_table(identifier=identifier, schema=schema)
+
 
 @pytest.mark.integration
 @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
-def test_manage_statistics(catalog: Catalog) -> None:
-    identifier = "default.test_table_statistics_operations"
-    tbl = catalog.load_table(identifier)
+def test_manage_statistics(catalog: "Catalog", arrow_table_with_null: "pa.Table") -> None:
+    tbl = _create_table_with_schema(catalog, arrow_table_with_null.schema)
+
+    tbl.append(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
 
     add_snapshot_id_1 = tbl.history()[0].snapshot_id
     add_snapshot_id_2 = tbl.history()[1].snapshot_id
 
-    def create_statistics_file(snapshot_id: int) -> StatisticsFile:
+    def create_statistics_file(snapshot_id: int, type_name: str) -> StatisticsFile:
         blob_metadata = BlobMetadata(
-            type="boring-type",
+            type=type_name,
             snapshot_id=snapshot_id,
             sequence_number=2,
             fields=[1],
@@ -48,8 +69,8 @@ def create_statistics_file(snapshot_id: int) -> StatisticsFile:
 
         return statistics_file
 
-    statistics_file_snap_1 = create_statistics_file(add_snapshot_id_1)
-    statistics_file_snap_2 = create_statistics_file(add_snapshot_id_2)
+    statistics_file_snap_1 = create_statistics_file(add_snapshot_id_1, "apache-datasketches-theta-v1")
+    statistics_file_snap_2 = create_statistics_file(add_snapshot_id_2, "deletion-vector-v1")
 
     with tbl.update_statistics() as update:
         update.set_statistics(add_snapshot_id_1, statistics_file_snap_1)