From 85b0d936c8d4f58d5c3d9ee4194a1540d4ac42a5 Mon Sep 17 00:00:00 2001
From: "R. Tyler Croy" <rtyler@brokenco.de>
Date: Mon, 30 Dec 2024 22:04:42 +0000
Subject: [PATCH] chore: expand the arrow version range to allow arrow v54

This was released last week and, in preliminary testing, it appears safe
to incorporate into the supported version range.

Note: Field.dict_id() is going away, and where it is being used for
field comparisons I don't believe it adds substantial value. There's no
replacement for it in Arrow 54+.

Signed-off-by: R. Tyler Croy <rtyler@brokenco.de>
---
 Cargo.toml                    | 26 +++++++++++++++-----------
 acceptance/src/data.rs        |  6 +-----
 ffi/Cargo.toml                |  6 +++---
 kernel/tests/golden_tables.rs |  6 +-----
 4 files changed, 20 insertions(+), 24 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index ead0064ca..6cb2c998a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,17 +22,21 @@ rust-version = "1.80"
 version = "0.6.0"
 
 [workspace.dependencies]
-arrow = { version = ">=53, <54" }
-arrow-arith = { version = ">=53, <54" }
-arrow-array = { version = ">=53, <54" }
-arrow-buffer = { version = ">=53, <54" }
-arrow-cast = { version = ">=53, <54" }
-arrow-data = { version = ">=53, <54" }
-arrow-ord = { version = ">=53, <54" }
-arrow-json = { version = ">=53, <54" }
-arrow-select = { version = ">=53, <54" }
-arrow-schema = { version = ">=53, <54" }
-parquet = { version = ">=53, <54", features = ["object_store"] }
+# When changing the arrow version range, also modify ffi/Cargo.toml which has
+# its own arrow version ranges with modified features. Failure to do so will
+# result in compilation errors as two different sets of arrow dependencies may
+# be sourced.
+arrow = { version = ">=53, <55" }
+arrow-arith = { version = ">=53, <55" }
+arrow-array = { version = ">=53, <55" }
+arrow-buffer = { version = ">=53, <55" }
+arrow-cast = { version = ">=53, <55" }
+arrow-data = { version = ">=53, <55" }
+arrow-ord = { version = ">=53, <55" }
+arrow-json = { version = ">=53, <55" }
+arrow-select = { version = ">=53, <55" }
+arrow-schema = { version = ">=53, <55" }
+parquet = { version = ">=53, <55", features = ["object_store"] }
 object_store = { version = ">=0.11, <0.12" }
 hdfs-native-object-store = "0.12.0"
 hdfs-native = "0.10.0"
diff --git a/acceptance/src/data.rs b/acceptance/src/data.rs
index 9832ac8a4..c515d50c9 100644
--- a/acceptance/src/data.rs
+++ b/acceptance/src/data.rs
@@ -61,7 +61,7 @@ pub fn sort_record_batch(batch: RecordBatch) -> DeltaResult<RecordBatch> {
     Ok(RecordBatch::try_new(batch.schema(), columns)?)
 }
 
-// Ensure that two schema have the same field names, and dict_id/ordering.
+// Ensure that two schemas have the same field names and dict_is_ordered.
 // We ignore:
 //  - data type: This is checked already in `assert_columns_match`
 //  - nullability: parquet marks many things as nullable that we don't in our schema
@@ -72,10 +72,6 @@ fn assert_schema_fields_match(schema: &Schema, golden: &Schema) {
             schema_field.name() == golden_field.name(),
             "Field names don't match"
         );
-        assert!(
-            schema_field.dict_id() == golden_field.dict_id(),
-            "Field dict_id doesn't match"
-        );
         assert!(
             schema_field.dict_is_ordered() == golden_field.dict_is_ordered(),
             "Field dict_is_ordered doesn't match"
diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml
index 08162a505..25897d965 100644
--- a/ffi/Cargo.toml
+++ b/ffi/Cargo.toml
@@ -24,13 +24,13 @@ delta_kernel = { path = "../kernel", default-features = false, features = [
 delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.6.0" }
 
 # used if we use the default engine to be able to move arrow data into the c-ffi format
-arrow-schema = { version = "53.0", default-features = false, features = [
+arrow-schema = { version = ">=53, <55", default-features = false, features = [
   "ffi",
 ], optional = true }
-arrow-data = { version = "53.0", default-features = false, features = [
+arrow-data = { version = ">=53, <55", default-features = false, features = [
   "ffi",
 ], optional = true }
-arrow-array = { version = "53.0", default-features = false, optional = true }
+arrow-array = { version = ">=53, <55", default-features = false, optional = true }
 
 [build-dependencies]
 cbindgen = "0.27.0"
diff --git a/kernel/tests/golden_tables.rs b/kernel/tests/golden_tables.rs
index 1d0c8406b..cd9023db1 100644
--- a/kernel/tests/golden_tables.rs
+++ b/kernel/tests/golden_tables.rs
@@ -89,7 +89,7 @@ fn sort_record_batch(batch: RecordBatch) -> DeltaResult<RecordBatch> {
     Ok(RecordBatch::try_new(batch.schema(), columns)?)
 }
 
-// Ensure that two sets of  fields have the same names, and dict_id/ordering.
+// Ensure that two sets of fields have the same names and dict_is_ordered.
 // We ignore:
 //  - data type: This is checked already in `assert_columns_match`
 //  - nullability: parquet marks many things as nullable that we don't in our schema
@@ -103,10 +103,6 @@ fn assert_fields_match<'a>(
             actual_field.name() == expected_field.name(),
             "Field names don't match"
         );
-        assert!(
-            actual_field.dict_id() == expected_field.dict_id(),
-            "Field dict_id doesn't match"
-        );
         assert!(
             actual_field.dict_is_ordered() == expected_field.dict_is_ordered(),
             "Field dict_is_ordered doesn't match"