From 85b0d936c8d4f58d5c3d9ee4194a1540d4ac42a5 Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Mon, 30 Dec 2024 22:04:42 +0000 Subject: [PATCH] chore: expand the arrow version range to allow arrow v54 This was released last week and in preliminary testing it appears safe to incorporate for the latest version range Note: Field.dict_id() is going away, and where it is being used for field comparisons I don't believe it adds substantial value. There's no replacement for it in Arrow 54+ Signed-off-by: R. Tyler Croy --- Cargo.toml | 26 +++++++++++++++----------- acceptance/src/data.rs | 6 +----- ffi/Cargo.toml | 6 +++--- kernel/tests/golden_tables.rs | 6 +----- 4 files changed, 20 insertions(+), 24 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ead0064ca..6cb2c998a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,17 +22,21 @@ rust-version = "1.80" version = "0.6.0" [workspace.dependencies] -arrow = { version = ">=53, <54" } -arrow-arith = { version = ">=53, <54" } -arrow-array = { version = ">=53, <54" } -arrow-buffer = { version = ">=53, <54" } -arrow-cast = { version = ">=53, <54" } -arrow-data = { version = ">=53, <54" } -arrow-ord = { version = ">=53, <54" } -arrow-json = { version = ">=53, <54" } -arrow-select = { version = ">=53, <54" } -arrow-schema = { version = ">=53, <54" } -parquet = { version = ">=53, <54", features = ["object_store"] } +# When changing the arrow version range, also modify ffi/Cargo.toml which has +# its own arrow version ranges with modified features. 
Failure to do so will +# result in compilation errors as two different sets of arrow dependencies may +# be sourced +arrow = { version = ">=53, <55" } +arrow-arith = { version = ">=53, <55" } +arrow-array = { version = ">=53, <55" } +arrow-buffer = { version = ">=53, <55" } +arrow-cast = { version = ">=53, <55" } +arrow-data = { version = ">=53, <55" } +arrow-ord = { version = ">=53, <55" } +arrow-json = { version = ">=53, <55" } +arrow-select = { version = ">=53, <55" } +arrow-schema = { version = ">=53, <55" } +parquet = { version = ">=53, <55", features = ["object_store"] } object_store = { version = ">=0.11, <0.12" } hdfs-native-object-store = "0.12.0" hdfs-native = "0.10.0" diff --git a/acceptance/src/data.rs b/acceptance/src/data.rs index 9832ac8a4..c515d50c9 100644 --- a/acceptance/src/data.rs +++ b/acceptance/src/data.rs @@ -61,7 +61,7 @@ pub fn sort_record_batch(batch: RecordBatch) -> DeltaResult { Ok(RecordBatch::try_new(batch.schema(), columns)?) } -// Ensure that two schema have the same field names, and dict_id/ordering. 
+// Ensure that two schema have the same field names, and dict_is_ordered // We ignore: // - data type: This is checked already in `assert_columns_match` // - nullability: parquet marks many things as nullable that we don't in our schema @@ -72,10 +72,6 @@ fn assert_schema_fields_match(schema: &Schema, golden: &Schema) { schema_field.name() == golden_field.name(), "Field names don't match" ); - assert!( - schema_field.dict_id() == golden_field.dict_id(), - "Field dict_id doesn't match" - ); assert!( schema_field.dict_is_ordered() == golden_field.dict_is_ordered(), "Field dict_is_ordered doesn't match" diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index 08162a505..25897d965 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -24,13 +24,13 @@ delta_kernel = { path = "../kernel", default-features = false, features = [ delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.6.0" } # used if we use the default engine to be able to move arrow data into the c-ffi format -arrow-schema = { version = "53.0", default-features = false, features = [ +arrow-schema = { version = ">=53, <55", default-features = false, features = [ "ffi", ], optional = true } -arrow-data = { version = "53.0", default-features = false, features = [ +arrow-data = { version = ">=53, <55", default-features = false, features = [ "ffi", ], optional = true } -arrow-array = { version = "53.0", default-features = false, optional = true } +arrow-array = { version = ">=53, <55", default-features = false, optional = true } [build-dependencies] cbindgen = "0.27.0" diff --git a/kernel/tests/golden_tables.rs b/kernel/tests/golden_tables.rs index 1d0c8406b..cd9023db1 100644 --- a/kernel/tests/golden_tables.rs +++ b/kernel/tests/golden_tables.rs @@ -89,7 +89,7 @@ fn sort_record_batch(batch: RecordBatch) -> DeltaResult { Ok(RecordBatch::try_new(batch.schema(), columns)?) } -// Ensure that two sets of fields have the same names, and dict_id/ordering. 
+// Ensure that two sets of fields have the same names, and dict_is_ordered // We ignore: // - data type: This is checked already in `assert_columns_match` // - nullability: parquet marks many things as nullable that we don't in our schema @@ -103,10 +103,6 @@ fn assert_fields_match<'a>( actual_field.name() == expected_field.name(), "Field names don't match" ); - assert!( - actual_field.dict_id() == expected_field.dict_id(), - "Field dict_id doesn't match" - ); assert!( actual_field.dict_is_ordered() == expected_field.dict_is_ordered(), "Field dict_is_ordered doesn't match"