Skip to content

Commit

Permalink
Add arrow::compute::take_arrays
Browse files Browse the repository at this point in the history
Adds a modified version of the take_arrays function from upstream arrow-rs.
  • Loading branch information
srh authored Nov 7, 2024
1 parent 1963799 commit ea7d119
Showing 1 changed file with 68 additions and 0 deletions.
68 changes: 68 additions & 0 deletions arrow/src/compute/kernels/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,74 @@ where
}
}

// From upstream arrow-rs. But upstream had `indices: &dyn Array` -- we have the generic parameter
// IndexType and use `indices: &PrimitiveArray<IndexType>` in order to work with the existing `take`
// function.
/// Applies [`take`] to every array in `arrays` using one shared set of `indices`, and returns
/// the gathered arrays in the same order as the inputs.
///
/// ```text
/// ┌────────┬────────┐
/// │        │        │           ┌────────┐                                ┌────────┬────────┐
/// │   A    │   1    │           │        │                                │        │        │
/// ├────────┼────────┤           │   0    │                                │   A    │   1    │
/// │        │        │           ├────────┤                                ├────────┼────────┤
/// │   D    │   4    │           │        │                                │        │        │
/// ├────────┼────────┤           │   2    │  take_arrays(values,indices)   │   B    │   2    │
/// │        │        │           ├────────┤                                ├────────┼────────┤
/// │   B    │   2    │           │        │  ───────────────────────────►  │        │        │
/// ├────────┼────────┤           │   3    │                                │   C    │   3    │
/// │        │        │           ├────────┤                                ├────────┼────────┤
/// │   C    │   3    │           │        │                                │        │        │
/// ├────────┼────────┤           │   1    │                                │   D    │   4    │
/// │        │        │           └────────┘                                └────────┴────────┘
/// │   E    │   5    │
/// └────────┴────────┘
///    values arrays            indices array                                     result
/// ```
///
/// # Errors
/// The first error returned by [`take`] for any input array is propagated and the remaining
/// arrays are not processed. This function errors whenever:
/// * An index cannot be cast to `usize` (typically 32 bit architectures)
/// * An index is out of bounds and `options` is set to check bounds.
///
/// # Panics
///
/// When `options` is not set to check bounds, taking indexes after `len` will panic.
///
/// # Examples
/// ```
/// /* Commented out so doc-comment compilation checks pass (as this uses upstream stuff like cast::AsArray).
/// # use std::sync::Arc;
/// # use arrow_array::{StringArray, UInt32Array, cast::AsArray};
/// # use arrow_select::take::{take, take_arrays};
/// let string_values = Arc::new(StringArray::from(vec!["zero", "one", "two"]));
/// let values = Arc::new(UInt32Array::from(vec![0, 1, 2]));
///
/// // Take items at index 2, and 1:
/// let indices = UInt32Array::from(vec![2, 1]);
/// let taken_arrays = take_arrays(&[string_values, values], &indices, None).unwrap();
/// let taken_string = taken_arrays[0].as_string::<i32>();
/// assert_eq!(*taken_string, StringArray::from(vec!["two", "one"]));
/// let taken_values = taken_arrays[1].as_primitive();
/// assert_eq!(*taken_values, UInt32Array::from(vec![2, 1]));
/// */
/// ```
pub fn take_arrays<IndexType>(
    arrays: &[ArrayRef],
    indices: &PrimitiveArray<IndexType>,
    options: Option<TakeOptions>,
) -> std::result::Result<Vec<ArrayRef>, ArrowError>
where
    IndexType: ArrowNumericType,
    IndexType::Native: num::ToPrimitive,
{
    // One output array per input array, in input order.
    let mut taken = Vec::with_capacity(arrays.len());
    for column in arrays {
        // `take` receives its options by value, so every call is handed its own clone.
        // `?` stops at the first failing array, just like collecting into a `Result`.
        taken.push(take(column.as_ref(), indices, options.clone())?);
    }
    Ok(taken)
}

/// Options that define how `take` should behave
#[derive(Clone, Debug)]
pub struct TakeOptions {
Expand Down

0 comments on commit ea7d119

Please sign in to comment.