Skip to content

Commit

Permalink
fix: Soundness when loading Parquet string statistics
Browse files Browse the repository at this point in the history
Fixes #20533.
  • Loading branch information
coastalwhite committed Jan 7, 2025
1 parent f104170 commit eba9a18
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
11 changes: 10 additions & 1 deletion crates/polars-arrow/src/array/binview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@ pub trait ViewType: Sealed + 'static + PartialEq + AsRef<Self> {
type Owned: Debug + Clone + Sync + Send + AsRef<Self>;

/// # Safety
/// The caller must ensure `index < self.len()`.
/// The caller must ensure that `slice` is a valid view.
unsafe fn from_bytes_unchecked(slice: &[u8]) -> &Self;
fn from_bytes(slice: &[u8]) -> Option<&Self>;

fn to_bytes(&self) -> &[u8];

Expand All @@ -70,6 +71,10 @@ impl ViewType for str {
/// Reinterprets `slice` as a `&str` without checking that it is valid UTF-8.
///
/// # Safety
/// The caller must ensure that `slice` contains valid UTF-8; passing anything
/// else is undefined behavior (see `std::str::from_utf8_unchecked`).
unsafe fn from_bytes_unchecked(slice: &[u8]) -> &Self {
// No validation happens here; soundness rests entirely on the caller's guarantee.
std::str::from_utf8_unchecked(slice)
}
/// Checked conversion from raw bytes to `&str`.
///
/// Returns `Some` when `slice` is valid UTF-8 and `None` otherwise, so it is
/// safe to call on bytes whose encoding has not been verified.
#[inline(always)]
fn from_bytes(slice: &[u8]) -> Option<&Self> {
// `.ok()` drops the `Utf8Error` details; callers only learn that validation failed.
std::str::from_utf8(slice).ok()
}

#[inline(always)]
fn to_bytes(&self) -> &[u8] {
Expand All @@ -93,6 +98,10 @@ impl ViewType for [u8] {
/// Returns `slice` unchanged, since the view type here is already `[u8]`.
///
/// # Safety
/// The body performs no unsafe operation, so any byte slice is acceptable; the
/// `unsafe` qualifier is kept to match the `ViewType` trait declaration.
unsafe fn from_bytes_unchecked(slice: &[u8]) -> &Self {
slice
}
/// Checked conversion from raw bytes to `&[u8]`.
///
/// No validation is performed on the bytes, so this always returns
/// `Some(slice)`.
#[inline(always)]
fn from_bytes(slice: &[u8]) -> Option<&Self> {
Some(slice)
}

#[inline(always)]
fn to_bytes(&self) -> &[u8] {
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-parquet/src/arrow/read/statistics/binview.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ pub(super) fn push<T: ViewType + ?Sized>(

min.push(from.and_then(|s| {
let opt_b = s.min_value.as_deref();
unsafe { opt_b.map(|b| T::from_bytes_unchecked(b)) }
opt_b.and_then(T::from_bytes)
}));
max.push(from.and_then(|s| {
let opt_b = s.max_value.as_deref();
unsafe { opt_b.map(|b| T::from_bytes_unchecked(b)) }
opt_b.and_then(T::from_bytes)
}));

Ok(())
Expand Down

0 comments on commit eba9a18

Please sign in to comment.