Skip to content

Commit

Permalink
fix: Fix should_rechunk check (#16852)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Jun 10, 2024
1 parent c9cad0a commit 3fe4cfe
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 0 deletions.
7 changes: 7 additions & 0 deletions crates/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,13 @@ impl DataFrame {

/// Returns true if the chunks of the columns do not align and re-chunking should be done
pub fn should_rechunk(&self) -> bool {
// Fast check. It is also needed for correctness, as code below doesn't check if the number
// of chunks is equal.
if !self.get_columns().iter().map(|s| s.n_chunks()).all_equal() {
return true;
}

// From here we check chunk lengths.
let mut chunk_lengths = self.columns.iter().map(|s| s.chunk_lengths());
match chunk_lengths.next() {
None => false,
Expand Down
16 changes: 16 additions & 0 deletions py-polars/tests/unit/test_chunks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import numpy as np

import polars as pl


def test_chunks_align_16830() -> None:
    """Smoke test: `set_sorted` on a frame whose columns have differing chunk layouts.

    The test contains no assertions; it passes as long as no call raises.
    The number in the test name presumably refers to the upstream issue.
    """
    repeats = 2

    # Two identical integer columns, each value 0..9 repeated `repeats` times.
    source = pl.DataFrame(
        {
            "index_1": np.repeat(np.arange(10), repeats),
            "index_2": np.repeat(np.arange(10), repeats),
        }
    )

    # Re-assemble the frame from two slices without rechunking.
    head = source[0:10]
    tail = source[10:]
    frame = pl.concat([head, tail], rechunk=False)

    # Filter the chunked frame down to the rows where `index_1` is 0.
    keep_mask = frame["index_1"] == 0
    frame = frame.filter(keep_mask)

    # Replace `index_2` with a freshly built Series, so that its chunk
    # layout differs from the filtered `index_1` column.
    replacement = pl.Series(values=[0] * repeats)
    frame = frame.with_columns(index_2=replacement)

    # Per the original author's note, this call triggers `select_chunk`.
    frame.set_sorted("index_2")

0 comments on commit 3fe4cfe

Please sign in to comment.