diff --git a/Cargo.lock b/Cargo.lock index d5a23d1a340e..dd3943ce8c89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -963,15 +963,6 @@ dependencies = [ "error-code", ] -[[package]] -name = "cmake" -version = "0.1.52" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c682c223677e0e5b6b7f63a64b9351844c3f1b1678a68b7ee617e30fb082620e" -dependencies = [ - "cc", -] - [[package]] name = "comfy-table" version = "7.1.3" @@ -1412,7 +1403,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" dependencies = [ "crc32fast", - "libz-ng-sys", + "libz-rs-sys", "miniz_oxide", ] @@ -2302,13 +2293,12 @@ dependencies = [ ] [[package]] -name = "libz-ng-sys" -version = "1.1.20" +name = "libz-rs-sys" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f0f7295a34685977acb2e8cc8b08ee4a8dffd6cf278eeccddbe1ed55ba815d5" +checksum = "a90e19106f1b2c93f1fa6cdeec2e56facbf2e403559c1e1c0ddcc6d46e979cdf" dependencies = [ - "cmake", - "libc", + "zlib-rs", ] [[package]] @@ -5656,6 +5646,12 @@ dependencies = [ "syn 2.0.94", ] +[[package]] +name = "zlib-rs" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aada01553a9312bad4b9569035a1f12b05e5ec9770a1a4b323757356928944f8" + [[package]] name = "zstd" version = "0.13.2" diff --git a/crates/Makefile b/crates/Makefile index da0cea37d6cf..788d1e37edd1 100644 --- a/crates/Makefile +++ b/crates/Makefile @@ -138,7 +138,6 @@ check-wasm: ## Check wasm build without supported features --exclude-features azure \ --exclude-features cloud \ --exclude-features decompress \ - --exclude-features decompress-fast \ --exclude-features default \ --exclude-features docs-selection \ --exclude-features extract_jsonpath \ diff --git a/crates/polars-io/Cargo.toml b/crates/polars-io/Cargo.toml index 4c3ed4b04a76..e06a7aad1b8a 100644 --- a/crates/polars-io/Cargo.toml +++ b/crates/polars-io/Cargo.toml @@ -76,8 +76,7 @@ ipc_streaming = ["arrow/io_ipc", "arrow/io_ipc_compression"] # support for arrow avro parsing avro = ["arrow/io_avro", "arrow/io_avro_compression"] csv = ["atoi_simd", "polars-core/rows", "itoa", "ryu", "fast-float2", "simdutf8"] -decompress = ["flate2/rust_backend", "zstd"] -decompress-fast = ["flate2/zlib-ng", "zstd"] +decompress = ["flate2/zlib-rs", "zstd"] dtype-u8 = ["polars-core/dtype-u8"] dtype-u16 = ["polars-core/dtype-u16"] dtype-i8 = ["polars-core/dtype-i8"] diff --git a/crates/polars-io/src/csv/read/read_impl.rs b/crates/polars-io/src/csv/read/read_impl.rs index 763873667345..7bc023acc83c 100644 --- a/crates/polars-io/src/csv/read/read_impl.rs +++ b/crates/polars-io/src/csv/read/read_impl.rs @@ -19,13 +19,12 @@ use super::parser::{ }; use super::reader::prepare_csv_schema; use super::schema_inference::{check_decimal_comma, infer_file_schema}; -#[cfg(any(feature = "decompress", feature = "decompress-fast"))] +#[cfg(feature = "decompress")] use super::utils::decompress; use super::CsvParseOptions; use crate::csv::read::parser::skip_this_line_naive; use crate::mmap::ReaderBytes; use crate::predicates::PhysicalIoExpr; -#[cfg(not(any(feature = "decompress", feature = "decompress-fast")))] use crate::utils::compression::SupportedCompression; use crate::utils::update_row_counts2; use crate::RowIndex; @@ -161,20 +160,19 @@ impl<'a> CoreReader<'a> { let separator = parse_options.separator; check_decimal_comma(parse_options.decimal_comma, separator)?; - #[cfg(any(feature = "decompress", feature = "decompress-fast"))] + #[cfg(feature = "decompress")] let mut reader_bytes = reader_bytes; - #[cfg(not(any(feature = "decompress", feature = "decompress-fast")))] - if SupportedCompression::check(&reader_bytes).is_some() { + if !cfg!(feature = "decompress") && SupportedCompression::check(&reader_bytes).is_some() { polars_bail!( ComputeError: "cannot read compressed CSV file; \ - compile with feature 'decompress' or 'decompress-fast'" + compile with feature 'decompress'" ); } // We keep track of the inferred schema bool // In case the file is compressed this schema inference is wrong and has to be done // again after decompression. - #[cfg(any(feature = "decompress", feature = "decompress-fast"))] + #[cfg(feature = "decompress")] { let total_n_rows = n_rows.map(|n| skip_rows + (has_header as usize) + skip_rows_after_header + n); diff --git a/crates/polars-io/src/csv/read/utils.rs b/crates/polars-io/src/csv/read/utils.rs index 802f60d93d3b..fa46fb765756 100644 --- a/crates/polars-io/src/csv/read/utils.rs +++ b/crates/polars-io/src/csv/read/utils.rs @@ -1,9 +1,9 @@ -#[cfg(any(feature = "decompress", feature = "decompress-fast"))] +#[cfg(feature = "decompress")] use std::io::Read; use std::mem::MaybeUninit; use super::parser::next_line_position; -#[cfg(any(feature = "decompress", feature = "decompress-fast"))] +#[cfg(feature = "decompress")] use super::parser::next_line_position_naive; use super::splitfields::SplitFields; @@ -45,7 +45,7 @@ pub(crate) fn get_file_chunks( offsets } -#[cfg(any(feature = "decompress", feature = "decompress-fast"))] +#[cfg(feature = "decompress")] fn decompress_impl( decoder: &mut R, n_rows: Option, @@ -121,7 +121,7 @@ fn decompress_impl( }) } -#[cfg(any(feature = "decompress", feature = "decompress-fast"))] +#[cfg(feature = "decompress")] pub(crate) fn decompress( bytes: &[u8], n_rows: Option, @@ -142,7 +142,7 @@ pub(crate) fn decompress( decompress_impl(&mut decoder, n_rows, separator, quote_char, eol_char) }, SupportedCompression::ZSTD => { - let mut decoder = zstd::Decoder::new(bytes).ok()?; + let mut decoder = zstd::Decoder::with_buffer(bytes).ok()?; decompress_impl(&mut decoder, n_rows, separator, quote_char, eol_char) }, } diff --git a/crates/polars-io/src/utils/compression.rs b/crates/polars-io/src/utils/compression.rs index 4c8750f4f9f6..1875c8e75e7c 100644 --- a/crates/polars-io/src/utils/compression.rs +++ b/crates/polars-io/src/utils/compression.rs @@ -1,7 +1,7 @@ use std::io::Read; use polars_core::prelude::*; -use polars_error::to_compute_err; +use polars_error::{feature_gated, to_compute_err}; /// Represents the compression algorithms that we have decoders for pub enum SupportedCompression { @@ -36,8 +36,7 @@ pub fn maybe_decompress_bytes<'a>(bytes: &'a [u8], out: &'a mut Vec) -> Pola assert!(out.is_empty()); if let Some(algo) = SupportedCompression::check(bytes) { - #[cfg(any(feature = "decompress", feature = "decompress-fast"))] - { + feature_gated!("decompress", { match algo { SupportedCompression::GZIP => { flate2::read::MultiGzDecoder::new(bytes) @@ -50,16 +49,12 @@ pub fn maybe_decompress_bytes<'a>(bytes: &'a [u8], out: &'a mut Vec) -> Pola .map_err(to_compute_err)?; }, SupportedCompression::ZSTD => { - zstd::Decoder::new(bytes)?.read_to_end(out)?; + zstd::Decoder::with_buffer(bytes)?.read_to_end(out)?; }, } Ok(out) - } - #[cfg(not(any(feature = "decompress", feature = "decompress-fast")))] - { - panic!("cannot decompress without 'decompress' or 'decompress-fast' feature") - } + }) } else { Ok(bytes) } diff --git a/crates/polars-parquet/Cargo.toml b/crates/polars-parquet/Cargo.toml index b274fdf93fd5..27dda4f55e14 100644 --- a/crates/polars-parquet/Cargo.toml +++ b/crates/polars-parquet/Cargo.toml @@ -38,7 +38,7 @@ lz4 = { version = "1.24", optional = true } lz4_flex = { version = "0.11", optional = true } serde = { workspace = true, optional = true } snap = { version = "^1.1", optional = true } -zstd = { version = "^0.13", optional = true, default-features = false } +zstd = { workspace = true, optional = true } xxhash-rust = { version = "0.8", optional = true, features = ["xxh64"] } @@ -47,17 +47,16 @@ rand = "0.8" [features] compression = [ - "zstd", + "brotli", "gzip", - "snappy", "lz4", - "brotli", + "snappy", + "zstd", ] # compression backends snappy = ["snap"] -gzip = ["flate2/rust_backend"] -gzip_zlib_ng = ["flate2/zlib-ng"] +gzip = ["flate2/zlib-rs"] lz4 = ["dep:lz4"] lz4_flex = ["dep:lz4_flex"] diff --git a/crates/polars-parquet/src/parquet/compression.rs b/crates/polars-parquet/src/parquet/compression.rs index f8e90f65e3ee..6ad0eeb3ab27 100644 --- a/crates/polars-parquet/src/parquet/compression.rs +++ b/crates/polars-parquet/src/parquet/compression.rs @@ -212,7 +212,7 @@ pub fn decompress( #[cfg(feature = "zstd")] Compression::Zstd => { use std::io::Read; - let mut decoder = zstd::Decoder::new(input_buf)?; + let mut decoder = zstd::Decoder::with_buffer(input_buf)?; decoder.read_exact(output_buf).map_err(|e| e.into()) }, #[cfg(not(feature = "zstd"))] diff --git a/crates/polars-python/Cargo.toml b/crates/polars-python/Cargo.toml index b0ab044862e3..89e1af64fd1a 100644 --- a/crates/polars-python/Cargo.toml +++ b/crates/polars-python/Cargo.toml @@ -136,7 +136,7 @@ streaming = ["polars/streaming"] meta = ["polars/meta"] index_of = ["polars/index_of"] search_sorted = ["polars/search_sorted"] -decompress = ["polars/decompress-fast"] +decompress = ["polars/decompress"] regex = ["polars/regex"] csv = ["polars/csv"] clipboard = ["arboard"] diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index bb7fe3c06c9c..fb08dd55c79c 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -153,7 +153,6 @@ month_start = ["polars-lazy?/month_start"] month_end = ["polars-lazy?/month_end"] offset_by = ["polars-lazy?/offset_by"] decompress = ["polars-io/decompress"] -decompress-fast = ["polars-io/decompress-fast"] describe = ["polars-core/describe"] diagonal_concat = ["polars-core/diagonal_concat", "polars-lazy?/diagonal_concat", "polars-sql?/diagonal_concat"] diff = ["polars-ops/diff", "polars-lazy?/diff"]