-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor compressors to be in separate files (#153)
* Refactor compressors into multiple files
  Refactors each compressor into a single file and the abstract API + NoCompressor to `Compressors/Compressors.jl`. Also adds API docs for the Compressor API in case people want to implement their own compressor. Future work may also explore making this compliant with Interfaces.jl so that we have a way to test that all compressors are compliant.
* Get docs building again
  TODO: there must be a better solution than this!
* Implement and test `fletcher32` compression
* Fix tests by explicitly importing
* Revert "Fix tests by explicitly importing"
  This reverts commit df8ed7a.
* Revert "Implement and test `fletcher32` compression"
  This reverts commit d7f2a69.
* Update src/Compressors/Compressors.jl
  Co-authored-by: Anshul Singhvi <[email protected]>
---------
Co-authored-by: Fabian Gans <[email protected]>
- Loading branch information
1 parent
b727aa3
commit 9a8892a
Showing
6 changed files
with
216 additions
and
153 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import JSON # for JSON.lower | ||
|
||
# `reinterpret` wrapper used by the compressors. A zero-dimensional array is first
# reshaped into a one-element vector; any other array is handed to `reinterpret` as is.
function _reinterpret(::Type{T}, x::AbstractArray{S, 0}) where {T, S}
    return reinterpret(T, reshape(x, 1))
end

function _reinterpret(::Type{T}, x::AbstractArray) where {T}
    return reinterpret(T, x)
end
|
# Root of the compressor type hierarchy; the interface contract is in the docstring.
"""
    abstract type Compressor

Supertype of every Zarr compressor.

## Interface

Each subtype of `Compressor` SHALL provide methods for:

- `zcompress(a, c::Compressor)`: compress the array `a` with the compressor `c`.
- `zuncompress(a, c::Compressor, T)`: uncompress the array `a` with the compressor `c`
  and return an array of type `T`.
- `JSON.lower(c::Compressor)`: return the JSON representation of the compressor `c`,
  following the Zarr specification for that compressor.
- `getCompressor(::Type{<:Compressor}, d::Dict)`: build a compressor object from the
  dictionary `d`, which holds the compressor's parameters according to the Zarr spec.

A subtype of `Compressor` MAY additionally provide:

- `zcompress!(compressed, data, c::Compressor)`: compress the array `data` with the
  compressor `c` and store the result in the array `compressed`.
- `zuncompress!(data, compressed, c::Compressor)`: uncompress the array `compressed`
  with the compressor `c` and store the result in the array `data`.

Finally, an entry MUST be added to the `compressortypes` dictionary for every compressor
type. The key is the compressor's name as given by the Zarr specification, and the value
is the compressor type (e.g. `BloscCompressor` or `NoCompressor`).

For example, the Blosc compressor is named "blosc" in the Zarr spec, so the entry for
[`BloscCompressor`](@ref) is added as `compressortypes["blosc"] = BloscCompressor`.
"""
abstract type Compressor end
|
||
# Registry from compressor id (a `String`, or `nothing`) to the compressor type that
# `getCompressor` dispatches on for that id.
const compressortypes = Dict{Union{String, Nothing}, Type{<:Compressor}}()
|
||
# function getCompressor end | ||
# function zcompress end | ||
# function zuncompress end | ||
# function zcompress! end | ||
# function zuncompress! end | ||
# JSON.lower is neither defined nor documented here, since that would be documentation piracy :yarr: | ||
|
||
# Include the compressor implementations | ||
include("blosc.jl") | ||
include("zlib.jl") | ||
|
||
# ## Fallback definitions for the compressor interface | ||
# Define fallbacks and generic methods for the compressor interface | ||
# Look up the compressor type registered under the dictionary's "id" entry and let that
# type's `getCompressor` method build the compressor from the same dictionary.
function getCompressor(compdict::Dict)
    ctype = compressortypes[compdict["id"]]
    return getCompressor(ctype, compdict)
end

# `nothing` maps to the compressor that performs no compression.
function getCompressor(::Nothing)
    return NoCompressor()
end
|
||
# Compression when no filter is given | ||
# A filter argument of `nothing` means there is no filter stack, so these methods
# forward straight to the three-argument forms.
function zcompress!(compressed, data, c, ::Nothing)
    return zcompress!(compressed, data, c)
end

function zuncompress!(data, compressed, c, ::Nothing)
    return zuncompress!(data, compressed, c)
end
|
||
# Fallback definition of mutating form of compress and uncompress | ||
# Generic in-place compression: `compressed` is emptied and then filled with the output
# of the allocating `zcompress`.
function zcompress!(compressed, data, c)
    empty!(compressed)
    return append!(compressed, zcompress(data, c))
end

# Generic in-place decompression: the output of the allocating `zuncompress`, requested
# with the element type of `data`, is copied into `data`.
function zuncompress!(data, compressed, c)
    decoded = zuncompress(compressed, c, eltype(data))
    return copyto!(data, decoded)
end
|
||
|
||
# Function given a filter stack | ||
# Compression with a filter stack `f`: every filter is applied in order with `zencode`,
# starting from `data`, and the final result is compressed into `compressed`.
function zcompress!(compressed, data, c, f)
    encoded = data
    for filt in f
        encoded = zencode(encoded, filt)
    end
    return zcompress!(compressed, encoded, c)
end
|
||
# Decompression with a filter stack `f`: the chunk is uncompressed into the destination
# type of the last filter, the filters are undone with `zdecode` from last to first, and
# the result is copied into `data`.
function zuncompress!(data, compressed, c, f)
    decoded = zuncompress(compressed, c, desttype(last(f)))
    for filt in Iterators.reverse(f)
        decoded = zdecode(decoded, filt)
    end
    return copyto!(data, decoded)
end
|
||
# ## `NoCompressor` | ||
# The default and most minimal implementation of a compressor follows here, which does | ||
# no actual compression. This is a good reference implementation for other compressors. | ||
|
# Field-less compressor type: it carries no parameters.
"""
    NoCompressor()

Create a compressor object that applies no compression. It can be passed to ZArray
constructors when the data should be stored without compression.
"""
struct NoCompressor <: Compressor end
|
||
# Without compression, "uncompressing" is only a reinterpretation of `a` as elements of `T`.
zuncompress(a, ::NoCompressor, T) = _reinterpret(T, a)
|
||
# Without compression, "compressing" is only a reinterpretation of `a` as `UInt8`.
zcompress(a, ::NoCompressor) = _reinterpret(UInt8, a)
|
||
# `NoCompressor` lowers to `nothing` for JSON serialization.
function JSON.lower(::NoCompressor)
    return nothing
end

# Register `NoCompressor` under the key `nothing`.
compressortypes[nothing] = NoCompressor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#= | ||
# Blosc compression | ||
This file implements a Blosc compressor via Blosc.jl. | ||
=# | ||
|
||
import Blosc | ||
|
||
# Parameter set of the Blosc compressor; `JSON.lower` writes each field under a key of
# the same name.
struct BloscCompressor <: Compressor
    blocksize::Int
    clevel::Int
    cname::String
    shuffle::Int
end

"""
    BloscCompressor(;blocksize=0, clevel=5, cname="lz4", shuffle=1)

Return a `BloscCompressor` struct that can serve as a Zarr array compressor.

# Keywords
* `blocksize=0`: Blosc block size setting, written to the compressor metadata under
  `"blocksize"`.
* `clevel=5`: the compression level, a number between 0 (no compression) and 9
  (max compression).
* `cname="lz4"`: compressor name, can be one of `"blosclz"`, `"lz4"`, and `"lz4hc"`.
* `shuffle=1`: either NOSHUFFLE (0), SHUFFLE (1), BITSHUFFLE (2) or AUTOSHUFFLE (-1).
  With AUTOSHUFFLE, bit-shuffle is used for buffers with itemsize 1 and byte-shuffle is
  used otherwise. The default is SHUFFLE.
"""
function BloscCompressor(; blocksize=0, clevel=5, cname="lz4", shuffle=1)
    return BloscCompressor(blocksize, clevel, cname, shuffle)
end
|
||
# Build a `BloscCompressor` from a metadata dictionary. The keys "blocksize", "clevel",
# "cname" and "shuffle" are all read with plain indexing, so each must be present.
function getCompressor(::Type{BloscCompressor}, d::Dict)
    blocksize = d["blocksize"]
    clevel = d["clevel"]
    cname = d["cname"]
    shuffle = d["shuffle"]
    return BloscCompressor(blocksize, clevel, cname, shuffle)
end
|
||
# Allocating Blosc decompression; a `Missing` component of `T` is stripped before the
# element type is handed to Blosc.
function zuncompress(a, ::BloscCompressor, T)
    elty = Base.nonmissingtype(T)
    return Blosc.decompress(elty, a)
end
|
||
# In-place Blosc decompression for dense arrays: `compressed` is decompressed into the
# flattened form of `data` obtained with `vec`.
function zuncompress!(data::DenseArray, compressed, ::BloscCompressor)
    flat = vec(data)
    return Blosc.decompress!(flat, compressed)
end
|
||
|
||
# Compress `a` with Blosc using the settings stored in `c`.
#
# - `c.shuffle == -1` (AUTOSHUFFLE) is resolved here: bit-shuffle for one-byte elements,
#   byte-shuffle otherwise. Any other value must be one of Blosc's shuffle constants.
# - `c.cname` and `c.blocksize` are applied through Blosc's global setters before
#   compressing, so the block size recorded in the metadata is the one actually used
#   (`0` lets Blosc pick the block size automatically).
#
# Throws `ArgumentError` for an invalid shuffle value or a negative block size.
function zcompress(a, c::BloscCompressor)
    shuffle = c.shuffle
    # Auto shuffle logic taken from
    # https://github.com/zarr-developers/numcodecs/blob/7d8f9762b4f0f9b5e135688b2eeb3f783f90f208/numcodecs/blosc.pyx#L264-L272
    if shuffle == -1
        shuffle = sizeof(eltype(a)) == 1 ? Blosc.BITSHUFFLE : Blosc.SHUFFLE
    elseif shuffle ∉ (Blosc.NOSHUFFLE, Blosc.SHUFFLE, Blosc.BITSHUFFLE)
        throw(ArgumentError("invalid shuffle argument; expected -1, 0, 1 or 2, found $shuffle"))
    end
    if c.blocksize < 0
        throw(ArgumentError("invalid blocksize argument; expected a non-negative integer, found $(c.blocksize)"))
    end
    Blosc.set_compressor(c.cname)
    Blosc.set_blocksize(c.blocksize)
    return Blosc.compress(a; level=c.clevel, shuffle=shuffle)
end
|
||
# JSON representation of a Blosc compressor: the id "blosc" plus one entry per field.
function JSON.lower(c::BloscCompressor)
    return Dict(
        "id" => "blosc",
        "cname" => c.cname,
        "clevel" => c.clevel,
        "shuffle" => c.shuffle,
        "blocksize" => c.blocksize,
    )
end

# Register the Blosc compressor under the id "blosc".
Zarr.compressortypes["blosc"] = BloscCompressor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#= | ||
# Zlib compression | ||
This file implements a Zlib compressor via CodecZlib.jl. | ||
=# | ||
|
||
import CodecZlib | ||
|
# Parameter set of the zlib compressor: only the compression level.
"""
    ZlibCompressor(;clevel=-1)

Return a `ZlibCompressor` struct that can serve as a Zarr array compressor.

# Keywords
* `clevel=-1`: the compression level, either -1 (default) or a number between 0
  (no compression) and 9 (max compression). The default of -1 is a compromise between
  speed and compression (currently equivalent to level 6).
"""
struct ZlibCompressor <: Compressor
    clevel::Int
end

function ZlibCompressor(; clevel=-1)
    return ZlibCompressor(clevel)
end
|
||
# Build a `ZlibCompressor` from a metadata dictionary; the level is read from the
# "level" key.
getCompressor(::Type{ZlibCompressor}, d::Dict) = ZlibCompressor(d["level"])
|
||
# Inflate `a` with zlib and reinterpret the resulting bytes as elements of `T`, with any
# `Missing` component of `T` stripped first.
function zuncompress(a, ::ZlibCompressor, T)
    elty = Base.nonmissingtype(T)
    return _reinterpret(elty, transcode(CodecZlib.ZlibDecompressor, a))
end
|
||
# Compress `a` with zlib at the level stored in `c.clevel`.
#
# The codec is constructed with the configured level, so the level recorded in the
# metadata is the one actually used. Passing only the codec type to `transcode` would
# always compress at the codec's default level.
function zcompress(a, c::ZlibCompressor)
    # Indexing with `[:]` copies the reinterpreted data into a plain byte vector.
    a_uint8 = _reinterpret(UInt8, a)[:]
    codec = CodecZlib.ZlibCompressor(; level=c.clevel)
    # A codec instance is initialized and finalized explicitly around `transcode`.
    CodecZlib.TranscodingStreams.initialize(codec)
    try
        return transcode(codec, a_uint8)
    finally
        CodecZlib.TranscodingStreams.finalize(codec)
    end
end
|
||
# JSON representation of a zlib compressor: the id "zlib" and the compression level.
function JSON.lower(z::ZlibCompressor)
    return Dict("id" => "zlib", "level" => z.clevel)
end

# Register the zlib compressor under the id "zlib".
Zarr.compressortypes["zlib"] = ZlibCompressor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters