Skip to content

Commit

Permalink
Refactor compressors to be in separate files (#153)
Browse files Browse the repository at this point in the history
* Refactor compressors into multiple files

Refactors each compressor into its own file and moves the abstract API and `NoCompressor` to `Compressors/Compressors.jl`. Also adds API docs for the Compressor interface, in case people want to implement their own compressors.

Future work may also explore making this compliant with Interfaces.jl so that we have a way to test that all compressors are compliant.

* Get docs building again

TODO: there must be a better solution than this!

* Implement and test `fletcher32` compression

* Fix tests by explicitly importing

* Revert "Fix tests by explicitly importing"

This reverts commit df8ed7a.

* Revert "Implement and test `fletcher32` compression"

This reverts commit d7f2a69.

* Update src/Compressors/Compressors.jl

Co-authored-by: Anshul Singhvi <[email protected]>

---------

Co-authored-by: Fabian Gans <[email protected]>
  • Loading branch information
asinghvi17 and meggart authored Oct 21, 2024
1 parent b727aa3 commit 9a8892a
Show file tree
Hide file tree
Showing 6 changed files with 216 additions and 153 deletions.
2 changes: 1 addition & 1 deletion docs/src/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ Pages = ["ZGroup.jl"]

```@autodocs
Modules = [Zarr]
Pages = ["Compressors.jl"]
Pages = ["Compressors/Compressors.jl", "Compressors/blosc.jl", "Compressors/zlib.jl"]
```
151 changes: 0 additions & 151 deletions src/Compressors.jl

This file was deleted.

106 changes: 106 additions & 0 deletions src/Compressors/Compressors.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import JSON # for JSON.lower

# Reinterpret the elements of `x` as values of type `T`.
# A zero-dimensional array is first reshaped into a one-element vector and that vector
# is reinterpreted; every other array is handed to `reinterpret` unchanged.
function _reinterpret(::Type{T}, x::AbstractArray{S, 0}) where {T, S}
    as_vector = reshape(x, 1)
    return reinterpret(T, as_vector)
end

function _reinterpret(::Type{T}, x::AbstractArray) where {T}
    return reinterpret(T, x)
end

"""
    abstract type Compressor

The abstract supertype for all Zarr compressors.

## Interface

All subtypes of `Compressor` SHALL implement the following methods:

- `zcompress(a, c::Compressor)`: compress the array `a` using the compressor `c`.
- `zuncompress(a, c::Compressor, T)`: uncompress the array `a` using the compressor `c`
  and return an array of type `T`.
- `JSON.lower(c::Compressor)`: return a JSON representation of the compressor `c`, which
  follows the Zarr specification for that compressor.
- `getCompressor(::Type{<:Compressor}, d::Dict)`: return a compressor object from a given
  dictionary `d` which contains the compressor's parameters according to the Zarr spec.

Subtypes of `Compressor` MAY also implement the following methods:

- `zcompress!(compressed, data, c::Compressor)`: compress the array `data` using the
  compressor `c` and store the result in the array `compressed`.
- `zuncompress!(data, compressed, c::Compressor)`: uncompress the array `compressed`
  using the compressor `c` and store the result in the array `data`.

Finally, an entry MUST be added to the `compressortypes` dictionary for each compressor type.
This must also follow the Zarr specification's name for that compressor. The name of the
compressor is the key, and the value is the compressor type (e.g. `BloscCompressor` or
`NoCompressor`).

For example, the Blosc compressor is named "blosc" in the Zarr spec, so the entry for
[`BloscCompressor`](@ref) must be added to `compressortypes` as
`compressortypes["blosc"] = BloscCompressor`.
"""
abstract type Compressor end

# Registry mapping a compressor identifier to the compressor type that implements it.
# Keys are either a `String` or `nothing`; values are subtypes of `Compressor`.
# The one-argument `getCompressor(::Dict)` looks up the dictionary's "id" entry here.
const compressortypes = Dict{Union{String,Nothing}, Type{<: Compressor}}()

# function getCompressor end
# function zcompress end
# function zuncompress end
# function zcompress! end
# function zuncompress! end
# JSON.lower is neither defined nor documented here, since that would be documentation piracy :yarr:

# Include the compressor implementations
include("blosc.jl")
include("zlib.jl")

# ## Fallback definitions for the compressor interface
# Define fallbacks and generic methods for the compressor interface
# Look up the compressor type registered under the dictionary's "id" entry and let the
# type-specific `getCompressor` method build the compressor from the same dictionary.
function getCompressor(compdict::Dict)
    ctype = compressortypes[compdict["id"]]
    return getCompressor(ctype, compdict)
end

# `nothing` in place of a parameter dictionary yields a `NoCompressor`.
function getCompressor(::Nothing)
    return NoCompressor()
end

# When `nothing` is passed in the filter position, the four-argument entry points
# forward directly to the three-argument methods.
function zcompress!(compressed, data, c, ::Nothing)
    return zcompress!(compressed, data, c)
end

function zuncompress!(data, compressed, c, ::Nothing)
    return zuncompress!(data, compressed, c)
end

# Generic mutating fallbacks built on the non-mutating `zcompress` / `zuncompress`.

# Empty `compressed`, then append the output of `zcompress(data, c)` to it.
function zcompress!(compressed, data, c)
    return append!(empty!(compressed), zcompress(data, c))
end

# Uncompress to the element type of `data` and copy the result into `data`.
function zuncompress!(data, compressed, c)
    decoded = zuncompress(compressed, c, eltype(data))
    return copyto!(data, decoded)
end


# Compression with a filter stack `f`: each filter is applied in order via `zencode`,
# starting from `data`, and the final encoded array is compressed into `compressed`.
function zcompress!(compressed, data, c, f)
    encoded = foldl((acc, filt) -> zencode(acc, filt), f; init=data)
    return zcompress!(compressed, encoded, c)
end

# Decompression with a filter stack `f`: the buffer is uncompressed to the type reported
# by `desttype` for the last filter, the filters are then undone from last to first via
# `zdecode`, and the result is copied into `data`.
function zuncompress!(data, compressed, c, f)
    raw = zuncompress(compressed, c, desttype(last(f)))
    decoded = foldr((filt, acc) -> zdecode(acc, filt), f; init=raw)
    return copyto!(data, decoded)
end

# ## `NoCompressor`
# The default and most minimal implementation of a compressor follows here, which does
# no actual compression. This is a good reference implementation for other compressors.

"""
    NoCompressor()

Creates an object that can be passed to ZArray constructors without compression.
"""
struct NoCompressor <: Compressor end

# "Uncompressing" without a compressor only reinterprets `a` with element type `T`.
zuncompress(a, ::NoCompressor, T) = _reinterpret(T, a)

# "Compressing" without a compressor only reinterprets `a` as `UInt8` values.
zcompress(a, ::NoCompressor) = _reinterpret(UInt8, a)

# `NoCompressor` is lowered to `nothing` for JSON serialization.
function JSON.lower(::NoCompressor)
    return nothing
end

# Register `NoCompressor` in the compressor registry under the key `nothing`.
compressortypes[nothing] = NoCompressor
70 changes: 70 additions & 0 deletions src/Compressors/blosc.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#=
# Blosc compression
This file implements a Blosc compressor via Blosc.jl.
=#

import Blosc

# Settings for Blosc compression of Zarr chunks.
struct BloscCompressor <: Compressor
# Stored and written back by `JSON.lower`; not read by `zcompress` in this file.
blocksize::Int
# Compression level, passed to `Blosc.compress` as `level`.
clevel::Int
# Name of the Blosc compressor, passed to `Blosc.set_compressor`.
cname::String
# Shuffle mode; -1 selects a mode automatically from the element size in `zcompress`.
shuffle::Int
end

"""
    BloscCompressor(; blocksize=0, clevel=5, cname="lz4", shuffle=1)

Build a `BloscCompressor` that can serve as a Zarr array compressor.

# Keywords
* `blocksize=0`: stored on the compressor as given
* `clevel=5`: the compression level, number between 0 (no compression) and 9 (max compression)
* `cname="lz4"`: compressor name, can be one of `"blosclz"`, `"lz4"`, and `"lz4hc"`
* `shuffle=1`: either NOSHUFFLE (0), SHUFFLE (1), BITSHUFFLE (2) or AUTOSHUFFLE (-1).
  If AUTOSHUFFLE, bit-shuffle will be used for buffers with itemsize 1, and byte-shuffle
  will be used otherwise. The default is SHUFFLE.
"""
function BloscCompressor(; blocksize=0, clevel=5, cname="lz4", shuffle=1)
    return BloscCompressor(blocksize, clevel, cname, shuffle)
end

# Build a `BloscCompressor` from a parameter dictionary by reading its
# "blocksize", "clevel", "cname" and "shuffle" entries.
getCompressor(::Type{BloscCompressor}, d::Dict) =
    BloscCompressor(d["blocksize"], d["clevel"], d["cname"], d["shuffle"])

# Decompress the Blosc buffer `a`, requesting the non-missing variant of `T` as the
# element type from `Blosc.decompress`.
function zuncompress(a, ::BloscCompressor, T)
    target = Base.nonmissingtype(T)
    return Blosc.decompress(target, a)
end

# In-place Blosc decompression: `compressed` is decompressed straight into `data`,
# viewed as a vector.
# NOTE(review): the method is restricted to `DenseArray`, presumably so that Blosc can
# write into contiguous memory -- confirm before widening the signature.
function zuncompress!(data::DenseArray, compressed, ::BloscCompressor)
    flat = vec(data)
    return Blosc.decompress!(flat, compressed)
end


# Compress `a` with Blosc using the settings stored in `c`.
#
# The shuffle mode is taken from `c.shuffle`. A value of -1 picks bit-shuffle for
# one-byte elements and byte-shuffle otherwise; any other value must be one of
# `Blosc.NOSHUFFLE`, `Blosc.SHUFFLE` or `Blosc.BITSHUFFLE`.
#
# Throws `ArgumentError` when `c.shuffle` is none of the accepted values.
function zcompress(a, c::BloscCompressor)
    itemsize = sizeof(eltype(a))
    shuffle = c.shuffle
    # Weird auto shuffle logic from
    # https://github.com/zarr-developers/numcodecs/blob/7d8f9762b4f0f9b5e135688b2eeb3f783f90f208/numcodecs/blosc.pyx#L264-L272
    if shuffle == -1
        if itemsize == 1
            shuffle = Blosc.BITSHUFFLE
        else
            shuffle = Blosc.SHUFFLE
        end
    elseif !(shuffle in (Blosc.NOSHUFFLE, Blosc.SHUFFLE, Blosc.BITSHUFFLE))
        # The membership operator was missing here, which made the condition unparsable.
        throw(ArgumentError("invalid shuffle argument; expected -1, 0, 1 or 2, found $shuffle"))
    end
    # The compressor name is set through Blosc's setter before compressing.
    Blosc.set_compressor(c.cname)
    return Blosc.compress(a; level=c.clevel, shuffle=shuffle)
end

# JSON representation of a `BloscCompressor`: its four settings plus the id "blosc".
function JSON.lower(c::BloscCompressor)
    return Dict(
        "id" => "blosc",
        "cname" => c.cname,
        "clevel" => c.clevel,
        "shuffle" => c.shuffle,
        "blocksize" => c.blocksize,
    )
end

# Register the compressor type under the name "blosc".
Zarr.compressortypes["blosc"] = BloscCompressor
38 changes: 38 additions & 0 deletions src/Compressors/zlib.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#=
# Zlib compression
This file implements a Zlib compressor via CodecZlib.jl.
=#

import CodecZlib

"""
    ZlibCompressor(; clevel=-1)

Returns a `ZlibCompressor` struct that can serve as a Zarr array compressor. Keyword arguments are:

* `clevel=-1` the compression level, number between -1 (Default), 0 (no compression) and 9 (max compression)
* default is -1 compromise between speed and compression (currently equivalent to level 6).
"""
struct ZlibCompressor <: Compressor
clevel::Int
end

# Keyword constructor; forwards `clevel` (default -1) to the positional constructor.
function ZlibCompressor(; clevel=-1)
    return ZlibCompressor(clevel)
end

# Build a `ZlibCompressor` from a parameter dictionary by reading its "level" entry.
getCompressor(::Type{ZlibCompressor}, d::Dict) = ZlibCompressor(d["level"])

# Inflate the zlib buffer `a` and reinterpret the resulting bytes with the non-missing
# variant of `T` as element type.
function zuncompress(a, ::ZlibCompressor, T)
    return _reinterpret(Base.nonmissingtype(T), transcode(CodecZlib.ZlibDecompressor, a))
end

# Compress `a` with zlib at the level stored in the compressor.
#
# The bytes of `a` are copied into a byte vector and run through a
# `CodecZlib.ZlibCompressor` codec created with `level = z.clevel`. Previously the codec
# *type* was passed to `transcode`, so the configured `clevel` was never applied.
#
# Transcoding with a codec instance does not initialize or finalize the codec, so both
# steps are done explicitly here; `finally` releases the codec even if transcoding throws.
function zcompress(a, z::ZlibCompressor)
    a_uint8 = _reinterpret(UInt8, a)[:]
    codec = CodecZlib.ZlibCompressor(level=z.clevel)
    CodecZlib.TranscodingStreams.initialize(codec)
    try
        return transcode(codec, a_uint8)
    finally
        CodecZlib.TranscodingStreams.finalize(codec)
    end
end

# JSON representation of a `ZlibCompressor`: the id "zlib" and the compression level.
function JSON.lower(z::ZlibCompressor)
    return Dict("id" => "zlib", "level" => z.clevel)
end

# Register the compressor type under the name "zlib".
Zarr.compressortypes["zlib"] = ZlibCompressor
2 changes: 1 addition & 1 deletion src/Zarr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import JSON
import Blosc

include("metadata.jl")
include("Compressors.jl")
include("Compressors/Compressors.jl")
include("Storage/Storage.jl")
include("Filters.jl")
include("ZArray.jl")
Expand Down

0 comments on commit 9a8892a

Please sign in to comment.