From f348c46f0d8f322f87e744824d70c2397cd3a8a7 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Tue, 20 Aug 2024 09:38:55 -0700
Subject: [PATCH 01/31] Document the Filter interface

---
 src/Filters.jl | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/Filters.jl b/src/Filters.jl
index fde3db9..85363fc 100644
--- a/src/Filters.jl
+++ b/src/Filters.jl
@@ -1,6 +1,27 @@
 import JSON
 
+"""
+    abstract type Filter{T,TENC}
+
+The supertype for all Zarr filters.  
+
+## Interface
+
+All subtypes MUST implement the following methods:
+
+- [`zencode(ain, filter::Filter)`](@ref zencode): Encodes data `ain` using the filter, and returns a vector of bytes.
+- [`zdecode(ain, filter::Filter)`](@ref zdecode): Decodes data `ain`, a vector of bytes, using the filter, and returns the original data.
+- [`JSON.lower`](@ref): Returns a JSON-serializable dictionary representing the filter, according to the Zarr specification.
+- [`getfilter(::Type{<: Filter}, filterdict)`](@ref getfilter): Returns the filter type read from a given filter dictionary.
 
+If the filter has type parameters, it MUST also implement:
+- [`sourcetype(::Filter)::T`](@ref sourcetype): equivalent to `dtype` in the Python Zarr implementation.
+- [`desttype(::Filter)::T`](@ref desttype): equivalent to `atype` in the Python Zarr implementation.
+
+
+
+Subtypes include: [`VLenArrayFilter`](@ref), [`VLenUTF8Filter`](@ref), [`Fletcher32Filter`](@ref).
+"""
 abstract type Filter{T,TENC} end
 function getfilters(d::Dict) 
     if !haskey(d,"filters")

From 9d765b3104ca504525b86399c2e8345185d3ef89 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Tue, 20 Aug 2024 09:44:43 -0700
Subject: [PATCH 02/31] Move filters to a folder

Same rationale as the other changes :D - just for cleanliness and clarity.
---
 src/{ => Filters}/Filters.jl | 11 ++++++++++-
 src/Zarr.jl                  |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)
 rename src/{ => Filters}/Filters.jl (95%)

diff --git a/src/Filters.jl b/src/Filters/Filters.jl
similarity index 95%
rename from src/Filters.jl
rename to src/Filters/Filters.jl
index 85363fc..51325ac 100644
--- a/src/Filters.jl
+++ b/src/Filters/Filters.jl
@@ -23,6 +23,15 @@ If the filter has type parameters, it MUST also implement:
 Subtypes include: [`VLenArrayFilter`](@ref), [`VLenUTF8Filter`](@ref), [`Fletcher32Filter`](@ref).
 """
 abstract type Filter{T,TENC} end
+
+function zencode end
+function zdecode end
+function getfilter end
+function sourcetype end
+function desttype end
+
+filterdict = Dict{String,Type{<:Filter}}()
+
 function getfilters(d::Dict) 
     if !haskey(d,"filters")
         return nothing
@@ -41,6 +50,7 @@ desttype(::Filter{<:Any,T}) where T = T
 
 zencode(ain,::Nothing) = ain
 
+
 """
     VLenArrayFilter(T)
 
@@ -109,4 +119,3 @@ JSON.lower(::VLenUTF8Filter) = Dict("id"=>"vlen-utf8")
 getfilter(::Type{<:VLenArrayFilter}, f) = VLenArrayFilter{typestr(f["dtype"])}()
 getfilter(::Type{<:VLenUTF8Filter}, f) = VLenUTF8Filter()
 
-filterdict = Dict("vlen-array"=>VLenArrayFilter, "vlen-utf8"=>VLenUTF8Filter)
diff --git a/src/Zarr.jl b/src/Zarr.jl
index 5f58e61..47a2539 100644
--- a/src/Zarr.jl
+++ b/src/Zarr.jl
@@ -6,7 +6,7 @@ import Blosc
 include("metadata.jl")
 include("Compressors.jl")
 include("Storage/Storage.jl")
-include("Filters.jl")
+include("Filters/Filters.jl")
 include("ZArray.jl")
 include("ZGroup.jl")
 

From cb374ce4008e26ba6a8e731a9cf622d9403c31aa Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Tue, 20 Aug 2024 09:47:47 -0700
Subject: [PATCH 03/31] Factor out variable-length filters to a new file

---
 src/Filters/Filters.jl     | 69 +-------------------------------
 src/Filters/vlenfilters.jl | 80 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 68 deletions(-)
 create mode 100644 src/Filters/vlenfilters.jl

diff --git a/src/Filters/Filters.jl b/src/Filters/Filters.jl
index 51325ac..3eb04d4 100644
--- a/src/Filters/Filters.jl
+++ b/src/Filters/Filters.jl
@@ -50,72 +50,5 @@ desttype(::Filter{<:Any,T}) where T = T
 
 zencode(ain,::Nothing) = ain
 
-
-"""
-    VLenArrayFilter(T)
-
-Encodes and decodes variable-length arrays of arbitrary data type 
-"""
-struct VLenArrayFilter{T} <: Filter{T,UInt8} end
-
-"""
-    VLenUTF8Filter
-
-Encodes and decodes variable-length unicode strings
-"""
-struct VLenUTF8Filter <: Filter{String, UInt8} end
-
-function zdecode(ain, ::VLenArrayFilter{T}) where T
-    f = IOBuffer(ain)
-    nitems = read(f, UInt32)
-    out = Array{Vector{T}}(undef,nitems)
-    for i=1:nitems
-        len1 = read(f,UInt32)
-        out[i] = read!(f,Array{T}(undef,len1 ÷ sizeof(T)))
-    end
-    close(f)
-    out
-end
-
-#Encodes Array of Vectors a into bytes
-function zencode(ain,::VLenArrayFilter)
-    b = IOBuffer()
-    nitems = length(ain)
-    write(b,UInt32(nitems))
-    for a in ain
-        write(b, UInt32(length(a) * sizeof(eltype(a))))
-        write(b, a)
-    end
-    take!(b)
-end
-
-function zdecode(ain, ::VLenUTF8Filter)
-    f = IOBuffer(ain)
-    nitems = read(f, UInt32)
-    out = Array{String}(undef, nitems)
-    for i in 1:nitems
-        clen = read(f, UInt32)
-        out[i] = String(read(f, clen))
-    end
-    close(f)
-    out
-end
-
-function zencode(ain, ::VLenUTF8Filter)
-    b = IOBuffer()
-    nitems = length(ain)
-    write(b, UInt32(nitems))
-    for a in ain
-        utf8encoded = transcode(String, a)
-        write(b, UInt32(ncodeunits(utf8encoded)))
-        write(b, utf8encoded)
-    end
-    take!(b)
-end
-
-JSON.lower(::VLenArrayFilter{T}) where T = Dict("id"=>"vlen-array","dtype"=> typestr(T) )
-JSON.lower(::VLenUTF8Filter) = Dict("id"=>"vlen-utf8")
-
-getfilter(::Type{<:VLenArrayFilter}, f) = VLenArrayFilter{typestr(f["dtype"])}()
-getfilter(::Type{<:VLenUTF8Filter}, f) = VLenUTF8Filter()
+include("vlenfilters.jl")
 
diff --git a/src/Filters/vlenfilters.jl b/src/Filters/vlenfilters.jl
new file mode 100644
index 0000000..e9331f9
--- /dev/null
+++ b/src/Filters/vlenfilters.jl
@@ -0,0 +1,80 @@
+#=
+# Variable-length filters
+
+This file implements variable-length filters for Zarr, i.e., filters that write arrays of variable-length arrays ("ragged arrays").
+
+Specifically, it implements the `VLenArrayFilter` and `VLenUTF8Filter` types, which are used to encode and decode variable-length arrays and UTF-8 strings, respectively.
+=#
+
+# ## VLenArrayFilter
+
+"""
+    VLenArrayFilter(T)
+
+Encodes and decodes variable-length arrays of arbitrary data type `T`.
+"""
+struct VLenArrayFilter{T} <: Filter{T,UInt8} end
+# We don't need to define `sourcetype` and `desttype` for this filter, since the generic implementations are sufficient.
+
+JSON.lower(::VLenArrayFilter{T}) where T = Dict("id"=>"vlen-array","dtype"=> typestr(T) )
+getfilter(::Type{<:VLenArrayFilter}, f) = VLenArrayFilter{typestr(f["dtype"])}()
+
+function zdecode(ain, ::VLenArrayFilter{T}) where T
+    f = IOBuffer(ain)
+    nitems = read(f, UInt32)
+    out = Array{Vector{T}}(undef,nitems)
+    for i=1:nitems
+        len1 = read(f,UInt32)
+        out[i] = read!(f,Array{T}(undef,len1 ÷ sizeof(T)))
+    end
+    close(f)
+    out
+end
+
+#Encodes Array of Vectors `ain` into bytes
+function zencode(ain,::VLenArrayFilter)
+    b = IOBuffer()
+    nitems = length(ain)
+    write(b,UInt32(nitems))
+    for a in ain
+        write(b, UInt32(length(a) * sizeof(eltype(a))))
+        write(b, a)
+    end
+    take!(b)
+end
+
+# ## VLenUTF8Filter
+
+"""
+    VLenUTF8Filter
+
+Encodes and decodes variable-length unicode strings
+"""
+struct VLenUTF8Filter <: Filter{String, UInt8} end
+
+JSON.lower(::VLenUTF8Filter) = Dict("id"=>"vlen-utf8")
+getfilter(::Type{<:VLenUTF8Filter}, f) = VLenUTF8Filter()
+
+function zdecode(ain, ::VLenUTF8Filter)
+    f = IOBuffer(ain)
+    nitems = read(f, UInt32)
+    out = Array{String}(undef, nitems)
+    for i in 1:nitems
+        clen = read(f, UInt32)
+        out[i] = String(read(f, clen))
+    end
+    close(f)
+    out
+end
+
+function zencode(ain, ::VLenUTF8Filter)
+    b = IOBuffer()
+    nitems = length(ain)
+    write(b, UInt32(nitems))
+    for a in ain
+        utf8encoded = transcode(String, a)
+        write(b, UInt32(ncodeunits(utf8encoded)))
+        write(b, utf8encoded)
+    end
+    take!(b)
+end

From 12931a2551ef44e15c2644dca9d8a5dfbde98d04 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Tue, 20 Aug 2024 09:49:50 -0700
Subject: [PATCH 04/31] Add docstrings to filter API functions

---
 src/Filters/Filters.jl | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/src/Filters/Filters.jl b/src/Filters/Filters.jl
index 3eb04d4..c2a7965 100644
--- a/src/Filters/Filters.jl
+++ b/src/Filters/Filters.jl
@@ -24,10 +24,39 @@ Subtypes include: [`VLenArrayFilter`](@ref), [`VLenUTF8Filter`](@ref), [`Fletche
 """
 abstract type Filter{T,TENC} end
 
+"""
+    zencode(ain, filter::Filter)
+
+Encodes data `ain` using the filter, and returns a vector of bytes.
+"""
 function zencode end
+
+"""
+    zdecode(ain, filter::Filter)
+
+Decodes data `ain`, a vector of bytes, using the filter, and returns the original data.
+"""
 function zdecode end
+
+"""
+    getfilter(::Type{<: Filter}, filterdict)
+
+Returns the filter type read from a given specification dictionary, which must follow the Zarr specification.
+"""
 function getfilter end
+
+"""
+    sourcetype(::Filter)::T
+
+Returns the source type of the filter.
+"""
 function sourcetype end
+
+"""
+    desttype(::Filter)::T
+
+Returns the destination type of the filter.
+"""
 function desttype end
 
 filterdict = Dict{String,Type{<:Filter}}()

From 7d7606af8c05143aa3c2d4bb73483177a756ec26 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Tue, 20 Aug 2024 10:09:18 -0700
Subject: [PATCH 05/31] Add a Fletcher32 filter and test

---
 src/Filters/Filters.jl    |  5 ++-
 src/Filters/fletcher32.jl | 85 +++++++++++++++++++++++++++++++++++++++
 test/Filters.jl           | 31 ++++++++++++++
 test/runtests.jl          |  2 +-
 4 files changed, 121 insertions(+), 2 deletions(-)
 create mode 100644 src/Filters/fletcher32.jl
 create mode 100644 test/Filters.jl

diff --git a/src/Filters/Filters.jl b/src/Filters/Filters.jl
index c2a7965..77d3764 100644
--- a/src/Filters/Filters.jl
+++ b/src/Filters/Filters.jl
@@ -18,6 +18,9 @@ If the filter has type parameters, it MUST also implement:
 - [`sourcetype(::Filter)::T`](@ref sourcetype): equivalent to `dtype` in the Python Zarr implementation.
 - [`desttype(::Filter)::T`](@ref desttype): equivalent to `atype` in the Python Zarr implementation.
 
+Finally, an entry MUST be added to the `filterdict` dictionary for each filter type.  
+This must also follow the Zarr specification's name for that filter.  The name of the filter
+is the key, and the value is the filter type (e.g. `VLenUTF8Filter` or `Fletcher32Filter`).
 
 
 Subtypes include: [`VLenArrayFilter`](@ref), [`VLenUTF8Filter`](@ref), [`Fletcher32Filter`](@ref).
@@ -80,4 +83,4 @@ desttype(::Filter{<:Any,T}) where T = T
 zencode(ain,::Nothing) = ain
 
 include("vlenfilters.jl")
-
+include("fletcher32.jl")
diff --git a/src/Filters/fletcher32.jl b/src/Filters/fletcher32.jl
new file mode 100644
index 0000000..4d2bda7
--- /dev/null
+++ b/src/Filters/fletcher32.jl
@@ -0,0 +1,85 @@
+#=
+# Fletcher32 filter
+
+This "filter" basically injects a 4-byte checksum at the end of the data, to ensure data integrity.
+
+The implementation is based on the [numcodecs implementation here](https://github.com/zarr-developers/numcodecs/blob/79d1a8d4f9c89d3513836aba0758e0d2a2a1cfaf/numcodecs/fletcher32.pyx)
+and the [original C implementation for NetCDF](https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L109) linked therein.
+
+=#
+
+"""
+    Fletcher32Filter()
+
+A filter that appends a 4-byte Fletcher32 checksum to the data when encoding, and verifies and strips it when decoding.
+
+Note that this goes from UInt8 to UInt8, and is effectively only checking 
+the checksum and cropping the last 4 bytes of the data during decoding.
+"""
+struct Fletcher32Filter <: Filter{UInt8, UInt8}
+end
+
+getfilter(::Type{<: Fletcher32Filter}, d::Dict) = Fletcher32Filter() # the filter has no parameters to read from `d`
+JSON.lower(::Fletcher32Filter) = Dict("id" => "fletcher32")
+filterdict["fletcher32"] = Fletcher32Filter
+
+function _checksum_fletcher32(data::AbstractVector{UInt8})
+    len = length(data) ÷ 2 # number of complete 16-bit words (integer division, so odd lengths are safe)
+    sum1::UInt32 = 0
+    sum2::UInt32 = 0
+    data_idx = 1
+
+    #=
+    Compute the Fletcher32 checksum of `data` as a UInt32, starting with the complete pairs of bytes.
+    The magic `360` value is the largest number of sums that can be performed without overflow in UInt32.
+    =#
+    while len > 0
+        tlen = len > 360 ? 360 : len
+        len -= tlen
+        while tlen > 0
+            sum1 += begin # create a 16 bit word from two bytes, the first one shifted to the end of the word
+                (UInt16(data[data_idx]) << 8) | UInt16(data[data_idx + 1])
+            end
+            sum2 += sum1
+            data_idx += 2
+            tlen -= 1
+        end
+        sum1 = (sum1 & 0xffff) + (sum1 >> 16)
+        sum2 = (sum2 & 0xffff) + (sum2 >> 16)
+    end
+
+    # if the length is odd, the trailing byte (now at `data_idx`) is the high byte of one final word
+    if isodd(length(data))
+        sum1 += UInt16(data[data_idx]) << 8
+        sum2 += sum1
+        sum1 = (sum1 & 0xffff) + (sum1 >> 16)
+        sum2 = (sum2 & 0xffff) + (sum2 >> 16)
+    end
+    # second reduction step: fold any remaining carry so that both sums fit in 16 bits
+    sum1 = (sum1 & 0xffff) + (sum1 >> 16)
+    sum2 = (sum2 & 0xffff) + (sum2 >> 16)
+    return (sum2 << 16) | sum1
+end
+
+function zencode(data, ::Fletcher32Filter)
+    bytes = reinterpret(UInt8, data)
+    checksum = _checksum_fletcher32(bytes)
+    result = copy(bytes)
+    append!(result, reinterpret(UInt8, [checksum])) # TODO: decompose this without the extra allocation of wrapping in Array
+    return result
+end
+
+function zdecode(data, ::Fletcher32Filter)
+    bytes = reinterpret(UInt8, data)
+    checksum = _checksum_fletcher32(view(bytes, 1:length(bytes) - 4))
+    stored_checksum = only(reinterpret(UInt32, view(bytes, (length(bytes) - 3):length(bytes))))
+    if checksum != stored_checksum
+        throw(ErrorException("""
+        Checksum mismatch in Fletcher32 decoding.  
+        
+        The computed value is $(checksum) and the stored value is $(stored_checksum).  
+        This might be a sign that the data is corrupted.
+        """)) # TODO: make this a custom error type
+    end
+    return view(bytes, 1:length(bytes) - 4)
+end
diff --git a/test/Filters.jl b/test/Filters.jl
new file mode 100644
index 0000000..426390d
--- /dev/null
+++ b/test/Filters.jl
@@ -0,0 +1,31 @@
+using Test
+
+using Zarr: zencode, zdecode
+using Zarr: Fletcher32Filter
+
+@testset "Fletcher32Filter" begin
+    # These tests are copied exactly from the [`numcodecs`](https://github.com/zarr-developers/numcodecs/) Python package,
+    # specifically [this file](https://github.com/zarr-developers/numcodecs/blob/main/numcodecs/tests/test_fletcher32.py).
+    
+    bit_data = vcat(
+        b"w\x07\x00\x00\x00\x00\x00\x00\x85\xf6\xff\xff\xff\xff\xff\xff",
+        b"i\x07\x00\x00\x00\x00\x00\x00\x94\xf6\xff\xff\xff\xff\xff\xff",
+        b"\x88\t\x00\x00\x00\x00\x00\x00i\x03\x00\x00\x00\x00\x00\x00",
+        b"\x93\xfd\xff\xff\xff\xff\xff\xff\xc3\xfc\xff\xff\xff\xff\xff\xff",
+        b"'\x02\x00\x00\x00\x00\x00\x00\xba\xf7\xff\xff\xff\xff\xff\xff",
+        b"\xfd%\x86d",
+    )
+    expected = [1911, -2427, 1897, -2412, 2440, 873, -621, -829, 551, -2118]
+    @test reinterpret(Int64, zdecode(bit_data, Fletcher32Filter())) == expected
+    @test zencode(expected, Fletcher32Filter()) == bit_data
+
+    for Typ in (UInt8, Int32, Float32, Float64)
+        arr = rand(Typ, 100)
+        @test reinterpret(Typ, zdecode(zencode(arr, Fletcher32Filter()), Fletcher32Filter())) == arr
+    end
+
+    data = rand(100)
+    enc = zencode(data, Fletcher32Filter())
+    enc[begin] += 1
+    @test_throws "Checksum mismatch in Fletcher32 decoding" zdecode(enc, Fletcher32Filter())
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 78067b9..007defc 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -267,7 +267,7 @@ end
 
 include("storage.jl")
 
-
+include("Filters.jl")
 
 include("python.jl")
 

From 6a34368201e7c9ffba99613859c98b51c656d29a Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Tue, 20 Aug 2024 10:09:29 -0700
Subject: [PATCH 06/31] re-add the dictionary entries for the vlen filters

---
 src/Filters/vlenfilters.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Filters/vlenfilters.jl b/src/Filters/vlenfilters.jl
index e9331f9..dad91df 100644
--- a/src/Filters/vlenfilters.jl
+++ b/src/Filters/vlenfilters.jl
@@ -18,6 +18,7 @@ struct VLenArrayFilter{T} <: Filter{T,UInt8} end
 
 JSON.lower(::VLenArrayFilter{T}) where T = Dict("id"=>"vlen-array","dtype"=> typestr(T) )
 getfilter(::Type{<:VLenArrayFilter}, f) = VLenArrayFilter{typestr(f["dtype"])}()
+filterdict["vlen-array"] = VLenArrayFilter
 
 function zdecode(ain, ::VLenArrayFilter{T}) where T
     f = IOBuffer(ain)
@@ -54,6 +55,7 @@ struct VLenUTF8Filter <: Filter{String, UInt8} end
 
 JSON.lower(::VLenUTF8Filter) = Dict("id"=>"vlen-utf8")
 getfilter(::Type{<:VLenUTF8Filter}, f) = VLenUTF8Filter()
+filterdict["vlen-utf8"] = VLenUTF8Filter
 
 function zdecode(ain, ::VLenUTF8Filter)
     f = IOBuffer(ain)

From fbf911e91179aeed3c22af812e0a788f6f9d29ee Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Tue, 20 Aug 2024 15:15:39 -0700
Subject: [PATCH 07/31] Semi-working fixed scale offset filter

---
 src/Filters/Filters.jl          |  1 +
 src/Filters/fixedscaleoffset.jl | 74 +++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 src/Filters/fixedscaleoffset.jl

diff --git a/src/Filters/Filters.jl b/src/Filters/Filters.jl
index 77d3764..72816d1 100644
--- a/src/Filters/Filters.jl
+++ b/src/Filters/Filters.jl
@@ -84,3 +84,4 @@ zencode(ain,::Nothing) = ain
 
 include("vlenfilters.jl")
 include("fletcher32.jl")
+include("fixedscaleoffset.jl")
diff --git a/src/Filters/fixedscaleoffset.jl b/src/Filters/fixedscaleoffset.jl
new file mode 100644
index 0000000..2109399
--- /dev/null
+++ b/src/Filters/fixedscaleoffset.jl
@@ -0,0 +1,74 @@
+
+"""
+    FixedScaleOffsetFilter{T,TENC}(scale, offset)
+
+A compressor that scales and offsets the data.
+"""
+struct FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc} <: Filter{T, Tenc}
+    scale::ScaleOffsetType
+    offset::ScaleOffsetType
+end
+
+FixedScaleOffsetFilter{T}(scale::ScaleOffsetType, offset::ScaleOffsetType) where {T, ScaleOffsetType} = FixedScaleOffsetFilter{T, ScaleOffsetType}(scale, offset)
+FixedScaleOffsetFilter(scale::ScaleOffsetType, offset::ScaleOffsetType) where {ScaleOffsetType} = FixedScaleOffsetFilter{ScaleOffsetType, ScaleOffsetType}(scale, offset)
+
+function FixedScaleOffsetFilter(; scale::ScaleOffsetType, offset::ScaleOffsetType, T, Tenc = T) where ScaleOffsetType
+    return FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}(scale, offset)
+end
+
+# function zencode(a::AbstractArray, c::FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}) where {T, Tenc, ScaleOffsetType}
+#     return @. convert(Tenc, # convert to the encoding type after applying the scale and offset
+#         round((a - c.offset) * c.scale) # apply scale and offset, and round to nearest integer
+#     )
+# end
+
+function zdecode(a::AbstractArray, c::FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}) where {T, Tenc, ScaleOffsetType}
+    return _reinterpret(Base.nonmissingtype(T), @. a / c.scale + c.offset)
+end
+
+
+function getfilter(::Type{<: FixedScaleOffsetFilter}, d::Dict)
+    # JSON may parse `scale` and `offset` as different number types; the struct stores a single one.
+    scale, offset = promote(d["scale"], d["offset"])
+    # Types must be converted from strings to the actual Julia types they represent.
+    string_T = d["dtype"]
+    string_Tenc = get(d, "astype", get(d, "atype", string_T)) # `astype` is the numcodecs key; `atype` is kept as a fallback
+    T = typestr(string_T)
+    Tenc = typestr(string_Tenc)
+    return FixedScaleOffsetFilter{typeof(scale), T, Tenc}(scale, offset)
+end
+
+function JSON.lower(c::FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}) where {ScaleOffsetType, T, Tenc}
+    return Dict("id" => "fixedscaleoffset", "scale" => c.scale, "offset" => c.offset, "dtype" => typestr(T), "astype" => typestr(Tenc))
+end
+
+filterdict["fixedscaleoffset"] = FixedScaleOffsetFilter
+
+#=
+# Tests
+
+arrays = [
+    LinRange{Float64}(1000, 1001, 1000),
+    randn(1000) .+ 1000,
+    reshape(LinRange{Float64}(1000, 1001, 1000), (100, 10)),
+    reshape(LinRange{Float64}(1000, 1001, 1000), (10, 10, 10)),
+]
+
+codecs = [
+    FixedScaleOffsetFilter(offset = 1000, scale = 1, T = Float64, Tenc = Int8),
+    FixedScaleOffsetFilter(offset = 1000, scale = 10^2, T = Float64, Tenc = Int16),
+    FixedScaleOffsetFilter(offset = 1000, scale = 10^6, T = Float64, Tenc = Int32),
+    FixedScaleOffsetFilter(offset = 1000, scale = 10^12, T = Float64, Tenc = Int64),
+    FixedScaleOffsetFilter(offset = 1000, scale = 10^12, T = Float64),
+]
+
+for array in arrays
+    for codec in codecs
+        encoded = zencode(array, codec)
+        decoded = zdecode(encoded, codec)
+        tolerance = round(Int, log10(codec.scale))
+        @test decoded ≈ array atol=tolerance
+    end
+end
+
+=#
\ No newline at end of file

From b960c3704090510ecb0f17ad098903ae0fc7ee11 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Thu, 22 Aug 2024 09:30:50 -0700
Subject: [PATCH 08/31] Add FixedScaleOffset tests

---
 src/Filters/fixedscaleoffset.jl | 39 +++++----------------------------
 test/Filters.jl                 | 31 +++++++++++++++++++++++++-
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/src/Filters/fixedscaleoffset.jl b/src/Filters/fixedscaleoffset.jl
index 2109399..80be305 100644
--- a/src/Filters/fixedscaleoffset.jl
+++ b/src/Filters/fixedscaleoffset.jl
@@ -16,11 +16,11 @@ function FixedScaleOffsetFilter(; scale::ScaleOffsetType, offset::ScaleOffsetTyp
     return FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}(scale, offset)
 end
 
-# function zencode(a::AbstractArray, c::FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}) where {T, Tenc, ScaleOffsetType}
-#     return @. convert(Tenc, # convert to the encoding type after applying the scale and offset
-#         round((a - c.offset) * c.scale) # apply scale and offset, and round to nearest integer
-#     )
-# end
+function zencode(a::AbstractArray, c::FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}) where {T, Tenc, ScaleOffsetType}
+    return @. convert(Tenc, # convert to the encoding type after applying the scale and offset
+        round((a - c.offset) * c.scale) # apply scale and offset, and round to nearest integer
+    )
+end
 
 function zdecode(a::AbstractArray, c::FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}) where {T, Tenc, ScaleOffsetType}
     return _reinterpret(Base.nonmissingtype(T), @. a / c.scale + c.offset)
@@ -43,32 +43,3 @@ function JSON.lower(c::FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}) where {
 end
 
 filterdict["fixedscaleoffset"] = FixedScaleOffsetFilter
-
-#=
-# Tests
-
-arrays = [
-    LinRange{Float64}(1000, 1001, 1000),
-    randn(1000) .+ 1000,
-    reshape(LinRange{Float64}(1000, 1001, 1000), (100, 10)),
-    reshape(LinRange{Float64}(1000, 1001, 1000), (10, 10, 10)),
-]
-
-codecs = [
-    FixedScaleOffsetFilter(offset = 1000, scale = 1, T = Float64, Tenc = Int8),
-    FixedScaleOffsetFilter(offset = 1000, scale = 10^2, T = Float64, Tenc = Int16),
-    FixedScaleOffsetFilter(offset = 1000, scale = 10^6, T = Float64, Tenc = Int32),
-    FixedScaleOffsetFilter(offset = 1000, scale = 10^12, T = Float64, Tenc = Int64),
-    FixedScaleOffsetFilter(offset = 1000, scale = 10^12, T = Float64),
-]
-
-for array in arrays
-    for codec in codecs
-        encoded = zencode(array, codec)
-        decoded = zdecode(encoded, codec)
-        tolerance = round(Int, log10(codec.scale))
-        @test decoded ≈ array atol=tolerance
-    end
-end
-
-=#
\ No newline at end of file
diff --git a/test/Filters.jl b/test/Filters.jl
index 426390d..002a73f 100644
--- a/test/Filters.jl
+++ b/test/Filters.jl
@@ -28,4 +28,33 @@ using Zarr: Fletcher32Filter
     enc = zencode(data, Fletcher32Filter())
     enc[begin] += 1
     @test_throws "Checksum mismatch in Fletcher32 decoding" zdecode(enc, Fletcher32Filter())
-end
\ No newline at end of file
+end
+
+#=
+@testset "FixedScaleOffsetFilter" begin
+    arrays = [
+        LinRange{Float64}(1000, 1001, 1000),
+        randn(1000) .+ 1000,
+        reshape(LinRange{Float64}(1000, 1001, 1000), (100, 10)),
+        reshape(LinRange{Float64}(1000, 1001, 1000), (10, 10, 10)),
+    ]
+
+    codecs = [
+        FixedScaleOffsetFilter(offset = 1000, scale = 1, T = Float64, Tenc = Int8),
+        FixedScaleOffsetFilter(offset = 1000, scale = 10^2, T = Float64, Tenc = Int16),
+        FixedScaleOffsetFilter(offset = 1000, scale = 10^6, T = Float64, Tenc = Int32),
+        FixedScaleOffsetFilter(offset = 1000, scale = 10^12, T = Float64, Tenc = Int64),
+        FixedScaleOffsetFilter(offset = 1000, scale = 10^12, T = Float64),
+    ]
+
+    for array in arrays
+        for codec in codecs
+            encoded = Zarr.zencode(array, codec)
+            decoded = Zarr.zdecode(encoded, codec)
+            decimal = round(log10(codec.scale))
+            @test decoded ≈ array rtol=1.5*10^(-decimal)
+        end
+    end
+end
+
+=#
\ No newline at end of file

From dcae156cabb5742359b9eb1c9731384a2d0f56db Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Thu, 22 Aug 2024 09:31:27 -0700
Subject: [PATCH 09/31] Add shuffle filter (buggy in the last few bytes,
 indexing issues)

---
 src/Filters/Filters.jl |  1 +
 src/Filters/shuffle.jl | 70 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 src/Filters/shuffle.jl

diff --git a/src/Filters/Filters.jl b/src/Filters/Filters.jl
index 72816d1..d6610a8 100644
--- a/src/Filters/Filters.jl
+++ b/src/Filters/Filters.jl
@@ -85,3 +85,4 @@ zencode(ain,::Nothing) = ain
 include("vlenfilters.jl")
 include("fletcher32.jl")
 include("fixedscaleoffset.jl")
+include("shuffle.jl")
diff --git a/src/Filters/shuffle.jl b/src/Filters/shuffle.jl
new file mode 100644
index 0000000..2a4c82f
--- /dev/null
+++ b/src/Filters/shuffle.jl
@@ -0,0 +1,70 @@
+#=
+# Shuffle compression
+
+This file implements the shuffle compressor.
+=#
+
+struct ShuffleFilter <: Filter{UInt8, UInt8}
+    elementsize::Csize_t
+end
+
+ShuffleFilter(; elementsize = 4) = ShuffleFilter(elementsize)
+
+function _do_shuffle!(dest::AbstractVector{UInt8}, source::AbstractVector{UInt8}, elementsize::Csize_t)
+    count = fld(length(source)-1, elementsize) # elementsize is in bytes, so this works
+    for i in 0:(count-1)
+        offset = i * elementsize
+        for byte_index in 0:(elementsize-1)
+            j = byte_index * count + i
+            dest[j+1] = source[offset + byte_index+1]
+        end
+    end
+end
+
+function _do_unshuffle!(dest::AbstractVector{UInt8}, source::AbstractVector{UInt8}, elementsize::Csize_t)
+    count = fld(length(source)-1, elementsize) # elementsize is in bytes, so this works
+    for i in 0:(elementsize-1)
+        offset = i * count
+        for byte_index in 0:(count-1)
+            j = byte_index * elementsize + i
+            dest[j+1] = source[offset + byte_index+1]
+        end
+    end
+end
+
+function zencode(a::AbstractArray, c::ShuffleFilter)
+    if c.elementsize <= 1 # no shuffling needed if elementsize is 1
+        return a
+    end
+    source = reinterpret(UInt8, vec(a))
+    dest = Vector{UInt8}(undef, length(source))
+    _do_shuffle!(dest, source, c.elementsize)
+    return dest
+end
+
+function zdecode(a::AbstractArray, c::ShuffleFilter)
+    if c.elementsize <= 1 # no shuffling needed if elementsize is 1
+        return a
+    end
+    source = reinterpret(UInt8, vec(a))
+    dest = Vector{UInt8}(undef, length(source))
+    _do_unshuffle!(dest, source, c.elementsize)
+    return dest
+end
+
+function getfilter(::Type{ShuffleFilter}, d::Dict)
+    return ShuffleFilter(d["elementsize"]) # `elementsize` is the only parameter written by `JSON.lower`
+end
+
+function JSON.lower(c::ShuffleFilter)
+    return Dict("id" => "shuffle", "elementsize" => Int64(c.elementsize))
+end
+
+filterdict["shuffle"] = ShuffleFilter
+#=
+
+# Tests
+
+
+    
+=#
\ No newline at end of file

From 7a5a5a06f81797af7b0a615e1920bff08ef64c0c Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Thu, 22 Aug 2024 09:31:50 -0700
Subject: [PATCH 10/31] WIP quantize filter

---
 src/Filters/quantize.jl | 42 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 src/Filters/quantize.jl

diff --git a/src/Filters/quantize.jl b/src/Filters/quantize.jl
new file mode 100644
index 0000000..ac4ef61
--- /dev/null
+++ b/src/Filters/quantize.jl
@@ -0,0 +1,42 @@
+#=
+# Quantize compression
+
+
+=#
+
+"""
+    QuantizeFilter(; [digits = 10], [T = Float16], [Tenc = T])
+
+Quantization based compression for Zarr arrays.
+"""
+struct QuantizeFilter{T, TENC} <: Filter{T, TENC}
+    digits::Int32
+end
+
+function QuantizeFilter(; digits = 10, T = Float16, Tenc = T) # the encoded type defaults to the decoded type `T`
+    return QuantizeFilter{T, Tenc}(digits)
+end
+
+function zencode(data::AbstractArray, filter::QuantizeFilter{DecodingType, EncodingType}) where {DecodingType, EncodingType}
+    # View the flattened input as the decoded element type.
+    arr = reinterpret(DecodingType, vec(data))
+
+    precision = 10.0^(-filter.digits) # float base: an integer base throws for a negative exponent
+
+    _exponent = log10(precision) # `log10`, since the two-argument `log(b, x)` takes the base first
+    exponent = _exponent < 0 ? floor(Int, _exponent) : ceil(Int, _exponent)
+
+    bits = ceil(log2(10.0^(-exponent))) # binary digits needed to hold that decimal precision
+    scale = 2.0^bits
+
+    # Round to the nearest multiple of `1 / scale`.  The result is converted (not reinterpreted)
+    # to `EncodingType`, like numcodecs' `astype`; this also undoes the promotion to Float64
+    # caused by multiplying with the Float64 `scale`.
+    enc = @. convert(EncodingType, round(scale * arr) / scale)
+
+    return enc
+end
+
+function zdecode(data::AbstractArray, filter::QuantizeFilter{DecodingType, EncodingType}) where {DecodingType, EncodingType}
+    return data
+end
\ No newline at end of file

From 231c0a1aaf92305371e0de65500fb70c938c4272 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Fri, 23 Aug 2024 14:39:38 -0700
Subject: [PATCH 11/31] ShuffleFilter working and tested

---
 src/Filters/shuffle.jl |  4 ++--
 test/Filters.jl        | 37 +++++++++++++++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/src/Filters/shuffle.jl b/src/Filters/shuffle.jl
index 2a4c82f..0dc49a7 100644
--- a/src/Filters/shuffle.jl
+++ b/src/Filters/shuffle.jl
@@ -11,7 +11,7 @@ end
 ShuffleFilter(; elementsize = 4) = ShuffleFilter(elementsize)
 
 function _do_shuffle!(dest::AbstractVector{UInt8}, source::AbstractVector{UInt8}, elementsize::Csize_t)
-    count = fld(length(source)-1, elementsize) # elementsize is in bytes, so this works
+    count = fld(length(source), elementsize) # elementsize is in bytes, so this works
     for i in 0:(count-1)
         offset = i * elementsize
         for byte_index in 0:(elementsize-1)
@@ -22,7 +22,7 @@ function _do_shuffle!(dest::AbstractVector{UInt8}, source::AbstractVector{UInt8}
 end
 
 function _do_unshuffle!(dest::AbstractVector{UInt8}, source::AbstractVector{UInt8}, elementsize::Csize_t)
-    count = fld(length(source)-1, elementsize) # elementsize is in bytes, so this works
+    count = fld(length(source), elementsize) # elementsize is in bytes, so this works
     for i in 0:(elementsize-1)
         offset = i * count
         for byte_index in 0:(count-1)
diff --git a/test/Filters.jl b/test/Filters.jl
index 002a73f..89656cf 100644
--- a/test/Filters.jl
+++ b/test/Filters.jl
@@ -1,7 +1,8 @@
 using Test
+using Zarr: DateTime64 # for datetime reinterpret
 
 using Zarr: zencode, zdecode
-using Zarr: Fletcher32Filter
+using Zarr: Fletcher32Filter, FixedScaleOffsetFilter, ShuffleFilter
 
 @testset "Fletcher32Filter" begin
     # These tests are copied exactly from the [`numcodecs`](https://github.com/zarr-developers/numcodecs/) Python package,
@@ -56,5 +57,37 @@ end
         end
     end
 end
+=#
+@testset "ShuffleFilter" begin
 
-=#
\ No newline at end of file
+    codecs = [
+        ShuffleFilter(),
+        ShuffleFilter(elementsize=0),
+        ShuffleFilter(elementsize=4),
+        ShuffleFilter(elementsize=8),
+    ]
+
+    arrays = [
+        Int32.(collect(1:1000)),                                                                # equivalent to np.arange(1000, dtype='i4')
+        LinRange(1000, 1001, 1000),                                                     # equivalent to np.linspace(1000, 1001, 1000, dtype='f8')
+        reshape(randn(1000) .* 1 .+ 1000, (100, 10)),                                   # equivalent to np.random.normal(loc=1000, scale=1, size=(100, 10))
+        reshape(rand(Bool, 1000), (10, 100)),                                           # equivalent to np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F')
+        reshape(rand(Zarr.MaxLengthString{3, UInt8}["a", "bb", "ccc"], 1000), (10, 10, 10)),                          # equivalent to np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10)
+        reinterpret(DateTime64{Dates.Nanosecond}, rand(UInt64(0):UInt64(2^60)-1, 1000)), # equivalent to np.random.randint(0, 2**60, size=1000, dtype='u8').view('M8[ns]')
+        Nanosecond.(rand(UInt64(0):UInt64(2^60-1), 1000)),                              # equivalent to np.random.randint(0, 2**60, size=1000, dtype='u8').view('m8[ns]')
+        reinterpret(DateTime64{Dates.Minute}, rand(UInt64(0):UInt64(2^25-1), 1000)),    # equivalent to np.random.randint(0, 2**25, size=1000, dtype='u8').view('M8[m]')
+        Minute.(rand(UInt64(0):UInt64(2^25-1), 1000)),                                  # equivalent to np.random.randint(0, 2**25, size=1000, dtype='u8').view('m8[m]')
+        reinterpret(DateTime64{Dates.Nanosecond}, rand(Int64(-(2^63)):Int64(-(2^63)+20), 1000)),    # equivalent to np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[ns]')
+        Nanosecond.(rand(Int64(-(2^63)):Int64(-(2^63)+20), 1000)),                      # equivalent to np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[ns]')
+        reinterpret(DateTime64{Dates.Minute}, rand(Int64(-(2^63)):Int64(-(2^63)+20), 1000)),    # equivalent to np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[m]')
+        Minute.(rand(Int64(-(2^63)):Int64(-(2^63)+20), 1000)),                          # equivalent to np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[m]')
+    ]
+
+    for codec in codecs
+        for array in arrays
+            encoded = Zarr.zencode(array, codec)
+            decoded = reshape(reinterpret(eltype(array), Zarr.zdecode(encoded, codec)), size(array))
+            @test decoded == array
+        end
+    end
+end

From ecdbeea5f9ebdd94f80048fc2eff7730dcaa8fa8 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Fri, 23 Aug 2024 15:03:53 -0700
Subject: [PATCH 12/31] Semi working quantize filter

---
 src/Filters/Filters.jl  |  1 +
 src/Filters/quantize.jl | 13 ++++++++-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/Filters/Filters.jl b/src/Filters/Filters.jl
index d6610a8..3161bac 100644
--- a/src/Filters/Filters.jl
+++ b/src/Filters/Filters.jl
@@ -86,3 +86,4 @@ include("vlenfilters.jl")
 include("fletcher32.jl")
 include("fixedscaleoffset.jl")
 include("shuffle.jl")
+include("quantize.jl")
\ No newline at end of file
diff --git a/src/Filters/quantize.jl b/src/Filters/quantize.jl
index ac4ef61..73693a8 100644
--- a/src/Filters/quantize.jl
+++ b/src/Filters/quantize.jl
@@ -13,20 +13,23 @@ struct QuantizeFilter{T, TENC} <: Filter{T, TENC}
     digits::Int32
 end
 
-function QuantizeFilter(; digits = 10, T = Float16, Tenc = DecodingType)
+function QuantizeFilter(; digits = 10, T = Float16, Tenc = T)
     return QuantizeFilter{T, Tenc}(digits)
 end
 
+QuantizeFilter{T, Tenc}(; digits = 10) where {T, Tenc} = QuantizeFilter{T, Tenc}(digits)
+QuantizeFilter{T}(; digits = 10) where T = QuantizeFilter{T, T}(digits)
+
 function zencode(data::AbstractArray, filter::QuantizeFilter{DecodingType, EncodingType}) where {DecodingType, EncodingType}
     arr = reinterpret(DecodingType, vec(data))
 
-    precision = 10^(-filter.digits)
+    precision = 10.0^(-filter.digits)
 
-    _exponent = log(precision, 10)
+    _exponent = log(10, precision) # base-10 logarithm of `precision` (`log(b, x)` takes the base first)
     exponent = _exponent < 0 ? floor(Int, _exponent) : ceil(Int, _exponent)
 
-    bits = ceil(log(10^(-exponent), 2))
-    scale = 2^bits
+    bits = ceil(log(2, 10.0^(-exponent)))
+    scale = 2.0^bits
 
     enc = @. round(scale * arr) / scale
 

From 5b8210fdc83f9a1b4266bced60ecb2752bdffd0e Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Fri, 23 Aug 2024 15:04:07 -0700
Subject: [PATCH 13/31] Format tests better

---
 test/Filters.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/Filters.jl b/test/Filters.jl
index 89656cf..a9b0bbc 100644
--- a/test/Filters.jl
+++ b/test/Filters.jl
@@ -2,7 +2,7 @@ using Test
 using Zarr: DateTime64 # for datetime reinterpret
 
 using Zarr: zencode, zdecode
-using Zarr: Fletcher32Filter, FixedScaleOffsetFilter, ShuffleFilter
+using Zarr: Fletcher32Filter, FixedScaleOffsetFilter, ShuffleFilter, QuantizeFilter
 
 @testset "Fletcher32Filter" begin
     # These tests are copied exactly from the [`numcodecs`](https://github.com/zarr-developers/numcodecs/) Python package,
@@ -68,11 +68,11 @@ end
     ]
 
     arrays = [
-        Int32.(collect(1:1000)),                                                                # equivalent to np.arange(1000, dtype='i4')
+        Int32.(collect(1:1000)),                                                        # equivalent to np.arange(1000, dtype='i4')
         LinRange(1000, 1001, 1000),                                                     # equivalent to np.linspace(1000, 1001, 1000, dtype='f8')
         reshape(randn(1000) .* 1 .+ 1000, (100, 10)),                                   # equivalent to np.random.normal(loc=1000, scale=1, size=(100, 10))
         reshape(rand(Bool, 1000), (10, 100)),                                           # equivalent to np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F')
-        reshape(rand(Zarr.MaxLengthString{3, UInt8}["a", "bb", "ccc"], 1000), (10, 10, 10)),                          # equivalent to np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10)
+        reshape(rand(Zarr.MaxLengthString{3, UInt8}["a", "bb", "ccc"], 1000), (10, 10, 10)), # equivalent to np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10)
         reinterpret(DateTime64{Dates.Nanosecond}, rand(UInt64(0):UInt64(2^60)-1, 1000)), # equivalent to np.random.randint(0, 2**60, size=1000, dtype='u8').view('M8[ns]')
         Nanosecond.(rand(UInt64(0):UInt64(2^60-1), 1000)),                              # equivalent to np.random.randint(0, 2**60, size=1000, dtype='u8').view('m8[ns]')
         reinterpret(DateTime64{Dates.Minute}, rand(UInt64(0):UInt64(2^25-1), 1000)),    # equivalent to np.random.randint(0, 2**25, size=1000, dtype='u8').view('M8[m]')

From 16306bedefac232a90ef3e2c4f3febc4e23666b2 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Fri, 23 Aug 2024 15:14:10 -0700
Subject: [PATCH 14/31] Complete interface and test quantize

---
 src/Filters/quantize.jl | 13 +++++++++++-
 test/Filters.jl         | 44 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/src/Filters/quantize.jl b/src/Filters/quantize.jl
index 73693a8..d74e3f5 100644
--- a/src/Filters/quantize.jl
+++ b/src/Filters/quantize.jl
@@ -40,6 +40,17 @@ function zencode(data::AbstractArray, filter::QuantizeFilter{DecodingType, Encod
     end
 end
 
+# Decoding is a no-op; quantization is a lossy filter but data is encoded directly.
 function zdecode(data::AbstractArray, filter::QuantizeFilter{DecodingType, EncodingType}) where {DecodingType, EncodingType}
     return data
-end
\ No newline at end of file
+end
+
+function JSON.lower(filter::QuantizeFilter{T, Tenc}) where {T, Tenc}
+    return Dict("type" => "quantize", "digits" => filter.digits, "dtype" => typestring(T), "atype" => typestring(Tenc))
+end
+
+function getFilter(::Type{<: QuantizeFilter}, d)
+    return QuantizeFilter{typestr(d["dtype"], typestr(d["atype"]))}(; digits = d["digits"])
+end
+
+filterdict["quantize"] = QuantizeFilter
\ No newline at end of file
diff --git a/test/Filters.jl b/test/Filters.jl
index a9b0bbc..a7cd31c 100644
--- a/test/Filters.jl
+++ b/test/Filters.jl
@@ -91,3 +91,47 @@ end
         end
     end
 end
+
+
+@testset "QuantizeFilter" begin
+
+    codecs = [
+        QuantizeFilter{Float64, Float16}(digits=-1),
+        QuantizeFilter{Float64, Float16}(digits=0),
+        QuantizeFilter{Float64, Float16}(digits=1),
+        QuantizeFilter{Float64, Float32}(digits=5),
+        QuantizeFilter{Float64, Float64}(digits=12),
+    ]
+
+    arrays = [
+        LinRange(100, 200, 1000),                         # np.linspace(100, 200, 1000, dtype='<f8')
+        randn(1000) .+ 0,                                 # np.random.normal(loc=0, scale=1, size=1000).astype('<f8')
+        reshape(LinRange(100, 200, 1000), (100, 10)),     # np.linspace(100, 200, 1000, dtype='<f8').reshape(100, 10)
+        permutedims(reshape(LinRange(100, 200, 1000), (10, 100))),  # np.linspace(100, 200, 1000, dtype='<f8').reshape(100, 10, order='F')
+        reshape(LinRange(100, 200, 1000), (10, 10, 10)),  # np.linspace(100, 200, 1000, dtype='<f8').reshape(10, 10, 10)
+    ]
+
+    @testset "Encoding accuracy" begin
+        for codec in codecs
+            @testset "$(codec.digits) digits" begin
+                for array in arrays
+                    encoded = Zarr.zencode(array, codec)
+                    decoded = reshape(reinterpret(eltype(array), Zarr.zdecode(encoded, codec)), size(array))
+                    @test decoded ≈ array rtol=(1.5*10.0^(-codec.digits))
+                end
+            end
+        end
+    end
+
+    @testset "Decode is a no-op" begin
+        for codec in codecs
+            @testset "$(codec.digits) digits" begin
+                for array in arrays
+                    encoded = Zarr.zencode(array, codec)
+                    decoded = Zarr.zdecode(encoded, codec)
+                    @test decoded === encoded
+                end
+            end
+        end
+    end
+end
\ No newline at end of file

From eec1b0df820ad5735a88a54c8d44a8ec7a44ef2b Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Fri, 23 Aug 2024 21:35:38 -0700
Subject: [PATCH 15/31] Uncomment the FixedScaleOffset tests

---
 test/Filters.jl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/test/Filters.jl b/test/Filters.jl
index a7cd31c..24ee7d4 100644
--- a/test/Filters.jl
+++ b/test/Filters.jl
@@ -31,7 +31,6 @@ using Zarr: Fletcher32Filter, FixedScaleOffsetFilter, ShuffleFilter, QuantizeFil
     @test_throws "Checksum mismatch in Fletcher32 decoding" zdecode(enc, Fletcher32Filter())
 end
 
-#=
 @testset "FixedScaleOffsetFilter" begin
     arrays = [
         LinRange{Float64}(1000, 1001, 1000),
@@ -57,7 +56,7 @@ end
         end
     end
 end
-=#
+
 @testset "ShuffleFilter" begin
 
     codecs = [

From 42995b2eed37f20086b5c21485843b32b1f135a0 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Wed, 28 Aug 2024 16:51:06 -0700
Subject: [PATCH 16/31] fix getfilter syntax

---
 src/Filters/fixedscaleoffset.jl | 2 +-
 src/Filters/fletcher32.jl       | 2 +-
 src/Filters/quantize.jl         | 2 +-
 src/Filters/shuffle.jl          | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Filters/fixedscaleoffset.jl b/src/Filters/fixedscaleoffset.jl
index 80be305..66582a9 100644
--- a/src/Filters/fixedscaleoffset.jl
+++ b/src/Filters/fixedscaleoffset.jl
@@ -27,7 +27,7 @@ function zdecode(a::AbstractArray, c::FixedScaleOffsetFilter{ScaleOffsetType, T,
 end
 
 
-function getFilter(::Type{<: FixedScaleOffsetFilter}, d::Dict)
+function getfilter(::Type{<: FixedScaleOffsetFilter}, d::Dict)
     scale = d["scale"]
     offset = d["offset"]
     # Types must be converted from strings to the actual Julia types they represent.
diff --git a/src/Filters/fletcher32.jl b/src/Filters/fletcher32.jl
index 4d2bda7..3241894 100644
--- a/src/Filters/fletcher32.jl
+++ b/src/Filters/fletcher32.jl
@@ -19,7 +19,7 @@ the checksum and cropping the last 4 bytes of the data during decoding.
 struct Fletcher32Filter <: Filter{UInt8, UInt8}
 end
 
-getFilter(::Type{<: Fletcher32Filter}, d::Dict) = Fletcher32Filter()
+getfilter(::Type{<: Fletcher32Filter}, d::Dict) = Fletcher32Filter()
 JSON.lower(::Fletcher32Filter) = Dict("id" => "fletcher32")
 filterdict["fletcher32"] = Fletcher32Filter
 
diff --git a/src/Filters/quantize.jl b/src/Filters/quantize.jl
index d74e3f5..ea00dcb 100644
--- a/src/Filters/quantize.jl
+++ b/src/Filters/quantize.jl
@@ -49,7 +49,7 @@ function JSON.lower(filter::QuantizeFilter{T, Tenc}) where {T, Tenc}
     return Dict("type" => "quantize", "digits" => filter.digits, "dtype" => typestring(T), "atype" => typestring(Tenc))
 end
 
-function getFilter(::Type{<: QuantizeFilter}, d)
+function getfilter(::Type{<: QuantizeFilter}, d)
     return QuantizeFilter{typestr(d["dtype"], typestr(d["atype"]))}(; digits = d["digits"])
 end
 
diff --git a/src/Filters/shuffle.jl b/src/Filters/shuffle.jl
index 0dc49a7..6a01f5d 100644
--- a/src/Filters/shuffle.jl
+++ b/src/Filters/shuffle.jl
@@ -52,7 +52,7 @@ function zdecode(a::AbstractArray, c::ShuffleFilter)
     return dest
 end
 
-function getFilter(::Type{ShuffleFilter}, d::Dict)
+function getfilter(::Type{ShuffleFilter}, d::Dict)
     return ShuffleFilter(d["elementsize"])
 end
 

From 594ffdcaf59075f4cd81144505699f10b2b23528 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Thu, 29 Aug 2024 15:58:05 -0700
Subject: [PATCH 17/31] Add delta filter

---
 src/Filters/Filters.jl |  3 ++-
 src/Filters/delta.jl   | 45 ++++++++++++++++++++++++++++++++++++++++++
 test/Filters.jl        | 29 ++++++++++++++++++++++++++-
 3 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100644 src/Filters/delta.jl

diff --git a/src/Filters/Filters.jl b/src/Filters/Filters.jl
index 3161bac..7f7a394 100644
--- a/src/Filters/Filters.jl
+++ b/src/Filters/Filters.jl
@@ -86,4 +86,5 @@ include("vlenfilters.jl")
 include("fletcher32.jl")
 include("fixedscaleoffset.jl")
 include("shuffle.jl")
-include("quantize.jl")
\ No newline at end of file
+include("quantize.jl")
+include("delta.jl")
diff --git a/src/Filters/delta.jl b/src/Filters/delta.jl
new file mode 100644
index 0000000..ccc7cf5
--- /dev/null
+++ b/src/Filters/delta.jl
@@ -0,0 +1,45 @@
+#=
+# Delta compression
+
+Store the first element as-is, followed by the differences between consecutive elements.
+=#
+
+"""
+    DeltaFilter(; DecodingType, [EncodingType = DecodingType])
+
+Delta-based compression for Zarr arrays.  (Delta encoding is Julia `diff`, decoding is Julia `cumsum`).
+"""
+struct DeltaFilter{T, TENC} <: Filter{T, TENC}
+end
+
+function DeltaFilter(; DecodingType = Float16, EncodingType = DecodingType)
+    return DeltaFilter{DecodingType, EncodingType}()
+end
+
+DeltaFilter{T}() where T = DeltaFilter{T, T}()
+
+function zencode(data::AbstractArray, filter::DeltaFilter{DecodingType, EncodingType}) where {DecodingType, EncodingType}
+    arr = reinterpret(DecodingType, vec(data))
+    enc = similar(arr, EncodingType)
+    isempty(arr) && return enc # nothing to difference; `arr[begin]` would throw on empty input
+    # Keep the first element verbatim, then store each consecutive difference after it.
+    enc[begin] = arr[begin]
+    enc[begin+1:end] .= diff(arr)
+    return enc
+end
+
+function zdecode(data::AbstractArray, filter::DeltaFilter{DecodingType, EncodingType}) where {DecodingType, EncodingType}
+    encoded = reinterpret(EncodingType, vec(data))
+    decoded = DecodingType.(accumulate(+, encoded; init = zero(DecodingType))) # not `cumsum`: it widens small integers, so wrapped deltas would overflow
+    return decoded
+end
+
+function JSON.lower(filter::DeltaFilter{T, Tenc}) where {T, Tenc}
+    return Dict("type" => "delta", "dtype" => typestring(T), "atype" => typestring(Tenc))
+end
+
+function getfilter(::Type{<: DeltaFilter}, d)
+    return DeltaFilter{typestr(d["dtype"]), typestr(get(d, "astype", get(d, "atype", d["dtype"])))}() # numcodecs writes "astype"; "atype" is what JSON.lower emits here
+end
+
+filterdict["delta"] = DeltaFilter
\ No newline at end of file
diff --git a/test/Filters.jl b/test/Filters.jl
index 24ee7d4..1bec170 100644
--- a/test/Filters.jl
+++ b/test/Filters.jl
@@ -2,7 +2,7 @@ using Test
 using Zarr: DateTime64 # for datetime reinterpret
 
 using Zarr: zencode, zdecode
-using Zarr: Fletcher32Filter, FixedScaleOffsetFilter, ShuffleFilter, QuantizeFilter
+using Zarr: Fletcher32Filter, FixedScaleOffsetFilter, ShuffleFilter, QuantizeFilter, DeltaFilter
 
 @testset "Fletcher32Filter" begin
     # These tests are copied exactly from the [`numcodecs`](https://github.com/zarr-developers/numcodecs/) Python package,
@@ -133,4 +133,31 @@ end
             end
         end
     end
+end
+
+@testset "DeltaFilter" begin
+    
+    arrays = [
+        Int32.(collect(0:999)),  # np.arange(1000, dtype='<i4')
+        Float32.(reshape(LinRange(1000, 1001, 1000), (100, 10))),  # np.linspace(1000, 1001, 1000, dtype='<f4').reshape(100, 10)
+        Float64.(reshape(randn(1000) .* 1 .+ 1000, (10, 10, 10))),  # np.random.normal(loc=1000, scale=1, size=(10, 10, 10)).astype('<f8')
+        UInt16.(permutedims(reshape(rand(UInt16, 1000) .% 200, (100, 10))))  # np.random.randint(0, 200, size=1000, dtype='u2').astype('<u2').reshape(100, 10, order='F')
+    ]
+
+    for array in arrays
+        encoded = Zarr.zencode(array, DeltaFilter{eltype(array)}())
+        decoded = Zarr.zdecode(encoded, DeltaFilter{eltype(array)}())
+        @test decoded == array
+    end
+
+    @testset "DeltaFilter with different dtypes" begin
+        dtype = Int64
+        astype = Int32
+        codec = Zarr.DeltaFilter{dtype, astype}()
+        arr = collect(Int64, 10:19)
+        expect = Int32[10; fill(1, 9)]
+        actual = Zarr.zencode(arr, codec)
+        @test Int64.(expect) == Int64.(actual)
+        @test eltype(actual) == astype
+    end
 end
\ No newline at end of file

From d7ce424db84952a137e8dc62a3a3217b6aae2c24 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Wed, 4 Sep 2024 11:44:10 -0700
Subject: [PATCH 18/31] Adapt for Kerchunk playing fast and loose with the spec

- Kerchunk often encodes the compressor as the last filter, so we check that the compressor isn't hiding in the filters array if the compressor is null.
- Similarly, the dtype is often unknown in this case, or the transform is not encoded correctly, so we ensure that the datatypes of `data` and `a2` remain the same by reinterpreting.
---
 src/Compressors.jl | 4 ++--
 src/metadata.jl    | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/Compressors.jl b/src/Compressors.jl
index b54e97a..ebf0a81 100644
--- a/src/Compressors.jl
+++ b/src/Compressors.jl
@@ -30,11 +30,11 @@ function zcompress!(compressed, data, c, f)
 end
 
 function zuncompress!(data, compressed, c, f)
-    data2 = zuncompress(compressed, c, desttype(last(f))) 
+    data2 = zuncompress(compressed, c, desttype(last(f)))
     a2 = foldr(f, init = data2) do fnow, anow
         zdecode(anow, fnow)
     end
-    copyto!(data, a2)
+    copyto!(data, _reinterpret(Base.nonmissingtype(eltype(data)), a2))
 end
 
 
diff --git a/src/metadata.jl b/src/metadata.jl
index f3dc5df..51bb382 100644
--- a/src/metadata.jl
+++ b/src/metadata.jl
@@ -156,6 +156,12 @@ function Metadata(d::AbstractDict, fill_as_missing)
     # create a Metadata struct from it
 
     compdict = d["compressor"]
+    if isnothing(compdict)
+        kerchunk_filters = get(d, "filters", nothing) # Kerchunk compat: the compressor may be stored as the last filter
+        if !isnothing(kerchunk_filters) && !isempty(kerchunk_filters) && haskey(compressortypes, kerchunk_filters[end]["id"])
+            compdict = pop!(kerchunk_filters) # TODO: this will not work with JSON3!
+        end
+    end
     compressor = getCompressor(compdict)
 
     filters = getfilters(d)

From 7518c43caa3525b710ded083efc8bc18f6d9c62d Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Wed, 9 Oct 2024 15:47:08 -0700
Subject: [PATCH 19/31] Fix the delta and quantize JSON.lower

---
 src/Filters/delta.jl    | 2 +-
 src/Filters/quantize.jl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Filters/delta.jl b/src/Filters/delta.jl
index ccc7cf5..f7cdc3d 100644
--- a/src/Filters/delta.jl
+++ b/src/Filters/delta.jl
@@ -35,7 +35,7 @@ function zdecode(data::AbstractArray, filter::DeltaFilter{DecodingType, Encoding
 end
 
 function JSON.lower(filter::DeltaFilter{T, Tenc}) where {T, Tenc}
-    return Dict("type" => "delta", "dtype" => typestring(T), "atype" => typestring(Tenc))
+    return Dict("id" => "delta", "dtype" => typestr(T), "atype" => typestr(Tenc))
 end
 
 function getfilter(::Type{<: DeltaFilter}, d)
diff --git a/src/Filters/quantize.jl b/src/Filters/quantize.jl
index ea00dcb..12ad9ee 100644
--- a/src/Filters/quantize.jl
+++ b/src/Filters/quantize.jl
@@ -46,7 +46,7 @@ function zdecode(data::AbstractArray, filter::QuantizeFilter{DecodingType, Encod
 end
 
 function JSON.lower(filter::QuantizeFilter{T, Tenc}) where {T, Tenc}
-    return Dict("type" => "quantize", "digits" => filter.digits, "dtype" => typestring(T), "atype" => typestring(Tenc))
+    return Dict("id" => "quantize", "digits" => filter.digits, "dtype" => typestr(T), "atype" => typestr(Tenc))
 end
 
 function getfilter(::Type{<: QuantizeFilter}, d)

From a3c7710ce9c126bdcbfe56b2e1bebf237d3d6fdf Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Wed, 9 Oct 2024 15:47:33 -0700
Subject: [PATCH 20/31] Change the tests to be more sensible/Julian and avoid
 truncation errors

---
 test/Filters.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/Filters.jl b/test/Filters.jl
index 1bec170..6a0a271 100644
--- a/test/Filters.jl
+++ b/test/Filters.jl
@@ -141,12 +141,12 @@ end
         Int32.(collect(0:999)),  # np.arange(1000, dtype='<i4')
         Float32.(reshape(LinRange(1000, 1001, 1000), (100, 10))),  # np.linspace(1000, 1001, 1000, dtype='<f4').reshape(100, 10)
         Float64.(reshape(randn(1000) .* 1 .+ 1000, (10, 10, 10))),  # np.random.normal(loc=1000, scale=1, size=(10, 10, 10)).astype('<f8')
-        UInt16.(permutedims(reshape(rand(UInt16, 1000) .% 200, (100, 10))))  # np.random.randint(0, 200, size=1000, dtype='u2').astype('<u2').reshape(100, 10, order='F')
+        permutedims(reshape(rand(UInt16, 1000) .% 200, (100, 10)))  # np.random.randint(0, 200, size=1000, dtype='u2').astype('<u2').reshape(100, 10, order='F')
     ]
 
     for array in arrays
         encoded = Zarr.zencode(array, DeltaFilter{eltype(array)}())
-        decoded = Zarr.zdecode(encoded, DeltaFilter{eltype(array)}())
+        decoded = reshape(reinterpret(eltype(array), Zarr.zdecode(encoded, DeltaFilter{eltype(array)}())), size(array))
         @test decoded == array
     end
 

From c233e42b5f418d99f2c00791b1c3f75933c10174 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Wed, 9 Oct 2024 15:47:57 -0700
Subject: [PATCH 21/31] Fix the FixedScaleOffset filter materializer

---
 src/Filters/fixedscaleoffset.jl | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/Filters/fixedscaleoffset.jl b/src/Filters/fixedscaleoffset.jl
index 66582a9..b48dbc4 100644
--- a/src/Filters/fixedscaleoffset.jl
+++ b/src/Filters/fixedscaleoffset.jl
@@ -3,6 +3,11 @@
     FixedScaleOffsetFilter{T,TENC}(scale, offset)
 
 A compressor that scales and offsets the data.
+
+!!! note
+    The geographic CF standards define scale/offset decoding as `x * scale + offset`,
+    but this filter defines it as `x / scale + offset`.  Constructing a `FixedScaleOffsetFilter`
+    from CF data means `FixedScaleOffsetFilter(1/cf_scale_factor, cf_add_offset)`.
 """
 struct FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc} <: Filter{T, Tenc}
     scale::ScaleOffsetType
@@ -35,7 +40,7 @@ function getfilter(::Type{<: FixedScaleOffsetFilter}, d::Dict)
     string_Tenc = get(d, "atype", string_T)
     T = typestr(string_T)
     Tenc = typestr(string_Tenc)
-    return FixedScaleOffsetFilter{T, Tenc}(scale, offset)
+    return FixedScaleOffsetFilter{Tenc, T, Tenc}(scale, offset)
 end
 
 function JSON.lower(c::FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}) where {ScaleOffsetType, T, Tenc}

From c211b6f65428098f136bf350fe679432cb8749aa Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Wed, 9 Oct 2024 15:48:15 -0700
Subject: [PATCH 22/31] Fix decoding for fill values to use `reinterpret` on
 unsigned -> integer

---
 src/metadata.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/metadata.jl b/src/metadata.jl
index 51bb382..b677cc0 100644
--- a/src/metadata.jl
+++ b/src/metadata.jl
@@ -222,5 +222,6 @@ Base.eltype(::Metadata{T}) where T = T
 fill_value_decoding(v::AbstractString, T::Type{<:Number}) = parse(T, v)
 fill_value_decoding(v::Nothing, ::Any) = v
 fill_value_decoding(v, T) = T(v)
+fill_value_decoding(v::Integer, T::Type{<: Unsigned}) = reinterpret(T, signed(T)(v))
 fill_value_decoding(v::Number, T::Type{String}) = v == 0 ? "" : T(UInt8[v])
 fill_value_decoding(v, ::Type{ASCIIChar}) = v == "" ? nothing : v

From 086b3b8699aaca90008c4bd3d6cf29ad3f87a7a1 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Wed, 9 Oct 2024 15:48:38 -0700
Subject: [PATCH 23/31] If `getfilter` fails, show the filter name and then
 throw an error

---
 src/Filters/Filters.jl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/Filters/Filters.jl b/src/Filters/Filters.jl
index 7f7a394..829f31f 100644
--- a/src/Filters/Filters.jl
+++ b/src/Filters/Filters.jl
@@ -72,7 +72,12 @@ function getfilters(d::Dict)
             return nothing
         end
         f = map(d["filters"]) do f
+            try
             getfilter(filterdict[f["id"]], f)
+            catch e
+                @show f
+                rethrow(e)
+            end
         end
         return (f...,)
     end

From ffdc62929d23af3cdc601faebf00dd229bbc7748 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Mon, 21 Oct 2024 14:06:23 -0700
Subject: [PATCH 24/31] Apply reinterpret before multiplication in
 fixed-scale-offset filter

---
 src/Filters/fixedscaleoffset.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Filters/fixedscaleoffset.jl b/src/Filters/fixedscaleoffset.jl
index b48dbc4..906ab19 100644
--- a/src/Filters/fixedscaleoffset.jl
+++ b/src/Filters/fixedscaleoffset.jl
@@ -28,7 +28,8 @@ function zencode(a::AbstractArray, c::FixedScaleOffsetFilter{ScaleOffsetType, T,
 end
 
 function zdecode(a::AbstractArray, c::FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}) where {T, Tenc, ScaleOffsetType}
-    return _reinterpret(Base.nonmissingtype(T), @. a / c.scale + c.offset)
+    data = _reinterpret(Base.nonmissingtype(T), a)
+    return @. (data / c.scale) + c.offset
 end
 
 

From 24a68e6e369f494898492ac6d379f5d3169503f4 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Mon, 21 Oct 2024 14:07:00 -0700
Subject: [PATCH 25/31] Only reinterpret negative integers when decoding fill
 values to unsigned

---
 src/metadata.jl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/metadata.jl b/src/metadata.jl
index b677cc0..456993b 100644
--- a/src/metadata.jl
+++ b/src/metadata.jl
@@ -222,6 +222,9 @@ Base.eltype(::Metadata{T}) where T = T
 fill_value_decoding(v::AbstractString, T::Type{<:Number}) = parse(T, v)
 fill_value_decoding(v::Nothing, ::Any) = v
 fill_value_decoding(v, T) = T(v)
-fill_value_decoding(v::Integer, T::Type{<: Unsigned}) = reinterpret(T, signed(T)(v))
+# Sometimes, unsigned values are represented as signed integers in strings.
+# If the value is negative, then we know it needs reinterpretation, 
+# but if the value is positive, there is no difference between a signed and unsigned integer.
+fill_value_decoding(v::Integer, T::Type{<: Unsigned}) = sign(v) < 0 ? reinterpret(T, signed(T)(v)) : T(v)
 fill_value_decoding(v::Number, T::Type{String}) = v == 0 ? "" : T(UInt8[v])
 fill_value_decoding(v, ::Type{ASCIIChar}) = v == "" ? nothing : v

From 85c1189fb0d81e268c62bcbb2e0f832ce0a9f0c0 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Mon, 21 Oct 2024 14:10:11 -0700
Subject: [PATCH 26/31] Revert "Only reinterpret negative integers when
 decoding fill values to unsigned"

This reverts commit 24a68e6e369f494898492ac6d379f5d3169503f4.
---
 src/metadata.jl | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/metadata.jl b/src/metadata.jl
index 456993b..b677cc0 100644
--- a/src/metadata.jl
+++ b/src/metadata.jl
@@ -222,9 +222,6 @@ Base.eltype(::Metadata{T}) where T = T
 fill_value_decoding(v::AbstractString, T::Type{<:Number}) = parse(T, v)
 fill_value_decoding(v::Nothing, ::Any) = v
 fill_value_decoding(v, T) = T(v)
-# Sometimes, unsigned values are represented as signed integers in strings.
-# If the value is negative, then we know it needs reinterpretation, 
-# but if the value is positive, there is no difference between a signed and unsigned integer.
-fill_value_decoding(v::Integer, T::Type{<: Unsigned}) = sign(v) < 0 ? reinterpret(T, signed(T)(v)) : T(v)
+fill_value_decoding(v::Integer, T::Type{<: Unsigned}) = reinterpret(T, signed(T)(v))
 fill_value_decoding(v::Number, T::Type{String}) = v == 0 ? "" : T(UInt8[v])
 fill_value_decoding(v, ::Type{ASCIIChar}) = v == "" ? nothing : v

From 3fca4eb37e010e7d0df1cde11d35332527d6cb7e Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Fri, 22 Nov 2024 10:39:40 -0500
Subject: [PATCH 27/31] let Fletcher32 operate on n-dimensional arrays

not just vectors, as it was previously constrained to
---
 src/Filters/fletcher32.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Filters/fletcher32.jl b/src/Filters/fletcher32.jl
index 3241894..d854cb9 100644
--- a/src/Filters/fletcher32.jl
+++ b/src/Filters/fletcher32.jl
@@ -23,8 +23,8 @@ getfilter(::Type{<: Fletcher32Filter}, d::Dict) = Fletcher32Filter()
 JSON.lower(::Fletcher32Filter) = Dict("id" => "fletcher32")
 filterdict["fletcher32"] = Fletcher32Filter
 
-function _checksum_fletcher32(data::AbstractVector{UInt8})
-    len = length(data) / 2 # length in 16-bit words
+function _checksum_fletcher32(data::AbstractArray{UInt8})
+    len = length(data) ÷ 2 # length in 16-bit words
     sum1::UInt32 = 0
     sum2::UInt32 = 0
     data_idx = 1
@@ -62,7 +62,7 @@ function _checksum_fletcher32(data::AbstractVector{UInt8})
 end
 
 function zencode(data, ::Fletcher32Filter)
-    bytes = reinterpret(UInt8, data)
+    bytes = reinterpret(UInt8, vec(data))
     checksum = _checksum_fletcher32(bytes)
     result = copy(bytes)
     append!(result, reinterpret(UInt8, [checksum])) # TODO: decompose this without the extra allocation of wrapping in Array

From fdb5defbb6f9f121f1086cf1126b983d0915ce34 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Fri, 22 Nov 2024 10:40:39 -0500
Subject: [PATCH 28/31] fix FixedScaleOffset in many ways

- Never use reinterpret
- use array comprehensions to support 0-dimensional arrays correctly, the performance impact is negligible based on testing
- only round if the target type is an integer, otherwise let it be if it's a float.
---
 src/Filters/fixedscaleoffset.jl | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/Filters/fixedscaleoffset.jl b/src/Filters/fixedscaleoffset.jl
index 906ab19..1924ca1 100644
--- a/src/Filters/fixedscaleoffset.jl
+++ b/src/Filters/fixedscaleoffset.jl
@@ -22,14 +22,15 @@ function FixedScaleOffsetFilter(; scale::ScaleOffsetType, offset::ScaleOffsetTyp
 end
 
 function zencode(a::AbstractArray, c::FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}) where {T, Tenc, ScaleOffsetType}
-    return @. convert(Tenc, # convert to the encoding type after applying the scale and offset
-        round((a - c.offset) * c.scale) # apply scale and offset, and round to nearest integer
-    )
+    if Tenc <: Integer # integer targets must be rounded; float targets keep their fractional part
+        return [round(Tenc, (x - c.offset) * c.scale) for x in a] # apply offset and scale, round to nearest integer
+    else
+        return [convert(Tenc, (x - c.offset) * c.scale) for x in a] # apply offset and scale, no rounding
+    end
 end
 
 function zdecode(a::AbstractArray, c::FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}) where {T, Tenc, ScaleOffsetType}
-    data = _reinterpret(Base.nonmissingtype(T), a)
-    return @. (data / c.scale) + c.offset
+    return [convert(Base.nonmissingtype(T), (x / c.scale) + c.offset) for x in a] # invert zencode: undo scale, then offset
 end
 
 

From cf602425ea8697df605bb7aebe954bb83b8b279b Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Fri, 22 Nov 2024 11:01:48 -0500
Subject: [PATCH 29/31] add filter tests in Python

---
 test/python.jl | 60 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/test/python.jl b/test/python.jl
index 86a72ca..4a650e3 100644
--- a/test/python.jl
+++ b/test/python.jl
@@ -22,13 +22,16 @@ groupattrs = Dict("String attribute"=>"One", "Int attribute"=>5, "Float attribut
 g = zgroup(pjulia,attrs=groupattrs)
 
 # Test all supported data types and compressors
-import Zarr: NoCompressor, BloscCompressor, ZlibCompressor, MaxLengthString
+import Zarr: NoCompressor, BloscCompressor, ZlibCompressor, MaxLengthString, 
+       Fletcher32Filter, FixedScaleOffsetFilter, ShuffleFilter, QuantizeFilter, DeltaFilter
 using Random: randstring
-dtypes = (UInt8, UInt16, UInt32, UInt64,
+numeric_dtypes = (UInt8, UInt16, UInt32, UInt64,
     Int8, Int16, Int32, Int64,
     Float16, Float32, Float64,
     Complex{Float32}, Complex{Float64},
-    Bool,MaxLengthString{10,UInt8},MaxLengthString{10,UInt32},
+    Bool,)
+dtypes = (numeric_dtypes...,
+    MaxLengthString{10,UInt8},MaxLengthString{10,UInt32},
     String)
 compressors = (
     "no"=>NoCompressor(),
@@ -37,9 +40,17 @@ compressors = (
     "blosc_noshuffle"=>BloscCompressor(cname="zstd",shuffle=0),
     "blosc_bitshuffle"=>BloscCompressor(cname="zstd",shuffle=2),
     "zlib"=>ZlibCompressor())
+filters = (
+    "fletcher32"=>Fletcher32Filter(),
+    "scale_offset"=>FixedScaleOffsetFilter(offset=1000, scale=10^6, T=Float64, Tenc=Int32),
+    "shuffle"=>ShuffleFilter(elementsize=4),
+    "quantize"=>QuantizeFilter{Float64,Float32}(digits=5),
+    "delta"=>DeltaFilter{Int32}()
+)
 testarrays = Dict(t=>(t<:AbstractString) ? [randstring(maximum(i.I)) for i in CartesianIndices((1:10,1:6,1:2))] : rand(t,10,6,2) for t in dtypes)
 testzerodimarrays = Dict(t=>(t<:AbstractString) ? randstring(10) : rand(t) for t in dtypes)
 
+# Test arrays with compressors
 for t in dtypes, co in compressors
     compstr, comp = co
     att = Dict("This is a nested attribute"=>Dict("a"=>5))
@@ -49,6 +60,21 @@ for t in dtypes, co in compressors
     a = zcreate(t, g,string("azerodim",t,compstr), compressor=comp)
     a[] = testzerodimarrays[t]
 end
+
+# Test arrays with filters
+for (filterstr, filter) in filters
+    t = eltype(filter) == Any ? Float64 : eltype(filter)
+    att = Dict("Filter test attribute"=>Dict("b"=>6))
+    a = zcreate(t, g,string("filter_",filterstr),10,6,2,attrs=att, chunks = (5,2,2),filters=[filter])
+    testdata = rand(t,10,6,2)
+    a[:,:,:] = testdata
+    
+    # Test zero-dimensional array
+    a = zcreate(t, g,string("filter_zerodim_",filterstr), filters=[filter])
+    testzerodim = rand(t)
+    a[] = testzerodim
+end
+
 #Also save as zip file.
 open(pjulia*".zip";write=true) do io
     Zarr.writezip(io, g)
@@ -58,6 +84,7 @@ end
 for julia_path in (pjulia, pjulia*".zip")
 py"""
 import zarr
+import numcodecs
 g = zarr.open_group($julia_path)
 gatts = g.attrs
 """
@@ -67,7 +94,6 @@ gatts = g.attrs
 @test py"gatts['Int attribute']" == 5
 @test py"gatts['Float attribute']" == 10.5
 
-
 dtypesp = ("uint8","uint16","uint32","uint64",
     "int8","int16","int32","int64",
     "float16","float32","float64",
@@ -95,6 +121,30 @@ for i=1:length(dtypes), co in compressors
     end
 end
 
+# Test reading filtered arrays from python
+for (filterstr, filter) in filters
+    # Arrays were written above as "filter_<name>" and "filter_zerodim_<name>".
+    arname = string("filter_",filterstr)
+    try
+        py"""
+        ar=g[$arname]
+        """
+    catch e
+        @error "Error loading group with filter $filterstr" exception=(e,catch_backtrace())
+        @test false # test failed.
+    end
+    
+    @test py"ar.attrs['Filter test attribute']" == Dict("b"=>6)
+    @test py"ar.shape" == (2,6,10) # dimensions appear reversed relative to the Julia-side (10,6,2)
+    
+    # Test zero-dimensional filtered array
+    arname = string("filter_zerodim_",filterstr) 
+    py"""
+    ar_zero=g[$arname]
+    """
+    @test py"ar_zero.shape" == ()
+end
+
 for i=1:length(dtypes), co in compressors
     compstr,comp = co
     t = dtypes[i]
@@ -244,6 +294,4 @@ for unit in ["Week", "Day", "Hour", "Minute", "Second",
     @test_py np.datetime64(g_julia[unit][100] |> DateTime |> string) == get(getproperty(g_python,unit),99)
 end
 
-
-
 end

From 1fe11f629f4d42123e5bc369a26461da7667ed62 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Fri, 22 Nov 2024 11:02:53 -0500
Subject: [PATCH 30/31] Fix filter astype, id to conform to Python names

---
 src/Filters/delta.jl            |  4 ++--
 src/Filters/fixedscaleoffset.jl |  4 ++--
 src/Filters/quantize.jl         | 12 ++++--------
 3 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/Filters/delta.jl b/src/Filters/delta.jl
index f7cdc3d..9d1de04 100644
--- a/src/Filters/delta.jl
+++ b/src/Filters/delta.jl
@@ -35,11 +35,11 @@ function zdecode(data::AbstractArray, filter::DeltaFilter{DecodingType, Encoding
 end
 
 function JSON.lower(filter::DeltaFilter{T, Tenc}) where {T, Tenc}
-    return Dict("id" => "delta", "dtype" => typestr(T), "atype" => typestr(Tenc))
+    return Dict("id" => "delta", "dtype" => typestr(T), "astype" => typestr(Tenc))
 end
 
 function getfilter(::Type{<: DeltaFilter}, d)
-    return DeltaFilter{typestr(d["dtype"], haskey(d, "atype") ? typestr(d["atype"]) : d["dtype"])}()
+    return DeltaFilter{typestr(d["dtype"]), typestr(get(d, "astype", d["dtype"]))}() # "astype" falls back to "dtype"
 end
 
 filterdict["delta"] = DeltaFilter
\ No newline at end of file
diff --git a/src/Filters/fixedscaleoffset.jl b/src/Filters/fixedscaleoffset.jl
index 1924ca1..9e12c52 100644
--- a/src/Filters/fixedscaleoffset.jl
+++ b/src/Filters/fixedscaleoffset.jl
@@ -39,14 +39,14 @@ function getfilter(::Type{<: FixedScaleOffsetFilter}, d::Dict)
     offset = d["offset"]
     # Types must be converted from strings to the actual Julia types they represent.
     string_T = d["dtype"]
-    string_Tenc = get(d, "atype", string_T)
+    string_Tenc = get(d, "astype", string_T)
     T = typestr(string_T)
     Tenc = typestr(string_Tenc)
     return FixedScaleOffsetFilter{Tenc, T, Tenc}(scale, offset)
 end
 
 function JSON.lower(c::FixedScaleOffsetFilter{ScaleOffsetType, T, Tenc}) where {ScaleOffsetType, T, Tenc}
-    return Dict("id" => "fixedscaleoffset", "scale" => c.scale, "offset" => c.offset, "dtype" => typestr(T), "atype" => typestr(Tenc))
+    return Dict("id" => "fixedscaleoffset", "scale" => c.scale, "offset" => c.offset, "dtype" => typestr(T), "astype" => typestr(Tenc))
 end
 
 filterdict["fixedscaleoffset"] = FixedScaleOffsetFilter
diff --git a/src/Filters/quantize.jl b/src/Filters/quantize.jl
index 12ad9ee..c5d7c9a 100644
--- a/src/Filters/quantize.jl
+++ b/src/Filters/quantize.jl
@@ -31,13 +31,9 @@ function zencode(data::AbstractArray, filter::QuantizeFilter{DecodingType, Encod
     bits = ceil(log(2, 10.0^(-exponent)))
     scale = 2.0^bits
 
-    enc = @. round(scale * arr) / scale
+    enc = @. convert(EncodingType, round(scale * arr) / scale)
 
-    if EncodingType == DecodingType
-        return enc
-    else
-        return reinterpret(EncodingType, enc)
-    end
+    return enc
 end
 
 # Decoding is a no-op; quantization is a lossy filter but data is encoded directly.
@@ -46,11 +42,11 @@ function zdecode(data::AbstractArray, filter::QuantizeFilter{DecodingType, Encod
 end
 
 function JSON.lower(filter::QuantizeFilter{T, Tenc}) where {T, Tenc}
-    return Dict("type" => "quantize", "digits" => filter.digits, "dtype" => typestr(T), "atype" => typestr(Tenc))
+    return Dict("id" => "quantize", "digits" => filter.digits, "dtype" => typestr(T), "astype" => typestr(Tenc))
 end
 
 function getfilter(::Type{<: QuantizeFilter}, d)
-    return QuantizeFilter{typestr(d["dtype"], typestr(d["atype"]))}(; digits = d["digits"])
+    return QuantizeFilter{typestr(d["dtype"]), typestr(get(d, "astype", d["dtype"]))}(; digits = d["digits"]) # "astype" falls back to "dtype"
 end
 
 filterdict["quantize"] = QuantizeFilter
\ No newline at end of file

From 4ca87a6a428fdb7e9c8d4ce615d3814ec9cf6878 Mon Sep 17 00:00:00 2001
From: Anshul Singhvi <anshulsinghvi@gmail.com>
Date: Fri, 22 Nov 2024 11:03:07 -0500
Subject: [PATCH 31/31] remove encoding validity check for quantize - it's
 pointless

---
 test/Filters.jl | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/test/Filters.jl b/test/Filters.jl
index 6a0a271..f46cf4a 100644
--- a/test/Filters.jl
+++ b/test/Filters.jl
@@ -110,17 +110,7 @@ end
         reshape(LinRange(100, 200, 1000), (10, 10, 10)),  # np.linspace(100, 200, 1000, dtype='<f8').reshape(10, 10, 10)
     ]
 
-    @testset "Encoding accuracy" begin
-        for codec in codecs
-            @testset "$(codec.digits) digits" begin
-                for array in arrays
-                    encoded = Zarr.zencode(array, codec)
-                    decoded = reshape(reinterpret(eltype(array), Zarr.zdecode(encoded, codec)), size(array))
-                    @test decoded ≈ array rtol=(1.5*10.0^(-codec.digits))
-                end
-            end
-        end
-    end
+    # No need to test encoding accuracy - decoding is a no-op.
 
     @testset "Decode is a no-op" begin
         for codec in codecs