# Patch summary (commentary before the first `diff --git` header).
#
# Purpose: stop reading ZArray/ZGroup fields with dot syntax and go through accessor
# functions instead, so that dot syntax on a ZGroup can address its children.
#
# src/ZGroup.jl
#   * defines storage, path, attributes, iswriteable (for ZArray and ZGroup) and
#     arrays, groups (ZGroup only); all of them read the field with getfield.
#     Only `attributes` is exported.
#   * adds Base.propertynames / Base.getproperty for ZGroup: `:attrs` still returns the
#     attrs field but emits an @warn; every other symbol is looked up as `g[k]`, which
#     is why the accessors above use getfield rather than dot access.
#   * haskey stringifies the key; getindex with a non-AbstractString key forwards to the
#     AbstractString method via string(k).
# src/ZArray.jl: field reads replaced by storage(z) / path(z) / iswriteable(z);
#   z.metadata is still read directly. Adds a docstring for zname(z::ZArray).
# docs: storage.md lists Zarr.zname and Zarr.subdirs and drops Base.keys, newsub, getsub
#   from the @docs block; s3examples.md mentions attributes(g) instead of g.attrs.
# tests: switched to the accessors; new group tests cover string/symbol indexing,
#   property access, haskey, propertynames, the `.attrs` warning and show.
#
# Change relative to the recorded patch: on the `+` side, the read-only guards of
#   zgroup(g::ZGroup, name) and zcreate(T, g::ZGroup, name, ...) now throw
#   Base.IOError(msg,0), the same form consolidate_metadata uses in this patch. The `-`
#   side keeps the one-argument IOError(msg) call so it still matches the old file.
#   NOTE(review): assumes the package defines no one-argument IOError of its own — confirm.
# NOTE(review): in readblock!, `$storage(z)` / `$path(z)` interpolate the accessor
#   functions into the @async block, whereas the removed `$z.storage` interpolated `z`.
#   Results look the same; `storage($z)` may be what was intended — confirm.
# NOTE: the hunks below are stored with their line breaks collapsed; restore them before
#   feeding this to a patch tool.
diff --git a/docs/src/s3examples.md b/docs/src/s3examples.md index df054d1..a75c3c6 100644 --- a/docs/src/s3examples.md +++ b/docs/src/s3examples.md @@ -86,7 +86,7 @@ the overhead of repeatedly requesting many metadata files: g = zopen(store.zstore[1], consolidated=true) ```` -You can access the meta-information through `g.attrs` or for example read the first +You can access the meta-information through `attributes(g)` or for example read the first time slice through ````julia diff --git a/docs/src/storage.md b/docs/src/storage.md index 9b2565d..9bc1687 100644 --- a/docs/src/storage.md +++ b/docs/src/storage.md @@ -8,13 +8,10 @@ CurrentModule = Zarr ```@docs storagesize -zname +Zarr.zname Base.getindex(d::AbstractStore,i::String) Base.setindex!(d::AbstractStore,v,i::String) -subdirs -Base.keys(d::AbstractStore) -newsub -getsub +Zarr.subdirs ``` You can get some inspiration on how to implement this by looking at the source code of existing storage backends. diff --git a/src/ZArray.jl b/src/ZArray.jl index 9d9d915..511bb93 100644 --- a/src/ZArray.jl +++ b/src/ZArray.jl @@ -53,7 +53,12 @@ function Base.show(io::IO,::MIME"text/plain",z::ZArray) print(io, "ZArray{", eltype(z) ,"} of size ",join(string.(size(z)), " x ")) end -zname(z::ZArray) = zname(z.path) +""" + zname(z::ZArray) + +Extract the name of a Zarr array from the object +""" +zname(z::ZArray) = zname(path(z)) function zname(s::String) spl = split(rstrip(s,'/'),'/') @@ -66,7 +71,7 @@ storagesize(z::ZArray) Returns the size of the compressed data stored in the ZArray `z` in bytes """ -storagesize(z::ZArray) = storagesize(z.storage,z.path) +storagesize(z::ZArray) = storagesize(storage(z),path(z)) """ storageratio(z::ZArray) @@ -84,7 +89,7 @@ nobytes(z::ZArray{<:String}) = "unknown" zinfo(z::ZArray) = zinfo(stdout,z) function zinfo(io::IO,z::ZArray) ninit = sum(chunkindices(z)) do i - isinitialized(z.storage,z.path,i) + isinitialized(storage(z),path(z),i) end allinfos = [ "Type" => "ZArray", @@ -92,10 +97,10 @@
function zinfo(io::IO,z::ZArray) "Shape" => size(z), "Chunk Shape" => z.metadata.chunks, "Order" => z.metadata.order, - "Read-Only" => !z.writeable, + "Read-Only" => !iswriteable(z), "Compressor" => z.metadata.compressor, "Filters" => z.metadata.filters, - "Store type" => z.storage, + "Store type" => storage(z), "No. bytes" => nobytes(z), "No. bytes stored" => storagesize(z), "Storage ratio" => storageratio(z), @@ -159,10 +164,10 @@ function readblock!(aout::AbstractArray{<:Any,N}, z::ZArray{<:Any, N}, r::Cartes #bufferdict = IdDict((current_task()=>getchunkarray(z),)) a = getchunkarray(z) # Now loop through the chunks - c = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(z.storage)) + c = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(storage(z))) task = @async begin - read_items!($z.storage,c, $z.path, $blockr) + read_items!($storage(z),c, $path(z), $blockr) end bind(c,task) @@ -186,7 +191,7 @@ end function writeblock!(ain::AbstractArray{<:Any,N}, z::ZArray{<:Any, N}, r::CartesianIndices{N}) where {N} - z.writeable || error("Can not write to read-only ZArray") + iswriteable(z) || error("Can not write to read-only ZArray") input_base_offsets = map(i->first(i)-1,r.indices) # Determines which chunks are affected @@ -195,17 +200,17 @@ function writeblock!(ain::AbstractArray{<:Any,N}, z::ZArray{<:Any, N}, r::Cartes #bufferdict = IdDict((current_task()=>getchunkarray(z),)) a = getchunkarray(z) # Now loop through the chunks - readchannel = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(z.storage)) + readchannel = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(storage(z))) readtask = @async begin - read_items!(z.storage,readchannel, z.path, blockr) + read_items!(storage(z),readchannel, path(z), blockr) end bind(readchannel,readtask) - writechannel = Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(z.storage)) + writechannel =
Channel{Pair{eltype(blockr),Union{Nothing,Vector{UInt8}}}}(channelsize(storage(z))) writetask = @async begin - write_items!(z.storage,writechannel,z.path,blockr) + write_items!(storage(z),writechannel,path(z),blockr) end bind(writechannel,writetask) @@ -394,9 +399,9 @@ function zzeros(T,dims...;kwargs...) z = zcreate(T,dims...;kwargs...) as = zeros(T, z.metadata.chunks...) data_encoded = compress_raw(as,z) - p = z.path + p = path(z) for i in chunkindices(z) - z.storage[p,i] = data_encoded + storage(z)[p,i] = data_encoded end z end @@ -414,9 +419,9 @@ function Base.resize!(z::ZArray{T,N}, newsize::NTuple{N}) where {T,N} z.metadata.shape[] = newsize #Check if array was shrunk if any(map(<,newsize, oldsize)) - prune_oob_chunks(z.storage,z.path,oldsize,newsize, z.metadata.chunks) + prune_oob_chunks(storage(z),path(z),oldsize,newsize, z.metadata.chunks) end - writemetadata(z.storage, z.path, z.metadata) + writemetadata(storage(z), path(z), z.metadata) nothing end Base.resize!(z::ZArray, newsize::Integer...)
= resize!(z,newsize) diff --git a/src/ZGroup.jl b/src/ZGroup.jl index 8bc54be..5c93a54 100644 --- a/src/ZGroup.jl +++ b/src/ZGroup.jl @@ -6,12 +6,20 @@ struct ZGroup{S<:AbstractStore} attrs::Dict writeable::Bool end +const ZArrayOrGroup = Union{ZArray, ZGroup} +storage(a::ZArrayOrGroup)=getfield(a,:storage) +path(a::ZArrayOrGroup)=getfield(a,:path) +attributes(a::ZArrayOrGroup)=getfield(a,:attrs) +iswriteable(a::ZArrayOrGroup)=getfield(a,:writeable) +arrays(g::ZGroup)=getfield(g,:arrays) +groups(g::ZGroup)=getfield(g,:groups) +export attributes # path can also be a SubString{String} ZGroup(storage, path::AbstractString, arrays, groups, attrs, writeable) = ZGroup(storage, String(path), arrays, groups, attrs, writeable) -zname(g::ZGroup) = zname(g.path) +zname(g::ZGroup) = zname(path(g)) #Open an existing ZGroup function ZGroup(s::T,mode="r",path="";fill_as_missing=false) where T <: AbstractStore @@ -58,23 +66,46 @@ function zopen_noerr(s::AbstractStore, mode="r"; end function Base.show(io::IO, g::ZGroup) - print(io, "ZarrGroup at ", g.storage, " and path ", g.path) - !isempty(g.arrays) && print(io, "\nVariables: ", map(i -> string(zname(i), " "), values(g.arrays))...) - !isempty(g.groups) && print(io, "\nGroups: ", map(i -> string(zname(i), " "), values(g.groups))...) + print(io, "ZarrGroup at ", storage(g), " and path ", path(g)) + !isempty(arrays(g)) && print(io, "\nVariables: ", map(i -> string(zname(i), " "), values(arrays(g)))...) + !isempty(groups(g)) && print(io, "\nGroups: ", map(i -> string(zname(i), " "), values(groups(g)))...)
nothing end -Base.haskey(g::ZGroup,k)= haskey(g.groups,k) || haskey(g.arrays,k) +Base.haskey(g::ZGroup,k)= haskey(groups(g),string(k)) || haskey(arrays(g),string(k)) - -function Base.getindex(g::ZGroup, k) - if haskey(g.groups, k) - return g.groups[k] - elseif haskey(g.arrays, k) - return g.arrays[k] +function Base.getindex(g::ZGroup, k::AbstractString) + if haskey(groups(g), k) + return groups(g)[k] + elseif haskey(arrays(g), k) + return arrays(g)[k] else throw(KeyError("Zarr Dataset does not contain $k")) end end +Base.getindex(g::ZGroup,k)=getindex(g,string(k)) +function Base.propertynames(g::ZGroup,private::Bool=false) + p = if private + Symbol[:attrs] + else + Symbol[] + end + for k in keys(groups(g)) + push!(p,Symbol(k)) + end + for k in keys(arrays(g)) + push!(p,Symbol(k)) + end + p +end + +function Base.getproperty(g::ZGroup, k::Symbol) + if k === :attrs + @warn "Accessing attributes through `.attrs` is not recommended anymore. Please use `attributes(g)` instead." + return getfield(g,:attrs) + else + return g[k] + end +end """ zopen(s::AbstractStore, mode="r"; consolidated = false, path = "", lru = 0) @@ -142,21 +173,21 @@ zgroup(s::String;kwargs...)=zgroup(storefromstring(s, true)...;kwargs...) "Create a subgroup of the group g" function zgroup(g::ZGroup, name; attrs=Dict()) - g.writeable || throw(IOError("Zarr group is not writeable. Please re-open in write mode to create an array")) - g.groups[name] = zgroup(g.storage,_concatpath(g.path,name),attrs=attrs) + iswriteable(g) || throw(Base.IOError("Zarr group is not writeable. Please re-open in write mode to create an array",0)) + groups(g)[name] = zgroup(storage(g),_concatpath(path(g),name),attrs=attrs) end "Create a new subarray of the group g" function zcreate(::Type{T},g::ZGroup, name::AbstractString, addargs...; kwargs...) where T - g.writeable || throw(IOError("Zarr group is not writeable. Please re-open in write mode to create an array")) + iswriteable(g) || throw(Base.IOError("Zarr group is not writeable.
Please re-open in write mode to create an array",0)) name = string(name) - z = zcreate(T, g.storage, addargs...; path = _concatpath(g.path,name), kwargs...) - g.arrays[name] = z + z = zcreate(T, storage(g), addargs...; path = _concatpath(path(g),name), kwargs...) + arrays(g)[name] = z return z end -HTTP.serve(s::Union{ZArray,ZGroup}, args...; kwargs...) = HTTP.serve(s.storage, s.path, args...; kwargs...) +HTTP.serve(s::Union{ZArray,ZGroup}, args...; kwargs...) = HTTP.serve(storage(s), path(s), args...; kwargs...) function consolidate_metadata(z::Union{ZArray,ZGroup}) - z.writeable || throw(Base.IOError("Zarr group is not writeable. Please re-open in write mode to create an array",0)) - consolidate_metadata(z.storage,z.path) + iswriteable(z) || throw(Base.IOError("Zarr group is not writeable. Please re-open in write mode to create an array",0)) + consolidate_metadata(storage(z),path(z)) end diff --git a/test/python.jl b/test/python.jl index 8124e6e..2c97c9c 100644 --- a/test/python.jl +++ b/test/python.jl @@ -132,11 +132,11 @@ zarr.consolidate_metadata($ppython) #Open in Julia g = zopen(ppython) @test g isa Zarr.ZGroup -@test g.attrs["groupatt"] == "Hi" +@test attributes(g)["groupatt"] == "Hi" a1 = g["a1"] @test a1 isa ZArray @test a1[:,:,:]==permutedims(data,(3,2,1)) -@test a1.attrs["test"]==Dict("b"=>6) +@test attributes(a1)["test"]==Dict("b"=>6) # Test reading the string array @test String(g["a2"][:])=="hallo" @test g["a3"] == ["test1", "test234"] @@ -149,13 +149,13 @@ rm(joinpath(ppython,"a1",".zarray")) rm(joinpath(ppython,"a2",".zarray")) g = zopen(ppython, "w", consolidated=true) @test g isa Zarr.ZGroup -@test g.attrs["groupatt"] == "Hi" +@test attributes(g)["groupatt"] == "Hi" a1 = g["a1"] @test a1 isa ZArray @test a1[:,:,:]==permutedims(data,(3,2,1)) -@test a1.attrs["test"]==Dict("b"=>6) +@test attributes(a1)["test"]==Dict("b"=>6) @test storagesize(a1) == 960 -@test sort(Zarr.subkeys(a1.storage,"a1"))[1:5] == ["0.0.0","0.0.1","0.0.2","0.0.3","0.1.0"] +@test
sort(Zarr.subkeys(Zarr.storage(a1),"a1"))[1:5] == ["0.0.0","0.0.1","0.0.2","0.0.3","0.1.0"] a1[:,1,1] = 1:10 @test a1[:,1,1] == 1:10 # Test reading the string array diff --git a/test/runtests.jl b/test/runtests.jl index 4c996e3..62309cb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,7 @@ using JSON using Pkg using PyCall using Dates +using Zarr: storage macro test_py(ex) quote @@ -20,9 +21,9 @@ end @test z isa ZArray{Int64, 2, Zarr.BloscCompressor, Zarr.DictStore} - @test length(z.storage.a) === 3 - @test length(z.storage.a["0.0"]) === 64 - @test eltype(z.storage.a["0.0"]) === UInt8 + @test length(storage(z).a) === 3 + @test length(storage(z).a["0.0"]) === 64 + @test eltype(storage(z).a["0.0"]) === UInt8 @test z.metadata.shape[] === (2, 3) @test z.metadata.order === 'C' @test z.metadata.chunks === (2, 3) @@ -32,8 +33,8 @@ end @test z.metadata.compressor.clevel === 5 @test z.metadata.compressor.cname === "lz4" @test z.metadata.compressor.shuffle === 1 - @test z.attrs == Dict{Any, Any}() - @test z.writeable === true + @test attributes(z) == Dict{Any, Any}() + @test Zarr.iswriteable(z) === true @test_throws ArgumentError zzeros(Int64,2,3, chunks = (0,1)) @test_throws ArgumentError zzeros(Int64,0,-1) @test_throws ArgumentError Zarr.Metadata(zeros(2,2), (2,2), zarr_format = 3) @@ -63,7 +64,7 @@ end compressor=Zarr.NoCompressor()) @test z.metadata.compressor === Zarr.NoCompressor() - @test z.storage === Zarr.DirectoryStore("$dir/$name") + @test storage(z) === Zarr.DirectoryStore("$dir/$name") @test isdir("$dir/$name") @test ispath("$dir/$name/.zarray") @test ispath("$dir/$name/.zattrs") @@ -92,9 +93,32 @@ end g2 = zgroup(g,"asubgroup",attrs = Dict("a1"=>5)) @test Zarr.is_zgroup(store,"mygroup") @test Zarr.is_zgroup(store,"mygroup/asubgroup") - @test g2.attrs["a1"]==5 + @test attributes(g2)["a1"]==5 @test isdir(joinpath(store.folder,"mygroup")) @test isdir(joinpath(store.folder,"mygroup","asubgroup")) + + #Another test for indexing and getproperty + a =
zgroup(Zarr.DictStore(),attrs=Dict("a"=>5)) + + zzeros(Float64,a,"a",3,3) + zzeros(Int,a,"b",5,4,2) + zgroup(a,"subgroup") + + @test a["a"] isa ZArray + @test a[:a] isa ZArray + @test a.b isa ZArray + @test a.subgroup isa ZGroup + @test haskey(a,"a") + @test haskey(a,:a) + @test !haskey(a,"something") + @test !haskey(a,:something) + @test issetequal(propertynames(a),(:a,:b,:subgroup)) + @test issetequal(propertynames(a,true),(:a,:b,:subgroup,:attrs)) + @test @test_warn "Accessing attributes" a.attrs["a"]==5 + @test attributes(a) == Dict("a"=>5) + buf=IOBuffer() + show(buf,a) + @test startswith(String(take!(buf)),"ZarrGroup at Dictionary Storage and path \nVariables:") end @testset "Metadata" begin @@ -177,7 +201,7 @@ end @test all(ismissing,amiss[:,2]) @test all(i->isequal(i...),zip(amiss[1:3,4],[1,missing,3])) # Test that chunk containing only missings is not initialized - @test !Zarr.isinitialized(amiss.storage,Zarr.citostring(CartesianIndex((1,5)))) + @test !Zarr.isinitialized(Zarr.storage(amiss),Zarr.citostring(CartesianIndex((1,5)))) # amiss = zcreate(Int64, 10,10,chunks=(5,2), fill_value=-1, fill_as_missing=false) amiss[:,1] = 1:10 @@ -189,7 +213,7 @@ end @test all(==(-1),amiss[:,2]) @test all(i->isequal(i...),zip(amiss[1:3,4],[1,-1,3])) # Test that chunk containing only fill values is not initialized - @test !Zarr.isinitialized(amiss.storage,Zarr.citostring(CartesianIndex((1,5)))) + @test !Zarr.isinitialized(Zarr.storage(amiss),Zarr.citostring(CartesianIndex((1,5)))) end @testset "resize" begin diff --git a/test/storage.jl b/test/storage.jl index 1dde7b8..8d19398 100644 --- a/test/storage.jl +++ b/test/storage.jl @@ -105,7 +105,7 @@ end @test storagesize(S3,p) == 0 @test Zarr.is_zgroup(S3,p) == true S3group = zopen(S3,path=p) - S3Array = S3group.groups["bar"].arrays["baz"] + S3Array = Zarr.arrays(Zarr.groups(S3group)["bar"])["baz"] @test eltype(S3Array) == Zarr.ASCIIChar @test storagesize(S3Array) == 69 @test String(S3Array[:]) == "Hello from the cloud!"
@@ -142,8 +142,8 @@ end ip,port = getsockname(server) @async HTTP.serve(g,ip,port,server=server) g2 = zopen("http://$ip:$port") - @test g2.attrs == Dict("groupatt"=>5) - @test g2["a1"].attrs == Dict("arratt"=>2.5) + @test attributes(g2) == Dict("groupatt"=>5) + @test attributes(g2["a1"]) == Dict("arratt"=>2.5) @test g2["a1"][:,:] == reshape(1:200,10,20) close(server) end