From 7847bd925dc75534a9b63c76b7f8bbe884a45c89 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Tue, 5 Sep 2023 22:45:56 -0700 Subject: [PATCH] update docs, fix deprecations in tests --- docs/src/index.md | 12 +++++------ docs/src/mpi.md | 2 +- src/datasets.jl | 7 ++++-- src/dataspaces.jl | 12 +++++------ test/chunkstorage.jl | 15 +++++-------- test/compound.jl | 4 +++- test/create_dataset.jl | 13 ++++++----- test/gc.jl | 2 +- test/hyperslab.jl | 48 ----------------------------------------- test/mmap.jl | 4 ++-- test/mpio.jl | 7 +----- test/plain.jl | 20 +++++++++-------- test/properties.jl | 2 +- test/swmr.jl | 12 +++++++++-- test/virtual_dataset.jl | 4 ++-- 15 files changed, 60 insertions(+), 104 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 4d40d167b..1b3a3c483 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -276,11 +276,11 @@ useful to incrementally save to very large datasets you don't want to keep in memory. For example, ```julia -dset = create_dataset(g, "B", datatype(Float64), dataspace(1000,100,10), chunk=(100,100,1)) +dset = create_dataset(g, "B", Float64, (1000,100,10), chunk=(100,100,1)) dset[:,1,1] = rand(1000) ``` -creates a Float64 dataset in the file or group `g`, with dimensions 1000x100x10, and then +creates a `Float64` dataset in the file or group `g`, with dimensions 1000x100x10, and then writes to just the first 1000 element slice. If you know the typical size of subset reasons you'll be reading/writing, it can be beneficial to set the chunk dimensions appropriately. @@ -312,7 +312,7 @@ to. The following fails: ```julia -vec_dset = create_dataset(g, "v", datatype(Float64), dataspace(10_000,1)) +vec_dset = create_dataset(g, "v", Float64, (10_000,1)) HDF5.ismmappable(vec_dset) # == true vec = HDF5.readmmap(vec_dset) # throws ErrorException("Error mmapping array") ``` @@ -330,7 +330,7 @@ Alternatively, the policy can be set so that the space is allocated immediately creation of the data set with the `alloc_time` keyword: ```julia -mtx_dset = create_dataset(g, "M", datatype(Float64), dataspace(100, 1000), +mtx_dset = create_dataset(g, "M", Float64, (100, 1000), alloc_time = HDF5.H5D_ALLOC_TIME_EARLY) mtx = HDF5.readmmap(mtx_dset) # succeeds immediately ``` @@ -529,14 +529,14 @@ write_attribute(parent, name, data) You can use extendible dimensions, ```julia -d = create_dataset(parent, name, dtype, (dims, max_dims), chunk=(chunk_dims)) +d = create_dataset(parent, name, dtype, dims; max_dims=max_dims, chunk=(chunk_dims)) HDF5.set_extent_dims(d, new_dims) ``` where dims is a tuple of integers. For example ```julia -b = create_dataset(fid, "b", Int, ((1000,),(-1,)), chunk=(100,)) #-1 is equivalent to typemax(hsize_t) +b = create_dataset(fid, "b", Int, (1000,); max_dims=(HDF5.UNLIMITED,), chunk=(100,)) # HDF5.UNLIMITED is equivalent to typemax(hsize_t) HDF5.set_extent_dims(b, (10000,)) b[1:10000] = collect(1:10000) ``` diff --git a/docs/src/mpi.md b/docs/src/mpi.md index abf4e0260..60980c387 100644 --- a/docs/src/mpi.md +++ b/docs/src/mpi.md @@ -110,7 +110,7 @@ A = fill(myrank, M) # local data dims = (M, Nproc) # dimensions of global data # Create dataset -dset = create_dataset(ff, "/data", datatype(eltype(A)), dataspace(dims)) +dset = create_dataset(ff, "/data", eltype(A), dims) # Write local data dset[:, myrank + 1] = A diff --git a/src/datasets.jl b/src/datasets.jl index 4174f9986..c5320bc3e 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -48,6 +48,8 @@ end There are many keyword properties that can be set. 
Below are a few select keywords. +* `max_dims` - `Dims` describing the maximum size of the dataset. Required for + resizable datasets. Unlimited dimensions are denoted by `HDF5.UNLIMITED`. * `chunk` - `Dims` describing the size of a chunk. Needed to apply filters. * `filters` - `AbstractVector{<: Filters.Filter}` describing the order of the filters to apply to the data. See [`Filters`](@ref) @@ -93,8 +95,9 @@ create_dataset( path::Union{AbstractString,Nothing}, dtype::Datatype, dspace_dims::Union{Dims,Nothing}; + max_dims=nothing, pv... -) = create_dataset(parent, path, dtype, Dataspace(dspace_dims); pv...) +) = create_dataset(parent, path, dtype, Dataspace(dspace_dims; max_dims); pv...) create_dataset( parent::Union{File,Group}, path::Union{AbstractString,Nothing}, @@ -432,7 +435,7 @@ function create_external_dataset( sz::Dims, offset::Integer=0 ) - create_external_dataset(parent, name, filepath, datatype(t), dataspace(sz), offset) + create_external_dataset(parent, name, filepath, datatype(t), Dataspace(sz), offset) end function create_external_dataset( parent::Union{File,Group}, diff --git a/src/dataspaces.jl b/src/dataspaces.jl index 8045c783f..a623e49c0 100644 --- a/src/dataspaces.jl +++ b/src/dataspaces.jl @@ -37,12 +37,12 @@ Dataspace # defined in types.jl """ HDF5.UNLIMITED -A sentinel value which can be used to indicate an unlimited dimension in a +A sentinel value which indicates an unlimited dimension in a [`Dataspace`](@ref). -Can be used as the `max_dims` argument in the [`Dataspace`](@ref) constructor, -or as the `count` argument in [`BlockRange`](@ref) when selecting virtual -dataset mappings. +Can be used as an entry in the `max_dims` argument in the [`Dataspace`](@ref) +constructor or [`create_dataset`](@ref), or as a `count` argument in +[`BlockRange`](@ref) when selecting virtual dataset mappings. 
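+
+For example, a minimal sketch of creating a dataset that is extendible along
+its second dimension (the file and dataset names here are illustrative):
+
+```julia
+h5open("extendible.h5", "w") do f
+    # A chunked layout is required, since contiguous datasets cannot be resized.
+    d = create_dataset(f, "A", Float64, (100, 10);
+                       max_dims=(100, HDF5.UNLIMITED), chunk=(100, 1))
+    # Grow the dataset along the unlimited dimension, then write the new column.
+    HDF5.set_extent_dims(d, (100, 20))
+    d[:, 20] = rand(100)
+end
+```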
""" const UNLIMITED = -1 @@ -62,7 +62,7 @@ function Base.close(obj::Dataspace) end # null dataspace constructor -Dataspace(::Nothing) = Dataspace(API.h5s_create(API.H5S_NULL)) +Dataspace(::Nothing; max_dims::Nothing=nothing) = Dataspace(API.h5s_create(API.H5S_NULL)) # reverese dims order, convert to hsize_t _to_h5_dims(dims::Dims{N}) where {N} = API.hsize_t[dims[i] for i in N:-1:1] @@ -179,7 +179,7 @@ julia> HDF5.isnull(Dataspace(nothing)) true julia> HDF5.isnull(Dataspace(())) -true +false julia> HDF5.isnull(Dataspace((0,))) false diff --git a/test/chunkstorage.jl b/test/chunkstorage.jl index 82bff6ba8..f7bc55dac 100644 --- a/test/chunkstorage.jl +++ b/test/chunkstorage.jl @@ -7,7 +7,7 @@ using Test # Direct chunk write is no longer dependent on HL library # Test direct chunk writing Cartesian index h5open(fn, "w") do f - d = create_dataset(f, "dataset", datatype(Int), dataspace(4, 5); chunk=(2, 2)) + d = create_dataset(f, "dataset", datatype(Int), (4, 5); chunk=(2, 2)) HDF5.API.h5d_extend(d, HDF5.API.hsize_t[3, 3]) # should do nothing (deprecated call) HDF5.API.h5d_extend(d, HDF5.API.hsize_t[4, 4]) # should do nothing (deprecated call) raw = HDF5.ChunkStorage(d) @@ -84,7 +84,7 @@ using Test # Test direct write chunk writing via linear indexing h5open(fn, "w") do f - d = create_dataset(f, "dataset", datatype(Int64), dataspace(4, 5); chunk=(2, 3)) + d = create_dataset(f, "dataset", datatype(Int64), (4, 5); chunk=(2, 3)) raw = HDF5.ChunkStorage{IndexLinear}(d) raw[1] = 0, collect(reinterpret(UInt8, Int64[1, 2, 5, 6, 9, 10])) raw[2] = 0, collect(reinterpret(UInt8, Int64[3, 4, 7, 8, 11, 12])) @@ -181,7 +181,7 @@ using Test # CartesianIndices does not accept StepRange h5open(fn, "w") do f - d = create_dataset(f, "dataset", datatype(Int), dataspace(4, 5); chunk=(2, 3)) + d = create_dataset(f, "dataset", datatype(Int), (4, 5); chunk=(2, 3)) raw = HDF5.ChunkStorage(d) data = permutedims(reshape(1:24, 2, 2, 3, 2), (1, 3, 2, 4)) ci = CartesianIndices(raw) @@ -197,7 +197,7 @@ using Test # Test direct write chunk writing via linear indexing, using views and without filter flag h5open(fn, "w") do f - d = create_dataset(f, "dataset", datatype(Int), dataspace(4, 5); chunk=(2, 3)) + d = create_dataset(f, "dataset", datatype(Int), (4, 5); chunk=(2, 3)) raw = HDF5.ChunkStorage{IndexLinear}(d) data = permutedims(reshape(1:24, 2, 2, 3, 2), (1, 3, 2, 4)) chunks = Iterators.partition(data, 6) @@ -215,12 +215,7 @@ using Test # Test chunk info retrieval method performance h5open(fn, "w") do f d = create_dataset( - f, - "dataset", - datatype(UInt8), - dataspace(256, 256); - chunk=(16, 16), - alloc_time=:early + f, "dataset", datatype(UInt8), (256, 256); chunk=(16, 16), alloc_time=:early ) if v"1.10.5" ≤ HDF5.API._libhdf5_build_ver HDF5._get_chunk_info_all_by_index(d) diff --git a/test/compound.jl b/test/compound.jl index cd36680a4..c94b9b48a 100644 --- a/test/compound.jl +++ b/test/compound.jl @@ -161,7 +161,9 @@ end bars = [Bar(1, 2, true), Bar(3, 4, false), Bar(5, 6, true), Bar(7, 8, false)] fn = tempname() h5open(fn, "w") do h5f - d = create_dataset(h5f, "the/bars", Bar, ((2,), (-1,)); chunk=(100,)) + d = create_dataset( + h5f, "the/bars", Bar, (2,); max_dims=(HDF5.UNLIMITED,), chunk=(100,) + ) d[1:2] = bars[1:2] end diff --git a/test/create_dataset.jl b/test/create_dataset.jl index 6a2e7407d..5423396de 100644 --- a/test/create_dataset.jl +++ b/test/create_dataset.jl @@ -20,12 +20,7 @@ Test the combination of arguments to create_dataset. 
# Test primitive, HDF5.Datatype, non-primitive, non-primitive HDF5.Datatype types = (UInt8, datatype(UInt8), Complex{Float32}, datatype(Complex{Float32})) # Test Tuple, HDF5.Dataspace, two tuples (extendible), extendible HDF5.Dataspace - spaces = ( - (3, 4), - dataspace((16, 16)), - ((4, 4), (8, 8)), - dataspace((16, 16); max_dims=(32, 32)) - ) + spaces = ((3, 4), Dataspace((16, 16)), Dataspace((16, 16); max_dims=(32, 32))) # TODO: test keywords # Create argument cross product @@ -36,7 +31,11 @@ Test the combination of arguments to create_dataset. # create a chunked dataset since contiguous datasets are not extendible ds = create_dataset(parent, name, type, space; chunk=(2, 2)) @test datatype(ds) == datatype(type) - @test dataspace(ds) == dataspace(space) + if ds isa Dataspace + @test ds == dataspace(space) + else + @test Dataspace(ds) == dataspace(space) + end @test isvalid(ds) close(ds) if !isnothing(name) diff --git a/test/gc.jl b/test/gc.jl index 8ec38d1fc..d5fea130c 100644 --- a/test/gc.jl +++ b/test/gc.jl @@ -35,7 +35,7 @@ end HDF5.API.h5t_insert(memtype_id, "imag", sizeof(Float64), HDF5.hdf5_type_id(Float64)) dt = HDF5.Datatype(memtype_id) commit_datatype(file, "dt", dt) - ds = dataspace((2,)) + ds = Dataspace((2,)) d = create_dataset(file, "d", dt, ds) g = create_group(file, "g") a = create_attribute(file, "a", dt, ds) diff --git a/test/hyperslab.jl b/test/hyperslab.jl index a6b110576..1f53e31c8 100644 --- a/test/hyperslab.jl +++ b/test/hyperslab.jl @@ -1,53 +1,5 @@ using Random, Test, HDF5 -@testset "BlockRange" begin - br = HDF5.BlockRange(2) - @test length(br) == 1 - @test range(br) === 2:2 - @test convert(AbstractRange, br) === 2:2 - @test convert(UnitRange, br) === 2:2 - @test convert(StepRange, br) === 2:1:2 - @test repr(br) == "HDF5.BlockRange(2:2)" - @test repr(br; context=:compact => true) == "2:2" - - br = HDF5.BlockRange(Base.OneTo(3)) - @test length(br) == 3 - @test range(br) == 1:3 - @test convert(AbstractRange, br) === 1:3 - @test convert(UnitRange, br) === 1:3 - @test convert(StepRange, br) === 1:1:3 - @test repr(br) == "HDF5.BlockRange(1:3)" - @test repr(br; context=:compact => true) == "1:3" - - br = HDF5.BlockRange(2:7) - @test length(br) == 6 - @test range(br) == 2:7 - @test convert(AbstractRange, br) === 2:7 - @test convert(UnitRange, br) === 2:7 - @test convert(StepRange, br) === 2:1:7 - @test repr(br) == "HDF5.BlockRange(2:7)" - @test repr(br; context=:compact => true) == "2:7" - - br = HDF5.BlockRange(1:2:7) - @test length(br) == 4 - @test range(br) == 1:2:7 - @test convert(AbstractRange, br) === 1:2:7 - @test_throws Exception convert(UnitRange, br) - @test convert(StepRange, br) === 1:2:7 - @test repr(br) == "HDF5.BlockRange(1:2:7)" - @test repr(br; context=:compact => true) == "1:2:7" - - br = HDF5.BlockRange(; start=2, stride=8, count=3, block=2) - @test length(br) == 6 - @test_throws Exception range(br) - @test_throws Exception convert(AbstractRange, br) - @test_throws Exception convert(UnitRange, br) - @test_throws Exception convert(StepRange, br) - @test repr(br) == "HDF5.BlockRange(start=2, stride=8, count=3, block=2)" - @test repr(br; context=:compact => true) == - "BlockRange(start=2, stride=8, count=3, block=2)" -end - @testset "hyperslab" begin N = 10 v = [randstring(rand(5:10)) for i in 1:N, j in 1:N] diff --git a/test/mmap.jl b/test/mmap.jl index 5c2af11b7..cce5931aa 100644 --- a/test/mmap.jl +++ b/test/mmap.jl @@ -10,9 +10,9 @@ using Test # Create two datasets, one with late allocation (the default for contiguous # datasets) and the other with 
explicit early allocation. - hdf5_A = create_dataset(f, "A", datatype(Int64), dataspace(3, 3)) + hdf5_A = create_dataset(f, "A", datatype(Int64), (3, 3)) hdf5_B = create_dataset( - f, "B", datatype(Float64), dataspace(3, 3); alloc_time=HDF5.API.H5D_ALLOC_TIME_EARLY + f, "B", datatype(Float64), (3, 3); alloc_time=HDF5.API.H5D_ALLOC_TIME_EARLY ) # The late case cannot be mapped yet. @test_throws ErrorException("Error getting offset") HDF5.readmmap(f["A"]) diff --git a/test/mpio.jl b/test/mpio.jl index 55ccfa626..9756844ba 100644 --- a/test/mpio.jl +++ b/test/mpio.jl @@ -43,12 +43,7 @@ using Test @test isopen(f) g = create_group(f, "mygroup") dset = create_dataset( - g, - "B", - datatype(Int64), - dataspace(10, nprocs); - chunk=(10, 1), - dxpl_mpio=:collective + g, "B", datatype(Int64), (10, nprocs); chunk=(10, 1), dxpl_mpio=:collective ) dset[:, myrank + 1] = A end diff --git a/test/plain.jl b/test/plain.jl index 294dc09c1..b41f332b6 100644 --- a/test/plain.jl +++ b/test/plain.jl @@ -145,11 +145,15 @@ end dset = create_dataset(f, nothing, datatype(Float64), (20, 20, 5); chunk=(5, 5, 1)) dset[:, :, :] = 3.0 # More complex hyperslab and assignment with "incorrect" types (issue #34) - d = create_dataset(f, "slab2", datatype(Float64), ((10, 20), (100, 200)); chunk=(1, 1)) + d = create_dataset( + f, "slab2", datatype(Float64), (10, 20); max_dims=(100, 200), chunk=(1, 1) + ) d[:, :] = 5 d[1, 1] = 4 # 1d indexing - d = create_dataset(f, "slab3", datatype(Int), ((10,), (-1,)); chunk=(5,)) + d = create_dataset( + f, "slab3", datatype(Int), (10,); max_dims=(HDF5.UNLIMITED,), chunk=(5,) + ) @test d[:] == zeros(Int, 10) d[3:5] = 3:5 # Create a dataset designed to be deleted @@ -443,7 +447,7 @@ end try h5open(fn, "w") do f create_dataset(f, "test", Int, (128, 32)) - create_dataset(f, "test2", Float64, 128, 64) + create_dataset(f, "test2", Float64, (128, 64)) @test size(f["test"]) == (128, 32) @test size(f["test2"]) == (128, 64) end @@ -874,7 +878,7 @@ end # generic read of native types dset = create_dataset(group, "dset", datatype(Int), (1,)) @test sprint(show, dset) == "HDF5.Dataset: /group/dset (file: $fn xfer_mode: 0)" - meta = create_attribute(dset, "meta", datatype(Bool), Dataspace((1,))) + meta = create_attribute(dset, "meta", datatype(Bool), (1,)) @test sprint(show, meta) == "HDF5.Attribute: meta" dsetattrs = attributes(dset) @@ -896,7 +900,7 @@ end # generic read of native types commit_datatype(hfile, "type", dtype) @test sprint(show, dtype) == "HDF5.Datatype: /type H5T_IEEE_F64LE" - dtypemeta = create_attribute(dtype, "dtypemeta", datatype(Bool), Dataspace((1,))) + dtypemeta = create_attribute(dtype, "dtypemeta", datatype(Bool), (1,)) @test sprint(show, dtypemeta) == "HDF5.Attribute: dtypemeta" dtypeattrs = attributes(dtype) @@ -1215,7 +1219,7 @@ end # split1 tests @test haskey(hfile, "group1/dset2") @test !haskey(hfile, "group1/dsetna") - meta1 = create_attribute(dset1, "meta1", datatype(Bool), Dataspace((1,))) + meta1 = create_attribute(dset1, "meta1", datatype(Bool), (1,)) @test haskey(dset1, "meta1") @test !haskey(dset1, "metana") @test_throws KeyError dset1["nothing"] @@ -1253,9 +1257,7 @@ end # haskey tests @test_nowarn hfile[GenericString("dset1")] dset1 = hfile["dset1"] - @test_nowarn create_attribute( - dset1, GenericString("meta1"), datatype(Bool), Dataspace((1,)) - ) + @test_nowarn create_attribute(dset1, GenericString("meta1"), datatype(Bool), (1,)) @test_nowarn create_attribute(dset1, GenericString("meta2"), 1) @test_nowarn dset1[GenericString("meta1")] @test_nowarn 
dset1[GenericString("x")] = 2 diff --git a/test/properties.jl b/test/properties.jl index 81353ff5f..f75a09ec1 100644 --- a/test/properties.jl +++ b/test/properties.jl @@ -26,7 +26,7 @@ using Test g, "dataset", datatype(Int), - dataspace((500, 50)); + Dataspace((500, 50)); alloc_time=HDF5.API.H5D_ALLOC_TIME_EARLY, chunk=(5, 10), fill_value=1, diff --git a/test/swmr.jl b/test/swmr.jl index 1b7b7046e..301368763 100644 --- a/test/swmr.jl +++ b/test/swmr.jl @@ -29,7 +29,13 @@ end @testset "h5d_oappend" begin h5open(fname, "w") do h5 g = create_group(h5, "shoe") - d = create_dataset(g, "bar", datatype(Float64), ((1,), (-1,)); chunk=(100,)) + d = create_dataset( + g, + "bar", + datatype(Float64), + Dataspace((1,); max_dims=(HDF5.UNLIMITED,)); + chunk=(100,) + ) dxpl_id = HDF5.get_create_properties(d) v = [1.0, 2.0] memtype = datatype(Float64) @@ -91,7 +97,9 @@ end # create datasets and attributes before staring swmr writing function prep_h5_file(h5) - d = create_dataset(h5, "foo", datatype(Int), ((1,), (100,)); chunk=(1,)) + d = create_dataset( + h5, "foo", datatype(Int), Dataspace((1,); max_dims=(100,)); chunk=(1,) + ) attributes(h5)["bar"] = "bar" g = create_group(h5, "group") end diff --git a/test/virtual_dataset.jl b/test/virtual_dataset.jl index 1919b283b..f3b9e32ba 100644 --- a/test/virtual_dataset.jl +++ b/test/virtual_dataset.jl @@ -16,8 +16,8 @@ using Test, HDF5 f1["x"] = fill(2.0, 3) close(f1) - srcspace = dataspace((3,)) - vspace = dataspace((3, 2); max_dims=(3, -1)) + srcspace = Dataspace((3,)) + vspace = Dataspace((3, 2); max_dims=(3, -1)) HDF5.select_hyperslab!(vspace, (1:3, HDF5.BlockRange(1; count=-1))) d = create_dataset(