update docs, fix deprecations in tests
simonbyrne committed Sep 6, 2023
1 parent 9acbbe9 commit 7847bd9
Showing 15 changed files with 60 additions and 104 deletions.
12 changes: 6 additions & 6 deletions docs/src/index.md
@@ -276,11 +276,11 @@ useful to incrementally save to very large datasets you don't want to keep in
memory. For example,

```julia
-dset = create_dataset(g, "B", datatype(Float64), dataspace(1000,100,10), chunk=(100,100,1))
+dset = create_dataset(g, "B", Float64, (1000,100,10), chunk=(100,100,1))
dset[:,1,1] = rand(1000)
```

-creates a Float64 dataset in the file or group `g`, with dimensions 1000x100x10, and then
+creates a `Float64` dataset in the file or group `g`, with dimensions 1000x100x10, and then
writes to just the first 1000 element slice.
If you know the typical size of the subset regions you'll be reading/writing, it can be beneficial to set the chunk dimensions appropriately.
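For instance (an illustrative sketch based on the dataset created above; not part of this commit), a read that is aligned with the chunk grid touches exactly one stored chunk:

```julia
# dims are (1000, 100, 10) and the chunk size is (100, 100, 1), so this
# 100×100×1 slice corresponds to exactly one chunk on disk
first_chunk = dset[1:100, 1:100, 1]
```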

@@ -312,7 +312,7 @@ to.
The following fails:

```julia
-vec_dset = create_dataset(g, "v", datatype(Float64), dataspace(10_000,1))
+vec_dset = create_dataset(g, "v", Float64, (10_000,1))
HDF5.ismmappable(vec_dset) # == true
vec = HDF5.readmmap(vec_dset) # throws ErrorException("Error mmapping array")
```
@@ -330,7 +330,7 @@ Alternatively, the policy can be set so that the space is allocated immediately upon
creation of the data set with the `alloc_time` keyword:

```julia
-mtx_dset = create_dataset(g, "M", datatype(Float64), dataspace(100, 1000),
+mtx_dset = create_dataset(g, "M", Float64, (100, 1000),
alloc_time = HDF5.H5D_ALLOC_TIME_EARLY)
mtx = HDF5.readmmap(mtx_dset) # succeeds immediately
```
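As a usage note (an illustrative sketch, not part of this commit): the array returned by `HDF5.readmmap` is an ordinary Julia `Array` backed by the file, so if the file was opened writable, modifications are persisted to the dataset:

```julia
mtx[1, 1] = 3.0  # write through the memory map
# the change reaches the dataset on disk once the file is flushed or closed
```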
@@ -529,14 +529,14 @@ write_attribute(parent, name, data)
You can use extendible dimensions,

```julia
-d = create_dataset(parent, name, dtype, (dims, max_dims), chunk=(chunk_dims))
+d = create_dataset(parent, name, dtype, dims; max_dims=max_dims, chunk=(chunk_dims))
HDF5.set_extent_dims(d, new_dims)
```

where dims is a tuple of integers. For example

```julia
-b = create_dataset(fid, "b", Int, ((1000,),(-1,)), chunk=(100,)) #-1 is equivalent to typemax(hsize_t)
+b = create_dataset(fid, "b", Int, (1000,); max_dims=(HDF5.UNLIMITED,), chunk=(100,)) # HDF5.UNLIMITED is equivalent to typemax(hsize_t)
HDF5.set_extent_dims(b, (10000,))
b[1:10000] = collect(1:10000)
```
2 changes: 1 addition & 1 deletion docs/src/mpi.md
@@ -110,7 +110,7 @@ A = fill(myrank, M) # local data
dims = (M, Nproc) # dimensions of global data

# Create dataset
-dset = create_dataset(ff, "/data", datatype(eltype(A)), dataspace(dims))
+dset = create_dataset(ff, "/data", eltype(A), dims)

# Write local data
dset[:, myrank + 1] = A
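For completeness, a sketch of reading the data back (assumes the MPI setup from the surrounding docs example; not part of this commit):

```julia
# each rank reads back the column it wrote
A_read = dset[:, myrank + 1]
@assert A_read == A
```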
7 changes: 5 additions & 2 deletions src/datasets.jl
@@ -48,6 +48,8 @@ end
There are many keyword properties that can be set. Below are a few select
keywords.
+* `max_dims` - `Dims` describing the maximum size of the dataset. Required for
+  resizable datasets. Unlimited dimensions are denoted by `HDF5.UNLIMITED`.
* `chunk` - `Dims` describing the size of a chunk. Needed to apply filters.
* `filters` - `AbstractVector{<: Filters.Filter}` describing the order of the
filters to apply to the data. See [`Filters`](@ref)
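As an illustration of the `max_dims` and `chunk` keywords documented above (a minimal sketch assuming an open, writable file `f`; the dataset name is hypothetical):

```julia
# a resizable, chunked dataset: max_dims is required for resizing
d = create_dataset(f, "samples", Int, (1000,);
                   max_dims=(HDF5.UNLIMITED,), chunk=(100,))
HDF5.set_extent_dims(d, (2000,))  # grow along the unlimited dimension
```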
@@ -93,8 +95,9 @@ create_dataset(
path::Union{AbstractString,Nothing},
dtype::Datatype,
dspace_dims::Union{Dims,Nothing};
+    max_dims=nothing,
    pv...
-) = create_dataset(parent, path, dtype, Dataspace(dspace_dims); pv...)
+) = create_dataset(parent, path, dtype, Dataspace(dspace_dims; max_dims); pv...)
create_dataset(
parent::Union{File,Group},
path::Union{AbstractString,Nothing},
@@ -432,7 +435,7 @@ function create_external_dataset(
sz::Dims,
offset::Integer=0
)
-    create_external_dataset(parent, name, filepath, datatype(t), dataspace(sz), offset)
+    create_external_dataset(parent, name, filepath, datatype(t), Dataspace(sz), offset)
end
function create_external_dataset(
parent::Union{File,Group},
12 changes: 6 additions & 6 deletions src/dataspaces.jl
@@ -37,12 +37,12 @@ Dataspace # defined in types.jl
"""
HDF5.UNLIMITED
-A sentinel value which can be used to indicate an unlimited dimension in a
+A sentinel value which indicates an unlimited dimension in a
[`Dataspace`](@ref).
-Can be used as the `max_dims` argument in the [`Dataspace`](@ref) constructor,
-or as the `count` argument in [`BlockRange`](@ref) when selecting virtual
-dataset mappings.
+Can be used as an entry in the `max_dims` argument in the [`Dataspace`](@ref)
+constructor or [`create_dataset`](@ref), or as a `count` argument in
+[`BlockRange`](@ref) when selecting virtual dataset mappings.
"""
const UNLIMITED = -1
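For illustration (a sketch, not part of the diff), the sentinel marks a resizable dimension when constructing a `Dataspace`:

```julia
# one element now, resizable without bound along its only dimension
dspace = Dataspace((1,); max_dims=(HDF5.UNLIMITED,))
```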

@@ -62,7 +62,7 @@ function Base.close(obj::Dataspace)
end

# null dataspace constructor
-Dataspace(::Nothing) = Dataspace(API.h5s_create(API.H5S_NULL))
+Dataspace(::Nothing; max_dims::Nothing=nothing) = Dataspace(API.h5s_create(API.H5S_NULL))

# reverse dims order, convert to hsize_t
_to_h5_dims(dims::Dims{N}) where {N} = API.hsize_t[dims[i] for i in N:-1:1]
@@ -179,7 +179,7 @@ julia> HDF5.isnull(Dataspace(nothing))
true
julia> HDF5.isnull(Dataspace(()))
-true
+false
julia> HDF5.isnull(Dataspace((0,)))
false
15 changes: 5 additions & 10 deletions test/chunkstorage.jl
@@ -7,7 +7,7 @@ using Test
# Direct chunk write is no longer dependent on HL library
# Test direct chunk writing Cartesian index
h5open(fn, "w") do f
-d = create_dataset(f, "dataset", datatype(Int), dataspace(4, 5); chunk=(2, 2))
+d = create_dataset(f, "dataset", datatype(Int), (4, 5); chunk=(2, 2))
HDF5.API.h5d_extend(d, HDF5.API.hsize_t[3, 3]) # should do nothing (deprecated call)
HDF5.API.h5d_extend(d, HDF5.API.hsize_t[4, 4]) # should do nothing (deprecated call)
raw = HDF5.ChunkStorage(d)
@@ -84,7 +84,7 @@ using Test

# Test direct write chunk writing via linear indexing
h5open(fn, "w") do f
-d = create_dataset(f, "dataset", datatype(Int64), dataspace(4, 5); chunk=(2, 3))
+d = create_dataset(f, "dataset", datatype(Int64), (4, 5); chunk=(2, 3))
raw = HDF5.ChunkStorage{IndexLinear}(d)
raw[1] = 0, collect(reinterpret(UInt8, Int64[1, 2, 5, 6, 9, 10]))
raw[2] = 0, collect(reinterpret(UInt8, Int64[3, 4, 7, 8, 11, 12]))
@@ -181,7 +181,7 @@ using Test
# CartesianIndices does not accept StepRange

h5open(fn, "w") do f
-d = create_dataset(f, "dataset", datatype(Int), dataspace(4, 5); chunk=(2, 3))
+d = create_dataset(f, "dataset", datatype(Int), (4, 5); chunk=(2, 3))
raw = HDF5.ChunkStorage(d)
data = permutedims(reshape(1:24, 2, 2, 3, 2), (1, 3, 2, 4))
ci = CartesianIndices(raw)
@@ -197,7 +197,7 @@ using Test

# Test direct write chunk writing via linear indexing, using views and without filter flag
h5open(fn, "w") do f
-d = create_dataset(f, "dataset", datatype(Int), dataspace(4, 5); chunk=(2, 3))
+d = create_dataset(f, "dataset", datatype(Int), (4, 5); chunk=(2, 3))
raw = HDF5.ChunkStorage{IndexLinear}(d)
data = permutedims(reshape(1:24, 2, 2, 3, 2), (1, 3, 2, 4))
chunks = Iterators.partition(data, 6)
@@ -215,12 +215,7 @@ using Test
# Test chunk info retrieval method performance
h5open(fn, "w") do f
d = create_dataset(
-    f,
-    "dataset",
-    datatype(UInt8),
-    dataspace(256, 256);
-    chunk=(16, 16),
-    alloc_time=:early
+    f, "dataset", datatype(UInt8), (256, 256); chunk=(16, 16), alloc_time=:early
)
if v"1.10.5" ≤ HDF5.API._libhdf5_build_ver
HDF5._get_chunk_info_all_by_index(d)
4 changes: 3 additions & 1 deletion test/compound.jl
@@ -161,7 +161,9 @@ end
bars = [Bar(1, 2, true), Bar(3, 4, false), Bar(5, 6, true), Bar(7, 8, false)]
fn = tempname()
h5open(fn, "w") do h5f
-d = create_dataset(h5f, "the/bars", Bar, ((2,), (-1,)); chunk=(100,))
+d = create_dataset(
+    h5f, "the/bars", Bar, (2,); max_dims=(HDF5.UNLIMITED,), chunk=(100,)
+)
d[1:2] = bars[1:2]
end

13 changes: 6 additions & 7 deletions test/create_dataset.jl
@@ -20,12 +20,7 @@ Test the combination of arguments to create_dataset.
# Test primitive, HDF5.Datatype, non-primitive, non-primitive HDF5.Datatype
types = (UInt8, datatype(UInt8), Complex{Float32}, datatype(Complex{Float32}))
# Test Tuple, HDF5.Dataspace, two tuples (extendible), extendible HDF5.Dataspace
-spaces = (
-    (3, 4),
-    dataspace((16, 16)),
-    ((4, 4), (8, 8)),
-    dataspace((16, 16); max_dims=(32, 32))
-)
+spaces = ((3, 4), Dataspace((16, 16)), Dataspace((16, 16); max_dims=(32, 32)))
# TODO: test keywords

# Create argument cross product
@@ -36,7 +31,11 @@ Test the combination of arguments to create_dataset.
# create a chunked dataset since contiguous datasets are not extendible
ds = create_dataset(parent, name, type, space; chunk=(2, 2))
@test datatype(ds) == datatype(type)
-@test dataspace(ds) == dataspace(space)
+if ds isa Dataspace
+    @test ds == dataspace(space)
+else
+    @test Dataspace(ds) == dataspace(space)
+end
@test isvalid(ds)
close(ds)
if !isnothing(name)
2 changes: 1 addition & 1 deletion test/gc.jl
@@ -35,7 +35,7 @@ end
HDF5.API.h5t_insert(memtype_id, "imag", sizeof(Float64), HDF5.hdf5_type_id(Float64))
dt = HDF5.Datatype(memtype_id)
commit_datatype(file, "dt", dt)
-ds = dataspace((2,))
+ds = Dataspace((2,))
d = create_dataset(file, "d", dt, ds)
g = create_group(file, "g")
a = create_attribute(file, "a", dt, ds)
48 changes: 0 additions & 48 deletions test/hyperslab.jl
@@ -1,53 +1,5 @@
using Random, Test, HDF5

-@testset "BlockRange" begin
-    br = HDF5.BlockRange(2)
-    @test length(br) == 1
-    @test range(br) === 2:2
-    @test convert(AbstractRange, br) === 2:2
-    @test convert(UnitRange, br) === 2:2
-    @test convert(StepRange, br) === 2:1:2
-    @test repr(br) == "HDF5.BlockRange(2:2)"
-    @test repr(br; context=:compact => true) == "2:2"
-
-    br = HDF5.BlockRange(Base.OneTo(3))
-    @test length(br) == 3
-    @test range(br) == 1:3
-    @test convert(AbstractRange, br) === 1:3
-    @test convert(UnitRange, br) === 1:3
-    @test convert(StepRange, br) === 1:1:3
-    @test repr(br) == "HDF5.BlockRange(1:3)"
-    @test repr(br; context=:compact => true) == "1:3"
-
-    br = HDF5.BlockRange(2:7)
-    @test length(br) == 6
-    @test range(br) == 2:7
-    @test convert(AbstractRange, br) === 2:7
-    @test convert(UnitRange, br) === 2:7
-    @test convert(StepRange, br) === 2:1:7
-    @test repr(br) == "HDF5.BlockRange(2:7)"
-    @test repr(br; context=:compact => true) == "2:7"
-
-    br = HDF5.BlockRange(1:2:7)
-    @test length(br) == 4
-    @test range(br) == 1:2:7
-    @test convert(AbstractRange, br) === 1:2:7
-    @test_throws Exception convert(UnitRange, br)
-    @test convert(StepRange, br) === 1:2:7
-    @test repr(br) == "HDF5.BlockRange(1:2:7)"
-    @test repr(br; context=:compact => true) == "1:2:7"
-
-    br = HDF5.BlockRange(; start=2, stride=8, count=3, block=2)
-    @test length(br) == 6
-    @test_throws Exception range(br)
-    @test_throws Exception convert(AbstractRange, br)
-    @test_throws Exception convert(UnitRange, br)
-    @test_throws Exception convert(StepRange, br)
-    @test repr(br) == "HDF5.BlockRange(start=2, stride=8, count=3, block=2)"
-    @test repr(br; context=:compact => true) ==
-        "BlockRange(start=2, stride=8, count=3, block=2)"
-end
-
@testset "hyperslab" begin
N = 10
v = [randstring(rand(5:10)) for i in 1:N, j in 1:N]
4 changes: 2 additions & 2 deletions test/mmap.jl
@@ -10,9 +10,9 @@ using Test

# Create two datasets, one with late allocation (the default for contiguous
# datasets) and the other with explicit early allocation.
-hdf5_A = create_dataset(f, "A", datatype(Int64), dataspace(3, 3))
+hdf5_A = create_dataset(f, "A", datatype(Int64), (3, 3))
hdf5_B = create_dataset(
-    f, "B", datatype(Float64), dataspace(3, 3); alloc_time=HDF5.API.H5D_ALLOC_TIME_EARLY
+    f, "B", datatype(Float64), (3, 3); alloc_time=HDF5.API.H5D_ALLOC_TIME_EARLY
)
# The late case cannot be mapped yet.
@test_throws ErrorException("Error getting offset") HDF5.readmmap(f["A"])
7 changes: 1 addition & 6 deletions test/mpio.jl
@@ -43,12 +43,7 @@ using Test
@test isopen(f)
g = create_group(f, "mygroup")
dset = create_dataset(
-    g,
-    "B",
-    datatype(Int64),
-    dataspace(10, nprocs);
-    chunk=(10, 1),
-    dxpl_mpio=:collective
+    g, "B", datatype(Int64), (10, nprocs); chunk=(10, 1), dxpl_mpio=:collective
)
dset[:, myrank + 1] = A
end
20 changes: 11 additions & 9 deletions test/plain.jl
@@ -145,11 +145,15 @@ end
dset = create_dataset(f, nothing, datatype(Float64), (20, 20, 5); chunk=(5, 5, 1))
dset[:, :, :] = 3.0
# More complex hyperslab and assignment with "incorrect" types (issue #34)
-d = create_dataset(f, "slab2", datatype(Float64), ((10, 20), (100, 200)); chunk=(1, 1))
+d = create_dataset(
+    f, "slab2", datatype(Float64), (10, 20); max_dims=(100, 200), chunk=(1, 1)
+)
d[:, :] = 5
d[1, 1] = 4
# 1d indexing
-d = create_dataset(f, "slab3", datatype(Int), ((10,), (-1,)); chunk=(5,))
+d = create_dataset(
+    f, "slab3", datatype(Int), (10,); max_dims=(HDF5.UNLIMITED,), chunk=(5,)
+)
@test d[:] == zeros(Int, 10)
d[3:5] = 3:5
# Create a dataset designed to be deleted
@@ -443,7 +447,7 @@ end
try
h5open(fn, "w") do f
create_dataset(f, "test", Int, (128, 32))
-create_dataset(f, "test2", Float64, 128, 64)
+create_dataset(f, "test2", Float64, (128, 64))
@test size(f["test"]) == (128, 32)
@test size(f["test2"]) == (128, 64)
end
@@ -874,7 +878,7 @@ end # generic read of native types
dset = create_dataset(group, "dset", datatype(Int), (1,))
@test sprint(show, dset) == "HDF5.Dataset: /group/dset (file: $fn xfer_mode: 0)"

-meta = create_attribute(dset, "meta", datatype(Bool), Dataspace((1,)))
+meta = create_attribute(dset, "meta", datatype(Bool), (1,))
@test sprint(show, meta) == "HDF5.Attribute: meta"

dsetattrs = attributes(dset)
@@ -896,7 +900,7 @@ end # generic read of native types
commit_datatype(hfile, "type", dtype)
@test sprint(show, dtype) == "HDF5.Datatype: /type H5T_IEEE_F64LE"

-dtypemeta = create_attribute(dtype, "dtypemeta", datatype(Bool), Dataspace((1,)))
+dtypemeta = create_attribute(dtype, "dtypemeta", datatype(Bool), (1,))
@test sprint(show, dtypemeta) == "HDF5.Attribute: dtypemeta"

dtypeattrs = attributes(dtype)
@@ -1215,7 +1219,7 @@ end # split1 tests
@test haskey(hfile, "group1/dset2")
@test !haskey(hfile, "group1/dsetna")

-meta1 = create_attribute(dset1, "meta1", datatype(Bool), Dataspace((1,)))
+meta1 = create_attribute(dset1, "meta1", datatype(Bool), (1,))
@test haskey(dset1, "meta1")
@test !haskey(dset1, "metana")
@test_throws KeyError dset1["nothing"]
@@ -1253,9 +1257,7 @@ end # haskey tests
@test_nowarn hfile[GenericString("dset1")]

dset1 = hfile["dset1"]
-@test_nowarn create_attribute(
-    dset1, GenericString("meta1"), datatype(Bool), Dataspace((1,))
-)
+@test_nowarn create_attribute(dset1, GenericString("meta1"), datatype(Bool), (1,))
@test_nowarn create_attribute(dset1, GenericString("meta2"), 1)
@test_nowarn dset1[GenericString("meta1")]
@test_nowarn dset1[GenericString("x")] = 2
2 changes: 1 addition & 1 deletion test/properties.jl
@@ -26,7 +26,7 @@ using Test
g,
"dataset",
datatype(Int),
-    dataspace((500, 50));
+    Dataspace((500, 50));
alloc_time=HDF5.API.H5D_ALLOC_TIME_EARLY,
chunk=(5, 10),
fill_value=1,
12 changes: 10 additions & 2 deletions test/swmr.jl
@@ -29,7 +29,13 @@ end
@testset "h5d_oappend" begin
h5open(fname, "w") do h5
g = create_group(h5, "shoe")
-d = create_dataset(g, "bar", datatype(Float64), ((1,), (-1,)); chunk=(100,))
+d = create_dataset(
+    g,
+    "bar",
+    datatype(Float64),
+    Dataspace((1,); max_dims=(HDF5.UNLIMITED,));
+    chunk=(100,)
+)
dxpl_id = HDF5.get_create_properties(d)
v = [1.0, 2.0]
memtype = datatype(Float64)
@@ -91,7 +97,9 @@ end

# create datasets and attributes before starting swmr writing
function prep_h5_file(h5)
-d = create_dataset(h5, "foo", datatype(Int), ((1,), (100,)); chunk=(1,))
+d = create_dataset(
+    h5, "foo", datatype(Int), Dataspace((1,); max_dims=(100,)); chunk=(1,)
+)
attributes(h5)["bar"] = "bar"
g = create_group(h5, "group")
end