Skip to content

Commit

Permalink
Add anonymous datasets (#917)
Browse files Browse the repository at this point in the history
* Allow dataset keyword property lists

* Add types to property list keywords

* Add h5d_create_anon

* allow nothing for write_dataset

* Apply style suggestions from code review

Removed space within Union.
Used `=== nothing` for consistency and Julia 1.0 compat.

Co-authored-by: Mustafa M <[email protected]>

* Apply suggestions from code review, use isnothing

Co-authored-by: Mustafa M <[email protected]>

Co-authored-by: Mustafa M <[email protected]>
  • Loading branch information
mkitti and musm authored Mar 18, 2022
1 parent 8080e8e commit a7ac578
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 13 deletions.
2 changes: 2 additions & 0 deletions docs/src/api_bindings.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ h5a_write
## [[`H5D`](https://portal.hdfgroup.org/display/HDF5/Datasets) — Dataset Interface](@id H5D)
- [`h5d_close`](@ref h5d_close)
- [`h5d_create`](@ref h5d_create)
- [`h5d_create_anon`](@ref h5d_create_anon)
- [`h5d_extend`](@ref h5d_extend)
- [`h5d_fill`](@ref h5d_fill)
- [`h5d_flush`](@ref h5d_flush)
Expand Down Expand Up @@ -116,6 +117,7 @@ h5a_write
```@docs
h5d_close
h5d_create
h5d_create_anon
h5d_extend
h5d_fill
h5d_flush
Expand Down
1 change: 1 addition & 0 deletions gen/api_defs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@

@bind h5d_close(dataset_id::hid_t)::herr_t "Error closing dataset"
@bind h5d_create2(loc_id::hid_t, pathname::Ptr{UInt8}, dtype_id::hid_t, space_id::hid_t, lcpl_id::hid_t, dcpl_id::hid_t, dapl_id::hid_t)::hid_t string("Error creating dataset ", h5i_get_name(loc_id), "/", pathname)
@bind h5d_create_anon(loc_id::hid_t, type_id::hid_t, space_id::hid_t, dcpl_id::hid_t, dapl_id::hid_t)::hid_t "Error in creating anonymous dataset"
@bind h5d_extend(dataset_id::hid_t, size::Ptr{hsize_t})::herr_t "Error extending dataset" # deprecated in favor of h5d_set_extent
@bind h5d_fill(fill::Ptr{Cvoid}, fill_type_id::hid_t, buf::Ptr{Cvoid}, buf_type_id::hid_t, space_id::hid_t)::herr_t "Error filling dataset"
@bind h5d_flush(dataset_id::hid_t)::herr_t "Error flushing dataset"
Expand Down
32 changes: 19 additions & 13 deletions src/HDF5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,8 @@ end
# Arguments
* `parent` - `File` or `Group`
* `path` - String describing the path of the dataset within the HDF5 file
* `path` - `String` describing the path of the dataset within the HDF5 file or
`nothing` to create an anonymous dataset
* `datatype` - `Datatype` or `Type` or the dataset
* `dataspace` - `Dataspace` or `Dims` of the dataset
* `properties` - keyword name-value pairs set properties of the dataset
Expand All @@ -589,22 +590,27 @@ See also
"""
function create_dataset(
parent::Union{File,Group},
path::AbstractString,
path::Union{AbstractString,Nothing},
dtype::Datatype,
dspace::Dataspace;
dcpl::DatasetCreateProperties = DatasetCreateProperties(),
dxpl::DatasetTransferProperties = DatasetTransferProperties(),
dapl::DatasetAccessProperties = DatasetAccessProperties(),
pv...
)
haskey(parent, path) && error("cannot create dataset: object \"", path, "\" already exists at ", name(parent))
!isnothing(path) && haskey(parent, path) && error("cannot create dataset: object \"", path, "\" already exists at ", name(parent))
pv = setproperties!(dcpl,dxpl,dapl; pv...)
isempty(pv) || error("invalid keyword options")
Dataset(API.h5d_create(parent, path, dtype, dspace, _link_properties(path), dcpl, dapl), file(parent), dxpl)
if isnothing(path)
ds = API.h5d_create_anon(parent, dtype, dspace, dcpl, dapl)
else
ds = API.h5d_create(parent, path, dtype, dspace, _link_properties(path), dcpl, dapl)
end
Dataset(ds, file(parent), dxpl)
end
create_dataset(parent::Union{File,Group}, path::AbstractString, dtype::Datatype, dspace_dims::Dims; pv...) = create_dataset(checkvalid(parent), path, dtype, dataspace(dspace_dims); pv...)
create_dataset(parent::Union{File,Group}, path::AbstractString, dtype::Datatype, dspace_dims::Tuple{Dims,Dims}; pv...) = create_dataset(checkvalid(parent), path, dtype, dataspace(dspace_dims[1], max_dims=dspace_dims[2]); pv...)
create_dataset(parent::Union{File,Group}, path::AbstractString, dtype::Type, dspace_dims::Tuple{Dims,Dims}; pv...) = create_dataset(checkvalid(parent), path, datatype(dtype), dataspace(dspace_dims[1], max_dims=dspace_dims[2]); pv...)
create_dataset(parent::Union{File,Group}, path::Union{AbstractString,Nothing}, dtype::Datatype, dspace_dims::Dims; pv...) = create_dataset(checkvalid(parent), path, dtype, dataspace(dspace_dims); pv...)
create_dataset(parent::Union{File,Group}, path::Union{AbstractString,Nothing}, dtype::Datatype, dspace_dims::Tuple{Dims,Dims}; pv...) = create_dataset(checkvalid(parent), path, dtype, dataspace(dspace_dims[1], max_dims=dspace_dims[2]); pv...)
create_dataset(parent::Union{File,Group}, path::Union{AbstractString,Nothing}, dtype::Type, dspace_dims::Tuple{Dims,Dims}; pv...) = create_dataset(checkvalid(parent), path, datatype(dtype), dataspace(dspace_dims[1], max_dims=dspace_dims[2]); pv...)

# Note that H5Tcreate is very different; H5Tcommit is the analog of these others
create_datatype(class_id, sz) = Datatype(API.h5t_create(class_id, sz))
Expand Down Expand Up @@ -645,7 +651,7 @@ move_link(parent::Union{File,Group}, src_name::AbstractString, dest_name::Abstra
Base.setindex!(dset::Dataset, val, name::AbstractString) = write_attribute(dset, name, val)
Base.setindex!(x::Attributes, val, name::AbstractString) = write_attribute(x.parent, name, val)
# Create a dataset with properties: obj[path, prop = val, ...] = val
function Base.setindex!(parent::Union{File,Group}, val, path::AbstractString; pv...)
function Base.setindex!(parent::Union{File,Group}, val, path::Union{AbstractString,Nothing}; pv...)
need_chunks = any(k in keys(chunked_props) for k in keys(pv))
have_chunks = any(k == :chunk for k in keys(pv))

Expand Down Expand Up @@ -1170,7 +1176,7 @@ function readmmap(obj::Dataset)
end

# Generic write
function Base.write(parent::Union{File,Group}, name1::AbstractString, val1, name2::AbstractString, val2, nameval...) # FIXME: remove?
function Base.write(parent::Union{File,Group}, name1::Union{AbstractString,Nothing}, val1, name2::Union{AbstractString,Nothing}, val2, nameval...) # FIXME: remove?
if !iseven(length(nameval))
error("name, value arguments must come in pairs")
end
Expand All @@ -1191,7 +1197,7 @@ end
# Create datasets and attributes with "native" types, but don't write the data.
# The return syntax is: dset, dtype = create_dataset(parent, name, data; properties...)

function create_dataset(parent::Union{File,Group}, name::AbstractString, data; pv...)
function create_dataset(parent::Union{File,Group}, name::Union{AbstractString,Nothing}, data; pv...)
dtype = datatype(data)
dspace = dataspace(data)
obj = try
Expand All @@ -1213,7 +1219,7 @@ function create_attribute(parent::Union{File,Object}, name::AbstractString, data
end

# Create and write, closing the objects upon exit
function write_dataset(parent::Union{File,Group}, name::AbstractString, data; pv...)
function write_dataset(parent::Union{File,Group}, name::Union{AbstractString,Nothing}, data; pv...)
obj, dtype = create_dataset(parent, name, data; pv...)
try
write_dataset(obj, dtype, data)
Expand Down Expand Up @@ -1259,7 +1265,7 @@ function Base.write(obj::Dataset, x)
end

# For plain files and groups, let "write(obj, name, val; properties...)" mean "write_dataset"
Base.write(parent::Union{File,Group}, name::AbstractString, data; pv...) = write_dataset(parent, name, data; pv...)
Base.write(parent::Union{File,Group}, name::Union{AbstractString,Nothing}, data; pv...) = write_dataset(parent, name, data; pv...)
# For datasets, "write(dset, name, val; properties...)" means "write_attribute"
Base.write(parent::Dataset, name::AbstractString, data; pv...) = write_attribute(parent, name, data; pv...)

Expand Down Expand Up @@ -1562,7 +1568,7 @@ function write_dataset(dataset::Dataset, memtype::Datatype, buf::AbstractArray,
stride(buf, 1) != 1 && throw(ArgumentError("Cannot write arrays with a different stride than `Array`"))
API.h5d_write(dataset, memtype, API.H5S_ALL, API.H5S_ALL, xfer, buf)
end
function write_dataset(dataset::Dataset, memtype::Datatype, str::AbstractString, xfer::DatasetTransferProperties=dataset.xfer)
function write_dataset(dataset::Dataset, memtype::Datatype, str::Union{AbstractString,Nothing}, xfer::DatasetTransferProperties=dataset.xfer)
strbuf = Base.cconvert(Cstring, str)
GC.@preserve strbuf begin
# unsafe_convert(Cstring, strbuf) is responsible for enforcing the no-'\0' policy,
Expand Down
11 changes: 11 additions & 0 deletions src/api/functions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,17 @@ function h5d_create(loc_id, pathname, dtype_id, space_id, lcpl_id, dcpl_id, dapl
return var"#status#"
end

"""
    h5d_create_anon(loc_id::hid_t, type_id::hid_t, space_id::hid_t, dcpl_id::hid_t, dapl_id::hid_t) -> hid_t

Create an anonymous (unlinked) dataset and return its `hid_t` identifier.

See `libhdf5` documentation for [`H5Dcreate_anon`](https://portal.hdfgroup.org/display/HDF5/H5D_CREATE_ANON).
"""
function h5d_create_anon(loc_id, type_id, space_id, dcpl_id, dapl_id)
    # Thin wrapper: forward all five handle arguments directly to the C API.
    var"#status#" = ccall((:H5Dcreate_anon, libhdf5), hid_t, (hid_t, hid_t, hid_t, hid_t, hid_t), loc_id, type_id, space_id, dcpl_id, dapl_id)
    # HDF5 signals failure with a negative id; surface it as a Julia error.
    var"#status#" < 0 && @h5error("Error in creating anonymous dataset")
    return var"#status#"
end

"""
h5d_extend(dataset_id::hid_t, size::Ptr{hsize_t})
Expand Down
5 changes: 5 additions & 0 deletions test/plain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ f["compressedempty", shuffle=true, deflate=4] = Int64[]
# compression of zero-dimensional array (pull request #445)
f["compressed_zerodim", shuffle=true, deflate=4] = fill(Int32(42), ())
f["bloscempty", blosc=4] = Int64[]
# test creation of an anonymous dataset
f[nothing] = 5
# Create arrays of different types
A = randn(3, 5)
write(f, "Afloat64", convert(Matrix{Float64}, A))
Expand Down Expand Up @@ -86,6 +88,7 @@ write_attribute(f["int_vlen"], "vlen_attr", vleni)
# Empty arrays
empty = UInt32[]
write(f, "empty", empty)
write(f, nothing, empty)
# Empty strings
empty_string = ""
write(f, "empty_string", empty_string)
Expand Down Expand Up @@ -133,6 +136,8 @@ Xslab = randn(20, 20, 5)
for i = 1:5
dset[:,:,i] = Xslab[:,:,i]
end
dset = create_dataset(f, nothing, datatype(Float64), dataspace(20, 20, 5), chunk=(5, 5, 1))
dset[:, :, :] = 3.0
# More complex hyperslab and assignment with "incorrect" types (issue #34)
d = create_dataset(f, "slab2", datatype(Float64), ((10, 20), (100, 200)), chunk=(1, 1))
d[:,:] = 5
Expand Down

0 comments on commit a7ac578

Please sign in to comment.