Skip to content

Commit

Permalink
don't inline long ntuples (#370)
Browse files Browse the repository at this point in the history
* don't inline long ntuples

* fix edge case of Tuple{}

* @nospecialize to help compiler + NTuple repr

* add fallback for empty tuple

* remove spurious print statement

* bump patch version

Co-authored-by: Jonas Isensee <[email protected]>
  • Loading branch information
JonasIsensee and Jonas Isensee authored Jan 27, 2022
1 parent 4499f87 commit b25ef96
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "JLD2"
uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
version = "0.4.18"
version = "0.4.19"

[deps]
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Expand Down
22 changes: 21 additions & 1 deletion src/data/specialcased_types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ rconvert(::Type{BigFloat}, x::String) = parse(BigFloat, x)

# Previously it was disallowed to serialize pointers.
# Due to popular demand and in particular to not error on serializing complex structures
# that contain non-essential pointers this has been changed to instead
# that contain non-essential pointers this has been changed to instead
# return null pointers.
# Pointers of any element type are stored as `Nothing`; the address itself is
# never written.
function writeas(::Type{Ptr{T}}) where {T}
    return Nothing
end
# Reading back a stored pointer always yields the null pointer of the
# requested pointer type.
function rconvert(::Type{Ptr{T}}, ::Nothing) where {T}
    return convert(Ptr{T}, C_NULL)
end
Expand Down Expand Up @@ -187,3 +187,23 @@ function rconvert(::Type{<:Base.ImmutableDict}, x::Vector{Pair{K,V}}) where {K,V
end
d
end

## NTuples
# Immutable objects are stored as HDF5 structs and inlined into
# parent structures. HDF5 only allows typemax(UInt16) bytes
# for struct description. NTuples are the most common offender for
# exploding struct size. (e.g. in the form of large StaticArrays)
# The definitions below prevent inlining of large NTuples and
# convert to an array instead.
# Longest NTuple that is still written in its native (inlined) form.
const NTUPLE_INLINE_THRESHOLD = 10

# Select the on-disk type for a homogeneous tuple type `NTuple{N,T}`:
# tuples with at most `NTUPLE_INLINE_THRESHOLD` elements are written as they
# are, longer ones are written as a `Vector{T}`.
# `T` is only touched on the long-tuple path.
function writeas(NT::Type{NTuple{N,T}}) where {N,T}
    N <= NTUPLE_INLINE_THRESHOLD && return NT
    return Vector{T}
end

# Convert a homogeneous tuple into the `Vector{T}` representation requested as
# the first argument.
# The element type is passed to `collect` explicitly: `collect(x)` alone infers
# it from the tuple's values, which yields e.g. a `Vector{Int}` when `T` is an
# abstract type such as `Any` and would not match the requested `Vector{T}`.
function wconvert(::Type{Vector{T}}, x::NTuple{N,T}) where {N,T}
    return collect(T, x)
end
# Rebuild the original homogeneous tuple from the vector it was stored as.
function rconvert(::Type{NTuple{N,T}}, x::Vector{T}) where {N,T}
    TupleType = NTuple{N,T}
    return TupleType(x)
end
16 changes: 16 additions & 0 deletions src/data/writing_datatypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,22 @@ function h5convert!(out::Pointers, ::DataTypeODR, f::JLDFile, T::DataType, wsess
end


# Compact datatype description for homogeneous tuple types.
# Since NTuple{N,T} == Tuple{T,T,...,T}, a long tuple type can be described by
# the name "NTuple" together with just its two parameters `N` and `ET`
# (passed through `refs_from_types`), instead of listing every element type.
# The empty tuple type has no parameters; it is written under the name "Tuple"
# and its parameter slot is filled by `h5convert_uninitialized!`.
function h5convert!(out::Pointers, ::DataTypeODR, f::JLDFile, T::Type{NTuple{N,ET}},
                    wsession::JLDWriteSession) where {N,ET}
    if !isempty(T.parameters)
        name_bytes = unsafe_wrap(Vector{UInt8}, "NTuple")
        store_vlen!(out, UInt8, f, name_bytes, f.datatype_wsession)
        param_refs = refs_from_types(f, Any[N,ET], wsession)
        store_vlen!(out+odr_sizeof(Vlen{UInt8}), RelOffset, f, param_refs,
                    f.datatype_wsession)
        return nothing
    end

    # Tuple{}: `ET` is not bound here, so only the name is written.
    name_bytes = unsafe_wrap(Vector{UInt8}, "Tuple")
    store_vlen!(out, UInt8, f, name_bytes, f.datatype_wsession)
    h5convert_uninitialized!(out+odr_sizeof(Vlen{UInt8}), Vlen{UInt8})
    return nothing
end



## Union Types

const H5TYPE_UNION = CompoundDatatype(
Expand Down
16 changes: 8 additions & 8 deletions src/datasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ end

# Most types can only be scalars or arrays
function read_data(f::JLDFile,
rr,
@nospecialize(rr),
read_dataspace::Tuple{ReadDataspace,RelOffset,Int,UInt16},
attributes::Union{Vector{ReadAttribute},Nothing}=nothing)

Expand Down Expand Up @@ -315,7 +315,7 @@ function read_array(f::JLDFile, dataspace::ReadDataspace,
io = f.io
data_offset = position(io)
ndims, offset = get_ndims_offset(f, dataspace, attributes)

seek(io, offset)
v = construct_array(io, T, Int(ndims))
n = length(v)
Expand Down Expand Up @@ -351,11 +351,11 @@ function write_dataset(
io = f.io
datasz = odr_sizeof(odr) * numel(dataspace)
#layout_class
if datasz < 8192
if datasz < 8192
layout_class = LC_COMPACT_STORAGE
elseif compress != false
layout_class = LC_CHUNKED_STORAGE
else
elseif compress != false
layout_class = LC_CHUNKED_STORAGE
else
layout_class = LC_CONTIGUOUS_STORAGE
end
psz = payload_size_without_storage_message(dataspace, datatype)
Expand Down Expand Up @@ -565,7 +565,7 @@ function Base.delete!(g::Group, name::AbstractString)

# Dataset must already exist in the file
# Retrieve offset of group in file
offset = group_offset(g)
offset = group_offset(g)
offset == NULL_REFERENCE && throw(InternalError("Group could not be found."))
delete_written_link!(g.f, offset, name)
delete!(g.written_links, name)
Expand Down Expand Up @@ -615,7 +615,7 @@ function delete_written_link!(f::JLDFile, roffset::RelOffset, name::AbstractStri
while (curpos = position(io)) <= chunk_checksum_offset - 4
msg = jlread(io, HeaderMessage)
endpos = curpos + jlsizeof(HeaderMessage) + msg.size

if msg.msg_type == HM_LINK_MESSAGE
dataset_name, loffset = read_link(io)
if dataset_name == name
Expand Down
9 changes: 9 additions & 0 deletions test/loadsave.jl
Original file line number Diff line number Diff line change
Expand Up @@ -464,3 +464,12 @@ end

rm(tmpdir; force = true, recursive = true)
end

# Test for saving long NTuples
@testset "Long NTuples" begin
    # Run inside a fresh temporary directory; `cd(f, dir)` restores the
    # previous working directory afterwards.
    cd(mktempdir()) do
        filename = "test.jld2"
        # 5000 elements: far beyond the length at which NTuples stop being inlined.
        tup = ntuple(abs2, 5000)
        jldsave(filename; tup)
        @test tup == load_object(filename)
    end
end
2 changes: 0 additions & 2 deletions test/modules.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
push!(LOAD_PATH, joinpath(pwd(),"testmodules/"))
println(LOAD_PATH)

module TestModule

using A
Expand Down

2 comments on commit b25ef96

@JonasIsensee
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/53286

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.4.19 -m "<description of version>" b25ef964ca896bf5b47415fd1fae92acfb7fa3c7
git push origin v0.4.19

Please sign in to comment.