Skip to content

Commit

Permalink
Merge pull request #245 from JuliaIO/customserialization
Browse files Browse the repository at this point in the history
Make custom serialization possible
  • Loading branch information
JonasIsensee authored Nov 15, 2020
2 parents daf76f8 + 31abc32 commit d6bd508
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 25 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "JLD2"
uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
version = "0.2.4"
version = "0.3.0"

[deps]
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
Expand Down
51 changes: 51 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,54 @@ or using slashes as path delimiters:
```julia
@assert load("example.jld2", "mygroup/mystuff") == 42
```

### Custom Serialization (Experimental)

Version `v0.3.0` of JLD2 introduces support for custom serialization.
For now this feature is considered experimental: it passes the tests but
has seen little testing in the wild. → Please test it and report any problems you encounter.

The API is simple: to enable custom serialization for your type `A`, you define
a new type, e.g. `ASerialization`, that contains the fields you want to store, and define
`JLD2.writeas(::Type{A}) = ASerialization`.
Internally, JLD2 will call `Base.convert` when writing and loading, so you need to make sure to extend `Base.convert` in both directions for your type.

```julia
struct A
x::Int
end

struct ASerialization
x::Vector{Int}
end

JLD2.writeas(::Type{A}) = ASerialization
Base.convert(::Type{ASerialization}, a::A) = ASerialization([a.x])
Base.convert(::Type{A}, a::ASerialization) = A(only(a.x))
```

If you do not want to overload `Base.convert` then you can also define

```julia
JLD2.wconvert(::Type{ASerialization}, a::A) = ASerialization([a.x])
JLD2.rconvert(::Type{A}, a::ASerialization) = A(only(a.x))
```

instead. This may be particularly relevant when the types involved are not your own.

```julia
struct B
x::Float64
end

JLD2.writeas(::Type{B}) = Float64
JLD2.wconvert(::Type{Float64}, b::B) = b.x
JLD2.rconvert(::Type{B}, x::Float64) = B(x)

arr = [B(rand()) for i=1:10]

@save "test.jld2" arr
```

In this example JLD2 converts the array of `B` structs to a plain `Vector{Float64}` prior to
storing to disk.
48 changes: 34 additions & 14 deletions src/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,11 @@ function constructrr(f::JLDFile, T::DataType, dt::CompoundDatatype,
types[i] = readtype
odrs[i] = odrtype
offsets[i] = dt.offsets[dtindex]
samelayout = samelayout && offsets[i] == fieldoffset(T, i) && types[i] === wstype

# The on-disk representation of T can only be the same as in memory
# if the offsets are the same, the field type on disk (readtype) and in memory (wstype)
# are the same, and no CustomSerialization is involved
samelayout = samelayout && offsets[i] == fieldoffset(T, i) && types[i] === wstype && !(odrs[i] <: CustomSerialization)

mapped[dtindex] = true
end
Expand Down Expand Up @@ -967,7 +971,7 @@ function jlconvert(rr::ReadRepresentation{T,DataTypeODR()},
mypath = String(jlconvert(ReadRepresentation{UInt8,Vlen{UInt8}}(), f, ptr, NULL_REFERENCE))
m = _resolve_type(rr, f, ptr, header_offset, mypath, hasparams, hasparams ? params : nothing)
m isa UnknownType && return m

if hasparams
try
m = m{params...}
Expand Down Expand Up @@ -1288,35 +1292,51 @@ function reconstruct_compound(f::JLDFile, T::String, dt::H5Datatype,
(ReadRepresentation{T,rodr}(), false)
end

# These need to go at the bottom. Also, JLD2 doesn't support custom serialization because
# these methods are not guaranteed to work if you add methods to `writeas`.

@generated function h5type(f::JLDFile, ::Type{T}, ::T) where T<:Array
function h5type(f::JLDFile, ::Type{T}, ::T) where T<:Array
if T <: Array{Union{}}
return :(ReferenceDatatype())
return ReferenceDatatype()
end
ty = T.parameters[1]
writtenas = writeas(ty)
!hasfielddata(writtenas) ? :(h5type(f, $writtenas, $(Expr(:new, ty)))) : :(h5fieldtype(f, $writtenas, $ty, Val{false}))
if !hasfielddata(writtenas)
# This is a hacky way to generate an instance of ty
# the instance isn't actually needed for anything except that inside
# h5type ty is determined via typeof(x)
# one reasonable optimization would be to make h5type accept a type directly
h5type(f, writtenas, rconvert(ty, writtenas()))
else
h5fieldtype(f, writtenas, ty, Val{false})
end
end


# jlconvert for empty objects
@generated function jlconvert(::ReadRepresentation{T,nothing}, f::JLDFile, ptr::Ptr,
function jlconvert(::ReadRepresentation{T,nothing}, f::JLDFile, ptr::Ptr,
header_offset::RelOffset) where T
T.size == 0 && return Expr(:new, T)
#T.size == 0 && return T()

# In this case, T is a non-empty object, but the written data was empty
# because the custom serializers for the fields all resulted in empty
# objects
return Expr(:new, T, [begin
fields = map(T.types) do ty
writtenas = writeas(ty)
@assert writtenas.size == 0
if writtenas === ty
Expr(:new, ty)
# This will usually equal `ty()` unless ty does not have a
# constructor without arguments
jlconvert(ReadRepresentation{ty,nothing}(), f, ptr, header_offset)
else
:(rconvert($ty, $(Expr(:new, writtenas))))
rconvert(ty,
jlconvert(ReadRepresentation{writtenas,nothing}(), f, ptr, header_offset)
)
end
end for ty in T.types]...)
end
if T <: Tuple
# Tuples are weird in that you can't instantiate them with Tuple{T,S}(t,s)
return (fields...,)::T
end
return T(fields...)
end

# At present, we write Union{} as an object of Core.TypeofBottom. The method above
Expand Down Expand Up @@ -1419,7 +1439,7 @@ end
# odr gives the on-disk representation of a given type, similar to
# fieldodr, but actually encoding the data for things that odr stores
# as references
@generated function odr(::Type{T}) where T
function odr(::Type{T}) where T
if !hasdata(T)
# A pointer singleton or ghost. We need to write something, but we'll
# just write a single byte.
Expand Down
2 changes: 1 addition & 1 deletion src/file_header.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import Pkg.Types: VersionRange
# Currently we specify a 512 byte header
const FILE_HEADER_LENGTH = 512

const FORMAT_VERSION = v"0.1.0"
const FORMAT_VERSION = v"0.1.1"
# Range of file format versions that can be read
# Publish patch release relaxing upper version bound
# if the imminent major release is not breaking
Expand Down
8 changes: 4 additions & 4 deletions test/customserialization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ struct CSB end
b = CSB()

struct CSBSerialization
x::Int
x::Int64
end
JLD2.writeas(::Type{CSB}) = CSBSerialization
function JLD2.wconvert(::Type{CSBSerialization}, x::CSB)
Expand Down Expand Up @@ -178,13 +178,13 @@ function Base.convert(::Type{Union}, x::CSK)
end
function Base.convert(::Type{CSK}, x::Union)
global converted = true
CSK(x.types...)
CSK(Base.uniontypes(x)...)
end

function write_tests(file, prefix, obj)
write(file, prefix, obj)
write(file, "$(prefix)_singlefieldwrapper", SingleFieldWrapper(obj))
write(file, "$(prefix)_multifieldwrapper", MultiFieldWrapper(obj, 5935250212119237787))
write(file, "$(prefix)_multifieldwrapper", MultiFieldWrapper(obj, 2147483645))
write(file, "$(prefix)_untypedwrapper", UntypedWrapper(obj))
write(file, "$(prefix)_arr", [obj])
write(file, "$(prefix)_empty_arr", typeof(obj)[])
Expand All @@ -195,7 +195,7 @@ function read_tests(file, prefix, obj)
@test read(file, prefix) == obj
@test converted
@test read(file, "$(prefix)_singlefieldwrapper") == SingleFieldWrapper(obj)
@test read(file, "$(prefix)_multifieldwrapper") == MultiFieldWrapper(obj, 5935250212119237787)
@test read(file, "$(prefix)_multifieldwrapper") == MultiFieldWrapper(obj, 2147483645)
@test read(file, "$(prefix)_untypedwrapper") == UntypedWrapper(obj)
arr = read(file, "$(prefix)_arr")
@test typeof(arr) == Vector{typeof(obj)} && length(arr) == 1 && arr[1] == obj
Expand Down
9 changes: 8 additions & 1 deletion test/loadsave.jl
Original file line number Diff line number Diff line change
Expand Up @@ -249,4 +249,11 @@ end
@load fn tup

@test tup == (EmptyImmutable(), EmptyImmutable())
end

# Test for Recursively Empty struct
@save fn tup=(EmptyII(EmptyImmutable()), EmptyImmutable())
@load fn tup

@test tup == (EmptyII(EmptyImmutable()), EmptyImmutable())
end

5 changes: 1 addition & 4 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,4 @@ include("modules-nested.jl")
include("isreconstructed.jl")
include("backwards_compatibility.jl")
include("inlineunion.jl")

# Only run the reconstruction tests on versions where `workspace` is a thing

#include("customserialization.jl") currently broken due to #265
include("customserialization.jl")

3 comments on commit d6bd508

@JonasIsensee
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JonasIsensee
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/24687

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.0 -m "<description of version>" d6bd50889667312d90b5c80b830bb797354706a7
git push origin v0.3.0

Please sign in to comment.