diff --git a/Project.toml b/Project.toml index c726f32..d176956 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DiskArrays" uuid = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3" authors = ["Fabian Gans "] -version = "0.1.0" +version = "0.1.1" [deps] diff --git a/src/DiskArrays.jl b/src/DiskArrays.jl index c5a0223..c3d45c8 100644 --- a/src/DiskArrays.jl +++ b/src/DiskArrays.jl @@ -191,6 +191,7 @@ include("chunks.jl") include("ops.jl") include("iterator.jl") include("subarrays.jl") +include("permute_reshape.jl") #The all-in-one macro macro implement_diskarray(t) @@ -200,6 +201,8 @@ quote @implement_broadcast $t @implement_iteration $t @implement_mapreduce $t + @implement_reshape $t + @implement_permutedims $t end end diff --git a/src/permute_reshape.jl b/src/permute_reshape.jl new file mode 100644 index 0000000..a647a6f --- /dev/null +++ b/src/permute_reshape.jl @@ -0,0 +1,93 @@ +import Base: _throw_dmrs +import DiskArrays: splittuple +#Reshaping is really not trivial, because the access pattern would completely change for reshaped arrays, +#rectangles would not remain rectangles in the parent array. However, we can support the case where only +#singleton dimensions are added, later we could allow more special cases like joining two dimensions to one +struct ReshapedDiskArray{T,N,P<:AbstractArray,M} <: AbstractDiskArray{T,N} + parent::P + keepdim::NTuple{M,Int} + newsize::NTuple{N,Int} +end +Base.size(r::ReshapedDiskArray) = r.newsize +haschunks(a::ReshapedDiskArray) = haschunks(a.parent) +eachchunk(a::ReshapedDiskArray{<:Any,N}) where N = map(eachchunk(a.parent)) do j + r = toRanges(j) + inow::Int = 0 + ntuple(N) do i + if i in a.keepdim + inow+=1 + r[inow] + else + 1:1 + end + end::NTuple{N,UnitRange{Int}} +end +function DiskArrays.readblock!(a::ReshapedDiskArray,aout,i...) + inew = tuple_tuple_getindex(i,a.keepdim) + DiskArrays.readblock!(a.parent,reshape(aout,map(length,inew)),inew...) + nothing +end +tuple_tuple_getindex(t,i) = _ttgi((),t,i...) +_ttgi(o,t,i1,irest...) = _ttgi((o...,t[i1]),t,irest...) +_ttgi(o,t,i1) = (o...,t[i1]) +function DiskArrays.writeblock!(a::ReshapedDiskArray,v,i...) + inew = tuple_tuple_getindex(i,a.keepdim) + DiskArrays.writeblock!(a.parent,reshape(v,map(length,inew)),inew...) + nothing +end +function reshape_disk(parent, dims) + n = length(parent) + ndims(parent) > length(dims) && error("For DiskArrays, reshape is restricted to adding singleton dimensions") + prod(dims) == n || _throw_dmrs(n, "size", dims) + ipassed::Int=0 + s = size(parent) + keepdim = map(s) do snow + while true + ipassed += 1 + d = dims[ipassed] + if d>1 + d != snow && error("For DiskArrays, reshape is restricted to adding singleton dimensions") + return ipassed + end + end + end + ReshapedDiskArray{eltype(parent),length(dims),typeof(parent),ndims(parent)}(parent,keepdim,dims) +end + +import Base: _throw_dmrs +import DiskArrays: splittuple, toRanges +import Base.PermutedDimsArrays: genperm +struct PermutedDiskArray{T,N,P<:PermutedDimsArray} <: AbstractDiskArray{T,N} + a::P +end +function permutedims_disk(a,perm) + pd = PermutedDimsArray(a,perm) + PermutedDiskArray{eltype(a),ndims(a),typeof(pd)}(pd) +end +Base.size(r::PermutedDiskArray) = size(r.a) +haschunks(a::PermutedDiskArray) = haschunks(a.a.parent) +eachchunk(a::PermutedDiskArray{T,N,<:PermutedDimsArray{T,N,perm,iperm}}) where {T,N,perm,iperm} = map(j->CartesianIndices(genperm(toRanges(j),perm)),eachchunk(a.a.parent)) +function DiskArrays.readblock!(a::PermutedDiskArray{T,N,<:PermutedDimsArray{T,N,perm,iperm}},aout,i...) where {T,N,perm,iperm} + inew = genperm(i, iperm) + DiskArrays.readblock!(a.a.parent,PermutedDimsArray(aout,iperm),inew...) + nothing +end +function DiskArrays.writeblock!(a::PermutedDiskArray{T,N,<:PermutedDimsArray{T,N,perm,iperm}},v,i...) where {T,N,perm,iperm} + inew = genperm(i, iperm) + DiskArrays.writeblock!(a.a.parent,PermutedDimsArray(v,iperm),inew...) + nothing +end + +macro implement_reshape(t) + quote + Base._reshape(parent::$t, dims::NTuple{N,Int}) where N = + reshape_disk(parent, dims) + end +end + +macro implement_permutedims(t) + quote + Base.permutedims(parent::$t, perm) = + permutedims_disk(parent, perm) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 614872b..e92a69d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ using DiskArrays +using DiskArrays: ReshapedDiskArray, PermutedDiskArray using Test #Define a data structure that can be used for testing @@ -12,6 +13,15 @@ _DiskArray(a;chunksize=size(a)) = _DiskArray(Ref(0),Ref(0),a,chunksize) Base.size(a::_DiskArray) = size(a.parent) DiskArrays.haschunks(::_DiskArray) = DiskArrays.Chunked() DiskArrays.eachchunk(a::_DiskArray) = DiskArrays.GridChunks(a,a.chunksize) +getindex_count(a::_DiskArray) = a.getindex_count[] +setindex_count(a::_DiskArray) = a.setindex_count[] +trueparent(a::_DiskArray) = a.parent +getindex_count(a::ReshapedDiskArray) = getindex_count(a.parent) +setindex_count(a::ReshapedDiskArray) = setindex_count(a.parent) +trueparent(a::ReshapedDiskArray) = trueparent(a.parent) +getindex_count(a::PermutedDiskArray) = getindex_count(a.a.parent) +setindex_count(a::PermutedDiskArray) = setindex_count(a.a.parent) +trueparent(a::PermutedDiskArray{T,N,<:PermutedDimsArray{T,N,perm,iperm}}) where {T,N,perm,iperm} = permutedims(trueparent(a.a.parent),perm) function DiskArrays.readblock!(a::_DiskArray,aout,i...) ndims(a) == length(i) || error("Number of indices is not correct") all(r->isa(r,AbstractUnitRange),i) || error("Not all indices are unit ranges") @@ -27,19 +37,7 @@ function DiskArrays.writeblock!(a::_DiskArray,v,i...) view(a.parent,i...) .= v end -@testset "Index interpretation" begin - import DiskArrays: DimsDropper, Reshaper - a = zeros(3,3,1) - @test interpret_indices_disk(a, (:,2,:)) == ((Base.OneTo(3), 2:2, Base.OneTo(1)), DimsDropper{Tuple{Int64}}((2,))) - @test interpret_indices_disk(a, (1,2,:)) == ((1:1, 2:2, Base.OneTo(1)), DimsDropper{Tuple{Int64,Int64}}((1, 2))) - @test interpret_indices_disk(a, (1,2,2,1)) == ((1:1, 2:2, 2:2), DimsDropper{Tuple{Int64,Int64,Int64}}((1, 2, 3))) - @test interpret_indices_disk(a, (1,2,2,1)) == ((1:1, 2:2, 2:2), DimsDropper{Tuple{Int64,Int64,Int64}}((1, 2, 3))) - @test interpret_indices_disk(a, (:,1:2)) == ((Base.OneTo(3), 1:2, 1:1), DimsDropper{Tuple{Int64}}((3,))) - @test interpret_indices_disk(a, (:,)) == ((Base.OneTo(3), Base.OneTo(3), Base.OneTo(1)), DiskArrays.Reshaper{Int64}(9)) -end - -@testset "AbstractDiskArray getindex" begin - a = _DiskArray(reshape(1:20,4,5,1)) +function test_getindex(a) @test a[2,3,1] == 10 @test a[2,3] == 10 @test a[2,3,1,1] == 10 @@ -50,12 +48,10 @@ end m[2,:,1] .= true @test a[m] == [2,6,10,14,18] #Test that readblock was called exactly onces for every getindex - @test a.getindex_count[] == 6 + @test getindex_count(a) == 6 end - -@testset "AbstractDiskArray setindex" begin - a = _DiskArray(zeros(Int,4,5,1)) +function test_setindex(a) a[1,1,1] = 1 a[1,2] = 2 a[1,3,1,1] = 3 @@ -66,68 +62,65 @@ end m[4,:,1] .= true a[m] = [10,11,12,13,14] #Test that readblock was called exactly onces for every getindex - @test a.setindex_count[] == 6 - @test a.parent[1,1:3,1] == [1,2,3] - @test a.parent[2,:,1] == [1,2,3,4,5] - @test a.parent[3,3:4,1] == [3,4] - @test a.parent[4,:,1] == [10,11,12,13,14] + @test setindex_count(a) == 6 + @test trueparent(a)[1,1:3,1] == [1,2,3] + @test trueparent(a)[2,:,1] == [1,2,3,4,5] + @test trueparent(a)[3,3:4,1] == [3,4] + @test trueparent(a)[4,:,1] == [10,11,12,13,14] end -@testset "Views" begin - a = _DiskArray(zeros(Int,4,5,1)) -v = view(a,2:3,2:4,1) +function test_view(a) + v = view(a,2:3,2:4,1) -v[1:2,1] = [1,2] -v[1:2,2:3] = [4 4; 4 4] -@test v[1:2,1] == [1,2] -@test v[1:2,2:3] == [4 4; 4 4] -@test a.parent[2:3,2] == [1,2] -@test a.parent[2:3,3:4] == [4 4; 4 4] -@test a.getindex_count[] == 2 -@test a.setindex_count[] == 2 + v[1:2,1] = [1,2] + v[1:2,2:3] = [4 4; 4 4] + @test v[1:2,1] == [1,2] + @test v[1:2,2:3] == [4 4; 4 4] + @test trueparent(a)[2:3,2] == [1,2] + @test trueparent(a)[2:3,3:4] == [4 4; 4 4] + @test getindex_count(a) == 2 + @test setindex_count(a) == 2 end -import Statistics: mean -@testset "Reductions" begin +function test_reductions(af) data = rand(10,20,2) for f in (minimum,maximum,sum, (i,args...;kwargs...)->all(j->j>0.1,i,args...;kwargs...), (i,args...;kwargs...)->any(j->j<0.1,i,args...;kwargs...), (i,args...;kwargs...)->mapreduce(x->2*x,+,i,args...;kwargs...)) - a = _DiskArray(data,chunksize=(5,4,2)) + a = af(data) @test isapprox(f(a),f(data)) - @test a.getindex_count[] <= 10 + @test getindex_count(a) <= 10 #And test reduction along dimensions a = _DiskArray(data,chunksize=(5,4,2)) @test all(isapprox.(f(a,dims=2),f(data,dims=2))) #The minimum and maximum functions do some initialization, which will increase #the number of reads - @test f in (minimum, maximum) || a.getindex_count[] <= 12 + @test f in (minimum, maximum) || getindex_count(a) <= 12 a = _DiskArray(data,chunksize=(5,4,2)) @test all(isapprox.(f(a,dims=(1,3)),f(data,dims=(1,3)))) - @test f in (minimum, maximum) || a.getindex_count[] <= 12 + @test f in (minimum, maximum) || getindex_count(a) <= 12 end end -@testset "Broadcast" begin - a_disk1 = _DiskArray(rand(10,9,2), chunksize=(5,3,2)) +function test_broadcast(a_disk1) a_disk2 = _DiskArray(rand(1:10,1,9), chunksize=(1,3)) a_mem = reshape(1:2,1,1,2); s = a_disk1 .+ a_disk2 #Test lazy broadcasting @test s isa DiskArrays.BroadcastDiskArray - @test a_disk1.getindex_count[]==0 - @test a_disk1.setindex_count[]==0 - @test a_disk2.getindex_count[]==0 - @test a_disk2.setindex_count[]==0 + @test getindex_count(a_disk1)==0 + @test setindex_count(a_disk1)==0 + @test getindex_count(a_disk2)==0 + @test setindex_count(a_disk2)==0 @test size(s)==(10,9,2) @test eltype(s) == Float64 #Lets merge another broadcast s2 = s ./ a_mem @test s isa DiskArrays.BroadcastDiskArray - @test a_disk1.getindex_count[]==0 - @test a_disk2.getindex_count[]==0 + @test getindex_count(a_disk1)==0 + @test getindex_count(a_disk2)==0 @test size(s)==(10,9,2) @test eltype(s) == Float64 #And now do the computation with Array as a sink @@ -135,19 +128,81 @@ end aout .= s2 #Test if the result is correct @test aout == (a_disk1.parent .+ a_disk2.parent)./a_mem - @test a_disk1.getindex_count[]==6 - @test a_disk2.getindex_count[]==6 + @test getindex_count(a_disk1)==6 + @test getindex_count(a_disk2)==6 #Now use another DiskArray as the output aout = _DiskArray(zeros(10,9,2),chunksize=(5,3,2)) aout .= s ./ a_mem @test aout.parent == (a_disk1.parent .+ a_disk2.parent)./a_mem - @test aout.setindex_count[]==6 - @test a_disk1.getindex_count[]==12 - @test a_disk2.getindex_count[]==12 + @test setindex_count(aout)==6 + @test getindex_count(a_disk1)==12 + @test getindex_count(a_disk2)==12 #Test reduction of broadcasted expression r = sum(s2, dims=(1,2)) @test all(isapprox.(sum((a_disk1.parent .+ a_disk2.parent)./a_mem,dims=(1,2)),r)) - @test a_disk1.getindex_count[]==18 - @test a_disk2.getindex_count[]==18 + @test getindex_count(a_disk1)==18 + @test getindex_count(a_disk2)==18 +end + +@testset "Index interpretation" begin + import DiskArrays: DimsDropper, Reshaper + a = zeros(3,3,1) + @test interpret_indices_disk(a, (:,2,:)) == ((Base.OneTo(3), 2:2, Base.OneTo(1)), DimsDropper{Tuple{Int64}}((2,))) + @test interpret_indices_disk(a, (1,2,:)) == ((1:1, 2:2, Base.OneTo(1)), DimsDropper{Tuple{Int64,Int64}}((1, 2))) + @test interpret_indices_disk(a, (1,2,2,1)) == ((1:1, 2:2, 2:2), DimsDropper{Tuple{Int64,Int64,Int64}}((1, 2, 3))) + @test interpret_indices_disk(a, (1,2,2,1)) == ((1:1, 2:2, 2:2), DimsDropper{Tuple{Int64,Int64,Int64}}((1, 2, 3))) + @test interpret_indices_disk(a, (:,1:2)) == ((Base.OneTo(3), 1:2, 1:1), DimsDropper{Tuple{Int64}}((3,))) + @test interpret_indices_disk(a, (:,)) == ((Base.OneTo(3), Base.OneTo(3), Base.OneTo(1)), DiskArrays.Reshaper{Int64}(9)) +end + +@testset "AbstractDiskArray getindex" begin + a = _DiskArray(reshape(1:20,4,5,1)) + test_getindex(a) +end + + +@testset "AbstractDiskArray setindex" begin + a = _DiskArray(zeros(Int,4,5,1)) + test_setindex(a) +end + +@testset "Views" begin + a = _DiskArray(zeros(Int,4,5,1)) + test_view(a) +end + +import Statistics: mean +@testset "Reductions" begin + a = data -> _DiskArray(data,chunksize=(5,4,2)) + test_reductions(a) +end + +@testset "Broadcast" begin + a_disk1 = _DiskArray(rand(10,9,2), chunksize=(5,3,2)) + test_broadcast(a_disk1) +end + +@testset "Reshape" begin + a = reshape(_DiskArray(reshape(1:20,4,5)),4,5,1) + test_getindex(a) + a = reshape(_DiskArray(zeros(Int,4,5)),4,5,1) + test_setindex(a) + a = reshape(_DiskArray(zeros(Int,4,5)),4,5,1) + test_view(a) + a = data -> reshape(_DiskArray(data,chunksize=(5,4,2)),10,20,2,1) + test_reductions(a) +end +import Base.PermutedDimsArrays.invperm +@testset "Permutedims" begin + p = (3,1,2) + ip = invperm(p) + a = permutedims(_DiskArray(permutedims(reshape(1:20,4,5,1),ip)),p) + test_getindex(a) + a = permutedims(_DiskArray(zeros(Int,5,1,4)),p) + test_setindex(a) + a = permutedims(_DiskArray(zeros(Int,5,1,4)),p) + test_view(a) + a = data -> permutedims(_DiskArray(permutedims(data,ip),chunksize=(4,2,5)),p) + test_reductions(a) end