Finished Implementation of Reshape and Permutedims (#8)

* Implementation of Reshape and Permutedims * Add unit tests * Make definition a macro
JuliaIO · Feb 17, 2020 · f84f40a · f84f40a · meggart · Feb 18, 2020
1 parent 9883569
commit f84f40a
Show file tree

Hide file tree

Showing 4 changed files with 206 additions and 55 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "DiskArrays"
 uuid = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
 authors = ["Fabian Gans <[email protected]>"]
-version = "0.1.0"
+version = "0.1.1"
 
 [deps]
 

diff --git a/src/DiskArrays.jl b/src/DiskArrays.jl
@@ -191,6 +191,7 @@ include("chunks.jl")
 include("ops.jl")
 include("iterator.jl")
 include("subarrays.jl")
+include("permute_reshape.jl")
 
 #The all-in-one macro
 macro implement_diskarray(t)
@@ -200,6 +201,8 @@ quote
   @implement_broadcast $t
   @implement_iteration $t
   @implement_mapreduce $t
+  @implement_reshape $t
+  @implement_permutedims $t
 end
 end
 

diff --git a/src/permute_reshape.jl b/src/permute_reshape.jl
@@ -0,0 +1,93 @@
+import Base: _throw_dmrs
+import DiskArrays: splittuple
+#Reshaping is really not trivial, because the access pattern would completely change for reshaped arrays,
+#rectangles would not remain rectangles in the parent array. However, we can support the case where only
+#singleton dimensions are added, later we could allow more special cases like joining two dimensions to one
+struct ReshapedDiskArray{T,N,P<:AbstractArray,M} <: AbstractDiskArray{T,N}
+    parent::P
+    keepdim::NTuple{M,Int}
+    newsize::NTuple{N,Int}
+end
+Base.size(r::ReshapedDiskArray) = r.newsize
+haschunks(a::ReshapedDiskArray) = haschunks(a.parent)
+eachchunk(a::ReshapedDiskArray{<:Any,N}) where N = map(eachchunk(a.parent)) do j
+    r = toRanges(j)
+    inow::Int = 0
+    ntuple(N) do i
+        if i in a.keepdim
+            inow+=1
+            r[inow]
+        else
+            1:1
+        end
+    end::NTuple{N,UnitRange{Int}}
+end
+function DiskArrays.readblock!(a::ReshapedDiskArray,aout,i...)
+  inew = tuple_tuple_getindex(i,a.keepdim)
+  DiskArrays.readblock!(a.parent,reshape(aout,map(length,inew)),inew...)
+  nothing
+end
+tuple_tuple_getindex(t,i) = _ttgi((),t,i...)
+_ttgi(o,t,i1,irest...) = _ttgi((o...,t[i1]),t,irest...)
+_ttgi(o,t,i1) = (o...,t[i1])
+function DiskArrays.writeblock!(a::ReshapedDiskArray,v,i...)
+  inew = tuple_tuple_getindex(i,a.keepdim)
+  DiskArrays.writeblock!(a.parent,reshape(v,map(length,inew)),inew...)
+  nothing
+end
+function reshape_disk(parent, dims)
+    n = length(parent)
+    ndims(parent) > length(dims) && error("For DiskArrays, reshape is restricted to adding singleton dimensions")
+    prod(dims) == n || _throw_dmrs(n, "size", dims)
+    ipassed::Int=0
+    s = size(parent)
+    keepdim = map(s) do snow
+        while true
+            ipassed += 1
+            d = dims[ipassed]
+            if d>1
+                d != snow && error("For DiskArrays, reshape is restricted to adding singleton dimensions")
+                return ipassed
+            end
+        end
+    end
+    ReshapedDiskArray{eltype(parent),length(dims),typeof(parent),ndims(parent)}(parent,keepdim,dims)
+end
+
+import Base: _throw_dmrs
+import DiskArrays: splittuple, toRanges
+import Base.PermutedDimsArrays: genperm
+struct PermutedDiskArray{T,N,P<:PermutedDimsArray} <: AbstractDiskArray{T,N}
+    a::P
+end
+function permutedims_disk(a,perm)
+  pd = PermutedDimsArray(a,perm)
+  PermutedDiskArray{eltype(a),ndims(a),typeof(pd)}(pd)
+end
+Base.size(r::PermutedDiskArray) = size(r.a)
+haschunks(a::PermutedDiskArray) = haschunks(a.a.parent)
+eachchunk(a::PermutedDiskArray{T,N,<:PermutedDimsArray{T,N,perm,iperm}}) where {T,N,perm,iperm} = map(j->CartesianIndices(genperm(toRanges(j),perm)),eachchunk(a.a.parent))
+function DiskArrays.readblock!(a::PermutedDiskArray{T,N,<:PermutedDimsArray{T,N,perm,iperm}},aout,i...) where {T,N,perm,iperm}
+  inew = genperm(i, iperm)
+  DiskArrays.readblock!(a.a.parent,PermutedDimsArray(aout,iperm),inew...)
+  nothing
+end
+function DiskArrays.writeblock!(a::PermutedDiskArray{T,N,<:PermutedDimsArray{T,N,perm,iperm}},v,i...) where {T,N,perm,iperm}
+  inew = genperm(i, iperm)
+  DiskArrays.writeblock!(a.a.parent,PermutedDimsArray(v,iperm),inew...)
+  nothing
+end
+
+macro implement_reshape(t)
+  quote
+    Base._reshape(parent::$t, dims::NTuple{N,Int}) where N =
+      reshape_disk(parent, dims)
+  end
+end
+
+macro implement_permutedims(t)
+  quote
+    Base.permutedims(parent::$t, perm) =
+      permutedims_disk(parent, perm)
+  end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,4 +1,5 @@
 using DiskArrays
+using DiskArrays: ReshapedDiskArray, PermutedDiskArray
 using Test
 
 #Define a data structure that can be used for testing
@@ -12,6 +13,15 @@ _DiskArray(a;chunksize=size(a)) = _DiskArray(Ref(0),Ref(0),a,chunksize)
 Base.size(a::_DiskArray) = size(a.parent)
 DiskArrays.haschunks(::_DiskArray) = DiskArrays.Chunked()
 DiskArrays.eachchunk(a::_DiskArray) = DiskArrays.GridChunks(a,a.chunksize)
+getindex_count(a::_DiskArray) = a.getindex_count[]
+setindex_count(a::_DiskArray) = a.setindex_count[]
+trueparent(a::_DiskArray) = a.parent
+getindex_count(a::ReshapedDiskArray) = getindex_count(a.parent)
+setindex_count(a::ReshapedDiskArray) = setindex_count(a.parent)
+trueparent(a::ReshapedDiskArray) = trueparent(a.parent)
+getindex_count(a::PermutedDiskArray) = getindex_count(a.a.parent)
+setindex_count(a::PermutedDiskArray) = setindex_count(a.a.parent)
+trueparent(a::PermutedDiskArray{T,N,<:PermutedDimsArray{T,N,perm,iperm}}) where {T,N,perm,iperm} = permutedims(trueparent(a.a.parent),perm)
 function DiskArrays.readblock!(a::_DiskArray,aout,i...)
   ndims(a) == length(i) || error("Number of indices is not correct")
   all(r->isa(r,AbstractUnitRange),i) || error("Not all indices are unit ranges")
@@ -27,19 +37,7 @@ function DiskArrays.writeblock!(a::_DiskArray,v,i...)
   view(a.parent,i...) .= v
 end
 
-@testset "Index interpretation" begin
-  import DiskArrays: DimsDropper, Reshaper
-  a = zeros(3,3,1)
-  @test interpret_indices_disk(a, (:,2,:)) == ((Base.OneTo(3), 2:2, Base.OneTo(1)), DimsDropper{Tuple{Int64}}((2,)))
-  @test interpret_indices_disk(a, (1,2,:)) == ((1:1, 2:2, Base.OneTo(1)), DimsDropper{Tuple{Int64,Int64}}((1, 2)))
-  @test interpret_indices_disk(a, (1,2,2,1)) == ((1:1, 2:2, 2:2), DimsDropper{Tuple{Int64,Int64,Int64}}((1, 2, 3)))
-  @test interpret_indices_disk(a, (1,2,2,1)) == ((1:1, 2:2, 2:2), DimsDropper{Tuple{Int64,Int64,Int64}}((1, 2, 3)))
-  @test interpret_indices_disk(a, (:,1:2)) == ((Base.OneTo(3), 1:2, 1:1), DimsDropper{Tuple{Int64}}((3,)))
-  @test interpret_indices_disk(a, (:,)) == ((Base.OneTo(3), Base.OneTo(3), Base.OneTo(1)), DiskArrays.Reshaper{Int64}(9))
-end
-
-@testset "AbstractDiskArray getindex" begin
-  a = _DiskArray(reshape(1:20,4,5,1))
+function test_getindex(a)
   @test a[2,3,1] == 10
   @test a[2,3] == 10
   @test a[2,3,1,1] == 10
@@ -50,12 +48,10 @@ end
   m[2,:,1] .= true
   @test a[m] == [2,6,10,14,18]
   #Test that readblock was called exactly onces for every getindex
-  @test a.getindex_count[] == 6
+  @test getindex_count(a) == 6
 end
 
-
-@testset "AbstractDiskArray setindex" begin
-  a = _DiskArray(zeros(Int,4,5,1))
+function test_setindex(a)
   a[1,1,1] = 1
   a[1,2] = 2
   a[1,3,1,1] = 3
@@ -66,88 +62,147 @@ end
   m[4,:,1] .= true
   a[m] = [10,11,12,13,14]
   #Test that readblock was called exactly onces for every getindex
-  @test a.setindex_count[] == 6
-  @test a.parent[1,1:3,1] == [1,2,3]
-  @test a.parent[2,:,1] == [1,2,3,4,5]
-  @test a.parent[3,3:4,1] == [3,4]
-  @test a.parent[4,:,1] == [10,11,12,13,14]
+  @test setindex_count(a) == 6
+  @test trueparent(a)[1,1:3,1] == [1,2,3]
+  @test trueparent(a)[2,:,1] == [1,2,3,4,5]
+  @test trueparent(a)[3,3:4,1] == [3,4]
+  @test trueparent(a)[4,:,1] == [10,11,12,13,14]
 end
 
-@testset "Views" begin
-  a = _DiskArray(zeros(Int,4,5,1))
-v = view(a,2:3,2:4,1)
+function test_view(a)
+  v = view(a,2:3,2:4,1)
 
-v[1:2,1] = [1,2]
-v[1:2,2:3] = [4 4; 4 4]
-@test v[1:2,1] == [1,2]
-@test v[1:2,2:3] == [4 4; 4 4]
-@test a.parent[2:3,2] == [1,2]
-@test a.parent[2:3,3:4] == [4 4; 4 4]
-@test a.getindex_count[] == 2
-@test a.setindex_count[] == 2
+  v[1:2,1] = [1,2]
+  v[1:2,2:3] = [4 4; 4 4]
+  @test v[1:2,1] == [1,2]
+  @test v[1:2,2:3] == [4 4; 4 4]
+  @test trueparent(a)[2:3,2] == [1,2]
+  @test trueparent(a)[2:3,3:4] == [4 4; 4 4]
+  @test getindex_count(a) == 2
+  @test setindex_count(a) == 2
 end
 
-import Statistics: mean
-@testset "Reductions" begin
+function test_reductions(af)
   data = rand(10,20,2)
   for f in (minimum,maximum,sum,
             (i,args...;kwargs...)->all(j->j>0.1,i,args...;kwargs...),
             (i,args...;kwargs...)->any(j->j<0.1,i,args...;kwargs...),
             (i,args...;kwargs...)->mapreduce(x->2*x,+,i,args...;kwargs...))
-    a = _DiskArray(data,chunksize=(5,4,2))
+    a = af(data)
     @test isapprox(f(a),f(data))
-    @test a.getindex_count[] <= 10
+    @test getindex_count(a) <= 10
     #And test reduction along dimensions
     a = _DiskArray(data,chunksize=(5,4,2))
     @test all(isapprox.(f(a,dims=2),f(data,dims=2)))
     #The minimum and maximum functions do some initialization, which will increase
     #the number of reads
-    @test f in (minimum, maximum) || a.getindex_count[] <= 12
+    @test f in (minimum, maximum) || getindex_count(a) <= 12
     a = _DiskArray(data,chunksize=(5,4,2))
     @test all(isapprox.(f(a,dims=(1,3)),f(data,dims=(1,3))))
-    @test f in (minimum, maximum) || a.getindex_count[] <= 12
+    @test f in (minimum, maximum) || getindex_count(a) <= 12
   end
 end
 
-@testset "Broadcast" begin
-  a_disk1 = _DiskArray(rand(10,9,2), chunksize=(5,3,2))
+function test_broadcast(a_disk1)
   a_disk2 = _DiskArray(rand(1:10,1,9), chunksize=(1,3))
   a_mem   = reshape(1:2,1,1,2);
 
   s = a_disk1 .+ a_disk2
   #Test lazy broadcasting
   @test s isa DiskArrays.BroadcastDiskArray
-  @test a_disk1.getindex_count[]==0
-  @test a_disk1.setindex_count[]==0
-  @test a_disk2.getindex_count[]==0
-  @test a_disk2.setindex_count[]==0
+  @test getindex_count(a_disk1)==0
+  @test setindex_count(a_disk1)==0
+  @test getindex_count(a_disk2)==0
+  @test setindex_count(a_disk2)==0
   @test size(s)==(10,9,2)
   @test eltype(s) == Float64
   #Lets merge another broadcast
   s2 = s ./ a_mem
   @test s isa DiskArrays.BroadcastDiskArray
-  @test a_disk1.getindex_count[]==0
-  @test a_disk2.getindex_count[]==0
+  @test getindex_count(a_disk1)==0
+  @test getindex_count(a_disk2)==0
   @test size(s)==(10,9,2)
   @test eltype(s) == Float64
   #And now do the computation with Array as a sink
   aout = zeros(10,9,2)
   aout .= s2
   #Test if the result is correct
   @test aout == (a_disk1.parent .+ a_disk2.parent)./a_mem
-  @test a_disk1.getindex_count[]==6
-  @test a_disk2.getindex_count[]==6
+  @test getindex_count(a_disk1)==6
+  @test getindex_count(a_disk2)==6
   #Now use another DiskArray as the output
   aout = _DiskArray(zeros(10,9,2),chunksize=(5,3,2))
   aout .= s ./ a_mem
   @test aout.parent == (a_disk1.parent .+ a_disk2.parent)./a_mem
-  @test aout.setindex_count[]==6
-  @test a_disk1.getindex_count[]==12
-  @test a_disk2.getindex_count[]==12
+  @test setindex_count(aout)==6
+  @test getindex_count(a_disk1)==12
+  @test getindex_count(a_disk2)==12
   #Test reduction of broadcasted expression
   r = sum(s2, dims=(1,2))
   @test all(isapprox.(sum((a_disk1.parent .+ a_disk2.parent)./a_mem,dims=(1,2)),r))
-  @test a_disk1.getindex_count[]==18
-  @test a_disk2.getindex_count[]==18
+  @test getindex_count(a_disk1)==18
+  @test getindex_count(a_disk2)==18
+end
+
+@testset "Index interpretation" begin
+  import DiskArrays: DimsDropper, Reshaper
+  a = zeros(3,3,1)
+  @test interpret_indices_disk(a, (:,2,:)) == ((Base.OneTo(3), 2:2, Base.OneTo(1)), DimsDropper{Tuple{Int64}}((2,)))
+  @test interpret_indices_disk(a, (1,2,:)) == ((1:1, 2:2, Base.OneTo(1)), DimsDropper{Tuple{Int64,Int64}}((1, 2)))
+  @test interpret_indices_disk(a, (1,2,2,1)) == ((1:1, 2:2, 2:2), DimsDropper{Tuple{Int64,Int64,Int64}}((1, 2, 3)))
+  @test interpret_indices_disk(a, (1,2,2,1)) == ((1:1, 2:2, 2:2), DimsDropper{Tuple{Int64,Int64,Int64}}((1, 2, 3)))
+  @test interpret_indices_disk(a, (:,1:2)) == ((Base.OneTo(3), 1:2, 1:1), DimsDropper{Tuple{Int64}}((3,)))
+  @test interpret_indices_disk(a, (:,)) == ((Base.OneTo(3), Base.OneTo(3), Base.OneTo(1)), DiskArrays.Reshaper{Int64}(9))
+end
+
+@testset "AbstractDiskArray getindex" begin
+  a = _DiskArray(reshape(1:20,4,5,1))
+  test_getindex(a)
+end
+
+
+@testset "AbstractDiskArray setindex" begin
+  a = _DiskArray(zeros(Int,4,5,1))
+  test_setindex(a)
+end
+
+@testset "Views" begin
+  a = _DiskArray(zeros(Int,4,5,1))
+  test_view(a)
+end
+
+import Statistics: mean
+@testset "Reductions" begin
+  a = data -> _DiskArray(data,chunksize=(5,4,2))
+  test_reductions(a)
+end
+
+@testset "Broadcast" begin
+  a_disk1 = _DiskArray(rand(10,9,2), chunksize=(5,3,2))
+  test_broadcast(a_disk1)
+end
+
+@testset "Reshape" begin
+  a = reshape(_DiskArray(reshape(1:20,4,5)),4,5,1)
+  test_getindex(a)
+  a = reshape(_DiskArray(zeros(Int,4,5)),4,5,1)
+  test_setindex(a)
+  a = reshape(_DiskArray(zeros(Int,4,5)),4,5,1)
+  test_view(a)
+  a = data -> reshape(_DiskArray(data,chunksize=(5,4,2)),10,20,2,1)
+  test_reductions(a)
+end
 
+import Base.PermutedDimsArrays.invperm
+@testset "Permutedims" begin
+  p = (3,1,2)
+  ip = invperm(p)
+  a = permutedims(_DiskArray(permutedims(reshape(1:20,4,5,1),ip)),p)
+  test_getindex(a)
+  a = permutedims(_DiskArray(zeros(Int,5,1,4)),p)
+  test_setindex(a)
+  a = permutedims(_DiskArray(zeros(Int,5,1,4)),p)
+  test_view(a)
+  a = data -> permutedims(_DiskArray(permutedims(data,ip),chunksize=(4,2,5)),p)
+  test_reductions(a)
 end