Skip to content

Commit

Permalink
Finished Implementation of Reshape and Permutedims (#8)
Browse files Browse the repository at this point in the history
* Implementation of Reshape and Permutedims

* Add unit tests

* Make definition a macro
  • Loading branch information
meggart authored Feb 17, 2020
1 parent 9883569 commit f84f40a
Show file tree
Hide file tree
Showing 4 changed files with 206 additions and 55 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DiskArrays"
uuid = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
authors = ["Fabian Gans <[email protected]>"]
version = "0.1.0"
version = "0.1.1"

[deps]

Expand Down
3 changes: 3 additions & 0 deletions src/DiskArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ include("chunks.jl")
include("ops.jl")
include("iterator.jl")
include("subarrays.jl")
include("permute_reshape.jl")

#The all-in-one macro
macro implement_diskarray(t)
Expand All @@ -200,6 +201,8 @@ quote
@implement_broadcast $t
@implement_iteration $t
@implement_mapreduce $t
@implement_reshape $t
@implement_permutedims $t
end
end

Expand Down
93 changes: 93 additions & 0 deletions src/permute_reshape.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import Base: _throw_dmrs
import DiskArrays: splittuple
#Reshaping is really not trivial, because the access pattern would completely change for reshaped arrays,
#rectangles would not remain rectangles in the parent array. However, we can support the case where only
#singleton dimensions are added, later we could allow more special cases like joining two dimensions to one
struct ReshapedDiskArray{T,N,P<:AbstractArray,M} <: AbstractDiskArray{T,N}
parent::P
keepdim::NTuple{M,Int}
newsize::NTuple{N,Int}
end
Base.size(r::ReshapedDiskArray) = r.newsize
haschunks(a::ReshapedDiskArray) = haschunks(a.parent)
eachchunk(a::ReshapedDiskArray{<:Any,N}) where N = map(eachchunk(a.parent)) do j
r = toRanges(j)
inow::Int = 0
ntuple(N) do i
if i in a.keepdim
inow+=1
r[inow]
else
1:1
end
end::NTuple{N,UnitRange{Int}}
end
function DiskArrays.readblock!(a::ReshapedDiskArray,aout,i...)
inew = tuple_tuple_getindex(i,a.keepdim)
DiskArrays.readblock!(a.parent,reshape(aout,map(length,inew)),inew...)
nothing
end
tuple_tuple_getindex(t,i) = _ttgi((),t,i...)
_ttgi(o,t,i1,irest...) = _ttgi((o...,t[i1]),t,irest...)
_ttgi(o,t,i1) = (o...,t[i1])
function DiskArrays.writeblock!(a::ReshapedDiskArray,v,i...)
inew = tuple_tuple_getindex(i,a.keepdim)
DiskArrays.writeblock!(a.parent,reshape(v,map(length,inew)),inew...)
nothing
end
function reshape_disk(parent, dims)
n = length(parent)
ndims(parent) > length(dims) && error("For DiskArrays, reshape is restricted to adding singleton dimensions")
prod(dims) == n || _throw_dmrs(n, "size", dims)
ipassed::Int=0
s = size(parent)
keepdim = map(s) do snow
while true
ipassed += 1
d = dims[ipassed]
if d>1
d != snow && error("For DiskArrays, reshape is restricted to adding singleton dimensions")
return ipassed
end
end
end
ReshapedDiskArray{eltype(parent),length(dims),typeof(parent),ndims(parent)}(parent,keepdim,dims)
end

import Base: _throw_dmrs
import DiskArrays: splittuple, toRanges
import Base.PermutedDimsArrays: genperm
struct PermutedDiskArray{T,N,P<:PermutedDimsArray} <: AbstractDiskArray{T,N}
a::P
end
function permutedims_disk(a,perm)
pd = PermutedDimsArray(a,perm)
PermutedDiskArray{eltype(a),ndims(a),typeof(pd)}(pd)
end
Base.size(r::PermutedDiskArray) = size(r.a)
haschunks(a::PermutedDiskArray) = haschunks(a.a.parent)
eachchunk(a::PermutedDiskArray{T,N,<:PermutedDimsArray{T,N,perm,iperm}}) where {T,N,perm,iperm} = map(j->CartesianIndices(genperm(toRanges(j),perm)),eachchunk(a.a.parent))
function DiskArrays.readblock!(a::PermutedDiskArray{T,N,<:PermutedDimsArray{T,N,perm,iperm}},aout,i...) where {T,N,perm,iperm}
inew = genperm(i, iperm)
DiskArrays.readblock!(a.a.parent,PermutedDimsArray(aout,iperm),inew...)
nothing
end
function DiskArrays.writeblock!(a::PermutedDiskArray{T,N,<:PermutedDimsArray{T,N,perm,iperm}},v,i...) where {T,N,perm,iperm}
inew = genperm(i, iperm)
DiskArrays.writeblock!(a.a.parent,PermutedDimsArray(v,iperm),inew...)
nothing
end

macro implement_reshape(t)
quote
Base._reshape(parent::$t, dims::NTuple{N,Int}) where N =
reshape_disk(parent, dims)
end
end

macro implement_permutedims(t)
quote
Base.permutedims(parent::$t, perm) =
permutedims_disk(parent, perm)
end
end
163 changes: 109 additions & 54 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using DiskArrays
using DiskArrays: ReshapedDiskArray, PermutedDiskArray
using Test

#Define a data structure that can be used for testing
Expand All @@ -12,6 +13,15 @@ _DiskArray(a;chunksize=size(a)) = _DiskArray(Ref(0),Ref(0),a,chunksize)
Base.size(a::_DiskArray) = size(a.parent)
DiskArrays.haschunks(::_DiskArray) = DiskArrays.Chunked()
DiskArrays.eachchunk(a::_DiskArray) = DiskArrays.GridChunks(a,a.chunksize)
getindex_count(a::_DiskArray) = a.getindex_count[]
setindex_count(a::_DiskArray) = a.setindex_count[]
trueparent(a::_DiskArray) = a.parent
getindex_count(a::ReshapedDiskArray) = getindex_count(a.parent)
setindex_count(a::ReshapedDiskArray) = setindex_count(a.parent)
trueparent(a::ReshapedDiskArray) = trueparent(a.parent)
getindex_count(a::PermutedDiskArray) = getindex_count(a.a.parent)
setindex_count(a::PermutedDiskArray) = setindex_count(a.a.parent)
trueparent(a::PermutedDiskArray{T,N,<:PermutedDimsArray{T,N,perm,iperm}}) where {T,N,perm,iperm} = permutedims(trueparent(a.a.parent),perm)
function DiskArrays.readblock!(a::_DiskArray,aout,i...)
ndims(a) == length(i) || error("Number of indices is not correct")
all(r->isa(r,AbstractUnitRange),i) || error("Not all indices are unit ranges")
Expand All @@ -27,19 +37,7 @@ function DiskArrays.writeblock!(a::_DiskArray,v,i...)
view(a.parent,i...) .= v
end

@testset "Index interpretation" begin
import DiskArrays: DimsDropper, Reshaper
a = zeros(3,3,1)
@test interpret_indices_disk(a, (:,2,:)) == ((Base.OneTo(3), 2:2, Base.OneTo(1)), DimsDropper{Tuple{Int64}}((2,)))
@test interpret_indices_disk(a, (1,2,:)) == ((1:1, 2:2, Base.OneTo(1)), DimsDropper{Tuple{Int64,Int64}}((1, 2)))
@test interpret_indices_disk(a, (1,2,2,1)) == ((1:1, 2:2, 2:2), DimsDropper{Tuple{Int64,Int64,Int64}}((1, 2, 3)))
@test interpret_indices_disk(a, (1,2,2,1)) == ((1:1, 2:2, 2:2), DimsDropper{Tuple{Int64,Int64,Int64}}((1, 2, 3)))
@test interpret_indices_disk(a, (:,1:2)) == ((Base.OneTo(3), 1:2, 1:1), DimsDropper{Tuple{Int64}}((3,)))
@test interpret_indices_disk(a, (:,)) == ((Base.OneTo(3), Base.OneTo(3), Base.OneTo(1)), DiskArrays.Reshaper{Int64}(9))
end

@testset "AbstractDiskArray getindex" begin
a = _DiskArray(reshape(1:20,4,5,1))
function test_getindex(a)
@test a[2,3,1] == 10
@test a[2,3] == 10
@test a[2,3,1,1] == 10
Expand All @@ -50,12 +48,10 @@ end
m[2,:,1] .= true
@test a[m] == [2,6,10,14,18]
#Test that readblock was called exactly onces for every getindex
@test a.getindex_count[] == 6
@test getindex_count(a) == 6
end


@testset "AbstractDiskArray setindex" begin
a = _DiskArray(zeros(Int,4,5,1))
function test_setindex(a)
a[1,1,1] = 1
a[1,2] = 2
a[1,3,1,1] = 3
Expand All @@ -66,88 +62,147 @@ end
m[4,:,1] .= true
a[m] = [10,11,12,13,14]
#Test that readblock was called exactly onces for every getindex
@test a.setindex_count[] == 6
@test a.parent[1,1:3,1] == [1,2,3]
@test a.parent[2,:,1] == [1,2,3,4,5]
@test a.parent[3,3:4,1] == [3,4]
@test a.parent[4,:,1] == [10,11,12,13,14]
@test setindex_count(a) == 6
@test trueparent(a)[1,1:3,1] == [1,2,3]
@test trueparent(a)[2,:,1] == [1,2,3,4,5]
@test trueparent(a)[3,3:4,1] == [3,4]
@test trueparent(a)[4,:,1] == [10,11,12,13,14]
end

@testset "Views" begin
a = _DiskArray(zeros(Int,4,5,1))
v = view(a,2:3,2:4,1)
function test_view(a)
v = view(a,2:3,2:4,1)

v[1:2,1] = [1,2]
v[1:2,2:3] = [4 4; 4 4]
@test v[1:2,1] == [1,2]
@test v[1:2,2:3] == [4 4; 4 4]
@test a.parent[2:3,2] == [1,2]
@test a.parent[2:3,3:4] == [4 4; 4 4]
@test a.getindex_count[] == 2
@test a.setindex_count[] == 2
v[1:2,1] = [1,2]
v[1:2,2:3] = [4 4; 4 4]
@test v[1:2,1] == [1,2]
@test v[1:2,2:3] == [4 4; 4 4]
@test trueparent(a)[2:3,2] == [1,2]
@test trueparent(a)[2:3,3:4] == [4 4; 4 4]
@test getindex_count(a) == 2
@test setindex_count(a) == 2
end

import Statistics: mean
@testset "Reductions" begin
function test_reductions(af)
data = rand(10,20,2)
for f in (minimum,maximum,sum,
(i,args...;kwargs...)->all(j->j>0.1,i,args...;kwargs...),
(i,args...;kwargs...)->any(j->j<0.1,i,args...;kwargs...),
(i,args...;kwargs...)->mapreduce(x->2*x,+,i,args...;kwargs...))
a = _DiskArray(data,chunksize=(5,4,2))
a = af(data)
@test isapprox(f(a),f(data))
@test a.getindex_count[] <= 10
@test getindex_count(a) <= 10
#And test reduction along dimensions
a = _DiskArray(data,chunksize=(5,4,2))
@test all(isapprox.(f(a,dims=2),f(data,dims=2)))
#The minimum and maximum functions do some initialization, which will increase
#the number of reads
@test f in (minimum, maximum) || a.getindex_count[] <= 12
@test f in (minimum, maximum) || getindex_count(a) <= 12
a = _DiskArray(data,chunksize=(5,4,2))
@test all(isapprox.(f(a,dims=(1,3)),f(data,dims=(1,3))))
@test f in (minimum, maximum) || a.getindex_count[] <= 12
@test f in (minimum, maximum) || getindex_count(a) <= 12
end
end

@testset "Broadcast" begin
a_disk1 = _DiskArray(rand(10,9,2), chunksize=(5,3,2))
function test_broadcast(a_disk1)
a_disk2 = _DiskArray(rand(1:10,1,9), chunksize=(1,3))
a_mem = reshape(1:2,1,1,2);

s = a_disk1 .+ a_disk2
#Test lazy broadcasting
@test s isa DiskArrays.BroadcastDiskArray
@test a_disk1.getindex_count[]==0
@test a_disk1.setindex_count[]==0
@test a_disk2.getindex_count[]==0
@test a_disk2.setindex_count[]==0
@test getindex_count(a_disk1)==0
@test setindex_count(a_disk1)==0
@test getindex_count(a_disk2)==0
@test setindex_count(a_disk2)==0
@test size(s)==(10,9,2)
@test eltype(s) == Float64
#Lets merge another broadcast
s2 = s ./ a_mem
@test s isa DiskArrays.BroadcastDiskArray
@test a_disk1.getindex_count[]==0
@test a_disk2.getindex_count[]==0
@test getindex_count(a_disk1)==0
@test getindex_count(a_disk2)==0
@test size(s)==(10,9,2)
@test eltype(s) == Float64
#And now do the computation with Array as a sink
aout = zeros(10,9,2)
aout .= s2
#Test if the result is correct
@test aout == (a_disk1.parent .+ a_disk2.parent)./a_mem
@test a_disk1.getindex_count[]==6
@test a_disk2.getindex_count[]==6
@test getindex_count(a_disk1)==6
@test getindex_count(a_disk2)==6
#Now use another DiskArray as the output
aout = _DiskArray(zeros(10,9,2),chunksize=(5,3,2))
aout .= s ./ a_mem
@test aout.parent == (a_disk1.parent .+ a_disk2.parent)./a_mem
@test aout.setindex_count[]==6
@test a_disk1.getindex_count[]==12
@test a_disk2.getindex_count[]==12
@test setindex_count(aout)==6
@test getindex_count(a_disk1)==12
@test getindex_count(a_disk2)==12
#Test reduction of broadcasted expression
r = sum(s2, dims=(1,2))
@test all(isapprox.(sum((a_disk1.parent .+ a_disk2.parent)./a_mem,dims=(1,2)),r))
@test a_disk1.getindex_count[]==18
@test a_disk2.getindex_count[]==18
@test getindex_count(a_disk1)==18
@test getindex_count(a_disk2)==18
end

@testset "Index interpretation" begin
import DiskArrays: DimsDropper, Reshaper
a = zeros(3,3,1)
@test interpret_indices_disk(a, (:,2,:)) == ((Base.OneTo(3), 2:2, Base.OneTo(1)), DimsDropper{Tuple{Int64}}((2,)))
@test interpret_indices_disk(a, (1,2,:)) == ((1:1, 2:2, Base.OneTo(1)), DimsDropper{Tuple{Int64,Int64}}((1, 2)))
@test interpret_indices_disk(a, (1,2,2,1)) == ((1:1, 2:2, 2:2), DimsDropper{Tuple{Int64,Int64,Int64}}((1, 2, 3)))
@test interpret_indices_disk(a, (1,2,2,1)) == ((1:1, 2:2, 2:2), DimsDropper{Tuple{Int64,Int64,Int64}}((1, 2, 3)))
@test interpret_indices_disk(a, (:,1:2)) == ((Base.OneTo(3), 1:2, 1:1), DimsDropper{Tuple{Int64}}((3,)))
@test interpret_indices_disk(a, (:,)) == ((Base.OneTo(3), Base.OneTo(3), Base.OneTo(1)), DiskArrays.Reshaper{Int64}(9))
end

@testset "AbstractDiskArray getindex" begin
a = _DiskArray(reshape(1:20,4,5,1))
test_getindex(a)
end


@testset "AbstractDiskArray setindex" begin
a = _DiskArray(zeros(Int,4,5,1))
test_setindex(a)
end

@testset "Views" begin
a = _DiskArray(zeros(Int,4,5,1))
test_view(a)
end

import Statistics: mean
@testset "Reductions" begin
a = data -> _DiskArray(data,chunksize=(5,4,2))
test_reductions(a)
end

@testset "Broadcast" begin
a_disk1 = _DiskArray(rand(10,9,2), chunksize=(5,3,2))
test_broadcast(a_disk1)
end

@testset "Reshape" begin
a = reshape(_DiskArray(reshape(1:20,4,5)),4,5,1)
test_getindex(a)
a = reshape(_DiskArray(zeros(Int,4,5)),4,5,1)
test_setindex(a)
a = reshape(_DiskArray(zeros(Int,4,5)),4,5,1)
test_view(a)
a = data -> reshape(_DiskArray(data,chunksize=(5,4,2)),10,20,2,1)
test_reductions(a)
end

import Base.PermutedDimsArrays.invperm
@testset "Permutedims" begin
p = (3,1,2)
ip = invperm(p)
a = permutedims(_DiskArray(permutedims(reshape(1:20,4,5,1),ip)),p)
test_getindex(a)
a = permutedims(_DiskArray(zeros(Int,5,1,4)),p)
test_setindex(a)
a = permutedims(_DiskArray(zeros(Int,5,1,4)),p)
test_view(a)
a = data -> permutedims(_DiskArray(permutedims(data,ip),chunksize=(4,2,5)),p)
test_reductions(a)
end

2 comments on commit f84f40a

@meggart
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/9703

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if Julia TagBot is installed, or can be done manually through the github interface, or via:

git tag -a v0.1.1 -m "<description of version>" f84f40afe9b0d381732103b692133d641d30b7e8
git push origin v0.1.1

Please sign in to comment.