Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tableindex #184

Merged
merged 3 commits into from
Oct 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ jobs:
matrix:
version:
- '1.6'
- '1'
os:
- ubuntu-latest
- macOS-latest
Expand Down
12 changes: 12 additions & 0 deletions docs/src/expl/expl.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
## Indexing and subsetting

As for most array types, YAXArray also provides special indexing behavior when using the square brackets for indexing.
Assuming that `c` is a YAXArray, there are 3 different semantics to use the square brackets with, depending on the types of the arguments
provided to getindex.

1. **Ranges and Integers only** as for example `c[1,4:8,:]` will access the underlying data according to the provided index in index space and read the data *into memory* as a plain Julia Array.
It is equivalent to `c.data[1,4:8,:]`.
2. **Keyword arguments with values or Intervals** as for example `c[longitude = 30..50, time=Date(2005,6,1), variable="air_temperature"]`.
This always creates a *view* into the specified subset of the data and return a new YAXArray with new axes without reading the data. Intervals and
values are always interpreted in the units as provided by the axis values.
3. **A Tables.jl-compatible object** for irregular extraction of a list of points or sub-arrays and random locations. For example calling `c[[(lon=30,lat=42),(lon=-50,lat=2.5)]]` will extract data at the specified coordinates and along all additional axes into memory. It returns a new YAXArray with a new Multi-Index axis along the selected longitudes and latitudes.
87 changes: 86 additions & 1 deletion src/Cubes/Cubes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ using YAXArrayBase: YAXArrayBase, iscompressed, dimnames, iscontdimval
import YAXArrayBase: getattributes, iscontdim, dimnames, dimvals, getdata
using DiskArrayTools: CFDiskArray
using DocStringExtensions
using Tables: istable, schema, columns

export concatenatecubes, caxes, subsetcube, readcubedata, renameaxis!, YAXArray, setchunks

Expand Down Expand Up @@ -213,7 +214,91 @@ setchunks(c::YAXArray,chunks) = YAXArray(c.axes,c.data,c.properties,interpret_cu
cubechunks(c) = approx_chunksize(eachchunk(c))
DiskArrays.eachchunk(c::YAXArray) = c.chunks
getindex_all(a) = getindex(a, ntuple(_ -> Colon(), ndims(a))...)
Base.getindex(x::YAXArray, i...) = getdata(x)[i...]
function Base.getindex(x::YAXArray, i...)
if length(i)==1 && istable(first(i))
batchextract(x,first(i))
else
getdata(x)[i...]
end
end
function batchextract(x,i)
sch = schema(i)
axinds = map(sch.names) do n
findAxis(n,x)
end

tcols = columns(i)
#Try to find a column denoting new axis name and values
newaxcol = nothing
if any(isnothing,axinds)
allnothings = findall(isnothing,axinds)
if length(allnothings) == 1
newaxcol = allnothings[1]
end
axinds = filter(!isnothing,axinds)
end
allax = 1:ndims(x)
axrem = setdiff(allax,axinds)
ai1, ai2 = extrema(axinds)
if !all(diff(sort(collect(axinds))).==1)
#Axes to be extracted from are not consecutive in cube -> permute
p = [1:(ai1-1);collect(axinds);filter(!in(axinds),ai1:ai2);(ai2+1:ndims(x))]
x_perm = permutedims(x,p)
return batchextract(x_perm,i)
end
cartinds = map(axinds,tcols) do iax,col
axcur = caxes(x)[iax]
map(col) do val
axVal2Index(axcur,val)
end
end

before = ntuple(_->Colon(),ai1-1)
after = ntuple(_->Colon(),ndims(x)-ai2)
sp = issorted(axinds) ? nothing : sortperm(collect(axinds))
function makeindex(sp, inds...)
if sp === nothing
CartesianIndex(inds...)
else
CartesianIndex(inds[sp]...)
end
end
indlist = makeindex.(Ref(sp),cartinds...)
d = getdata(x)[before...,indlist,after...]
cax = caxes(x)
newax = if newaxcol == nothing
outaxis_from_data(cax,axinds,indlist)
else
outaxis_from_column(i,newaxcol)
end
outax = CubeAxis[axcopy(a) for a in cax][axrem]
insert!(outax,minimum(axinds),newax)
YAXArray(outax,d,x.properties)
end

function outaxis_from_column(tab,icol)
axdata = columns(tab)[icol]
axname = schema(tab).names[icol]
if eltype(axdata) <: AbstractString ||
(!issorted(axdata) && !issorted(axdata, rev = true))
CategoricalAxis(axname, axdata)
else
RangeAxis(axname, axdata)
end
end

function outaxis_from_data(cax,axinds,indlist)
mergeaxes = getindex.(Ref(cax),axinds)
mergenames = axname.(mergeaxes)
newname = join(mergenames,'_')
minai = minimum(axinds)
mergevals = map(indlist) do i
broadcast(mergeaxes,axinds) do ax,ai
ax.values[i[ai-minai+1]]
end
end
CategoricalAxis(newname, mergevals)
end
chunkoffset(c) = grid_offset(eachchunk(c))

# Implementation for YAXArrayBase interface
Expand Down
53 changes: 53 additions & 0 deletions test/Cubes/batchextraction.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
using Test

@testset "Batch extraction along multiple axes" begin
lons = range(30,35,step=0.25)
lats = range(50,55,step=0.25)
times = Date(2000,1,1):Month(1):Date(2000,12,31)

data = rand(length(lons),length(lats), length(times));

c = YAXArray([RangeAxis("longitude",lons),RangeAxis("latitude",lats),RangeAxis("time",times)],data)
c_perm = permutedims(c,(3,2,1))


sites_names = [(lon = rand()*5+30, lat = rand()*5+50,site = string(i)) for i in 1:200]
sites_pure = [(lon = n.lon, lat=n.lat) for n in sites_names]
lon,lat = sites_pure[10]

r = c[sites_names]
@test r isa YAXArray
@test YAXArrays.Cubes.axname.(caxes(r)) == ["site","time"]
@test r.site.values == string.(1:200)
@test all(isequal.(c[lon=lon,lat=lat][:], r[10,:]))

r = c_perm[sites_names]
@test r isa YAXArray
@test YAXArrays.Cubes.axname.(caxes(r)) == ["time","site"]
@test r.site.values == string.(1:200)
@test all(isequal.(c[lon=lon,lat=lat][:], r[:,10]))

r = c[sites_pure]
@test r isa YAXArray
@test YAXArrays.Cubes.axname.(caxes(r)) == ["longitude_latitude","time"]
map(r.longitude_latitude.values,[(n.lon,n.lat) for n in sites_pure]) do ll, ll_real
abs(ll[1]-ll_real[1]) <= 0.125 && abs(ll[2]-ll_real[2]) <= 0.125
end |> all
@test all(isequal.(c[lon=lon,lat=lat][:], r[10,:]))

r = c_perm[sites_pure]
@test r isa YAXArray
@test YAXArrays.Cubes.axname.(caxes(r)) == ["time","longitude_latitude"]
map(r.longitude_latitude.values,[(n.lon,n.lat) for n in sites_pure]) do ll, ll_real
abs(ll[1]-ll_real[1]) <= 0.125 && abs(ll[2]-ll_real[2]) <= 0.125
end |> all
@test all(isequal.(c[lon=lon,lat=lat][:], r[:,10]))

othersites = [(lon=32.0,time=Date(2000,6,1),point=3),(lon=33.0,time=Date(2000,7,1),point=5)]
r = c[othersites]
@test r isa YAXArray
@test YAXArrays.Cubes.axname.(caxes(r)) == ["point","latitude"]
@test r.point.values == [3,5]
@test c[lon=33.0,time=Date(2000,7,1)][:] == r[point=5][:]

end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ include("tools.jl")
include("Cubes/axes.jl")
include("Cubes/cubes.jl")
include("Cubes/transformedcubes.jl")
include("Cubes/batchextraction.jl")

include("Datasets/datasets.jl")

Expand Down