Skip to content

Commit

Permalink
Add shapely geometry provider (#3)
Browse files Browse the repository at this point in the history
* Add shapely geometry provider

* Tidyup

* Usage docs
  • Loading branch information
DPeterK authored Jul 22, 2022
1 parent f974ec3 commit 42c9985
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 9 deletions.
73 changes: 73 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,79 @@ conda activate shapecutter

And then install shapecutter, as above.

## Usage

Shapecutter takes a gridded data source and a geometry source and returns a new dataset that is the result of cutting data out of the data source that intersects with a specified geometry in the geometry source. Two levels of cutting are provided:
1. `"bbox"` (coarse cutting): the data source is subset so that only data points that intersect (are contained by) the bounding box of the selected geometry are returned
1. `"boundary"` (fine cutting): data points that directly intersect with the selected geometry (and within the geometry's bounding box) are returned. To maintain a gridded dataset, this is implemented by masking points returned in (1.) that do not intersect the geometry.

### Cartopy and GeoPandas examples

With cartopy or GeoPandas providing a

An example of cutting a gridded data source to a geometry provided by `cartopy`:

```python
import cartopy.io.shapereader as shpreader
import iris
import shapecutter

data_source = iris.load_cube("my_dataset.nc")
geom_source = shpreader.Reader("my_shapefile.shp")
geom_ref = "devon" # Select a geometry from the name of a record in `geom_source`.

cutter = shapecutter.Cutter(data_source, geom_source)
# 1. coarse cutting:
bbox_cut_result = cutter.cut_dataset(geom_ref)
# 2. fine cutting:
fine_cut_result = cutter.cut_dataset(geom_ref, to="boundary")
```

An example of cutting a gridded data source to a geometry provided by `GeoPandas`:

```python
import geopandas as gpd
import iris
import shapecutter

data_source = iris.load_cube("my_dataset.nc")
geom_source = gpd.read_file("my_shapefile.shp")
geom_ref = "devon" # Select a geometry from the name of a record in `geom_source`.

cutter = shapecutter.Cutter(data_source, geom_source)
# 1. coarse cutting:
bbox_cut_result = cutter.cut_dataset(geom_ref)
# 2. fine cutting:
fine_cut_result = cutter.cut_dataset(geom_ref, to="boundary")
```

Note that you can also pass the shapefile source directly to the `Cutter` class and let `shapecutter` load your geometry source for you:

```python
cutter = shapecutter.Cutter(data_source, "my_shapefile.shp")
```

### Shapely geometry source

If you have just a Shapely geometry instance (for example, one that you have manually constructed from a list of points), you can utilise the geometry directly. Note that in this case there is no records metadata available (it's just the single geometry) so we just supply an empty string as the geometry reference when cutting the data source to the Shapely geometry:

```python
import iris
import shapecutter
from shapely.geometry import Polygon

data_source = iris.load_cube("my_dataset.nc")
data_points = [...]
my_polygon = Polygon(data_points)

cutter = shapecutter.Cutter(data_source, my_polygon)
# 1. coarse cutting:
bbox_cut_result = cutter.cut_dataset("")
# 2. fine cutting:
fine_cut_result = cutter.cut_dataset("", to="boundary")
```


## Testing

See the [testing README](https://github.com/informatics-lab/shapecutter/blob/main/shapecutter/tests/integration/README.md) for more details.
4 changes: 2 additions & 2 deletions shapecutter/cutter.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def geometry_boundary_mask(self, dataset, geometry_ref):
x_lo, x_hi = x_coord[xi].bounds[0]
y_lo, y_hi = y_coord[yi].bounds[0]
cell = box(x_lo, y_lo, x_hi, y_hi)
mask_point = geometry.intersects(cell)
mask_point = cell.intersects(geometry)
flat_mask.append(mask_point)

return np.array(flat_mask).reshape(dataset.shape[-2:])
Expand Down Expand Up @@ -148,7 +148,7 @@ def cut_dataset(self, geometry_ref, to="bbox"):
# XXX cache the boundary cutting?
result = self._cut_to_boundary(subset, geometry_ref)
else:
emsg = f"`to` must be one of ['bbox', 'boundary'], got {to}."
emsg = f"`to` must be one of ['bbox', 'boundary'], got {to!r}."
raise ValueError(emsg)

return result
18 changes: 14 additions & 4 deletions shapecutter/providers/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ def __init__(self, data_source):
def __repr__(self):
raise NotImplemented

def __getattr__(self, attr):
return getattr(self.data_source, attr)

def get_xmax(self):
"""Get the maximum value of the x-axis coordinate."""
raise NotImplemented
Expand All @@ -25,6 +22,12 @@ def get_axis_name(self, axis):
"""Get the name of the coordinate describing a particular axis."""
raise NotImplemented

def coord(self, name):
raise NotImplemented

def coords(self, kwargs):
raise NotImplemented

def extract(self, keys):
"""Extract a other from the dataset based on one or more metadata queries."""
raise NotImplemented
Expand Down Expand Up @@ -56,7 +59,14 @@ def _confirm_bounds(self):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
for coord in [x_coord, y_coord]:
coord.guess_bounds()
if not coord.has_bounds():
coord.guess_bounds()

def coord(self, name):
return self.data_source.coord(name)

def coords(self, **kwargs):
return self.data_source.coords(**kwargs)

def extract(self, keys):
cstr = iris.Constraint(coord_values=keys)
Expand Down
38 changes: 35 additions & 3 deletions shapecutter/providers/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import cartopy.io.shapereader as shpreader
import geopandas as gpd
from shapely.geometry import LineString, Polygon


class _GeometryProvider(object):
Expand Down Expand Up @@ -30,9 +31,6 @@ def __repr__(self):
def __getitem__(self, keys):
raise NotImplemented

def __getattr__(self, attr):
return getattr(self.geometry_source, attr)

def load(self, source):
raise NotImplemented

Expand Down Expand Up @@ -119,11 +117,45 @@ def get_named_bound(self, geometry_ref, bound_ref):
return bounds.loc[bounds.index[0]][bound_ref]


class ShapelyGeometryProvider(_GeometryProvider):
"""Shapely geometry provider."""
def __init__(self, geometry_source):
super().__init__(geometry_source)
self._names = ["minx", "miny", "maxx", "maxy"] # Follows geopandas bounds names.
self._named_bounds = {n: self._names.index(n) for n in self._names}

def __repr__(self):
t = self.__class__.__name__
l = len(self.geometry_source)
repr_str = f"{t} containing {l} geometries as a Shapely Geometry."
return repr_str

def __getitem__(self, _):
return self.geometry_source

def load(self, _):
self.geometry_source = self._geometry_source

def get_bounds_points(self, geometry_ref):
return self.get_bounds(geometry_ref)

def get_named_bound(self, geometry_ref, bound_ref):
bounds = self.get_bounds(geometry_ref)
try:
bound_idx = self._named_bounds[bound_ref]
except KeyError:
emsg = f"Expected `bound_ref` to be one of {self._names}, got {bound_ref!r}."
raise ValueError(emsg)
return bounds[bound_idx]


def select_best_geometry_provider(geometry_source):
if isinstance(geometry_source, shpreader.FionaReader):
provider = CartopyGeometryProvider(geometry_source)
elif isinstance(geometry_source, (gpd.geodataframe.GeoDataFrame, str)):
provider = GeoPandasGeometryProvider(geometry_source)
elif isinstance(geometry_source, (LineString, Polygon)):
provider = ShapelyGeometryProvider(geometry_source)
else:
raise TypeError("No suitable geometry provider found.")
return provider

0 comments on commit 42c9985

Please sign in to comment.