Skip to content

Commit

Permalink
incorporate feature IDs in netCDF output for aggregate_spatial
Browse files Browse the repository at this point in the history
  • Loading branch information
bossie committed Sep 20, 2022
1 parent 2d0b27e commit 037790f
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 17 deletions.
9 changes: 6 additions & 3 deletions openeo_driver/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import List, Union, Optional, Dict, Any, Tuple, Sequence

import geopandas as gpd
import pyproj
import shapely.geometry
import shapely.geometry.base
import shapely.ops
Expand Down Expand Up @@ -226,10 +227,12 @@ def _as_geopandas_df(self) -> gpd.GeoDataFrame:
def to_geojson(self):
return shapely.geometry.mapping(self._as_geopandas_df())

def to_wkt(self) -> Tuple[List[str], str]:
def to_wkt(self) -> List[str]:
wkts = [str(g) for g in self._geometries.geometry]
crs = self._geometries.crs.to_string() if self._geometries.crs else "EPSG:4326"
return wkts, crs
return wkts

def get_crs(self) -> pyproj.CRS:
return self._geometries.crs or pyproj.CRS.from_epsg(4326)

def write_assets(
self, directory: Union[str, Path], format: str, options: Optional[dict] = None
Expand Down
31 changes: 19 additions & 12 deletions openeo_driver/save_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,20 @@ class AggregatePolygonResult(JSONResult): # TODO: if it supports NetCDF and CSV

# TODO #71 #114 EP-3981 port this to proper vector cube support

def __init__(self, timeseries: dict, regions: GeometryCollection, metadata:CollectionMetadata=None):
def __init__(self, timeseries: dict, regions: Union[GeometryCollection, DriverVectorCube],
metadata: CollectionMetadata = None):
super().__init__(data=timeseries)
if not isinstance(regions, GeometryCollection):
# TODO: raise exception instead of warning?
warnings.warn("AggregatePolygonResult: GeometryCollection expected but got {t}".format(t=type(regions)))
self._regions = regions

if isinstance(regions, GeometryCollection):
self._regions = regions
self._feature_ids = ['feature_%d' % i for i in range(len(self._regions))]
else:
self._regions = GeometryCollection(list(regions.get_geometries()))
feature_ids = regions._as_geopandas_df().get("id") # TODO: don't access private method

self._feature_ids = (feature_ids.to_list() if feature_ids is not None
else ['feature_%d' % i for i in range(len(self._regions))])

self._metadata = metadata

def get_data(self):
Expand All @@ -219,7 +227,7 @@ def get_data(self):
# By default, keep original (proprietary) result format
return self.data

def write_assets(self, directory:str) -> Dict[str, StacAsset]:
def write_assets(self, directory: str) -> Dict[str, StacAsset]:
"""
Save generated assets into a directory, return asset metadata.
TODO: can an asset also be a full STAC item? In principle, one openEO job can either generate a full STAC collection, or one STAC item with multiple assets...
Expand Down Expand Up @@ -263,7 +271,7 @@ def create_flask_response(self) -> Response:

return super().create_flask_response()

def create_point_timeseries_xarray(self, feature_ids, timestamps,lats,lons,averages_by_feature):
def _create_point_timeseries_xarray(self, timestamps, lats, lons, averages_by_feature):
import xarray as xr

#xarray breaks with timezone aware dates: https://github.com/pydata/xarray/issues/1490
Expand All @@ -289,7 +297,7 @@ def create_point_timeseries_xarray(self, feature_ids, timestamps,lats,lons,avera

the_array.coords['lat'] = (('feature'),lats)
the_array.coords['lon'] = (('feature'), lons)
the_array.coords['feature_names'] = (('feature'), feature_ids)
the_array.coords['feature_names'] = (('feature'), self._feature_ids)

the_array.variables['lat'].attrs['units'] = 'degrees_north'
the_array.variables['lat'].attrs['standard_name'] = 'latitude'
Expand All @@ -300,11 +308,10 @@ def create_point_timeseries_xarray(self, feature_ids, timestamps,lats,lons,avera
the_array.attrs['source'] = 'Aggregated timeseries generated by openEO GeoPySpark backend.'
return the_array

def to_netcdf(self,destination = None):
def to_netcdf(self, destination=None):
points = [r.representative_point() for r in self._regions]
lats = [p.y for p in points]
lons = [p.x for p in points]
feature_ids = ['feature_%s'% str(i) for i in range(len(self._regions))]

values = self.data.values()
if self._metadata is not None and self._metadata.has_band_dimension():
Expand All @@ -318,11 +325,11 @@ def to_netcdf(self,destination = None):
cleaned_values = [[bands if len(bands)>0 else fill_value for bands in feature_bands ] for feature_bands in values]
time_feature_bands_array = np.array(list(cleaned_values))
#time_feature_bands_array[time_feature_bands_array == []] = [np.nan, np.nan]
assert len(feature_ids) == time_feature_bands_array.shape[1]
assert len(self._feature_ids) == time_feature_bands_array.shape[1]
#if len(time_feature_bands_array.shape) == 3:
# nb_bands = time_feature_bands_array.shape[2]
feature_time_bands_array = np.swapaxes(time_feature_bands_array,0,1)
array = self.create_point_timeseries_xarray(feature_ids, list(self.data.keys()), lats, lons,feature_time_bands_array)
array = self._create_point_timeseries_xarray(list(self.data.keys()), lats, lons, feature_time_bands_array)
comp = dict(zlib=True, complevel=5)
encoding = {var: comp for var in array.data_vars}
if destination is None:
Expand Down
8 changes: 6 additions & 2 deletions tests/test_vectorcube.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import geopandas as gpd
import pyproj
import pytest
import xarray
from shapely.geometry import Polygon, MultiPolygon
Expand Down Expand Up @@ -63,10 +64,13 @@ def test_to_geojson(self, gdf):
def test_to_wkt(self, gdf):
vc = DriverVectorCube(gdf)
assert vc.to_wkt() == (
['POLYGON ((1 1, 3 1, 2 3, 1 1))', 'POLYGON ((4 2, 5 4, 3 4, 4 2))'],
'EPSG:4326',
['POLYGON ((1 1, 3 1, 2 3, 1 1))', 'POLYGON ((4 2, 5 4, 3 4, 4 2))']
)

def test_get_crs(self, gdf):
vc = DriverVectorCube(gdf)
assert vc.get_crs() == pyproj.CRS.from_epsg(4326)

def test_with_cube_to_geojson(self, gdf):
vc1 = DriverVectorCube(gdf)
dims, coords = vc1.get_xarray_cube_basics()
Expand Down

0 comments on commit 037790f

Please sign in to comment.