Skip to content

Commit

Permalink
Issue #197/#211/#213 use load_geojson in TestVectorCubeRunUDF
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Aug 7, 2023
1 parent 1726a78 commit f095480
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 118 deletions.
1 change: 1 addition & 0 deletions openeo_driver/ProcessGraphDeserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1598,6 +1598,7 @@ def load_geojson(args: ProcessArgs, env: EvalEnv) -> DriverVectorCube:
allowed_types=["Point", "MultiPoint", "Polygon", "MultiPolygon", "Feature", "FeatureCollection"]
),
)
# TODO: better default value for `properties`? https://github.com/Open-EO/openeo-processes/issues/448
properties = args.get_optional("properties", default=[], expected_type=(list, tuple))
vector_cube = env.backend_implementation.vector_cube_cls.from_geojson(data, columns_for_cube=properties)
return vector_cube
Expand Down
8 changes: 6 additions & 2 deletions openeo_driver/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from pyproj import CRS

from openeo_driver.datastructs import ResolutionMergeArgs, SarBackscatterArgs, StacAsset
from openeo_driver.errors import FeatureUnsupportedException, InternalException
from openeo_driver.errors import FeatureUnsupportedException, InternalException, ProcessGraphInvalidException
from openeo_driver.util.geometry import GeometryBufferer, validate_geojson_coordinates
from openeo_driver.util.ioformats import IOFORMATS
from openeo_driver.util.pgparsing import SingleRunUDFProcessGraph
Expand Down Expand Up @@ -638,6 +638,10 @@ def apply_dimension(

if single_run_udf:
# Process with single "run_udf" node
if single_run_udf.data != {"from_parameter": "data"}:
raise ProcessGraphInvalidException(
message="Vector cube `apply_dimension` process does not reference `data` parameter."
)
if (
dimension == self.DIM_GEOMETRIES
or (dimension in {self.DIM_BANDS, self.DIM_PROPERTIES}.intersection(self.get_dimension_names()))
Expand All @@ -651,7 +655,7 @@ def apply_dimension(
feature_collection = openeo.udf.FeatureCollection(id="_", data=gdf)
# TODO: dedicated UDF signature to indicate to work on vector cube through a feature collection based API
udf_data = openeo.udf.UdfData(
proj={"EPSG": self._geometries.crs.to_epsg()},
proj={"EPSG": self._geometries.crs.to_epsg()} if self._geometries.crs else None,
feature_collection_list=[feature_collection],
user_context=context,
)
Expand Down
213 changes: 97 additions & 116 deletions tests/test_views_execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -3628,6 +3628,22 @@ class TestVectorCubeRunUDF:
- https://github.com/Open-EO/openeo-geopyspark-driver/issues/437
"""

def _build_run_udf_callback(self, udf_code: str) -> dict:
    """
    Wrap UDF source code in a callback process graph with a single `run_udf` node.

    The given code is dedented first, so it can be passed
    as an indented triple-quoted string literal.
    The `run_udf` node (id "runudf1") reads its data from the `data` parameter
    of the callback and is flagged as the result node.

    :param udf_code: (possibly indented) Python source code of the UDF
    :return: dict with a "process_graph" entry holding the single `run_udf` node
    """
    run_udf_node = {
        "process_id": "run_udf",
        "arguments": {
            "data": {"from_parameter": "data"},
            "udf": textwrap.dedent(udf_code),
            "runtime": "Python",
        },
        "result": True,
    }
    return {"process_graph": {"runudf1": run_udf_node}}

@pytest.mark.parametrize(
"dimension",
[
Expand All @@ -3636,40 +3652,32 @@ class TestVectorCubeRunUDF:
],
)
def test_apply_dimension_run_udf_change_geometry(self, api100, dimension):
udf_code = """
from openeo.udf import UdfData, FeatureCollection
def process_geometries(udf_data: UdfData) -> UdfData:
[feature_collection] = udf_data.get_feature_collection_list()
gdf = feature_collection.data
gdf["geometry"] = gdf["geometry"].buffer(distance=1, resolution=2)
udf_data.set_feature_collection_list([
FeatureCollection(id="_", data=gdf),
])
"""
udf_code = textwrap.dedent(udf_code)
"""VectorCube + apply_dimension + UDF (changing geometry)"""
process_graph = {
"get_vector_data": {
"process_id": "load_uploaded_files",
"arguments": {"paths": [str(get_path("geojson/FeatureCollection02.json"))], "format": "GeoJSON"},
"load": {
"process_id": "load_geojson",
"arguments": {
"data": load_json("geojson/FeatureCollection02.json"),
"properties": ["pop"],
},
},
"apply_dimension": {
"process_id": "apply_dimension",
"arguments": {
"data": {"from_node": "get_vector_data"},
"data": {"from_node": "load"},
"dimension": dimension,
"process": {
"process_graph": {
"runudf1": {
"process_id": "run_udf",
"arguments": {
"data": {"from_node": "get_vector_data"},
"udf": udf_code,
"runtime": "Python",
},
"result": True,
}
},
},
"process": self._build_run_udf_callback(
"""
from openeo.udf import UdfData, FeatureCollection
def process_geometries(udf_data: UdfData) -> UdfData:
[feature_collection] = udf_data.get_feature_collection_list()
gdf = feature_collection.data
gdf["geometry"] = gdf["geometry"].buffer(distance=1, resolution=2)
udf_data.set_feature_collection_list([
FeatureCollection(id="_", data=gdf),
])
"""
),
},
"result": True,
},
Expand Down Expand Up @@ -3708,42 +3716,33 @@ def test_apply_dimension_run_udf_filter_on_geometries(self, api100, dimension):
Test to use `apply_dimension(dimension="...", process=UDF)` to filter out certain
entries from geometries dimension based on geometry (e.g. intersection with another geometry)
"""
udf_code = """
from openeo.udf import UdfData, FeatureCollection
import shapely.geometry
def process_geometries(udf_data: UdfData) -> UdfData:
[feature_collection] = udf_data.get_feature_collection_list()
gdf = feature_collection.data
to_intersect = shapely.geometry.box(4, 3, 8, 4)
gdf = gdf[gdf["geometry"].intersects(to_intersect)]
udf_data.set_feature_collection_list([
FeatureCollection(id="_", data=gdf),
])
"""
udf_code = textwrap.dedent(udf_code)
process_graph = {
"get_vector_data": {
"process_id": "load_uploaded_files",
"arguments": {"paths": [str(get_path("geojson/FeatureCollection10.json"))], "format": "GeoJSON"},
"load": {
"process_id": "load_geojson",
"arguments": {
"data": load_json("geojson/FeatureCollection10.json"),
"properties": ["pop"],
},
},
"apply_dimension": {
"process_id": "apply_dimension",
"arguments": {
"data": {"from_node": "get_vector_data"},
"data": {"from_node": "load"},
"dimension": dimension,
"process": {
"process_graph": {
"runudf1": {
"process_id": "run_udf",
"arguments": {
"data": {"from_node": "get_vector_data"},
"udf": udf_code,
"runtime": "Python",
},
"result": True,
}
},
},
"process": self._build_run_udf_callback(
"""
from openeo.udf import UdfData, FeatureCollection
import shapely.geometry
def process_geometries(udf_data: UdfData) -> UdfData:
[feature_collection] = udf_data.get_feature_collection_list()
gdf = feature_collection.data
to_intersect = shapely.geometry.box(4, 3, 8, 4)
gdf = gdf[gdf["geometry"].intersects(to_intersect)]
udf_data.set_feature_collection_list([
FeatureCollection(id="_", data=gdf),
])
"""
),
},
"result": True,
},
Expand Down Expand Up @@ -3787,41 +3786,32 @@ def test_apply_dimension_run_udf_filter_on_properties(self, api100, dimension):
as apply_dimension only allows changing the cardinality of the provided dimension ("properties" in this case),
not any other dimension (like "geometries" in this case).
"""
udf_code = """
from openeo.udf import UdfData, FeatureCollection
import shapely.geometry
def process_geometries(udf_data: UdfData) -> UdfData:
[feature_collection] = udf_data.get_feature_collection_list()
gdf = feature_collection.data
gdf = gdf[gdf["pop"] > 500]
udf_data.set_feature_collection_list([
FeatureCollection(id="_", data=gdf),
])
"""
udf_code = textwrap.dedent(udf_code)
process_graph = {
"get_vector_data": {
"process_id": "load_uploaded_files",
"arguments": {"paths": [str(get_path("geojson/FeatureCollection10.json"))], "format": "GeoJSON"},
"load": {
"process_id": "load_geojson",
"arguments": {
"data": load_json("geojson/FeatureCollection10.json"),
"properties": ["pop"],
},
},
"apply_dimension": {
"process_id": "apply_dimension",
"arguments": {
"data": {"from_node": "get_vector_data"},
"data": {"from_node": "load"},
"dimension": dimension,
"process": {
"process_graph": {
"runudf1": {
"process_id": "run_udf",
"arguments": {
"data": {"from_node": "get_vector_data"},
"udf": udf_code,
"runtime": "Python",
},
"result": True,
}
},
},
"process": self._build_run_udf_callback(
"""
from openeo.udf import UdfData, FeatureCollection
import shapely.geometry
def process_geometries(udf_data: UdfData) -> UdfData:
[feature_collection] = udf_data.get_feature_collection_list()
gdf = feature_collection.data
gdf = gdf[gdf["pop"] > 500]
udf_data.set_feature_collection_list([
FeatureCollection(id="_", data=gdf),
])
"""
),
},
"result": True,
},
Expand Down Expand Up @@ -3859,41 +3849,32 @@ def test_apply_dimension_run_udf_add_properties(self, api100, dimension):
"""
Test to use `apply_dimension(dimension="...", process=UDF)` to add properties
"""
udf_code = """
from openeo.udf import UdfData, FeatureCollection
import shapely.geometry
def process_geometries(udf_data: UdfData) -> UdfData:
[feature_collection] = udf_data.get_feature_collection_list()
gdf = feature_collection.data
gdf["poppop"] = gdf["pop"] ** 2
udf_data.set_feature_collection_list([
FeatureCollection(id="_", data=gdf),
])
"""
udf_code = textwrap.dedent(udf_code)
process_graph = {
"get_vector_data": {
"process_id": "load_uploaded_files",
"arguments": {"paths": [str(get_path("geojson/FeatureCollection02.json"))], "format": "GeoJSON"},
"load": {
"process_id": "load_geojson",
"arguments": {
"data": load_json("geojson/FeatureCollection02.json"),
"properties": ["pop"],
},
},
"apply_dimension": {
"process_id": "apply_dimension",
"arguments": {
"data": {"from_node": "get_vector_data"},
"data": {"from_node": "load"},
"dimension": dimension,
"process": {
"process_graph": {
"runudf1": {
"process_id": "run_udf",
"arguments": {
"data": {"from_node": "get_vector_data"},
"udf": udf_code,
"runtime": "Python",
},
"result": True,
}
},
},
"process": self._build_run_udf_callback(
"""
from openeo.udf import UdfData, FeatureCollection
import shapely.geometry
def process_geometries(udf_data: UdfData) -> UdfData:
[feature_collection] = udf_data.get_feature_collection_list()
gdf = feature_collection.data
gdf["poppop"] = gdf["pop"] ** 2
udf_data.set_feature_collection_list([
FeatureCollection(id="_", data=gdf),
])
"""
),
},
"result": True,
},
Expand Down

0 comments on commit f095480

Please sign in to comment.