From 977f6456131a600d8b8c3b7962018b9a44b32004 Mon Sep 17 00:00:00 2001
From: Matic Lubej
Date: Wed, 24 Jan 2024 08:04:50 +0100
Subject: [PATCH] add pandas parquet loader and update vector comparison (#325)

Co-authored-by: Matic Lubej
---
 eogrow/utils/testing.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/eogrow/utils/testing.py b/eogrow/utils/testing.py
index f2ab44ba..9b3a39e0 100644
--- a/eogrow/utils/testing.py
+++ b/eogrow/utils/testing.py
@@ -14,11 +14,12 @@
 import fs
 import geopandas as gpd
 import numpy as np
+import pandas as pd
 import rasterio
 from deepdiff import DeepDiff
 from fs.base import FS
 from fs.osfs import OSFS
-from shapely import MultiPolygon, Point, Polygon
+from shapely import MultiPolygon, Point, Polygon, wkb, wkt
 
 from eolearn.core import EOPatch, FeatureType
 from eolearn.core.eodata_io import get_filesystem_data_info
@@ -90,13 +91,26 @@ def calculate_statistics(folder: str, config: StatCalcConfig) -> JsonDict:
         elif content_path.endswith((".geojson", ".gpkg")):
             stats[content] = _calculate_vector_stats(gpd.read_file(content_path), config)
         elif content_path.endswith(".parquet"):
-            stats[content] = _calculate_vector_stats(gpd.read_parquet(content_path), config)
+            try:
+                data = gpd.read_parquet(content_path)
+            except Exception:
+                data = _load_as_geoparquet(content_path)
+            stats[content] = _calculate_vector_stats(data, config)
         else:
             stats[content] = None
 
     return stats
 
 
+def _load_as_geoparquet(path: str) -> gpd.GeoDataFrame:
+    data = pd.read_parquet(path)
+    if isinstance(data.geometry.iloc[0], str):
+        data.geometry = data.geometry.apply(wkt.loads)
+    elif isinstance(data.geometry.iloc[0], bytes):
+        data.geometry = data.geometry.apply(wkb.loads)
+    return gpd.GeoDataFrame(data, geometry="geometry", crs=data.utm_crs.iloc[0])
+
+
 def _calculate_eopatch_stats(eopatch: EOPatch, config: StatCalcConfig) -> JsonDict:
     """Calculates statistics of given EOPatch and it's content"""
     stats: JsonDict = defaultdict(dict)