diff --git a/city_metrix/layers/__init__.py b/city_metrix/layers/__init__.py index 9d65cc5..5f1a28d 100644 --- a/city_metrix/layers/__init__.py +++ b/city_metrix/layers/__init__.py @@ -21,6 +21,7 @@ from .nasa_dem import NasaDEM from .era_5_hottest_day import Era5HottestDay from .impervious_surface import ImperviousSurface +from .isoline import Isoline from .glad_lulc import LandCoverGlad from .glad_lulc import LandCoverSimplifiedGlad from .glad_lulc import LandCoverHabitatGlad diff --git a/city_metrix/layers/isoline.py b/city_metrix/layers/isoline.py new file mode 100644 index 0000000..dc2d02e --- /dev/null +++ b/city_metrix/layers/isoline.py @@ -0,0 +1,26 @@ +import geopandas as gpd +import shapely +import urllib + +from .layer import Layer +from city_metrix.layers.layer import get_utm_zone_epsg + +BUCKET_NAME = 'wri-cities-indicators' +PREFIX = 'data/isolines/' + +class Isoline(Layer): + def __init__(self, filename, **kwargs): + super().__init__(**kwargs) + # Get data from S3 + url = f'https://{BUCKET_NAME}.s3.us-east-1.amazonaws.com/{PREFIX}{filename}' + print(f'Attempting to retrieve isoline file from {url}', end=' ') + try: + gdf = gpd.read_file(url) + print('(Succeeded)') + except urllib.error.HTTPError: + raise Exception(f"Isoline file {filename} does not exist.") + + self.gdf = gpd.GeoDataFrame({'is_accessible': [1] * len(gdf), 'geometry': gdf.to_crs('EPSG:4326')['geometry']}).to_crs('EPSG:4326').set_crs('EPSG:4326').set_geometry('geometry') + + def get_data(self, bbox): + return self.gdf.clip(shapely.box(*bbox)) diff --git a/city_metrix/layers/layer.py b/city_metrix/layers/layer.py index afe18a3..667a8fa 100644 --- a/city_metrix/layers/layer.py +++ b/city_metrix/layers/layer.py @@ -401,6 +401,7 @@ def write_layer(path, data): data.to_file(path, driver="GeoJSON") else: raise NotImplementedError("Can only write DataArray, Dataset, or GeoDataFrame") + raise NotImplementedError("Can only write DataArray, Dataset, or GeoDataFrame") def write_dataarray(path, data): # for rasters, need to write to locally first then copy to cloud storage diff --git a/city_metrix/layers/urban_land_use.py b/city_metrix/layers/urban_land_use.py index 1c81d39..0716a92 100644 --- a/city_metrix/layers/urban_land_use.py +++ b/city_metrix/layers/urban_land_use.py @@ -13,10 +13,11 @@ class UrbanLandUse(Layer): spatial_resolution: raster resolution in meters (see https://github.com/stac-extensions/raster) """ - def __init__(self, band='lulc', spatial_resolution=5, **kwargs): + def __init__(self, band='lulc', return_value=None, spatial_resolution=5, **kwargs): super().__init__(**kwargs) self.band = band self.spatial_resolution = spatial_resolution + self.return_value = return_value def get_data(self, bbox): ulu = ee.ImageCollection("projects/wri-datalab/cities/urban_land_use/V1") @@ -43,4 +44,7 @@ def get_data(self, bbox): "urban land use" ).lulc + if self.return_value is not None: + data = data.where(data==self.return_value) + return data diff --git a/city_metrix/metrics/__init__.py b/city_metrix/metrics/__init__.py index 0f7c15d..c40d230 100644 --- a/city_metrix/metrics/__init__.py +++ b/city_metrix/metrics/__init__.py @@ -5,6 +5,8 @@ from .urban_open_space import urban_open_space from .natural_areas import natural_areas from .era_5_met_preprocessing import era_5_met_preprocessing +from .percent_population_access import percent_population_access#, percent_population_access_elderly, percent_population_access_children, percent_population_access_female, percent_population_access_informal +from .count_accessible_amenities import count_accessible_amenities#_areamean, count_accessible_amenities_populationmean from .recreational_space_per_capita import recreational_space_per_capita from .vegetation_water_change import vegetation_water_change_gain_area from .vegetation_water_change import vegetation_water_change_loss_area diff --git a/city_metrix/metrics/count_accessible_amenities.py b/city_metrix/metrics/count_accessible_amenities.py new file mode 100644 index 0000000..ec3fdfe --- /dev/null +++ b/city_metrix/metrics/count_accessible_amenities.py @@ -0,0 +1,135 @@ +from geopandas import GeoDataFrame, GeoSeries +from pandas import Series +from city_metrix.layers import Layer +from math import floor +import shapely + +from city_metrix.layers import Isoline, UrbanLandUse, WorldPop +from city_metrix.layers.layer import get_utm_zone_epsg + + +def count_accessible_amenities(zones: GeoDataFrame, city_name, amenity_name, travel_mode, threshold_value, threshold_unit, retrieval_date, weighting='population', worldpop_agesex_classes=[], worldpop_year=2020, informal_only=False) -> GeoSeries: + # cityname example: ARG-Buenos-Aires + # amenityname is OSMclass names, in lowercase + # travelmode is walk, bike, automobile, publictransit (only walk implemented for now) + # threshold_value is integer, in minutes or meters + # threshold_unit is usually minutes or meters + + class AccessCountTmp(Layer): + def __init__(self, access_gdf, return_value, **kwargs): + super().__init__(**kwargs) + self.gdf = access_gdf[access_gdf['count_amenities']==return_value] + def get_data(self, bbox): + return self.gdf.clip(shapely.box(*bbox)) + + class IsolinesBuffered(Isoline): + # Stores and returns isochrone or isodistance polygons with polygons slightly buffered and simplified to avoid invalid-geometry errors + def __init__(self, filename, **kwargs): + super().__init__(filename=filename) + iso_gdf = self.gdf + if iso_gdf.crs in ('EPSG:4326', 'epsg:4326'): + utm_epsg = get_utm_zone_epsg(iso_gdf.total_bounds) + iso_gdf = iso_gdf.to_crs(utm_epsg).set_crs(utm_epsg) + poly_list = [shapely.simplify(p.buffer(0.1), tolerance=10) for p in iso_gdf.geometry] # Buffer and simplify + buffered_gdf = GeoDataFrame({'accessible': 1, 'geometry': poly_list}).set_crs(utm_epsg) + self.gdf = buffered_gdf.to_crs('EPSG:4326').set_crs('EPSG:4326') + + def get_data(self, bbox): + return self.gdf.clip(shapely.box(*bbox)) + + filename = f"{city_name}_{amenity_name}_{travel_mode}_{threshold_value}_{threshold_unit}_{retrieval_date}.geojson" + + population_layer = WorldPop(agesex_classes=worldpop_agesex_classes, year=worldpop_year) + if informal_only: + informal_layer = UrbanLandUse(return_value=3) + population_layer.masks.append(informal_layer) + + iso_layer = IsolinesBuffered(filename=filename) + results = [] + for rownum in range(len(zones)): + zone = zones.iloc[[rownum]] + print('\n{0} (geo {1} of {2})'.format(zone.geo_name[rownum], rownum+1, len(zones))) + iso_gdf = iso_layer.get_data(zone.total_bounds) + if len(iso_gdf) == 0: + results.append(0) + else: + if len(iso_gdf) == 1 and type(iso_gdf.iloc[0].geometry) == shapely.geometry.polygon.Polygon: # only one simple-polygon isoline + dissected_gdf = GeoDataFrame({'poly_id': [0], 'geometry': [iso_gdf.iloc[0].geometry]}).set_crs('EPSG:4326') + else: + # Collect individual boundary linestrings of each isoline polygon + iso_eachboundary = [iso_gdf.iloc[rownum]['geometry'].boundary for rownum in range(len(iso_gdf))] + iso_boundarylinesmulti = [i for i in iso_eachboundary if i is not None] + iso_boundarylines = [] + for i in iso_boundarylinesmulti: + if type(i) == shapely.geometry.linestring.LineString: + iso_boundarylines.append(i) + else: + for j in list(i.geoms): + iso_boundarylines.append(j) + iso_bl_gdf = GeoDataFrame({'idx': range(len(iso_boundarylines)), 'geometry': iso_boundarylines}) + print("Converted isoline polygons into boundary multilines") + # Dissolve all linestrings into large multilinestring, and polygonize into "dissected polygons" + + iso_dissected_polys = shapely.polygonize(list(iso_bl_gdf.dissolve().geometry[0].geoms)) + print("Creating gdf of dissected polygons") + dissected_gdf = GeoDataFrame({'poly_id': range(len(list(iso_dissected_polys.geoms))), 'geometry': list(iso_dissected_polys.geoms)}).set_crs('EPSG:4326') + # For each dissected polygon, find how many of the original isoline polys contain the centroid + # This is the number of amenity points are accessible within the dissected polygon + print("Counting original isoline polygons that intersect with each dissected polygon") + count_amenities = dissected_gdf.centroid.within(iso_gdf.iloc[[0]].geometry[iso_gdf.index[0]]) * 1 # This is a Series storing running sum, a vector with num rows == num of dissected polys + print("|==" + "PROGRESS=OF=INCLUSION=TESTS" + ("=" * 71) + "|") + print(" ", end="") + progress = 0 + + for iso_idx in range(1, len(iso_gdf)): # Iterate through all original isoline polys: test dissected polys to see whether centroids are included in isoline poly, add to running sum Series + if floor(100 * iso_idx / len(iso_gdf)) > progress: + progress = floor(100 * iso_idx / len(iso_gdf)) + print("X", end="") + count_amenities = count_amenities + (dissected_gdf.centroid.within(iso_gdf.iloc[[iso_idx]].geometry[iso_gdf.index[iso_idx]]) * 1) + print("X") + dissected_gdf['count_amenities'] = count_amenities + + # Create dict of polygons each with a single asset-count + max_count = dissected_gdf['count_amenities'].max() + count_layers = {count: AccessCountTmp(access_gdf=dissected_gdf, return_value=count) for count in range(1, max_count + 1)} + + # For each zone, find average number of accessible amenities, and store in result_gdf + + running_sum = Series([0]) + for count in range(1, max_count+1): + try: # Because adding masks to pop_layer adds them to WorldPop(), and they cannot be removed from WorldPop() + pop_layer + except NameError: + pop_layer = WorldPop(agesex_classes=worldpop_agesex_classes, year=worldpop_year) + else: + pop_layer.masks = [] + try: + totalpop_layer + except NameError: + totalpop_layer = WorldPop(agesex_classes=worldpop_agesex_classes, year=worldpop_year) + else: + totalpop_layer.masks = [] + if informal_only: + pop_layer.masks.append(informal_layer) + totalpop_layer.masks.append(informal_layer) + pop_layer.masks.append(count_layers[count]) + groupby = pop_layer.groupby(zone) + if weighting == 'area': + try: + running_sum += count * groupby.count().fillna(0) + except: + running_sum += Series([0]) + else: # weighting == 'population' + try: + running_sum += count * groupby.sum().fillna(0) + except: + running_sum += Series([0]) + if weighting == 'area': + rowresult = running_sum / totalpop_layer.groupby(zone).count() + else: # weighting == 'population' + rowresult = running_sum / totalpop_layer.groupby(zone).sum() + + results.append(rowresult[0]) + + result_gdf = GeoDataFrame({f'{weighting}_averaged_num_accessible_amenities': results, 'geometry': zones['geometry']}).set_crs('EPSG:4326') + return result_gdf \ No newline at end of file diff --git a/city_metrix/metrics/percent_population_access.py b/city_metrix/metrics/percent_population_access.py new file mode 100644 index 0000000..aea50bf --- /dev/null +++ b/city_metrix/metrics/percent_population_access.py @@ -0,0 +1,44 @@ +from geopandas import GeoDataFrame, GeoSeries +import shapely +import numpy as np + +from city_metrix.layers.isoline import Isoline +from city_metrix.layers.world_pop import WorldPop +from city_metrix.layers.urban_land_use import UrbanLandUse +from city_metrix.layers.layer import Layer, get_utm_zone_epsg + + +def percent_population_access(zones, city_name, amenity_name, travel_mode, threshold_value, threshold_unit, retrieval_date, worldpop_agesex_classes=[], worldpop_year=2020, informal_only=False): + # Example: city_name='MEX-Mexico_City', amenity_name='schools', travel_mode='walk', threshold_value=30, threshold_unit='minutes', retrieval_date='20241105' + + class IsolineSimplified(Isoline): + # Stores and returns isochrone or isodistance polygons with simplified geometry, and dissolved into fewer non-overlapping polygons + def __init__(self, filename, **kwargs): + super().__init__(filename=filename) + iso_gdf = self.gdf + if iso_gdf.crs in ('EPSG:4326', 'epsg:4326'): + utm_epsg = get_utm_zone_epsg(iso_gdf.total_bounds) + iso_gdf = iso_gdf.to_crs(utm_epsg).set_crs(utm_epsg) + poly_list = [shapely.simplify(p.buffer(0.1), tolerance=10) for p in iso_gdf.geometry] # Buffer and simplify + uu = shapely.unary_union(shapely.MultiPolygon(poly_list)) # Dissolve + shorter_gdf = GeoDataFrame({'accessible': 1, 'geometry': list(uu.geoms)}).set_crs(utm_epsg) + self.gdf = shorter_gdf.to_crs('EPSG:4326').set_crs('EPSG:4326') + + def get_data(self, bbox): + return self.gdf.clip(shapely.box(*bbox)) + filename = f"{city_name}_{amenity_name}_{travel_mode}_{threshold_value}_{threshold_unit}_{retrieval_date}.geojson" + iso_layer = IsolineSimplified(filename) + accesspop_layer = WorldPop(agesex_classes=worldpop_agesex_classes, year=worldpop_year, masks=[iso_layer,]) + totalpop_layer = WorldPop(agesex_classes=worldpop_agesex_classes, year=worldpop_year) + if informal_only: + informal_layer = UrbanLandUse(return_value=3) + accesspop_layer.masks.append(informal_layer) + totalpop_layer.masks.append(informal_layer) + res = [] + for rownum in range(len(zones)): # Doing it district-by-district to avoid empty-GDF error + try: + res.append(100 * accesspop_layer.groupby(zones.iloc[[rownum]]).sum()[0] / totalpop_layer.groupby(zones.iloc[[rownum]]).sum()[0]) + except: + res.append(0) + result_gdf = GeoDataFrame({'access_percent': res, 'geometry': zones.geometry}).set_crs(zones.crs) + return result_gdf \ No newline at end of file diff --git a/tests/test_layers.py b/tests/test_layers.py index 93f82b2..a8a3305 100644 --- a/tests/test_layers.py +++ b/tests/test_layers.py @@ -11,6 +11,7 @@ EsaWorldCoverClass, HighLandSurfaceTemperature, ImperviousSurface, + Isoline, LandCoverGlad, LandCoverHabitatGlad, LandCoverHabitatChangeGlad, @@ -78,6 +79,12 @@ def test_impervious_surface(): data = ImperviousSurface().get_data(BBOX) assert np.size(data) > 0 +@pytest.mark.skipif(EXECUTE_IGNORED_TESTS == False, reason="AWS redentials needed") +def test_isoline(): + layer = Isoline({'cityname': 'KEN-Nairobi', 'amenityname': 'schools', 'travelmode': 'walk', 'threshold_type': 'time', 'threshold_value': '15', 'year': 2023}) + nairobi_bbox = (36.66446402, -1.44560888, 37.10497899, -1.16058296) + data = layer.get_data(nairobi_bbox) + def test_land_cover_glad(): data = LandCoverGlad().get_data(BBOX) assert np.size(data) > 0 diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 07090af..d4f32f5 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -3,6 +3,7 @@ import pytest + def test_built_land_with_high_lst(): indicator = built_land_with_high_land_surface_temperature(ZONES) expected_zone_size = ZONES.geometry.size @@ -23,6 +24,13 @@ def test_built_land_without_tree_cover(): actual_indicator_size = indicator.size assert expected_zone_size == actual_indicator_size +@pytest.mark.skipif(EXECUTE_IGNORED_TESTS == False, reason="Needs specific zone file to run") +def test_count_accessible_amenities(): + nairobi_gdf = None # Need link to a GDF for which the isoline file has been calculated + indicator = count_accessible_amenities(nairobi_gdf, 'KEN-Nairobi', 'schools', 'walk', 'time', 15, '20241105', weighting='population') + expected_zone_size = NAIROBI_BBOX.geometry.size + actual_indicator_size = indicator.size + assert expected_zone_size == actual_indicator_size @pytest.mark.skipif(EXECUTE_IGNORED_TESTS == False, reason="CDS API needs personal access token file to run") def test_era_5_met_preprocess(): @@ -43,6 +51,14 @@ def test_natural_areas(): actual_indicator_size = indicator.size assert expected_zone_size == actual_indicator_size +@pytest.mark.skipif(EXECUTE_IGNORED_TESTS == False, reason="AWS credentials needed") +def test_percent_population_access(): + from .conftest import create_fishnet_grid + NAIROBI_BBOX = create_fishnet_grid(36.66446402, -1.44560888, 37.10497899, -1.16058296, 0.01).reset_index() + indicator = percent_population_access(NAIROBI_BBOX, 'KEN-Nairobi', 'schools', 'walk', 'time', '15', 2024, aws_profilename=None) + expected_zone_size = NAIROBI_BBOX.geometry.size + actual_indicator_size = indicator.size + assert expected_zone_size == actual_indicator_size def test_recreational_space_per_capita(): indicator = recreational_space_per_capita(ZONES)