Skip to content

Commit

Permalink
Merge pull request #414 from nasaharvest/multi_roi_multi_period_area_…
Browse files Browse the repository at this point in the history
…estimate

Multi roi multi period area estimate
  • Loading branch information
Gedeon-m-gedus authored Nov 7, 2024
2 parents 0463d9e + 95b2e6c commit 8924837
Show file tree
Hide file tree
Showing 15 changed files with 1,881 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ISO-8859-1
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
Binary file not shown.
Binary file not shown.
Binary file added data/shapefiles/aljazirah_boundary.zip
Binary file not shown.
Binary file added data/shapefiles/alqadarif_boundary.zip
Binary file not shown.
Binary file added data/shapefiles/centraldarfur_boundary.zip
Binary file not shown.
Binary file added data/shapefiles/southdarfur_boundary.zip
Binary file not shown.
Binary file added data/shapefiles/westdarfur_boundary.zip
Binary file not shown.
3 changes: 3 additions & 0 deletions maps/Sudan/area-sudan-22sept2024.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Year,AlJazirah crop area,95% CI,AlJazirah Non-crop area,95% CI,AlQadarif crop area,95% CI,AlQadarif Non-crop area,95% CI,WestDarfur crop area,95% CI,WestDarfur Non-crop area,95% CI,CentralDarfur crop area,95% CI,CentralDarfur Non-crop area,95% CI,SouthDarfur crop area,95% CI,SouthDarfur Non-crop area,95% CI
2022,1038357.67,211911.48,1405219.82,211911.48,3699444.03,327081.69,2728299.09,327081.69,365424.27,155661.61,1911921.47,155661.61,557058.29,185579.4,3165385.83,185579.4,1473342.87,287520.97,6349430.03,287520.97
2023,1165285.74,209330.3,1278291.75,209330.3,3710186.82,337116.99,2717556.29,337116.99,351607.46,150545.59,1925738.28,150545.59,562417.16,178617.87,3160026.96,178617.87,1261626.04,291377.41,6561146.86,291377.41
63 changes: 63 additions & 0 deletions maps/Sudan/config-area-sudan.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
rois:

- roi_name: AlJazirah
roi_boundary_path: '../data/shapefiles/aljazirah_boundary.zip'
samples_and_cropmap:
- year: 2022
set1_labels_path: '../maps/Sudan/sudan_2022_updated22sept2024.csv'
set2_labels_path: '../maps/Sudan/sudan_2022_updated22sept2024.csv'
crop_mask_path: '../maps/Sudan/rasters/Sudan_dw_AlJazirah_2022_dw_may_march_cropmask-32637-cropped.tif'
- year: 2023
set1_labels_path: '../maps/Sudan/sudan_2023_updated22sept2024.csv'
set2_labels_path: '../maps/Sudan/sudan_2023_updated22sept2024.csv'
crop_mask_path: '../maps/Sudan/rasters/Sudan_dw_AlJazirah_2023_dw_may_march_cropmask-32637-cropped.tif'

- roi_name: AlQadarif
roi_boundary_path: '../data/shapefiles/alqadarif_boundary.zip'
samples_and_cropmap:
- year: 2022
set1_labels_path: '../maps/Sudan/sudan_2022_updated22sept2024.csv'
set2_labels_path: '../maps/Sudan/sudan_2022_updated22sept2024.csv'
crop_mask_path: '../maps/Sudan/rasters/Sudan_dw_AlQadarif_2022_dw_may_march_cropmask-32637-cropped.tif'
- year: 2023
set1_labels_path: '../maps/Sudan/sudan_2023_updated22sept2024.csv'
set2_labels_path: '../maps/Sudan/sudan_2023_updated22sept2024.csv'
crop_mask_path: '../maps/Sudan/rasters/Sudan_dw_AlQadarif_2023_dw_may_march_cropmask-32637-cropped.tif'

- roi_name: WestDarfur
roi_boundary_path: '../data/shapefiles/westdarfur_boundary.zip'
samples_and_cropmap:
- year: 2022
set1_labels_path: '../maps/Sudan/sudan_2022_updated22sept2024.csv'
set2_labels_path: '../maps/Sudan/sudan_2022_updated22sept2024.csv'
crop_mask_path: '../maps/Sudan/rasters/Sudan_dw_WestDarfur_2022_dw_may_march_cropmask-32635-cropped.tif'
- year: 2023
set1_labels_path: '../maps/Sudan/sudan_2023_updated22sept2024.csv'
set2_labels_path: '../maps/Sudan/sudan_2023_updated22sept2024.csv'
crop_mask_path: '../maps/Sudan/rasters/Sudan_dw_WestDarfur_2023_dw_may_march_cropmask-32635-cropped.tif'

- roi_name: CentralDarfur
roi_boundary_path: '../data/shapefiles/centraldarfur_boundary.zip'
samples_and_cropmap:
- year: 2022
set1_labels_path: '../maps/Sudan/sudan_2022_updated22sept2024.csv'
set2_labels_path: '../maps/Sudan/sudan_2022_updated22sept2024.csv'
crop_mask_path: '../maps/Sudan/rasters/Sudan_dw_CentralDarfur_2022_dw_may_march_cropmask-32635-cropped.tif'
- year: 2023
set1_labels_path: '../maps/Sudan/sudan_2023_updated22sept2024.csv'
set2_labels_path: '../maps/Sudan/sudan_2023_updated22sept2024.csv'
crop_mask_path: '../maps/Sudan/rasters/Sudan_dw_CentralDarfur_2023_dw_may_march_cropmask-32635-cropped.tif'

- roi_name: SouthDarfur
roi_boundary_path: '../data/shapefiles/southdarfur_boundary.zip'
samples_and_cropmap:
- year: 2022
set1_labels_path: '../maps/Sudan/sudan_2022_updated22sept2024.csv'
set2_labels_path: '../maps/Sudan/sudan_2022_updated22sept2024.csv'
crop_mask_path: '../maps/Sudan/rasters/Sudan_dw_SouthDarfur_2022_dw_may_march_cropmask-32635-cropped.tif'
- year: 2023
set1_labels_path: '../maps/Sudan/sudan_2023_updated22sept2024.csv'
set2_labels_path: '../maps/Sudan/sudan_2023_updated22sept2024.csv'
crop_mask_path: '../maps/Sudan/rasters/Sudan_dw_SouthDarfur_2023_dw_may_march_cropmask-32635-cropped.tif'

output_path: '../maps/Sudan/area-sudan-22sept2024.csv'
793 changes: 793 additions & 0 deletions maps/Sudan/sudan_allrois_area_estimate.ipynb

Large diffs are not rendered by default.

793 changes: 793 additions & 0 deletions notebooks/multi_roi_area_estimate.ipynb

Large diffs are not rendered by default.

229 changes: 227 additions & 2 deletions src/area_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import csv
import json
import os
from typing import List, Optional, Tuple, Union
Expand All @@ -8,13 +9,55 @@
import numpy as np
import pandas as pd
import rasterio as rio
import seaborn as sns
import yaml
from osgeo import gdal
from rasterio import transform
from rasterio.mask import mask
from shapely.geometry import box
from sklearn.metrics import confusion_matrix


def plot_confusion_matrix(cm, labels, datatype="d") -> None:
"""Pretty prints confusion matrix.
Expects row 'Reference' and column 'Prediction/Map' ordered confusion matrix.
Args:
cm:
Confusion matrix of reference and map samples expressed in terms of
sample counts, n[i,j]. Row-column ordered reference-row, map-column.
labels:
List-like containing labels in same order as confusion matrix. For
example:
["Stable NP", "PGain", "PLoss", "Stable P"]
["Non-Crop", "Crop"]
"""

_, ax = plt.subplots(nrows=1, ncols=1)
sns.heatmap(
cm,
cmap="crest",
annot=True,
fmt=datatype,
cbar=False,
square=True,
ax=ax,
annot_kws={"size": 20},
)
ax.xaxis.tick_top()
ax.xaxis.set_label_coords(0.50, 1.125)
ax.yaxis.set_label_coords(-0.125, 0.50)
ax.set_xticklabels(labels=labels, fontsize=16)
ax.set_yticklabels(labels=labels, fontsize=16)
ax.set_xlabel("Map", fontsize=20)
ax.set_ylabel("Reference", fontsize=20)
plt.tight_layout()


def gdal_reproject(target_crs: str, source_crs: str, source_fn: str, dest_fn: str) -> None:
cmd = (
f"gdalwarp -t_srs {target_crs} -s_srs {source_crs} -tr 10 10 "
Expand Down Expand Up @@ -175,7 +218,7 @@ def cal_map_area_class(
crop_area = crop_px[0].shape[0] * (px_size * px_size) / 10000
noncrop_area = noncrop_px[0].shape[0] * (px_size * px_size) / 10000
print(
f"Crop area: {crop_area:.2f} ha, Non-crop area: {noncrop_area:.2f} ha \n \
f"[Pixel counting] Crop area: {crop_area:.2f} ha, Non-crop area: {noncrop_area:.2f} ha \n \
Total area: {crop_area + noncrop_area:.2f} ha"
)

Expand All @@ -190,7 +233,7 @@ def cal_map_area_class(
elif unit == "fraction":
crop_area = int(crop_px[0].shape[0]) / total
noncrop_area = int(noncrop_px[0].shape[0]) / total
print(f"Crop area: {crop_area:.2f} fraction, Non-crop area: {noncrop_area:.2f} fraction")
print(f"[Fraction] Crop area: {crop_area:.2f}, Non-crop area: {noncrop_area:.2f}")
assert crop_area + noncrop_area == 1

else:
Expand Down Expand Up @@ -743,3 +786,185 @@ def plot_area(summary: pd.DataFrame) -> None:

plt.tight_layout()
plt.show()


def area_estimate_pipeline(
aoi_path: str, map_path: str, ceo_set_1: str, ceo_set_2: str, visual_outputs: bool
) -> dict:
"""Returns area estimate for a particular region of interest (ROI) using cropland raster and labels.
Args:
aoi_path (str): The file path to the Area of Interest (AOI) ShapeFile/GeoJSON file, which defines the region for which the area estimates are computed.
map_path (str): The file path to the cropland raster file (.tif) that contains the land cover map of the area of interest.
ceo_set_1 (str): The file path to the first CEO (Collect Earth Online) set, which includes ground truth labels.
ceo_set_2 (str): The file path to the second CEO set, which includes additional ground truth labels for further validation or comparison.
visual_outputs (bool): A boolean flag indicating whether to generate visual outputs (plots, maps, etc.) to visualize the results.
Returns:
dict: A dictionary containing area estimates and accuracy metrics for each class in the confusion matrix.
The dictionary has the following keys:
- 'user_accuracy': A list of user's accuracy values for each class.
- 'producer_accuracy': A list of producer's accuracy values for each class.
- 'area_estimates': A nested dictionary containing area estimates and 95% confidence intervals in different units:
- 'pr': Proportional area estimates and confidence intervals.
- 'px': Area estimates and confidence intervals in pixels.
- 'ha': Area estimates and confidence intervals in hectares.
- 'confidence_interval': The 95% confidence interval for each class.
"""
aoi = gpd.read_file(aoi_path)
roi = aoi.dissolve()

map_array, map_meta = load_raster(map_path, roi)

if map_array.data.dtype == "uint8":
binary_map = map_array
else:
binary_map = binarize(map_array, map_meta)

crop_area_px, noncrop_area_px = cal_map_area_class(binary_map, unit="pixels")
crop_area_ha, noncrop_area_ha = cal_map_area_class(binary_map, unit="ha")
crop_area_frac, noncrop_area_frac = cal_map_area_class(binary_map, unit="fraction")

ceo_geom = reference_sample_agree(binary_map, map_meta, ceo_set_1, ceo_set_2)
ceo_geom = ceo_geom[ceo_geom["Mapped class"] != 255]

cm = compute_confusion_matrix(ceo_geom)

a_j = np.array([noncrop_area_px, crop_area_px], dtype=np.int64)
total_px = a_j.sum()
w_j = a_j / total_px
am = compute_area_error_matrix(cm, w_j)

px_size = map_meta["transform"][0]

estimates = compute_area_estimate(cm, a_j, px_size)

if visual_outputs:
plt.imshow(binary_map, cmap="YlGn", vmin=0, vmax=1)
plt.show()
plot_confusion_matrix(cm, ["Non-crop", "Crop"])
plot_confusion_matrix(am, ["Non-crop", "Crop"], datatype="0.2f")
a_ha, err_ha = estimates["area"]["ha"]
u_j, err_u_j = estimates["user"]
p_i, err_p_i = estimates["producer"]
summary = create_area_estimate_summary(
a_ha, err_ha, u_j, err_u_j, p_i, err_p_i, columns=["Non-Crop", "Crop"]
)
print(summary)

return estimates


def write_area_estimate_results_to_csv(data: dict, output_file: str) -> bool:
"""
Writes area estimate results to a CSV file in an easy-to-interpret format.
Args:
data (dict): A nested dictionary containing the area estimates and their respective confidence intervals
for different regions of interest (ROIs) and years. The dictionary structure is expected to be:
{
'ROI_name_1': {
'year': [list of years],
'crop area': [list of crop area estimates],
'95%CI crop': [list of 95% confidence intervals for crop area],
'Non-crop area': [list of non-crop area estimates],
'95%CI noncrop': [list of 95% confidence intervals for non-crop area]
},
'ROI_name_2': {...},
...
}
output_file (str): The file path where the output CSV will be saved.
Returns:
bool: Returns True when the CSV file is successfully written.
"""

# The header based on the RoI results names(keys)
header = ["Year"]
for roi in data.keys():
header.extend([f"{roi} crop area", "95% CI", f"{roi} Non-crop area", "95% CI"])

with open(output_file, "w", newline="") as file:

writer = csv.writer(file)
writer.writerow(header)

years = data[list(data.keys())[0]]["year"]

for i, year in enumerate(years):
row = [year]
for roi in data.keys():
row.append(data[roi]["crop area"][i])
row.append(data[roi]["95%CI crop"][i])
row.append(data[roi]["Non-crop area"][i])
row.append(data[roi]["95%CI noncrop"][i])
writer.writerow(row)

return True


def run_area_estimates_config(config: dict, show_output_charts: bool = False) -> dict:
"""
Runs area estimates for multiple regions of interest (ROIs) and compiles the results.
Args:
config (dict): A dictionary containing configuration information for the ROIs and their associated data.
It includes details such as the names of the ROIs, paths to boundary files, crop mask files,
CEO datasets, and the output path for saving the CSV file.
show_output_charts (bool): A boolean flag indicating whether to display visual output charts during processing.
Returns:
dict: A dictionary containing area estimates and confidence intervals for each ROI and year.
Raises:
Exception: If there is an error writing the results to the CSV file.
"""

all_results = {}
for roi in config["rois"]:
all_results[roi["roi_name"]] = {
"year": [],
"crop area": [],
"95%CI crop": [],
"Non-crop area": [],
"95%CI noncrop": [],
}
roi_results = all_results[roi["roi_name"]]

for yearly_data in roi["samples_and_cropmap"]:

print("\n" * 3 + f"_____ Running for {roi['roi_name']}, {yearly_data['year']} _____")

# Call the function for yearly estimate
estimates = area_estimate_pipeline(
aoi_path=roi["roi_boundary_path"],
map_path=yearly_data["crop_mask_path"],
ceo_set_1=yearly_data["set1_labels_path"],
ceo_set_2=yearly_data["set2_labels_path"],
visual_outputs=show_output_charts,
)

a_ha, err_ha = estimates["area"]["ha"]
a_ha, err_ha = a_ha.round(2), err_ha.round(2)

non_crop_area = a_ha[0]
crop_area = a_ha[1]
non_crop_area_err = err_ha[0]
crop_area_err = err_ha[1]

# Keep yearly results
roi_results["year"].append(yearly_data["year"])
roi_results["crop area"].append(crop_area)
roi_results["95%CI crop"].append(crop_area_err)
roi_results["Non-crop area"].append(non_crop_area)
roi_results["95%CI noncrop"].append(non_crop_area_err)

if write_area_estimate_results_to_csv(all_results, config["output_path"]):
return all_results

else:
raise Exception("Error Writing Estimates to CSV")

0 comments on commit 8924837

Please sign in to comment.