nasaharvest · Gedeon-m-gedus · Nov 7, 2024 · Aug 31, 2024 · Sep 22, 2024 · Sep 22, 2024
diff --git a/notebooks/multi_roi_area_astimate.ipynb b/notebooks/multi_roi_area_astimate.ipynb
@@ -0,0 +1,205 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!git clone https://github.com/nasaharvest/crop-mask.git"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create a Config file with these steps\n",
+    "\n",
+    "1. Create a file `config.yaml`\n",
+    "2. Follow this template to provide RoI names, and necessary file paths in your config file.\n",
+    "\n",
+    "```\n",
+    "rois: \n",
+    "  - roi_name: AlGedaref\n",
+    "    roi_boundary_path: '../data/roi/alqadarif_boundary.shp'\n",
+    "    samples_and_cropmap:\n",
+    "      - year: 2022\n",
+    "        set1_labels_path: '../data/ceo/rechecked_processed_Feb-2022---Feb-2023_ceo_samples.csv'\n",
+    "        set2_labels_path: '../data/ceo/rechecked_processed_Feb-2022---Feb-2023_ceo_samples.csv'\n",
+    "        crop_mask_path: '../data/rasters/AlQadarif_AlJazirah_2022_cropmask_epsg32636_v5_pp.tif'\n",
+    "      - year: 2023\n",
+    "        set1_labels_path: '../data/ceo/rechecked_processed_Feb-2023---Feb-2024_ceo_samples.csv'\n",
+    "        set2_labels_path: '../data/ceo/rechecked_processed_Feb-2023---Feb-2024_ceo_samples.csv'\n",
+    "        crop_mask_path: '../data/rasters/AlQadarif_AlJazirah_2023_cropmask_epsg32636_v5_pp.tif'\n",
+    "\n",
+    "  - roi_name: AlJazirah\n",
+    "    roi_boundary_path: '../data/roi/aljazirah_boundary.shp'\n",
+    "    samples_and_cropmap:\n",
+    "      - year: 2022\n",
+    "        set1_labels_path: '../data/ceo/rechecked_processed_Feb-2022---Feb-2023_ceo_samples.csv'\n",
+    "        set2_labels_path: '../data/ceo/rechecked_processed_Feb-2022---Feb-2023_ceo_samples.csv'\n",
+    "        crop_mask_path: '../data/rasters/AlQadarif_AlJazirah_2022_cropmask_epsg32636_v5_pp.tif'\n",
+    "      - year: 2023\n",
+    "        set1_labels_path: '../data/ceo/rechecked_processed_Feb-2023---Feb-2024_ceo_samples.csv'\n",
+    "        set2_labels_path: '../data/ceo/rechecked_processed_Feb-2023---Feb-2024_ceo_samples.csv'\n",
+    "        crop_mask_path: '../data/rasters/AlQadarif_AlJazirah_2023_cropmask_epsg32636_v5_pp.tif'\n",
+    "  \n",
+    "output_path: '../results.csv'    \n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install required packages\n",
+    "# Skip this step if you have already installed the packages in your local environment\n",
+    "!pip install geopandas -q\n",
+    "!pip install seaborn -q\n",
+    "!pip install rasterio -q\n",
+    "!pip install cartopy -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import yaml"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "module_path = os.path.abspath(os.path.join('..'))\n",
+    "if module_path not in sys.path:\n",
+    "    sys.path.append(module_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%cd crop-mask"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from src.area_utils import (\n",
+    "    run_area_estimates_config\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config_paths = \"../config.yaml\"\n",
+    "with open(config_paths, 'r') as file:\n",
+    "      config = yaml.safe_load(file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "annual_estimates = run_area_estimates_config(config, show_output_charts=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "\n",
+    "def plot_crop_area_with_confidence(annual_estimates):\n",
+    "    \"\"\"\n",
+    "    This function takes in a dictionary with crop area data and plots a bar chart \n",
+    "    with 95% confidence intervals for each region and year.\n",
+    "\n",
+    "    Parameters:\n",
+    "    data (dict): A dictionary where keys are region names and values contain \n",
+    "                 crop area data, confidence intervals, and years.\n",
+    "    \"\"\"\n",
+    "    regions = list(annual_estimates.keys())\n",
+    "    years = [2022, 2023]\n",
+    "\n",
+    "    crop_areas = np.array([annual_estimates[region]['crop area'] for region in regions])\n",
+    "    crop_ci = np.array([annual_estimates[region]['95%CI crop'] for region in regions])\n",
+    "\n",
+    "    fig, ax = plt.subplots(figsize=(10, 6))\n",
+    "\n",
+    "    bar_width = 0.35\n",
+    "    index = np.arange(len(regions))\n",
+    "\n",
+    "    for i, year in enumerate(years):\n",
+    "        ax.bar(index + i * bar_width, crop_areas[:, i], bar_width,\n",
+    "               yerr=crop_ci[:, i], label=f'Year {year}', capsize=5)\n",
+    "\n",
+    "    ax.set_xlabel('Region')\n",
+    "    ax.set_ylabel('Crop Area (hectares)')\n",
+    "    ax.set_title('Crop Area by Region with 95% Confidence Intervals (2022 vs 2023)')\n",
+    "    ax.set_xticks(index + bar_width / 2)\n",
+    "    ax.set_xticklabels(regions)\n",
+    "    ax.legend()\n",
+    "\n",
+    "    ax.set_axisbelow(True)\n",
+    "    ax.grid(True, which='both', axis='y', linestyle='--', linewidth=0.7)\n",
+    "\n",
+    "    plt.tight_layout()\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_crop_area_with_confidence(annual_estimates)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "cropmask",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/area_utils.py b/src/area_utils.py
@@ -1,3 +1,4 @@
+import csv
 import json
 import os
 from typing import List, Optional, Tuple, Union
@@ -8,6 +9,7 @@
 import numpy as np
 import pandas as pd
 import rasterio as rio
+import yaml
 from osgeo import gdal
 from rasterio import transform
 from rasterio.mask import mask
@@ -741,3 +743,179 @@ def plot_area(summary: pd.DataFrame) -> None:
 
     plt.tight_layout()
     plt.show()
+
+
+def area_estimate_pipeline(
+    aoi_path: str, map_path: str, ceo_set_1: str, ceo_set_2: str, visual_outputs: bool
+) -> dict:
+    """Returns area estimate for a particular region of interest (ROI) using cropland raster and labels.
+
+    Args:
+        aoi_path (str): The file path to the Area of Interest (AOI) ShapeFile/GeoJSON file, which defines the region for which the area estimates are computed.
+        map_path (str): The file path to the cropland raster file (.tif) that contains the land cover map of the area of interest.
+        ceo_set_1 (str): The file path to the first CEO (Collect Earth Online) set, which includes ground truth labels.
+        ceo_set_2 (str): The file path to the second CEO set, which includes additional ground truth labels for further validation or comparison.
+        visual_outputs (bool): A boolean flag indicating whether to generate visual outputs (plots, maps, etc.) to visualize the results.
+
+    Returns:
+        dict: A dictionary containing area estimates and accuracy metrics for each class in the confusion matrix.
+
+        The dictionary has the following keys:
+            - 'user_accuracy': A list of user's accuracy values for each class.
+            - 'producer_accuracy': A list of producer's accuracy values for each class.
+            - 'area_estimates': A nested dictionary containing area estimates and 95% confidence intervals in different units:
+                - 'pr': Proportional area estimates and confidence intervals.
+                - 'px': Area estimates and confidence intervals in pixels.
+                - 'ha': Area estimates and confidence intervals in hectares.
+            - 'confidence_interval': The 95% confidence interval for each class.
+
+    """
+    aoi = gpd.read_file(aoi_path)
+    roi = aoi.dissolve()
+
+    map_array, map_meta = load_raster(map_path, roi)
+
+    if map_array.data.dtype == "uint8":
+        binary_map = map_array.data
+    else:
+        binary_map = binarize(map_array, map_meta)
+
+    crop_area_px, noncrop_area_px = cal_map_area_class(binary_map, unit="pixels")
+    crop_area_ha, noncrop_area_ha = cal_map_area_class(binary_map, unit="ha")
+    crop_area_frac, noncrop_area_frac = cal_map_area_class(binary_map, unit="fraction")
+
+    ceo_geom = reference_sample_agree(binary_map, map_meta, ceo_set_1, ceo_set_2)
+    ceo_geom = ceo_geom[ceo_geom["Mapped class"] != 255]
+
+    cm = compute_confusion_matrix(ceo_geom)
+
+    a_j = np.array([noncrop_area_px, crop_area_px], dtype=np.int64)
+
+    px_size = map_meta["transform"][0]
+
+    estimates = compute_area_estimate(cm, a_j, px_size)
+
+    if visual_outputs:
+        plt.imshow(binary_map, cmap="YlGn", vmin=0, vmax=1)
+        plt.show()
+        a_ha, err_ha = estimates["area"]["ha"]
+        u_j, err_u_j = estimates["user"]
+        p_i, err_p_i = estimates["producer"]
+        summary = create_area_estimate_summary(
+            a_ha, err_ha, u_j, err_u_j, p_i, err_p_i, columns=["Non-Crop", "Crop"]
+        )
+
+    return estimates
+
+
+def write_area_estimate_results_to_csv(data: dict, output_file: str) -> bool:
+    """
+    Writes area estimate results to a CSV file in an easy-to-interpret format.
+
+    Args:
+        data (dict): A nested dictionary containing the area estimates and their respective confidence intervals
+                     for different regions of interest (ROIs) and years. The dictionary structure is expected to be:
+                     {
+                        'ROI_name_1': {
+                            'year': [list of years],
+                            'crop area': [list of crop area estimates],
+                            '95%CI crop': [list of 95% confidence intervals for crop area],
+                            'Non-crop area': [list of non-crop area estimates],
+                            '95%CI noncrop': [list of 95% confidence intervals for non-crop area]
+                        },
+                        'ROI_name_2': {...},
+                        ...
+                     }
+        output_file (str): The file path where the output CSV will be saved.
+
+    Returns:
+        bool: Returns True when the CSV file is successfully written.
+
+    """
+
+    # The header based on the RoI results names(keys)
+    header = ["Year"]
+    for roi in data.keys():
+        header.extend([f"{roi} crop area", "95% CI", f"{roi} Non-crop area", "95% CI"])
+
+    with open(output_file, "w", newline="") as file:
+
+        writer = csv.writer(file)
+        writer.writerow(header)
+
+        years = data[list(data.keys())[0]]["year"]
+
+        for i, year in enumerate(years):
+            row = [year]
+            for roi in data.keys():
+                row.append(data[roi]["crop area"][i])
+                row.append(data[roi]["95%CI crop"][i])
+                row.append(data[roi]["Non-crop area"][i])
+                row.append(data[roi]["95%CI noncrop"][i])
+            writer.writerow(row)
+
+    return True
+
+
+def run_area_estimates_config(config: dict, show_output_charts: bool = False) -> dict:
+    """
+    Runs area estimates for multiple regions of interest (ROIs) and compiles the results.
+
+    Args:
+        config (dict): A dictionary containing configuration information for the ROIs and their associated data.
+                       It includes details such as the names of the ROIs, paths to boundary files, crop mask files,
+                       CEO datasets, and the output path for saving the CSV file.
+        show_output_charts (bool): A boolean flag indicating whether to display visual output charts during processing.
+
+    Returns:
+        dict: A dictionary containing area estimates and confidence intervals for each ROI and year.
+
+    Raises:
+        Exception: If there is an error writing the results to the CSV file.
+
+    """
+
+    all_results = {}
+    for roi in config["rois"]:
+        all_results[roi["roi_name"]] = {
+            "year": [],
+            "crop area": [],
+            "95%CI crop": [],
+            "Non-crop area": [],
+            "95%CI noncrop": [],
+        }
+        roi_results = all_results[roi["roi_name"]]
+
+        for yearly_data in roi["samples_and_cropmap"]:
+
+            print("\n" * 3 + f"_____ Running for {roi['roi_name']}, {yearly_data['year']} _____")
+
+            # Call the function for yearly estimate
+            estimates = area_estimate_pipeline(
+                aoi_path=roi["roi_boundary_path"],
+                map_path=yearly_data["crop_mask_path"],
+                ceo_set_1=yearly_data["set1_labels_path"],
+                ceo_set_2=yearly_data["set2_labels_path"],
+                visual_outputs=show_output_charts,
+            )
+
+            a_ha, err_ha = estimates["area"]["ha"]
+            a_ha, err_ha = a_ha.round(2), err_ha.round(2)
+
+            non_crop_area = a_ha[0]
+            crop_area = a_ha[1]
+            non_crop_area_err = err_ha[0]
+            crop_area_err = err_ha[1]
+
+            # Keep yearly results
+            roi_results["year"].append(yearly_data["year"])
+            roi_results["crop area"].append(crop_area)
+            roi_results["95%CI crop"].append(crop_area_err)
+            roi_results["Non-crop area"].append(non_crop_area)
+            roi_results["95%CI noncrop"].append(non_crop_area_err)
+
+    if write_area_estimate_results_to_csv(all_results, config["output_path"]):
+        return all_results
+
+    else:
+        raise Exception("Error Writing Estimates to CSV")