From ac0196633da4a71f3f84c3a7119b0f843b5d3bf4 Mon Sep 17 00:00:00 2001
From: "ygadhiya@umd.edu" <yashgadhiya10>
Date: Tue, 29 Oct 2024 22:00:52 +0000
Subject: [PATCH 1/3] Added New Data for North Uganda 2020

---
 data/raw.dvc    |  6 +++---
 data/report.txt |  9 +++++++++
 datasets.py     | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/data/raw.dvc b/data/raw.dvc
index f13f5629..ea890038 100644
--- a/data/raw.dvc
+++ b/data/raw.dvc
@@ -1,6 +1,6 @@
 outs:
-- md5: 0916e33f6eef6c80a87e319427005f5e.dir
-  size: 446720790
-  nfiles: 408
+- md5: f08af5bba486092e8c1ce82375a2e247.dir
+  size: 447063881
+  nfiles: 410
   path: raw
   hash: md5
diff --git a/data/report.txt b/data/report.txt
index 46f3000b..e11529c9 100644
--- a/data/report.txt
+++ b/data/report.txt
@@ -521,3 +521,12 @@ eo_data_complete    1000
 ✔ training amount: 387, positive class: 1.3%
 ✔ validation amount: 294, positive class: 1.0%
 ✔ testing amount: 319, positive class: 1.3%
+
+
+
+Uganda_NorthCEO2020 (Timesteps: 24)
+----------------------------------------------------------------------------
+eo_data_complete    1000
+✔ training amount: 387, positive class: 21.4%
+✔ validation amount: 294, positive class: 15.3%
+✔ testing amount: 319, positive class: 14.1%
diff --git a/datasets.py b/datasets.py
index e6d5282e..91c7ae01 100644
--- a/datasets.py
+++ b/datasets.py
@@ -539,6 +539,37 @@ def load_labels(self) -> pd.DataFrame:
         df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3)
         return df
 
+class Uganda_NorthCEO2020(LabeledDataset):
+    def load_labels(self) -> pd.DataFrame:
+        raw_folder = raw_dir / "Uganda_North_2020"
+        df1 = pd.read_csv(
+            raw_folder
+            / "ceo-UNHCR-North-Uganda-Feb-2019---Feb-2020-(Set-1)-sample-data-2024-10-01.csv"
+        )
+        df2 = pd.read_csv(
+            raw_folder
+            / "ceo-UNHCR-North-Uganda-Feb-2019---Feb-2020-(Set-2)-sample-data-2024-10-01.csv"
+        )
+        df = pd.concat([df1, df2])  # stack Set-1 and Set-2 rows into one frame
+
+        # Discard rows whose cropland answer is missing (NaN); they carry no label
+        df = df[~df["Does this pixel contain active cropland?"].isna()].copy()
+        df[CLASS_PROB] = df["Does this pixel contain active cropland?"] == "Crop"
+        df[CLASS_PROB] = df[CLASS_PROB].astype(int)  # True/False -> 1/0 so it can be averaged
+        df["num_labelers"] = 1  # each row counts as one labeler; summed per point below
+        df = df.groupby([LON, LAT], as_index=False, sort=False).agg(  # one row per (LON, LAT)
+            {
+                CLASS_PROB: "mean",  # share of rows at this point answered "Crop"
+                "num_labelers": "sum",  # number of rows merged into this point
+                "plotid": join_unique,
+                "sampleid": join_unique,
+                "email": join_unique,
+            }
+        )
+        df[START], df[END] = date(2019, 1, 1), date(2020, 12, 31)  # two full calendar years
+        df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3)  # presumably val/test fractions
+        return df
+
 
 class Uganda_NorthCEO2021(LabeledDataset):
     def load_labels(self) -> pd.DataFrame:
@@ -1571,6 +1602,7 @@ def load_labels(self) -> pd.DataFrame:
     FranceCropArea2020(),
     Uganda_NorthCEO2016(),
     Uganda_NorthCEO2017(),
+    Uganda_NorthCEO2020(),
 ]
 
 if __name__ == "__main__":

From e6653e79a9ff0326cf1a9b81f8d4df7f8182ceae Mon Sep 17 00:00:00 2001
From: "ygadhiya@umd.edu" <yashgadhiya10>
Date: Tue, 29 Oct 2024 22:01:10 +0000
Subject: [PATCH 2/3] Trigger Build


From 65bf7e6abf7e5d44fdb805d8d368bb847396f48f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 29 Oct 2024 22:06:46 +0000
Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 datasets.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/datasets.py b/datasets.py
index 91c7ae01..c796f1df 100644
--- a/datasets.py
+++ b/datasets.py
@@ -539,6 +539,7 @@ def load_labels(self) -> pd.DataFrame:
         df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3)
         return df
 
+
 class Uganda_NorthCEO2020(LabeledDataset):
     def load_labels(self) -> pd.DataFrame:
         raw_folder = raw_dir / "Uganda_North_2020"