From ac0196633da4a71f3f84c3a7119b0f843b5d3bf4 Mon Sep 17 00:00:00 2001 From: "ygadhiya@umd.edu" Date: Tue, 29 Oct 2024 22:00:52 +0000 Subject: [PATCH 1/3] Added New Data for North Uganda 2020 --- data/raw.dvc | 6 +++--- data/report.txt | 9 +++++++++ datasets.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/data/raw.dvc b/data/raw.dvc index f13f5629..ea890038 100644 --- a/data/raw.dvc +++ b/data/raw.dvc @@ -1,6 +1,6 @@ outs: -- md5: 0916e33f6eef6c80a87e319427005f5e.dir - size: 446720790 - nfiles: 408 +- md5: f08af5bba486092e8c1ce82375a2e247.dir + size: 447063881 + nfiles: 410 path: raw hash: md5 diff --git a/data/report.txt b/data/report.txt index 46f3000b..e11529c9 100644 --- a/data/report.txt +++ b/data/report.txt @@ -521,3 +521,12 @@ eo_data_complete 1000 ✔ training amount: 387, positive class: 1.3% ✔ validation amount: 294, positive class: 1.0% ✔ testing amount: 319, positive class: 1.3% + + + +Uganda_NorthCEO2020 (Timesteps: 24) +---------------------------------------------------------------------------- +eo_data_complete 1000 +✔ training amount: 387, positive class: 21.4% +✔ validation amount: 294, positive class: 15.3% +✔ testing amount: 319, positive class: 14.1% diff --git a/datasets.py b/datasets.py index e6d5282e..91c7ae01 100644 --- a/datasets.py +++ b/datasets.py @@ -539,6 +539,37 @@ def load_labels(self) -> pd.DataFrame: df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) return df +class Uganda_NorthCEO2020(LabeledDataset): + def load_labels(self) -> pd.DataFrame: + raw_folder = raw_dir / "Uganda_North_2020" + df1 = pd.read_csv( + raw_folder + / "ceo-UNHCR-North-Uganda-Feb-2019---Feb-2020-(Set-1)-sample-data-2024-10-01.csv" + ) + df2 = pd.read_csv( + raw_folder + / "ceo-UNHCR-North-Uganda-Feb-2019---Feb-2020-(Set-2)-sample-data-2024-10-01.csv" + ) + df = pd.concat([df1, df2]) + + # Discard rows with no label + df = df[~df["Does this pixel contain active cropland?"].isna()].copy() + df[CLASS_PROB] = df["Does this pixel contain active cropland?"] == "Crop" + df[CLASS_PROB] = df[CLASS_PROB].astype(int) + df["num_labelers"] = 1 + df = df.groupby([LON, LAT], as_index=False, sort=False).agg( + { + CLASS_PROB: "mean", + "num_labelers": "sum", + "plotid": join_unique, + "sampleid": join_unique, + "email": join_unique, + } + ) + df[START], df[END] = date(2019, 1, 1), date(2020, 12, 31) + df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) + return df + class Uganda_NorthCEO2021(LabeledDataset): def load_labels(self) -> pd.DataFrame: @@ -1571,6 +1602,7 @@ def load_labels(self) -> pd.DataFrame: FranceCropArea2020(), Uganda_NorthCEO2016(), Uganda_NorthCEO2017(), + Uganda_NorthCEO2020(), ] if __name__ == "__main__": From e6653e79a9ff0326cf1a9b81f8d4df7f8182ceae Mon Sep 17 00:00:00 2001 From: "ygadhiya@umd.edu" Date: Tue, 29 Oct 2024 22:01:10 +0000 Subject: [PATCH 2/3] Trigger Build From 65bf7e6abf7e5d44fdb805d8d368bb847396f48f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Oct 2024 22:06:46 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datasets.py b/datasets.py index 91c7ae01..c796f1df 100644 --- a/datasets.py +++ b/datasets.py @@ -539,6 +539,7 @@ def load_labels(self) -> pd.DataFrame: df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) return df + class Uganda_NorthCEO2020(LabeledDataset): def load_labels(self) -> pd.DataFrame: raw_folder = raw_dir / "Uganda_North_2020"