From 40aa13703a2ca28c2798712d572e86e12e32d0bf Mon Sep 17 00:00:00 2001
From: John Wilkie <john@v7labs.com>
Date: Fri, 11 Oct 2024 12:54:01 +0100
Subject: [PATCH] Initial commit

---
 e2e_tests/cli/test_import.py                  | 111 ++++++++++++-
 .../data/import/coco_annotations/output.json  | 157 ++++++++++++++++++
 e2e_tests/helpers.py                          |   5 +-
 3 files changed, 270 insertions(+), 3 deletions(-)
 create mode 100644 e2e_tests/data/import/coco_annotations/output.json

diff --git a/e2e_tests/cli/test_import.py b/e2e_tests/cli/test_import.py
index afd6fa082..6669c875b 100644
--- a/e2e_tests/cli/test_import.py
+++ b/e2e_tests/cli/test_import.py
@@ -16,6 +16,21 @@
 from typing import List, Dict, Optional, Union
 
 
+def compare_local_annotations_with_uploaded_annotations(
+    annotations_subdir: str,
+    annotation_format: str,
+    local_dataset: E2EDataset,
+    config_values: ConfigValues,
+) -> None:
+    # TODO: not implemented yet -- this is currently a no-op placeholder. It will need to:
+    #   Parse the local annotations
+    #   Get the state of the remote annotations from `/annotations`
+    #   Assert that everything we're expecting is there
+    # This can probably be done most straightforwardly by parsing each local annotation
+    # file into a common format (dt.Annotation / dt.VideoAnnotation) and making basic asserts against the API
+    pass
+
+
 def get_actual_annotation_filename(
     expected_filename: str, actual_annotation_files: Dict[str, str]
 ) -> str:
@@ -181,6 +196,14 @@ def compare_annotations_export(
         and not any(file.name.startswith(prefix) for prefix in file_prefixes_to_ignore)
     }
     for expected_filename in expected_annotation_files:
+        # This part needs to change to handle multiple formats; perhaps add another function which takes the annotation format?
+        # Need to test with some sample data (Probably COCO as it's the simplest case)
+        # Or, does it need to change? After parsing the COCO annotations, maybe they're in a unified format?
+        # If so, I think we only need to change the code responsible for removing UUIDs
+
+        # Yes, I think it will have to change. We should put this into a new function
+        # This will take the format & import the relevant importer, then parse files with `parse_path`
+        # After parsing, we should have List[AnnotationFile] which should mean very little else has to change, save the UUID removal!
         actual_filename = get_actual_annotation_filename(
             expected_filename, actual_annotation_files
         )
@@ -222,6 +245,8 @@ def run_import_test(
     config_values: ConfigValues,
     item_type: str,
     annotations_subdir: str,
+    annotation_format: Optional[str] = "darwin",
+    export_only: Optional[bool] = False,
     item_name: Optional[str] = None,
     additional_flags: str = "",
     exit_code: int = 0,
@@ -236,7 +261,7 @@ def run_import_test(
         Path(__file__).parents[1] / "data" / "import" / annotations_subdir
     )
     result = run_cli_command(
-        f"darwin dataset import {local_dataset.name} darwin {expected_annotations_dir} {additional_flags}"
+        f"darwin dataset import {local_dataset.name} {annotation_format} {expected_annotations_dir} {additional_flags}"
     )
     assert_cli(result, exit_code)
 
@@ -247,6 +272,12 @@ def run_import_test(
         assert expect_error in result.stdout
         return
 
+    if export_only:
+        compare_local_annotations_with_uploaded_annotations(
+            annotations_subdir, annotation_format, local_dataset, config_values  # type: ignore
+        )
+        return
+
     base_slot = (
         get_base_slot_name_of_item(config_values, local_dataset.id, item_name)
         if item_name
@@ -255,7 +286,10 @@ def run_import_test(
     with tempfile.TemporaryDirectory() as tmp_dir_str:
         actual_annotations_dir = Path(tmp_dir_str)
         export_and_download_annotations(
-            actual_annotations_dir, local_dataset, config_values
+            actual_annotations_dir,
+            annotation_format,  # type: ignore
+            local_dataset,
+            config_values,
         )
         compare_annotations_export(
             actual_annotations_dir, expected_annotations_dir, item_type, base_slot
@@ -465,3 +499,76 @@ def test_import_annotations_with_subtypes_to_videos(
         item_type="single_slotted_video",
         annotations_subdir="video_annotations_with_subtypes",
     )
+
+
+def test_importing_coco_annotations(
+    local_dataset: E2EDataset, config_values: ConfigValues
+) -> None:
+    """Run the shared import test using the `coco` annotation format."""
+    run_import_test(
+        local_dataset,
+        config_values,
+        item_type="single_slotted",
+        annotations_subdir="coco_annotations",
+        annotation_format="coco",
+    )
+
+
+def test_importing_csv_tags_annotations(
+    local_dataset: E2EDataset, config_values: ConfigValues
+) -> None:
+    """Run the shared import test using `csv_tags`, taking the `export_only` path."""
+    run_import_test(
+        local_dataset,
+        config_values,
+        item_type="single_slotted",
+        annotations_subdir="csv_tag_annotations",
+        annotation_format="csv_tags",
+        export_only=True,
+    )
+
+
+def test_importing_csv_tags_video_annotations(
+    local_dataset: E2EDataset, config_values: ConfigValues
+) -> None:
+    """Run the shared import test using `csv_tags_video`, taking the `export_only` path."""
+    run_import_test(
+        local_dataset,
+        config_values,
+        item_type="single_slotted_video",
+        annotations_subdir="csv_tag_video_annotations",
+        annotation_format="csv_tags_video",
+        export_only=True,
+    )
+
+
+def test_importing_nifti_annotations(
+    local_dataset: E2EDataset, config_values: ConfigValues
+) -> None:
+    """Run the shared import test using `nifti`, taking the `export_only` path (unfinished)."""
+    # This test is tricky to write. Ideally we would import NifTI annotations to a NifTI file, but:
+    # I'm fairly sure we can't register a NifTI file in read-only from external storage, so:
+    # Can we just import to a file that we can register from external storage? Or:
+    # Can we upload a NifTI file just for this test?
+    # More investigation is needed to finish this test
+    run_import_test(
+        local_dataset,
+        config_values,
+        item_type="nifti",
+        annotations_subdir="nifti_annotations",
+        annotation_format="nifti",
+        export_only=True,
+    )
+
+
+def test_importing_pascal_voc_annotations(
+    local_dataset: E2EDataset, config_values: ConfigValues
+) -> None:
+    """Run the shared import test using the `pascal_voc` annotation format."""
+    run_import_test(
+        local_dataset,
+        config_values,
+        item_type="single_slotted",
+        annotations_subdir="pascal_voc_annotations",
+        annotation_format="pascal_voc",
+    )
diff --git a/e2e_tests/data/import/coco_annotations/output.json b/e2e_tests/data/import/coco_annotations/output.json
new file mode 100644
index 000000000..10268ac65
--- /dev/null
+++ b/e2e_tests/data/import/coco_annotations/output.json
@@ -0,0 +1,157 @@
+{
+  "info": {
+    "description": "Exported from Darwin",
+    "url": "n/a",
+    "version": "n/a",
+    "year": 2024,
+    "contributor": "n/a",
+    "date_created": "2024/10/11"
+  },
+  "licenses": [
+    {
+      "url": "n/a",
+      "id": 0,
+      "name": "placeholder license"
+    }
+  ],
+  "images": [
+    {
+      "license": 0,
+      "file_name": "image_1",
+      "coco_url": "n/a",
+      "height": 1080,
+      "width": 1920,
+      "date_captured": "",
+      "flickr_url": "n/a",
+      "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/1d012d3b-c330-4485-a2a1-14c55d4747ef",
+      "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=387354&item=01927b33-5a68-74a7-9a46-9fa29a8bffcc",
+      "id": 3480380172,
+      "tag_ids": []
+    },
+    {
+      "license": 0,
+      "file_name": "image_2",
+      "coco_url": "n/a",
+      "height": 1080,
+      "width": 1920,
+      "date_captured": "",
+      "flickr_url": "n/a",
+      "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/404a1db2-3dd1-4266-8cf3-6c8fcb0af015",
+      "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=387354&item=01927b33-5a68-cc62-7ec4-45692d9f06ea",
+      "id": 1450914486,
+      "tag_ids": []
+    }
+  ],
+  "annotations": [
+    {
+      "id": 1,
+      "image_id": 3480380172,
+      "category_id": 3638723632,
+      "segmentation": [
+        [
+          6.8628,
+          6.4715,
+          5.6848,
+          11.012,
+          5.3166,
+          11.5274,
+          9.4153,
+          12.3128,
+          9.5626,
+          11.7728
+        ]
+      ],
+      "area": 11.555552699999993,
+      "bbox": [
+        5.3166,
+        6.4715,
+        4.2459999999999996,
+        5.8412999999999995
+      ],
+      "iscrowd": 0,
+      "extra": {}
+    },
+    {
+      "id": 2,
+      "image_id": 3480380172,
+      "category_id": 3638723632,
+      "segmentation": [
+        [
+          17.4164,
+          13.0491,
+          9.0963,
+          19.553,
+          21.2697,
+          21.6882,
+          19.9198,
+          12.6073
+        ]
+      ],
+      "area": 60.13458233000006,
+      "bbox": [
+        9.0963,
+        12.6073,
+        12.1734,
+        9.080899999999998
+      ],
+      "iscrowd": 0,
+      "extra": {}
+    },
+    {
+      "id": 3,
+      "image_id": 1450914486,
+      "category_id": 3638723632,
+      "segmentation": [
+        [
+          6.1866,
+          5.8174,
+          3.858,
+          16.2317,
+          15.1131,
+          16.6845
+        ]
+      ],
+      "area": 59.134189004999996,
+      "bbox": [
+        3.858,
+        5.8174,
+        11.255099999999999,
+        10.8671
+      ],
+      "iscrowd": 0,
+      "extra": {}
+    },
+    {
+      "id": 4,
+      "image_id": 1450914486,
+      "category_id": 3638723632,
+      "segmentation": [
+        [
+          18.6061,
+          5.6234,
+          16.9243,
+          9.3751,
+          21.3229,
+          8.7282
+        ]
+      ],
+      "area": 7.707135599999987,
+      "bbox": [
+        16.9243,
+        5.6234,
+        4.398600000000002,
+        3.7516999999999996
+      ],
+      "iscrowd": 0,
+      "extra": {}
+    }
+  ],
+  "categories": [
+    {
+      "id": 3638723632,
+      "name": "test_polygon_basic",
+      "supercategory": "root"
+    }
+  ],
+  "tag_categories": []
+}
\ No newline at end of file
diff --git a/e2e_tests/helpers.py b/e2e_tests/helpers.py
index 16c5a375b..5c3fc652d 100644
--- a/e2e_tests/helpers.py
+++ b/e2e_tests/helpers.py
@@ -195,6 +195,7 @@ def wait_until_items_processed(
 
 def export_and_download_annotations(
     actual_annotations_dir: Path,
+    annotation_format: str,
     local_dataset: E2EDataset,
     config_values: ConfigValues,
 ) -> None:
@@ -211,11 +212,13 @@ def export_and_download_annotations(
     create_export_url = (
         f"{base_url}/api/v2/teams/{team_slug}/datasets/{dataset_slug}/exports"
     )
-
+    if annotation_format == "darwin":
+        annotation_format = "darwin_json_2"  # Necessary because this is the only format where `annotation_format` does not match the required payload value
     payload = {
         "filters": {"statuses": ["new", "annotate", "review", "complete"]},
         "include_authorship": False,
         "include_export_token": False,
+        "format": f"{annotation_format}",
         "name": f"{export_name}",
     }
     headers = {