From 40aa13703a2ca28c2798712d572e86e12e32d0bf Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Fri, 11 Oct 2024 12:54:01 +0100 Subject: [PATCH] Initial commit --- e2e_tests/cli/test_import.py | 111 ++++++++++++- .../data/import/coco_annotations/output.json | 157 ++++++++++++++++++ e2e_tests/helpers.py | 5 +- 3 files changed, 270 insertions(+), 3 deletions(-) create mode 100644 e2e_tests/data/import/coco_annotations/output.json diff --git a/e2e_tests/cli/test_import.py b/e2e_tests/cli/test_import.py index afd6fa082..6669c875b 100644 --- a/e2e_tests/cli/test_import.py +++ b/e2e_tests/cli/test_import.py @@ -16,6 +16,21 @@ from typing import List, Dict, Optional, Union +def compare_local_annotations_with_uploaded_annotations( + annotations_subdir: str, + annotation_format: str, + local_dataset: E2EDataset, + config_values: ConfigValues, +) -> None: + # This will need to: + # Parse local annotations + # Get the state of remote annotations from `/annotations` + # Assert that everything we're expecting is there + # I think this can be done most straightforwardly by parsing each local annotation + # file into a common format (dt.Annotation / dt.VideoAnnotation) and making basic asserts against the API + pass + + def get_actual_annotation_filename( expected_filename: str, actual_annotation_files: Dict[str, str] ) -> str: @@ -181,6 +196,14 @@ def compare_annotations_export( and not any(file.name.startswith(prefix) for prefix in file_prefixes_to_ignore) } for expected_filename in expected_annotation_files: + # This part needs to change to handle multiple formats, perhaps another function which takes the annotation format? + # Need to test with some sample data (Probably COCO as it's the simplest case) + # Or, does it need to change? After parsing the COCO annotations, maybe they're in a unified format? + # If so, I think we only need to change the code responsible for removing UUIDs + + # Yes, I think it will have to change. 
We should put this into a new function + # This will take the format & import the relevant importer, then parse files with `parse_path` + # After parsing, we should have List[AnnotationFile] which should mean very little else has to change, save the UUID removal! actual_filename = get_actual_annotation_filename( expected_filename, actual_annotation_files ) @@ -222,6 +245,8 @@ def run_import_test( config_values: ConfigValues, item_type: str, annotations_subdir: str, + annotation_format: Optional[str] = "darwin", + export_only: Optional[bool] = False, item_name: Optional[str] = None, additional_flags: str = "", exit_code: int = 0, @@ -236,7 +261,7 @@ def run_import_test( Path(__file__).parents[1] / "data" / "import" / annotations_subdir ) result = run_cli_command( - f"darwin dataset import {local_dataset.name} darwin {expected_annotations_dir} {additional_flags}" + f"darwin dataset import {local_dataset.name} {annotation_format} {expected_annotations_dir} {additional_flags}" ) assert_cli(result, exit_code) @@ -247,6 +272,12 @@ def run_import_test( assert expect_error in result.stdout return + if export_only: + compare_local_annotations_with_uploaded_annotations( + annotations_subdir, annotation_format, local_dataset, config_values # type: ignore + ) + return + base_slot = ( get_base_slot_name_of_item(config_values, local_dataset.id, item_name) if item_name @@ -255,7 +286,10 @@ def run_import_test( with tempfile.TemporaryDirectory() as tmp_dir_str: actual_annotations_dir = Path(tmp_dir_str) export_and_download_annotations( - actual_annotations_dir, local_dataset, config_values + actual_annotations_dir, + annotation_format, # type: ignore + local_dataset, + config_values, ) compare_annotations_export( actual_annotations_dir, expected_annotations_dir, item_type, base_slot @@ -465,3 +499,76 @@ def test_import_annotations_with_subtypes_to_videos( item_type="single_slotted_video", annotations_subdir="video_annotations_with_subtypes", ) + + +def 
test_importing_coco_annotations( + local_dataset: E2EDataset, config_values: ConfigValues +) -> None: + annotation_format = "coco" + run_import_test( + local_dataset, + config_values, + item_type="single_slotted", + annotations_subdir="coco_annotations", + annotation_format=annotation_format, + ) + + +def test_importing_csv_tags_annotations( + local_dataset: E2EDataset, config_values: ConfigValues +) -> None: + annotation_format = "csv_tags" + run_import_test( + local_dataset, + config_values, + item_type="single_slotted", + annotations_subdir="csv_tag_annotations", + annotation_format=annotation_format, + export_only=True, + ) + + +def test_importing_csv_tags_video_annotations( + local_dataset: E2EDataset, config_values: ConfigValues +) -> None: + annotation_format = "csv_tags_video" + run_import_test( + local_dataset, + config_values, + item_type="single_slotted_video", + annotations_subdir="csv_tag_video_annotations", + annotation_format=annotation_format, + export_only=True, + ) + + +def test_importing_nifti_annotations( + local_dataset: E2EDataset, config_values: ConfigValues +) -> None: + # This test is tricky to write. We ideally want to test importing NIfTI annotations to a NIfTI file, but: + # I'm fairly sure we can't register a NIfTI file in read-only from external store, so: + # Can we just import to a file that we can register from external storage? Or: + # Can we upload a NIfTI file just for this test? 
+ # More investigation needed to finish + annotation_format = "nifti" + run_import_test( + local_dataset, + config_values, + item_type="nifti", + annotations_subdir="nifti_annotations", + annotation_format=annotation_format, + export_only=True, + ) + + +def test_importing_pascal_voc_annotations( + local_dataset: E2EDataset, config_values: ConfigValues +) -> None: + annotation_format = "pascal_voc" + run_import_test( + local_dataset, + config_values, + item_type="single_slotted", + annotations_subdir="pascal_voc_annotations", + annotation_format=annotation_format, + ) diff --git a/e2e_tests/data/import/coco_annotations/output.json b/e2e_tests/data/import/coco_annotations/output.json new file mode 100644 index 000000000..10268ac65 --- /dev/null +++ b/e2e_tests/data/import/coco_annotations/output.json @@ -0,0 +1,157 @@ +{ + "info": { + "description": "Exported from Darwin", + "url": "n/a", + "version": "n/a", + "year": 2024, + "contributor": "n/a", + "date_created": "2024/10/11" + }, + "licenses": [ + { + "url": "n/a", + "id": 0, + "name": "placeholder license" + } + ], + "images": [ + { + "license": 0, + "file_name": "image_1", + "coco_url": "n/a", + "height": 1080, + "width": 1920, + "date_captured": "", + "flickr_url": "n/a", + "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/1d012d3b-c330-4485-a2a1-14c55d4747ef", + "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=387354&item=01927b33-5a68-74a7-9a46-9fa29a8bffcc", + "id": 3480380172, + "tag_ids": [] + }, + { + "license": 0, + "file_name": "image_2", + "coco_url": "n/a", + "height": 1080, + "width": 1920, + "date_captured": "", + "flickr_url": "n/a", + "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/404a1db2-3dd1-4266-8cf3-6c8fcb0af015", + "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=387354&item=01927b33-5a68-cc62-7ec4-45692d9f06ea", + "id": 1450914486, + "tag_ids": [] + } + ], + "annotations": [ + { + "id": 1, + 
"image_id": 3480380172, + "category_id": 3638723632, + "segmentation": [ + [ + 6.8628, + 6.4715, + 5.6848, + 11.012, + 5.3166, + 11.5274, + 9.4153, + 12.3128, + 9.5626, + 11.7728 + ] + ], + "area": 11.555552699999993, + "bbox": [ + 5.3166, + 6.4715, + 4.2459999999999996, + 5.8412999999999995 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 2, + "image_id": 3480380172, + "category_id": 3638723632, + "segmentation": [ + [ + 17.4164, + 13.0491, + 9.0963, + 19.553, + 21.2697, + 21.6882, + 19.9198, + 12.6073 + ] + ], + "area": 60.13458233000006, + "bbox": [ + 9.0963, + 12.6073, + 12.1734, + 9.080899999999998 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 3, + "image_id": 1450914486, + "category_id": 3638723632, + "segmentation": [ + [ + 6.1866, + 5.8174, + 3.858, + 16.2317, + 15.1131, + 16.6845 + ] + ], + "area": 59.134189004999996, + "bbox": [ + 3.858, + 5.8174, + 11.255099999999999, + 10.8671 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 4, + "image_id": 1450914486, + "category_id": 3638723632, + "segmentation": [ + [ + 18.6061, + 5.6234, + 16.9243, + 9.3751, + 21.3229, + 8.7282 + ] + ], + "area": 7.707135599999987, + "bbox": [ + 16.9243, + 5.6234, + 4.398600000000002, + 3.7516999999999996 + ], + "iscrowd": 0, + "extra": {} + } + ], + "categories": [ + { + "id": 3638723632, + "name": "test_polygon_basic", + "supercategory": "root" + } + ], + "tag_categories": [] +} \ No newline at end of file diff --git a/e2e_tests/helpers.py b/e2e_tests/helpers.py index 16c5a375b..5c3fc652d 100644 --- a/e2e_tests/helpers.py +++ b/e2e_tests/helpers.py @@ -195,6 +195,7 @@ def wait_until_items_processed( def export_and_download_annotations( actual_annotations_dir: Path, + annotation_format: str, local_dataset: E2EDataset, config_values: ConfigValues, ) -> None: @@ -211,11 +212,13 @@ def export_and_download_annotations( create_export_url = ( f"{base_url}/api/v2/teams/{team_slug}/datasets/{dataset_slug}/exports" ) - + if annotation_format == "darwin": + annotation_format = 
"darwin_json_2" # Necessary because this is the only format where `annotation_format` does not match the required payload value payload = { "filters": {"statuses": ["new", "annotate", "review", "complete"]}, "include_authorship": False, "include_export_token": False, + "format": f"{annotation_format}", "name": f"{export_name}", } headers = {