From 2f92fd70acf49d19acc86e34f31a6b81bd0c3a35 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Wed, 23 Oct 2024 18:18:30 +0100 Subject: [PATCH] Test for importing non-Darwin JSON formats --- e2e_tests/cli/test_import.py | 172 ++++-- .../data/import/coco_annotations/output.json | 518 ++++++++++++++++-- .../import/csv_tag_annotations/csv_tags.csv | 8 + .../csv_tags_video.csv | 2 + .../import/pascal_voc_annotations/image_1.xml | 1 + .../import/pascal_voc_annotations/image_2.xml | 1 + .../import/pascal_voc_annotations/image_3.xml | 1 + .../import/pascal_voc_annotations/image_4.xml | 1 + .../import/pascal_voc_annotations/image_5.xml | 1 + .../import/pascal_voc_annotations/image_6.xml | 1 + .../import/pascal_voc_annotations/image_7.xml | 1 + .../import/pascal_voc_annotations/image_8.xml | 1 + e2e_tests/helpers.py | 10 +- e2e_tests/objects.py | 38 +- 14 files changed, 626 insertions(+), 130 deletions(-) create mode 100644 e2e_tests/data/import/csv_tag_annotations/csv_tags.csv create mode 100644 e2e_tests/data/import/csv_tag_video_annotations/csv_tags_video.csv create mode 100644 e2e_tests/data/import/pascal_voc_annotations/image_1.xml create mode 100644 e2e_tests/data/import/pascal_voc_annotations/image_2.xml create mode 100644 e2e_tests/data/import/pascal_voc_annotations/image_3.xml create mode 100644 e2e_tests/data/import/pascal_voc_annotations/image_4.xml create mode 100644 e2e_tests/data/import/pascal_voc_annotations/image_5.xml create mode 100644 e2e_tests/data/import/pascal_voc_annotations/image_6.xml create mode 100644 e2e_tests/data/import/pascal_voc_annotations/image_7.xml create mode 100644 e2e_tests/data/import/pascal_voc_annotations/image_8.xml diff --git a/e2e_tests/cli/test_import.py b/e2e_tests/cli/test_import.py index 6669c875b..d07de0187 100644 --- a/e2e_tests/cli/test_import.py +++ b/e2e_tests/cli/test_import.py @@ -9,26 +9,44 @@ list_items, ) from e2e_tests.objects import E2EDataset, ConfigValues -from darwin.utils.utils import parse_darwin_json import 
tempfile import zipfile import darwin.datatypes as dt -from typing import List, Dict, Optional, Union +import importlib +from typing import List, Dict, Optional, Union, Tuple, Any, Sequence def compare_local_annotations_with_uploaded_annotations( - annotations_subdir: str, annotation_format: str, local_dataset: E2EDataset, config_values: ConfigValues, ) -> None: - # This will need to: - # Parse local annotations - # Get the state of remote annotations from `/annotations` - # Assert that everything we're expecting is there - # I think this can be done most straightforwardly by parsing each location annotation - # file into a common format (dt.Annotation / dt.VideoAnnotation) and making baseic asserts against the API - pass + """ + Checks that every annotation uploaded to every item of the given `local_dataset` is + of the expected type given the annotation format + + This is necessary to verify that imports of formats that cannot be exported are successful + """ + expected_annotation_types = { + "csv_tags": "tag", + "csv_tags_video": "tag", + } + video_formats = ["csv_tags_video"] + expected_annotation_type = expected_annotation_types[annotation_format] + all_item_annotations, _, _ = local_dataset.get_annotation_data(config_values) + for item in local_dataset.items: + item_name = item.name + item_annotations = all_item_annotations[item_name] + for item_annotation in item_annotations: + if annotation_format in video_formats: + frame_indices = item_annotation["data"]["frames"].keys() + for frame_index in frame_indices: + assert ( + expected_annotation_type + in item_annotation["data"]["frames"][frame_index] + ) + else: + assert expected_annotation_type in item_annotation["data"] def get_actual_annotation_filename( @@ -51,8 +69,8 @@ def get_actual_annotation_filename( def find_matching_actual_annotation( - expected_annotation: dt.Annotation, - actual_annotations: List[Union[dt.Annotation, dt.VideoAnnotation]], + expected_annotation: Union[dt.Annotation, 
dt.VideoAnnotation], + actual_annotations: Sequence[Union[dt.Annotation, dt.VideoAnnotation]], ) -> Union[dt.Annotation, dt.VideoAnnotation]: """ For a given expected annotation, finds the corresponding actual annotation @@ -97,14 +115,19 @@ def assert_same_annotation_data( For `dt.VideoAnnotation` objects: Ensures that `expected_annotation.frames` is equivalent to `actual_annotation.frames` """ - if isinstance(expected_annotation, dt.Annotation): + if isinstance(expected_annotation, dt.Annotation) and isinstance( + actual_annotation, dt.Annotation + ): assert expected_annotation.data == actual_annotation.data - elif isinstance(expected_annotation, dt.VideoAnnotation): + elif isinstance(expected_annotation, dt.VideoAnnotation) and isinstance( + actual_annotation, dt.VideoAnnotation + ): assert expected_annotation.frames == actual_annotation.frames def assert_same_annotation_properties( - expected_annotation: dt.Annotation, actual_annotation: dt.Annotation + expected_annotation: Union[dt.Annotation, dt.VideoAnnotation], + actual_annotation: Union[dt.Annotation, dt.VideoAnnotation], ) -> None: """ Ensures that `expected_annotation.properties` is equivalent to `actual_annotation.properties` @@ -114,7 +137,7 @@ def assert_same_annotation_properties( actual_properties = actual_annotation.properties assert actual_properties is not None for expected_property in expected_properties: - assert expected_property in actual_properties # type : ignore + assert expected_property in actual_properties def get_base_slot_name_of_item( @@ -135,9 +158,37 @@ def get_base_slot_name_of_item( return item["slots"][0]["slot_name"] +def parse_expected_and_actual_annotations( + expected_annotation_files, + actual_annotation_files, + expected_filename: str = "", + actual_filename: str = "", + annotation_format: str = "", +) -> Tuple[List[dt.AnnotationFile], List[dt.AnnotationFile]]: + """ + Parses and returns expected & actual annotation files in a given format.
+ """ + importer_module = importlib.import_module( + f"darwin.importer.formats.{annotation_format}" + ) + expected_annotation_data = importer_module.parse_path( + Path(expected_annotation_files[expected_filename]) + ) + actual_annotation_data = importer_module.parse_path( + Path(actual_annotation_files[actual_filename]) + ) + + if not isinstance(expected_annotation_data, list): + expected_annotation_data = [expected_annotation_data] + if not isinstance(actual_annotation_data, list): + actual_annotation_data = [actual_annotation_data] + + return expected_annotation_data, actual_annotation_data + + def assert_same_annotation_slot_name( - expected_annotation: dt.Annotation, - actual_annotation: dt.Annotation, + expected_annotation: Union[dt.Annotation, dt.VideoAnnotation], + actual_annotation: Union[dt.Annotation, dt.VideoAnnotation], item_type: str, base_slot: Optional[str], ) -> None: @@ -159,8 +210,8 @@ def assert_same_annotation_slot_name( def assert_same_item_level_properties( - expected_item_level_properties: List[Dict[str, str]], - actual_item_level_properties: List[Dict[str, str]], + expected_item_level_properties: List[Dict[str, Any]], + actual_item_level_properties: List[Dict[str, Any]], ) -> None: """ Ensures that all expected item-level properties are present in exported item-level properties @@ -174,6 +225,7 @@ def compare_annotations_export( expected_annotations_dir: Path, item_type: str, base_slot: Optional[str] = "0", + annotation_format: str = "darwin", ): """ Compares a set of downloaded annotation files with the imported files that resulted @@ -196,48 +248,44 @@ def compare_annotations_export( and not any(file.name.startswith(prefix) for prefix in file_prefixes_to_ignore) } for expected_filename in expected_annotation_files: - # This part need to change to handle multiple formats, perhaps another function which takes the annotation format? 
- # Need to test with some sample data (Probably COCO as it's the simplest case) - # Or, does it need to change? After parsing the COCO annotations, maybe they're in a unified format? - # If so, I think we only need to change the code responsible for removing UUIDs - - # Yes, I think it will have to change. We'll should put this into a new function - # This will take the format & import the relevant importer, then parse failes with `parse_path` - # After parsing, we should have List[AnnotationFile] which should mean very little else has to change, save the UUID removal! actual_filename = get_actual_annotation_filename( expected_filename, actual_annotation_files ) - expected_annotation_data: List[dt.Annotation] = parse_darwin_json( - Path(expected_annotation_files[expected_filename]) # type: ignore - ) - expected_annotations = expected_annotation_data.annotations # type: ignore - expected_item_level_properties = ( - expected_annotation_data.item_properties # type: ignore - ) - - actual_annotation_data: List[dt.Annotation] = parse_darwin_json( - Path(actual_annotation_files[actual_filename]) # type: ignore - ) - actual_annotations = actual_annotation_data.annotations # type: ignore - actual_item_level_properties = ( - actual_annotation_data.item_properties # type: ignore + expected_annotation_data, actual_annotation_data = ( + parse_expected_and_actual_annotations( + expected_annotation_files, + actual_annotation_files, + expected_filename, + actual_filename, + annotation_format, + ) ) + for idx, expected_annotation_file in enumerate(expected_annotation_data): + actual_annotation_file = actual_annotation_data[idx] + expected_annotations = expected_annotation_file.annotations + actual_annotations = actual_annotation_file.annotations + expected_item_level_properties = ( + expected_annotation_file.item_properties or [] + ) + actual_item_level_properties = actual_annotation_file.item_properties or [] - delete_annotation_uuids(expected_annotations) - 
delete_annotation_uuids(actual_annotations) + delete_annotation_uuids(expected_annotations) + delete_annotation_uuids(actual_annotations) - assert_same_item_level_properties( - expected_item_level_properties, actual_item_level_properties - ) - for expected_annotation in expected_annotations: - actual_annotation = find_matching_actual_annotation( - expected_annotation, actual_annotations - ) - assert_same_annotation_data(expected_annotation, actual_annotation) - assert_same_annotation_properties(expected_annotation, actual_annotation) - assert_same_annotation_slot_name( - expected_annotation, actual_annotation, item_type, base_slot + assert_same_item_level_properties( + expected_item_level_properties, actual_item_level_properties ) + for expected_annotation in expected_annotations: + actual_annotation = find_matching_actual_annotation( + expected_annotation, actual_annotations + ) + assert_same_annotation_data(expected_annotation, actual_annotation) + assert_same_annotation_properties( + expected_annotation, actual_annotation + ) + assert_same_annotation_slot_name( + expected_annotation, actual_annotation, item_type, base_slot + ) def run_import_test( @@ -246,6 +294,7 @@ def run_import_test( item_type: str, annotations_subdir: str, annotation_format: Optional[str] = "darwin", + files_in_flat_structure: bool = False, export_only: Optional[bool] = False, item_name: Optional[str] = None, additional_flags: str = "", @@ -256,7 +305,9 @@ def run_import_test( """ Helper function to run import tests for different item types and annotation configurations. 
""" - local_dataset.register_read_only_items(config_values, item_type) + local_dataset.register_read_only_items( + config_values, item_type, files_in_flat_structure + ) expected_annotations_dir = ( Path(__file__).parents[1] / "data" / "import" / annotations_subdir ) @@ -274,7 +325,7 @@ def run_import_test( if export_only: compare_local_annotations_with_uploaded_annotations( - annotations_subdir, annotation_format, local_dataset, config_values # type: ignore + annotation_format, local_dataset, config_values # type: ignore ) return @@ -292,7 +343,11 @@ def run_import_test( config_values, ) compare_annotations_export( - actual_annotations_dir, expected_annotations_dir, item_type, base_slot + actual_annotations_dir, + expected_annotations_dir, + item_type, + base_slot, + annotation_format, # type: ignore ) @@ -511,6 +566,7 @@ def test_importing_coco_annotations( item_type="single_slotted", annotations_subdir="coco_annotations", annotation_format=annotation_format, + files_in_flat_structure=True, ) diff --git a/e2e_tests/data/import/coco_annotations/output.json b/e2e_tests/data/import/coco_annotations/output.json index 10268ac65..5cf0f25f2 100644 --- a/e2e_tests/data/import/coco_annotations/output.json +++ b/e2e_tests/data/import/coco_annotations/output.json @@ -5,7 +5,7 @@ "version": "n/a", "year": 2024, "contributor": "n/a", - "date_created": "2024/10/11" + "date_created": "2024/10/23" }, "licenses": [ { @@ -23,8 +23,8 @@ "width": 1920, "date_captured": "", "flickr_url": "n/a", - "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/1d012d3b-c330-4485-a2a1-14c55d4747ef", - "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=387354&item=01927b33-5a68-74a7-9a46-9fa29a8bffcc", + "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/f21b93c7-c68a-4961-9ad6-493be0e3c7b1", + "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=420078&item=0192b9e8-0494-7be0-38a1-0fbaedf9dba1", "id": 3480380172, 
"tag_ids": [] }, @@ -36,10 +36,88 @@ "width": 1920, "date_captured": "", "flickr_url": "n/a", - "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/404a1db2-3dd1-4266-8cf3-6c8fcb0af015", - "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=387354&item=01927b33-5a68-cc62-7ec4-45692d9f06ea", + "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/0a88d821-f63a-4ec7-85c2-2146d1d62b8c", + "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=420078&item=0192b9e8-0494-d733-8ad3-090078a45bc1", "id": 1450914486, "tag_ids": [] + }, + { + "license": 0, + "file_name": "image_3", + "coco_url": "n/a", + "height": 1080, + "width": 1920, + "date_captured": "", + "flickr_url": "n/a", + "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/8bfeb6ee-c25a-440d-a213-990165254fce", + "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=420078&item=0192b9e8-0494-9324-dd72-9cba5956d5f8", + "id": 561775136, + "tag_ids": [] + }, + { + "license": 0, + "file_name": "image_4", + "coco_url": "n/a", + "height": 1080, + "width": 1920, + "date_captured": "", + "flickr_url": "n/a", + "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/18fb1d7d-fcc8-4b65-9747-a1abad190381", + "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=420078&item=0192b9e8-0494-d66a-f214-7879ea2930f3", + "id": 3206059907, + "tag_ids": [] + }, + { + "license": 0, + "file_name": "image_5", + "coco_url": "n/a", + "height": 1080, + "width": 1920, + "date_captured": "", + "flickr_url": "n/a", + "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/75da3414-edc4-481c-9898-f9872698274b", + "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=420078&item=0192b9e8-0494-a4c1-60af-b780542b2339", + "id": 3357517589, + "tag_ids": [] + }, + { + "license": 0, + "file_name": "image_6", + "coco_url": "n/a", + "height": 1080, + "width": 1920, + 
"date_captured": "", + "flickr_url": "n/a", + "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/034e9641-f908-4b76-90c9-0e6830e0abea", + "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=420078&item=0192b9e8-0494-2020-8451-9e47f8774560", + "id": 1360459439, + "tag_ids": [] + }, + { + "license": 0, + "file_name": "image_7", + "coco_url": "n/a", + "height": 1080, + "width": 1920, + "date_captured": "", + "flickr_url": "n/a", + "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/3944dcaf-d1f4-4b3e-af11-95deb0ad0ea7", + "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=420078&item=0192b9e8-0494-6ce6-03de-17abe1274053", + "id": 638699065, + "tag_ids": [] + }, + { + "license": 0, + "file_name": "image_8", + "coco_url": "n/a", + "height": 1080, + "width": 1920, + "date_captured": "", + "flickr_url": "n/a", + "darwin_url": "https://staging.v7labs.com/api/v2/teams/e2e-testing/uploads/5db2daaf-dc02-4098-8dbb-513022e64a14", + "darwin_workview_url": "https://staging.v7labs.com/workview?dataset=420078&item=0192b9e8-0494-e424-5ec8-2e271562c750", + "id": 3064912808, + "tag_ids": [] } ], "annotations": [ @@ -49,24 +127,24 @@ "category_id": 3638723632, "segmentation": [ [ - 6.8628, - 6.4715, - 5.6848, - 11.012, - 5.3166, - 11.5274, - 9.4153, - 12.3128, - 9.5626, - 11.7728 + 6.7217, + 3.6824, + 3.3607, + 17.3754, + 11.0785, + 18.2468, + 11.452, + 17.3754, + 11.7009, + 16.0061 ] ], - "area": 11.555552699999993, + "area": 63.865158604999976, "bbox": [ - 5.3166, - 6.4715, - 4.2459999999999996, - 5.8412999999999995 + 3.3607, + 3.6824, + 8.340200000000001, + 14.564400000000001 ], "iscrowd": 0, "extra": {} @@ -77,22 +155,22 @@ "category_id": 3638723632, "segmentation": [ [ - 17.4164, - 13.0491, - 9.0963, - 19.553, - 21.2697, - 21.6882, - 19.9198, - 12.6073 + 18.7964, + 3.558, + 15.0619, + 20.4875, + 22.9043, + 21.8568, + 23.0288, + 21.1099 ] ], - "area": 60.13458233000006, + "area": 71.61397638000005, 
"bbox": [ - 9.0963, - 12.6073, - 12.1734, - 9.080899999999998 + 15.0619, + 3.558, + 7.966900000000001, + 18.2988 ], "iscrowd": 0, "extra": {} @@ -103,20 +181,24 @@ "category_id": 3638723632, "segmentation": [ [ - 6.1866, - 5.8174, - 3.858, - 16.2317, - 15.1131, - 16.6845 + 6.7217, + 3.6824, + 3.3607, + 17.3754, + 11.0785, + 18.2468, + 11.452, + 17.3754, + 11.7009, + 16.0061 ] ], - "area": 59.134189004999996, + "area": 63.865158604999976, "bbox": [ - 3.858, - 5.8174, - 11.255099999999999, - 10.8671 + 3.3607, + 3.6824, + 8.340200000000001, + 14.564400000000001 ], "iscrowd": 0, "extra": {} @@ -127,20 +209,346 @@ "category_id": 3638723632, "segmentation": [ [ - 18.6061, - 5.6234, - 16.9243, - 9.3751, - 21.3229, - 8.7282 + 18.7964, + 3.558, + 15.0619, + 20.4875, + 22.9043, + 21.8568, + 23.0288, + 21.1099 + ] + ], + "area": 71.61397638000005, + "bbox": [ + 15.0619, + 3.558, + 7.966900000000001, + 18.2988 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 5, + "image_id": 561775136, + "category_id": 3638723632, + "segmentation": [ + [ + 6.7217, + 3.6824, + 3.3607, + 17.3754, + 11.0785, + 18.2468, + 11.452, + 17.3754, + 11.7009, + 16.0061 + ] + ], + "area": 63.865158604999976, + "bbox": [ + 3.3607, + 3.6824, + 8.340200000000001, + 14.564400000000001 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 6, + "image_id": 561775136, + "category_id": 3638723632, + "segmentation": [ + [ + 18.7964, + 3.558, + 15.0619, + 20.4875, + 22.9043, + 21.8568, + 23.0288, + 21.1099 + ] + ], + "area": 71.61397638000005, + "bbox": [ + 15.0619, + 3.558, + 7.966900000000001, + 18.2988 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 7, + "image_id": 3206059907, + "category_id": 3638723632, + "segmentation": [ + [ + 6.7217, + 3.6824, + 3.3607, + 17.3754, + 11.0785, + 18.2468, + 11.452, + 17.3754, + 11.7009, + 16.0061 + ] + ], + "area": 63.865158604999976, + "bbox": [ + 3.3607, + 3.6824, + 8.340200000000001, + 14.564400000000001 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 8, + 
"image_id": 3206059907, + "category_id": 3638723632, + "segmentation": [ + [ + 18.7964, + 3.558, + 15.0619, + 20.4875, + 22.9043, + 21.8568, + 23.0288, + 21.1099 + ] + ], + "area": 71.61397638000005, + "bbox": [ + 15.0619, + 3.558, + 7.966900000000001, + 18.2988 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 9, + "image_id": 3357517589, + "category_id": 3638723632, + "segmentation": [ + [ + 6.7217, + 3.6824, + 3.3607, + 17.3754, + 11.0785, + 18.2468, + 11.452, + 17.3754, + 11.7009, + 16.0061 + ] + ], + "area": 63.865158604999976, + "bbox": [ + 3.3607, + 3.6824, + 8.340200000000001, + 14.564400000000001 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 10, + "image_id": 3357517589, + "category_id": 3638723632, + "segmentation": [ + [ + 18.7964, + 3.558, + 15.0619, + 20.4875, + 22.9043, + 21.8568, + 23.0288, + 21.1099 + ] + ], + "area": 71.61397638000005, + "bbox": [ + 15.0619, + 3.558, + 7.966900000000001, + 18.2988 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 11, + "image_id": 1360459439, + "category_id": 3638723632, + "segmentation": [ + [ + 6.7217, + 3.6824, + 3.3607, + 17.3754, + 11.0785, + 18.2468, + 11.452, + 17.3754, + 11.7009, + 16.0061 + ] + ], + "area": 63.865158604999976, + "bbox": [ + 3.3607, + 3.6824, + 8.340200000000001, + 14.564400000000001 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 12, + "image_id": 1360459439, + "category_id": 3638723632, + "segmentation": [ + [ + 18.7964, + 3.558, + 15.0619, + 20.4875, + 22.9043, + 21.8568, + 23.0288, + 21.1099 + ] + ], + "area": 71.61397638000005, + "bbox": [ + 15.0619, + 3.558, + 7.966900000000001, + 18.2988 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 13, + "image_id": 638699065, + "category_id": 3638723632, + "segmentation": [ + [ + 6.7217, + 3.6824, + 3.3607, + 17.3754, + 11.0785, + 18.2468, + 11.452, + 17.3754, + 11.7009, + 16.0061 + ] + ], + "area": 63.865158604999976, + "bbox": [ + 3.3607, + 3.6824, + 8.340200000000001, + 14.564400000000001 + ], + "iscrowd": 0, + "extra": {} + 
}, + { + "id": 14, + "image_id": 638699065, + "category_id": 3638723632, + "segmentation": [ + [ + 18.7964, + 3.558, + 15.0619, + 20.4875, + 22.9043, + 21.8568, + 23.0288, + 21.1099 + ] + ], + "area": 71.61397638000005, + "bbox": [ + 15.0619, + 3.558, + 7.966900000000001, + 18.2988 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 15, + "image_id": 3064912808, + "category_id": 3638723632, + "segmentation": [ + [ + 6.7217, + 3.6824, + 3.3607, + 17.3754, + 11.0785, + 18.2468, + 11.452, + 17.3754, + 11.7009, + 16.0061 + ] + ], + "area": 63.865158604999976, + "bbox": [ + 3.3607, + 3.6824, + 8.340200000000001, + 14.564400000000001 + ], + "iscrowd": 0, + "extra": {} + }, + { + "id": 16, + "image_id": 3064912808, + "category_id": 3638723632, + "segmentation": [ + [ + 18.7964, + 3.558, + 15.0619, + 20.4875, + 22.9043, + 21.8568, + 23.0288, + 21.1099 ] ], - "area": 7.707135599999987, + "area": 71.61397638000005, "bbox": [ - 16.9243, - 5.6234, - 4.398600000000002, - 3.7516999999999996 + 15.0619, + 3.558, + 7.966900000000001, + 18.2988 ], "iscrowd": 0, "extra": {} diff --git a/e2e_tests/data/import/csv_tag_annotations/csv_tags.csv b/e2e_tests/data/import/csv_tag_annotations/csv_tags.csv new file mode 100644 index 000000000..839b66863 --- /dev/null +++ b/e2e_tests/data/import/csv_tag_annotations/csv_tags.csv @@ -0,0 +1,8 @@ +image_1, test_tag_basic +image_2, test_tag_basic +dir1/image_3, test_tag_basic +dir1/image_4, test_tag_basic +dir2/image_5, test_tag_basic +dir2/image_6, test_tag_basic +dir1/dir3/image_7, test_tag_basic +dir1/dir3/image_8, test_tag_basic \ No newline at end of file diff --git a/e2e_tests/data/import/csv_tag_video_annotations/csv_tags_video.csv b/e2e_tests/data/import/csv_tag_video_annotations/csv_tags_video.csv new file mode 100644 index 000000000..18922782d --- /dev/null +++ b/e2e_tests/data/import/csv_tag_video_annotations/csv_tags_video.csv @@ -0,0 +1,2 @@ +mini_uct.mp4, test_tag_basic, 0, 3 +mini_uct.mp4, test_tag_basic, 4, 8 diff --git 
a/e2e_tests/data/import/pascal_voc_annotations/image_1.xml b/e2e_tests/data/import/pascal_voc_annotations/image_1.xml new file mode 100644 index 000000000..4e19ec558 --- /dev/null +++ b/e2e_tests/data/import/pascal_voc_annotations/image_1.xml @@ -0,0 +1 @@ +imagesimage_1images/image_1darwin1920108030test_bounding_box_basicUnspecified00681512test_bounding_box_basicUnspecified0016182320 \ No newline at end of file diff --git a/e2e_tests/data/import/pascal_voc_annotations/image_2.xml b/e2e_tests/data/import/pascal_voc_annotations/image_2.xml new file mode 100644 index 000000000..201c58e62 --- /dev/null +++ b/e2e_tests/data/import/pascal_voc_annotations/image_2.xml @@ -0,0 +1 @@ +imagesimage_2images/image_2darwin1920108030test_bounding_box_basicUnspecified00681512test_bounding_box_basicUnspecified0016182320 \ No newline at end of file diff --git a/e2e_tests/data/import/pascal_voc_annotations/image_3.xml b/e2e_tests/data/import/pascal_voc_annotations/image_3.xml new file mode 100644 index 000000000..9d1d121af --- /dev/null +++ b/e2e_tests/data/import/pascal_voc_annotations/image_3.xml @@ -0,0 +1 @@ +imagesdir1/image_3images/image_3darwin1920108030test_bounding_box_basicUnspecified00681512test_bounding_box_basicUnspecified0016182320 \ No newline at end of file diff --git a/e2e_tests/data/import/pascal_voc_annotations/image_4.xml b/e2e_tests/data/import/pascal_voc_annotations/image_4.xml new file mode 100644 index 000000000..2dc5225cb --- /dev/null +++ b/e2e_tests/data/import/pascal_voc_annotations/image_4.xml @@ -0,0 +1 @@ +imagesdir1/image_4images/image_4darwin1920108030test_bounding_box_basicUnspecified00681512test_bounding_box_basicUnspecified0016182320 \ No newline at end of file diff --git a/e2e_tests/data/import/pascal_voc_annotations/image_5.xml b/e2e_tests/data/import/pascal_voc_annotations/image_5.xml new file mode 100644 index 000000000..028ff26e6 --- /dev/null +++ b/e2e_tests/data/import/pascal_voc_annotations/image_5.xml @@ -0,0 +1 @@ 
+imagesdir2/image_5images/image_5darwin1920108030test_bounding_box_basicUnspecified00681512test_bounding_box_basicUnspecified0016182320 \ No newline at end of file diff --git a/e2e_tests/data/import/pascal_voc_annotations/image_6.xml b/e2e_tests/data/import/pascal_voc_annotations/image_6.xml new file mode 100644 index 000000000..b8750999e --- /dev/null +++ b/e2e_tests/data/import/pascal_voc_annotations/image_6.xml @@ -0,0 +1 @@ +imagesdir2/image_6images/image_6darwin1920108030test_bounding_box_basicUnspecified00681512test_bounding_box_basicUnspecified0016182320 \ No newline at end of file diff --git a/e2e_tests/data/import/pascal_voc_annotations/image_7.xml b/e2e_tests/data/import/pascal_voc_annotations/image_7.xml new file mode 100644 index 000000000..434c19dce --- /dev/null +++ b/e2e_tests/data/import/pascal_voc_annotations/image_7.xml @@ -0,0 +1 @@ +imagesdir1/dir3/image_7images/image_7darwin1920108030test_bounding_box_basicUnspecified00681512test_bounding_box_basicUnspecified0016182320 \ No newline at end of file diff --git a/e2e_tests/data/import/pascal_voc_annotations/image_8.xml b/e2e_tests/data/import/pascal_voc_annotations/image_8.xml new file mode 100644 index 000000000..21cf80168 --- /dev/null +++ b/e2e_tests/data/import/pascal_voc_annotations/image_8.xml @@ -0,0 +1 @@ +imagesdir1/dir3/image_8images/image_8darwin1920108030test_bounding_box_basicUnspecified00681512test_bounding_box_basicUnspecified0016182320 \ No newline at end of file diff --git a/e2e_tests/helpers.py b/e2e_tests/helpers.py index 5c3fc652d..b4991d8e8 100644 --- a/e2e_tests/helpers.py +++ b/e2e_tests/helpers.py @@ -1,6 +1,6 @@ from subprocess import run from time import sleep -from typing import Optional, List, Union +from typing import Optional, Union, Sequence from attr import dataclass from pathlib import Path @@ -212,8 +212,12 @@ def export_and_download_annotations( create_export_url = ( f"{base_url}/api/v2/teams/{team_slug}/datasets/{dataset_slug}/exports" ) + + # Necessary because 
these are the only formats where `annotation_format` does not match the required payload value if annotation_format == "darwin": - annotation_format = "darwin_json_2" # Necessary because this is the only format where `annotation_format` does not match the required payload value + annotation_format = "darwin_json_2" + elif annotation_format == "pascal_voc": + annotation_format = "pascalvoc" payload = { "filters": {"statuses": ["new", "annotate", "review", "complete"]}, "include_authorship": False, @@ -261,7 +265,7 @@ def export_and_download_annotations( def delete_annotation_uuids( - annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] + annotations: Sequence[Union[dt.Annotation, dt.VideoAnnotation]] ): """ Removes all UUIDs present in instances of `dt.Annotation` and `dt.VideoAnnotation` objects. diff --git a/e2e_tests/objects.py b/e2e_tests/objects.py index f19428779..8a538a15c 100644 --- a/e2e_tests/objects.py +++ b/e2e_tests/objects.py @@ -64,14 +64,21 @@ def add_item(self, item: E2EItem) -> None: self.items.append(item) def register_read_only_items( - self, config_values: ConfigValues, item_type: str = "single_slotted" + self, + config_values: ConfigValues, + item_type: str = "single_slotted", + files_in_flat_structure: bool = False, ) -> None: """ Registers a set of images from an external bucket in the dataset in a read-only fashion: Useful for creating dataset to test `pull` or `import` operations on without having to wait for items to finish processing """ - payload = get_read_only_registration_payload(item_type, dataset_slug=self.slug) + payload = get_read_only_registration_payload( + item_type, + dataset_slug=self.slug, + files_in_flat_structure=files_in_flat_structure, + ) api_key = config_values.api_key headers = { "Content-Type": "application/json", @@ -137,7 +144,9 @@ def get_annotation_data( def get_read_only_registration_payload( - item_type: str, dataset_slug: str + item_type: str, + dataset_slug: str, + files_in_flat_structure: bool = 
False, ) -> Dict[str, str]: """ Returns a payload for registering items from external storage in a read-only @@ -147,10 +156,11 @@ def get_read_only_registration_payload( - `multi_channel`: A single item with 3 image channels - `single_slotted_video`: A single single-slotted video """ + path = "/" if files_in_flat_structure else None items = { "single_slotted": [ { - "path": "/", + "path": path or "/", "type": "image", "storage_key": "darwin-py/images/image_1.jpg", "storage_thumbnail_key": "darwin-py/images/image_1_thumbnail.jpg", @@ -159,7 +169,7 @@ def get_read_only_registration_payload( "name": "image_1", }, { - "path": "/", + "path": path or "/", "type": "image", "storage_key": "darwin-py/images/image_2.jpg", "storage_thumbnail_key": "darwin-py/images/image_2_thumbnail.jpg", @@ -168,7 +178,7 @@ def get_read_only_registration_payload( "name": "image_2", }, { - "path": "dir1", + "path": path or "dir1", "type": "image", "storage_key": "darwin-py/images/image_3.jpg", "storage_thumbnail_key": "darwin-py/images/image_3_thumbnail.jpg", @@ -177,7 +187,7 @@ def get_read_only_registration_payload( "name": "image_3", }, { - "path": "dir1", + "path": path or "dir1", "type": "image", "storage_key": "darwin-py/images/image_4.jpg", "storage_thumbnail_key": "darwin-py/images/image_4_thumbnail.jpg", @@ -186,7 +196,7 @@ def get_read_only_registration_payload( "name": "image_4", }, { - "path": "dir2", + "path": path or "dir2", "type": "image", "storage_key": "darwin-py/images/image_5.jpg", "storage_thumbnail_key": "darwin-py/images/image_5_thumbnail.jpg", @@ -195,7 +205,7 @@ def get_read_only_registration_payload( "name": "image_5", }, { - "path": "dir2", + "path": path or "dir2", "type": "image", "storage_key": "darwin-py/images/image_6.jpg", "storage_thumbnail_key": "darwin-py/images/image_6_thumbnail.jpg", @@ -204,7 +214,7 @@ def get_read_only_registration_payload( "name": "image_6", }, { - "path": "dir1/dir3", + "path": path or "dir1/dir3", "type": "image", "storage_key": 
"darwin-py/images/image_7.jpg", "storage_thumbnail_key": "darwin-py/images/image_7_thumbnail.jpg", @@ -213,7 +223,7 @@ def get_read_only_registration_payload( "name": "image_7", }, { - "path": "dir1/dir3", + "path": path or "dir1/dir3", "type": "image", "storage_key": "darwin-py/images/image_8.jpg", "storage_thumbnail_key": "darwin-py/images/image_8_thumbnail.jpg", @@ -224,7 +234,7 @@ def get_read_only_registration_payload( ], "multi_slotted": [ { - "path": "/", + "path": path or "/", "layout": { "slots_grid": [[["0"], ["1"], ["2"]]], "version": 3, @@ -263,7 +273,7 @@ def get_read_only_registration_payload( ], "multi_channel": [ { - "path": "/", + "path": path or "/", "layout": { "slots_grid": [ [ @@ -310,7 +320,7 @@ def get_read_only_registration_payload( ], "single_slotted_video": [ { - "path": "/", + "path": path or "/", "type": "video", "storage_key": "darwin-py/videos/mini_uct.mp4", "storage_thumbnail_key": "darwin-py/videos/video_thumbnail.jpg",