From 314ec61cd6e35647b1ff23e68c1aeccdd33668d9 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Thu, 2 Jan 2025 22:29:07 +0000 Subject: [PATCH 1/2] Allow import of multi-array COCO segmentations --- darwin/importer/formats/coco.py | 40 ++++---- .../importer/formats/import_coco_test.py | 92 +++++++++++++++++++ 2 files changed, 113 insertions(+), 19 deletions(-) create mode 100644 tests/darwin/importer/formats/import_coco_test.py diff --git a/darwin/importer/formats/coco.py b/darwin/importer/formats/coco.py index 616a4eeb5..5d7190345 100644 --- a/darwin/importer/formats/coco.py +++ b/darwin/importer/formats/coco.py @@ -88,7 +88,7 @@ def parse_json( annotation["segmentation"] if image_id not in image_annotations: image_annotations[image_id] = [] - image_annotations[image_id].append( + image_annotations[image_id].extend( parse_annotation(annotation, category_lookup_table) ) @@ -105,7 +105,7 @@ def parse_json( def parse_annotation( annotation: Dict[str, dt.UnknownType], category_lookup_table: Dict[str, dt.UnknownType], -) -> Optional[dt.Annotation]: +) -> List[dt.Annotation]: """ Parses the given ``json`` dictionary into a darwin ``Annotation`` if possible. @@ -128,20 +128,20 @@ def parse_annotation( if iscrowd: logger.warn( f"Skipping annotation {annotation.get('id')} because it is a crowd " - "annotation, and Darwin does not support import of crowd annotations." + "annotation, and Darwin does not support import of COCO crowd annotations." ) - return None + return [] if len(segmentation) == 0 and len(annotation["bbox"]) == 4: x, y, w, h = map(int, annotation["bbox"]) - return dt.make_bounding_box(category["name"], x, y, w, h) + return [dt.make_bounding_box(category["name"], x, y, w, h)] elif ( len(segmentation) == 0 and len(annotation["bbox"]) == 1 and len(annotation["bbox"][0]) == 4 ): x, y, w, h = map(int, annotation["bbox"][0]) - return dt.make_bounding_box(category["name"], x, y, w, h) + return [dt.make_bounding_box(category["name"], x, y, w, h)] elif isinstance(segmentation, dict): logger.warn( "warning, converting complex coco rle mask to polygon, could take some time" @@ -167,21 +167,23 @@ def parse_annotation( except StopIteration: break paths.append(path) - return dt.make_polygon(category["name"], paths) + return [dt.make_polygon(category["name"], paths)] elif isinstance(segmentation, list): - path = [] - points = iter( - segmentation[0] if isinstance(segmentation[0], list) else segmentation - ) - while True: - try: - x, y = next(points), next(points) - path.append({"x": x, "y": y}) - except StopIteration: - break - return dt.make_polygon(category["name"], path) + paths = segmentation if isinstance(segmentation[0], list) else [segmentation] + polygons = [] + for path in paths: + point_path = [] + points = iter(path) + while True: + try: + x, y = next(points), next(points) + point_path.append({"x": x, "y": y}) + except StopIteration: + break + polygons.append(dt.make_polygon(category["name"], point_path)) + return polygons else: - return None + return [] def _decode_file(current_encoding: str, path: Path): diff --git a/tests/darwin/importer/formats/import_coco_test.py b/tests/darwin/importer/formats/import_coco_test.py new file mode 100644 index 000000000..ce8e6893b --- /dev/null +++ b/tests/darwin/importer/formats/import_coco_test.py @@ -0,0 +1,92 @@ +from typing import Dict, Any + +import darwin.datatypes as dt +from darwin.importer.formats.coco import parse_annotation + + +def test_parse_annotation_single_polygon(): + """Test parsing a single polygon segmentation""" + annotation = { + "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]], + "category_id": "1", + "bbox": [10, 10, 10, 10], + "iscrowd": 0, + } + category_lookup: Dict[str, Any] = {"1": {"name": "test_class"}} + + result = parse_annotation(annotation, category_lookup) + + assert len(result) == 1 + assert isinstance(result[0], dt.Annotation) + assert result[0].annotation_class.name == "test_class" + assert len(result[0].data["paths"]) == 1 + path = result[0].data["paths"][0] + assert len(path) == 4 + assert path[0] == {"x": 10, "y": 10} + assert path[2] == {"x": 20, "y": 20} + + +def test_parse_annotation_multiple_polygons(): + """Test parsing segmentation with multiple polygons""" + annotation = { + "segmentation": [ + [10, 10, 20, 10, 20, 20, 10, 20], + [30, 30, 40, 30, 40, 40, 30, 40], + ], + "category_id": "1", + "bbox": [10, 10, 30, 30], + "iscrowd": 0, + } + category_lookup: Dict[str, Any] = {"1": {"name": "test_class"}} + + result = parse_annotation(annotation, category_lookup) + + assert len(result) == 2 + assert all(isinstance(r, dt.Annotation) for r in result) + assert all(r.annotation_class.name == "test_class" for r in result) + + path1 = result[0].data["paths"][0] + assert len(path1) == 4 + assert path1[0] == {"x": 10, "y": 10} + assert path1[2] == {"x": 20, "y": 20} + + path2 = result[1].data["paths"][0] + assert len(path2) == 4 + assert path2[0] == {"x": 30, "y": 30} + assert path2[2] == {"x": 40, "y": 40} + + +def test_parse_annotation_bounding_box(): + """Test parsing a bounding box annotation""" + annotation = { + "segmentation": [], + "category_id": "1", + "bbox": [10, 20, 30, 40], + "iscrowd": 0, + } + category_lookup: Dict[str, Any] = {"1": {"name": "test_class"}} + + result = parse_annotation(annotation, category_lookup) + + assert len(result) == 1 + assert isinstance(result[0], dt.Annotation) + assert result[0].annotation_class.name == "test_class" + assert result[0].data["x"] == 10 + assert result[0].data["y"] == 20 + assert result[0].data["w"] == 30 + assert result[0].data["h"] == 40 + + +def test_parse_annotation_crowd(): + """Test that crowd annotations are skipped""" + annotation = { + "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]], + "category_id": "1", + "bbox": [10, 10, 10, 10], + "iscrowd": 1, + } + category_lookup: Dict[str, Any] = {"1": {"name": "test_class"}} + + result = parse_annotation(annotation, category_lookup) + + assert len(result) == 0 From 8ec70d0cee690c652fb2fee2c4f96e765f8130f4 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Mon, 6 Jan 2025 10:49:43 +0000 Subject: [PATCH 2/2] Support for complex polygon import --- darwin/importer/formats/coco.py | 6 +++--- tests/darwin/importer/formats/import_coco_test.py | 13 +++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/darwin/importer/formats/coco.py b/darwin/importer/formats/coco.py index 5d7190345..1dd3e5725 100644 --- a/darwin/importer/formats/coco.py +++ b/darwin/importer/formats/coco.py @@ -170,7 +170,7 @@ def parse_annotation( return [dt.make_polygon(category["name"], paths)] elif isinstance(segmentation, list): paths = segmentation if isinstance(segmentation[0], list) else [segmentation] - polygons = [] + point_paths = [] for path in paths: point_path = [] points = iter(path) @@ -180,8 +180,8 @@ def parse_annotation( point_path.append({"x": x, "y": y}) except StopIteration: break - polygons.append(dt.make_polygon(category["name"], point_path)) - return polygons + point_paths.append(point_path) + return [dt.make_polygon(category["name"], point_paths)] else: return [] diff --git a/tests/darwin/importer/formats/import_coco_test.py b/tests/darwin/importer/formats/import_coco_test.py index ce8e6893b..ff9e53cd9 100644 --- a/tests/darwin/importer/formats/import_coco_test.py +++ b/tests/darwin/importer/formats/import_coco_test.py @@ -26,8 +26,8 @@ def test_parse_annotation_single_polygon(): assert path[2] == {"x": 20, "y": 20} -def test_parse_annotation_multiple_polygons(): - """Test parsing segmentation with multiple polygons""" +def test_parse_annotation_multiple_paths(): + """Test parsing segmentation with multiple paths in a single polygon""" annotation = { "segmentation": [ [10, 10, 20, 10, 20, 20, 10, 20], @@ -41,16 +41,17 @@ def test_parse_annotation_multiple_polygons(): result = parse_annotation(annotation, category_lookup) - assert len(result) == 2 - assert all(isinstance(r, dt.Annotation) for r in result) - assert all(r.annotation_class.name == "test_class" for r in result) + assert len(result) == 1 + assert isinstance(result[0], dt.Annotation) + assert result[0].annotation_class.name == "test_class" + assert len(result[0].data["paths"]) == 2 path1 = result[0].data["paths"][0] assert len(path1) == 4 assert path1[0] == {"x": 10, "y": 10} assert path1[2] == {"x": 20, "y": 20} - path2 = result[1].data["paths"][0] + path2 = result[0].data["paths"][1] assert len(path2) == 4 assert path2[0] == {"x": 30, "y": 30} assert path2[2] == {"x": 40, "y": 40}