Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DAR-5342][External] Allow import of complex polygons in COCO #989

Merged
merged 2 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 21 additions & 19 deletions darwin/importer/formats/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def parse_json(
annotation["segmentation"]
if image_id not in image_annotations:
image_annotations[image_id] = []
image_annotations[image_id].append(
image_annotations[image_id].extend(
parse_annotation(annotation, category_lookup_table)
)

Expand All @@ -105,7 +105,7 @@ def parse_json(
def parse_annotation(
annotation: Dict[str, dt.UnknownType],
category_lookup_table: Dict[str, dt.UnknownType],
) -> Optional[dt.Annotation]:
) -> List[dt.Annotation]:
"""
Parses the given ``json`` dictionary into a darwin ``Annotation`` if possible.

Expand All @@ -128,20 +128,20 @@ def parse_annotation(
if iscrowd:
logger.warn(
f"Skipping annotation {annotation.get('id')} because it is a crowd "
"annotation, and Darwin does not support import of crowd annotations."
"annotation, and Darwin does not support import of COCO crowd annotations."
)
return None
return []

if len(segmentation) == 0 and len(annotation["bbox"]) == 4:
x, y, w, h = map(int, annotation["bbox"])
return dt.make_bounding_box(category["name"], x, y, w, h)
return [dt.make_bounding_box(category["name"], x, y, w, h)]
elif (
len(segmentation) == 0
and len(annotation["bbox"]) == 1
and len(annotation["bbox"][0]) == 4
):
x, y, w, h = map(int, annotation["bbox"][0])
return dt.make_bounding_box(category["name"], x, y, w, h)
return [dt.make_bounding_box(category["name"], x, y, w, h)]
elif isinstance(segmentation, dict):
logger.warn(
"warning, converting complex coco rle mask to polygon, could take some time"
Expand All @@ -167,21 +167,23 @@ def parse_annotation(
except StopIteration:
break
paths.append(path)
return dt.make_polygon(category["name"], paths)
return [dt.make_polygon(category["name"], paths)]
elif isinstance(segmentation, list):
path = []
points = iter(
segmentation[0] if isinstance(segmentation[0], list) else segmentation
)
while True:
try:
x, y = next(points), next(points)
path.append({"x": x, "y": y})
except StopIteration:
break
return dt.make_polygon(category["name"], path)
paths = segmentation if isinstance(segmentation[0], list) else [segmentation]
polygons = []
for path in paths:
point_path = []
points = iter(path)
while True:
try:
x, y = next(points), next(points)
point_path.append({"x": x, "y": y})
except StopIteration:
break
polygons.append(dt.make_polygon(category["name"], point_path))
return polygons
else:
return None
return []


def _decode_file(current_encoding: str, path: Path):
Expand Down
92 changes: 92 additions & 0 deletions tests/darwin/importer/formats/import_coco_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from typing import Dict, Any

import darwin.datatypes as dt
from darwin.importer.formats.coco import parse_annotation


def test_parse_annotation_single_polygon():
    """A segmentation holding one coordinate list yields exactly one polygon."""
    lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
    coco_annotation = {
        "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]],
        "category_id": "1",
        "bbox": [10, 10, 10, 10],
        "iscrowd": 0,
    }

    parsed = parse_annotation(coco_annotation, lookup)

    # Exactly one annotation comes back, carrying the looked-up class name.
    assert len(parsed) == 1
    polygon = parsed[0]
    assert isinstance(polygon, dt.Annotation)
    assert polygon.annotation_class.name == "test_class"

    # The flat [x0, y0, x1, y1, ...] list is expected as one path of 4 points.
    all_paths = polygon.data["paths"]
    assert len(all_paths) == 1
    ring = all_paths[0]
    assert len(ring) == 4
    assert ring[0] == {"x": 10, "y": 10}
    assert ring[2] == {"x": 20, "y": 20}


def test_parse_annotation_multiple_polygons():
    """A segmentation with two coordinate lists yields two separate polygons."""
    lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
    coco_annotation = {
        "segmentation": [
            [10, 10, 20, 10, 20, 20, 10, 20],
            [30, 30, 40, 30, 40, 40, 30, 40],
        ],
        "category_id": "1",
        "bbox": [10, 10, 30, 30],
        "iscrowd": 0,
    }

    parsed = parse_annotation(coco_annotation, lookup)

    assert len(parsed) == 2
    for polygon in parsed:
        assert isinstance(polygon, dt.Annotation)
    for polygon in parsed:
        assert polygon.annotation_class.name == "test_class"

    # (first point, third point) expected for each polygon, in input order.
    expected_corners = [
        ({"x": 10, "y": 10}, {"x": 20, "y": 20}),
        ({"x": 30, "y": 30}, {"x": 40, "y": 40}),
    ]
    for polygon, (first_point, third_point) in zip(parsed, expected_corners):
        ring = polygon.data["paths"][0]
        assert len(ring) == 4
        assert ring[0] == first_point
        assert ring[2] == third_point


def test_parse_annotation_bounding_box():
    """An empty segmentation with a 4-value bbox yields one bounding box."""
    lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
    coco_annotation = {
        "segmentation": [],
        "category_id": "1",
        "bbox": [10, 20, 30, 40],
        "iscrowd": 0,
    }

    parsed = parse_annotation(coco_annotation, lookup)

    assert len(parsed) == 1
    box = parsed[0]
    assert isinstance(box, dt.Annotation)
    assert box.annotation_class.name == "test_class"

    # The bbox values must come through unchanged under the x/y/w/h keys.
    expected_fields = {"x": 10, "y": 20, "w": 30, "h": 40}
    for field, value in expected_fields.items():
        assert box.data[field] == value


def test_parse_annotation_crowd():
    """An annotation flagged ``iscrowd`` is skipped and yields no annotations."""
    lookup: Dict[str, Any] = {"1": {"name": "test_class"}}
    crowd_annotation = {
        "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]],
        "category_id": "1",
        "bbox": [10, 10, 10, 10],
        "iscrowd": 1,
    }

    parsed = parse_annotation(crowd_annotation, lookup)

    # Skipped annotations are reported as an empty result.
    assert len(parsed) == 0
Loading