Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DAR-3919][External] Warn when importing annotations with multiple instance IDs #929

Merged
merged 4 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions darwin/importer/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,22 @@ def import_annotations( # noqa: C901
slot_errors, slot_warnings, annotation_format, console
)

if annotation_format == "darwin":
dataset.client.load_feature_flags()

# Check if the flag exists. When the flag is deprecated in the future we will always perform this check
static_instance_id_feature_flag_exists = any(
feature.name == "STATIC_INSTANCE_ID"
for feature in dataset.client.features.get(dataset.team, [])
Copy link
Contributor

@dorfmanrobert dorfmanrobert Sep 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need this static_instance_id_feature_flag_exists and associated check? it exists, so seems this is unneeded

Copy link
Collaborator Author

@JBWilkie JBWilkie Sep 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We only want to run the validation that this PR introduces (_warn_for_annotations_with_multiple_instance_ids) if:

  • 1: The STATIC_INSTANCE_ID feature flag exists and it's value for the target workspace is True, or:
  • 2: The flag doesn't exist. At this point, the feature will be globally enabled so we will always want to run the check. We'll be able to remove this section of code (apart from the function call to _warn_for_annotations_with_multiple_instance_ids, but the flag will be removed before we can update darwin-py

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oooh makes sense thanks!

)
check_for_multi_instance_id_annotations = (
static_instance_id_feature_flag_exists
and dataset.client.feature_enabled("STATIC_INSTANCE_ID")
) or not static_instance_id_feature_flag_exists

if check_for_multi_instance_id_annotations:
_warn_for_annotations_with_multiple_instance_ids(local_files, console)

console.print(
f"{len(local_files) + len(local_files_missing_remotely)} annotation file(s) found.",
style="info",
Expand Down Expand Up @@ -1680,3 +1696,51 @@ def _display_slot_warnings_and_errors(
console.print(f"- File: {file}, issues:", style="info")
for warning in slot_errors[file]:
console.print(f" - {warning}")


def _warn_for_annotations_with_multiple_instance_ids(
local_files: List[dt.AnnotationFile], console: Console
) -> None:
"""
Warns the user if any video annotations have multiple unique instance IDs.

This function checks each video annotation in the provided list of local annotation
files for multiple instance ID values. A warning is printed to the console for each
instance of this occurrence.

Parameters
----------
local_files : List[dt.AnnotationFile]
A list of local annotation files to be checked.
console : Console
The console object used to print warnings and messages.
"""
files_with_multi_instance_id_annotations = {}
files_with_video_annotations = [
local_file for local_file in local_files if local_file.is_video
]
for file in files_with_video_annotations:
for annotation in file.annotations:
unique_instance_ids = []
for frame_idx in annotation.frames: # type: ignore
for subannotation in annotation.frames[frame_idx].subs: # type: ignore
if subannotation.annotation_type == "instance_id":
instance_id = subannotation.data
if instance_id not in unique_instance_ids:
unique_instance_ids.append(instance_id)

if len(unique_instance_ids) > 1:
if file.path not in files_with_multi_instance_id_annotations:
files_with_multi_instance_id_annotations[file.path] = 1
else:
files_with_multi_instance_id_annotations[file.path] += 1

if files_with_multi_instance_id_annotations:
console.print(
"The following files have annotation(s) with multiple instance ID values. Instance IDs are static, so only the first instance ID of each annotation will be imported:",
style="warning",
)
for file in files_with_multi_instance_id_annotations:
console.print(
f"- File: {file} has {files_with_multi_instance_id_annotations[file]} annotation(s) with multiple instance IDs"
)
124 changes: 124 additions & 0 deletions tests/darwin/importer/importer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
_resolve_annotation_classes,
_verify_slot_annotation_alignment,
_import_properties,
_warn_for_annotations_with_multiple_instance_ids,
)


Expand Down Expand Up @@ -1812,3 +1813,126 @@ def test_import_new_annotation_level_properties_with_manifest(
],
granularity=PropertyGranularity.annotation,
)


def test_no_instance_id_warning_with_no_video_annotations():
bounding_box_class = dt.AnnotationClass(
name="class1", annotation_type="bounding_box"
)
local_files = [
dt.AnnotationFile(
path=Path("file1.json"),
is_video=False,
annotations=[],
filename="file1",
annotation_classes={bounding_box_class},
),
dt.AnnotationFile(
path=Path("file2.json"),
is_video=False,
annotations=[],
filename="file2",
annotation_classes={bounding_box_class},
),
]
console = MagicMock()
_warn_for_annotations_with_multiple_instance_ids(local_files, console)
console.print.assert_not_called()


def test_warning_with_multiple_files_with_multi_instance_id_annotations():
bounding_box_class = dt.AnnotationClass(
name="class1", annotation_type="bounding_box"
)
annotation1 = dt.VideoAnnotation(
annotation_class=bounding_box_class,
frames={
0: dt.Annotation(
annotation_class=dt.AnnotationClass(
name="class1", annotation_type="bounding_box"
),
data={"x": 5, "y": 10, "w": 5, "h": 10},
subs=[
dt.SubAnnotation(annotation_type="instance_id", data="1"),
],
),
1: dt.Annotation(
annotation_class=dt.AnnotationClass(
name="class1", annotation_type="bounding_box"
),
data={"x": 15, "y": 20, "w": 15, "h": 20},
subs=[
dt.SubAnnotation(annotation_type="instance_id", data="2"),
],
),
2: dt.Annotation(
annotation_class=dt.AnnotationClass(
name="class1", annotation_type="bounding_box"
),
data={"x": 25, "y": 30, "w": 25, "h": 30},
subs=[
dt.SubAnnotation(annotation_type="instance_id", data="3"),
],
),
},
keyframes={0: True, 1: False, 2: True},
segments=[[0, 2]],
interpolated=False,
)
annotation2 = dt.VideoAnnotation(
annotation_class=bounding_box_class,
frames={
0: dt.Annotation(
annotation_class=dt.AnnotationClass(
name="class1", annotation_type="bounding_box"
),
data={"x": 5, "y": 10, "w": 5, "h": 10},
subs=[
dt.SubAnnotation(annotation_type="instance_id", data="1"),
],
),
1: dt.Annotation(
annotation_class=dt.AnnotationClass(
name="class1", annotation_type="bounding_box"
),
data={"x": 15, "y": 20, "w": 15, "h": 20},
subs=[
dt.SubAnnotation(annotation_type="instance_id", data="2"),
],
),
2: dt.Annotation(
annotation_class=dt.AnnotationClass(
name="class1", annotation_type="bounding_box"
),
data={"x": 25, "y": 30, "w": 25, "h": 30},
subs=[
dt.SubAnnotation(annotation_type="instance_id", data="3"),
],
),
},
keyframes={0: True, 1: False, 2: True},
segments=[[0, 2]],
interpolated=False,
)
local_files = [
dt.AnnotationFile(
path=Path("file1.json"),
is_video=True,
annotations=[annotation1],
filename="file1",
annotation_classes={bounding_box_class},
),
dt.AnnotationFile(
path=Path("file2.json"),
is_video=True,
annotations=[annotation2],
filename="file2",
annotation_classes={bounding_box_class},
),
]
console = MagicMock()
_warn_for_annotations_with_multiple_instance_ids(local_files, console)
console.print.assert_called()
assert (
console.print.call_count == 3
) # One for the warning message, two for the file details