Skip to content

Commit

Permalink
feat: Count implicit tags (#133)
Browse files Browse the repository at this point in the history
This updates our "get tag counts" API so that the tag counts can
include the implicit, ancestor tags.
  • Loading branch information
bradenmacdonald authored Dec 21, 2023
1 parent 1d9c459 commit 0f90a7f
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 10 deletions.
36 changes: 32 additions & 4 deletions openedx_tagging/core/tagging/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from .data import TagDataQuerySet
from .models import ObjectTag, Tag, Taxonomy
from .models.utils import ConcatNull
from .models.utils import ConcatNull, StringAgg

# Export this as part of the API
TagDoesNotExist = Tag.DoesNotExist
Expand Down Expand Up @@ -196,7 +196,7 @@ def get_object_tags(
return tags


def get_object_tag_counts(object_id_pattern: str) -> dict[str, int]:
def get_object_tag_counts(object_id_pattern: str, count_implicit=False) -> dict[str, int]:
"""
Given an object ID, a "starts with" glob pattern like
"course-v1:foo+bar+baz@*", or a list of "comma,separated,IDs", return a
Expand All @@ -217,8 +217,36 @@ def get_object_tag_counts(object_id_pattern: str) -> dict[str, int]:
qs = qs.exclude(taxonomy_id=None) # The whole taxonomy was deleted
qs = qs.exclude(taxonomy__enabled=False) # The whole taxonomy is disabled
qs = qs.exclude(tag_id=None, taxonomy__allow_free_text=False) # The taxonomy exists but the tag is deleted
qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id")
return {row["object_id"]: row["num_tags"] for row in qs}
if count_implicit:
# Counting the implicit tags is tricky, because if two "grandchild" tags have the same implicit parent tag, we
# need to count that parent tag only once. To do that, we collect all the ancestor tag IDs into an aggregate
# string, and then count the unique values using python
qs = qs.values("object_id").annotate(
num_tags=models.Count("id"),
tag_ids_str_1=StringAgg("tag_id"),
tag_ids_str_2=StringAgg("tag__parent_id"),
tag_ids_str_3=StringAgg("tag__parent__parent_id"),
tag_ids_str_4=StringAgg("tag__parent__parent__parent_id"),
).order_by("object_id")
result = {}
for row in qs:
# ObjectTags for free text taxonomies will be included in "num_tags" count, but not "tag_ids_str_1" since
# they have no tag ID. We can compute how many free text tags each object has now:
if row["tag_ids_str_1"]:
num_free_text_tags = row["num_tags"] - len(row["tag_ids_str_1"].split(","))
else:
num_free_text_tags = row["num_tags"]
# Then we count the total number of *unique* Tags for this object, both implicit and explicit:
other_tag_ids = set()
for field in ("tag_ids_str_1", "tag_ids_str_2", "tag_ids_str_3", "tag_ids_str_4"):
if row[field] is not None:
for tag_id in row[field].split(","):
other_tag_ids.add(int(tag_id))
result[row["object_id"]] = num_free_text_tags + len(other_tag_ids)
return result
else:
qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id")
return {row["object_id"]: row["num_tags"] for row in qs}


def delete_object_tags(object_id: str):
Expand Down
22 changes: 21 additions & 1 deletion openedx_tagging/core/tagging/models/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Utilities for tagging and taxonomy models
"""

from django.db.models import Aggregate, CharField
from django.db.models.expressions import Func


Expand All @@ -22,3 +22,23 @@ def as_sqlite(self, compiler, connection, **extra_context):
arg_joiner=" || ",
**extra_context,
)


class StringAgg(Aggregate): # pylint: disable=abstract-method
"""
Aggregate function that collects the values of some column across all rows,
and creates a string by concatenating those values, with "," as a separator.
This is the same as Django's django.contrib.postgres.aggregates.StringAgg,
but this version works with MySQL and SQLite.
"""
function = 'GROUP_CONCAT'
template = '%(function)s(%(distinct)s%(expressions)s)'

def __init__(self, expression, distinct=False, **extra):
super().__init__(
expression,
distinct='DISTINCT ' if distinct else '',
output_field=CharField(),
**extra,
)
5 changes: 4 additions & 1 deletion openedx_tagging/core/tagging/rest_api/v1/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,9 +532,11 @@ class ObjectTagCountsView(
**Retrieve Parameters**
* object_id_pattern (required): - The Object ID to retrieve ObjectTags for. Can contain '*' at the end
for wildcard matching, or use ',' to separate multiple object IDs.
* count_implicit (optional): If present, implicit parent/grandparent tags will be included in the counts
**Retrieve Example Requests**
GET api/tagging/v1/object_tag_counts/:object_id_pattern
GET api/tagging/v1/object_tag_counts/:object_id_pattern?count_implicit
**Retrieve Query Returns**
* 200 - Success
Expand All @@ -553,8 +555,9 @@ def retrieve(self, request, *args, **kwargs) -> Response:
"""
# This API does NOT bother doing any permission checks as the # of tags is not considered sensitive information.
object_id_pattern = self.kwargs["object_id_pattern"]
count_implicit = "count_implicit" in request.query_params
try:
return Response(get_object_tag_counts(object_id_pattern))
return Response(get_object_tag_counts(object_id_pattern, count_implicit=count_implicit))
except ValueError as err:
raise ValidationError(err.args[0]) from err

Expand Down
33 changes: 33 additions & 0 deletions tests/openedx_tagging/core/tagging/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,36 @@ def test_get_object_tag_counts(self) -> None:
assert tagging_api.get_object_tag_counts(f"{obj1},{obj2}") == {obj1: 1, obj2: 2}
assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 2}

def test_get_object_tag_counts_implicit(self) -> None:
"""
Basic test of get_object_tag_counts, including implicit (parent) tags
Note that:
- "DPANN" is "Archaea > DPANN" (2 tags, 1 implicit), and
- "Chordata" is "Eukaryota > Animalia > Chordata" (3 tags, 2 implicit)
- "Arthropoda" is "Eukaryota > Animalia > Arthropoda" (same)
"""
self.taxonomy.allow_multiple = True
self.taxonomy.save()
obj1, obj2, obj3 = "object_id1", "object_id2", "object_id3"
other = "other_object"
# Give each object 1-2 tags:
tagging_api.tag_object(object_id=obj1, taxonomy=self.taxonomy, tags=["DPANN"])
tagging_api.tag_object(object_id=obj2, taxonomy=self.taxonomy, tags=["Chordata"])
tagging_api.tag_object(object_id=obj2, taxonomy=self.free_text_taxonomy, tags=["has a notochord"])
tagging_api.tag_object(object_id=obj3, taxonomy=self.taxonomy, tags=["Chordata", "Arthropoda"])
tagging_api.tag_object(object_id=other, taxonomy=self.free_text_taxonomy, tags=["other"])

assert tagging_api.get_object_tag_counts(obj1, count_implicit=True) == {obj1: 2}
assert tagging_api.get_object_tag_counts(obj2, count_implicit=True) == {obj2: 4}
assert tagging_api.get_object_tag_counts(f"{obj1},{obj2}", count_implicit=True) == {obj1: 2, obj2: 4}
assert tagging_api.get_object_tag_counts("object_*", count_implicit=True) == {
obj1: 2,
obj2: 4,
obj3: 4, # obj3 has 2 explicit tags and 2 implicit tags (not 4 because the implicit tags are the same)
}
assert tagging_api.get_object_tag_counts(other, count_implicit=True) == {other: 1}

def test_get_object_tag_counts_deleted_disabled(self) -> None:
"""
Test that get_object_tag_counts doesn't "count" disabled taxonomies or
Expand All @@ -726,6 +756,9 @@ def test_get_object_tag_counts_deleted_disabled(self) -> None:
self.free_text_taxonomy.enabled = False
self.free_text_taxonomy.save()
assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 1}
# Also check the result with count_implicit:
# "English" has no implicit tags but "Chordata" has two, so we expect these totals:
assert tagging_api.get_object_tag_counts("object_*", count_implicit=True) == {obj1: 1, obj2: 3}

# But, by the way, if we re-enable the taxonomy and restore the tag, the counts return:
self.free_text_taxonomy.enabled = True
Expand Down
26 changes: 22 additions & 4 deletions tests/openedx_tagging/core/tagging/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1029,10 +1029,14 @@ def test_get_counts(self):
# Course 7 Unit 2
api.tag_object(object_id="course07-unit02-problem01", taxonomy=self.free_text_taxonomy, tags=["b"])
api.tag_object(object_id="course07-unit02-problem02", taxonomy=self.free_text_taxonomy, tags=["c", "d"])
api.tag_object(object_id="course07-unit02-problem03", taxonomy=self.free_text_taxonomy, tags=["N", "M", "x"])

def check(object_id_pattern: str):
result = self.client.get(OBJECT_TAG_COUNTS_URL.format(object_id_pattern=object_id_pattern))
api.tag_object(object_id="course07-unit02-problem03", taxonomy=self.free_text_taxonomy, tags=["N", "M"])
api.tag_object(object_id="course07-unit02-problem03", taxonomy=self.taxonomy, tags=["Mammalia"])

def check(object_id_pattern: str, count_implicit=False):
url = OBJECT_TAG_COUNTS_URL.format(object_id_pattern=object_id_pattern)
if count_implicit:
url += "?count_implicit"
result = self.client.get(url)
assert result.status_code == status.HTTP_200_OK
return result.data

Expand All @@ -1045,6 +1049,20 @@ def check(object_id_pattern: str):
"course07-unit01-problem01": 3,
"course07-unit01-problem02": 2,
}
with self.assertNumQueries(1):
assert check(object_id_pattern="course07-unit02-*") == {
"course07-unit02-problem01": 1,
"course07-unit02-problem02": 2,
"course07-unit02-problem03": 3,
}
with self.assertNumQueries(1):
assert check(object_id_pattern="course07-unit02-*", count_implicit=True) == {
"course07-unit02-problem01": 1,
"course07-unit02-problem02": 2,
# "Mammalia" includes 1 explicit + 3 implicit tags: "Eukaryota > Animalia > Chordata > Mammalia"
# so problem03 has 2 free text tags and "4" life on earth tags:
"course07-unit02-problem03": 6,
}
with self.assertNumQueries(1):
assert check(object_id_pattern="course07-unit*") == {
"course07-unit01-problem01": 3,
Expand Down

0 comments on commit 0f90a7f

Please sign in to comment.