Skip to content
This repository has been archived by the owner on Sep 11, 2024. It is now read-only.

Commit

Permalink
Add more granular total hit counts
Browse files Browse the repository at this point in the history
The top-level "totalCount" (which we call "total_hits" in the search
response) includes all passages found. Adding a count at different levels
means we can surface the total number of each item separately, unblocking
Vespa-based pagination.
  • Loading branch information
olaughter committed Mar 12, 2024
1 parent bd56e3d commit f18a10d
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/cpr_data_access/models/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,12 +257,14 @@ class Family(BaseModel):

id: str
hits: Sequence[Hit]
total_passage_hits: int = 0


class SearchResponse(BaseModel):
"""Relevant results, and search response metadata"""

total_hits: int
total_family_hits: int = 0
query_time_ms: Optional[int] = None
total_time_ms: Optional[int] = None
families: Sequence[Family]
Expand Down
11 changes: 10 additions & 1 deletion src/cpr_data_access/vespa.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,17 @@ def parse_vespa_response(
response_families = dig(root, "children", 0, "children", 0, "children", default=[])

for family in response_families:
total_passage_hits = dig(family, "fields", "count()")
family_hits: List[Hit] = []
for hit in dig(family, "children", 0, "children", default=[]):
family_hits.append(Hit.from_vespa_response(response_hit=hit))
families.append(Family(id=family["value"], hits=family_hits))
families.append(
Family(
id=family["value"],
hits=family_hits,
total_passage_hits=total_passage_hits,
)
)

# For now, we can't sort our results natively in vespa because sort orders are
# applied _before_ grouping. We're sorting here instead.
Expand All @@ -138,8 +145,10 @@ def parse_vespa_response(

continuation = dig(root, "children", 0, "continuation", "next", default=None)
total_hits = dig(root, "fields", "totalCount", default=0)
total_family_hits = dig(root, "children", 0, "fields", "count()", default=0)
return SearchResponse(
total_hits=total_hits,
total_family_hits=total_family_hits,
families=families,
continuation_token=continuation,
query_time_ms=None,
Expand Down
2 changes: 2 additions & 0 deletions src/cpr_data_access/yql_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,10 @@ class YQLBuilder:
$CONTINUATION
all(
group(family_import_id)
output(count())
max($LIMIT)
each(
output(count())
max($MAX_HITS_PER_FAMILY)
each(
output(
Expand Down

0 comments on commit f18a10d

Please sign in to comment.