Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fuzzy-date function to case search #35065

Merged
merged 8 commits into from
Sep 12, 2024
15 changes: 15 additions & 0 deletions corehq/apps/case_search/tests/test_filter_dsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,21 @@ def test_fuzzy_match(self):
case_property_query("name", "jimmy", fuzzy=True)
)

def test_fuzzy_date(self):
    """fuzzy-date should query the given date plus its valid typo permutations."""
    # The searched date itself comes first, followed by the valid
    # day/month swaps, digit reversals and decade-reversed variants.
    expected_dates = [
        "2024-12-03",
        "2024-03-12",
        "2024-03-21",
        "2024-12-30",
        "2042-12-03",
        "2042-03-12",
        "2042-03-21",
        "2042-12-30",
    ]
    self._test_xpath_query(
        "fuzzy-date(dob, '2024-12-03')",
        case_property_query("dob", expected_dates, boost_first=True),
    )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Know you're still working on this. Have you thought about adding a test that validates the output of the query is as expected?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AddisonDunn is there a test that runs ES queries against test data already? I am having trouble finding any.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tests in test_case_search_filters test that a query in a case search function returns the cases expected.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AddisonDunn pushed the test


def _test_xpath_query(self, query_string, expected_filter):
helper = QueryHelper("domain")
helper.config = CaseSearchConfig(domain="domain")
Expand Down
2 changes: 2 additions & 0 deletions corehq/apps/case_search/xpath_functions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .query_functions import (
fuzzy_date,
fuzzy_match,
not_,
selected_all,
Expand Down Expand Up @@ -30,6 +31,7 @@
'selected-any': selected_any,
'selected-all': selected_all,
'within-distance': within_distance,
'fuzzy-date': fuzzy_date,
'fuzzy-match': fuzzy_match,
'phonetic-match': phonetic_match,
'starts-with': starts_with,
Expand Down
52 changes: 52 additions & 0 deletions corehq/apps/case_search/xpath_functions/query_functions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import datetime

from django.utils.translation import gettext as _

from eulxml.xpath import serialize
Expand Down Expand Up @@ -95,6 +97,56 @@ def fuzzy_match(node, context):
return case_property_query(property_name, value, fuzzy=True)


def fuzzy_date(node, context):
    """Build the query for ``fuzzy-date(dob, '2024-12-03')``.

    The second argument is checked with ``validate_date``; the query is then
    built from the property name and the list returned by
    ``date_permutations`` for that date.

    :param node: the parsed XPath function call; its two arguments are the
        case property and the date string.
    :param context: passed through to ``unwrap_value`` to resolve the date
        argument.
    :raises XPathFunctionException: if the date argument does not validate
        as ``YYYY-MM-DD``.
    """
    confirm_args_count(node, 2)
    property_name = _property_name_to_string(node.args[0], node)
    value = unwrap_value(node.args[1], context)

    if not validate_date(value):
        raise XPathFunctionException(
            # Interpolate *after* translating: an f-string inside _() bakes
            # the user's value into the msgid, so no catalog entry can match.
            _("'{value}' is not a valid date. Expected 'YYYY-MM-DD'").format(value=value),
            serialize(node)
        )

    return case_property_query(property_name, date_permutations(value), boost_first=True)


def date_permutations(date_str):
    """Return the valid dates that ``date_str`` could plausibly be a typo of.

    Candidates are built by combining these variations: swapping day and
    month, reversing the digits of the month, reversing the digits of the
    day, and swapping the last two digits of the year. Candidates that are
    not real calendar dates (per ``validate_date``) are dropped.

    :param date_str: a ``YYYY-MM-DD`` string. The year part must have at
        least four characters (``year[3]`` is read), which holds for any
        value that already passed ``validate_date``.
    :return: list of unique ``YYYY-MM-DD`` strings in a stable order; the
        (zero-padded) input date comes first when it is itself valid.
    """
    year, month, day = date_str.split('-')
    # strptime treats the leading zero of %m / %d as optional, so input such
    # as '2024-1-3' validates. Pad it so every candidate is a canonical
    # two-digit form and digit reversal behaves as it does for '2024-01-03'.
    month = month.strip().zfill(2)
    day = day.strip().zfill(2)

    swapped_decade = year[:2] + year[3] + year[2]
    flipped_month = month[::-1]
    flipped_day = day[::-1]

    month_day_variants = [
        (month, day),
        (day, month),
        (flipped_month, day),
        (day, flipped_month),
        (month, flipped_day),
        (flipped_day, month),
        (flipped_month, flipped_day),
        (flipped_day, flipped_month),
    ]
    candidates = [
        f"{candidate_year}-{first}-{second}"
        for candidate_year in (year, swapped_decade)
        for first, second in month_day_variants
    ]
    # dict.fromkeys removes duplicates (e.g. when month == day or a part is
    # a palindrome like '11') while preserving the original ordering.
    return [candidate for candidate in dict.fromkeys(candidates) if validate_date(candidate)]


def validate_date(date_text):
    """Return True if ``date_text`` is a string that parses as ``%Y-%m-%d``.

    :param date_text: the value to check.
    :return: True when ``datetime.strptime`` accepts the value, False
        otherwise, including for non-string input.
    """
    try:
        datetime.strptime(date_text, '%Y-%m-%d')
    except (TypeError, ValueError):
        # ValueError: wrong format or impossible calendar date.
        # TypeError: strptime was handed a non-string (presumably possible
        # when the XPath argument is not a quoted literal - verify against
        # unwrap_value); treat it as "not a valid date" instead of crashing.
        return False
    return True


def _property_name_to_string(value, node):
if isinstance(value, Step):
return serialize(value)
Expand Down
11 changes: 10 additions & 1 deletion corehq/apps/es/case_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def multiplex_to_adapter(domain):
return None


def case_property_query(case_property_name, value, fuzzy=False, multivalue_mode=None):
def case_property_query(case_property_name, value, fuzzy=False, multivalue_mode=None, boost_first=False):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esoergel I am feeling a bit iffy about adding yet another flag here. I see two alternatives:

  1. just create a separate function case_property_query_boost_first and call it from fuzzy_date
  2. use _base_property_query directly in fuzzy_date probably renaming it without the underscore in the process.
    I am curious, if you have a preference?

"""
Search for all cases where case property with name `case_property_name`` has text value `value`
"""
Expand All @@ -254,6 +254,15 @@ def case_property_query(case_property_name, value, fuzzy=False, multivalue_mode=
queries.match(value, PROPERTY_VALUE, operator=multivalue_mode)
),
)
if boost_first:
return _base_property_query(
case_property_name,
filters.OR(
filters.term(PROPERTY_VALUE, value),
queries.match(value[0], PROPERTY_VALUE)
)

)
if not fuzzy and multivalue_mode in ['or', 'and']:
return case_property_text_query(case_property_name, value, operator=multivalue_mode)
return exact_case_property_text_query(case_property_name, value)
Expand Down
23 changes: 19 additions & 4 deletions corehq/apps/es/tests/test_case_search_es.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,10 +391,11 @@ def _bootstrap_cases_in_es_for_domain(self, domain, input_cases):

def _assert_query_runs_correctly(self, domain, input_cases, query, xpath_query, output):
self._bootstrap_cases_in_es_for_domain(domain, input_cases)
self.assertItemsEqual(
query.get_ids(),
output
)
if query:
self.assertItemsEqual(
query.get_ids(),
output
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think other tests pass the xpath query in a call to get_case_search_query, under the query. Kind of silly, though; IMO it basically does the same thing.

if xpath_query:
self.assertItemsEqual(
CaseSearchES().xpath_query(self.domain, xpath_query).get_ids(),
Expand Down Expand Up @@ -435,6 +436,20 @@ def test_fuzzy_case_property_query(self):
['c3']
)

def test_fuzzy_date(self):
self._assert_query_runs_correctly(
self.domain,
[
{'_id': 'c1', 'dob': date(2020, 3, 1)},
{'_id': 'c2', 'dob': date(2020, 1, 3)},
{'_id': 'c3', 'dob': date(2002, 3, 1)},
{'_id': 'c4', 'dob': date(2020, 3, 4)},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice

],
None,
"fuzzy-date(dob, '2020-03-01')",
['c1', 'c2', 'c3']
)

def test_multiple_case_search_queries(self):
query = (CaseSearchES().domain(self.domain)
.case_property_query("foo", "redbeard")
Expand Down
13 changes: 13 additions & 0 deletions docs/case_search_query_language.rst
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,19 @@ The following functions are supported:
.. _Fuzzy Query: https://www.elastic.co/guide/en/elasticsearch/reference/8.11/query-dsl-fuzzy-query.html
.. _Levenshtein distance: https://en.wikipedia.org/wiki/Levenshtein_distance

``fuzzy-date``
---------------
* **Behavior**: Determines if a given date is a fuzzy match for a given case property.
* **Return**: True if that particular date or any of the generated permutations matches the case property.
Otherwise False.
* **Arguments**: Two arguments: the case property and the date to check.
* **Usage**: ``fuzzy-date(dob, "2012-12-03")``

.. note::
``fuzzy-date`` generates a list of dates that could result from a typo in the given date, such as swapping
the day and month fields or reversing the digits of the day, the month, or the decade part of the year. Only
combinations of these that are valid dates will be checked against the case property.

``phonetic-match``
------------------
* **Behavior**: Match cases if a given value "sounds like" (using `Soundex`_) the value of a given
Expand Down
Loading