Skip to content

Commit

Permalink
Reject duplicate submissions and improve UUID extraction
Browse files Browse the repository at this point in the history
- Add test case for duplicate submission with an attachment
- Improve logic to extract UUID from xml
- Add logic to reject submission without UUID
  • Loading branch information
rajpatel24 committed Sep 3, 2024
1 parent 7630294 commit f3c89f6
Show file tree
Hide file tree
Showing 28 changed files with 215 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def test_post_submission_authenticated_bad_json(self):
self.assertTrue('error' in rendered_response.data)
self.assertTrue(
rendered_response.data['error'].startswith(
'Received empty submission'
'Instance ID is required'
)
)
self.assertTrue(rendered_response.status_code == 400)
Expand Down Expand Up @@ -513,7 +513,7 @@ def test_post_concurrent_same_submissions(self):
results[result] += 1

assert results[status.HTTP_201_CREATED] == 1
assert results[status.HTTP_409_CONFLICT] == DUPLICATE_SUBMISSIONS_COUNT - 1
assert results[status.HTTP_202_ACCEPTED] == DUPLICATE_SUBMISSIONS_COUNT - 1


def submit_data(identifier, survey_, username_, live_server_url, token_):
Expand Down
9 changes: 5 additions & 4 deletions kobo/apps/openrosa/apps/logger/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ class BuildDbQueriesNoConfirmationProvidedError(Exception):
pass


class ConflictingXMLHashInstanceError(Exception):
pass


class DuplicateInstanceError(Exception):
def __init__(self, message='Duplicate Instance'):
super().__init__(message)
Expand All @@ -37,6 +33,11 @@ def __init__(self, message='Could not determine the user'):
super().__init__(message)


class InstanceIdMissingError(Exception):
def __init__(self, message='Could not determine the instance ID'):
super().__init__(message)


class InstanceMultipleNodeError(Exception):
pass

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,7 @@
</kids>
<gps>-1.2627557 36.7926442 0.0 30.0</gps>
<web_browsers>chrome ie</web_browsers>
<meta>
<instanceID>uuid:364f173c688e482486a48661700466gg</instanceID>
</meta>
</new_repeats>
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,7 @@
</kids>
<gps>-1.2627557 36.7926442 0.0 30.0</gps>
<web_browsers>chrome ie</web_browsers>
<meta>
<instanceID>uuid:364f173c688e482486a48661700466gg</instanceID>
</meta>
</new_repeats>
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<?xml version='1.0' ?><survey_names id="survey_names"><start>2012-08-17T11:24:53.254+03</start><name>HD</name><end>2012-08-17T11:24:57.897+03</end></survey_names>
<?xml version='1.0' ?><survey_names id="survey_names"><start>2012-08-17T11:24:53.254+03</start><name>HD</name><end>2012-08-17T11:24:57.897+03</end><meta><instanceID>uuid:729f173c688e482486a48661700455ff</instanceID></meta></survey_names>
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,7 @@
<has_children>0</has_children>
<gps>-1.2836198 36.8795437 0.0 1044.0</gps>
<web_browsers>firefox chrome safari</web_browsers>
<meta>
<instanceID>uuid:364f173c688e482486a48661700466gg</instanceID>
</meta>
</tutorial>
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@
<has_children>0</has_children>
<gps>-1.2836198 36.8795437 0.0 1044.0</gps>
<web_browsers>firefox chrome safari</web_browsers>
<meta>
<instanceID>uuid:639f173c688e482486a48661700456ty</instanceID>
</meta>
</tutorial>
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version='1.0' ?>
<tutorial id="tutorial">
<name>Larry
Again
</name><!-- newline to make sure we can handle it -->
<age>23</age>
<picture>1335783522563.jpg</picture>
<has_children>0</has_children>
<gps>-1.2836198 36.8795437 0.0 1044.0</gps>
<web_browsers>firefox chrome safari</web_browsers>
<meta>
<instanceID>uuid:729f173c688e482486a48661700455ff</instanceID>
</meta>
</tutorial>
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<?xml version='1.0' ?><Water id="Water_2011_03_17"><research_assistant_name>Andrew</research_assistant_name><photo>1300375832136.jpg</photo><location><gps>9.045921564102173 7.526530623435974 572.0 48.0</gps><zone>northwest</zone><state_in_northwest>kebbi</state_in_northwest><lga_in_kebbi>bunza</lga_in_kebbi><ward>R</ward><community>Y</community></location><respondent><name>R</name><position>R</position><phone_number>4</phone_number></respondent><locality>T</locality><nearest_school_or_clinic>R</nearest_school_or_clinic><pre_existing_id>n/a</pre_existing_id><type>borehole</type><distribution_type>single_point</distribution_type><lift_mechanism>diesel</lift_mechanism><developed_by>federal_government</developed_by><managed_by>federal_government state_government</managed_by><managing_organizations><federal_government><name>T</name></federal_government><state_government><name>Y</name></state_government><local_government /><community /><individual /><international_development_partner /><private_organization /></managing_organizations><pay_for_water>no</pay_for_water><used_today>no</used_today><reasons_not_used>no_diesel missing_stolen_parts</reasons_not_used><physical_state>well_maintained</physical_state><other_close_water>yes</other_close_water><times_used>year_round</times_used><help_completed>nobody</help_completed><thank_you /></Water>
<?xml version='1.0' ?><Water id="Water_2011_03_17"><research_assistant_name>Andrew</research_assistant_name><photo>1300375832136.jpg</photo><location><gps>9.045921564102173 7.526530623435974 572.0 48.0</gps><zone>northwest</zone><state_in_northwest>kebbi</state_in_northwest><lga_in_kebbi>bunza</lga_in_kebbi><ward>R</ward><community>Y</community></location><respondent><name>R</name><position>R</position><phone_number>4</phone_number></respondent><locality>T</locality><nearest_school_or_clinic>R</nearest_school_or_clinic><pre_existing_id>n/a</pre_existing_id><type>borehole</type><distribution_type>single_point</distribution_type><lift_mechanism>diesel</lift_mechanism><developed_by>federal_government</developed_by><managed_by>federal_government state_government</managed_by><managing_organizations><federal_government><name>T</name></federal_government><state_government><name>Y</name></state_government><local_government /><community /><individual /><international_development_partner /><private_organization /></managing_organizations><pay_for_water>no</pay_for_water><used_today>no</used_today><reasons_not_used>no_diesel missing_stolen_parts</reasons_not_used><physical_state>well_maintained</physical_state><other_close_water>yes</other_close_water><times_used>year_round</times_used><help_completed>nobody</help_completed><meta><instanceID>uuid:435f173c688e482486a48661700467gh</instanceID></meta><thank_you /></Water>
Binary file not shown.
2 changes: 1 addition & 1 deletion kobo/apps/openrosa/apps/logger/tests/test_backup_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_date_created_override(self):
"""
xml_file_path = os.path.join(
settings.OPENROSA_APP_DIR, "apps", "logger", "fixtures",
"tutorial", "instances", "tutorial_2012-06-27_11-27-53.xml")
"tutorial", "instances", "tutorial_2012-06-27_11-27-53_w_uuid.xml")
xml_file = django_file(
xml_file_path, field_name="xml_file", content_type="text/xml")
media_files = []
Expand Down
44 changes: 38 additions & 6 deletions kobo/apps/openrosa/apps/logger/tests/test_form_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from kobo.apps.openrosa.apps.main.models.user_profile import UserProfile
from kobo.apps.openrosa.apps.main.tests.test_base import TestBase
from kobo.apps.openrosa.apps.logger.models import Instance
from kobo.apps.openrosa.apps.logger.models import Instance, Attachment
from kobo.apps.openrosa.apps.logger.models.instance import InstanceHistory
from kobo.apps.openrosa.apps.logger.xform_instance_parser import clean_and_parse_xml
from kobo.apps.openrosa.apps.viewer.models.parsed_instance import ParsedInstance
Expand All @@ -36,7 +36,7 @@ def test_form_post(self):
"""
xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml"
)

self._make_submission(xml_submission_file_path)
Expand Down Expand Up @@ -114,7 +114,7 @@ def test_submission_to_not_required_auth_as_anonymous_user(self):

xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml"
)

# Anonymous should be able to submit data
Expand Down Expand Up @@ -154,7 +154,7 @@ def test_submission_to_require_auth_with_perm(self):

xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml"
)
self._make_submission(xml_submission_file_path, auth=auth)
self.assertEqual(self.response.status_code, 201)
Expand Down Expand Up @@ -252,6 +252,38 @@ def test_duplicate_submission_with_different_content(self):
another_inst = Instance.objects.order_by('pk').last()
self.assertEqual(inst.xml, another_inst.xml)

def test_duplicate_submission_with_same_content_but_with_attachment(self):
"""
Test that submitting the same XML content twice,
first without and then with an attachment,
results in a single instance with the attachment added.
"""
xml_submission_file_path = os.path.join(
os.path.dirname(__file__),
"../fixtures/tutorial/instances/tutorial_with_attachment",
"tutorial_2012-06-27_11-27-53_w_attachment.xml"
)
media_file_path = os.path.join(
os.path.dirname(__file__),
"../fixtures/tutorial/instances/tutorial_with_attachment",
"1335783522563.jpg"
)
initial_instance_count = Instance.objects.count()

# Test submission with XML file
self._make_submission(xml_submission_file_path)
initial_instance = Instance.objects.last()
self.assertEqual(self.response.status_code, 201)
self.assertEqual(Instance.objects.count(), initial_instance_count + 1)

# Test duplicate submission with attachment
with open(media_file_path, 'rb') as media_file:
self._make_submission(xml_submission_file_path, media_file=media_file)
self.assertEqual(self.response.status_code, 201)
self.assertEqual(Instance.objects.count(), initial_instance_count + 1)
self.assertEqual(Attachment.objects.filter(instance=initial_instance).count(), 1)


def test_owner_can_edit_submissions(self):
xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
Expand Down Expand Up @@ -393,7 +425,7 @@ def test_submission_when_requires_auth(self):

xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml"
)
auth = DigestAuth('alice', 'alice')
self._make_submission(
Expand All @@ -410,7 +442,7 @@ def test_submission_linked_to_reporter(self):

xml_submission_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53.xml"
"../fixtures/tutorial/instances/tutorial_2012-06-27_11-27-53_w_uuid.xml"
)
auth = DigestAuth('alice', 'alice')
self._make_submission(
Expand Down
17 changes: 15 additions & 2 deletions kobo/apps/openrosa/apps/logger/tests/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ def test_parse_xform_nested_repeats(self):
'has_kids': '1'
},
'web_browsers': 'chrome ie',
'gps': '-1.2627557 36.7926442 0.0 30.0'
'gps': '-1.2627557 36.7926442 0.0 30.0',
'meta': {
'instanceID': 'uuid:364f173c688e482486a48661700466gg'
}
}
}
self.assertEqual(dict_, expected_dict)
Expand All @@ -83,7 +86,8 @@ def test_parse_xform_nested_repeats(self):
'kids/has_kids': '1',
'info/age': '80',
'web_browsers': 'chrome ie',
'info/name': 'Adam'
'info/name': 'Adam',
'meta/instanceID': 'uuid:364f173c688e482486a48661700466gg'
}
self.assertEqual(flat_dict, expected_flat_dict)

Expand Down Expand Up @@ -140,6 +144,15 @@ def test_get_uuid_from_xml(self):
instanceID = get_uuid_from_xml(xml_str)
self.assertEqual(instanceID, "729f173c688e482486a48661700455ff")

# Additional test case for a custom prefixed UUID
submission = """<?xml version="1.0" encoding="UTF-8" ?>
<submission xmlns:orx="http://openrosa.org/xforms">
<meta><instanceID>uuid:getodk.org:123456789</instanceID></meta>
</submission>
"""
custom_instance_id = get_uuid_from_xml(submission)
self.assertEqual(custom_instance_id, "getodk.org:123456789")

def test_get_deprecated_uuid_from_xml(self):
with open(
os.path.join(
Expand Down
17 changes: 12 additions & 5 deletions kobo/apps/openrosa/apps/logger/tests/test_simple_submission.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# coding: utf-8
import uuid

from django.test import TestCase, RequestFactory
from pyxform import SurveyElementBuilder

Expand Down Expand Up @@ -35,18 +37,23 @@ def _get_xml_for_form(self, xform):
xform.save()

def _submit_at_hour(self, hour):
st_xml = '<?xml version=\'1.0\' ?><start_time id="start_time"><st'\
'art_time>2012-01-11T%d:00:00.000+00</start_time></start'\
'_time>' % hour
st_xml = (
f'<?xml version=\'1.0\' ?><start_time id="start_time">'
f'<start_time>2012-01-11T{hour}:00:00.000+00</start_time>'
f'<meta><instanceID>uuid:918a1889-389f-4427-b48a-0ba16b7c9b{hour}</instanceID></meta>'
f'</start_time>'
)
try:
create_instance(self.user.username, TempFileProxy(st_xml), [])
except DuplicateInstanceError:
pass

def _submit_simple_yes(self):
create_instance(self.user.username, TempFileProxy(
'<?xml version=\'1.0\' ?><yes_or_no id="yes_or_no"><yesno>Yes<'
'/yesno></yes_or_no>'), [])
f'<?xml version=\'1.0\' ?><yes_or_no id="yes_or_no"><yesno>Yes<'
f'/yesno>'
f'<meta><instanceID>uuid:{str(uuid.uuid4())}</instanceID></meta>'
f'</yes_or_no>'), [])

def setUp(self):
self.user = User.objects.create(
Expand Down
22 changes: 13 additions & 9 deletions kobo/apps/openrosa/apps/logger/xform_instance_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,19 @@ def get_meta_from_xml(xml_str: str, meta_name: str) -> str:

def get_uuid_from_xml(xml):

def _uuid_only(uuid, regex):
matches = regex.match(uuid)
if matches and len(matches.groups()) > 0:
return matches.groups()[0]
return None
def _uuid_only(uuid):
"""
Strips the 'uuid:' prefix from the provided identifier if it exists.
This preserves any custom ID schemes (e.g., 'getodk.org:123456789')
while ensuring only the 'uuid:' prefix is removed. This approach
adheres to the OpenRosa spec, allowing custom prefixes to be stored
intact in the database to prevent potential ID collisions.
"""
return re.sub(r'^uuid:', '', uuid)

uuid = get_meta_from_xml(xml, "instanceID")
regex = re.compile(r"uuid:(.*)")
if uuid:
return _uuid_only(uuid, regex)
return _uuid_only(uuid)
# check in survey_node attributes
xml = clean_and_parse_xml(xml)
children = xml.childNodes
Expand All @@ -74,12 +78,12 @@ def _uuid_only(uuid, regex):
survey_node = children[0]
uuid = survey_node.getAttribute('instanceID')
if uuid != '':
return _uuid_only(uuid, regex)
return _uuid_only(uuid)
return None


def get_root_uuid_from_xml(xml):
root_uuid = get_meta_from_xml(xml, "rootUuid")
root_uuid = get_meta_from_xml(xml, 'rootUuid')
if root_uuid:
return root_uuid

Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<?xml version='1.0' ?><gps id="gps"><location>40.81101715564728 -73.96446704864502 -152.0 16.0</location></gps>
<?xml version='1.0' ?><gps id='gps'><location>40.81101715564728 -73.96446704864502 -152.0 16.0</location><meta><instanceID>uuid:729f173c688e482486a48661700455ff</instanceID></meta></gps>
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<?xml version='1.0' ?><gps id="gps"><location>40.811086893081665 -73.96449387073517 -113.0 16.0</location></gps>
<?xml version='1.0' ?><gps id='gps'><location>40.811086893081665 -73.96449387073517 -113.0 16.0</location><meta><instanceID>uuid:435f173c688e482486a48661700467gh</instanceID></meta></gps>
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@
<gps>-1.2627107 36.7925771 0.0 37.5</gps>
</gps_group>
<web_browsers>chrome safari</web_browsers>
<meta>
<instanceID>uuid:435f173c688e482486a486617004534df</instanceID>
</meta>
</grouped_gps>
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,7 @@
</kids>
<gps>-1.2627557 36.7926442 0.0 30.0</gps>
<web_browsers>chrome ie</web_browsers>
<meta>
<instanceID>uuid:435f173c688e482463a486617004534df</instanceID>
</meta>
</new_repeats>
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,7 @@
</kids>
<gps>-1.2627557 36.7926442 0.0 30.0</gps>
<web_browsers>chrome ie firefox</web_browsers>
<meta>
<instanceID>uuid:32sd3c688e482486a48661700455ff</instanceID>
</meta>
</new_repeats>
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version='1.0' ?>
<new_repeats id="new_repeats">
<info>
<name>Adam</name>
<age>80</age>
</info>
<kids>
<has_kids>1</has_kids>
<kids_details>
<kids_name>Liam</kids_name>
<kids_age>40</kids_age>
</kids_details>
<kids_details>
<kids_name>Emma</kids_name>
<kids_age>70</kids_age>
</kids_details>
</kids>
<gps>-1.2627557 36.7926442 0.0 30.0</gps>
<web_browsers>chrome</web_browsers>
</new_repeats>
Loading

0 comments on commit f3c89f6

Please sign in to comment.