From a4dfe912a6a1d9d273ec31d7ae06456e071dd258 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Mon, 17 Jun 2024 01:31:03 -0400 Subject: [PATCH] Repair damage from kobotoolbox/formpack#322 --- kobo/apps/reports/report_data.py | 5 +++ kpi/models/asset_version.py | 2 + kpi/serializers/v2/asset.py | 1 + kpi/utils/bugfix.py | 67 ++++++++++++++++++++++++++++++++ kpi/views/v2/asset.py | 13 ++++++- 5 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 kpi/utils/bugfix.py diff --git a/kobo/apps/reports/report_data.py b/kobo/apps/reports/report_data.py index 0661ec3324..cb90a15411 100644 --- a/kobo/apps/reports/report_data.py +++ b/kobo/apps/reports/report_data.py @@ -6,6 +6,7 @@ from rest_framework import serializers from formpack import FormPack +from kpi.utils.bugfix import repair_file_column_content_and_save from kpi.utils.log import logging from .constants import ( FUZZY_VERSION_ID_KEY, @@ -21,6 +22,10 @@ def build_formpack(asset, submission_stream=None, use_all_form_versions=True): are assumed to have been collected with that version of the form. """ + # Cope with kobotoolbox/formpack#322, which wrote invalid content into the + # database + repair_file_column_content_and_save(asset) + if asset.has_deployment: if use_all_form_versions: _versions = asset.deployed_versions diff --git a/kpi/models/asset_version.py b/kpi/models/asset_version.py index eeea8423ad..0bddf9e528 100644 --- a/kpi/models/asset_version.py +++ b/kpi/models/asset_version.py @@ -29,6 +29,8 @@ class AssetVersion(models.Model): ) version_content = models.JSONField() uid_aliases = models.JSONField(null=True) + # Tee hee, `deployed_content` is never written to the database! + # TODO: It should be changed to a property instead, no? deployed_content = models.JSONField(null=True) _deployment_data = models.JSONField(default=dict) deployed = models.BooleanField(default=False) diff --git a/kpi/serializers/v2/asset.py b/kpi/serializers/v2/asset.py index ebe0b9576d..96e696a780 100644 --- a/kpi/serializers/v2/asset.py +++ b/kpi/serializers/v2/asset.py @@ -893,6 +893,7 @@ def validate_settings(self, settings: dict) -> dict: return {**self.instance.settings, **settings} def _content(self, obj): + # FIXME: Is this dead code? return json.dumps(obj.content) def _get_status(self, perm_assignments): diff --git a/kpi/utils/bugfix.py b/kpi/utils/bugfix.py new file mode 100644 index 0000000000..96f0e5be93 --- /dev/null +++ b/kpi/utils/bugfix.py @@ -0,0 +1,67 @@ +# coding: utf-8 + +import datetime + +from django.db import transaction +from formpack.utils.bugfix import repair_file_column_content_in_place + +from kpi.models import Asset, AssetVersion + + +def repair_file_column_content_and_save(asset, include_versions=True) -> bool: + """ + Given an `Asset`, repair any damage caused to its `content` and the + `version_content` of all related `AssetVersion`s by + kobotoolbox/formpack#322, which wrongly transformed the `file` column into + `media::file` and included it in the list of translated columns. + + Writes only to the `content` (or `version_content`) field for each modified + model instance. + + Returns `True` if any change was made + """ + + # When was this bug introduced? See formpack commit + # 443a8e940756976a9f88fb577dbbc53510726536 + BAD_TIME = datetime.datetime( + 2024, 4, 30, 15, 27, 2, tzinfo=datetime.timezone.utc + ) + + any_change = False + with transaction.atomic(): + # Lock and refetch the latest content to guard against clobbering + # concurrent updates + content = ( + Asset.objects.filter(pk=asset.pk, date_modified__gte=BAD_TIME) + .select_for_update() + .values('content') + .first() + ) + if not content: + # This asset was not modified recently enough to be affected by + # this bug + return False + + if repair_file_column_content_in_place(content): + Asset.objects.filter(pk=asset.pk).update(content=content) + any_change = True + # Also update the in-memory asset instance passed to this function + asset.content = content + + if not include_versions: + return any_change + + # Previous versions of the content may need repair, regardless of + # whether or not the current content did + for pk, version_content in ( + asset.asset_versions.filter(date_modified__gte=BAD_TIME) + .select_for_update() + .values_list('pk', 'version_content') + ): + if repair_file_column_content_in_place(version_content): + AssetVersion.objects.filter(pk=pk).update( + version_content=version_content + ) + any_change = True + + return any_change diff --git a/kpi/views/v2/asset.py b/kpi/views/v2/asset.py index 36845e5401..1e70df08ca 100644 --- a/kpi/views/v2/asset.py +++ b/kpi/views/v2/asset.py @@ -55,8 +55,9 @@ AssetListSerializer, AssetSerializer, ) -from kpi.utils.hash import calculate_hash from kpi.serializers.v2.reports import ReportsDetailSerializer +from kpi.utils.bugfix import repair_file_column_content_and_save +from kpi.utils.hash import calculate_hash from kpi.utils.kobo_to_xlsform import to_xlsform_structure from kpi.utils.ss_structure_to_mdtable import ss_structure_to_mdtable from kpi.utils.object_permission import ( @@ -403,6 +404,16 @@ def get_object(self): raise Http404 self.check_object_permissions(self.request, asset) + + # Cope with kobotoolbox/formpack#322, which wrote invalid content + # into the database. For performance, consider only the current + # content, not previous versions. Previous versions are handled in + # `kobo.apps.reports.report_data.build_formpack()` + if self.request.method == 'GET': + repair_file_column_content_and_save( + asset, include_versions=False + ) + return asset return super().get_object()