Skip to content

Commit

Permalink
Merge pull request #905 from kobotoolbox/hot-fix-background-audio-aud…
Browse files Browse the repository at this point in the history
…it-files-wrongly-soft-deleted

Fix missing background audio recordings and audit logs when exporting or viewing submitted data
  • Loading branch information
jnm authored Nov 13, 2023
2 parents f2aeb56 + a9f5386 commit 72442cb
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
from __future__ import annotations

from django.core.management.base import BaseCommand
from django.core.management import call_command
from django.db.models import F, Q

from onadata.apps.logger.models import (
Attachment,
Instance,
XForm,
)
from onadata.apps.main.models import UserProfile


class Command(BaseCommand):
    """
    Management command that clears `deleted_at` on background-audio and
    audit-log attachments, refreshes the related Mongo documents, and then
    triggers a recalculation of the attachment storage counters.
    """

    help = (
        'Undelete background audio files and audit logs previously soft-deleted'
        ' by a bug introduced in release 2.023.37c'
    )

    def add_arguments(self, parser):
        """Register the `--chunks` and `--force` command-line options."""
        parser.add_argument(
            '--chunks',
            type=int,
            default=2000,
            help='Number of records to process per query'
        )

        parser.add_argument(
            '--force',
            action='store_true',
            default=False,
            help='Run the management command even if no attachments are affected'
        )

    @staticmethod
    def _affected_basenames_filter() -> Q:
        """
        Build the filter matching the attachments this command restores:
        files named `audit.csv`, or files whose name is at least 10 digits
        followed by a `.m4a` or `.amr` extension.

        The same pattern must be used both to select the instances and to
        undelete their attachments; otherwise attachments that were never
        selected as affected (e.g. `1.m4a`) would be undeleted too.
        """
        return Q(media_file_basename='audit.csv') | Q(
            media_file_basename__regex=r'^\d{10,}\.(m4a|amr)$'
        )

    def handle(self, *args, **kwargs):
        """
        Undelete the matching attachments, instance by instance.

        Without `--force`, only instances owning at least one matching
        attachment with a non-null `deleted_at` are processed, and the
        command exits early when there are none. With `--force`, every
        instance owning a matching attachment is processed.
        """
        chunks = kwargs['chunks']
        verbosity = kwargs['verbosity']
        force = kwargs['force']

        self.stdout.write(
            '⚠ Warning! This management can take a while (i.e. several hours) '
            'to run on big databases'
        )

        # NOTE(review): `all_objects` is presumably the manager that also
        # returns soft-deleted rows - confirm against the Attachment model.
        matching = Attachment.all_objects.filter(
            self._affected_basenames_filter()
        )
        soft_deleted = matching.filter(deleted_at__isnull=False)

        # Check `force` first so the `exists()` query is skipped when its
        # result would be ignored anyway.
        if not force and not soft_deleted.exists():
            self.stdout.write(
                'No background recording or audit logs seem to be affected'
            )
            return

        att_queryset = matching if force else soft_deleted
        instance_ids = list(
            att_queryset.values_list('instance_id', flat=True).distinct()
        )
        instances_count = len(instance_ids)

        if verbosity > 1:
            self.stdout.write(f'Instances to process: {instances_count}…')

        instances = Instance.objects.filter(pk__in=instance_ids).order_by('id')
        for cpt, instance in enumerate(
            instances.iterator(chunk_size=chunks), start=1
        ):
            message = '' if verbosity <= 1 else f' - {cpt}/{instances_count}'
            if verbosity:
                self.stdout.write(
                    f'Processing instance #{instance.pk}{message}…'
                )
            Attachment.all_objects.filter(
                self._affected_basenames_filter(),
                instance_id=instance.pk,
            ).update(deleted_at=None)
            try:
                instance.parsed_instance.update_mongo()
            except Instance.parsed_instance.RelatedObjectDoesNotExist:
                # The instance has no related parsed instance, so there is
                # nothing to refresh; skip it on purpose.
                pass

        if verbosity:
            self.stdout.write(
                'Updating storage counters…'
            )
        # Attachment storage counters need to be updated.
        xform_ids = (
            Instance.objects.filter(pk__in=instance_ids)
            .values_list('xform_id', flat=True)
            .distinct()
        )

        # Update related profile counters with a wrong value to let
        # the management command `update_attachment_storage_bytes` find them
        # when calling with `--sync` option.
        UserProfile.objects.filter(
            user_id__in=XForm.objects.filter(
                pk__in=list(xform_ids)
            ).values_list('user_id', flat=True)
        ).update(attachment_storage_bytes=F('attachment_storage_bytes') - 1)

        call_command(
            'update_attachment_storage_bytes', verbosity=verbosity, sync=True
        )

        self.stdout.write('Done!')
11 changes: 8 additions & 3 deletions onadata/libs/utils/logger_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,9 +733,14 @@ def get_soft_deleted_attachments(instance: Instance) -> list[Attachment]:

# Update Attachment objects to hide them if they are not used anymore.
# We do not want to delete them until the instance itself is deleted.
queryset = Attachment.objects.filter(
instance=instance
).exclude(media_file_basename__in=basenames)

# FIXME Temporary hack to leave background-audio files and audit files alone
# Bug comes from `get_xform_media_question_xpaths()`
queryset = Attachment.objects.filter(instance=instance).exclude(
Q(media_file_basename__in=basenames)
| Q(media_file_basename='audit.csv')
| Q(media_file_basename__regex=r'^\d{10,}\.(m4a|amr)$')
)
soft_deleted_attachments = list(queryset.all())
queryset.update(deleted_at=timezone.now())

Expand Down

0 comments on commit 72442cb

Please sign in to comment.