Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Always remove deleted submissions entirely instead of merely marking them as deleted #732

Merged
merged 5 commits into from
Jun 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions onadata/apps/api/viewsets/data_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ class DataViewSet(AnonymousUserPublicFormsMixin, ModelViewSet):
> [
> {
> "_id": 4503,
> "_deleted_at": null,
> "expense_type": "service",
> "_xform_id_string": "exp",
> "_geolocation": [
Expand Down Expand Up @@ -157,7 +156,6 @@ class DataViewSet(AnonymousUserPublicFormsMixin, ModelViewSet):
>
> {
> "_id": 4503,
> "_deleted_at": null,
> "expense_type": "service",
> "_xform_id_string": "exp",
> "_geolocation": [
Expand Down Expand Up @@ -213,7 +211,6 @@ class DataViewSet(AnonymousUserPublicFormsMixin, ModelViewSet):
> [
> {
> "_id": 4503,
> "_deleted_at": null,
> "expense_type": "service",
> "_xform_id_string": "exp",
> "_geolocation": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,9 @@ def add_arguments(self, parser):
help="Specify a XForm's `id_string` to clean up only this form",
)

parser.add_argument(
"--purge",
action='store_true',
default=False,
help="Erase duplicate `Instance`s from the database entirely instead "
"of marking them as deleted using the `deleted_at` attribute. "
"Default is False",
)

def handle(self, *args, **options):
username = options['user']
xform_id_string = options['xform']
purge = options['purge']

# Retrieve all instances with the same `uuid`.
query = Instance.objects
Expand All @@ -61,13 +51,6 @@ def handle(self, *args, **options):
if username:
query = query.filter(xform__user__username=username)

# if we don't purge, we don't want to see instances
# that have been marked as deleted. However, if we do purge
# we do need these instances to be in the list in order
# to delete them permanently
if not purge:
query = query.filter(deleted_at=None)

query = query.values_list('uuid', flat=True)\
.annotate(count_uuid=Count('uuid'))\
.filter(count_uuid__gt=1)\
Expand All @@ -77,13 +60,6 @@ def handle(self, *args, **options):

duplicated_query = Instance.objects.filter(uuid=uuid)

# if we don't purge, we don't want to see instances
# that have been marked as deleted. However, if we do purge
# we do need these instances to be in the list in order
# to delete them permanently
if not purge:
duplicated_query = duplicated_query.filter(deleted_at=None)

instances_with_same_uuid = duplicated_query.values_list('id',
'xml_hash')\
.order_by('xml_hash', 'date_created')
Expand All @@ -97,8 +73,7 @@ def handle(self, *args, **options):

if instance_xml_hash != xml_hash_ref:
self.__clean_up(instance_id_ref,
duplicated_instance_ids,
purge)
duplicated_instance_ids)
xml_hash_ref = instance_xml_hash
instance_id_ref = instance_id
duplicated_instance_ids = []
Expand All @@ -107,14 +82,10 @@ def handle(self, *args, **options):
duplicated_instance_ids.append(instance_id)

self.__clean_up(instance_id_ref,
duplicated_instance_ids,
purge)
duplicated_instance_ids)

if not self.__vaccuum:
if purge:
self.stdout.write('No instances have been purged.')
else:
self.stdout.write('No instances have been marked as deleted.')
self.stdout.write('No instances have been purged.')
else:
# Update number of submissions for each user.
for user_ in list(self.__users):
Expand All @@ -128,7 +99,7 @@ def handle(self, *args, **options):
self.stdout.write(
'\t\tDone! New number: {}'.format(result['count']))

def __clean_up(self, instance_id_ref, duplicated_instance_ids, purge):
def __clean_up(self, instance_id_ref, duplicated_instance_ids):
if instance_id_ref is not None and len(duplicated_instance_ids) > 0:
self.__vaccuum = True
with transaction.atomic():
Expand All @@ -144,30 +115,15 @@ def __clean_up(self, instance_id_ref, duplicated_instance_ids, purge):
.get(id=instance_id_ref)
main_instance.parsed_instance.save()

if purge:
self.stdout.write('\tPurging instances: {}'.format(
duplicated_instance_ids))
Instance.objects.select_for_update()\
.filter(id__in=duplicated_instance_ids).delete()
ParsedInstance.objects.select_for_update()\
.filter(instance_id__in=duplicated_instance_ids).delete()
settings.MONGO_DB.instances.remove(
{'_id': {'$in': duplicated_instance_ids}}
)
else:
self.stdout.write('\tMarking instances as deleted: {}'.format(
duplicated_instance_ids))
# We could loop through instances and use `Instance.set_deleted()`
# but it would be way slower.
Instance.objects.select_for_update()\
.filter(id__in=duplicated_instance_ids)\
.update(deleted_at=timezone.now())
settings.MONGO_DB.instances.update_many(
{'_id': {'$in': duplicated_instance_ids}},
{'$set': {
'_deleted_at': timezone.now().strftime(MONGO_STRFTIME)
}}
)
self.stdout.write('\tPurging instances: {}'.format(
duplicated_instance_ids))
Instance.objects.select_for_update()\
.filter(id__in=duplicated_instance_ids).delete()
ParsedInstance.objects.select_for_update()\
.filter(instance_id__in=duplicated_instance_ids).delete()
settings.MONGO_DB.instances.remove(
{'_id': {'$in': duplicated_instance_ids}}
)
# Update number of submissions
xform = main_instance.xform
self.stdout.write(
Expand Down

This file was deleted.

48 changes: 27 additions & 21 deletions onadata/apps/logger/models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
clean_and_parse_xml, get_uuid_from_xml
from onadata.libs.utils.common_tags import (
ATTACHMENTS,
DELETEDAT,
GEOLOCATION,
ID,
MONGO_STRFTIME,
Expand Down Expand Up @@ -91,6 +90,28 @@ def update_xform_submission_count(sender, instance, created, **kwargs):
)


def nullify_exports_time_of_last_submission(sender, instance, **kwargs):
"""
Formerly, "deleting" a submission would set a flag on the `Instance`,
causing the `date_modified` attribute to be set to the current timestamp.
`Export.exports_outdated()` relied on this to detect when a new `Export`
needed to be generated due to submission deletion, but now that we always
delete `Instance`s outright, this trick doesn't work. This kludge simply
makes every `Export` for a form appear stale by nulling out its
`time_of_last_submission` attribute.
"""
# Avoid circular import
try:
export_model = instance.xform.export_set.model
except XForm.DoesNotExist:
return
f = instance.xform.export_set.filter(
# Match the statuses considered by `Export.exports_outdated()`
internal_status__in=[export_model.SUCCESSFUL, export_model.PENDING],
)
f.update(time_of_last_submission=None)


def update_user_submissions_counter(sender, instance, created, **kwargs):
if not created:
return
Expand Down Expand Up @@ -157,7 +178,8 @@ class Instance(models.Model):
# this will end up representing "date last parsed"
date_modified = models.DateTimeField(auto_now=True)

# this will end up representing "date instance was deleted"
# this formerly represented "date instance was deleted".
# do not use it anymore.
deleted_at = models.DateTimeField(null=True, default=None)

# ODK keeps track of three statuses for an instance:
Expand Down Expand Up @@ -195,15 +217,6 @@ def asset(self):
"""
return self.xform

@classmethod
def set_deleted_at(cls, instance_id, deleted_at=timezone.now()):
try:
instance = cls.objects.get(id=instance_id)
except cls.DoesNotExist:
pass
else:
instance.set_deleted(deleted_at)

def _check_active(self, force):
"""Check that form is active and raise exception if not.

Expand Down Expand Up @@ -359,9 +372,6 @@ def get_full_dict(self):
NOTES: self.get_notes()
}

if isinstance(self.instance.deleted_at, datetime):
data[DELETEDAT] = self.deleted_at.strftime(MONGO_STRFTIME)

d.update(data)

return d
Expand Down Expand Up @@ -413,13 +423,6 @@ def save(self, *args, **kwargs):

super().save(*args, **kwargs)

def set_deleted(self, deleted_at=timezone.now()):
self.deleted_at = deleted_at
self.save()
# force submission count re-calculation
self.xform.submission_count(force_update=True)
self.parsed_instance.save()

def get_validation_status(self):
"""
Returns instance validation status.
Expand All @@ -436,6 +439,9 @@ def get_validation_status(self):
post_save.connect(update_xform_submission_count, sender=Instance,
dispatch_uid='update_xform_submission_count')

post_delete.connect(nullify_exports_time_of_last_submission, sender=Instance,
dispatch_uid='nullify_exports_time_of_last_submission')

post_save.connect(update_user_submissions_counter, sender=Instance,
dispatch_uid='update_user_submissions_counter')

Expand Down
8 changes: 3 additions & 5 deletions onadata/apps/logger/models/xform.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,22 +193,20 @@ def __str__(self):

def submission_count(self, force_update=False):
if self.num_of_submissions == 0 or force_update:
count = self.instances.filter(deleted_at__isnull=True).count()
count = self.instances.count()
self.num_of_submissions = count
self.save(update_fields=['num_of_submissions'])
return self.num_of_submissions
submission_count.short_description = ugettext_lazy("Submission Count")

def geocoded_submission_count(self):
"""Number of geocoded submissions."""
return self.instances.filter(deleted_at__isnull=True,
geom__isnull=False).count()
return self.instances.filter(geom__isnull=False).count()

def time_of_last_submission(self):
if self.last_submission_time is None and self.num_of_submissions > 0:
try:
last_submission = self.instances.\
filter(deleted_at__isnull=True).latest("date_created")
last_submission = self.instances.latest("date_created")
except ObjectDoesNotExist:
pass
else:
Expand Down
16 changes: 0 additions & 16 deletions onadata/apps/logger/tests/models/test_xform.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,6 @@


class TestXForm(TestBase):
def test_submission_count_filters_deleted(self):
self._publish_transportation_form_and_submit_instance()

# update the xform object the num_submissions seems to be cached in
# the in-memory xform object as zero
self.xform = XForm.objects.get(pk=self.xform.id)
self.assertEqual(self.xform.submission_count(), 1)
instance = Instance.objects.get(xform=self.xform)
instance.set_deleted()
self.assertIsNotNone(instance.deleted_at)

# update the xform object, the num_submissions seems to be cached in
# the in-memory xform object as one
self.xform = XForm.objects.get(pk=self.xform.id)
self.assertEqual(self.xform.submission_count(), 0)

def test_set_title_in_xml_unicode_error(self):
xls_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
Expand Down
Loading