From c49c7148fe0a216bf981221809b30b60a18d3f9b Mon Sep 17 00:00:00 2001 From: James Kiger Date: Tue, 30 Apr 2024 14:57:36 -0400 Subject: [PATCH 1/3] Add revision cleanup task --- onadata/apps/logger/maintenance_tasks.py | 17 ++++++ onadata/apps/logger/tasks.py | 11 ++++ .../apps/logger/tests/models/test_instance.py | 54 +++++++++++++++---- onadata/settings/base.py | 1 + 4 files changed, 74 insertions(+), 9 deletions(-) create mode 100644 onadata/apps/logger/maintenance_tasks.py diff --git a/onadata/apps/logger/maintenance_tasks.py b/onadata/apps/logger/maintenance_tasks.py new file mode 100644 index 000000000..bce591352 --- /dev/null +++ b/onadata/apps/logger/maintenance_tasks.py @@ -0,0 +1,17 @@ +from datetime import timedelta + +# from django.db.models import QuerySet +from django.conf import settings +from django.utils import timezone +from reversion.models import Revision + + +def remove_old_revisions(): + days = settings.KOBOCAT_REVERSION_RETENTION_DAYS + delete_queryset = Revision.objects.filter( + date_created__lt=timezone.now() - timedelta(days=days), + ) + while True: + count, _ = delete_queryset.filter(pk__in=delete_queryset[:1000]).delete() + if not count: + break diff --git a/onadata/apps/logger/tasks.py b/onadata/apps/logger/tasks.py index 0975daa5f..c22e1ca31 100644 --- a/onadata/apps/logger/tasks.py +++ b/onadata/apps/logger/tasks.py @@ -15,6 +15,7 @@ from django.utils import timezone from onadata.celery import app +from .maintenance_tasks import remove_old_revisions from .models.daily_xform_submission_counter import DailyXFormSubmissionCounter from .models import Instance, XForm @@ -122,3 +123,13 @@ def list_created_by_month(model, date_field): @app.task() def sync_storage_counters(): call_command('update_attachment_storage_bytes', verbosity=3, sync=True) + + +LIMIT_HOURS_23 = 82800 + +@app.task(time_limit=LIMIT_HOURS_23, soft_time_limit=LIMIT_HOURS_23) +def perform_maintenance(): + """ + Run daily maintenance tasks + """ + remove_old_revisions() diff --git a/onadata/apps/logger/tests/models/test_instance.py b/onadata/apps/logger/tests/models/test_instance.py index 66eda9d37..6d90802ad 100644 --- a/onadata/apps/logger/tests/models/test_instance.py +++ b/onadata/apps/logger/tests/models/test_instance.py @@ -1,15 +1,18 @@ # coding: utf-8 import os -import reversion -from datetime import datetime, timedelta +from datetime import timedelta from dateutil import parser -from django.utils.timezone import utc +from django.utils import timezone +from django.test import override_settings from django_digest.test import DigestAuth from mock import patch +from reversion import create_revision, is_registered, set_date_created +from reversion.models import Revision from onadata.apps.main.tests.test_base import TestBase from onadata.apps.logger.models import XForm, Instance +from onadata.apps.logger.maintenance_tasks import remove_old_revisions from onadata.apps.logger.models.instance import get_id_string_from_xml_str from onadata.apps.viewer.models import ParsedInstance from onadata.libs.utils.common_tags import MONGO_STRFTIME, SUBMISSION_TIME,\ @@ -21,6 +24,16 @@ class TestInstance(TestBase): def setUp(self): super().setUp() + def create_transportation_fixture_xml_path(self, index = 0): + return os.path.join( + self.this_directory, + "fixtures", + "transportation", + "instances", + self.surveys[index], + self.surveys[index] + ".xml", + ) + def test_stores_json(self): self._publish_transportation_form_and_submit_instance() instances = Instance.objects.all() @@ -30,7 +43,7 @@ def test_stores_json(self): @patch('django.utils.timezone.now') def test_json_assigns_attributes(self, mock_time): - mock_time.return_value = datetime.utcnow().replace(tzinfo=utc) + mock_time.return_value = timezone.datetime.now(timezone.utc) self._publish_transportation_form_and_submit_instance() xform_id_string = XForm.objects.all()[0].id_string @@ -44,13 +57,11 @@ def test_json_assigns_attributes(self, mock_time): @patch('django.utils.timezone.now') def test_json_stores_user_attribute(self, mock_time): - mock_time.return_value = datetime.utcnow().replace(tzinfo=utc) + mock_time.return_value = timezone.datetime.now(timezone.utc) self._publish_transportation_form() # submit instance with a request user - path = os.path.join( - self.this_directory, 'fixtures', 'transportation', 'instances', - self.surveys[0], self.surveys[0] + '.xml') + path = self.create_transportation_fixture_xml_path() auth = DigestAuth(self.login_username, self.login_password) self._make_submission(path, auth=auth) @@ -115,4 +126,29 @@ def test_get_id_string_from_xml_str(self): self.assertEqual(id_string, 'id_string') def test_reversion(self): - self.assertTrue(reversion.is_registered(Instance)) + self.assertTrue(is_registered(Instance)) + + @override_settings(KOBOCAT_REVERSION_RETENTION_DAYS=2) + def test_reversion_cleanup(self): + days_ago_3 = timezone.now() - timedelta(days=3) + self._publish_transportation_form() + + path = self.create_transportation_fixture_xml_path() + + with create_revision(): + self._make_submission(path, forced_submission_time=days_ago_3) + set_date_created(days_ago_3) + old_revision = Revision.objects.first() + + path = self.create_transportation_fixture_xml_path(1) + + with create_revision(): + self._make_submission(path) + new_revision = Revision.objects.first() + + assert Revision.objects.count() == 2 + + remove_old_revisions() + + assert not Revision.objects.filter(id=old_revision.id).exists() + assert Revision.objects.filter(id=new_revision.id).exists() diff --git a/onadata/settings/base.py b/onadata/settings/base.py index 1f779e620..30d914293 100644 --- a/onadata/settings/base.py +++ b/onadata/settings/base.py @@ -665,6 +665,7 @@ def skip_suspicious_operations(record): REVERSION_MIDDLEWARE_SKIPPED_URL_PATTERNS = { r'/api/v1/users/(.*)': ['DELETE'] } +KOBOCAT_REVERSION_RETENTION_DAYS = env.int("KOBOCAT_REVERSION_RETENTION_DAYS", 90) # run heavy migration scripts by default # NOTE: this should be set to False for major deployments. This can take a long time From 7176a724d1a152ae1b0e0096ff8f9d78c233f998 Mon Sep 17 00:00:00 2001 From: James Kiger Date: Wed, 1 May 2024 08:16:53 -0400 Subject: [PATCH 2/3] Code cleanup --- onadata/apps/logger/maintenance_tasks.py | 1 - onadata/apps/logger/tests/models/test_instance.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/onadata/apps/logger/maintenance_tasks.py b/onadata/apps/logger/maintenance_tasks.py index bce591352..0a5ef8f6d 100644 --- a/onadata/apps/logger/maintenance_tasks.py +++ b/onadata/apps/logger/maintenance_tasks.py @@ -1,6 +1,5 @@ from datetime import timedelta -# from django.db.models import QuerySet from django.conf import settings from django.utils import timezone from reversion.models import Revision diff --git a/onadata/apps/logger/tests/models/test_instance.py b/onadata/apps/logger/tests/models/test_instance.py index 6d90802ad..e3fae8948 100644 --- a/onadata/apps/logger/tests/models/test_instance.py +++ b/onadata/apps/logger/tests/models/test_instance.py @@ -129,7 +129,7 @@ def test_reversion(self): self.assertTrue(is_registered(Instance)) @override_settings(KOBOCAT_REVERSION_RETENTION_DAYS=2) - def test_reversion_cleanup(self): + def test_revision_cleanup(self): days_ago_3 = timezone.now() - timedelta(days=3) self._publish_transportation_form() From 922d477d11b5bde8bdfc5e7bd4b4117736500ed9 Mon Sep 17 00:00:00 2001 From: James Kiger Date: Wed, 1 May 2024 08:30:30 -0400 Subject: [PATCH 3/3] Register maintenance task with Celery --- onadata/settings/base.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/onadata/settings/base.py b/onadata/settings/base.py index 30d914293..f69d546ae 100644 --- a/onadata/settings/base.py +++ b/onadata/settings/base.py @@ -709,16 +709,22 @@ def skip_suspicious_operations(record): CELERY_BEAT_SCHEDULE = { # Periodically mark exports stuck in the "pending" state as "failed" # See https://github.com/kobotoolbox/kobocat/issues/315 - 'log-stuck-exports-and-mark-failed': { - 'task': 'onadata.apps.viewer.tasks.log_stuck_exports_and_mark_failed', - 'schedule': timedelta(hours=6), - 'options': {'queue': 'kobocat_queue'} + "log-stuck-exports-and-mark-failed": { + "task": "onadata.apps.viewer.tasks.log_stuck_exports_and_mark_failed", + "schedule": timedelta(hours=6), + "options": {"queue": "kobocat_queue"}, + }, + "delete-daily-xform-submissions-counter": { + "task": "onadata.apps.logger.tasks.delete_daily_counters", + "schedule": crontab(hour=0, minute=0), + "options": {"queue": "kobocat_queue"}, + }, + # Run maintenance every day at 20:00 UTC + "perform-maintenance": { + "task": "onadata.apps.logger.tasks.perform_maintenance", + "schedule": crontab(hour=20, minute=0), + "options": {"queue": "kobocat_queue"}, }, - 'delete-daily-xform-submissions-counter': { - 'task': 'onadata.apps.logger.tasks.delete_daily_counters', - 'schedule': crontab(hour=0, minute=0), - 'options': {'queue': 'kobocat_queue'} - } } CELERY_TASK_DEFAULT_QUEUE = "kobocat_queue"