From 130d81fc7ef175c4dfb35071d8ed27b3b52c7ca0 Mon Sep 17 00:00:00 2001 From: James Kiger Date: Wed, 10 Apr 2024 10:27:42 -0400 Subject: [PATCH 01/13] Expand asset snapshot cleanup test --- kpi/models/asset.py | 2 -- kpi/tasks.py | 3 +-- kpi/tests/test_asset_snapshots.py | 43 +++++++++++++++++++++---------- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/kpi/models/asset.py b/kpi/models/asset.py index 1e5e83af12..7e57ee797a 100644 --- a/kpi/models/asset.py +++ b/kpi/models/asset.py @@ -81,7 +81,6 @@ from kpi.utils.asset_content_analyzer import AssetContentAnalyzer from kpi.utils.object_permission import get_cached_code_names from kpi.utils.sluggify import sluggify_label -from kpi.tasks import remove_asset_snapshots class AssetDeploymentStatus(models.TextChoices): @@ -1236,7 +1235,6 @@ def snapshot( if regenerate: snapshot = False # Let's do some housekeeping - remove_asset_snapshots.delay(self.id) else: snapshot = AssetSnapshot.objects.filter(**snap_params).order_by( '-date_created' diff --git a/kpi/tasks.py b/kpi/tasks.py index d42b2387b9..9fec682a51 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -97,7 +97,7 @@ def enketo_flush_cached_preview(server_url, form_id): @celery_app.task -def remove_asset_snapshots(asset_id: int): +def remove_asset_snapshots(): """ Temporary task to delete old snapshots. TODO remove when kpi#2434 is merged @@ -105,5 +105,4 @@ def remove_asset_snapshots(asset_id: int): call_command( 'delete_assets_snapshots', days=constance.config.ASSET_SNAPSHOT_DAYS_RETENTION, - asset_id=asset_id, ) diff --git a/kpi/tests/test_asset_snapshots.py b/kpi/tests/test_asset_snapshots.py index d6acede340..b170d8b435 100644 --- a/kpi/tests/test_asset_snapshots.py +++ b/kpi/tests/test_asset_snapshots.py @@ -7,6 +7,7 @@ from django.test import TestCase from django.utils.timezone import now +from kpi.tasks import remove_asset_snapshots from kpi.tests.api.v2 import test_api_asset_snapshots from ..models import Asset from ..models import AssetSnapshot @@ -93,21 +94,37 @@ def test_delete_old_asset_snapshots_on_regenerate(self): old_snapshot = AssetSnapshot.objects.create(asset=self.asset) old_snapshot.date_created = yesterday old_snapshot.save(update_fields=['date_created']) - # versioned snapshots are always regenerated - versioned_snapshot = self.asset.snapshot( + + self.asset.deploy(backend='mock', active=True) + versioned_snapshot_1 = self.asset.snapshot( regenerate=True, version_uid=self.asset.latest_deployed_version_uid ) - snapshot_uids = list(AssetSnapshot.objects.filter( - asset=self.asset - ).values_list('uid', flat=True)) - expected_snapshot_uids = [ - versioned_snapshot.uid, - old_snapshot.uid, - self.asset_snapshot.uid, - ] + versioned_snapshot_1.date_created = two_days_before + versioned_snapshot_1.save(update_fields=['date_created']) + + # Redeploy asset to add version + self.asset.content['survey'].append( + {'type': 'note', 'label': 'Read me', 'name': 'n'} + ) + self.asset.save() + self.asset.deploy(backend='mock', active=True) + versioned_snapshot_2 = self.asset.snapshot( + regenerate=True, version_uid=self.asset.latest_deployed_version_uid + ) + # Set date to beyond retention limit to ensure latest version snapshot is preserved + versioned_snapshot_2.date_created = two_days_before + versioned_snapshot_2.save(update_fields=['date_created']) + + with self.assertNumQueries(7): + remove_asset_snapshots.delay() + + # Old snapshot should still exist + assert AssetSnapshot.objects.filter(pk=old_snapshot.id).exists() # Older snapshot should be gone assert not AssetSnapshot.objects.filter(pk=older_snapshot.id).exists() - # Older snapshot should still exist - assert AssetSnapshot.objects.filter(pk=old_snapshot.id).exists() - assert sorted(expected_snapshot_uids) == sorted(snapshot_uids) + # Only latest versioned snapshot should still exist + assert AssetSnapshot.objects.filter(pk=versioned_snapshot_2.id).exists() + assert not AssetSnapshot.objects.filter( + pk=versioned_snapshot_1.id + ).exists() From 63773eeaccec5649dd760ee34810d72ce452f14e Mon Sep 17 00:00:00 2001 From: James Kiger Date: Thu, 11 Apr 2024 08:39:29 -0400 Subject: [PATCH 02/13] Add reworked assetsnapshot cleanup task --- kpi/maintenance_tasks.py | 39 +++++++++++++++++++++++++++++++ kpi/tasks.py | 12 ---------- kpi/tests/test_asset_snapshots.py | 6 ++--- 3 files changed, 42 insertions(+), 15 deletions(-) create mode 100644 kpi/maintenance_tasks.py diff --git a/kpi/maintenance_tasks.py b/kpi/maintenance_tasks.py new file mode 100644 index 0000000000..4577640ff6 --- /dev/null +++ b/kpi/maintenance_tasks.py @@ -0,0 +1,39 @@ +import constance +from datetime import timedelta + +from django.db.models import Max +from django.utils import timezone + +from kpi.models import AssetSnapshot + + +default_retention_period = 90 + +def remove_old_assetsnapshots(): + days = ( + constance.config.ASSET_SNAPSHOT_DAYS_RETENTION + or default_retention_period + ) + max_snapshots_qs = ( + AssetSnapshot.objects.exclude(asset_version=None) + .values('asset_id') + .annotate(max_snapshot_id=Max('pk')) + .values_list('max_snapshot_id', flat=True) + ) + + max_snapshot_ids = list(max_snapshots_qs) + + delete_queryset = AssetSnapshot.objects.filter( + date_created__lt=timezone.now() - timedelta(days=days), + ).exclude(pk__in=max_snapshot_ids).order_by('pk') + + while True: + try: + deletion_delimiter = delete_queryset.values_list('pk', flat=True)[ + 1000:1001 + ].get() + delete_queryset.filter(id__lte=deletion_delimiter).delete() + except AssetSnapshot.DoesNotExist: + break + + delete_queryset.delete() diff --git a/kpi/tasks.py b/kpi/tasks.py index 9fec682a51..40b057d50e 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -94,15 +94,3 @@ def enketo_flush_cached_preview(server_url, form_id): data=dict(server_url=server_url, form_id=form_id), ) response.raise_for_status() - - -@celery_app.task -def remove_asset_snapshots(): - """ - Temporary task to delete old snapshots. - TODO remove when kpi#2434 is merged - """ - call_command( - 'delete_assets_snapshots', - days=constance.config.ASSET_SNAPSHOT_DAYS_RETENTION, - ) diff --git a/kpi/tests/test_asset_snapshots.py b/kpi/tests/test_asset_snapshots.py index b170d8b435..b2495aed4a 100644 --- a/kpi/tests/test_asset_snapshots.py +++ b/kpi/tests/test_asset_snapshots.py @@ -7,7 +7,7 @@ from django.test import TestCase from django.utils.timezone import now -from kpi.tasks import remove_asset_snapshots +from kpi.maintenance_tasks import remove_old_assetsnapshots from kpi.tests.api.v2 import test_api_asset_snapshots from ..models import Asset from ..models import AssetSnapshot @@ -116,8 +116,8 @@ def test_delete_old_asset_snapshots_on_regenerate(self): versioned_snapshot_2.date_created = two_days_before versioned_snapshot_2.save(update_fields=['date_created']) - with self.assertNumQueries(7): - remove_asset_snapshots.delay() + with self.assertNumQueries(4): + remove_old_assetsnapshots() # Old snapshot should still exist assert AssetSnapshot.objects.filter(pk=old_snapshot.id).exists() From d20ffde536ac71e1a0f06b6d055a4709eb3cd545 Mon Sep 17 00:00:00 2001 From: James Kiger Date: Thu, 11 Apr 2024 08:45:11 -0400 Subject: [PATCH 03/13] Add to perform_maintenance --- kobo/tasks.py | 2 ++ kpi/models/asset.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/kobo/tasks.py b/kobo/tasks.py index a7348d81f0..00e41d016c 100644 --- a/kobo/tasks.py +++ b/kobo/tasks.py @@ -1,6 +1,7 @@ from celery import shared_task from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files +from kpi.maintenance_tasks import remove_old_assetsnapshots @shared_task @@ -9,3 +10,4 @@ def perform_maintenance(): Run daily maintenance tasks """ remove_unused_markdown_files() + remove_old_assetsnapshots() diff --git a/kpi/models/asset.py b/kpi/models/asset.py index 7e57ee797a..cf6460777c 100644 --- a/kpi/models/asset.py +++ b/kpi/models/asset.py @@ -1234,7 +1234,6 @@ def snapshot( if regenerate: snapshot = False - # Let's do some housekeeping else: snapshot = AssetSnapshot.objects.filter(**snap_params).order_by( '-date_created' From f554b9f8b1e2c1e744dd9deecf381bba98da86db Mon Sep 17 00:00:00 2001 From: James Kiger Date: Thu, 11 Apr 2024 09:44:38 -0400 Subject: [PATCH 04/13] Rename test --- kpi/tests/test_asset_snapshots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kpi/tests/test_asset_snapshots.py b/kpi/tests/test_asset_snapshots.py index b2495aed4a..313bfc4439 100644 --- a/kpi/tests/test_asset_snapshots.py +++ b/kpi/tests/test_asset_snapshots.py @@ -84,7 +84,7 @@ def test_snapshots_allow_choice_duplicates(self): class AssetSnapshotHousekeeping(AssetSnapshotsTestCase): @override_config(ASSET_SNAPSHOT_DAYS_RETENTION=2) - def test_delete_old_asset_snapshots_on_regenerate(self): + def test_delete_old_asset_snapshots_task(self): two_days_before = now() - timedelta(days=3) # One more day than Constance setting yesterday = now() - timedelta(days=1) # Because of `auto_date_now` , we cannot specify the date with `create()` From 122cc90a022aba813863e573fcc8e8d0385000d4 Mon Sep 17 00:00:00 2001 From: James Kiger Date: Fri, 12 Apr 2024 07:25:17 -0400 Subject: [PATCH 05/13] Rewrite query --- kpi/maintenance_tasks.py | 34 ++++++++++++++----------------- kpi/tests/test_asset_snapshots.py | 2 +- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/kpi/maintenance_tasks.py b/kpi/maintenance_tasks.py index 4577640ff6..a8761bc9e5 100644 --- a/kpi/maintenance_tasks.py +++ b/kpi/maintenance_tasks.py @@ -1,32 +1,29 @@ import constance from datetime import timedelta -from django.db.models import Max +from django.db.models import Exists, OuterRef from django.utils import timezone from kpi.models import AssetSnapshot -default_retention_period = 90 - def remove_old_assetsnapshots(): - days = ( - constance.config.ASSET_SNAPSHOT_DAYS_RETENTION - or default_retention_period - ) - max_snapshots_qs = ( - AssetSnapshot.objects.exclude(asset_version=None) - .values('asset_id') - .annotate(max_snapshot_id=Max('pk')) - .values_list('max_snapshot_id', flat=True) + days = constance.config.ASSET_SNAPSHOT_DAYS_RETENTION + + # We don't want to delete an asset's latest versioned snapshot, + # even if it is older than the retention period + newer_snapshot_for_asset = AssetSnapshot.objects.exclude( + asset_version=None + ).filter(asset_id=OuterRef('asset_id'), pk__gt=OuterRef('pk')) + + delete_queryset = ( + AssetSnapshot.objects.filter( + date_created__lt=timezone.now() - timedelta(days=days), + ) + .filter(Exists(newer_snapshot_for_asset)) + .order_by('pk') ) - max_snapshot_ids = list(max_snapshots_qs) - - delete_queryset = AssetSnapshot.objects.filter( - date_created__lt=timezone.now() - timedelta(days=days), - ).exclude(pk__in=max_snapshot_ids).order_by('pk') - while True: try: deletion_delimiter = delete_queryset.values_list('pk', flat=True)[ @@ -35,5 +32,4 @@ def remove_old_assetsnapshots(): delete_queryset.filter(id__lte=deletion_delimiter).delete() except AssetSnapshot.DoesNotExist: break - delete_queryset.delete() diff --git a/kpi/tests/test_asset_snapshots.py b/kpi/tests/test_asset_snapshots.py index 313bfc4439..317f7621a6 100644 --- a/kpi/tests/test_asset_snapshots.py +++ b/kpi/tests/test_asset_snapshots.py @@ -116,7 +116,7 @@ def test_delete_old_asset_snapshots_task(self): versioned_snapshot_2.date_created = two_days_before versioned_snapshot_2.save(update_fields=['date_created']) - with self.assertNumQueries(4): + with self.assertNumQueries(3): remove_old_assetsnapshots() # Old snapshot should still exist From a5fe7bd15082db5d2b667571d98fbfe18b124938 Mon Sep 17 00:00:00 2001 From: James Kiger Date: Fri, 19 Apr 2024 11:21:10 -0400 Subject: [PATCH 06/13] Move perform_maintenance to KPI task file, add timeout --- kobo/tasks.py | 13 ------------- kpi/tasks.py | 12 ++++++++++++ 2 files changed, 12 insertions(+), 13 deletions(-) delete mode 100644 kobo/tasks.py diff --git a/kobo/tasks.py b/kobo/tasks.py deleted file mode 100644 index 00e41d016c..0000000000 --- a/kobo/tasks.py +++ /dev/null @@ -1,13 +0,0 @@ -from celery import shared_task - -from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files -from kpi.maintenance_tasks import remove_old_assetsnapshots - - -@shared_task -def perform_maintenance(): - """ - Run daily maintenance tasks - """ - remove_unused_markdown_files() - remove_old_assetsnapshots() diff --git a/kpi/tasks.py b/kpi/tasks.py index 40b057d50e..58d6e3b1ae 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -94,3 +94,15 @@ def enketo_flush_cached_preview(server_url, form_id): data=dict(server_url=server_url, form_id=form_id), ) response.raise_for_status() + +@celery_app.task(time_limit=82800, soft_time_limit=82800) +def perform_maintenance(): + """ + Run daily maintenance tasks + """ + from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files + from kpi.maintenance_tasks import remove_old_assetsnapshots + + remove_unused_markdown_files() + remove_old_assetsnapshots() + \ No newline at end of file From 40774f88e197676a6026994dd1d3487a0e0196fb Mon Sep 17 00:00:00 2001 From: James Kiger Date: Fri, 19 Apr 2024 14:37:03 -0400 Subject: [PATCH 07/13] Adjust delete query and clarify tests --- kpi/maintenance_tasks.py | 4 +- kpi/tests/test_asset_snapshots.py | 75 ++++++++++++++++--------------- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/kpi/maintenance_tasks.py b/kpi/maintenance_tasks.py index a8761bc9e5..5441683b80 100644 --- a/kpi/maintenance_tasks.py +++ b/kpi/maintenance_tasks.py @@ -1,7 +1,7 @@ import constance from datetime import timedelta -from django.db.models import Exists, OuterRef +from django.db.models import Exists, OuterRef, Q from django.utils import timezone from kpi.models import AssetSnapshot @@ -20,7 +20,7 @@ def remove_old_assetsnapshots(): AssetSnapshot.objects.filter( date_created__lt=timezone.now() - timedelta(days=days), ) - .filter(Exists(newer_snapshot_for_asset)) + .filter(Exists(newer_snapshot_for_asset) | Q(asset_version=None)) .order_by('pk') ) diff --git a/kpi/tests/test_asset_snapshots.py b/kpi/tests/test_asset_snapshots.py index 317f7621a6..6b8ff37109 100644 --- a/kpi/tests/test_asset_snapshots.py +++ b/kpi/tests/test_asset_snapshots.py @@ -9,6 +9,7 @@ from kpi.maintenance_tasks import remove_old_assetsnapshots from kpi.tests.api.v2 import test_api_asset_snapshots + from ..models import Asset from ..models import AssetSnapshot @@ -30,7 +31,6 @@ def setUp(self): class CreateAssetSnapshots(AssetSnapshotsTestCase): - def test_init_asset_snapshot(self): ae = AssetSnapshot(asset=self.asset) self.assertEqual(ae.asset.id, self.asset.id) @@ -82,49 +82,52 @@ def test_snapshots_allow_choice_duplicates(self): class AssetSnapshotHousekeeping(AssetSnapshotsTestCase): - @override_config(ASSET_SNAPSHOT_DAYS_RETENTION=2) def test_delete_old_asset_snapshots_task(self): - two_days_before = now() - timedelta(days=3) # One more day than Constance setting - yesterday = now() - timedelta(days=1) # Because of `auto_date_now` , we cannot specify the date with `create()` older_snapshot = AssetSnapshot.objects.create(asset=self.asset) - older_snapshot.date_created = two_days_before + older_snapshot.date_created = now() - timedelta(days=5) older_snapshot.save(update_fields=['date_created']) - old_snapshot = AssetSnapshot.objects.create(asset=self.asset) - old_snapshot.date_created = yesterday - old_snapshot.save(update_fields=['date_created']) - - self.asset.deploy(backend='mock', active=True) - versioned_snapshot_1 = self.asset.snapshot( - regenerate=True, - version_uid=self.asset.latest_deployed_version_uid - ) - versioned_snapshot_1.date_created = two_days_before - versioned_snapshot_1.save(update_fields=['date_created']) + newer_snapshot = AssetSnapshot.objects.create(asset=self.asset) + newer_snapshot.date_created = now() - timedelta(days=4) + newer_snapshot.save(update_fields=['date_created']) - # Redeploy asset to add version - self.asset.content['survey'].append( - {'type': 'note', 'label': 'Read me', 'name': 'n'} - ) - self.asset.save() - self.asset.deploy(backend='mock', active=True) - versioned_snapshot_2 = self.asset.snapshot( - regenerate=True, version_uid=self.asset.latest_deployed_version_uid + remove_old_assetsnapshots() + + # Both are outside retention date, oldest gets removed + assert AssetSnapshot.objects.filter(pk=newer_snapshot.id).exists() + assert not AssetSnapshot.objects.filter(pk=older_snapshot.id).exists() + + newest_unversioned_snapshot = AssetSnapshot.objects.create( + asset=self.asset, source=self.asset.content ) - # Set date to beyond retention limit to ensure latest version snapshot is preserved - versioned_snapshot_2.date_created = two_days_before - versioned_snapshot_2.save(update_fields=['date_created']) + newest_unversioned_snapshot.date_created = now() - timedelta(days=3) + newest_unversioned_snapshot.save(update_fields=['date_created']) - with self.assertNumQueries(3): - remove_old_assetsnapshots() + remove_old_assetsnapshots() - # Old snapshot should still exist - assert AssetSnapshot.objects.filter(pk=old_snapshot.id).exists() - # Older snapshot should be gone - assert not AssetSnapshot.objects.filter(pk=older_snapshot.id).exists() - # Only latest versioned snapshot should still exist - assert AssetSnapshot.objects.filter(pk=versioned_snapshot_2.id).exists() + # Newest still gets deleted because it doesn't have version + assert AssetSnapshot.objects.filter(pk=newer_snapshot.id).exists() assert not AssetSnapshot.objects.filter( - pk=versioned_snapshot_1.id + pk=newest_unversioned_snapshot.id + ).exists() + + asset_2 = Asset.objects.create( + content=self.asset.content, + owner=self.user, + asset_type='survey', + ) + asset_2_older_snapshot = AssetSnapshot.objects.create(asset=asset_2) + asset_2_older_snapshot.date_created = now() - timedelta(days=1) + asset_2_older_snapshot.save(update_fields=['date_created']) + asset_2_newer_snapshot = AssetSnapshot.objects.create(asset=asset_2) + + remove_old_assetsnapshots() + + # Both remain because they are within retention date + assert AssetSnapshot.objects.filter( + pk=asset_2_older_snapshot.id + ).exists() + assert AssetSnapshot.objects.filter( + pk=asset_2_newer_snapshot.id ).exists() From 6e0d9b4169d4cd0d09d534c7d06224e354e65d5d Mon Sep 17 00:00:00 2001 From: James Kiger Date: Fri, 19 Apr 2024 14:57:03 -0400 Subject: [PATCH 08/13] Remove TODO comment --- kpi/models/asset_snapshot.py | 6 ------ kpi/tasks.py | 1 - 2 files changed, 7 deletions(-) diff --git a/kpi/models/asset_snapshot.py b/kpi/models/asset_snapshot.py index 39b93cd569..f362db06e0 100644 --- a/kpi/models/asset_snapshot.py +++ b/kpi/models/asset_snapshot.py @@ -54,12 +54,6 @@ class AssetSnapshot( """ This model serves as a cache of the XML that was exported by the installed version of pyxform. - - TODO: come up with a policy to clear this cache out. - DO NOT: depend on these snapshots existing for more than a day - until a policy is set. - Done with https://github.com/kobotoolbox/kpi/pull/2434. - Remove above lines when PR is merged """ xml = models.TextField() source = models.JSONField(default=dict) diff --git a/kpi/tasks.py b/kpi/tasks.py index 58d6e3b1ae..17b56c58c4 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -105,4 +105,3 @@ def perform_maintenance(): remove_unused_markdown_files() remove_old_assetsnapshots() - \ No newline at end of file From 9ee33ab756dd8e28358cdc82785cc97e6a9948c3 Mon Sep 17 00:00:00 2001 From: James Kiger Date: Mon, 22 Apr 2024 09:32:23 -0400 Subject: [PATCH 09/13] Fix circular imports, add time limit var --- kpi/deployment_backends/mixin.py | 5 +++-- kpi/tasks.py | 28 +++++++++++++--------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/kpi/deployment_backends/mixin.py b/kpi/deployment_backends/mixin.py index c892cef236..7c6a519afd 100644 --- a/kpi/deployment_backends/mixin.py +++ b/kpi/deployment_backends/mixin.py @@ -1,10 +1,11 @@ # coding: utf-8 +import celery from django.utils import timezone from kpi.constants import ASSET_TYPE_SURVEY from kpi.exceptions import BadAssetTypeException, DeploymentNotFound from kpi.models.asset_file import AssetFile -from kpi.tasks import sync_media_files + from .backends import DEPLOYMENT_BACKENDS from .base_backend import BaseDeploymentBackend @@ -19,7 +20,7 @@ def sync_media_files_async(self, always=True): file_type=AssetFile.FORM_MEDIA, synced_with_backend=False ).exists(): self.save(create_version=False, adjust_content=False) - sync_media_files.delay(self.uid) + celery.current_app.send_task('kpi.tasks.sync_media_files', (self.uid,)) @property def can_be_deployed(self): diff --git a/kpi/tasks.py b/kpi/tasks.py index 17b56c58c4..ae62eeff21 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -3,24 +3,28 @@ import requests from django.conf import settings from django.contrib.auth.models import User -from django.core.management import call_command from django.core.mail import send_mail +from django.core.management import call_command +from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files from kobo.celery import celery_app +from kpi.maintenance_tasks import remove_old_assetsnapshots +from kpi.models.asset import Asset +from kpi.models.import_export_task import ( + ExportTask, + ImportTask, + ProjectViewExportTask, +) @celery_app.task def import_in_background(import_task_uid): - from kpi.models.import_export_task import ImportTask # avoid circular imports - import_task = ImportTask.objects.get(uid=import_task_uid) import_task.run() @celery_app.task def export_in_background(export_task_uid): - from kpi.models.import_export_task import ExportTask # avoid circular imports - export_task = ExportTask.objects.get(uid=export_task_uid) export_task.run() @@ -29,10 +33,6 @@ def export_in_background(export_task_uid): def project_view_export_in_background( export_task_uid: str, username: str ) -> None: - from kpi.models.import_export_task import ( - ProjectViewExportTask, - ) # avoid circular imports - user = User.objects.get(username=username) export_task = ProjectViewExportTask.objects.get(uid=export_task_uid) @@ -72,8 +72,6 @@ def sync_kobocat_xforms( @celery_app.task def sync_media_files(asset_uid): - from kpi.models.asset import Asset # avoid circular imports - asset = Asset.objects.get(uid=asset_uid) asset.deployment.sync_media_files() @@ -95,13 +93,13 @@ def enketo_flush_cached_preview(server_url, form_id): ) response.raise_for_status() -@celery_app.task(time_limit=82800, soft_time_limit=82800) +limit_hours_23 = 82800 + + +@celery_app.task(time_limit=limit_hours_23, soft_time_limit=limit_hours_23) def perform_maintenance(): """ Run daily maintenance tasks """ - from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files - from kpi.maintenance_tasks import remove_old_assetsnapshots - remove_unused_markdown_files() remove_old_assetsnapshots() From 60b03841b5e150a3d0da3df3860dfc2354ba65a7 Mon Sep 17 00:00:00 2001 From: James Kiger Date: Mon, 22 Apr 2024 10:10:19 -0400 Subject: [PATCH 10/13] Add note about circular import --- kpi/deployment_backends/mixin.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kpi/deployment_backends/mixin.py b/kpi/deployment_backends/mixin.py index 7c6a519afd..ae1690bf14 100644 --- a/kpi/deployment_backends/mixin.py +++ b/kpi/deployment_backends/mixin.py @@ -20,6 +20,8 @@ def sync_media_files_async(self, always=True): file_type=AssetFile.FORM_MEDIA, synced_with_backend=False ).exists(): self.save(create_version=False, adjust_content=False) + + # Not using .delay() due to circular import in tasks.py celery.current_app.send_task('kpi.tasks.sync_media_files', (self.uid,)) @property From 1214115488ce66dca9830f064d0bee01e0d02830 Mon Sep 17 00:00:00 2001 From: James Kiger Date: Mon, 22 Apr 2024 10:25:31 -0400 Subject: [PATCH 11/13] Change constant case --- kpi/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kpi/tasks.py b/kpi/tasks.py index ae62eeff21..1d4afb40c1 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -93,10 +93,10 @@ def enketo_flush_cached_preview(server_url, form_id): ) response.raise_for_status() -limit_hours_23 = 82800 +LIMIT_HOURS_23 = 82800 -@celery_app.task(time_limit=limit_hours_23, soft_time_limit=limit_hours_23) +@celery_app.task(time_limit=LIMIT_HOURS_23, soft_time_limit=LIMIT_HOURS_23) def perform_maintenance(): """ Run daily maintenance tasks From 3f630d584ffe824f7fb53e813af7e1092c21b4c8 Mon Sep 17 00:00:00 2001 From: James Kiger Date: Tue, 23 Apr 2024 13:41:05 -0400 Subject: [PATCH 12/13] Code review adjustments --- kpi/constants.py | 2 ++ kpi/maintenance_tasks.py | 2 +- kpi/tasks.py | 6 +++--- kpi/tests/test_asset_snapshots.py | 23 +++++++++++++---------- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/kpi/constants.py b/kpi/constants.py index c7988619e2..7498196bfd 100644 --- a/kpi/constants.py +++ b/kpi/constants.py @@ -126,3 +126,5 @@ manually from the django shell. """ ) + +LIMIT_HOURS_23 = 82800 diff --git a/kpi/maintenance_tasks.py b/kpi/maintenance_tasks.py index 5441683b80..75680ffbd6 100644 --- a/kpi/maintenance_tasks.py +++ b/kpi/maintenance_tasks.py @@ -7,7 +7,7 @@ from kpi.models import AssetSnapshot -def remove_old_assetsnapshots(): +def remove_old_asset_snapshots(): days = constance.config.ASSET_SNAPSHOT_DAYS_RETENTION # We don't want to delete an asset's latest versioned snapshot, diff --git a/kpi/tasks.py b/kpi/tasks.py index 1d4afb40c1..553b71d3d6 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -8,7 +8,8 @@ from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files from kobo.celery import celery_app -from kpi.maintenance_tasks import remove_old_assetsnapshots +from kpi.constants import LIMIT_HOURS_23 +from kpi.maintenance_tasks import remove_old_asset_snapshots from kpi.models.asset import Asset from kpi.models.import_export_task import ( ExportTask, @@ -93,7 +94,6 @@ def enketo_flush_cached_preview(server_url, form_id): ) response.raise_for_status() -LIMIT_HOURS_23 = 82800 @celery_app.task(time_limit=LIMIT_HOURS_23, soft_time_limit=LIMIT_HOURS_23) @@ -102,4 +102,4 @@ def perform_maintenance(): Run daily maintenance tasks """ remove_unused_markdown_files() - remove_old_assetsnapshots() + remove_old_asset_snapshots() diff --git a/kpi/tests/test_asset_snapshots.py b/kpi/tests/test_asset_snapshots.py index 6b8ff37109..ce0768eb6e 100644 --- a/kpi/tests/test_asset_snapshots.py +++ b/kpi/tests/test_asset_snapshots.py @@ -5,11 +5,10 @@ from constance.test import override_config from django.contrib.auth.models import User from django.test import TestCase -from django.utils.timezone import now +from django.utils import timezone -from kpi.maintenance_tasks import remove_old_assetsnapshots +from kpi.maintenance_tasks import remove_old_asset_snapshots from kpi.tests.api.v2 import test_api_asset_snapshots - from ..models import Asset from ..models import AssetSnapshot @@ -31,6 +30,7 @@ def setUp(self): class CreateAssetSnapshots(AssetSnapshotsTestCase): + def test_init_asset_snapshot(self): ae = AssetSnapshot(asset=self.asset) self.assertEqual(ae.asset.id, self.asset.id) @@ -82,17 +82,18 @@ def test_snapshots_allow_choice_duplicates(self): class AssetSnapshotHousekeeping(AssetSnapshotsTestCase): + @override_config(ASSET_SNAPSHOT_DAYS_RETENTION=2) def test_delete_old_asset_snapshots_task(self): # Because of `auto_date_now` , we cannot specify the date with `create()` older_snapshot = AssetSnapshot.objects.create(asset=self.asset) - older_snapshot.date_created = now() - timedelta(days=5) + older_snapshot.date_created = timezone.now() - timedelta(days=5) older_snapshot.save(update_fields=['date_created']) newer_snapshot = AssetSnapshot.objects.create(asset=self.asset) - newer_snapshot.date_created = now() - timedelta(days=4) + newer_snapshot.date_created = timezone.now() - timedelta(days=4) newer_snapshot.save(update_fields=['date_created']) - remove_old_assetsnapshots() + remove_old_asset_snapshots() # Both are outside retention date, oldest gets removed assert AssetSnapshot.objects.filter(pk=newer_snapshot.id).exists() @@ -101,10 +102,12 @@ def test_delete_old_asset_snapshots_task(self): newest_unversioned_snapshot = AssetSnapshot.objects.create( asset=self.asset, source=self.asset.content ) - newest_unversioned_snapshot.date_created = now() - timedelta(days=3) + newest_unversioned_snapshot.date_created = timezone.now() - timedelta( + days=3 + ) newest_unversioned_snapshot.save(update_fields=['date_created']) - remove_old_assetsnapshots() + remove_old_asset_snapshots() # Newest still gets deleted because it doesn't have version assert AssetSnapshot.objects.filter(pk=newer_snapshot.id).exists() @@ -118,11 +121,11 @@ def test_delete_old_asset_snapshots_task(self): asset_type='survey', ) asset_2_older_snapshot = AssetSnapshot.objects.create(asset=asset_2) - asset_2_older_snapshot.date_created = now() - timedelta(days=1) + asset_2_older_snapshot.date_created = timezone.now() - timedelta(days=1) asset_2_older_snapshot.save(update_fields=['date_created']) asset_2_newer_snapshot = AssetSnapshot.objects.create(asset=asset_2) - remove_old_assetsnapshots() + remove_old_asset_snapshots() # Both remain because they are within retention date assert AssetSnapshot.objects.filter( From 69ff6cce63cc19d03ed2ba0062139b1b89702e9f Mon Sep 17 00:00:00 2001 From: James Kiger Date: Tue, 23 Apr 2024 14:09:29 -0400 Subject: [PATCH 13/13] Update delete procedure --- kpi/maintenance_tasks.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/kpi/maintenance_tasks.py b/kpi/maintenance_tasks.py index 75680ffbd6..43d4bc8313 100644 --- a/kpi/maintenance_tasks.py +++ b/kpi/maintenance_tasks.py @@ -16,20 +16,13 @@ def remove_old_asset_snapshots(): asset_version=None ).filter(asset_id=OuterRef('asset_id'), pk__gt=OuterRef('pk')) - delete_queryset = ( - AssetSnapshot.objects.filter( - date_created__lt=timezone.now() - timedelta(days=days), - ) - .filter(Exists(newer_snapshot_for_asset) | Q(asset_version=None)) - .order_by('pk') - ) + delete_queryset = AssetSnapshot.objects.filter( + date_created__lt=timezone.now() - timedelta(days=days), + ).filter(Exists(newer_snapshot_for_asset) | Q(asset_version=None)) while True: - try: - deletion_delimiter = delete_queryset.values_list('pk', flat=True)[ - 1000:1001 - ].get() - delete_queryset.filter(id__lte=deletion_delimiter).delete() - except AssetSnapshot.DoesNotExist: + count, _ = delete_queryset.filter( + pk__in=delete_queryset[:1000] + ).delete() + if not count: break - delete_queryset.delete()