diff --git a/kobo/tasks.py b/kobo/tasks.py deleted file mode 100644 index a7348d81f0..0000000000 --- a/kobo/tasks.py +++ /dev/null @@ -1,11 +0,0 @@ -from celery import shared_task - -from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files - - -@shared_task -def perform_maintenance(): - """ - Run daily maintenance tasks - """ - remove_unused_markdown_files() diff --git a/kpi/constants.py b/kpi/constants.py index c7988619e2..7498196bfd 100644 --- a/kpi/constants.py +++ b/kpi/constants.py @@ -126,3 +126,5 @@ manually from the django shell. """ ) + +LIMIT_HOURS_23 = 82800 diff --git a/kpi/deployment_backends/mixin.py b/kpi/deployment_backends/mixin.py index c892cef236..ae1690bf14 100644 --- a/kpi/deployment_backends/mixin.py +++ b/kpi/deployment_backends/mixin.py @@ -1,10 +1,11 @@ # coding: utf-8 +import celery from django.utils import timezone from kpi.constants import ASSET_TYPE_SURVEY from kpi.exceptions import BadAssetTypeException, DeploymentNotFound from kpi.models.asset_file import AssetFile -from kpi.tasks import sync_media_files + from .backends import DEPLOYMENT_BACKENDS from .base_backend import BaseDeploymentBackend @@ -19,7 +20,9 @@ def sync_media_files_async(self, always=True): file_type=AssetFile.FORM_MEDIA, synced_with_backend=False ).exists(): self.save(create_version=False, adjust_content=False) - sync_media_files.delay(self.uid) + + # Not using .delay() due to circular import in tasks.py + celery.current_app.send_task('kpi.tasks.sync_media_files', (self.uid,)) @property def can_be_deployed(self): diff --git a/kpi/maintenance_tasks.py b/kpi/maintenance_tasks.py new file mode 100644 index 0000000000..43d4bc8313 --- /dev/null +++ b/kpi/maintenance_tasks.py @@ -0,0 +1,28 @@ +import constance +from datetime import timedelta + +from django.db.models import Exists, OuterRef, Q +from django.utils import timezone + +from kpi.models import AssetSnapshot + + +def remove_old_asset_snapshots(): + days = constance.config.ASSET_SNAPSHOT_DAYS_RETENTION + + # We don't want to delete an asset's latest versioned snapshot, + # even if it is older than the retention period + newer_snapshot_for_asset = AssetSnapshot.objects.exclude( + asset_version=None + ).filter(asset_id=OuterRef('asset_id'), pk__gt=OuterRef('pk')) + + delete_queryset = AssetSnapshot.objects.filter( + date_created__lt=timezone.now() - timedelta(days=days), + ).filter(Exists(newer_snapshot_for_asset) | Q(asset_version=None)) + + while True: + count, _ = delete_queryset.filter( + pk__in=delete_queryset[:1000] + ).delete() + if not count: + break diff --git a/kpi/models/asset.py b/kpi/models/asset.py index 1e5e83af12..cf6460777c 100644 --- a/kpi/models/asset.py +++ b/kpi/models/asset.py @@ -81,7 +81,6 @@ from kpi.utils.asset_content_analyzer import AssetContentAnalyzer from kpi.utils.object_permission import get_cached_code_names from kpi.utils.sluggify import sluggify_label -from kpi.tasks import remove_asset_snapshots class AssetDeploymentStatus(models.TextChoices): @@ -1235,8 +1234,6 @@ def snapshot( if regenerate: snapshot = False - # Let's do some housekeeping - remove_asset_snapshots.delay(self.id) else: snapshot = AssetSnapshot.objects.filter(**snap_params).order_by( '-date_created' diff --git a/kpi/models/asset_snapshot.py b/kpi/models/asset_snapshot.py index 39b93cd569..f362db06e0 100644 --- a/kpi/models/asset_snapshot.py +++ b/kpi/models/asset_snapshot.py @@ -54,12 +54,6 @@ class AssetSnapshot( """ This model serves as a cache of the XML that was exported by the installed version of pyxform. - - TODO: come up with a policy to clear this cache out. - DO NOT: depend on these snapshots existing for more than a day - until a policy is set. - Done with https://github.com/kobotoolbox/kpi/pull/2434. - Remove above lines when PR is merged """ xml = models.TextField() source = models.JSONField(default=dict) diff --git a/kpi/tasks.py b/kpi/tasks.py index d42b2387b9..553b71d3d6 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -3,24 +3,29 @@ import requests from django.conf import settings from django.contrib.auth.models import User -from django.core.management import call_command from django.core.mail import send_mail +from django.core.management import call_command +from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files from kobo.celery import celery_app +from kpi.constants import LIMIT_HOURS_23 +from kpi.maintenance_tasks import remove_old_asset_snapshots +from kpi.models.asset import Asset +from kpi.models.import_export_task import ( + ExportTask, + ImportTask, + ProjectViewExportTask, +) @celery_app.task def import_in_background(import_task_uid): - from kpi.models.import_export_task import ImportTask # avoid circular imports - import_task = ImportTask.objects.get(uid=import_task_uid) import_task.run() @celery_app.task def export_in_background(export_task_uid): - from kpi.models.import_export_task import ExportTask # avoid circular imports - export_task = ExportTask.objects.get(uid=export_task_uid) export_task.run() @@ -29,10 +34,6 @@ def export_in_background(export_task_uid): def project_view_export_in_background( export_task_uid: str, username: str ) -> None: - from kpi.models.import_export_task import ( - ProjectViewExportTask, - ) # avoid circular imports - user = User.objects.get(username=username) export_task = ProjectViewExportTask.objects.get(uid=export_task_uid) @@ -72,8 +73,6 @@ def sync_kobocat_xforms( @celery_app.task def sync_media_files(asset_uid): - from kpi.models.asset import Asset # avoid circular imports - asset = Asset.objects.get(uid=asset_uid) asset.deployment.sync_media_files() @@ -96,14 +95,11 @@ def enketo_flush_cached_preview(server_url, form_id): response.raise_for_status() -@celery_app.task -def remove_asset_snapshots(asset_id: int): + +@celery_app.task(time_limit=LIMIT_HOURS_23, soft_time_limit=LIMIT_HOURS_23) +def perform_maintenance(): """ - Temporary task to delete old snapshots. - TODO remove when kpi#2434 is merged + Run daily maintenance tasks """ - call_command( - 'delete_assets_snapshots', - days=constance.config.ASSET_SNAPSHOT_DAYS_RETENTION, - asset_id=asset_id, - ) + remove_unused_markdown_files() + remove_old_asset_snapshots() diff --git a/kpi/tests/test_asset_snapshots.py b/kpi/tests/test_asset_snapshots.py index d6acede340..ce0768eb6e 100644 --- a/kpi/tests/test_asset_snapshots.py +++ b/kpi/tests/test_asset_snapshots.py @@ -5,8 +5,9 @@ from constance.test import override_config from django.contrib.auth.models import User from django.test import TestCase -from django.utils.timezone import now +from django.utils import timezone +from kpi.maintenance_tasks import remove_old_asset_snapshots from kpi.tests.api.v2 import test_api_asset_snapshots from ..models import Asset from ..models import AssetSnapshot @@ -83,31 +84,53 @@ def test_snapshots_allow_choice_duplicates(self): class AssetSnapshotHousekeeping(AssetSnapshotsTestCase): @override_config(ASSET_SNAPSHOT_DAYS_RETENTION=2) - def test_delete_old_asset_snapshots_on_regenerate(self): - two_days_before = now() - timedelta(days=3) # One more day than Constance setting - yesterday = now() - timedelta(days=1) + def test_delete_old_asset_snapshots_task(self): # Because of `auto_date_now` , we cannot specify the date with `create()` older_snapshot = AssetSnapshot.objects.create(asset=self.asset) - older_snapshot.date_created = two_days_before + older_snapshot.date_created = timezone.now() - timedelta(days=5) older_snapshot.save(update_fields=['date_created']) - old_snapshot = AssetSnapshot.objects.create(asset=self.asset) - old_snapshot.date_created = yesterday - old_snapshot.save(update_fields=['date_created']) - # versioned snapshots are always regenerated - versioned_snapshot = self.asset.snapshot( - regenerate=True, - version_uid=self.asset.latest_deployed_version_uid - ) - snapshot_uids = list(AssetSnapshot.objects.filter( - asset=self.asset - ).values_list('uid', flat=True)) - expected_snapshot_uids = [ - versioned_snapshot.uid, - old_snapshot.uid, - self.asset_snapshot.uid, - ] - # Older snapshot should be gone + newer_snapshot = AssetSnapshot.objects.create(asset=self.asset) + newer_snapshot.date_created = timezone.now() - timedelta(days=4) + newer_snapshot.save(update_fields=['date_created']) + + remove_old_asset_snapshots() + + # Both are outside retention date, oldest gets removed + assert AssetSnapshot.objects.filter(pk=newer_snapshot.id).exists() assert not AssetSnapshot.objects.filter(pk=older_snapshot.id).exists() - # Older snapshot should still exist - assert AssetSnapshot.objects.filter(pk=old_snapshot.id).exists() - assert sorted(expected_snapshot_uids) == sorted(snapshot_uids) + + newest_unversioned_snapshot = AssetSnapshot.objects.create( + asset=self.asset, source=self.asset.content + ) + newest_unversioned_snapshot.date_created = timezone.now() - timedelta( + days=3 + ) + newest_unversioned_snapshot.save(update_fields=['date_created']) + + remove_old_asset_snapshots() + + # Newest still gets deleted because it doesn't have version + assert AssetSnapshot.objects.filter(pk=newer_snapshot.id).exists() + assert not AssetSnapshot.objects.filter( + pk=newest_unversioned_snapshot.id + ).exists() + + asset_2 = Asset.objects.create( + content=self.asset.content, + owner=self.user, + asset_type='survey', + ) + asset_2_older_snapshot = AssetSnapshot.objects.create(asset=asset_2) + asset_2_older_snapshot.date_created = timezone.now() - timedelta(days=1) + asset_2_older_snapshot.save(update_fields=['date_created']) + asset_2_newer_snapshot = AssetSnapshot.objects.create(asset=asset_2) + + remove_old_asset_snapshots() + + # Both remain because they are within retention date + assert AssetSnapshot.objects.filter( + pk=asset_2_older_snapshot.id + ).exists() + assert AssetSnapshot.objects.filter( + pk=asset_2_newer_snapshot.id + ).exists()