Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Asset snapshot cleanup task #4901

Merged
merged 13 commits into from
Apr 23, 2024
2 changes: 2 additions & 0 deletions kobo/tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from celery import shared_task

from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files
from kpi.maintenance_tasks import remove_old_assetsnapshots


@shared_task
Expand All @@ -9,3 +10,4 @@ def perform_maintenance():
Run daily maintenance tasks
jamesrkiger marked this conversation as resolved.
Show resolved Hide resolved
"""
remove_unused_markdown_files()
remove_old_assetsnapshots()
39 changes: 39 additions & 0 deletions kpi/maintenance_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import constance
jamesrkiger marked this conversation as resolved.
Show resolved Hide resolved
from datetime import timedelta

from django.db.models import Max
from django.utils import timezone

from kpi.models import AssetSnapshot


default_retention_period = 90

def remove_old_assetsnapshots():
jamesrkiger marked this conversation as resolved.
Show resolved Hide resolved
days = (
constance.config.ASSET_SNAPSHOT_DAYS_RETENTION
or default_retention_period
jamesrkiger marked this conversation as resolved.
Show resolved Hide resolved
)
max_snapshots_qs = (
AssetSnapshot.objects.exclude(asset_version=None)
jamesrkiger marked this conversation as resolved.
Show resolved Hide resolved
.values('asset_id')
.annotate(max_snapshot_id=Max('pk'))
.values_list('max_snapshot_id', flat=True)
)

max_snapshot_ids = list(max_snapshots_qs)

delete_queryset = AssetSnapshot.objects.filter(
date_created__lt=timezone.now() - timedelta(days=days),
).exclude(pk__in=max_snapshot_ids).order_by('pk')

while True:
try:
deletion_delimiter = delete_queryset.values_list('pk', flat=True)[
1000:1001
].get()
delete_queryset.filter(id__lte=deletion_delimiter).delete()
except AssetSnapshot.DoesNotExist:
jamesrkiger marked this conversation as resolved.
Show resolved Hide resolved
break

delete_queryset.delete()
3 changes: 0 additions & 3 deletions kpi/models/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@
from kpi.utils.asset_content_analyzer import AssetContentAnalyzer
from kpi.utils.object_permission import get_cached_code_names
from kpi.utils.sluggify import sluggify_label
from kpi.tasks import remove_asset_snapshots


class AssetDeploymentStatus(models.TextChoices):
Expand Down Expand Up @@ -1235,8 +1234,6 @@ def snapshot(

if regenerate:
snapshot = False
# Let's do some housekeeping
remove_asset_snapshots.delay(self.id)
else:
snapshot = AssetSnapshot.objects.filter(**snap_params).order_by(
'-date_created'
Expand Down
13 changes: 0 additions & 13 deletions kpi/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,3 @@ def enketo_flush_cached_preview(server_url, form_id):
data=dict(server_url=server_url, form_id=form_id),
)
response.raise_for_status()


@celery_app.task
def remove_asset_snapshots(asset_id: int):
"""
Temporary task to delete old snapshots.
TODO remove when kpi#2434 is merged
"""
call_command(
'delete_assets_snapshots',
days=constance.config.ASSET_SNAPSHOT_DAYS_RETENTION,
asset_id=asset_id,
)
45 changes: 31 additions & 14 deletions kpi/tests/test_asset_snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from django.test import TestCase
from django.utils.timezone import now

from kpi.maintenance_tasks import remove_old_assetsnapshots
from kpi.tests.api.v2 import test_api_asset_snapshots
from ..models import Asset
from ..models import AssetSnapshot
Expand Down Expand Up @@ -83,7 +84,7 @@ def test_snapshots_allow_choice_duplicates(self):
class AssetSnapshotHousekeeping(AssetSnapshotsTestCase):

jamesrkiger marked this conversation as resolved.
Show resolved Hide resolved
@override_config(ASSET_SNAPSHOT_DAYS_RETENTION=2)
def test_delete_old_asset_snapshots_on_regenerate(self):
def test_delete_old_asset_snapshots_task(self):
two_days_before = now() - timedelta(days=3) # One more day than Constance setting
yesterday = now() - timedelta(days=1)
# Because of `auto_date_now` , we cannot specify the date with `create()`
Expand All @@ -93,21 +94,37 @@ def test_delete_old_asset_snapshots_on_regenerate(self):
old_snapshot = AssetSnapshot.objects.create(asset=self.asset)
old_snapshot.date_created = yesterday
old_snapshot.save(update_fields=['date_created'])
# versioned snapshots are always regenerated
versioned_snapshot = self.asset.snapshot(

self.asset.deploy(backend='mock', active=True)
versioned_snapshot_1 = self.asset.snapshot(
regenerate=True,
version_uid=self.asset.latest_deployed_version_uid
)
snapshot_uids = list(AssetSnapshot.objects.filter(
asset=self.asset
).values_list('uid', flat=True))
expected_snapshot_uids = [
versioned_snapshot.uid,
old_snapshot.uid,
self.asset_snapshot.uid,
]
versioned_snapshot_1.date_created = two_days_before
versioned_snapshot_1.save(update_fields=['date_created'])

# Redeploy asset to add version
self.asset.content['survey'].append(
{'type': 'note', 'label': 'Read me', 'name': 'n'}
)
self.asset.save()
self.asset.deploy(backend='mock', active=True)
versioned_snapshot_2 = self.asset.snapshot(
regenerate=True, version_uid=self.asset.latest_deployed_version_uid
)
# Set date to beyond retention limit to ensure latest version snapshot is preserved
versioned_snapshot_2.date_created = two_days_before
versioned_snapshot_2.save(update_fields=['date_created'])

with self.assertNumQueries(4):
remove_old_assetsnapshots()

# Old snapshot should still exist
assert AssetSnapshot.objects.filter(pk=old_snapshot.id).exists()
# Older snapshot should be gone
assert not AssetSnapshot.objects.filter(pk=older_snapshot.id).exists()
# Older snapshot should still exist
assert AssetSnapshot.objects.filter(pk=old_snapshot.id).exists()
assert sorted(expected_snapshot_uids) == sorted(snapshot_uids)
# Only latest versioned snapshot should still exist
assert AssetSnapshot.objects.filter(pk=versioned_snapshot_2.id).exists()
assert not AssetSnapshot.objects.filter(
pk=versioned_snapshot_1.id
).exists()
Loading