Skip to content

Commit

Permalink
fix(projectOwnershipTransfer): ensure OpenRosa media files are synchronized when transferring project ownership TASK-1352 (#5365)
Browse files Browse the repository at this point in the history

### 📣 Summary
Fixed an issue where OpenRosa media files were not synced during project
ownership transfers, causing a 404 error in Collect when opening a
project due to missing media files.


### 📖 Description
An issue was resolved where OpenRosa media files were not properly
synced when transferring project ownership. This fix ensures that all
media files are transferred correctly along with the project, preventing
errors and maintaining data consistency.

Previously, the absence of these media files resulted in a 404 error
when users attempted to open the project using Collect. This update
addresses the root cause, ensuring that all required files are available
post-transfer.
  • Loading branch information
noliveleger authored Dec 17, 2024
1 parent 296ac68 commit edd6352
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 80 deletions.
1 change: 0 additions & 1 deletion kobo/apps/project_ownership/constants.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
ASYNC_TASK_HEARTBEAT = 60 * 5 # every 5 minutes
FILE_MOVE_CHUNK_SIZE = 1000
128 changes: 49 additions & 79 deletions kobo/apps/project_ownership/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from kpi.models.asset import AssetFile
from .models.choices import TransferStatusChoices, TransferStatusTypeChoices
from .exceptions import AsyncTaskException
from .constants import ASYNC_TASK_HEARTBEAT, FILE_MOVE_CHUNK_SIZE
from .constants import ASYNC_TASK_HEARTBEAT


def get_target_folder(
Expand Down Expand Up @@ -62,40 +62,25 @@ def move_attachments(transfer: 'project_ownership.Transfer'):
instance_id__in=submission_ids
).exclude(media_file__startswith=f'{transfer.asset.owner.username}/')

attachments_to_update = []
try:
heartbeat = int(time.time())
# Moving files is pretty slow, thus it should run in a celery task.
for attachment in attachments.iterator():
if not (
target_folder := get_target_folder(
transfer.invite.sender.username,
transfer.invite.recipient.username,
attachment.media_file.name,
)
):
continue
else:
# We want to be sure the path of the file is saved no matter what.
# Thanks to try/finally block, if updates are still pending, they
# should be saved in case of errors.
# It lets us resume when it stopped in case of failure.
attachment.media_file.move(target_folder)
attachments_to_update.append(attachment)

if len(attachments_to_update) > FILE_MOVE_CHUNK_SIZE:
Attachment.objects.bulk_update(
attachments_to_update, fields=['media_file']
)
attachments_to_update = []

heartbeat = _update_heartbeat(heartbeat, transfer, async_task_type)

finally:
if attachments_to_update:
Attachment.objects.bulk_update(
attachments_to_update, fields=['media_file']
heartbeat = int(time.time())
# Moving files is pretty slow, thus it should run in a celery task.
for attachment in attachments.iterator():
if not (
target_folder := get_target_folder(
transfer.invite.sender.username,
transfer.invite.recipient.username,
attachment.media_file.name,
)
):
continue
else:
# There is no way to ensure atomicity when moving the file and saving the
# object to the database. Fingers crossed that the process doesn't get
# interrupted between these two operations.
attachment.media_file.move(target_folder)
            attachment.save(update_fields=['media_file'])

heartbeat = _update_heartbeat(heartbeat, transfer, async_task_type)

_mark_task_as_successful(transfer, async_task_type)

Expand All @@ -117,53 +102,38 @@ def move_media_files(transfer: 'project_ownership.Transfer'):
)
}

media_files_to_update = []
metadata_to_update = []
try:
heartbeat = int(time.time())
# Moving files is pretty slow, thus it should run in a celery task.
for media_file in media_files:
if not (
target_folder := get_target_folder(
heartbeat = int(time.time())
# Moving files is pretty slow, thus it should run in a celery task.
for media_file in media_files:
if not (
target_folder := get_target_folder(
transfer.invite.sender.username,
transfer.invite.recipient.username,
media_file.content.name,
)
):
continue
else:
# There is no way to ensure atomicity when moving the file and saving the
# object to the database. Fingers crossed that the process doesn't get
# interrupted between these two operations.
media_file.content.move(target_folder)
old_md5 = media_file.metadata.pop('hash', None)
media_file.set_md5_hash()
media_file.save(update_fields=['content', 'metadata'])

if old_md5 in kc_files.keys():
kc_obj = kc_files[old_md5]
if kc_target_folder := get_target_folder(
transfer.invite.sender.username,
transfer.invite.recipient.username,
media_file.content.name,
)
):
continue
else:
# We want to be sure the path of the file is saved no matter what.
# Thanks to try/finally block, if updates are still pending, they
# should be saved in case of errors.
# It lets us resume when it stopped in case of failure.
media_file.content.move(target_folder)
old_md5 = media_file.metadata.pop('hash', None)
media_file.set_md5_hash()
if old_md5 in kc_files.keys():
kc_obj = kc_files[old_md5]
if kc_target_folder := get_target_folder(
transfer.invite.sender.username,
transfer.invite.recipient.username,
kc_obj.data_file.name,
):
kc_obj.data_file.move(kc_target_folder)
kc_obj.file_hash = media_file.md5_hash
metadata_to_update.append(kc_obj)

media_files_to_update.append(media_file)
heartbeat = _update_heartbeat(heartbeat, transfer, async_task_type)

finally:
# No need to use chunk size for media files like we do for attachments,
# because the odds are pretty low that more than 100 media files are
# linked to the project.
if metadata_to_update:
media_files_to_update.append(media_file)

if media_files_to_update:
AssetFile.objects.bulk_update(
media_files_to_update, fields=['content', 'metadata']
)
kc_obj.data_file.name,
):
kc_obj.data_file.move(kc_target_folder)
kc_obj.file_hash = media_file.md5_hash
kc_obj.save(update_fields=['file_hash', 'data_file'])

heartbeat = _update_heartbeat(heartbeat, transfer, async_task_type)

_mark_task_as_successful(transfer, async_task_type)

Expand Down

0 comments on commit edd6352

Please sign in to comment.