From cd6e5c2043ef8041027e02af8aa6bab9729de971 Mon Sep 17 00:00:00 2001
From: Olivier Leger
Date: Mon, 31 Oct 2022 16:50:01 -0400
Subject: [PATCH 1/2] Set 'AZURE_OVERWRITE_FILES' with env var. Make it 'False' by default

---
 onadata/settings/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onadata/settings/base.py b/onadata/settings/base.py
index 5b3335426..04f7412d9 100644
--- a/onadata/settings/base.py
+++ b/onadata/settings/base.py
@@ -556,7 +556,7 @@ def skip_suspicious_operations(record):
     AZURE_ACCOUNT_KEY = env.str('AZURE_ACCOUNT_KEY')
     AZURE_CONTAINER = env.str('AZURE_CONTAINER')
     AZURE_URL_EXPIRATION_SECS = env.int('AZURE_URL_EXPIRATION_SECS', None)
-    AZURE_OVERWRITE_FILES = True
+    AZURE_OVERWRITE_FILES = env.bool('AZURE_OVERWRITE_FILES', False)
 
 GOOGLE_ANALYTICS_PROPERTY_ID = env.str("GOOGLE_ANALYTICS_TOKEN", False)
 GOOGLE_ANALYTICS_DOMAIN = "auto"

From 0d95c31961ed13f7ae91b2154c8dbbd99b4a5d3a Mon Sep 17 00:00:00 2001
From: Olivier Leger
Date: Wed, 2 Nov 2022 11:14:30 -0400
Subject: [PATCH 2/2] Fix zip exports with Azure

---
 onadata/libs/utils/export_tools.py | 59 ++++++++++++++++++++++++++++--
 onadata/libs/utils/viewer_tools.py |  5 ++-
 onadata/settings/base.py           |  3 +-
 3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/onadata/libs/utils/export_tools.py b/onadata/libs/utils/export_tools.py
index fd49376b3..d6d9b44e2 100644
--- a/onadata/libs/utils/export_tools.py
+++ b/onadata/libs/utils/export_tools.py
@@ -7,6 +7,7 @@
 from bson import json_util
 from django.conf import settings
 from django.core.files.base import File, ContentFile
+from django.core.files.storage import FileSystemStorage
 from django.core.files.temp import NamedTemporaryFile
 from django.core.files.storage import get_storage_class
 from django.contrib.auth.models import User
@@ -675,18 +676,18 @@
         export_type,
         filename)
 
-    export_filename = get_storage_class()().save(file_path, ContentFile(b''))
+    absolute_filename = _get_absolute_filename(file_path)
 
-    with get_storage_class()().open(export_filename, 'wb') as destination_file:
+    with get_storage_class()().open(absolute_filename, 'wb') as destination_file:
         create_attachments_zipfile(
             attachments,
             output_file=destination_file,
         )
 
-    dir_name, basename = os.path.split(export_filename)
+    dir_name, basename = os.path.split(absolute_filename)
 
     # get or create export object
-    if(export_id):
+    if export_id:
         export = Export.objects.get(id=export_id)
     else:
         export = Export.objects.create(xform=xform, export_type=export_type)
@@ -763,3 +764,53 @@ def kml_export_data(id_string, user):
         })
 
     return data_for_template
+
+
+def _get_absolute_filename(filename: str) -> str:
+    """
+    Get absolute filename related to storage root.
+    """
+
+    storage_class = get_storage_class()()
+    filename = storage_class.generate_filename(filename)
+
+    # We cannot call `self.result.save()` before reopening the file
+    # in write mode (i.e. open(filename, 'wb')) because it does not work
+    # with AzureStorage.
+    # Unfortunately, `self.result.save()` does a few things that we need to
+    # reimplement here:
+    # - Create parent folders (if they do not exist) for local storage
+    # - Get a unique filename if filename already exists on storage
+
+    # Copied from `FileSystemStorage._save()` 😢
+    if isinstance(storage_class, FileSystemStorage):
+        full_path = storage_class.path(filename)
+
+        # Create any intermediate directories that do not exist.
+        directory = os.path.dirname(full_path)
+        if not os.path.exists(directory):
+            try:
+                if storage_class.directory_permissions_mode is not None:
+                    # os.makedirs applies the global umask, so we reset it,
+                    # for consistency with file_permissions_mode behavior.
+                    old_umask = os.umask(0)
+                    try:
+                        os.makedirs(
+                            directory, storage_class.directory_permissions_mode
+                        )
+                    finally:
+                        os.umask(old_umask)
+                else:
+                    os.makedirs(directory)
+            except FileExistsError:
+                # There's a race between os.path.exists() and os.makedirs().
+                # If os.makedirs() fails with FileExistsError, the directory
+                # was created concurrently.
+                pass
+        if not os.path.isdir(directory):
+            raise IOError("%s exists and is not a directory." % directory)
+
+    # Store filenames with forward slashes, even on Windows.
+    filename = filename.replace('\\', '/')
+
+    return storage_class.get_available_name(filename)
diff --git a/onadata/libs/utils/viewer_tools.py b/onadata/libs/utils/viewer_tools.py
index 2ddcdb9e6..ccdf96448 100644
--- a/onadata/libs/utils/viewer_tools.py
+++ b/onadata/libs/utils/viewer_tools.py
@@ -56,10 +56,11 @@ def report_exception(subject, info, exc_info=None):
         info += "".join(traceback.format_exception(*exc_info))
 
     if settings.DEBUG:
-        print(subject)
-        print(info)
+        print(subject, flush=True)
+        print(info, flush=True)
     else:
         mail_admins(subject=subject, message=info)
+        logging.error(info, exc_info=exc_info)
 
 
 def django_file(path, field_name, content_type):
diff --git a/onadata/settings/base.py b/onadata/settings/base.py
index 04f7412d9..53e41ebd2 100644
--- a/onadata/settings/base.py
+++ b/onadata/settings/base.py
@@ -556,7 +556,6 @@ def skip_suspicious_operations(record):
     AZURE_ACCOUNT_KEY = env.str('AZURE_ACCOUNT_KEY')
     AZURE_CONTAINER = env.str('AZURE_CONTAINER')
     AZURE_URL_EXPIRATION_SECS = env.int('AZURE_URL_EXPIRATION_SECS', None)
-    AZURE_OVERWRITE_FILES = env.bool('AZURE_OVERWRITE_FILES', False)
 
 GOOGLE_ANALYTICS_PROPERTY_ID = env.str("GOOGLE_ANALYTICS_TOKEN", False)
 GOOGLE_ANALYTICS_DOMAIN = "auto"
@@ -678,7 +677,7 @@ def skip_suspicious_operations(record):
 
 # Celery defaults to having as many workers as there are cores. To avoid
 # excessive resource consumption, don't spawn more than 6 workers by default
-# even if there more than 6 cores.
+# even if there are more than 6 cores.
 CELERY_WORKER_MAX_CONCURRENCY = int(os.environ.get('CELERYD_MAX_CONCURRENCY', 6))
 if multiprocessing.cpu_count() > CELERY_WORKER_MAX_CONCURRENCY:
     CELERY_WORKER_CONCURRENCY = CELERY_WORKER_MAX_CONCURRENCY
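
The sketch below is not part of the patch series; it only restates, outside the diff, the write flow the second commit switches to. The helper name write_zip_to_storage, the file_path argument, and the attachments iterable of (name, bytes) pairs are illustrative stand-ins; the real logic lives in generate_attachments_zip_export() and _get_absolute_filename() above. Rather than reserving a name with save(file_path, ContentFile(b'')) and reopening it in write mode (which, per the comment in _get_absolute_filename(), does not work with AzureStorage), the export resolves a unique name first and then writes the zip through storage.open(name, 'wb') in a single pass.

import zipfile

from django.core.files.storage import get_storage_class


def write_zip_to_storage(file_path, attachments):
    """Hypothetical helper mirroring the write flow used by the patch."""
    storage = get_storage_class()()

    # Resolve a unique, storage-relative name up front, as
    # _get_absolute_filename() does (minus the directory creation that is
    # only needed for FileSystemStorage).
    final_name = storage.get_available_name(
        storage.generate_filename(file_path)
    )

    # Stream the archive straight into the storage backend in one pass,
    # instead of saving an empty placeholder file and reopening it.
    with storage.open(final_name, 'wb') as destination:
        with zipfile.ZipFile(destination, 'w') as zip_file:
            for name, payload in attachments:  # assumed (name, bytes) pairs
                zip_file.writestr(name, payload)

    return final_name

Resolving the name before anything is written also appears to be what lets the first commit default AZURE_OVERWRITE_FILES to False instead of forcing it to True.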