Skip to content

Commit

Permalink
feat(accessLogExport)!: create new AccessLogExportTask to generate a …
Browse files Browse the repository at this point in the history
…csv of access logs TASK-871 (#5258)

### 👀 Preview steps

1. Ensure that the `Export all data` button for `ProjectViews` still
works as normal and that the email subject is
"Project View Report Complete"
2. Ensure that exports of submission data work correctly:
- make submissions to a project
- navigate to _Project > Data > Downloads_
- 🟢 ensure the `Export` button is exporting data correctly
3. Preview the AccessLogExportTask from the shell:
- ℹ️ make sure a user account exists (the example below uses the username `test`)
- enter the Django shell: `./manage.py shell_plus`
- get your user: `test_user = User.objects.get(username='test')`
- create an AccessLogExportTask: 
```python
task = AccessLogExportTask.objects.create(
    user=test_user,
    get_all_logs=False,
    data={'type': 'access_logs_export'},
)
```
- 🟢 print the task object and notice that an AccessLogExportTask object
has been created: `print(task)`
- run the task: `task.run()`
- 🟢 print the result and notice that it generated a link to the csv
export: `print(task.result)`

### 💭 Notes
- Changed the field named `user` to `user_url` because naming it
`user` conflicts with an existing field in the `AccessLog` model
- Renamed the file titled `project_view_exports.py` to `data_exports.py`
because it now generates exports for both project view and access log
exports
- Refactored the `ProjectViewExportTask` to have a base
`CommonExportTask` which both the project view and access log export
classes inherit from
- Updated the email (in `kpi/tasks.py`) to have the correct subject
title based on the type of export report it is sending
- Alphabetized the functions in `data_exports.py`
  • Loading branch information
RuthShryock authored Dec 2, 2024
1 parent b5d51b3 commit 6992e8a
Show file tree
Hide file tree
Showing 20 changed files with 478 additions and 150 deletions.
2 changes: 1 addition & 1 deletion jsapp/js/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ export const META_QUESTION_TYPES = createEnum([
// 1. https://github.com/kobotoolbox/kobocat/blob/78133d519f7b7674636c871e3ba5670cd64a7227/onadata/apps/viewer/models/parsed_instance.py#L242-L260
// 2. https://github.com/kobotoolbox/kpi/blob/7db39015866c905edc645677d72b9c1ea16067b1/jsapp/js/constants.es6#L284-L294
export const ADDITIONAL_SUBMISSION_PROPS = createEnum([
// match the ordering of (Python) kpi.models.import_export_task.ExportTask.COPY_FIELDS
// match the ordering of (Python) kpi.models.import_export_task.SubmissionExportTask.COPY_FIELDS
'_id',
'_uuid',
'_submission_time',
Expand Down
4 changes: 2 additions & 2 deletions kobo/apps/audit_log/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def create_from_request(cls, request):
'asset-file-detail': cls.create_from_file_request,
'asset-file-list': cls.create_from_file_request,
'asset-export-list': cls.create_from_export_request,
'exporttask-list': cls.create_from_v1_export,
'submissionexporttask-list': cls.create_from_v1_export,
'asset-bulk': cls.create_from_bulk_request,
}
url_name = request.resolver_match.url_name
Expand Down Expand Up @@ -610,7 +610,7 @@ def create_from_related_request(
action = modify_action
if action:
# some actions on related objects do not need to be logged,
# eg deleting an ExportTask
# eg deleting a SubmissionExportTask
ProjectHistoryLog.objects.create(
user=request.user, object_id=object_id, action=action, metadata=metadata
)
Expand Down
2 changes: 1 addition & 1 deletion kobo/apps/audit_log/tests/test_project_history_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,7 @@ def test_export_v1_creates_log(self):
# can't use _base_project_history_log_test because
# the old endpoint doesn't like format=json
self.client.post(
path=reverse('exporttask-list'),
path=reverse('submissionexporttask-list'),
data=request_data,
)

Expand Down
4 changes: 2 additions & 2 deletions kobo/apps/trash_bin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from kobo.apps.audit_log.audit_actions import AuditAction
from kobo.apps.audit_log.models import AuditLog, AuditType
from kpi.exceptions import InvalidXFormException, MissingXFormException
from kpi.models import Asset, ExportTask, ImportTask
from kpi.models import Asset, SubmissionExportTask, ImportTask
from kpi.utils.mongo_helper import MongoHelper
from kpi.utils.storage import rmdir
from .constants import DELETE_PROJECT_STR_PREFIX, DELETE_USER_STR_PREFIX
Expand All @@ -45,7 +45,7 @@ def delete_asset(request_author: settings.AUTH_USER_MODEL, asset: 'kpi.Asset'):
if asset.has_deployment:
_delete_submissions(request_author, asset)
asset.deployment.delete()
project_exports = ExportTask.objects.filter(
project_exports = SubmissionExportTask.objects.filter(
Q(data__source=f'{host}/api/v2/assets/{asset.uid}/')
| Q(data__source=f'{host}/assets/{asset.uid}/')
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Generated by Django 4.2.15 on 2024-11-26 19:55

import django.db.models.deletion
import private_storage.fields
import private_storage.storage.files
from django.conf import settings
from django.db import migrations, models

import kpi.fields.file
import kpi.fields.kpi_uid
import kpi.models.asset_file
import kpi.models.import_export_task


class Migration(migrations.Migration):
    """
    Rename the submission export models and add `AccessLogExportTask`.

    Operations, in order:
    1. Rename `ExportTask` to `SubmissionExportTask`.
    2. Rename `SynchronousExport` to `SubmissionSynchronousExport`.
    3. Alter `AssetFile.content`.
    4. Create the `AccessLogExportTask` model.
    """

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('kpi', '0059_assetexportsettings_date_created_and_more'),
    ]

    operations = [
        # Model renames
        migrations.RenameModel(
            old_name='ExportTask',
            new_name='SubmissionExportTask',
        ),
        migrations.RenameModel(
            old_name='SynchronousExport',
            new_name='SubmissionSynchronousExport',
        ),
        # NOTE(review): this field alteration looks unrelated to the renames
        # and was presumably picked up by `makemigrations` - confirm
        migrations.AlterField(
            model_name='assetfile',
            name='content',
            field=kpi.fields.file.PrivateExtendedFileField(
                max_length=380, null=True, upload_to=kpi.models.asset_file.upload_to
            ),
        ),
        # New model holding access log export tasks
        migrations.CreateModel(
            name='AccessLogExportTask',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('data', models.JSONField()),
                ('messages', models.JSONField(default=dict)),
                (
                    'status',
                    models.CharField(
                        choices=[
                            ('created', 'created'),
                            ('processing', 'processing'),
                            ('error', 'error'),
                            ('complete', 'complete'),
                        ],
                        default='created',
                        max_length=32,
                    ),
                ),
                ('date_created', models.DateTimeField(auto_now_add=True)),
                # uids of this model are generated with the `ale` prefix
                ('uid', kpi.fields.kpi_uid.KpiUidField(_null=False, uid_prefix='ale')),
                ('get_all_logs', models.BooleanField(default=False)),
                (
                    'result',
                    private_storage.fields.PrivateFileField(
                        max_length=380,
                        storage=(
                            private_storage.storage.files.PrivateFileSystemStorage()
                        ),
                        upload_to=kpi.models.import_export_task.export_upload_to,
                    ),
                ),
                (
                    'user',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                'abstract': False,
            },
            bases=(kpi.models.import_export_task.ExportTaskMixin, models.Model),
        ),
    ]
6 changes: 3 additions & 3 deletions kpi/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from .asset_snapshot import AssetSnapshot
from .asset_user_partial_permission import AssetUserPartialPermission
from .object_permission import ObjectPermission
from .import_export_task import (
ExportTask,
from .import_export_task import ( # noqa F401
SubmissionExportTask,
ImportTask,
ProjectViewExportTask,
SynchronousExport,
SubmissionSynchronousExport,
)
from .tag_uid import TagUid
from .authorized_application import AuthorizedApplication
Expand Down
140 changes: 120 additions & 20 deletions kpi/models/import_export_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
from django.contrib.postgres.indexes import BTreeIndex, HashIndex
from django.core.files.storage import FileSystemStorage
from django.db import models, transaction
from django.db.models import F
from django.db.models import CharField, F, Value
from django.db.models.functions import Concat
from django.db.models.query import QuerySet
from django.urls import reverse
from django.utils import timezone
from django.utils.translation import gettext as t
Expand Down Expand Up @@ -52,9 +54,18 @@
from kpi.exceptions import XlsFormatException
from kpi.fields import KpiUidField
from kpi.models import Asset
from kpi.utils.data_exports import (
ACCESS_LOGS_EXPORT_FIELDS,
ASSET_FIELDS,
CONFIG,
SETTINGS,
create_data_export,
filter_remaining_metadata,
get_q,
)
from kpi.utils.log import logging
from kpi.utils.models import _load_library_content, create_assets, resolve_url_to_asset
from kpi.utils.project_view_exports import create_project_view_export
from kpi.utils.project_views import get_region_for_view
from kpi.utils.rename_xls_sheet import (
ConflictSheetError,
NoFromSheetError,
Expand Down Expand Up @@ -129,7 +140,7 @@ def run(self):
# This method must be implemented by a subclass
self._run_task(msgs)
self.status = self.COMPLETE
except ExportTaskBase.InaccessibleData as e:
except SubmissionExportTaskBase.InaccessibleData as e:
msgs['error_type'] = t('Cannot access data')
msgs['error'] = str(e)
self.status = self.ERROR
Expand Down Expand Up @@ -477,27 +488,28 @@ def export_upload_to(self, filename):
return posixpath.join(self.user.username, 'exports', filename)


class ProjectViewExportTask(ImportExportTask):
uid = KpiUidField(uid_prefix='pve')
result = PrivateFileField(upload_to=export_upload_to, max_length=380)
class ExportTaskMixin:

@property
def default_email_subject(self) -> str:
return 'Report Complete'

def _get_export_details(self) -> tuple:
return self.data.get('type'), self.data.get('view', None)

def _build_export_filename(
self, export_type: str, username: str, view: str
self, export_type: str, username: str, view: str = None
) -> str:
time = timezone.now().strftime('%Y-%m-%dT%H:%M:%SZ')
return f'{export_type}-{username}-view_{view}-{time}.csv'

def _run_task(self, messages: list) -> None:
export_type = self.data['type']
view = self.data['view']
if view:
return f'{export_type}-{username}-view_{view}-{time}.csv'
return f'{export_type}-{username}-{time}.csv'

filename = self._build_export_filename(
export_type, self.user.username, view
)
def _export_data_to_file(self, messages: list, buff) -> None:
export_type, view = self._get_export_details()
filename = self._build_export_filename(export_type, self.user.username, view)
absolute_filepath = self.get_absolute_filepath(filename)

buff = create_project_view_export(export_type, self.user.username, view)

with self.result.storage.open(absolute_filepath, 'wb') as output_file:
output_file.write(buff.read().encode())

Expand All @@ -510,7 +522,95 @@ def delete(self, *args, **kwargs) -> None:
super().delete(*args, **kwargs)


class ExportTaskBase(ImportExportTask):
class AccessLogExportTask(ExportTaskMixin, ImportExportTask):
    """
    Export task that writes access logs to a CSV file.

    Unless `get_all_logs` is set, only the logs of the requesting user are
    exported. Exporting every user's logs is restricted to superusers.
    """

    uid = KpiUidField(uid_prefix='ale')
    get_all_logs = models.BooleanField(default=False)
    result = PrivateFileField(upload_to=export_upload_to, max_length=380)

    @property
    def default_email_subject(self) -> str:
        """
        Subject line used for this kind of report
        """
        return 'Access Log Report Complete'

    def get_data(self, filtered_queryset: QuerySet) -> QuerySet:
        """
        Annotate `filtered_queryset` with the export columns and return it as
        a `values()` queryset restricted to `ACCESS_LOGS_EXPORT_FIELDS`.
        """
        export_columns = {
            # `user_url` is the KOBOFORM_URL users endpoint followed by the
            # username
            'user_url': Concat(
                Value(f'{settings.KOBOFORM_URL}/api/v2/users/'),
                F('user__username'),
                output_field=CharField(),
            ),
            'username': F('user__username'),
            # Columns read directly from keys of the `metadata` field
            'auth_type': F('metadata__auth_type'),
            'source': F('metadata__source'),
            'ip_address': F('metadata__ip_address'),
            'initial_superusername': F('metadata__initial_user_username'),
            'initial_superuseruid': F('metadata__initial_user_uid'),
            'authorized_application': F('metadata__authorized_app_name'),
            # Whole `metadata` value; `_run_task()` overwrites it per row
            'other_details': F('metadata'),
        }
        annotated_logs = filtered_queryset.annotate(**export_columns)
        return annotated_logs.values(*ACCESS_LOGS_EXPORT_FIELDS)

    def _run_task(self, messages: list) -> None:
        """
        Build the access log CSV and store it as the task result.

        Raises `PermissionError` when all logs are requested by a user who is
        not a superuser.
        """
        if self.get_all_logs and not self.user.is_superuser:
            raise PermissionError('Only superusers can export all access logs.')

        export_type, _view = self._get_export_details()
        export_config = CONFIG[export_type]

        logs = export_config['queryset']()
        if not self.get_all_logs:
            # Regular exports only contain the requesting user's own logs
            logs = logs.filter(user__username=self.user.username)
        rows = self.get_data(logs)

        # Metadata keys that `get_data()` already exposes as their own columns
        dedicated_metadata_keys = [
            'auth_type',
            'source',
            'ip_address',
            'initial_user_username',
            'initial_user_uid',
            'authorized_app_name',
        ]
        for record in rows:
            # presumably keeps only the metadata not listed above - verify
            # against `filter_remaining_metadata()`
            record['other_details'] = filter_remaining_metadata(
                record, dedicated_metadata_keys
            )

        csv_buffer = create_data_export(export_type, rows)
        self._export_data_to_file(messages, csv_buffer)


class ProjectViewExportTask(ExportTaskMixin, ImportExportTask):
    """
    Export task that writes the data of a project view to a CSV file.
    """

    uid = KpiUidField(uid_prefix='pve')
    result = PrivateFileField(upload_to=export_upload_to, max_length=380)

    @property
    def default_email_subject(self) -> str:
        """
        Subject line used for this kind of report
        """
        return 'Project View Report Complete'

    def get_data(self, filtered_queryset: QuerySet) -> QuerySet:
        """
        Annotate `filtered_queryset` with owner and form details and return
        it as a `values()` queryset ordered by `id`.
        """
        columns = ASSET_FIELDS + (SETTINGS,)
        annotated_assets = filtered_queryset.annotate(
            owner__name=F('owner__extra_details__data__name'),
            owner__organization=F('owner__extra_details__data__organization'),
            form_id=F('_deployment_data__backend_response__formid'),
        )
        selected_values = annotated_assets.values(*columns)
        return selected_values.order_by('id')

    def _run_task(self, messages: list) -> None:
        """
        Build the project view CSV and store it as the task result.
        """
        export_type, view = self._get_export_details()
        export_config = CONFIG[export_type]

        # Restrict the configured queryset with the filter built from the
        # view's region
        region_filter = get_q(get_region_for_view(view), export_type)
        view_queryset = export_config['queryset'].filter(region_filter)

        csv_buffer = create_data_export(export_type, self.get_data(view_queryset))
        self._export_data_to_file(messages, csv_buffer)


class SubmissionExportTaskBase(ImportExportTask):
"""
An (asynchronous) submission data export job. The instantiator must set the
`data` attribute to a dictionary with the following keys:
Expand Down Expand Up @@ -940,7 +1040,7 @@ def remove_excess(cls, user, source):
export.delete()


class ExportTask(ExportTaskBase):
class SubmissionExportTask(SubmissionExportTaskBase):
"""
An asynchronous export task, to be run with Celery
"""
Expand All @@ -961,7 +1061,7 @@ def _run_task(self, messages):
self.remove_excess(self.user, source_url)


class SynchronousExport(ExportTaskBase):
class SubmissionSynchronousExport(SubmissionExportTaskBase):
"""
A synchronous export, with significant limitations on processing time, but
offered for user convenience
Expand Down
6 changes: 3 additions & 3 deletions kpi/serializers/v1/export_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@
from rest_framework import serializers

from kpi.fields import ReadOnlyJSONField
from kpi.models import ExportTask
from kpi.models import SubmissionExportTask


class ExportTaskSerializer(serializers.HyperlinkedModelSerializer):
url = serializers.HyperlinkedIdentityField(
lookup_field='uid',
view_name='exporttask-detail'
view_name='submissionexporttask-detail'
)
messages = ReadOnlyJSONField(required=False)
data = ReadOnlyJSONField()

class Meta:
model = ExportTask
model = SubmissionExportTask
fields = (
'url',
'status',
Expand Down
Loading

0 comments on commit 6992e8a

Please sign in to comment.