Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(accessLogExport)!: create new AccessLogExportTask to generate a csv of access logs TASK-871 #5258

Merged
merged 17 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion jsapp/js/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ export const META_QUESTION_TYPES = createEnum([
// 1. https://github.com/kobotoolbox/kobocat/blob/78133d519f7b7674636c871e3ba5670cd64a7227/onadata/apps/viewer/models/parsed_instance.py#L242-L260
// 2. https://github.com/kobotoolbox/kpi/blob/7db39015866c905edc645677d72b9c1ea16067b1/jsapp/js/constants.es6#L284-L294
export const ADDITIONAL_SUBMISSION_PROPS = createEnum([
// match the ordering of (Python) kpi.models.import_export_task.ExportTask.COPY_FIELDS
// match the ordering of (Python) kpi.models.import_export_task.SubmissionExportTask.COPY_FIELDS
'_id',
'_uuid',
'_submission_time',
Expand Down
4 changes: 2 additions & 2 deletions kobo/apps/audit_log/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def create_from_request(cls, request):
'asset-file-detail': cls.create_from_file_request,
'asset-file-list': cls.create_from_file_request,
'asset-export-list': cls.create_from_export_request,
'exporttask-list': cls.create_from_v1_export,
'submissionexporttask-list': cls.create_from_v1_export,
'asset-bulk': cls.create_from_bulk_request,
}
url_name = request.resolver_match.url_name
Expand Down Expand Up @@ -610,7 +610,7 @@ def create_from_related_request(
action = modify_action
if action:
# some actions on related objects do not need to be logged,
# eg deleting an ExportTask
# eg deleting an SubmissionExportTask
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# eg deleting an SubmissionExportTask
# eg deleting a SubmissionExportTask

nit, definitely non-blocking

ProjectHistoryLog.objects.create(
user=request.user, object_id=object_id, action=action, metadata=metadata
)
Expand Down
2 changes: 1 addition & 1 deletion kobo/apps/audit_log/tests/test_project_history_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,7 @@ def test_export_v1_creates_log(self):
# can't use _base_project_history_log_test because
# the old endpoint doesn't like format=json
self.client.post(
path=reverse('exporttask-list'),
path=reverse('submissionexporttask-list'),
data=request_data,
)

Expand Down
4 changes: 2 additions & 2 deletions kobo/apps/trash_bin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from kobo.apps.audit_log.audit_actions import AuditAction
from kobo.apps.audit_log.models import AuditLog, AuditType
from kpi.exceptions import InvalidXFormException, MissingXFormException
from kpi.models import Asset, ExportTask, ImportTask
from kpi.models import Asset, SubmissionExportTask, ImportTask
from kpi.utils.mongo_helper import MongoHelper
from kpi.utils.storage import rmdir
from .constants import DELETE_PROJECT_STR_PREFIX, DELETE_USER_STR_PREFIX
Expand All @@ -45,7 +45,7 @@ def delete_asset(request_author: settings.AUTH_USER_MODEL, asset: 'kpi.Asset'):
if asset.has_deployment:
_delete_submissions(request_author, asset)
asset.deployment.delete()
project_exports = ExportTask.objects.filter(
project_exports = SubmissionExportTask.objects.filter(
Q(data__source=f'{host}/api/v2/assets/{asset.uid}/')
| Q(data__source=f'{host}/assets/{asset.uid}/')
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Generated by Django 4.2.15 on 2024-11-26 19:55

import django.db.models.deletion
import private_storage.fields
import private_storage.storage.files
from django.conf import settings
from django.db import migrations, models

import kpi.fields.file
import kpi.fields.kpi_uid
import kpi.models.asset_file
import kpi.models.import_export_task


class Migration(migrations.Migration):
    """
    Rename the two submission export models, alter `AssetFile.content` and
    create the `AccessLogExportTask` model.
    """

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('kpi', '0059_assetexportsettings_date_created_and_more'),
    ]

    operations = [
        # `ExportTask` becomes `SubmissionExportTask`
        migrations.RenameModel(
            old_name='ExportTask',
            new_name='SubmissionExportTask',
        ),
        # `SynchronousExport` becomes `SubmissionSynchronousExport`
        migrations.RenameModel(
            old_name='SynchronousExport',
            new_name='SubmissionSynchronousExport',
        ),
        migrations.AlterField(
            model_name='assetfile',
            name='content',
            field=kpi.fields.file.PrivateExtendedFileField(
                max_length=380,
                null=True,
                upload_to=kpi.models.asset_file.upload_to,
            ),
        ),
        # New model holding access log export tasks
        migrations.CreateModel(
            name='AccessLogExportTask',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('data', models.JSONField()),
                ('messages', models.JSONField(default=dict)),
                (
                    'status',
                    models.CharField(
                        choices=[
                            ('created', 'created'),
                            ('processing', 'processing'),
                            ('error', 'error'),
                            ('complete', 'complete'),
                        ],
                        default='created',
                        max_length=32,
                    ),
                ),
                ('date_created', models.DateTimeField(auto_now_add=True)),
                (
                    'uid',
                    kpi.fields.kpi_uid.KpiUidField(_null=False, uid_prefix='ale'),
                ),
                ('get_all_logs', models.BooleanField(default=False)),
                (
                    'result',
                    private_storage.fields.PrivateFileField(
                        max_length=380,
                        storage=private_storage.storage.files.PrivateFileSystemStorage(),
                        upload_to=kpi.models.import_export_task.export_upload_to,
                    ),
                ),
                (
                    'user',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                'abstract': False,
            },
            bases=(kpi.models.import_export_task.ExportTaskMixin, models.Model),
        ),
    ]
6 changes: 3 additions & 3 deletions kpi/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from .asset_snapshot import AssetSnapshot
from .asset_user_partial_permission import AssetUserPartialPermission
from .object_permission import ObjectPermission
from .import_export_task import (
ExportTask,
from .import_export_task import ( # noqa F401
SubmissionExportTask,
ImportTask,
ProjectViewExportTask,
SynchronousExport,
SubmissionSynchronousExport,
)
from .tag_uid import TagUid
from .authorized_application import AuthorizedApplication
Expand Down
134 changes: 116 additions & 18 deletions kpi/models/import_export_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
from django.contrib.postgres.indexes import BTreeIndex, HashIndex
from django.core.files.storage import FileSystemStorage
from django.db import models, transaction
from django.db.models import F
from django.db.models import CharField, F, Value
from django.db.models.functions import Concat
from django.db.models.query import QuerySet
from django.urls import reverse
from django.utils import timezone
from django.utils.translation import gettext as t
Expand Down Expand Up @@ -52,9 +54,18 @@
from kpi.exceptions import XlsFormatException
from kpi.fields import KpiUidField
from kpi.models import Asset
from kpi.utils.data_exports import (
ACCESS_LOGS_EXPORT_FIELDS,
ASSET_FIELDS,
CONFIG,
SETTINGS,
create_data_export,
filter_remaining_metadata,
get_q,
)
from kpi.utils.log import logging
from kpi.utils.models import _load_library_content, create_assets, resolve_url_to_asset
from kpi.utils.project_view_exports import create_project_view_export
from kpi.utils.project_views import get_region_for_view
from kpi.utils.rename_xls_sheet import (
ConflictSheetError,
NoFromSheetError,
Expand Down Expand Up @@ -129,7 +140,7 @@ def run(self):
# This method must be implemented by a subclass
self._run_task(msgs)
self.status = self.COMPLETE
except ExportTaskBase.InaccessibleData as e:
except SubmissionExportTaskBase.InaccessibleData as e:
msgs['error_type'] = t('Cannot access data')
msgs['error'] = str(e)
self.status = self.ERROR
Expand Down Expand Up @@ -477,27 +488,26 @@ def export_upload_to(self, filename):
return posixpath.join(self.user.username, 'exports', filename)


class ProjectViewExportTask(ImportExportTask):
uid = KpiUidField(uid_prefix='pve')
result = PrivateFileField(upload_to=export_upload_to, max_length=380)
class ExportTaskMixin:

@property
def default_email_subject(self) -> str:
    """Return the generic subject line, `'Report Complete'`."""
    subject = 'Report Complete'
    return subject

def _get_export_details(self) -> tuple:
    """
    Return the `(export_type, view)` pair stored in `self.data`.

    `view` is `None` when `self.data` has no `'view'` key, so export tasks
    that are not tied to a view (e.g. access log exports) do not fail with
    a `KeyError`. A missing `'type'` key still raises `KeyError`.
    """
    return self.data['type'], self.data.get('view')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should account for `data` not having a `view` key, since we don't need one for access log exports. For tasks without a view, the filename can just be `export_type-username-time`.


def _build_export_filename(
    self, export_type: str, username: str, view: 'str | None' = None
) -> str:
    """
    Build the CSV filename for an export.

    With a view the name is `{export_type}-{username}-view_{view}-{time}.csv`.
    When `view` is `None` or an empty string, the `view_...` segment is left
    out: `{export_type}-{username}-{time}.csv`.

    `time` is `timezone.now()` formatted as `%Y-%m-%dT%H:%M:%SZ`.
    """
    time = timezone.now().strftime('%Y-%m-%dT%H:%M:%SZ')
    if view is None or view == '':
        # Exports that are not tied to a view must not produce `view_None`
        return f'{export_type}-{username}-{time}.csv'
    return f'{export_type}-{username}-view_{view}-{time}.csv'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the previous comment: this should be updated to allow an empty or `None` `view` parameter.


def _run_task(self, messages: list) -> None:
export_type = self.data['type']
view = self.data['view']

filename = self._build_export_filename(
export_type, self.user.username, view
)
def _run_task_base(self, messages: list, buff) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def _run_task_base(self, messages: list, buff) -> None:
def _export_data_to_file(self, messages: list, buff) -> None:

since it's no longer doing the whole task

export_type, view = self._get_export_details()
filename = self._build_export_filename(export_type, self.user.username, view)
absolute_filepath = self.get_absolute_filepath(filename)

buff = create_project_view_export(export_type, self.user.username, view)

with self.result.storage.open(absolute_filepath, 'wb') as output_file:
output_file.write(buff.read().encode())

Expand All @@ -510,7 +520,95 @@ def delete(self, *args, **kwargs) -> None:
super().delete(*args, **kwargs)


class ExportTaskBase(ImportExportTask):
class AccessLogExportTask(ExportTaskMixin, ImportExportTask):
    """
    Export task that generates a CSV of access logs.

    By default only the logs of `self.user` are exported; setting
    `get_all_logs` exports every user's logs and is restricted to superusers.
    """

    uid = KpiUidField(uid_prefix='ale')
    # When True, logs are not restricted to `self.user` (see `_run_task`)
    get_all_logs = models.BooleanField(default=False)
    result = PrivateFileField(upload_to=export_upload_to, max_length=380)

    @property
    def default_email_subject(self) -> str:
        return 'Access Log Report Complete'

    def get_data(self, filtered_queryset: QuerySet) -> QuerySet:
        """
        Annotate `filtered_queryset` with the export columns and return it
        as a `values()` queryset limited to `ACCESS_LOGS_EXPORT_FIELDS`.

        Individual `metadata` keys are exposed as their own columns, and the
        whole `metadata` value is exposed as `other_details`.
        """
        # `{KOBOFORM_URL}/api/v2/users/` + username, concatenated by the database
        user_url = Concat(
            Value(f'{settings.KOBOFORM_URL}/api/v2/users/'),
            F('user__username'),
            output_field=CharField(),
        )

        return filtered_queryset.annotate(
            user_url=user_url,
            username=F('user__username'),
            auth_type=F('metadata__auth_type'),
            source=F('metadata__source'),
            ip_address=F('metadata__ip_address'),
            initial_superusername=F('metadata__initial_user_username'),
            initial_superuseruid=F('metadata__initial_user_uid'),
            authorized_application=F('metadata__authorized_app_name'),
            other_details=F('metadata'),
        ).values(*ACCESS_LOGS_EXPORT_FIELDS)

    def _run_task(self, messages: list) -> None:
        """
        Collect the access logs, build the export and hand it to
        `_run_task_base`.

        Raises `PermissionError` when `get_all_logs` is set and `self.user`
        is not a superuser.
        """
        if self.get_all_logs and not self.user.is_superuser:
            raise PermissionError('Only superusers can export all access logs.')

        # Access log exports are not tied to a view; only the type is needed
        export_type, _ = self._get_export_details()
        config = CONFIG[export_type]

        queryset = config['queryset']()
        if not self.get_all_logs:
            queryset = queryset.filter(user__username=self.user.username)
        data = self.get_data(queryset)
        # Metadata keys that `get_data` already exposes as dedicated columns.
        # They must match the `metadata__*` lookups used there, hence
        # `authorized_app_name` (not `auth_app_name`).
        accessed_metadata_fields = [
            'auth_type',
            'source',
            'ip_address',
            'initial_user_username',
            'initial_user_uid',
            'authorized_app_name',
        ]
        for row in data:
            # NOTE(review): presumably removes the listed keys from the row's
            # metadata so `other_details` only holds what has no column of its
            # own; confirm against `filter_remaining_metadata`
            row['other_details'] = filter_remaining_metadata(
                row, accessed_metadata_fields
            )
        buff = create_data_export(export_type, data)
        self._run_task_base(messages, buff)


class ProjectViewExportTask(ExportTaskMixin, ImportExportTask):
    """
    Export task for project views: filters the configured queryset by the
    region of the requested view and exports the result.
    """

    uid = KpiUidField(uid_prefix='pve')
    result = PrivateFileField(upload_to=export_upload_to, max_length=380)

    @property
    def default_email_subject(self) -> str:
        return 'Project View Report Complete'

    def get_data(self, filtered_queryset: QuerySet) -> QuerySet:
        """
        Annotate `filtered_queryset` with owner name, owner organization and
        form id, then return the `ASSET_FIELDS` and `SETTINGS` columns
        ordered by `id`.
        """
        annotated = filtered_queryset.annotate(
            owner__name=F('owner__extra_details__data__name'),
            owner__organization=F('owner__extra_details__data__organization'),
            form_id=F('_deployment_data__backend_response__formid'),
        )
        columns = ASSET_FIELDS + (SETTINGS,)
        return annotated.values(*columns).order_by('id')

    def _run_task(self, messages: list) -> None:
        """
        Build the export for the view named in `self.data` and hand it to
        `_run_task_base`.
        """
        export_type, view = self._get_export_details()
        export_config = CONFIG[export_type]

        # Restrict the configured queryset to the region(s) of the view
        view_filter = get_q(get_region_for_view(view), export_type)
        rows = self.get_data(export_config['queryset'].filter(view_filter))

        buff = create_data_export(export_type, rows)
        self._run_task_base(messages, buff)


class SubmissionExportTaskBase(ImportExportTask):
"""
An (asynchronous) submission data export job. The instantiator must set the
`data` attribute to a dictionary with the following keys:
Expand Down Expand Up @@ -940,7 +1038,7 @@ def remove_excess(cls, user, source):
export.delete()


class ExportTask(ExportTaskBase):
class SubmissionExportTask(SubmissionExportTaskBase):
"""
An asynchronous export task, to be run with Celery
"""
Expand All @@ -961,7 +1059,7 @@ def _run_task(self, messages):
self.remove_excess(self.user, source_url)


class SynchronousExport(ExportTaskBase):
class SubmissionSynchronousExport(SubmissionExportTaskBase):
"""
A synchronous export, with significant limitations on processing time, but
offered for user convenience
Expand Down
6 changes: 3 additions & 3 deletions kpi/serializers/v1/export_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@
from rest_framework import serializers

from kpi.fields import ReadOnlyJSONField
from kpi.models import ExportTask
from kpi.models import SubmissionExportTask


class ExportTaskSerializer(serializers.HyperlinkedModelSerializer):
url = serializers.HyperlinkedIdentityField(
lookup_field='uid',
view_name='exporttask-detail'
view_name='submissionexporttask-detail'
)
messages = ReadOnlyJSONField(required=False)
data = ReadOnlyJSONField()

class Meta:
model = ExportTask
model = SubmissionExportTask
fields = (
'url',
'status',
Expand Down
Loading