Skip to content

Commit

Permalink
feat: Added a new endpoint to get enterprise analytics aggregated data.
Browse files Browse the repository at this point in the history
  • Loading branch information
saleem-latif committed Jul 26, 2024
1 parent b8637fd commit 92f0b51
Show file tree
Hide file tree
Showing 40 changed files with 1,000 additions and 208 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ Unreleased
----------

=========================
[8.2.0] - 2024-07-25
---------------------
* Added a new API endpoint to get admin analytics aggregated data on user enrollment and engagement.

[8.1.0] - 2024-07-22
---------------------
* Upgrade python requirements
Expand Down
2 changes: 1 addition & 1 deletion enterprise_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
Enterprise data api application. This Django app exposes API endpoints used by enterprises.
"""

__version__ = "8.1.0"
__version__ = "8.2.0"
Empty file.
15 changes: 15 additions & 0 deletions enterprise_data/admin_analytics/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
Constants for admin analytics.
"""
import mysql.connector

from django.conf import settings

# Django database entry registered under the enterprise reporting alias; looked up once and reused below.
_REPORTING_DATABASE = settings.DATABASES[settings.ENTERPRISE_REPORTING_DB_ALIAS]

# Keyword arguments for the connector, translated from Django's upper-case database setting names.
DATABASE_CONNECTION_CONFIG = dict(
    host=_REPORTING_DATABASE['HOST'],
    port=_REPORTING_DATABASE['PORT'],
    database=_REPORTING_DATABASE['NAME'],
    user=_REPORTING_DATABASE['USER'],
    password=_REPORTING_DATABASE['PASSWORD'],
)

# Callable used to open a new connection to the reporting database.
DATABASE_CONNECTOR = mysql.connector.connect
137 changes: 137 additions & 0 deletions enterprise_data/admin_analytics/data_loaders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
"""
Utility functions for fetching data from the database.
"""
import numpy
import pandas

from django.http import Http404

from enterprise_data.admin_analytics.database import run_query


def get_select_query(table: str, columns: list, enterprise_uuid: str) -> str:
    """
    Generate a SELECT query for the given table and columns.

    The query text is assembled by string interpolation, so ``enterprise_uuid`` is verified to contain
    only hexadecimal digits and dashes before it is embedded inside the quoted SQL literal. ``table``
    and ``columns`` are interpolated verbatim and must therefore come from trusted, hard-coded values.

    Arguments:
        table (str): The table to query.
        columns (list): The columns to select.
        enterprise_uuid (str): The UUID of the enterprise customer, with or without dashes.

    Returns:
        (str): The SELECT query.

    Raises:
        ValueError: If enterprise_uuid is empty or contains anything other than hex digits and dashes.
    """
    uuid_text = str(enterprise_uuid)
    allowed_characters = set('0123456789abcdefABCDEF-')

    # Reject anything that could terminate the quoted literal or otherwise alter the statement.
    if not uuid_text or not set(uuid_text) <= allowed_characters:
        raise ValueError(f'Invalid enterprise UUID: {uuid_text!r}')

    return f'SELECT {", ".join(columns)} FROM {table} WHERE enterprise_customer_uuid = "{uuid_text}"'


def fetch_enrollment_data(enterprise_uuid: str):
    """
    Load the enrollment rows of one enterprise customer into a DataFrame.

    Dashes are stripped from the UUID before it is used in the query and in the error message.

    Arguments:
        enterprise_uuid (str): The UUID of the enterprise customer.

    Returns:
        (pandas.DataFrame): The enrollment data, with the date columns cast to ``datetime64[ns]``.

    Raises:
        Http404: If the query returns no rows for the enterprise customer.
    """
    enterprise_uuid = enterprise_uuid.replace('-', '')

    columns = [
        'enterprise_customer_name',
        'enterprise_customer_uuid',
        'lms_enrollment_id',
        'user_id',
        'email',
        'course_key',
        'courserun_key',
        'course_id',
        'course_subject',
        'course_title',
        'enterprise_enrollment_date',
        'lms_enrollment_mode',
        'enroll_type',
        'program_title',
        'date_certificate_awarded',
        'grade_percent',
        'cert_awarded',
        'date_certificate_created_raw',
        'passed_date_raw',
        'passed_date',
        'has_passed',
    ]
    # Columns that hold dates and are converted after the frame is built.
    date_columns = (
        'enterprise_enrollment_date',
        'date_certificate_awarded',
        'date_certificate_created_raw',
        'passed_date_raw',
        'passed_date',
    )

    select_statement = get_select_query(
        table='fact_enrollment_admin_dash',
        columns=columns,
        enterprise_uuid=enterprise_uuid,
    )
    rows = run_query(query=select_statement)
    if not rows:
        raise Http404(f'No enrollment data found for enterprise {enterprise_uuid}')

    frame = pandas.DataFrame(numpy.array(rows), columns=columns)
    for column_name in date_columns:
        frame[column_name] = frame[column_name].astype('datetime64[ns]')

    return frame


def fetch_engagement_data(enterprise_uuid: str):
    """
    Load the engagement rows of one enterprise customer into a DataFrame.

    Dashes are stripped from the UUID before it is used in the query and in the error message.

    Arguments:
        enterprise_uuid (str): The UUID of the enterprise customer.

    Returns:
        (pandas.DataFrame): The engagement data, with ``activity_date`` cast to ``datetime64[ns]``.

    Raises:
        Http404: If the query returns no rows for the enterprise customer.
    """
    enterprise_uuid = enterprise_uuid.replace('-', '')

    columns = [
        'user_id',
        'email',
        'enterprise_customer_uuid',
        'course_key',
        'enroll_type',
        'activity_date',
        'course_title',
        'course_subject',
        'is_engaged',
        'is_engaged_video',
        'is_engaged_forum',
        'is_engaged_problem',
        'is_active',
        'learning_time_seconds',
    ]

    select_statement = get_select_query(
        table='fact_enrollment_engagement_day_admin_dash',
        columns=columns,
        enterprise_uuid=enterprise_uuid,
    )
    rows = run_query(query=select_statement)
    if not rows:
        raise Http404(f'No engagement data found for enterprise {enterprise_uuid}')

    frame = pandas.DataFrame(numpy.array(rows), columns=columns)
    frame['activity_date'] = frame['activity_date'].astype('datetime64[ns]')

    return frame


def fetch_max_enrollment_datetime():
    """
    Return the most recent ``created`` value in the enterprise_learner_enrollment table.

    ``created`` is stamped when the async process loads data into the table, so it is the same for all
    records and its maximum can be used as the latest data load time.

    Returns:
        The maximum ``created`` value converted with ``pandas.to_datetime``, or None when the query
        yields no rows.
    """
    rows = run_query("SELECT MAX(created) FROM enterprise_learner_enrollment")
    return pandas.to_datetime(rows[0][0]) if rows else None
31 changes: 31 additions & 0 deletions enterprise_data/admin_analytics/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
Utility functions for interacting with the database.
"""
from contextlib import closing
from logging import getLogger

from enterprise_data.admin_analytics.constants import DATABASE_CONNECTION_CONFIG, DATABASE_CONNECTOR
from enterprise_data.utils import timeit

LOGGER = getLogger(__name__)


@timeit
def run_query(query):
    """
    Execute a query against the reporting database and return every row.

    A fresh connection and cursor are opened for the call and closed again when it finishes.
    Any failure is logged together with the query text and then re-raised to the caller.

    Arguments:
        query (str): The query to run.

    Returns:
        (list): The results of the query.
    """
    try:
        connection_manager = closing(DATABASE_CONNECTOR(**DATABASE_CONNECTION_CONFIG))
        with connection_manager as connection, closing(connection.cursor()) as cursor:
            cursor.execute(query)
            rows = cursor.fetchall()
    except Exception:
        LOGGER.exception(f'[run_query]: run_query failed for query "{query}".')
        raise

    return rows
81 changes: 81 additions & 0 deletions enterprise_data/admin_analytics/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""
Utility functions for fetching data from the database.
"""
from datetime import datetime

from edx_django_utils.cache import TieredCache, get_cache_key

from enterprise_data.admin_analytics.data_loaders import fetch_engagement_data, fetch_enrollment_data


def get_cache_timeout(cache_expiry):
    """
    Helper method to calculate cache timeout in seconds.

    Arguments:
        cache_expiry (datetime): Datetime object denoting the cache expiry. It is compared against
            ``datetime.now()``, which is a naive datetime.

    Returns:
        (int): Whole seconds from now until the cache expiry, or 0 if the expiry is not in the future.
    """
    now = datetime.now()
    if cache_expiry <= now:
        return 0

    # ``timedelta.seconds`` only holds the sub-day remainder and silently drops whole days, so use
    # ``total_seconds()`` to get the full distance to the expiry.
    return int((cache_expiry - now).total_seconds())


def fetch_and_cache_enrollments_data(enterprise_id, cache_expiry):
    """
    Return the enrollments data for an enterprise, reading through the tiered cache.

    On a cache miss the data is fetched from the database and stored in all cache tiers with a
    timeout derived from ``cache_expiry``.

    Arguments:
        enterprise_id (str): UUID of the enterprise customer in string format.
        cache_expiry (datetime): Datetime object denoting the cache expiry.

    Returns:
        (pandas.DataFrame): The enrollments data.
    """
    cache_key = get_cache_key(
        resource='enterprise-admin-analytics-aggregates-enrollments',
        enterprise_customer=enterprise_id,
    )

    cache_entry = TieredCache.get_cached_response(cache_key)
    if cache_entry.is_found:
        return cache_entry.value

    # Cache miss: load from the database, then populate every cache tier.
    enrollments = fetch_enrollment_data(enterprise_id)
    timeout = get_cache_timeout(cache_expiry)
    TieredCache.set_all_tiers(cache_key, enrollments, timeout)
    return enrollments


def fetch_and_cache_engagements_data(enterprise_id, cache_expiry):
    """
    Return the engagements data for an enterprise, reading through the tiered cache.

    On a cache miss the data is fetched from the database and stored in all cache tiers with a
    timeout derived from ``cache_expiry``.

    Arguments:
        enterprise_id (str): UUID of the enterprise customer in string format.
        cache_expiry (datetime): Datetime object denoting the cache expiry.

    Returns:
        (pandas.DataFrame): The engagements data.
    """
    cache_key = get_cache_key(
        resource='enterprise-admin-analytics-aggregates-engagements',
        enterprise_customer=enterprise_id,
    )

    cache_entry = TieredCache.get_cached_response(cache_key)
    if cache_entry.is_found:
        return cache_entry.value

    # Cache miss: load from the database, then populate every cache tier.
    engagements = fetch_engagement_data(enterprise_id)
    timeout = get_cache_timeout(cache_expiry)
    TieredCache.set_all_tiers(cache_key, engagements, timeout)
    return engagements
20 changes: 20 additions & 0 deletions enterprise_data/api/v1/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,23 @@ class EnterpriseAdminSummarizeInsightsSerializer(serializers.ModelSerializer):
class Meta:
model = EnterpriseAdminSummarizeInsights
fields = '__all__'


class AdminAnalyticsAggregatesQueryParamsSerializer(serializers.Serializer):  # pylint: disable=abstract-method
    """
    Validates the optional date-range query params of the admin analytics endpoint.
    """
    start_date = serializers.DateField(required=False)
    end_date = serializers.DateField(required=False)

    def validate(self, attrs):
        """
        Check that the date range is ordered when both bounds are supplied.

        Arguments:
            attrs (dict): The validated field values.

        Returns:
            (dict): ``attrs``, unchanged.

        Raises:
            serializers.ValidationError: If start_date is greater than end_date.
        """
        has_both_bounds = 'start_date' in attrs and 'end_date' in attrs
        if has_both_bounds and attrs['start_date'] > attrs['end_date']:
            raise serializers.ValidationError("start_date should be less than or equal to end_date.")

        return attrs
19 changes: 13 additions & 6 deletions enterprise_data/api/v1/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,39 +7,46 @@

from django.urls import re_path

from enterprise_data.api.v1 import views
from enterprise_data.api.v1.views import enterprise_admin as enterprise_admin_views
from enterprise_data.api.v1.views import enterprise_learner as enterprise_learner_views
from enterprise_data.api.v1.views import enterprise_offers as enterprise_offers_views
from enterprise_data.constants import UUID4_REGEX

app_name = 'enterprise_data_api_v1'

router = DefaultRouter()
router.register(
r'enterprise/(?P<enterprise_id>.+)/enrollments',
views.EnterpriseLearnerEnrollmentViewSet,
enterprise_learner_views.EnterpriseLearnerEnrollmentViewSet,
'enterprise-learner-enrollment',
)
router.register(
r'enterprise/(?P<enterprise_id>.+)/offers',
views.EnterpriseOfferViewSet,
enterprise_offers_views.EnterpriseOfferViewSet,
'enterprise-offers',
)
router.register(
r'enterprise/(?P<enterprise_id>.+)/users',
views.EnterpriseLearnerViewSet,
enterprise_learner_views.EnterpriseLearnerViewSet,
'enterprise-learner',
)
router.register(
r'enterprise/(?P<enterprise_id>.+)/learner_completed_courses',
views.EnterpriseLearnerCompletedCoursesViewSet,
enterprise_learner_views.EnterpriseLearnerCompletedCoursesViewSet,
'enterprise-learner-completed-courses',
)

urlpatterns = [
re_path(
fr'^admin/insights/(?P<enterprise_id>{UUID4_REGEX})$',
views.EnterpriseAdminInsightsView.as_view(),
enterprise_admin_views.EnterpriseAdminInsightsView.as_view(),
name='enterprise-admin-insights'
),
re_path(
    # Canonical route with the correct spelling of "analytics"; the existing route name is kept so
    # ``reverse('enterprise-admin-analytics-aggregates', ...)`` keeps working.
    fr'^admin/analytics/(?P<enterprise_id>{UUID4_REGEX})$',
    enterprise_admin_views.EnterpriseAdminAnalyticsAggregatesView.as_view(),
    name='enterprise-admin-analytics-aggregates'
),
re_path(
    # Legacy alias for the originally misspelled path ("anlaytics"), kept under its own name so that
    # clients that hard-coded the old URL do not break.
    fr'^admin/anlaytics/(?P<enterprise_id>{UUID4_REGEX})$',
    enterprise_admin_views.EnterpriseAdminAnalyticsAggregatesView.as_view(),
    name='enterprise-admin-analytics-aggregates-legacy'
),
]

urlpatterns += router.urls
Empty file.
26 changes: 26 additions & 0 deletions enterprise_data/api/v1/views/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Base views for enterprise data api v1.
"""
from edx_rbac.mixins import PermissionRequiredMixin
from edx_rest_framework_extensions.auth.jwt.authentication import JwtAuthentication
from edx_rest_framework_extensions.paginators import DefaultPagination

from enterprise_data.constants import ANALYTICS_API_VERSION_1


class EnterpriseViewSetMixin(PermissionRequiredMixin):
    """
    Shared configuration for the Enterprise view sets: JWT authentication, default pagination and
    the ``can_access_enterprise`` permission.
    """
    authentication_classes = (JwtAuthentication,)
    pagination_class = DefaultPagination
    permission_required = 'can_access_enterprise'
    API_VERSION = ANALYTICS_API_VERSION_1

    def paginate_queryset(self, queryset):
        """
        Paginate the queryset unless the request carries a ``no_page`` query param.

        Returns None when ``no_page`` is present, otherwise defers to the parent implementation.
        """
        skip_pagination = 'no_page' in self.request.query_params
        return None if skip_pagination else super().paginate_queryset(queryset)
Loading

0 comments on commit 92f0b51

Please sign in to comment.