
Commit 5ce22a6
Merge branch 'openedx:master' into aht007/Ansible-to-Docker
aht007 authored Jan 24, 2023
2 parents 3ed4447 + 7857791 commit 5ce22a6
Showing 22 changed files with 767 additions and 242 deletions.
50 changes: 47 additions & 3 deletions course_discovery/apps/api/serializers.py
@@ -41,8 +41,8 @@
CourseType, Curriculum, CurriculumCourseMembership, CurriculumProgramMembership, Degree, DegreeAdditionalMetadata,
DegreeCost, DegreeDeadline, Endorsement, Fact, GeoLocation, IconTextPairing, Image, LevelType, Mode, Organization,
Pathway, Person, PersonAreaOfExpertise, PersonSocialNetwork, Position, Prerequisite, ProductMeta, ProductValue,
Program, ProgramLocationRestriction, ProgramType, Ranking, Seat, SeatType, Specialization, Subject, TaxiForm, Topic,
Track, Video
Program, ProgramLocationRestriction, ProgramType, Ranking, Seat, SeatType, Source, Specialization, Subject,
TaxiForm, Topic, Track, Video
)
from course_discovery.apps.course_metadata.utils import get_course_run_estimated_hours, parse_course_key_fragment
from course_discovery.apps.ietf_language_tags.models import LanguageTag
@@ -225,6 +225,13 @@ class Meta(TitleDescriptionSerializer.Meta):
model = AdditionalPromoArea


class SourceSerializer(BaseModelSerializer):
"""Serializer for Source"""
class Meta:
model = Source
fields = ('name', 'slug', 'description')

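For illustration, the new serializer exposes exactly the three fields declared in its Meta; the Source values below are hypothetical:

source = Source(name='2U', slug='2u', description='Products ingested from the 2U source')
SourceSerializer(source).data
# {'name': '2U', 'slug': '2u', 'description': 'Products ingested from the 2U source'}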

class FactSerializer(HeadingBlurbSerializer):
"""Serializer for Facts """
class Meta(HeadingBlurbSerializer.Meta):
@@ -1268,6 +1275,7 @@ class CourseSerializer(TaggitSerializer, MinimalCourseSerializer):
geolocation = GeoLocationSerializer(required=False, allow_null=True)
location_restriction = CourseLocationRestrictionSerializer(required=False)
in_year_value = ProductValueSerializer(required=False)
product_source = SourceSerializer(required=False)

def get_organization_logo_override_url(self, obj):
logo_image_override = getattr(obj, 'organization_logo_override', None)
@@ -1288,6 +1296,7 @@ def prefetch_queryset(cls, partner, queryset=None, course_runs=None): # lint-am
'partner',
'extra_description',
'additional_metadata',
'product_source',
'_official_version',
'canonical_course_run',
'type',
@@ -1323,7 +1332,8 @@ class Meta(MinimalCourseSerializer.Meta):
'enrollment_count', 'recent_enrollment_count', 'topics', 'partner', 'key_for_reruns', 'url_slug',
'url_slug_history', 'url_redirects', 'course_run_statuses', 'editors', 'collaborators', 'skill_names',
'skills', 'organization_short_code_override', 'organization_logo_override_url',
'enterprise_subscription_inclusion', 'geolocation', 'location_restriction', 'in_year_value'
'enterprise_subscription_inclusion', 'geolocation', 'location_restriction', 'in_year_value',
'product_source',
)
extra_kwargs = {
'partner': {'write_only': True}
@@ -1400,6 +1410,17 @@ def update_product_meta(self, instance, product_meta_data):
instance.product_meta.keywords.set(keywords, clear=True)
instance.product_meta.save()

def update_product_source(self, instance, product_source):

if instance.product_source:
Source.objects.filter(id=instance.product_source.id).update(
**product_source)
else:
instance.product_source = Source.objects.create(
**product_source,
)
instance.save()

def update_additional_metadata(self, instance, additional_metadata):

facts = additional_metadata.pop('facts', None)
@@ -1456,6 +1477,9 @@ def update(self, instance, validated_data):
self.update_location_restriction(instance, location_restriction_data)
if 'in_year_value' in validated_data:
self.update_in_year_value(instance, validated_data.pop('in_year_value'))
if 'product_source' in validated_data:
self.update_product_source(
instance, validated_data.pop('product_source'))
return super().update(instance, validated_data)


@@ -2048,6 +2072,7 @@ class ProgramSerializer(MinimalProgramSerializer):
in_year_value = ProductValueSerializer(required=False)
skill_names = serializers.SerializerMethodField()
skills = serializers.SerializerMethodField()
product_source = SourceSerializer(required=False)

@classmethod
def prefetch_queryset(cls, partner, queryset=None):
@@ -2066,6 +2091,7 @@ def prefetch_queryset(cls, partner, queryset=None):
'partner',
'geolocation',
'in_year_value',
'product_source',
'location_restriction'
).prefetch_related(
'excluded_course_runs',
@@ -2109,9 +2135,27 @@ class Meta(MinimalProgramSerializer.Meta):
'staff', 'credit_redemption_overview', 'applicable_seat_types', 'instructor_ordering',
'enrollment_count', 'topics', 'credit_value', 'enterprise_subscription_inclusion', 'geolocation',
'location_restriction', 'is_2u_degree_program', 'in_year_value', 'skill_names', 'skills',
'product_source',
)
read_only_fields = ('enterprise_subscription_inclusion',)

def update_product_source(self, instance, product_source):

if instance.product_source:
Source.objects.filter(
id=instance.product_source.id).update(**product_source)
else:
instance.product_source = Source.objects.create(
**product_source,
)
instance.save()

def update(self, instance, validated_data):
if 'product_source' in validated_data:
self.update_product_source(
instance, validated_data.pop('product_source'))
return super().update(instance, validated_data)

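Both update_product_source helpers above follow the same pattern: if the instance already has a product_source, the linked Source row is updated in place; otherwise a new Source is created and attached before saving. A minimal sketch of the course-side hook (the course key and field values here are hypothetical):

course = Course.objects.get(key='edX+DemoX')
serializer = CourseSerializer(course, partial=True)
serializer.update_product_source(
    course, {'name': '2U', 'slug': '2u', 'description': 'External product source'}
)
# course.product_source now refers to an updated or newly created Source row.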

class PathwaySerializer(BaseModelSerializer):
""" Serializer for Pathway. """
4 changes: 3 additions & 1 deletion course_discovery/apps/api/tests/test_serializers.py
@@ -30,7 +30,7 @@
OrganizationSerializer, PathwaySerializer, PersonSerializer, PositionSerializer, PrerequisiteSerializer,
ProductMetaSerializer, ProductValueSerializer, ProgramLocationRestrictionSerializer,
ProgramsAffiliateWindowSerializer, ProgramSerializer, ProgramTypeAttrsSerializer, ProgramTypeSerializer,
RankingSerializer, SeatSerializer, SubjectSerializer, TaxiFormSerializer, TopicSerializer,
RankingSerializer, SeatSerializer, SourceSerializer, SubjectSerializer, TaxiFormSerializer, TopicSerializer,
TypeaheadCourseRunSearchSerializer, TypeaheadProgramSearchSerializer, VideoSerializer,
get_lms_course_url_for_archived, get_utm_source_for_user
)
@@ -169,6 +169,7 @@ def get_expected_data(cls, course, course_skill, request):
'full_description': course.full_description,
'level_type': course.level_type.name_t,
'extra_description': AdditionalPromoAreaSerializer(course.extra_description).data,
'product_source': SourceSerializer(course.product_source).data,
'additional_metadata': AdditionalMetadataSerializer(course.additional_metadata).data,
'subjects': [],
'prerequisites': [],
@@ -1136,6 +1137,7 @@ def get_expected_data(cls, program, request, include_labels=True):
'topics': [topic.name for topic in program.topics],
'credit_value': program.credit_value,
'enterprise_subscription_inclusion': program.enterprise_subscription_inclusion,
'product_source': SourceSerializer(program.product_source).data,
'organization_short_code_override': program.organization_short_code_override,
'organization_logo_override_url': program.organization_logo_override_url,
'primary_subject_override': SubjectSerializer(program.primary_subject_override).data,
12 changes: 6 additions & 6 deletions course_discovery/apps/api/v1/tests/test_views/test_courses.py
@@ -79,7 +79,7 @@ def test_get_uuid(self):
""" Verify the endpoint returns the details for a single course with UUID. """
url = reverse('api:v1:course-detail', kwargs={'key': self.course.uuid})

with self.assertNumQueries(44):
with self.assertNumQueries(47):
response = self.client.get(url)
assert response.status_code == 200
assert response.data == self.serialize_course(self.course)
@@ -88,7 +88,7 @@ def test_get_exclude_deleted_programs(self):
""" Verify the endpoint returns no deleted associated programs """
ProgramFactory(courses=[self.course], status=ProgramStatus.Deleted)
url = reverse('api:v1:course-detail', kwargs={'key': self.course.key})
with self.assertNumQueries(44):
with self.assertNumQueries(47):
response = self.client.get(url)
assert response.status_code == 200
assert response.data.get('programs') == []
@@ -101,7 +101,7 @@ def test_get_include_deleted_programs(self):
ProgramFactory(courses=[self.course], status=ProgramStatus.Deleted)
url = reverse('api:v1:course-detail', kwargs={'key': self.course.key})
url += '?include_deleted_programs=1'
with self.assertNumQueries(47):
with self.assertNumQueries(50):
response = self.client.get(url)
assert response.status_code == 200
assert response.data == self.serialize_course(self.course, extra_context={'include_deleted_programs': True})
@@ -249,7 +249,7 @@ def test_list(self):
""" Verify the endpoint returns a list of all courses. """
url = reverse('api:v1:course-list')

with self.assertNumQueries(32):
with self.assertNumQueries(35, threshold=3):
response = self.client.get(url)
assert response.status_code == 200
self.assertListEqual(
@@ -266,7 +266,7 @@ def test_list_query(self):

# Known to be flaky prior to the addition of tearDown()
# and logout() code which is the same number of additional queries
with self.assertNumQueries(62, threshold=3):
with self.assertNumQueries(65, threshold=3):
response = self.client.get(url)
self.assertListEqual(response.data['results'], self.serialize_course(courses, many=True))

@@ -276,7 +276,7 @@ def test_list_key_filter(self):
keys = ','.join([course.key for course in courses])
url = '{root}?{params}'.format(root=reverse('api:v1:course-list'), params=urlencode({'keys': keys}))

with self.assertNumQueries(62, threshold=3):
with self.assertNumQueries(65, threshold=3):
response = self.client.get(url)
self.assertListEqual(response.data['results'], self.serialize_course(courses, many=True))

7 changes: 7 additions & 0 deletions course_discovery/apps/course_metadata/admin.py
@@ -90,6 +90,11 @@ class PositionInline(admin.TabularInline):
extra = 0


class SourceInline(admin.TabularInline):
model = Source
extra = 0


class PersonSocialNetworkInline(admin.TabularInline):
model = PersonSocialNetwork
extra = 0
@@ -120,6 +125,7 @@ class ProductValueAdmin(admin.ModelAdmin):
@admin.register(Course)
class CourseAdmin(DjangoObjectActions, admin.ModelAdmin):
form = CourseAdminForm
inline = (SourceInline,)
list_display = ('uuid', 'key', 'key_for_reruns', 'title', 'draft',)
list_filter = ('partner',)
ordering = ('key', 'title',)
@@ -343,6 +349,7 @@ class ProgramLocationRestrictionAdmin(admin.ModelAdmin):
@admin.register(Program)
class ProgramAdmin(DjangoObjectActions, admin.ModelAdmin):
form = ProgramAdminForm
inline = (SourceInline,)
list_display = ('id', 'uuid', 'title', 'type', 'partner', 'status', 'hidden')
list_filter = ('partner', 'type', 'status', ProgramEligibilityFilter, 'hidden')
ordering = ('uuid', 'title', 'status')
7 changes: 7 additions & 0 deletions course_discovery/apps/course_metadata/constants.py
@@ -15,6 +15,13 @@
'image/svg+xml': 'svg' # SVG image will be converted into PNG, not stored as SVG
}

ALLOWED_ANCHOR_TAG_ATTRIBUTES = ['href', 'title', 'target', 'rel']

DRIVE_LINK_PATTERNS = [r"https://docs\.google\.com/uc\?id=\w+",
r"https://drive\.google\.com/file/d/\w+/view?usp=sharing"]

GOOGLE_CLIENT_API_SCOPE = ['https://www.googleapis.com/auth/drive.readonly']


class PathwayType(Enum):
""" Allowed values for Pathway.pathway_type """
9 changes: 6 additions & 3 deletions course_discovery/apps/course_metadata/contentful_utils.py
@@ -150,7 +150,7 @@ def extract_plain_text_from_rich_text(rich_text_dict):
text_list = []
for key, value in rich_text_dict.items():
if key == 'value':
text_list.append(value)
text_list.append(value.strip())
elif isinstance(value, dict): # recursive call if the value is a nested dict
text_list += extract_plain_text_from_rich_text(value)
elif isinstance(value, list): # recursive call if the value is a nested list
@@ -164,7 +164,7 @@ def rich_text_to_plain_text(rich_text):
Converts rich text from Contentful to plain text.
Plain text resides in the dict with 'value' as dict key name.
"""
return ''.join(extract_plain_text_from_rich_text(rich_text))
return ' '.join(extract_plain_text_from_rich_text(rich_text))
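The two changes above mean each extracted fragment is trimmed and fragments are joined with a single space. A contrived nested dict illustrates the effect (real Contentful payloads nest lists of nodes, omitted here for brevity):

rich_text = {'value': 'Data science ', 'content': {'value': ' with Python.'}}
rich_text_to_plain_text(rich_text)
# 'Data science with Python.'  (previously 'Data science  with Python.' with ''.join and no strip)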


def get_modules_list(entry):
@@ -351,7 +351,10 @@ def fetch_and_transform_bootcamp_contentful_data():
return transformed_bootcamp_data


def get_aggregated_data_from_contentful_data(data, product_uuid):
def aggregate_contentful_data(data, product_uuid):
"""
Text data extracted from Contentful to be used in EMSI/Lightcast for product skills.
"""
if (data is None) or (product_uuid not in data):
return None

53 changes: 53 additions & 0 deletions course_discovery/apps/course_metadata/googleapi_client.py
@@ -0,0 +1,53 @@
import logging
import re

from django.conf import settings
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build

from course_discovery.apps.course_metadata.constants import GOOGLE_CLIENT_API_SCOPE

logger = logging.getLogger(__name__)


class GoogleAPIClient:
"""
API Client for Google API to communicate with drive files
"""

def __init__(self):
try:
credentials = Credentials.from_service_account_info(
settings.GOOGLE_SERVICE_ACCOUNT_CREDENTIALS, scopes=GOOGLE_CLIENT_API_SCOPE
)
credentials = credentials.with_subject(settings.LOADER_INGESTION_CONTACT_EMAIL)
self.service = build('drive', 'v3', credentials=credentials)
logger.info('[Connection Successful]: Successful connection with google service account')
except Exception as ex: # pylint: disable=broad-except
logger.exception(f'[Connection Failed]: Failed to connect with google service account error_message: {ex}')

def get_file_metadata(self, url):
try:
file_id = self.get_file_id_from_url(url)
file = self.service.files().get(fileId=file_id).execute() # pylint: disable=no-member
logger.info(f'[File Found]: Found google file {file_id} on requesting {url}')
return file
except Exception as ex: # pylint: disable=broad-except
logger.exception(f'[File Not Found]: No file found for id: {file_id} error_message: {ex}')
return None

@staticmethod
def get_file_id_from_url(url):
match = re.search(r'id=(\w+)', url) or re.search(r'/(?:file/d/|uc\?id=)([-\w]{25,})(?:[&/]|$)', url)
return match.group(1) if match else None

def download_file_by_url(self, url):
content = None
try:
file_id = self.get_file_id_from_url(url)
request = self.service.files().get_media(fileId=file_id) # pylint: disable=no-member
content = request.execute()
logger.info(f'[File Downloaded]: Downloading google file {file_id}')
except Exception as ex: # pylint: disable=broad-except
logger.exception(f'[File Not Downloaded]: No file found for id: {file_id} error_message: {ex}')
return content
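
A hedged usage sketch of the new client: the Drive URL and file ID below are made up, and GOOGLE_SERVICE_ACCOUNT_CREDENTIALS plus LOADER_INGESTION_CONTACT_EMAIL are assumed to be configured in Django settings.

url = 'https://drive.google.com/file/d/1a2B3c4D5e6F7g8H9i0J1k2L3m4N5o6P7/view?usp=sharing'
GoogleAPIClient.get_file_id_from_url(url)   # '1a2B3c4D5e6F7g8H9i0J1k2L3m4N5o6P7'

client = GoogleAPIClient()                  # builds the Drive v3 service with delegated credentials
metadata = client.get_file_metadata(url)    # dict of Drive file fields, or None if the lookup fails
content = client.download_file_by_url(url)  # raw file contents, or None if the download fails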
50 changes: 50 additions & 0 deletions course_discovery/apps/course_metadata/migrations/0308_auto_20230111_1300.py
@@ -0,0 +1,50 @@
# Generated by Django 3.2.16 on 2023-01-11 13:00

from django.db import migrations, models
import django.db.models.deletion
import django_extensions.db.fields


class Migration(migrations.Migration):

dependencies = [
('course_metadata', '0307_additional_metadata_end_date'),
]

operations = [
migrations.CreateModel(
name='Source',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', django_extensions.db.fields.CreationDateTimeField(auto_now_add=True, verbose_name='created')),
('modified', django_extensions.db.fields.ModificationDateTimeField(auto_now=True, verbose_name='modified')),
('name', models.CharField(help_text='Name of the external source.', max_length=255)),
('slug', django_extensions.db.fields.AutoSlugField(blank=True, editable=False, help_text='Leave this field blank to have the value generated automatically.', populate_from='name')),
('description', models.CharField(blank=True, help_text='Description of the external source.', max_length=255)),
],
options={
'get_latest_by': 'modified',
'abstract': False,
},
),
migrations.AddField(
model_name='course',
name='product_source',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='courses', to='course_metadata.source'),
),
migrations.AddField(
model_name='historicalcourse',
name='product_source',
field=models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='course_metadata.source'),
),
migrations.AddField(
model_name='historicalprogram',
name='product_source',
field=models.ForeignKey(blank=True, db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='+', to='course_metadata.source'),
),
migrations.AddField(
model_name='program',
name='product_source',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='programs', to='course_metadata.source'),
),
]
18 changes: 18 additions & 0 deletions (new migration in course_discovery/apps/course_metadata/migrations/)
@@ -0,0 +1,18 @@
# Generated by Django 3.2.16 on 2023-01-17 20:18

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('course_metadata', '0308_auto_20230111_1300'),
]

operations = [
migrations.AlterField(
model_name='additionalmetadata',
name='course_term_override',
field=models.CharField(blank=True, default=None, help_text='This field allows for override the default course term', max_length=20, null=True, verbose_name='Course override'),
),
]