From 94b501181f2f388f0107bf5f90f6f513962e1138 Mon Sep 17 00:00:00 2001 From: Danyal-Faheem Date: Mon, 5 Aug 2024 18:14:16 +0500 Subject: [PATCH 01/10] feat: auto import course structure on course publish --- tutorcairn/patches/k8s-deployments | 54 +++++++++++++++++++ tutorcairn/patches/k8s-services | 12 +++++ tutorcairn/patches/k8s-volumes | 14 +++++ .../patches/local-docker-compose-dev-services | 9 ++++ .../patches/local-docker-compose-services | 14 +++++ tutorcairn/plugin.py | 2 + .../cairn/apps/openedx/scripts/main.py | 13 +++++ .../apps/openedx/scripts/requirements.txt | 2 + .../apps/vector/partials/common-post.toml | 31 +++++++++++ 9 files changed, 151 insertions(+) create mode 100644 tutorcairn/templates/cairn/apps/openedx/scripts/main.py create mode 100644 tutorcairn/templates/cairn/apps/openedx/scripts/requirements.txt diff --git a/tutorcairn/patches/k8s-deployments b/tutorcairn/patches/k8s-deployments index 4f000b3..791388b 100644 --- a/tutorcairn/patches/k8s-deployments +++ b/tutorcairn/patches/k8s-deployments @@ -327,3 +327,57 @@ spec: persistentVolumeClaim: claimName: cairn-postgresql {% endif %} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cairn-watchcourses + labels: + app.kubernetes.io/name: cairn-watchcourses +spec: + selector: + matchLabels: + app.kubernetes.io/name: cairn-cairn-watchcourses + template: + metadata: + labels: + app.kubernetes.io/name: cairn-cairn-watchcourses + spec: + containers: + - name: cairn-watchcourses + image: {{ DOCKER_IMAGE_OPENEDX }} + env: + - name: DJANGO_SETTINGS_MODULE + value: lms.envs.tutor.production + command: ["/bin/bash"] + args: ["-c", "pip install -qr /openedx/scripts/requirements.txt && uvicorn --app-dir /openedx/scripts/ main:app --host 0.0.0.0 --port {{ CAIRN_WATCHCOURSES_PORT }}"] + volumeMounts: + - mountPath: /openedx/edx-platform/lms/envs/tutor/ + name: settings-lms + - mountPath: /openedx/edx-platform/cms/envs/tutor/ + name: settings-cms + - mountPath: /openedx/config + name: config + 
- mountPath: /openedx/scripts + name: scripts + - mountPath: /openedx/clickhouse-auth.json + name: clickhouse-auth + subPath: auth.json + securityContext: + allowPrivilegeEscalation: false + volumes: + - name: settings-lms + configMap: + name: openedx-settings-lms + - name: settings-cms + configMap: + name: openedx-settings-cms + - name: config + configMap: + name: openedx-config + - name: scripts + configMap: + name: cairn-openedx-scripts + - name: clickhouse-auth + configMap: + name: cairn-clickhouse-auth diff --git a/tutorcairn/patches/k8s-services b/tutorcairn/patches/k8s-services index 208ce0b..bd5fc18 100644 --- a/tutorcairn/patches/k8s-services +++ b/tutorcairn/patches/k8s-services @@ -43,3 +43,15 @@ spec: protocol: TCP selector: app.kubernetes.io/name: cairn-superset +--- +apiVersion: v1 +kind: Service +metadata: + name: cairn-watchcourses +spec: + type: ClusterIP + ports: + - port: 8999 + protocol: TCP + selector: + app.kubernetes.io/name: cairn-watchcourses diff --git a/tutorcairn/patches/k8s-volumes b/tutorcairn/patches/k8s-volumes index 924d5e7..0a1c22f 100644 --- a/tutorcairn/patches/k8s-volumes +++ b/tutorcairn/patches/k8s-volumes @@ -44,3 +44,17 @@ spec: requests: storage: 2Gi {% endif %} +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: cairn-watchcourses + labels: + app.kubernetes.io/component: volume + app.kubernetes.io/name: cairn-watchcourses +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi diff --git a/tutorcairn/patches/local-docker-compose-dev-services b/tutorcairn/patches/local-docker-compose-dev-services index c3f0fa5..155c0e7 100644 --- a/tutorcairn/patches/local-docker-compose-dev-services +++ b/tutorcairn/patches/local-docker-compose-dev-services @@ -13,3 +13,12 @@ cairn-superset-worker-beat: environment: FLASK_ENV: development +cairn-watchcourses: + <<: *openedx-service + command: "uvicorn --app-dir /openedx/scripts/ main:app --host 0.0.0.0 --port {{ CAIRN_WATCHCOURSES_PORT }}" + ports: 
+ - "8999:8999" + networks: + default: + aliases: + - "{{ CAIRN_WATCHCOURSES_HOST }}" diff --git a/tutorcairn/patches/local-docker-compose-services b/tutorcairn/patches/local-docker-compose-services index a963b52..225399a 100644 --- a/tutorcairn/patches/local-docker-compose-services +++ b/tutorcairn/patches/local-docker-compose-services @@ -85,3 +85,17 @@ cairn-postgresql: depends_on: - permissions {% endif %} +cairn-watchcourses: + image: {{ DOCKER_IMAGE_OPENEDX }} + command: > + bash -c "pip install -qr /openedx/scripts/requirements.txt + && uvicorn --app-dir /openedx/scripts/ main:app --host 0.0.0.0 --port {{ CAIRN_WATCHCOURSES_PORT }}" + restart: unless-stopped + environment: + SETTINGS: ${TUTOR_EDX_PLATFORM_SETTINGS:-tutor.production} + volumes: + - ../apps/openedx/settings/lms:/openedx/edx-platform/lms/envs/tutor:ro + - ../apps/openedx/settings/cms:/openedx/edx-platform/cms/envs/tutor:ro + - ../apps/openedx/config:/openedx/config:ro + - ../plugins/cairn/apps/openedx/scripts:/openedx/scripts:ro + - ../plugins/cairn/apps/clickhouse/auth.json:/openedx/clickhouse-auth.json:ro diff --git a/tutorcairn/plugin.py b/tutorcairn/plugin.py index ab4af53..dcf0c96 100644 --- a/tutorcairn/plugin.py +++ b/tutorcairn/plugin.py @@ -47,6 +47,8 @@ "VECTOR_DOCKER_IMAGE": "docker.io/timberio/vector:0.25.1-alpine", # Auto sync user roles "AUTH_ROLES_SYNC_AT_LOGIN": False, + "WATCHCOURSES_HOST": "cairn-watchcourses", + "WATCHCOURSES_PORT": 8999, }, "unique": { "CLICKHOUSE_PASSWORD": "{{ 20|random_string }}", diff --git a/tutorcairn/templates/cairn/apps/openedx/scripts/main.py b/tutorcairn/templates/cairn/apps/openedx/scripts/main.py new file mode 100644 index 0000000..3bb5a78 --- /dev/null +++ b/tutorcairn/templates/cairn/apps/openedx/scripts/main.py @@ -0,0 +1,13 @@ +import subprocess +from fastapi import FastAPI, Request + +app = FastAPI() + +@app.post("/import_course/") +async def import_course_to_clickhouse(request: Request): + response = await request.json() + course_id = 
response[0]['course_id'] + # We use a subprocess here as the modulestore data is cached and the + # script tries to insert the same number of blocks everytime + subprocess.call(["python", "/openedx/scripts/importcoursedata.py", "-c", course_id]) + return({"result": "success"}) diff --git a/tutorcairn/templates/cairn/apps/openedx/scripts/requirements.txt b/tutorcairn/templates/cairn/apps/openedx/scripts/requirements.txt new file mode 100644 index 0000000..405599c --- /dev/null +++ b/tutorcairn/templates/cairn/apps/openedx/scripts/requirements.txt @@ -0,0 +1,2 @@ +uvicorn +fastapi diff --git a/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml b/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml index 90171eb..6cb12a8 100644 --- a/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml +++ b/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml @@ -38,6 +38,21 @@ source = ''' .message = parse_json!(.message) ''' +# Parse CMS logs for course publishing event +[transforms.course_published] +type="remap" +inputs = ["openedx_containers"] +source = ''' +parsed, err_regex = parse_regex(.message, r'Updating course overview for (?P<course_id>\S+?)(?:\s|\.|$)') + if err_regex != null { + log("Unable to parse course_id from log message: " + err_regex, level: "error") + abort + } +. 
= {"course_id": parsed.course_id} +''' +drop_on_error = true +drop_on_abort = true + ### Sinks # Log all events to stdout, for debugging @@ -58,4 +73,20 @@ database = "{{ CAIRN_CLICKHOUSE_DATABASE }}" table = "_tracking" healthcheck = true +# Log course_published event to stdout for debugging +[sinks.course_published_out] +type = "console" +inputs = ["course_published"] +encoding.codec = "json" +encoding.only_fields = ["course_id"] + +# Send course_id to watchcourses +[sinks.watchcourse] +type = "http" +method = "post" +encoding.codec = "json" +inputs = ["course_published"] +uri = "http://{{ CAIRN_WATCHCOURSES_HOST }}:{{ CAIRN_WATCHCOURSES_PORT }}/import_course/" + + {{ patch("cairn-vector-common-toml") }} From 30f36cba99f95801ac330ecad39f9b99b71eb935 Mon Sep 17 00:00:00 2001 From: Danyal-Faheem Date: Mon, 5 Aug 2024 18:32:43 +0500 Subject: [PATCH 02/10] docs: add changelog entry --- ..._183125_danyal.faheem_import_course_data_on_course_publish.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/20240805_183125_danyal.faheem_import_course_data_on_course_publish.md diff --git a/changelog.d/20240805_183125_danyal.faheem_import_course_data_on_course_publish.md b/changelog.d/20240805_183125_danyal.faheem_import_course_data_on_course_publish.md new file mode 100644 index 0000000..9ca1fee --- /dev/null +++ b/changelog.d/20240805_183125_danyal.faheem_import_course_data_on_course_publish.md @@ -0,0 +1 @@ +- [Improvement] Auto import course structure to clickhouse on course publish by parsing CMS logs. 
(by @Danyal-Faheem) \ No newline at end of file From 302b64081e57053d433a9e0b40b94e3e06041101 Mon Sep 17 00:00:00 2001 From: Danyal-Faheem Date: Tue, 6 Aug 2024 16:47:41 +0500 Subject: [PATCH 03/10] fix: make requested changes --- .../patches/local-docker-compose-dev-services | 5 ++-- .../patches/local-docker-compose-services | 6 +--- tutorcairn/plugin.py | 2 -- .../cairn/apps/openedx/scripts/main.py | 13 --------- .../cairn/apps/openedx/scripts/server.py | 29 +++++++++++++++++++ .../apps/vector/partials/common-post.toml | 3 +- 6 files changed, 33 insertions(+), 25 deletions(-) delete mode 100644 tutorcairn/templates/cairn/apps/openedx/scripts/main.py create mode 100644 tutorcairn/templates/cairn/apps/openedx/scripts/server.py diff --git a/tutorcairn/patches/local-docker-compose-dev-services b/tutorcairn/patches/local-docker-compose-dev-services index 155c0e7..64ab50b 100644 --- a/tutorcairn/patches/local-docker-compose-dev-services +++ b/tutorcairn/patches/local-docker-compose-dev-services @@ -15,10 +15,9 @@ cairn-superset-worker-beat: cairn-watchcourses: <<: *openedx-service - command: "uvicorn --app-dir /openedx/scripts/ main:app --host 0.0.0.0 --port {{ CAIRN_WATCHCOURSES_PORT }}" ports: - - "8999:8999" + - "9282:9282" networks: default: aliases: - - "{{ CAIRN_WATCHCOURSES_HOST }}" + - "cairn-watchcourses" diff --git a/tutorcairn/patches/local-docker-compose-services b/tutorcairn/patches/local-docker-compose-services index 225399a..4e83ae1 100644 --- a/tutorcairn/patches/local-docker-compose-services +++ b/tutorcairn/patches/local-docker-compose-services @@ -87,12 +87,8 @@ cairn-postgresql: {% endif %} cairn-watchcourses: image: {{ DOCKER_IMAGE_OPENEDX }} - command: > - bash -c "pip install -qr /openedx/scripts/requirements.txt - && uvicorn --app-dir /openedx/scripts/ main:app --host 0.0.0.0 --port {{ CAIRN_WATCHCOURSES_PORT }}" + command: "python /openedx/scripts/server.py" restart: unless-stopped - environment: - SETTINGS: 
${TUTOR_EDX_PLATFORM_SETTINGS:-tutor.production} volumes: - ../apps/openedx/settings/lms:/openedx/edx-platform/lms/envs/tutor:ro - ../apps/openedx/settings/cms:/openedx/edx-platform/cms/envs/tutor:ro diff --git a/tutorcairn/plugin.py b/tutorcairn/plugin.py index dcf0c96..ab4af53 100644 --- a/tutorcairn/plugin.py +++ b/tutorcairn/plugin.py @@ -47,8 +47,6 @@ "VECTOR_DOCKER_IMAGE": "docker.io/timberio/vector:0.25.1-alpine", # Auto sync user roles "AUTH_ROLES_SYNC_AT_LOGIN": False, - "WATCHCOURSES_HOST": "cairn-watchcourses", - "WATCHCOURSES_PORT": 8999, }, "unique": { "CLICKHOUSE_PASSWORD": "{{ 20|random_string }}", diff --git a/tutorcairn/templates/cairn/apps/openedx/scripts/main.py b/tutorcairn/templates/cairn/apps/openedx/scripts/main.py deleted file mode 100644 index 3bb5a78..0000000 --- a/tutorcairn/templates/cairn/apps/openedx/scripts/main.py +++ /dev/null @@ -1,13 +0,0 @@ -import subprocess -from fastapi import FastAPI, Request - -app = FastAPI() - -@app.post("/import_course/") -async def import_course_to_clickhouse(request: Request): - response = await request.json() - course_id = response[0]['course_id'] - # We use a subprocess here as the modulestore data is cached and the - # script tries to insert the same number of blocks everytime - subprocess.call(["python", "/openedx/scripts/importcoursedata.py", "-c", course_id]) - return({"result": "success"}) diff --git a/tutorcairn/templates/cairn/apps/openedx/scripts/server.py b/tutorcairn/templates/cairn/apps/openedx/scripts/server.py new file mode 100644 index 0000000..c51b338 --- /dev/null +++ b/tutorcairn/templates/cairn/apps/openedx/scripts/server.py @@ -0,0 +1,29 @@ +from aiohttp import web +import subprocess +from opaque_keys.edx.locator import CourseLocator + +async def import_course_to_clickhouse(request): + + data = await request.json() + + if not isinstance(data, list) or 'course_id' not in data[0]: + return web.json_response({"error":"Value course_id is required."}, status=400) + + course_id = 
data[0]['course_id'] + + # Verify course_id is a valid course_id + try: + CourseLocator.from_string(course_id) + except: + return web.json_response({"error": "Invalid course_id"}, status=400) + + try: + subprocess.run(["python", "/openedx/scripts/importcoursedata.py", "-c", course_id]) + return web.json_response({"result": "success"}, status=200) + except Exception as e: + return web.json_response({'error': str(e)}, status=400) + +app = web.Application() +app.router.add_post('/import_course/', import_course_to_clickhouse) + +web.run_app(app, host='0.0.0.0', port=9282) diff --git a/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml b/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml index 6cb12a8..4df8a5d 100644 --- a/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml +++ b/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml @@ -86,7 +86,6 @@ type = "http" method = "post" encoding.codec = "json" inputs = ["course_published"] -uri = "http://{{ CAIRN_WATCHCOURSES_HOST }}:{{ CAIRN_WATCHCOURSES_PORT }}/import_course/" - +uri = "http://cairn-watchcourses:9282/import_course/" {{ patch("cairn-vector-common-toml") }} From 53c8c31187dfb261aa3fea72233b562f9167bff6 Mon Sep 17 00:00:00 2001 From: Danyal-Faheem Date: Tue, 6 Aug 2024 17:56:01 +0500 Subject: [PATCH 04/10] fix: remove unnecessary requirements.txt file --- .../templates/cairn/apps/openedx/scripts/requirements.txt | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 tutorcairn/templates/cairn/apps/openedx/scripts/requirements.txt diff --git a/tutorcairn/templates/cairn/apps/openedx/scripts/requirements.txt b/tutorcairn/templates/cairn/apps/openedx/scripts/requirements.txt deleted file mode 100644 index 405599c..0000000 --- a/tutorcairn/templates/cairn/apps/openedx/scripts/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -uvicorn -fastapi From 241f0ae43f010dd8bdf8e65f7aa45aa2b8e91c59 Mon Sep 17 00:00:00 2001 From: Danyal-Faheem Date: Thu, 8 Aug 2024 20:40:29 
+0500 Subject: [PATCH 05/10] fix: update k8s command and port --- tutorcairn/patches/k8s-deployments | 5 +---- tutorcairn/patches/k8s-services | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/tutorcairn/patches/k8s-deployments b/tutorcairn/patches/k8s-deployments index 791388b..0f98780 100644 --- a/tutorcairn/patches/k8s-deployments +++ b/tutorcairn/patches/k8s-deployments @@ -346,11 +346,8 @@ spec: containers: - name: cairn-watchcourses image: {{ DOCKER_IMAGE_OPENEDX }} - env: - - name: DJANGO_SETTINGS_MODULE - value: lms.envs.tutor.production command: ["/bin/bash"] - args: ["-c", "pip install -qr /openedx/scripts/requirements.txt && uvicorn --app-dir /openedx/scripts/ main:app --host 0.0.0.0 --port {{ CAIRN_WATCHCOURSES_PORT }}"] + args: ["-c", "python /openedx/scripts/server.py"] volumeMounts: - mountPath: /openedx/edx-platform/lms/envs/tutor/ name: settings-lms diff --git a/tutorcairn/patches/k8s-services b/tutorcairn/patches/k8s-services index bd5fc18..1a076e0 100644 --- a/tutorcairn/patches/k8s-services +++ b/tutorcairn/patches/k8s-services @@ -51,7 +51,7 @@ metadata: spec: type: ClusterIP ports: - - port: 8999 + - port: 9282 protocol: TCP selector: app.kubernetes.io/name: cairn-watchcourses From c7a832754f73400461aed3f9bb85d7e6aea634b0 Mon Sep 17 00:00:00 2001 From: Danyal-Faheem Date: Thu, 8 Aug 2024 20:40:57 +0500 Subject: [PATCH 06/10] feat: add batch processing to course_id sinks --- .../apps/openedx/scripts/importcoursedata.py | 3 +- .../cairn/apps/openedx/scripts/server.py | 30 ++++++++++++++----- .../apps/vector/partials/common-post.toml | 2 ++ 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/tutorcairn/templates/cairn/apps/openedx/scripts/importcoursedata.py b/tutorcairn/templates/cairn/apps/openedx/scripts/importcoursedata.py index 402a49b..b1f1abb 100644 --- a/tutorcairn/templates/cairn/apps/openedx/scripts/importcoursedata.py +++ b/tutorcairn/templates/cairn/apps/openedx/scripts/importcoursedata.py @@ -24,7 +24,8 
@@ def main(): description="Import course block information into the datalake" ) parser.add_argument( - "-c", "--course-id", action="append", help="Limit import to these courses" + "-c", "--course-id", action="extend", nargs='*', + help="Limit import to these courses" ) args = parser.parse_args() diff --git a/tutorcairn/templates/cairn/apps/openedx/scripts/server.py b/tutorcairn/templates/cairn/apps/openedx/scripts/server.py index c51b338..863c51a 100644 --- a/tutorcairn/templates/cairn/apps/openedx/scripts/server.py +++ b/tutorcairn/templates/cairn/apps/openedx/scripts/server.py @@ -5,23 +5,37 @@ async def import_course_to_clickhouse(request): data = await request.json() - if not isinstance(data, list) or 'course_id' not in data[0]: return web.json_response({"error":"Value course_id is required."}, status=400) - course_id = data[0]['course_id'] + # Get the list of unique course_ids + unique_courses = list({course['course_id']: course for course in data}.values()) + + course_ids = [] + + for course in unique_courses: + course_id = course['course_id'] + # Verify course_id is a valid course_id + try: + CourseLocator.from_string(course_id) + except: + continue + + course_ids.append(course_id) - # Verify course_id is a valid course_id - try: - CourseLocator.from_string(course_id) - except: - return web.json_response({"error": "Invalid course_id"}, status=400) + # If none of the course_ids are valid, return an error + if not course_ids: + return web.json_response({"error": f"Invalid course_id"}, status=400) + + command = ["python", "/openedx/scripts/importcoursedata.py", "-c"] + command.extend(course_ids) try: - subprocess.run(["python", "/openedx/scripts/importcoursedata.py", "-c", course_id]) + subprocess.run(command) return web.json_response({"result": "success"}, status=200) except Exception as e: return web.json_response({'error': str(e)}, status=400) + app = web.Application() app.router.add_post('/import_course/', import_course_to_clickhouse) diff --git 
a/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml b/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml index 4df8a5d..8c98695 100644 --- a/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml +++ b/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml @@ -86,6 +86,8 @@ type = "http" method = "post" encoding.codec = "json" inputs = ["course_published"] +batch.timeout_secs = 300 +batch.max_events = 10 uri = "http://cairn-watchcourses:9282/import_course/" {{ patch("cairn-vector-common-toml") }} From 23cee685b9af7199843856dc368ed18797e9673b Mon Sep 17 00:00:00 2001 From: Danyal-Faheem Date: Sun, 18 Aug 2024 20:12:29 +0500 Subject: [PATCH 07/10] fix: update k8s deployment and services --- tutorcairn/patches/k8s-deployments | 6 ++- tutorcairn/patches/k8s-services | 2 +- tutorcairn/patches/k8s-volumes | 14 ------- .../cairn/apps/openedx/scripts/server.py | 42 ++++++++++--------- .../apps/vector/partials/common-post.toml | 6 ++- 5 files changed, 33 insertions(+), 37 deletions(-) diff --git a/tutorcairn/patches/k8s-deployments b/tutorcairn/patches/k8s-deployments index 0f98780..c6bf9be 100644 --- a/tutorcairn/patches/k8s-deployments +++ b/tutorcairn/patches/k8s-deployments @@ -337,11 +337,11 @@ metadata: spec: selector: matchLabels: - app.kubernetes.io/name: cairn-cairn-watchcourses + app.kubernetes.io/name: cairn-watchcourses template: metadata: labels: - app.kubernetes.io/name: cairn-cairn-watchcourses + app.kubernetes.io/name: cairn-watchcourses spec: containers: - name: cairn-watchcourses @@ -362,6 +362,8 @@ spec: subPath: auth.json securityContext: allowPrivilegeEscalation: false + ports: + - containerPort: 9282 volumes: - name: settings-lms configMap: diff --git a/tutorcairn/patches/k8s-services b/tutorcairn/patches/k8s-services index 1a076e0..60fccd0 100644 --- a/tutorcairn/patches/k8s-services +++ b/tutorcairn/patches/k8s-services @@ -49,7 +49,7 @@ kind: Service metadata: name: cairn-watchcourses spec: - type: 
ClusterIP + type: NodePort ports: - port: 9282 protocol: TCP diff --git a/tutorcairn/patches/k8s-volumes b/tutorcairn/patches/k8s-volumes index 0a1c22f..924d5e7 100644 --- a/tutorcairn/patches/k8s-volumes +++ b/tutorcairn/patches/k8s-volumes @@ -44,17 +44,3 @@ spec: requests: storage: 2Gi {% endif %} ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: cairn-watchcourses - labels: - app.kubernetes.io/component: volume - app.kubernetes.io/name: cairn-watchcourses -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 1Gi diff --git a/tutorcairn/templates/cairn/apps/openedx/scripts/server.py b/tutorcairn/templates/cairn/apps/openedx/scripts/server.py index 863c51a..49f2d16 100644 --- a/tutorcairn/templates/cairn/apps/openedx/scripts/server.py +++ b/tutorcairn/templates/cairn/apps/openedx/scripts/server.py @@ -1,13 +1,26 @@ from aiohttp import web import subprocess +import logging from opaque_keys.edx.locator import CourseLocator -async def import_course_to_clickhouse(request): +# Configure logging +logging.basicConfig(level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') + +# Get a logger instance +log = logging.getLogger(__name__) + +async def import_courses_to_clickhouse(request): data = await request.json() - if not isinstance(data, list) or 'course_id' not in data[0]: - return web.json_response({"error":"Value course_id is required."}, status=400) - + if not isinstance(data, list): + return web.json_response({"error": f"Incorrect data format. Expected list, got {data.__class__}."}, status=400) + course_ids = [] + for course in data: + course_id = course.get("course_id") + if not isinstance(course_id, str): + return web.json_response({"error": f"Incorrect course_id format. 
Expected str, got {course_id.__class__}."}, status=400) + # Get the list of unique course_ids unique_courses = list({course['course_id']: course for course in data}.values()) @@ -18,26 +31,17 @@ async def import_course_to_clickhouse(request): # Verify course_id is a valid course_id try: CourseLocator.from_string(course_id) - except: - continue + except Exception as e: + log.exception(f"An error occured: {str(e)}") + return web.json_response({"error": f"Incorrect arguments. Expected valid course_id, got {course_id}."}, status=400) course_ids.append(course_id) - - # If none of the course_ids are valid, return an error - if not course_ids: - return web.json_response({"error": f"Invalid course_id"}, status=400) - command = ["python", "/openedx/scripts/importcoursedata.py", "-c"] - command.extend(course_ids) - - try: - subprocess.run(command) - return web.json_response({"result": "success"}, status=200) - except Exception as e: - return web.json_response({'error': str(e)}, status=400) + subprocess.run(["python", "/openedx/scripts/importcoursedata.py", "-c", *course_ids], check=True) + return web.Response(status=204) app = web.Application() -app.router.add_post('/import_course/', import_course_to_clickhouse) +app.router.add_post('/courses/published/', import_courses_to_clickhouse) web.run_app(app, host='0.0.0.0', port=9282) diff --git a/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml b/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml index 8c98695..ce85ae3 100644 --- a/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml +++ b/tutorcairn/templates/cairn/apps/vector/partials/common-post.toml @@ -86,8 +86,12 @@ type = "http" method = "post" encoding.codec = "json" inputs = ["course_published"] +# Batch events together to reduce the number of times +# the importcoursedata script is run +# Vector will wait 300 secs (5 mins) from the first event +# or until there are 10 events to trigger the watchcourses service batch.timeout_secs 
= 300 batch.max_events = 10 -uri = "http://cairn-watchcourses:9282/import_course/" +uri = "http://cairn-watchcourses:9282/courses/published/" {{ patch("cairn-vector-common-toml") }} From 379149a126cdf645e34887f0acb04d3552f0c71c Mon Sep 17 00:00:00 2001 From: Danyal-Faheem Date: Mon, 19 Aug 2024 12:38:35 +0500 Subject: [PATCH 08/10] docs: update refreshing course block data guide in readme --- README.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 6809585..7778dbf 100644 --- a/README.rst +++ b/README.rst @@ -132,7 +132,9 @@ To restrict a given user to one or more courses or organizations, select the cou Refreshing course block data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Course block IDs and names are loaded from the Open edX modulestore into the datalake. After making changes to your course, you might want to refresh the course structure stored in the datalake. To do so, run:: +Cairn has a ``cairn-watchcourses`` service that looks for changes to the course structure and refreshes the course structure in the datalake automatically. However, the changes may take up to 5 minutes to show up in superset as this service utilizes batch processing. 
+ +If you would like to manually refresh the course structure, run:: tutor local do init --limit=cairn From 4bbe2ea6b4217510d89ec5431ccca51cd6d90604 Mon Sep 17 00:00:00 2001 From: Danyal-Faheem Date: Tue, 20 Aug 2024 18:08:09 +0500 Subject: [PATCH 09/10] fix: remove unnecessary comments --- tutorcairn/templates/cairn/apps/openedx/scripts/server.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tutorcairn/templates/cairn/apps/openedx/scripts/server.py b/tutorcairn/templates/cairn/apps/openedx/scripts/server.py index 49f2d16..e6ea440 100644 --- a/tutorcairn/templates/cairn/apps/openedx/scripts/server.py +++ b/tutorcairn/templates/cairn/apps/openedx/scripts/server.py @@ -3,11 +3,9 @@ import logging from opaque_keys.edx.locator import CourseLocator -# Configure logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') -# Get a logger instance log = logging.getLogger(__name__) async def import_courses_to_clickhouse(request): From c818836a7cb7775232e9c4d6391e5f506aaa22ce Mon Sep 17 00:00:00 2001 From: Danyal-Faheem Date: Mon, 2 Sep 2024 11:17:56 +0500 Subject: [PATCH 10/10] refactor: sort imports and add file docstring --- .../cairn/apps/openedx/scripts/server.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tutorcairn/templates/cairn/apps/openedx/scripts/server.py b/tutorcairn/templates/cairn/apps/openedx/scripts/server.py index e6ea440..ef4c8d3 100644 --- a/tutorcairn/templates/cairn/apps/openedx/scripts/server.py +++ b/tutorcairn/templates/cairn/apps/openedx/scripts/server.py @@ -1,6 +1,23 @@ -from aiohttp import web -import subprocess +""" +This module provides an HTTP service for importing course data into ClickHouse. + +It defines a single HTTP endpoint that allows for the submission of course IDs, +which are then processed and used to trigger a subprocess for data import. 
+ +Functions: +- import_courses_to_clickhouse(request): Handles POST requests to '/courses/published/'. + Validates the input data, verifies course IDs, and triggers an external Python script + to import the data into ClickHouse. + +Usage: +- python server.py +- Run this module to start the HTTP server. It listens on port 9282 and processes + requests sent to the '/courses/published/' endpoint. +""" import logging +import subprocess + +from aiohttp import web from opaque_keys.edx.locator import CourseLocator logging.basicConfig(level=logging.INFO,