From a93c0c96559dfacc148af12816ae72277ad3614c Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Thu, 13 Jun 2024 15:03:45 +0300
Subject: [PATCH 01/21] add docstring to  embed methods

---
 .../contentcuration/utils/recommendations.py  | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/contentcuration/contentcuration/utils/recommendations.py b/contentcuration/contentcuration/utils/recommendations.py
index 1c015d2050..df5f074632 100644
--- a/contentcuration/contentcuration/utils/recommendations.py
+++ b/contentcuration/contentcuration/utils/recommendations.py
@@ -89,7 +89,19 @@ def get_recommendations(self, embedding) -> RecommendationsResponse:
         return self.backend.make_request(request)
 
     def embed_topics(self, topics: Dict[str, Any]) -> EmbeddingsResponse:
+        """
+        Embeds the topics and returns an EmbeddingsResponse.
 
+        This method connects to the backend, sends the provided topics as a JSON payload,
+        and requests the backend to embed the topics. If an exception occurs during
+        this process, it returns an EmbeddingsResponse with the exception as the error.
+
+        Parameters:
+        topics (Dict[str, Any]): A dictionary of topics to be embedded.
+
+        Returns:
+        EmbeddingsResponse: An EmbeddingsResponse object containing the embeddings or an error.
+        """
         if not self.backend.connect():
             raise errors.ConnectionError("Connection to the backend failed")
 
@@ -100,7 +112,19 @@ def embed_topics(self, topics: Dict[str, Any]) -> EmbeddingsResponse:
             return EmbeddingsResponse(error=e)
 
     def embed_content(self, nodes: List[ContentNode]) -> EmbeddingsResponse:
+        """
+        Embeds the content nodes and returns an EmbeddingsResponse.
+
+        This method connects to the backend, extracts content metadata from the provided nodes,
+        and sends a request to the backend to embed the content. If an exception occurs during
+        this process, it returns an EmbeddingsResponse with the exception as the error.
+
+        Parameters:
+        nodes (List[ContentNode]): A list of ContentNode objects to be embedded.
 
+        Returns:
+        EmbeddingsResponse: An EmbeddingsResponse object containing the embeddings or an error.
+        """
         if not self.backend.connect():
             raise errors.ConnectionError("Connection to the backend failed")
 
@@ -116,6 +140,18 @@ def embed_content(self, nodes: List[ContentNode]) -> EmbeddingsResponse:
             return EmbeddingsResponse(error=e)
 
     def extract_content(self, node: ContentNode) -> Dict[str, Any]:
+        """
+        Extracts content metadata from a given ContentNode object.
+
+        This method extracts the content metadata from the provided ContentNode object.
+        The extracted metadata is returned as a dictionary.
+
+        Parameters:
+        node (ContentNode): The ContentNode object from which to extract the content metadata.
+
+        Returns:
+        Dict[str, Any]: A dictionary containing the extracted content metadata.
+        """
         return {}
 
 

From df864405c33b385b44a50ab9dcf293826af9d5a0 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Tue, 18 Jun 2024 19:27:30 +0300
Subject: [PATCH 02/21] updates embed_content to use methods as building blocks

---
 .../tests/utils/test_automation_manager.py    |  28 -----
 .../tests/utils/test_recommendations.py       |  21 ++--
 .../utils/automation_manager.py               |  28 +++--
 .../contentcuration/utils/recommendations.py  | 105 +++++++++++++-----
 4 files changed, 106 insertions(+), 76 deletions(-)

diff --git a/contentcuration/contentcuration/tests/utils/test_automation_manager.py b/contentcuration/contentcuration/tests/utils/test_automation_manager.py
index a01eaaa228..5f1833d6cf 100644
--- a/contentcuration/contentcuration/tests/utils/test_automation_manager.py
+++ b/contentcuration/contentcuration/tests/utils/test_automation_manager.py
@@ -1,5 +1,4 @@
 import unittest
-from unittest.mock import MagicMock
 
 from contentcuration.utils.automation_manager import AutomationManager
 
@@ -11,30 +10,3 @@ def setUp(self):
     def test_creation(self):
         # Check if an instance of AutomationManager is created successfully
         self.assertIsInstance(self.automation_manager, AutomationManager)
-
-    def test_generate_embedding(self):
-        text = "Some text that needs to be embedded"
-        # Mock the generate_embedding method of RecommendationsAdapter
-        # as the implementation is yet to be done
-        self.automation_manager.recommendations_backend_adapter.generate_embedding = MagicMock(return_value=[0.1, 0.2, 0.3])
-        embedding_vector = self.automation_manager.generate_embedding(text)
-        self.assertIsNotNone(embedding_vector)
-
-    def test_embedding_exists(self):
-        embedding_vector = [0.1, 0.2, 0.3]
-        # Currently no solid implementation exists for this
-        # So the embadding_exists function returns true anyways
-        exists = self.automation_manager.embedding_exists(embedding_vector)
-        self.assertTrue(exists)
-
-    def test_load_recommendations(self):
-        embedding_vector = [0.1, 0.2, 0.3]
-        self.automation_manager.recommendations_backend_adapter.get_recommendations = MagicMock(return_value=["item1", "item2"])
-        recommendations = self.automation_manager.load_recommendations(embedding_vector)
-        self.assertIsInstance(recommendations, list)
-
-    def test_cache_embeddings(self):
-        embeddings_list = [[0.1, 0.2, 0.3]]
-        # Currently the function returns true anyways
-        success = self.automation_manager.cache_embeddings(embeddings_list)
-        self.assertTrue(success)
diff --git a/contentcuration/contentcuration/tests/utils/test_recommendations.py b/contentcuration/contentcuration/tests/utils/test_recommendations.py
index 3ef11e43e5..0bbb9272dd 100644
--- a/contentcuration/contentcuration/tests/utils/test_recommendations.py
+++ b/contentcuration/contentcuration/tests/utils/test_recommendations.py
@@ -7,6 +7,7 @@
 from contentcuration.utils.recommendations import EmbeddingsResponse
 from contentcuration.utils.recommendations import Recommendations
 from contentcuration.utils.recommendations import RecommendationsAdapter
+from contentcuration.utils.recommendations import RecommendationsResponse
 
 
 class RecommendationsTestCase(TestCase):
@@ -41,33 +42,33 @@ def test_adapter_initialization(self):
         self.assertIsInstance(self.adapter, RecommendationsAdapter)
 
     @patch('contentcuration.utils.recommendations.EmbedTopicsRequest')
-    def test_embed_topics_backend_connect_success(self, embed_topics_request_mock):
+    def test_get_recommendations_backend_connect_success(self, get_recommendations_request_mock):
         self.adapter.backend.connect.return_value = True
-        self.adapter.backend.make_request.return_value = MagicMock(spec=EmbeddingsResponse)
-        response = self.adapter.embed_topics(self.topic)
+        self.adapter.backend.make_request.return_value = MagicMock(spec=RecommendationsResponse)
+        response = self.adapter.get_recommendations(self.topic)
         self.adapter.backend.connect.assert_called_once()
         self.adapter.backend.make_request.assert_called_once()
-        self.assertIsInstance(response, EmbeddingsResponse)
+        self.assertIsInstance(response, RecommendationsResponse)
 
-    def test_embed_topics_backend_connect_failure(self):
+    def test_get_recommendations_backend_connect_failure(self):
         self.adapter.backend.connect.return_value = False
         with self.assertRaises(errors.ConnectionError):
-            self.adapter.embed_topics(self.topic)
+            self.adapter.get_recommendations(self.topic)
         self.adapter.backend.connect.assert_called_once()
         self.adapter.backend.make_request.assert_not_called()
 
     @patch('contentcuration.utils.recommendations.EmbedTopicsRequest')
-    def test_embed_topics_make_request_exception(self, embed_topics_request_mock):
+    def test_get_recommendations_make_request_exception(self, get_recommendations_request_mock):
         self.adapter.backend.connect.return_value = True
         self.adapter.backend.make_request.side_effect = Exception("Mocked exception")
-        response = self.adapter.embed_topics(self.topic)
+        response = self.adapter.get_recommendations(self.topic)
         self.adapter.backend.connect.assert_called_once()
         self.adapter.backend.make_request.assert_called_once()
-        self.assertIsInstance(response, EmbeddingsResponse)
+        self.assertIsInstance(response, RecommendationsResponse)
         self.assertEqual(str(response.error), "Mocked exception")
 
     @patch('contentcuration.utils.recommendations.EmbedContentRequest')
-    def test_embed_content_backend_connect_success(self, embed_content_request_mock):
+    def test_embed_content_backend_connect_success(self, get_recommendations_request_mock):
         self.adapter.backend.connect.return_value = True
         self.adapter.backend.make_request.return_value = MagicMock(spec=EmbeddingsResponse)
         response = self.adapter.embed_content(self.resources)
diff --git a/contentcuration/contentcuration/utils/automation_manager.py b/contentcuration/contentcuration/utils/automation_manager.py
index c609064fe7..9ccb2ea22a 100644
--- a/contentcuration/contentcuration/utils/automation_manager.py
+++ b/contentcuration/contentcuration/utils/automation_manager.py
@@ -1,3 +1,6 @@
+from typing import Any
+from typing import Dict
+
 from contentcuration.utils.recommendations import RecommendationsAdapter
 from contentcuration.utils.recommendations import RecommendationsBackendFactory
 
@@ -8,16 +11,15 @@ def __init__(self):
         self.recommendations_backend_instance = self.recommendations_backend_factory.create_backend()
         self.recommendations_backend_adapter = RecommendationsAdapter(self.recommendations_backend_instance)
 
-    def generate_embedding(self, text):
+    def generate_embedding(self, topic: Dict[str, Any]):
         """
-        Generate an embedding vector for the given text.
+        Generate an embedding vector for the given topic.
         Args:
-            text (str): The text for which to generate an embedding.
+            topic (dict): The topic for which to generate an embedding vector.
         Returns:
             Vector: The generated embedding vector.
         """
-        embedding_vector = self.recommendations_backend_adapter.generate_embedding(text=text)
-        return embedding_vector
+        return self.recommendations_backend_adapter.generate_embedding(topic)
 
     def embedding_exists(self, embedding):
         """
@@ -29,16 +31,18 @@ def embedding_exists(self, embedding):
         """
         return self.recommendations_backend_adapter.embedding_exists(embedding=embedding)
 
-    def load_recommendations(self, embedding):
+    def load_recommendations(self, topic: Dict[str, Any], override_threshold=False):
         """
-        Load recommendations based on the given embedding vector.
-        Args:
-            embedding (Vector): The embedding vector to use for recommendations.
+        Load recommendations for the given topic.
+
+        Parameters:
+            :param topic: A dictionary containing the topic for which to get recommendations.
+            :param override_threshold: A boolean flag to override the recommendation threshold.
+
         Returns:
-            list: A list of recommended items.
+            list: A list of recommended resources.
         """
-        # Need to extract the recommendation list from the ResponseObject and change the return statement
-        self.recommendations_backend_adapter.get_recommendations(embedding=embedding)
+        self.recommendations_backend_adapter.get_recommendations(topic=topic, override_threshold=override_threshold)
         return []
 
     def cache_embeddings(self, embeddings):
diff --git a/contentcuration/contentcuration/utils/recommendations.py b/contentcuration/contentcuration/utils/recommendations.py
index df5f074632..ba1df07375 100644
--- a/contentcuration/contentcuration/utils/recommendations.py
+++ b/contentcuration/contentcuration/utils/recommendations.py
@@ -9,6 +9,7 @@
 from automation.utils.appnexus.base import BackendFactory
 from automation.utils.appnexus.base import BackendRequest
 from automation.utils.appnexus.base import BackendResponse
+from le_utils.constants import content_kinds
 
 from contentcuration.models import ContentNode
 
@@ -38,12 +39,12 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
 
-class EmbedTopicsRequest(RecommendationsBackendRequest):
+class EmbedTopicsRequest(EmbeddingsRequest):
     path = '/embed-topics'
     method = 'POST'
 
 
-class EmbedContentRequest(RecommendationsBackendRequest):
+class EmbedContentRequest(EmbeddingsRequest):
     path = '/embed-content'
     method = 'POST'
 
@@ -61,8 +62,16 @@ def create_backend(self) -> Backend:
 
 class RecommendationsAdapter(Adapter):
 
-    def generate_embedding(self, text) -> EmbeddingsResponse:
-        request = EmbeddingsRequest()
+    def generate_embedding(self, request: EmbeddingsRequest) -> EmbeddingsResponse:
+        """
+        Generate an embedding vector for the given content.
+
+        Parameters:
+            :param request: The EmbeddingsRequest object containing the request details.
+            :param content: The topic for which to generate an embedding vector.
+        Returns:
+            EmbeddingsResponse: An object containing the embeddings or an error.
+        """
         return self.backend.make_request(request)
 
     def embedding_exists(self, embedding) -> bool:
@@ -84,32 +93,43 @@ def cache_embeddings(self, embeddings_list) -> bool:
                 return False
         return True
 
-    def get_recommendations(self, embedding) -> RecommendationsResponse:
-        request = RecommendationsRequest(embedding)
-        return self.backend.make_request(request)
-
-    def embed_topics(self, topics: Dict[str, Any]) -> EmbeddingsResponse:
+    def get_recommendations(self, topic: Dict[str, Any], override_threshold=False) -> RecommendationsResponse:
         """
-        Embeds the topics and returns an EmbeddingsResponse.
+        Get recommendations for the given topic.
 
-        This method connects to the backend, sends the provided topics as a JSON payload,
-        and requests the backend to embed the topics. If an exception occurs during
-        this process, it returns an EmbeddingsResponse with the exception as the error.
+        This method connects to the backend, sends a request to get recommendations for a given
+        topic, and returns a RecommendationsResponse object containing the recommendations
+        or an error.
 
         Parameters:
-        topics (Dict[str, Any]): A dictionary of topics to be embedded.
+            :param topic: The topic for which to get recommendations.
+            :param override_threshold: A boolean flag to override the recommendation threshold.
 
         Returns:
-        EmbeddingsResponse: An EmbeddingsResponse object containing the embeddings or an error.
+            RecommendationsResponse: An object containing a list of recommendations or an error.
         """
         if not self.backend.connect():
             raise errors.ConnectionError("Connection to the backend failed")
-
         try:
-            embed_topics_request = EmbedTopicsRequest(json=topics)
-            return self.backend.make_request(embed_topics_request)
+            # Generate the embedding for the topic
+            request = EmbedTopicsRequest(json=topic)
+            embedding = self.generate_embedding(request)
+            if embedding.error is not None:
+                return RecommendationsResponse(error=embedding.error)
+            # Cache the embedding if it does not exist
+            if not self.embedding_exists(embedding):
+                self.cache_embeddings([embedding])
+
+            # Send a request to get recommendations for the topic
+            request = RecommendationsRequest(
+                method='GET',
+                path='/recommendations',
+                params={'override_threshold': override_threshold},
+                json=topic,
+            )
+            return self.backend.make_request(request)
         except Exception as e:
-            return EmbeddingsResponse(error=e)
+            return RecommendationsResponse(error=e)
 
     def embed_content(self, nodes: List[ContentNode]) -> EmbeddingsResponse:
         """
@@ -129,13 +149,20 @@ def embed_content(self, nodes: List[ContentNode]) -> EmbeddingsResponse:
             raise errors.ConnectionError("Connection to the backend failed")
 
         try:
-            resources = [self.extract_content(node) for node in nodes]
-            json = {
-                'resources': resources,
-                'metadata': {}
-            }
-            embed_content_request = EmbedContentRequest(json=json)
-            return self.backend.make_request(embed_content_request)
+            failed_requests = []
+            for node in nodes:
+                resource = self.extract_content(node)
+                json = {
+                    'resources': [resource],
+                    'metadata': {}
+                }
+                request = EmbedContentRequest(json=json)
+                embedding = self.generate_embedding(request)
+                if embedding.error is not None:
+                    failed_requests.append(request)
+                if embedding.error is None and not self.embedding_exists(embedding):
+                    self.cache_embeddings([embedding])
+            return EmbeddingsResponse(failed_requests=failed_requests)
         except Exception as e:
             return EmbeddingsResponse(error=e)
 
@@ -152,6 +179,32 @@ def extract_content(self, node: ContentNode) -> Dict[str, Any]:
         Returns:
         Dict[str, Any]: A dictionary containing the extracted content metadata.
         """
+        contentkind = node.kind
+        if contentkind.kind == content_kinds.AUDIO:
+            # handle audio content
+            pass
+        elif contentkind.kind == content_kinds.VIDEO:
+            # handle video content
+            pass
+        elif contentkind.kind == content_kinds.EXERCISE:
+            # handle exercise content
+            pass
+        elif contentkind.kind == content_kinds.DOCUMENT:
+            # handle document content
+            pass
+        elif contentkind.kind == content_kinds.HTML5:
+            # handle html5 content
+            pass
+        elif contentkind.kind == content_kinds.H5P:
+            # handle h5p content
+            pass
+        elif contentkind.kind == content_kinds.ZIM:
+            # handle zim content
+            pass
+        else:
+            # handle topic content or any other kind
+            pass
+
         return {}
 
 

From b8fdb18fb4389b26c78cb5e8d7601334f2a9e606 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Thu, 11 Jul 2024 14:03:49 +0300
Subject: [PATCH 03/21] Implements extract content urls

---
 contentcuration/automation/models.py          |  13 +-
 .../automation/utils/appnexus/base.py         |   7 +-
 .../utils/automation_manager.py               |  24 +-
 .../contentcuration/utils/recommendations.py  | 219 ++++++++----------
 4 files changed, 120 insertions(+), 143 deletions(-)

diff --git a/contentcuration/automation/models.py b/contentcuration/automation/models.py
index 0b4331b362..ef6adc38c7 100644
--- a/contentcuration/automation/models.py
+++ b/contentcuration/automation/models.py
@@ -1,3 +1,12 @@
-# from django.db import models
+import uuid
 
-# Create your models here.
+from django.db import models
+from django.db.models import JSONField
+
+
+class RecommendationsCache(models.Model):
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
+    request = JSONField(default=dict, null=True)
+    response = JSONField(default=dict, null=True)
+    priority = models.IntegerField(default=0)
+    timestamp = models.DateTimeField(auto_now_add=True)
diff --git a/contentcuration/automation/utils/appnexus/base.py b/contentcuration/automation/utils/appnexus/base.py
index 0998753bd8..f3ea8a9023 100644
--- a/contentcuration/automation/utils/appnexus/base.py
+++ b/contentcuration/automation/utils/appnexus/base.py
@@ -169,11 +169,10 @@ def connect(self, **kwargs):
 
     def make_request(self, request):
         """ Make a request to the backend service. """
-        response = self._make_request(request)
         try:
-            info = response.json()
-            info.update({"status_code": response.status_code})
-            return BackendResponse(**info)
+            response = self._make_request(request)
+            response_body = dict(data=response.json())
+            return BackendResponse(**response_body)
         except ValueError as e:
             logging.exception(e)
             raise errors.InvalidResponse("Invalid response from backend")
diff --git a/contentcuration/contentcuration/utils/automation_manager.py b/contentcuration/contentcuration/utils/automation_manager.py
index 9ccb2ea22a..82a4344c02 100644
--- a/contentcuration/contentcuration/utils/automation_manager.py
+++ b/contentcuration/contentcuration/utils/automation_manager.py
@@ -19,17 +19,10 @@ def generate_embedding(self, topic: Dict[str, Any]):
         Returns:
             Vector: The generated embedding vector.
         """
-        return self.recommendations_backend_adapter.generate_embedding(topic)
+        return self.recommendations_backend_adapter.generate_embeddings(topic)
 
-    def embedding_exists(self, embedding):
-        """
-        Check if the given embedding vector exists.
-        Args:
-            embedding (Vector): The embedding vector to check.
-        Returns:
-            bool: True if the embedding exists, False otherwise.
-        """
-        return self.recommendations_backend_adapter.embedding_exists(embedding=embedding)
+    def response_exists(self, request):
+        return self.recommendations_backend_adapter.response_exists(request=request)
 
     def load_recommendations(self, topic: Dict[str, Any], override_threshold=False):
         """
@@ -45,12 +38,5 @@ def load_recommendations(self, topic: Dict[str, Any], override_threshold=False):
         self.recommendations_backend_adapter.get_recommendations(topic=topic, override_threshold=override_threshold)
         return []
 
-    def cache_embeddings(self, embeddings):
-        """
-        Cache a list of embedding vectors.
-        Args:
-            embeddings (list): A list of embedding vectors to cache.
-        Returns:
-            bool: True if caching was successful, False otherwise.
-        """
-        return self.recommendations_backend_adapter.cache_embeddings(embeddings)
+    def cache_embeddings(self, request, response):
+        return self.recommendations_backend_adapter.cache_embeddings_request(request, response)
diff --git a/contentcuration/contentcuration/utils/recommendations.py b/contentcuration/contentcuration/utils/recommendations.py
index ba1df07375..5e6f2e36c0 100644
--- a/contentcuration/contentcuration/utils/recommendations.py
+++ b/contentcuration/contentcuration/utils/recommendations.py
@@ -1,8 +1,10 @@
+import logging
 from typing import Any
 from typing import Dict
 from typing import List
 from typing import Union
 
+from automation.models import RecommendationsCache
 from automation.utils.appnexus import errors
 from automation.utils.appnexus.base import Adapter
 from automation.utils.appnexus.base import Backend
@@ -10,8 +12,11 @@
 from automation.utils.appnexus.base import BackendRequest
 from automation.utils.appnexus.base import BackendResponse
 from le_utils.constants import content_kinds
+from le_utils.constants import format_presets
 
+from contentcuration.models import Channel
 from contentcuration.models import ContentNode
+from contentcuration.models import File
 
 
 class RecommendationsBackendRequest(BackendRequest):
@@ -62,109 +67,67 @@ def create_backend(self) -> Backend:
 
 class RecommendationsAdapter(Adapter):
 
-    def generate_embedding(self, request: EmbeddingsRequest) -> EmbeddingsResponse:
-        """
-        Generate an embedding vector for the given content.
-
-        Parameters:
-            :param request: The EmbeddingsRequest object containing the request details.
-            :param content: The topic for which to generate an embedding vector.
-        Returns:
-            EmbeddingsResponse: An object containing the embeddings or an error.
-        """
-        return self.backend.make_request(request)
-
-    def embedding_exists(self, embedding) -> bool:
-        # Need to implement the logic to check if the embeddigns exist
-        # Return True if the embedding exists, or False otherwise
-        return True
-
-    def cache_embeddings(self, embeddings_list) -> bool:
-        for embedding in embeddings_list:
-            try:
-                # Attempt to cache the embedding
-                # Write the caching logic
-                # A conrner case to look at here is if one of the embedding fails to get cached
-                # we need to handel it so that only the once that were not succesfull
-                # are attempted to cache again
-                pass
-            except Exception as e:
-                print(e)
-                return False
-        return True
-
-    def get_recommendations(self, topic: Dict[str, Any], override_threshold=False) -> RecommendationsResponse:
-        """
-        Get recommendations for the given topic.
-
-        This method connects to the backend, sends a request to get recommendations for a given
-        topic, and returns a RecommendationsResponse object containing the recommendations
-        or an error.
+    def generate_embeddings(self, request: EmbeddingsRequest,
+                            cache: bool = True) -> EmbeddingsResponse:
 
-        Parameters:
-            :param topic: The topic for which to get recommendations.
-            :param override_threshold: A boolean flag to override the recommendation threshold.
-
-        Returns:
-            RecommendationsResponse: An object containing a list of recommendations or an error.
-        """
         if not self.backend.connect():
             raise errors.ConnectionError("Connection to the backend failed")
+
+        if cache:
+            cached_response = self.response_exists(request)
+            if cached_response:
+                return cached_response
+
         try:
-            # Generate the embedding for the topic
-            request = EmbedTopicsRequest(json=topic)
-            embedding = self.generate_embedding(request)
-            if embedding.error is not None:
-                return RecommendationsResponse(error=embedding.error)
-            # Cache the embedding if it does not exist
-            if not self.embedding_exists(embedding):
-                self.cache_embeddings([embedding])
-
-            # Send a request to get recommendations for the topic
-            request = RecommendationsRequest(
-                method='GET',
-                path='/recommendations',
-                params={'override_threshold': override_threshold},
-                json=topic,
-            )
-            return self.backend.make_request(request)
+            response = self.backend.make_request(request)
         except Exception as e:
-            return RecommendationsResponse(error=e)
-
-    def embed_content(self, nodes: List[ContentNode]) -> EmbeddingsResponse:
-        """
-        Embeds the content nodes and returns an EmbeddingsResponse.
+            response = EmbeddingsResponse(error=e)
 
-        This method connects to the backend, extracts content metadata from the provided nodes,
-        and sends a request to the backend to embed the content. If an exception occurs during
-        this process, it returns an EmbeddingsResponse with the exception as the error.
+        if not response.error and cache:
+            self.cache_embeddings_request(request, response)
 
-        Parameters:
-        nodes (List[ContentNode]): A list of ContentNode objects to be embedded.
+        return response
 
-        Returns:
-        EmbeddingsResponse: An EmbeddingsResponse object containing the embeddings or an error.
-        """
-        if not self.backend.connect():
-            raise errors.ConnectionError("Connection to the backend failed")
+    def response_exists(self, request) -> EmbeddingsResponse:
+        cache = RecommendationsCache.objects.filter(request=request).first()
+        if cache:
+            cached_response = cache.response
+        else:
+            cached_response = None
+        return EmbeddingsResponse(cached_response)
 
+    def cache_embeddings_request(self, request: BackendRequest, response: BackendResponse) -> bool:
         try:
-            failed_requests = []
-            for node in nodes:
-                resource = self.extract_content(node)
-                json = {
-                    'resources': [resource],
-                    'metadata': {}
-                }
-                request = EmbedContentRequest(json=json)
-                embedding = self.generate_embedding(request)
-                if embedding.error is not None:
-                    failed_requests.append(request)
-                if embedding.error is None and not self.embedding_exists(embedding):
-                    self.cache_embeddings([embedding])
-            return EmbeddingsResponse(failed_requests=failed_requests)
+            RecommendationsCache.objects.create(request=request, response=response, priority=2)
+            return True
         except Exception as e:
-            return EmbeddingsResponse(error=e)
+            logging.exception(e)
+            return False
+
+    def get_recommendations(self, topic: Dict[str, Any],
+                            override_threshold=False) -> RecommendationsResponse:
+        # Generate the embedding for the topic
+        recommendations = []
+        request = EmbedTopicsRequest(
+            params={'override_threshold': override_threshold},
+            json=topic,
+        )
+        response = self.generate_embeddings(request=request)
+        nodes = response.data
+        if nodes is not None and len(nodes) > 0:
+            node_ids = [node['contentnode_id'] for node in nodes]
+            recommendations = ContentNode.objects.filter(id__in=node_ids)
+
+        return RecommendationsResponse(recommendations)
+
+    def embed_content(self, nodes: List[ContentNode]) -> EmbeddingsResponse:
+        for i in range(0, len(nodes), 20):
+            batch = nodes[i:i + 20]
+            content = [self.extract_content(node) for node in batch]
+            request = EmbedContentRequest(json=content)
+            self.generate_embeddings(request=request, cache=False)
+
+        return EmbeddingsResponse()
 
     def extract_content(self, node: ContentNode) -> Dict[str, Any]:
         """
@@ -179,33 +142,53 @@ def extract_content(self, node: ContentNode) -> Dict[str, Any]:
         Returns:
         Dict[str, Any]: A dictionary containing the extracted content metadata.
         """
-        contentkind = node.kind
-        if contentkind.kind == content_kinds.AUDIO:
-            # handle audio content
-            pass
-        elif contentkind.kind == content_kinds.VIDEO:
-            # handle video content
-            pass
-        elif contentkind.kind == content_kinds.EXERCISE:
-            # handle exercise content
-            pass
-        elif contentkind.kind == content_kinds.DOCUMENT:
-            # handle document content
-            pass
-        elif contentkind.kind == content_kinds.HTML5:
-            # handle html5 content
-            pass
-        elif contentkind.kind == content_kinds.H5P:
-            # handle h5p content
-            pass
-        elif contentkind.kind == content_kinds.ZIM:
-            # handle zim content
-            pass
-        else:
-            # handle topic content or any other kind
-            pass
+        contentkind_to_presets = {
+            content_kinds.AUDIO: [format_presets.AUDIO, format_presets.AUDIO_DEPENDENCY],
+            content_kinds.VIDEO: [
+                format_presets.VIDEO_DEPENDENCY,
+                format_presets.VIDEO_HIGH_RES,
+                format_presets.VIDEO_LOW_RES,
+                format_presets.VIDEO_SUBTITLE,
+            ],
+            content_kinds.EXERCISE: [format_presets.DOCUMENT, format_presets.EPUB],
+            content_kinds.DOCUMENT: [format_presets.DOCUMENT, format_presets.EPUB],
+            content_kinds.HTML5: [format_presets.HTML5_ZIP],
+            content_kinds.H5P: [format_presets.H5P_ZIP],
+            content_kinds.ZIM: [format_presets.ZIM],
+        }
 
-        return {}
+        contentkind = node.kind
+        presets = contentkind_to_presets.get(contentkind.kind)
+        files = self.get_content_files(node, presets) if presets else None
+
+        channel = Channel.object.filter(main_tree=node).first()
+        channel_id = channel.id if channel else None
+
+        return {
+            "resources": {
+                "id": node.node_id,
+                "title": node.title,
+                "description": node.description,
+                "text": "",
+                "language": node.language.lang_code if node.language else None,
+                "files": files
+            },
+            "metadata": {
+                "channel_id": channel_id,
+            },
+        }
+
+    def get_content_files(self, node, presets) -> List[Dict[str, Any]]:
+        node_files = File.objects.filter(contentnode=node, preset__in=presets)
+        files = []
+        for file in node_files:
+            file_dict = {
+                'url': file.source_url,
+                'preset': file.preset_id,
+                'language': file.language.lang_code if file.language else None
+            }
+            files.append(file_dict)
+        return files
 
 
 class Recommendations(Backend):

From 8c10f77a7f3515289cdd6a6866f8d01c20c5a73a Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Thu, 11 Jul 2024 14:34:43 +0300
Subject: [PATCH 04/21] Runs recommendations cache migrations

---
 .../automation/migrations/0001_initial.py     | 26 +++++++++++++++++++
 contentcuration/contentcuration/settings.py   |  1 +
 2 files changed, 27 insertions(+)
 create mode 100644 contentcuration/automation/migrations/0001_initial.py

diff --git a/contentcuration/automation/migrations/0001_initial.py b/contentcuration/automation/migrations/0001_initial.py
new file mode 100644
index 0000000000..85e58b7e16
--- /dev/null
+++ b/contentcuration/automation/migrations/0001_initial.py
@@ -0,0 +1,26 @@
+# Generated by Django 3.2.24 on 2024-07-11 11:33
+import uuid
+
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+
+    initial = True
+
+    dependencies = [
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='RecommendationsCache',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
+                ('request', models.JSONField(default=dict, null=True)),
+                ('response', models.JSONField(default=dict, null=True)),
+                ('priority', models.IntegerField(default=0)),
+                ('timestamp', models.DateTimeField(auto_now_add=True)),
+            ],
+        ),
+    ]
diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py
index 8d2d9d8832..d162c5c6de 100644
--- a/contentcuration/contentcuration/settings.py
+++ b/contentcuration/contentcuration/settings.py
@@ -89,6 +89,7 @@
     'django.contrib.postgres',
     'django_celery_results',
     'kolibri_public',
+    'automation',
 )
 
 SESSION_ENGINE = "django.contrib.sessions.backends.cached_db"

From 63de9d92ef7333c4d001e466daf0e233ffb638fd Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Mon, 15 Jul 2024 19:19:47 +0300
Subject: [PATCH 05/21] clean up and tests

---
 .../migrations/0002_auto_20240711_1938.py     |  22 +++
 contentcuration/automation/models.py          |   2 +-
 .../automation/tests/appnexus/test_base.py    | 121 ++++++-------
 .../automation/utils/appnexus/base.py         |   3 +-
 .../tests/utils/test_recommendations.py       |  52 +++---
 .../utils/automation_manager.py               |  40 ++---
 .../contentcuration/utils/recommendations.py  | 160 +++++++++++++-----
 7 files changed, 228 insertions(+), 172 deletions(-)
 create mode 100644 contentcuration/automation/migrations/0002_auto_20240711_1938.py

diff --git a/contentcuration/automation/migrations/0002_auto_20240711_1938.py b/contentcuration/automation/migrations/0002_auto_20240711_1938.py
new file mode 100644
index 0000000000..b0f813b599
--- /dev/null
+++ b/contentcuration/automation/migrations/0002_auto_20240711_1938.py
@@ -0,0 +1,22 @@
+# Generated by Django 3.2.24 on 2024-07-11 19:38
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('automation', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='recommendationscache',
+            name='priority',
+        ),
+        migrations.AddField(
+            model_name='recommendationscache',
+            name='rank',
+            field=models.FloatField(default=0.0, null=True),
+        ),
+    ]
diff --git a/contentcuration/automation/models.py b/contentcuration/automation/models.py
index ef6adc38c7..c5ade9096d 100644
--- a/contentcuration/automation/models.py
+++ b/contentcuration/automation/models.py
@@ -8,5 +8,5 @@ class RecommendationsCache(models.Model):
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
     request = JSONField(default=dict, null=True)
     response = JSONField(default=dict, null=True)
-    priority = models.IntegerField(default=0)
+    rank = models.FloatField(default=0.0, null=True)
     timestamp = models.DateTimeField(auto_now_add=True)
diff --git a/contentcuration/automation/tests/appnexus/test_base.py b/contentcuration/automation/tests/appnexus/test_base.py
index b06cc213b5..3fa3fa8e74 100644
--- a/contentcuration/automation/tests/appnexus/test_base.py
+++ b/contentcuration/automation/tests/appnexus/test_base.py
@@ -1,69 +1,26 @@
 import time
-import pytest
-import requests
 from unittest.mock import patch
 
-from automation.utils.appnexus.base import Adapter
+import mock
+import pytest
+import requests
 from automation.utils.appnexus.base import Backend
 from automation.utils.appnexus.base import BackendRequest
+from automation.utils.appnexus.base import BackendResponse
 from automation.utils.appnexus.base import SessionWithMaxConnectionAge
 from automation.utils.appnexus.errors import ConnectionError
+from automation.utils.appnexus.errors import InvalidResponse
 
 
-class MockBackend(Backend):
-    base_url = 'https://kolibri-dev.learningequality.org'
-    connect_endpoint = '/status'
-    def connect(self) -> None:
-        return super().connect()
-
-    def make_request(self, request):
-        return super().make_request(request)
-
-class ErrorBackend(Backend):
-    base_url = 'https://bad-url.com'
-    connect_endpoint = '/status'
-    def connect(self) -> None:
-        return super().connect()
-    
-    def make_request(self, request):
-        return super().make_request(request)
-
-
-class MockAdapter(Adapter):
-    def mockoperation(self):
-        pass
-
-
-def test_backend_singleton():
-    b1, b2 = MockBackend(), MockBackend()
-    assert id(b1) == id(b2)
-
-
-def test_adapter_creation():
-    a = MockAdapter(backend=MockBackend)
-    assert isinstance(a, Adapter)
-
-
-def test_adapter_backend_default():
-    b = MockBackend()
-    adapter = Adapter(backend=b)
-    assert isinstance(adapter.backend, Backend)
-
-
-def test_adapter_backend_custom():
-    b = MockBackend()
-    a = Adapter(backend=b)
-    assert a.backend is b
-
 def test_session_with_max_connection_age_request():
     with patch.object(requests.Session, 'request') as mock_request:
         session = SessionWithMaxConnectionAge()
         session.request('GET', 'https://example.com')
         assert mock_request.call_count == 1
 
+
 def test_session_with_max_connection_age_not_closing_connections():
-    with patch.object(requests.Session, 'close') as mock_close,\
-        patch.object(requests.Session, 'request') as mock_request:
+    with patch.object(requests.Session, 'close') as mock_close, patch.object(requests.Session, 'request') as mock_request:
         session = SessionWithMaxConnectionAge(60)
         session.request('GET', 'https://example.com')
         time.sleep(0.1)
@@ -72,9 +29,9 @@ def test_session_with_max_connection_age_not_closing_connections():
         assert mock_close.call_count == 0
         assert mock_request.call_count == 2
 
+
 def test_session_with_max_connection_age_closing_connections():
-    with patch.object(requests.Session, 'close') as mock_close,\
-        patch.object(requests.Session, 'request') as mock_request:
+    with patch.object(requests.Session, 'close') as mock_close, patch.object(requests.Session, 'request') as mock_request:
         session = SessionWithMaxConnectionAge(1)
         session.request('GET', 'https://example.com')
         time.sleep(2)
@@ -83,33 +40,55 @@ def test_session_with_max_connection_age_closing_connections():
         assert mock_close.call_count == 1
         assert mock_request.call_count == 2
 
-def test_backend_connect():
-    backend = MockBackend()
-    connected = backend.connect()
 
-    assert connected is True
+@mock.patch("automation.utils.appnexus.base.Backend.connect")
+def test_backend_connect(mock_connect):
+    mock_connect.return_value = True
+
+    backend = Backend()
+    result = backend.connect()
+
+    mock_connect.assert_called_once()
+    assert result is True
 
-def test_backend_connect_error():
-    backend = ErrorBackend()
-    connected = backend.connect()
 
-    assert connected is False
+@mock.patch("automation.utils.appnexus.base.Backend.connect")
+def test_backend_connect_error(mock_connect):
+    mock_connect.side_effect = [ConnectionError("Failed to connect"), False]
 
-def test_backend_request():
-    request = BackendRequest('GET', '/api/public/info')
+    backend = Backend()
 
-    backend = MockBackend()
+    with pytest.raises(ConnectionError) as exc_info:
+        backend.connect()
+    assert str(exc_info.value) == "Failed to connect"
+
+    result = backend.connect()
+    assert result is False
+
+    assert mock_connect.call_count == 2
+
+
+@mock.patch("automation.utils.appnexus.base.Backend.make_request")
+def test_backend_request(mock_make_request):
+    mock_response = BackendResponse(data=[{"key": "value"}])
+    mock_make_request.return_value = mock_response
+
+    backend = Backend()
+    request = BackendRequest(method="GET", path="/api/test")
     response = backend.make_request(request)
 
-    assert response.status_code == 200
-    assert len(response.__dict__) > 0
+    assert response == mock_response
+    mock_make_request.assert_called_once_with(request)
+
 
-def test_backend_request_error():
-    request = BackendRequest('GET', '/api/public/info')
+@mock.patch("automation.utils.appnexus.base.Backend.make_request")
+def test_backend_request_error(mock_make_request):
+    mock_make_request.side_effect = InvalidResponse("Request failed")
 
-    backend = ErrorBackend()
+    backend = Backend()
+    request = BackendRequest(method="GET", path="/api/test")
 
-    with pytest.raises(ConnectionError) as error:
+    with pytest.raises(InvalidResponse) as exc_info:
         backend.make_request(request)
-    
-    assert "Unable to connect to" in str(error.value)
+    assert str(exc_info.value) == "Request failed"
+    mock_make_request.assert_called_once_with(request)
diff --git a/contentcuration/automation/utils/appnexus/base.py b/contentcuration/automation/utils/appnexus/base.py
index f3ea8a9023..626bca1d58 100644
--- a/contentcuration/automation/utils/appnexus/base.py
+++ b/contentcuration/automation/utils/appnexus/base.py
@@ -56,8 +56,7 @@ def __init__(
 
 class BackendResponse(object):
     """ Class that should be inherited by specific backend for its responses"""
-    def __init__(self, error=None, **kwargs):
-        self.error = error
+    def __init__(self, **kwargs):
         for key, value in kwargs.items():
             setattr(self, key, value)
 
diff --git a/contentcuration/contentcuration/tests/utils/test_recommendations.py b/contentcuration/contentcuration/tests/utils/test_recommendations.py
index 0bbb9272dd..e0fce53b76 100644
--- a/contentcuration/contentcuration/tests/utils/test_recommendations.py
+++ b/contentcuration/contentcuration/tests/utils/test_recommendations.py
@@ -5,6 +5,7 @@
 
 from contentcuration.models import ContentNode
 from contentcuration.utils.recommendations import EmbeddingsResponse
+from contentcuration.utils.recommendations import EmbedTopicsRequest
 from contentcuration.utils.recommendations import Recommendations
 from contentcuration.utils.recommendations import RecommendationsAdapter
 from contentcuration.utils.recommendations import RecommendationsResponse
@@ -12,9 +13,9 @@
 
 class RecommendationsTestCase(TestCase):
     def test_backend_initialization(self):
-        recomendations = Recommendations()
-        self.assertIsNotNone(recomendations)
-        self.assertIsInstance(recomendations, Recommendations)
+        recommendations = Recommendations()
+        self.assertIsNotNone(recommendations)
+        self.assertIsInstance(recommendations, Recommendations)
 
 
 class RecommendationsAdapterTestCase(TestCase):
@@ -43,52 +44,45 @@ def test_adapter_initialization(self):
 
     @patch('contentcuration.utils.recommendations.EmbedTopicsRequest')
     def test_get_recommendations_backend_connect_success(self, get_recommendations_request_mock):
+        mock_response = MagicMock(spec=RecommendationsResponse)
+        mock_response.data = []
+        mock_response.error = None
+
         self.adapter.backend.connect.return_value = True
-        self.adapter.backend.make_request.return_value = MagicMock(spec=RecommendationsResponse)
+        self.adapter.backend.make_request.return_value = mock_response
         response = self.adapter.get_recommendations(self.topic)
         self.adapter.backend.connect.assert_called_once()
         self.adapter.backend.make_request.assert_called_once()
         self.assertIsInstance(response, RecommendationsResponse)
 
-    def test_get_recommendations_backend_connect_failure(self):
+    @patch('contentcuration.utils.recommendations.EmbedTopicsRequest')
+    def test_get_recommendations_backend_connect_failure(self, embed_topics_request_mock):
+        mock_request_instance = MagicMock(spec=EmbedTopicsRequest)
+        embed_topics_request_mock.return_value = mock_request_instance
+
         self.adapter.backend.connect.return_value = False
         with self.assertRaises(errors.ConnectionError):
             self.adapter.get_recommendations(self.topic)
         self.adapter.backend.connect.assert_called_once()
         self.adapter.backend.make_request.assert_not_called()
 
-    @patch('contentcuration.utils.recommendations.EmbedTopicsRequest')
-    def test_get_recommendations_make_request_exception(self, get_recommendations_request_mock):
-        self.adapter.backend.connect.return_value = True
-        self.adapter.backend.make_request.side_effect = Exception("Mocked exception")
-        response = self.adapter.get_recommendations(self.topic)
-        self.adapter.backend.connect.assert_called_once()
-        self.adapter.backend.make_request.assert_called_once()
-        self.assertIsInstance(response, RecommendationsResponse)
-        self.assertEqual(str(response.error), "Mocked exception")
-
     @patch('contentcuration.utils.recommendations.EmbedContentRequest')
-    def test_embed_content_backend_connect_success(self, get_recommendations_request_mock):
+    def test_embed_content_backend_connect_success(self, embed_content_request_mock):
+        mock_response = MagicMock(spec=EmbeddingsResponse)
+        mock_response.error = None
+
         self.adapter.backend.connect.return_value = True
-        self.adapter.backend.make_request.return_value = MagicMock(spec=EmbeddingsResponse)
+        self.adapter.backend.make_request.return_value = mock_response
         response = self.adapter.embed_content(self.resources)
         self.adapter.backend.connect.assert_called_once()
         self.adapter.backend.make_request.assert_called_once()
-        self.assertIsInstance(response, EmbeddingsResponse)
+        self.assertIsInstance(response, bool)
+        self.assertTrue(response)
 
-    def test_embed_content_backend_connect_failure(self):
+    @patch('contentcuration.utils.recommendations.EmbedContentRequest')
+    def test_embed_content_backend_connect_failure(self, embed_content_request_mock):
         self.adapter.backend.connect.return_value = False
         with self.assertRaises(errors.ConnectionError):
             self.adapter.embed_content(self.resources)
         self.adapter.backend.connect.assert_called_once()
         self.adapter.backend.make_request.assert_not_called()
-
-    @patch('contentcuration.utils.recommendations.EmbedContentRequest')
-    def test_embed_content_make_request_exception(self, embed_content_request_mock):
-        self.adapter.backend.connect.return_value = True
-        self.adapter.backend.make_request.side_effect = Exception("Mocked exception")
-        response = self.adapter.embed_content(self.resources)
-        self.adapter.backend.connect.assert_called_once()
-        self.adapter.backend.make_request.assert_called_once()
-        self.assertIsInstance(response, EmbeddingsResponse)
-        self.assertEqual(str(response.error), "Mocked exception")
diff --git a/contentcuration/contentcuration/utils/automation_manager.py b/contentcuration/contentcuration/utils/automation_manager.py
index 82a4344c02..e84fc38389 100644
--- a/contentcuration/contentcuration/utils/automation_manager.py
+++ b/contentcuration/contentcuration/utils/automation_manager.py
@@ -1,42 +1,36 @@
 from typing import Any
 from typing import Dict
+from typing import List
 
+from contentcuration.models import ContentNode
 from contentcuration.utils.recommendations import RecommendationsAdapter
 from contentcuration.utils.recommendations import RecommendationsBackendFactory
 
 
 class AutomationManager:
     def __init__(self):
-        self.recommendations_backend_factory = RecommendationsBackendFactory()
-        self.recommendations_backend_instance = self.recommendations_backend_factory.create_backend()
-        self.recommendations_backend_adapter = RecommendationsAdapter(self.recommendations_backend_instance)
+        self.factory = RecommendationsBackendFactory()
+        self.instance = self.factory.create_backend()
+        self.adapter = RecommendationsAdapter(self.instance)
 
-    def generate_embedding(self, topic: Dict[str, Any]):
+    def generate_embeddings(self, nodes: List[ContentNode]):
         """
-        Generate an embedding vector for the given topic.
-        Args:
-            topic (dict): The topic for which to generate an embedding vector.
-        Returns:
-            Vector: The generated embedding vector.
-        """
-        return self.recommendations_backend_adapter.generate_embeddings(topic)
+        Generates embeddings for the given list of nodes. This process is async.
+
+        :param nodes: The list of nodes for which to generate embeddings.
 
-    def response_exists(self, request):
-        return self.recommendations_backend_adapter.response_exists(request=request)
+        :return: A boolean indicating that the process has started.
+        """
+        return self.adapter.embed_content(nodes)
 
     def load_recommendations(self, topic: Dict[str, Any], override_threshold=False):
         """
-        Load recommendations for the given topic.
+        Loads recommendations for the given topic.
 
-        Parameters:
-            :param topic: A dictionary containing the topic for which to get recommendations.
-            :param override_threshold: A boolean flag to override the recommendation threshold.
+        :param topic: A dictionary containing the topic for which to get recommendations.
+        :param override_threshold: A boolean flag to override the recommendation threshold.
 
-        Returns:
-            list: A list of recommended resources.
+        :return: A list of recommendations for the given topic.
         """
-        self.recommendations_backend_adapter.get_recommendations(topic=topic, override_threshold=override_threshold)
+        self.adapter.get_recommendations(topic=topic, override_threshold=override_threshold)
         return []
-
-    def cache_embeddings(self, request, response):
-        return self.recommendations_backend_adapter.cache_embeddings_request(request, response)
diff --git a/contentcuration/contentcuration/utils/recommendations.py b/contentcuration/contentcuration/utils/recommendations.py
index 5e6f2e36c0..d172dcbbb4 100644
--- a/contentcuration/contentcuration/utils/recommendations.py
+++ b/contentcuration/contentcuration/utils/recommendations.py
@@ -20,28 +20,29 @@
 
 
 class RecommendationsBackendRequest(BackendRequest):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
 
 
 class RecommendationsRequest(RecommendationsBackendRequest):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
 
 
 class EmbeddingsRequest(RecommendationsBackendRequest):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
 
 
 class RecommendationsBackendResponse(BackendResponse):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
 
 
 class RecommendationsResponse(RecommendationsBackendResponse):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, results: List[Any], **kwargs):
+        super().__init__(**kwargs)
+        self.results = results
 
 
 class EmbedTopicsRequest(EmbeddingsRequest):
@@ -55,8 +56,8 @@ class EmbedContentRequest(EmbeddingsRequest):
 
 
 class EmbeddingsResponse(RecommendationsBackendResponse):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
 
 
 class RecommendationsBackendFactory(BackendFactory):
@@ -69,18 +70,25 @@ class RecommendationsAdapter(Adapter):
 
     def generate_embeddings(self, request: EmbeddingsRequest,
                             cache: bool = True) -> EmbeddingsResponse:
+        """
+        Generates recommendations for the given request.
 
+        :param request: The request for which to generate embeddings.
+        :param cache: Whether to cache the embeddings request.
+        :return: The response containing the recommendations.
+        :rtype: EmbeddingsResponse
+        """
         if not self.backend.connect():
             raise errors.ConnectionError("Connection to the backend failed")
 
-        if cache:
-            cached_response = self.response_exists(request)
-            if cached_response:
-                return cached_response
+        cached_response = self.response_exists(request)
+        if cached_response:
+            return cached_response
 
         try:
             response = self.backend.make_request(request)
         except Exception as e:
+            logging.exception(e)
             response = EmbeddingsResponse(error=e)
 
         if not response.error and cache:
@@ -88,17 +96,44 @@ def generate_embeddings(self, request: EmbeddingsRequest,
 
         return response
 
-    def response_exists(self, request) -> EmbeddingsResponse:
-        cache = RecommendationsCache.objects.filter(request=request).first()
-        if cache:
-            cached_response = cache.response
-        else:
-            cached_response = None
-        return EmbeddingsResponse(cached_response)
+    def response_exists(self, request) -> Union[EmbeddingsResponse, None]:
+        """
+        Checks if a cached response exists for the given request.
+
+        :param request: The request for which to check if a cached response exists.
+        :return: The cached response if it exists, otherwise None.
+        :rtype: Union[EmbeddingsResponse, None]
+        """
+        try:
+            cache = list(RecommendationsCache.objects.filter(request=request).order_by('rank'))
+            data = [entry.response for entry in cache]
+            if len(data) > 0:
+                return EmbeddingsResponse(data=data)
+            else:
+                return None
+        except Exception as e:
+            logging.exception(e)
+            return None
 
     def cache_embeddings_request(self, request: BackendRequest, response: BackendResponse) -> bool:
+        """
+        Caches the recommendations request and response.
+
+        :param request: The request to cache.
+        :param response: The response to cache.
+        :return: A boolean indicating whether the caching was successful.
+        :rtype: bool
+        """
         try:
-            RecommendationsCache.objects.create(request=request, response=response, priority=2)
+            nodes = self._extract_data(response)
+            cache = [
+                RecommendationsCache(
+                    request=request,
+                    response=node,
+                    rank=node['rank'],
+                ) for node in nodes
+            ]
+            RecommendationsCache.objects.bulk_create(cache)
             return True
         except Exception as e:
             logging.exception(e)
@@ -106,41 +141,55 @@ def cache_embeddings_request(self, request: BackendRequest, response: BackendRes
 
     def get_recommendations(self, topic: Dict[str, Any],
                             override_threshold=False) -> RecommendationsResponse:
-        # Generate the embedding for the topic
+        """
+        Get recommendations for the given topic.
+
+        :param topic: A dictionary containing the topic for which to get recommendations. See
+        https://github.com/learningequality/le-utils/blob/main/spec/schema-embed_topics_request.json
+        :param override_threshold: A boolean flag to override the recommendation threshold.
+        :return: The recommendations for the given topic. :rtype: RecommendationsResponse
+        """
         recommendations = []
         request = EmbedTopicsRequest(
             params={'override_threshold': override_threshold},
             json=topic,
         )
         response = self.generate_embeddings(request=request)
-        nodes = response.data
-        if nodes is not None and len(nodes) > 0:
+        nodes = self._extract_data(response)
+        if len(nodes) > 0:
             node_ids = [node['contentnode_id'] for node in nodes]
-            recommendations = ContentNode.objects.filter(id__in=node_ids)
+            recommendations = list(ContentNode.objects.filter(id__in=node_ids))
 
-        return RecommendationsResponse(recommendations)
+        return RecommendationsResponse(results=recommendations)
 
-    def embed_content(self, nodes: List[ContentNode]) -> EmbeddingsResponse:
+    def _extract_data(self, response: BackendResponse) -> List[Dict[str, Any]]:
+        return response.data if response.data is not None else []
+
+    def embed_content(self, nodes: List[ContentNode]) -> bool:
+        """
+        Embeds the content for the given nodes. This is an asynchronous process and could take a
+        while to complete. This process is handled by our curriculum automation service.
+        See https://github.com/learningequality/curriculum-automation
+
+        :param nodes: The nodes for which to embed the content.
+        :return: A boolean indicating that content embedding process has started.
+        :rtype: bool
+        """
         for i in range(0, len(nodes), 20):
             batch = nodes[i:i + 20]
             content = [self.extract_content(node) for node in batch]
             request = EmbedContentRequest(json=content)
             self.generate_embeddings(request=request, cache=False)
 
-        return EmbeddingsResponse()
+        return True
 
     def extract_content(self, node: ContentNode) -> Dict[str, Any]:
         """
-        Extracts content metadata from a given ContentNode object.
+        Extracts the content from the given node.
 
-        This method extracts the content metadata from the provided ContentNode object.
-        The extracted metadata is returned as a dictionary.
-
-        Parameters:
-        node (ContentNode): The ContentNode object from which to extract the content metadata.
-
-        Returns:
-        Dict[str, Any]: A dictionary containing the extracted content metadata.
+        :param node: The node from which to extract the content.
+        :return: A dictionary containing the extracted content.
+        :rtype: Dict[str, Any]
         """
         contentkind_to_presets = {
             content_kinds.AUDIO: [format_presets.AUDIO, format_presets.AUDIO_DEPENDENCY],
@@ -159,10 +208,14 @@ def extract_content(self, node: ContentNode) -> Dict[str, Any]:
 
         contentkind = node.kind
         presets = contentkind_to_presets.get(contentkind.kind)
-        files = self.get_content_files(node, presets) if presets else None
+        files = self._get_content_files(node, presets) if presets else None
 
-        channel = Channel.object.filter(main_tree=node).first()
-        channel_id = channel.id if channel else None
+        try:
+            channel = Channel.object.filter(main_tree=node).first()
+            channel_id = channel.id if channel else None
+        except Exception as e:
+            logging.exception(e)
+            channel_id = None
 
         return {
             "resources": {
@@ -178,8 +231,23 @@ def extract_content(self, node: ContentNode) -> Dict[str, Any]:
             },
         }
 
-    def get_content_files(self, node, presets) -> List[Dict[str, Any]]:
-        node_files = File.objects.filter(contentnode=node, preset__in=presets)
+    def _get_content_files(self, node, presets) -> List[Dict[str, Any]]:
+        """
+        Get the content files for the given node and presets. See
+        https://github.com/learningequality/le-utils/blob/main/spec/schema-embed_topics_request.json
+        for the file schema.
+
+        :param node: The node for which to get the content files.
+        :param presets: The presets for which to get the content files.
+        :return: A list of dictionaries containing the content files.
+        :rtype: List[Dict[str, Any]]
+        """
+        try:
+            node_files = File.objects.filter(contentnode=node, preset__in=presets)
+        except Exception as e:
+            logging.exception(e)
+            node_files = []
+
         files = []
         for file in node_files:
             file_dict = {
@@ -193,7 +261,7 @@ def get_content_files(self, node, presets) -> List[Dict[str, Any]]:
 
 class Recommendations(Backend):
 
-    def connect(self) -> None:
+    def connect(self) -> bool:
         return super().connect()
 
     def make_request(self, request) -> Union[EmbeddingsResponse, RecommendationsResponse]:

From 61e8b62979a56ab22b44cdaae501db012099a773 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Tue, 16 Jul 2024 18:28:06 +0300
Subject: [PATCH 06/21] Remove extra migrations

---
 .../automation/migrations/0001_initial.py     | 26 -------------------
 .../migrations/0002_auto_20240711_1938.py     | 22 ----------------
 2 files changed, 48 deletions(-)
 delete mode 100644 contentcuration/automation/migrations/0001_initial.py
 delete mode 100644 contentcuration/automation/migrations/0002_auto_20240711_1938.py

diff --git a/contentcuration/automation/migrations/0001_initial.py b/contentcuration/automation/migrations/0001_initial.py
deleted file mode 100644
index 85e58b7e16..0000000000
--- a/contentcuration/automation/migrations/0001_initial.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# Generated by Django 3.2.24 on 2024-07-11 11:33
-import uuid
-
-from django.db import migrations
-from django.db import models
-
-
-class Migration(migrations.Migration):
-
-    initial = True
-
-    dependencies = [
-    ]
-
-    operations = [
-        migrations.CreateModel(
-            name='RecommendationsCache',
-            fields=[
-                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
-                ('request', models.JSONField(default=dict, null=True)),
-                ('response', models.JSONField(default=dict, null=True)),
-                ('priority', models.IntegerField(default=0)),
-                ('timestamp', models.DateTimeField(auto_now_add=True)),
-            ],
-        ),
-    ]
diff --git a/contentcuration/automation/migrations/0002_auto_20240711_1938.py b/contentcuration/automation/migrations/0002_auto_20240711_1938.py
deleted file mode 100644
index b0f813b599..0000000000
--- a/contentcuration/automation/migrations/0002_auto_20240711_1938.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# Generated by Django 3.2.24 on 2024-07-11 19:38
-from django.db import migrations
-from django.db import models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('automation', '0001_initial'),
-    ]
-
-    operations = [
-        migrations.RemoveField(
-            model_name='recommendationscache',
-            name='priority',
-        ),
-        migrations.AddField(
-            model_name='recommendationscache',
-            name='rank',
-            field=models.FloatField(default=0.0, null=True),
-        ),
-    ]

From b920691832399b9ce57c3573b2ac5c7f4881ccea Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Tue, 16 Jul 2024 18:31:55 +0300
Subject: [PATCH 07/21] Adds new migration

---
 .../automation/migrations/0001_initial.py     | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 contentcuration/automation/migrations/0001_initial.py

diff --git a/contentcuration/automation/migrations/0001_initial.py b/contentcuration/automation/migrations/0001_initial.py
new file mode 100644
index 0000000000..c23cdeeba5
--- /dev/null
+++ b/contentcuration/automation/migrations/0001_initial.py
@@ -0,0 +1,26 @@
+# Generated by Django 3.2.24 on 2024-07-16 15:30
+import uuid
+
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+
+    initial = True
+
+    dependencies = [
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='RecommendationsCache',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
+                ('request', models.JSONField(default=dict, null=True)),
+                ('response', models.JSONField(default=dict, null=True)),
+                ('rank', models.FloatField(default=0.0, null=True)),
+                ('timestamp', models.DateTimeField(auto_now_add=True)),
+            ],
+        ),
+    ]

From 8c23f5fdf522469dea92684d4db6a49703ad8dcc Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Wed, 24 Jul 2024 17:42:40 +0300
Subject: [PATCH 08/21] implements feedback

---
 .../automation/migrations/0001_initial.py     |  16 +-
 contentcuration/automation/models.py          |  13 +-
 .../tests/utils/test_recommendations.py       |  50 ++++-
 .../utils/automation_manager.py               |  10 +-
 .../contentcuration/utils/recommendations.py  | 190 ++++++++++++------
 5 files changed, 205 insertions(+), 74 deletions(-)

diff --git a/contentcuration/automation/migrations/0001_initial.py b/contentcuration/automation/migrations/0001_initial.py
index c23cdeeba5..87ee422398 100644
--- a/contentcuration/automation/migrations/0001_initial.py
+++ b/contentcuration/automation/migrations/0001_initial.py
@@ -1,26 +1,32 @@
-# Generated by Django 3.2.24 on 2024-07-16 15:30
+# Generated by Django 3.2.24 on 2024-07-24 13:18
 import uuid
 
+import django.db.models.deletion
 from django.db import migrations
 from django.db import models
 
 
 class Migration(migrations.Migration):
-
     initial = True
 
     dependencies = [
+        ('kolibri_public', '0003_alter_file_preset'),
     ]
 
     operations = [
         migrations.CreateModel(
             name='RecommendationsCache',
             fields=[
-                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
-                ('request', models.JSONField(default=dict, null=True)),
-                ('response', models.JSONField(default=dict, null=True)),
+                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True,
+                                        serialize=False)),
+                ('request_hash', models.CharField(max_length=32, null=True)),
                 ('rank', models.FloatField(default=0.0, null=True)),
+                ('override_threshold', models.BooleanField(default=False)),
                 ('timestamp', models.DateTimeField(auto_now_add=True)),
+                ('response', models.ForeignKey(blank=True, null=True,
+                                               on_delete=django.db.models.deletion.SET_NULL,
+                                               related_name='recommendations',
+                                               to='kolibri_public.contentnode')),
             ],
         ),
     ]
diff --git a/contentcuration/automation/models.py b/contentcuration/automation/models.py
index c5ade9096d..39b1819f59 100644
--- a/contentcuration/automation/models.py
+++ b/contentcuration/automation/models.py
@@ -1,12 +1,19 @@
 import uuid
 
 from django.db import models
-from django.db.models import JSONField
+from kolibri_public.models import ContentNode
 
 
 class RecommendationsCache(models.Model):
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
-    request = JSONField(default=dict, null=True)
-    response = JSONField(default=dict, null=True)
+    request_hash = models.CharField(max_length=32, null=True)
+    response = models.ForeignKey(
+        ContentNode,
+        null=True,
+        blank=True,
+        related_name='recommendations',
+        on_delete=models.SET_NULL,
+    )
     rank = models.FloatField(default=0.0, null=True)
+    override_threshold = models.BooleanField(default=False)
     timestamp = models.DateTimeField(auto_now_add=True)
diff --git a/contentcuration/contentcuration/tests/utils/test_recommendations.py b/contentcuration/contentcuration/tests/utils/test_recommendations.py
index e0fce53b76..f395005b63 100644
--- a/contentcuration/contentcuration/tests/utils/test_recommendations.py
+++ b/contentcuration/contentcuration/tests/utils/test_recommendations.py
@@ -34,6 +34,7 @@ def setUp(self):
                 }
             ]
         }
+        self.channel_id = 'test_channel_id'
         self.resources = [
             MagicMock(spec=ContentNode),
         ]
@@ -42,6 +43,51 @@ def test_adapter_initialization(self):
         self.assertIsNotNone(self.adapter)
         self.assertIsInstance(self.adapter, RecommendationsAdapter)
 
+    def test_generate_embeddings_connect_failure(self):
+        mock_response = MagicMock(spec=EmbeddingsResponse)
+
+        self.adapter.backend.connect.return_value = False
+        self.adapter.backend.make_request.return_value = mock_response
+        with self.assertRaises(errors.ConnectionError):
+            self.adapter.generate_embeddings(EmbedTopicsRequest(
+                method='POST',
+                url='http://test.com',
+                path='/test/path',
+            ))
+        self.adapter.backend.connect.assert_called_once()
+        self.adapter.backend.make_request.assert_not_called()
+
+    def test_generate_embeddings(self):
+        mock_response = MagicMock(spec=EmbeddingsResponse)
+        mock_response.error = None
+
+        self.adapter.backend.connect.return_value = True
+        self.adapter.backend.make_request.return_value = mock_response
+        response = self.adapter.generate_embeddings(EmbedTopicsRequest(
+            method='POST',
+            url='http://test.com',
+            path='/test/path',
+        ))
+        self.adapter.backend.connect.assert_called_once()
+        self.adapter.backend.make_request.assert_called_once()
+        self.assertIsInstance(response, EmbeddingsResponse)
+
+    def test_generate_embeddings_failure(self):
+        mock_response = MagicMock(spec=EmbeddingsResponse)
+        mock_response.error = {}
+
+        self.adapter.backend.connect.return_value = True
+        self.adapter.backend.make_request.return_value = mock_response
+        response = self.adapter.generate_embeddings(EmbedTopicsRequest(
+            method='POST',
+            url='http://test.com',
+            path='/test/path',
+        ))
+        self.adapter.backend.connect.assert_called_once()
+        self.adapter.backend.make_request.assert_called_once()
+        self.assertIsInstance(response, EmbeddingsResponse)
+        self.assertIsNotNone(response.error)
+
     @patch('contentcuration.utils.recommendations.EmbedTopicsRequest')
     def test_get_recommendations_backend_connect_success(self, get_recommendations_request_mock):
         mock_response = MagicMock(spec=RecommendationsResponse)
@@ -73,7 +119,7 @@ def test_embed_content_backend_connect_success(self, embed_content_request_mock)
 
         self.adapter.backend.connect.return_value = True
         self.adapter.backend.make_request.return_value = mock_response
-        response = self.adapter.embed_content(self.resources)
+        response = self.adapter.embed_content(self.channel_id, self.resources)
         self.adapter.backend.connect.assert_called_once()
         self.adapter.backend.make_request.assert_called_once()
         self.assertIsInstance(response, bool)
@@ -83,6 +129,6 @@ def test_embed_content_backend_connect_success(self, embed_content_request_mock)
     def test_embed_content_backend_connect_failure(self, embed_content_request_mock):
         self.adapter.backend.connect.return_value = False
         with self.assertRaises(errors.ConnectionError):
-            self.adapter.embed_content(self.resources)
+            self.adapter.embed_content(self.channel_id, self.resources)
         self.adapter.backend.connect.assert_called_once()
         self.adapter.backend.make_request.assert_not_called()
diff --git a/contentcuration/contentcuration/utils/automation_manager.py b/contentcuration/contentcuration/utils/automation_manager.py
index e84fc38389..fbc76a0b5e 100644
--- a/contentcuration/contentcuration/utils/automation_manager.py
+++ b/contentcuration/contentcuration/utils/automation_manager.py
@@ -1,8 +1,11 @@
 from typing import Any
 from typing import Dict
 from typing import List
+from typing import Union
 
-from contentcuration.models import ContentNode
+from kolibri_public.models import ContentNode as PublicContentNode
+
+from contentcuration.models import ContentNode as ContentNode
 from contentcuration.utils.recommendations import RecommendationsAdapter
 from contentcuration.utils.recommendations import RecommendationsBackendFactory
 
@@ -13,15 +16,16 @@ def __init__(self):
         self.instance = self.factory.create_backend()
         self.adapter = RecommendationsAdapter(self.instance)
 
-    def generate_embeddings(self, nodes: List[ContentNode]):
+    def generate_embeddings(self, channel_id: str, nodes: List[Union[ContentNode, PublicContentNode]]):
         """
         Generates embeddings for the given list of nodes. This process is async.
 
+        :param channel_id: The channel id to which the nodes belong.
         :param nodes: The list of nodes for which to generate embeddings.
 
         :return: A boolean indicating that the process has started.
         """
-        return self.adapter.embed_content(nodes)
+        return self.adapter.embed_content(channel_id, nodes)
 
     def load_recommendations(self, topic: Dict[str, Any], override_threshold=False):
         """
diff --git a/contentcuration/contentcuration/utils/recommendations.py b/contentcuration/contentcuration/utils/recommendations.py
index d172dcbbb4..c0a604911d 100644
--- a/contentcuration/contentcuration/utils/recommendations.py
+++ b/contentcuration/contentcuration/utils/recommendations.py
@@ -1,3 +1,5 @@
+import hashlib
+import json
 import logging
 from typing import Any
 from typing import Dict
@@ -11,11 +13,11 @@
 from automation.utils.appnexus.base import BackendFactory
 from automation.utils.appnexus.base import BackendRequest
 from automation.utils.appnexus.base import BackendResponse
+from kolibri_public.models import ContentNode as PublicContentNode
 from le_utils.constants import content_kinds
 from le_utils.constants import format_presets
 
-from contentcuration.models import Channel
-from contentcuration.models import ContentNode
+from contentcuration.models import ContentNode as ContentNode
 from contentcuration.models import File
 
 
@@ -68,32 +70,23 @@ def create_backend(self) -> Backend:
 
 class RecommendationsAdapter(Adapter):
 
-    def generate_embeddings(self, request: EmbeddingsRequest,
-                            cache: bool = True) -> EmbeddingsResponse:
+    def generate_embeddings(self, request: EmbeddingsRequest) -> EmbeddingsResponse:
         """
-        Generates recommendations for the given request.
+        Generates embeddings for the given request.
 
         :param request: The request for which to generate embeddings.
-        :param cache: Whether to cache the embeddings request.
         :return: The response containing the recommendations.
         :rtype: EmbeddingsResponse
         """
         if not self.backend.connect():
             raise errors.ConnectionError("Connection to the backend failed")
 
-        cached_response = self.response_exists(request)
-        if cached_response:
-            return cached_response
-
         try:
             response = self.backend.make_request(request)
         except Exception as e:
             logging.exception(e)
             response = EmbeddingsResponse(error=e)
 
-        if not response.error and cache:
-            self.cache_embeddings_request(request, response)
-
         return response
 
     def response_exists(self, request) -> Union[EmbeddingsResponse, None]:
@@ -105,8 +98,13 @@ def response_exists(self, request) -> Union[EmbeddingsResponse, None]:
         :rtype: Union[EmbeddingsResponse, None]
         """
         try:
-            cache = list(RecommendationsCache.objects.filter(request=request).order_by('rank'))
-            data = [entry.response for entry in cache]
+            request_hash = self._generate_request_hash(request)
+            cache = list(
+                RecommendationsCache.objects.filter(request_hash=request_hash).order_by('rank'))
+            data = [{
+                'contentnode_id': entry.response,
+                'rank': entry.rank,
+            } for entry in cache]
             if len(data) > 0:
                 return EmbeddingsResponse(data=data)
             else:
@@ -115,6 +113,23 @@ def response_exists(self, request) -> Union[EmbeddingsResponse, None]:
             logging.exception(e)
             return None
 
+    def _generate_request_hash(self, request) -> str:
+        """
+        Generates a unique hash for a given request.
+
+        This method serializes the request attributes that make it unique,
+        then generates a hash of this serialization.
+
+        :param request: The request for which to generate a unique hash.
+        :return: A unique hash representing the request
+        """
+        unique_attributes = json.dumps({
+            'params': request.params,
+            'json': request.json,
+        }, sort_keys=True).encode('utf-8')
+
+        return hashlib.md5(unique_attributes).hexdigest()
+
     def cache_embeddings_request(self, request: BackendRequest, response: BackendResponse) -> bool:
         """
         Caches the recommendations request and response.
@@ -125,12 +140,15 @@ def cache_embeddings_request(self, request: BackendRequest, response: BackendRes
         :rtype: bool
         """
         try:
+            request_hash = self._generate_request_hash(request)
             nodes = self._extract_data(response)
+            override_threshold = request.params.get('override_threshold', False)
             cache = [
                 RecommendationsCache(
-                    request=request,
-                    response=node,
+                    request_hash=request_hash,
+                    response=node['contentnode_id'],
                     rank=node['rank'],
+                    override_threshold=override_threshold,
                 ) for node in nodes
             ]
             RecommendationsCache.objects.bulk_create(cache)
@@ -149,12 +167,21 @@ def get_recommendations(self, topic: Dict[str, Any],
         :param override_threshold: A boolean flag to override the recommendation threshold.
         :return: The recommendations for the given topic. :rtype: RecommendationsResponse
         """
+
         recommendations = []
         request = EmbedTopicsRequest(
             params={'override_threshold': override_threshold},
             json=topic,
         )
-        response = self.generate_embeddings(request=request)
+
+        cached_response = self.response_exists(request)
+        if cached_response:
+            response = cached_response
+        else:
+            response = self.generate_embeddings(request=request)
+            if not response.error:
+                self.cache_embeddings_request(request, response)
+
         nodes = self._extract_data(response)
         if len(nodes) > 0:
             node_ids = [node['contentnode_id'] for node in nodes]
@@ -163,27 +190,61 @@ def get_recommendations(self, topic: Dict[str, Any],
         return RecommendationsResponse(results=recommendations)
 
     def _extract_data(self, response: BackendResponse) -> List[Dict[str, Any]]:
-        return response.data if response.data is not None else []
+        """
+        Extracts the data from the given response.
+
+        The response is of the form:
+
+        {
+            "data": [
+                {
+                    "contentnode_id": "<some node id>",
+                    "rank": 0.7
+                }
+            ]
+        }
+
+        :param response: A response from which to extract the data.
+        :return: The extracted data.
+        :rtype: List[Dict[str, Any]]
+        """
+        return response.data if not response.data else []
 
-    def embed_content(self, nodes: List[ContentNode]) -> bool:
+    def embed_content(self, channel_id: str,
+                      nodes: List[Union[ContentNode, PublicContentNode]]) -> bool:
         """
         Embeds the content for the given nodes. This is an asynchronous process and could take a
         while to complete. This process is handled by our curriculum automation service.
-        See https://github.com/learningequality/curriculum-automation
+        See https://github.com/learningequality/curriculum-automation. Also, see
+        https://github.com/learningequality/le-utils/blob/main/spec/schema-embed_content_request.json
+        for the schema.
 
+        :param channel_id: The channel ID to which the nodes belong.
         :param nodes: The nodes for which to embed the content.
         :return: A boolean indicating that content embedding process has started.
         :rtype: bool
         """
+        if not self.backend.connect():
+            raise errors.ConnectionError("Connection to the backend failed")
+
         for i in range(0, len(nodes), 20):
-            batch = nodes[i:i + 20]
-            content = [self.extract_content(node) for node in batch]
-            request = EmbedContentRequest(json=content)
-            self.generate_embeddings(request=request, cache=False)
+            try:
+                batch = nodes[i:i + 20]
+                content = [self.extract_content(node) for node in batch]
+                content_body = {
+                    'resources': content,
+                    'metadata': {
+                        'channel_id': channel_id,
+                    }
+                }
+                request = EmbedContentRequest(json=content_body)
+                self.backend.make_request(request)
+            except Exception as e:
+                logging.exception(e)
 
         return True
 
-    def extract_content(self, node: ContentNode) -> Dict[str, Any]:
+    def extract_content(self, node) -> Dict[str, Any]:
         """
         Extracts the content from the given node.
 
@@ -192,16 +253,29 @@ def extract_content(self, node: ContentNode) -> Dict[str, Any]:
         :rtype: Dict[str, Any]
         """
         contentkind_to_presets = {
-            content_kinds.AUDIO: [format_presets.AUDIO, format_presets.AUDIO_DEPENDENCY],
+            content_kinds.AUDIO: [
+                format_presets.AUDIO,
+                format_presets.AUDIO_DEPENDENCY,
+            ],
             content_kinds.VIDEO: [
-                format_presets.VIDEO_DEPENDENCY,
                 format_presets.VIDEO_HIGH_RES,
                 format_presets.VIDEO_LOW_RES,
                 format_presets.VIDEO_SUBTITLE,
+                format_presets.VIDEO_DEPENDENCY,
+            ],
+            content_kinds.EXERCISE: [
+                format_presets.EXERCISE,
+                format_presets.QTI_ZIP,
+            ],
+            content_kinds.DOCUMENT: [
+                format_presets.DOCUMENT,
+                format_presets.EPUB,
+            ],
+            content_kinds.HTML5: [
+                format_presets.HTML5_ZIP,
+                format_presets.AUDIO_DEPENDENCY,
+                format_presets.VIDEO_DEPENDENCY,
             ],
-            content_kinds.EXERCISE: [format_presets.DOCUMENT, format_presets.EPUB],
-            content_kinds.DOCUMENT: [format_presets.DOCUMENT, format_presets.EPUB],
-            content_kinds.HTML5: [format_presets.HTML5_ZIP],
             content_kinds.H5P: [format_presets.H5P_ZIP],
             content_kinds.ZIM: [format_presets.ZIM],
         }
@@ -210,54 +284,48 @@ def extract_content(self, node: ContentNode) -> Dict[str, Any]:
         presets = contentkind_to_presets.get(contentkind.kind)
         files = self._get_content_files(node, presets) if presets else None
 
-        try:
-            channel = Channel.object.filter(main_tree=node).first()
-            channel_id = channel.id if channel else None
-        except Exception as e:
-            logging.exception(e)
-            channel_id = None
-
         return {
-            "resources": {
-                "id": node.node_id,
-                "title": node.title,
-                "description": node.description,
-                "text": "",
-                "language": node.language.lang_code if node.language else None,
-                "files": files
-            },
-            "metadata": {
-                "channel_id": channel_id,
-            },
+            "id": node.node_id,
+            "title": node.title,
+            "description": node.description,
+            "text": "",
+            "language": node.language.lang_code if node.language else None,
+            "files": files,
         }
 
     def _get_content_files(self, node, presets) -> List[Dict[str, Any]]:
         """
-        Get the content files for the given node and presets. See
-        https://github.com/learningequality/le-utils/blob/main/spec/schema-embed_topics_request.json
-        for the file schema.
+        Get the content files for the given node and presets.
 
         :param node: The node for which to get the content files.
         :param presets: The presets for which to get the content files.
         :return: A list of dictionaries containing the content files.
         :rtype: List[Dict[str, Any]]
         """
+        files = []
         try:
             node_files = File.objects.filter(contentnode=node, preset__in=presets)
+            for file in node_files:
+                files.append(self._format_file_data(file))
         except Exception as e:
             logging.exception(e)
-            node_files = []
 
-        files = []
-        for file in node_files:
-            file_dict = {
-                'url': file.source_url,
-                'preset': file.preset_id,
-                'language': file.language.lang_code if file.language else None
-            }
-            files.append(file_dict)
         return files
 
+    def _format_file_data(self, file) -> Dict[str, Any]:
+        """
+        Format the file data into a dictionary.
+
+        :param file: The file for which to format its data.
+        :return: A dictionary containing the formatted file data.
+        :rtype: Dict[str, Any]
+        """
+        return {
+            'url': file.file_on_disk,
+            'preset': file.preset_id,
+            'language': file.language.lang_code if file.language else None,
+        }
+
 
 class Recommendations(Backend):
 

From 9dbbc5c0fa0b62843e0fb577c478bfe8dda9a3e1 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Fri, 26 Jul 2024 14:56:34 +0300
Subject: [PATCH 09/21] Use values to query dictionary

---
 .../automation/migrations/0001_initial.py     | 32 -------------------
 contentcuration/automation/models.py          |  2 +-
 .../contentcuration/utils/recommendations.py  | 13 ++++----
 3 files changed, 7 insertions(+), 40 deletions(-)
 delete mode 100644 contentcuration/automation/migrations/0001_initial.py

diff --git a/contentcuration/automation/migrations/0001_initial.py b/contentcuration/automation/migrations/0001_initial.py
deleted file mode 100644
index 87ee422398..0000000000
--- a/contentcuration/automation/migrations/0001_initial.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Generated by Django 3.2.24 on 2024-07-24 13:18
-import uuid
-
-import django.db.models.deletion
-from django.db import migrations
-from django.db import models
-
-
-class Migration(migrations.Migration):
-    initial = True
-
-    dependencies = [
-        ('kolibri_public', '0003_alter_file_preset'),
-    ]
-
-    operations = [
-        migrations.CreateModel(
-            name='RecommendationsCache',
-            fields=[
-                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True,
-                                        serialize=False)),
-                ('request_hash', models.CharField(max_length=32, null=True)),
-                ('rank', models.FloatField(default=0.0, null=True)),
-                ('override_threshold', models.BooleanField(default=False)),
-                ('timestamp', models.DateTimeField(auto_now_add=True)),
-                ('response', models.ForeignKey(blank=True, null=True,
-                                               on_delete=django.db.models.deletion.SET_NULL,
-                                               related_name='recommendations',
-                                               to='kolibri_public.contentnode')),
-            ],
-        ),
-    ]
diff --git a/contentcuration/automation/models.py b/contentcuration/automation/models.py
index 39b1819f59..a3d618f462 100644
--- a/contentcuration/automation/models.py
+++ b/contentcuration/automation/models.py
@@ -7,7 +7,7 @@
 class RecommendationsCache(models.Model):
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
     request_hash = models.CharField(max_length=32, null=True)
-    response = models.ForeignKey(
+    contentnode_id = models.ForeignKey(
         ContentNode,
         null=True,
         blank=True,
diff --git a/contentcuration/contentcuration/utils/recommendations.py b/contentcuration/contentcuration/utils/recommendations.py
index c0a604911d..49859744c7 100644
--- a/contentcuration/contentcuration/utils/recommendations.py
+++ b/contentcuration/contentcuration/utils/recommendations.py
@@ -99,12 +99,11 @@ def response_exists(self, request) -> Union[EmbeddingsResponse, None]:
         """
         try:
             request_hash = self._generate_request_hash(request)
-            cache = list(
-                RecommendationsCache.objects.filter(request_hash=request_hash).order_by('rank'))
-            data = [{
-                'contentnode_id': entry.response,
-                'rank': entry.rank,
-            } for entry in cache]
+            data = list(
+                RecommendationsCache.objects.filter(request_hash=request_hash)
+                .order_by('rank')
+                .values('contentnode_id', 'rank')
+            )
             if len(data) > 0:
                 return EmbeddingsResponse(data=data)
             else:
@@ -146,7 +145,7 @@ def cache_embeddings_request(self, request: BackendRequest, response: BackendRes
             cache = [
                 RecommendationsCache(
                     request_hash=request_hash,
-                    response=node['contentnode_id'],
+                    contentnode_id=node['contentnode_id'],
                     rank=node['rank'],
                     override_threshold=override_threshold,
                 ) for node in nodes

From c7c598a5d7ce344f991ea25e34bc2024efa8ed64 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Mon, 29 Jul 2024 14:18:50 +0300
Subject: [PATCH 10/21] Improves cache implementation

---
 .../contentcuration/utils/recommendations.py  | 35 +++++++++++++++----
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/contentcuration/contentcuration/utils/recommendations.py b/contentcuration/contentcuration/utils/recommendations.py
index 49859744c7..04fc6b8a3c 100644
--- a/contentcuration/contentcuration/utils/recommendations.py
+++ b/contentcuration/contentcuration/utils/recommendations.py
@@ -119,11 +119,18 @@ def _generate_request_hash(self, request) -> str:
         This method serializes the request attributes that make it unique,
         then generates a hash of this serialization.
 
+        To prevent cache duplication, the hash is generated
+        independent of the override_threshold parameter.
+
         :param request: The request for which to generate a unique hash.
         :return: A unique hash representing the request
         """
+
+        params_copy = request.params.copy() if request.params else {}
+        params_copy.pop('override_threshold', None)
+
         unique_attributes = json.dumps({
-            'params': request.params,
+            'params': params_copy,
             'json': request.json,
         }, sort_keys=True).encode('utf-8')
 
@@ -131,31 +138,45 @@ def _generate_request_hash(self, request) -> str:
 
     def cache_embeddings_request(self, request: BackendRequest, response: BackendResponse) -> bool:
         """
-        Caches the recommendations request and response.
+        Caches the recommendations request and response. It performs a bulk insert of the
+        recommendations into the RecommendationsCache table, ignoring any conflicts.
 
         :param request: The request to cache.
         :param response: The response to cache.
         :return: A boolean indicating whether the caching was successful.
         :rtype: bool
         """
+
         try:
-            request_hash = self._generate_request_hash(request)
             nodes = self._extract_data(response)
-            override_threshold = request.params.get('override_threshold', False)
-            cache = [
+            request_hash = self._generate_request_hash(request)
+            existing_cache = set(RecommendationsCache.objects.filter(request_hash=request_hash)
+                                 .values_list('contentnode_id', flat=True))
+            override_threshold = self._extract_override_threshold(request)
+            new_cache = [
                 RecommendationsCache(
                     request_hash=request_hash,
                     contentnode_id=node['contentnode_id'],
                     rank=node['rank'],
                     override_threshold=override_threshold,
-                ) for node in nodes
+                ) for node in nodes if node['contentnode_id'] not in existing_cache
             ]
-            RecommendationsCache.objects.bulk_create(cache)
+            RecommendationsCache.objects.bulk_create(new_cache, ignore_conflicts=True)
             return True
         except Exception as e:
             logging.exception(e)
             return False
 
+    def _extract_override_threshold(self, request) -> bool:
+        """
+        Extracts the override_threshold parameter from the request safely.
+
+        :param request: The request containing the parameters.
+        :return: The value of the override_threshold parameter, or False if not present.
+        :rtype: bool
+        """
+        return request.params.get('override_threshold', False) if request.params else False
+
     def get_recommendations(self, topic: Dict[str, Any],
                             override_threshold=False) -> RecommendationsResponse:
         """

From a8cca1e5664443e73c94e198a3da266fa5ffa628 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Tue, 30 Jul 2024 15:27:26 +0300
Subject: [PATCH 11/21] Adjusts docstring description

---
 contentcuration/contentcuration/utils/recommendations.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/contentcuration/contentcuration/utils/recommendations.py b/contentcuration/contentcuration/utils/recommendations.py
index 04fc6b8a3c..33e3bcaaf0 100644
--- a/contentcuration/contentcuration/utils/recommendations.py
+++ b/contentcuration/contentcuration/utils/recommendations.py
@@ -114,10 +114,8 @@ def response_exists(self, request) -> Union[EmbeddingsResponse, None]:
 
     def _generate_request_hash(self, request) -> str:
         """
-        Generates a unique hash for a given request.
-
-        This method serializes the request attributes that make it unique,
-        then generates a hash of this serialization.
+        Generates a unique hash for a given request. It serializes the request
+        attributes that make it unique, then generates a hash of this serialization.
 
         To prevent cache duplication, the hash is generated
         independent of the override_threshold parameter.

From 2ebd5ee3433c4258e413f1428c2c205e9496ab03 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Tue, 30 Jul 2024 15:36:56 +0300
Subject: [PATCH 12/21] Adds unique constraint to cache model

---
 contentcuration/automation/models.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/contentcuration/automation/models.py b/contentcuration/automation/models.py
index a3d618f462..8e1bc2496b 100644
--- a/contentcuration/automation/models.py
+++ b/contentcuration/automation/models.py
@@ -17,3 +17,6 @@ class RecommendationsCache(models.Model):
     rank = models.FloatField(default=0.0, null=True)
     override_threshold = models.BooleanField(default=False)
     timestamp = models.DateTimeField(auto_now_add=True)
+
+    class Meta:
+        unique_together = ('request_hash', 'contentnode_id')

From b7af67fcc78036c2b5757bdc31eee0ee7e0c8fc9 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Tue, 30 Jul 2024 16:35:09 +0300
Subject: [PATCH 13/21] Adds indexes for optimized data querying

---
 contentcuration/automation/models.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/contentcuration/automation/models.py b/contentcuration/automation/models.py
index 8e1bc2496b..2d7c5807e6 100644
--- a/contentcuration/automation/models.py
+++ b/contentcuration/automation/models.py
@@ -4,6 +4,11 @@
 from kolibri_public.models import ContentNode
 
 
+REQUEST_HASH_INDEX_NAME = "request_hash_idx"
+CONTENTNODE_ID_INDEX_NAME = "contentnode_id_idx"
+REQUEST_CONTENTNODE_INDEX_NAME = "request_hash_contentnode_id_idx"
+
+
 class RecommendationsCache(models.Model):
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
     request_hash = models.CharField(max_length=32, null=True)
@@ -20,3 +25,11 @@ class RecommendationsCache(models.Model):
 
     class Meta:
         unique_together = ('request_hash', 'contentnode_id')
+        indexes = [
+            models.Index(fields=['request_hash'], name=REQUEST_HASH_INDEX_NAME),
+            models.Index(fields=['contentnode_id'], name=CONTENTNODE_ID_INDEX_NAME),
+            models.Index(
+                fields=['request_hash', 'contentnode_id'],
+                name=REQUEST_CONTENTNODE_INDEX_NAME,
+            ),
+        ]

From 50fc1d824bc8945d52bcc7aa16a314df796109a7 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Tue, 30 Jul 2024 16:42:20 +0300
Subject: [PATCH 14/21] Adds migrations

---
 .../automation/migrations/0001_initial.py     | 49 +++++++++++++++++++
 contentcuration/automation/models.py          |  2 +-
 2 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 contentcuration/automation/migrations/0001_initial.py

diff --git a/contentcuration/automation/migrations/0001_initial.py b/contentcuration/automation/migrations/0001_initial.py
new file mode 100644
index 0000000000..82ee0c1188
--- /dev/null
+++ b/contentcuration/automation/migrations/0001_initial.py
@@ -0,0 +1,49 @@
+# Generated by Django 3.2.24 on 2024-07-30 13:36
+import uuid
+
+import django.db.models.deletion
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    initial = True
+
+    dependencies = [
+        ('kolibri_public', '0003_alter_file_preset'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='RecommendationsCache',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True,
+                                        serialize=False)),
+                ('request_hash', models.CharField(max_length=32, null=True)),
+                ('rank', models.FloatField(default=0.0, null=True)),
+                ('override_threshold', models.BooleanField(default=False)),
+                ('timestamp', models.DateTimeField(auto_now_add=True)),
+                ('contentnode_id', models.ForeignKey(blank=True, null=True,
+                                                     on_delete=django.db.models.deletion.SET_NULL,
+                                                     related_name='recommendations',
+                                                     to='kolibri_public.contentnode')),
+            ],
+        ),
+        migrations.AddIndex(
+            model_name='recommendationscache',
+            index=models.Index(fields=['request_hash'], name='request_hash_idx'),
+        ),
+        migrations.AddIndex(
+            model_name='recommendationscache',
+            index=models.Index(fields=['contentnode_id'], name='contentnode_id_idx'),
+        ),
+        migrations.AddIndex(
+            model_name='recommendationscache',
+            index=models.Index(fields=['request_hash', 'contentnode_id'],
+                               name='request_hash_contentnode_idx'),
+        ),
+        migrations.AlterUniqueTogether(
+            name='recommendationscache',
+            unique_together={('request_hash', 'contentnode_id')},
+        ),
+    ]
diff --git a/contentcuration/automation/models.py b/contentcuration/automation/models.py
index 2d7c5807e6..dc518eeda9 100644
--- a/contentcuration/automation/models.py
+++ b/contentcuration/automation/models.py
@@ -6,7 +6,7 @@
 
 REQUEST_HASH_INDEX_NAME = "request_hash_idx"
 CONTENTNODE_ID_INDEX_NAME = "contentnode_id_idx"
-REQUEST_CONTENTNODE_INDEX_NAME = "request_hash_contentnode_id_idx"
+REQUEST_CONTENTNODE_INDEX_NAME = "request_hash_contentnode_idx"
 
 
 class RecommendationsCache(models.Model):

From 0def99e926facd3aa40a2f8a478b117f1e739369 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Thu, 1 Aug 2024 16:48:37 +0300
Subject: [PATCH 15/21] Writes queries to get main_tree_id

---
 .../automation/migrations/0001_initial.py     | 49 -------------------
 contentcuration/automation/models.py          |  5 --
 .../contentcuration/utils/recommendations.py  | 41 +++++++++++++---
 3 files changed, 35 insertions(+), 60 deletions(-)
 delete mode 100644 contentcuration/automation/migrations/0001_initial.py

diff --git a/contentcuration/automation/migrations/0001_initial.py b/contentcuration/automation/migrations/0001_initial.py
deleted file mode 100644
index 82ee0c1188..0000000000
--- a/contentcuration/automation/migrations/0001_initial.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Generated by Django 3.2.24 on 2024-07-30 13:36
-import uuid
-
-import django.db.models.deletion
-from django.db import migrations
-from django.db import models
-
-
-class Migration(migrations.Migration):
-    initial = True
-
-    dependencies = [
-        ('kolibri_public', '0003_alter_file_preset'),
-    ]
-
-    operations = [
-        migrations.CreateModel(
-            name='RecommendationsCache',
-            fields=[
-                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True,
-                                        serialize=False)),
-                ('request_hash', models.CharField(max_length=32, null=True)),
-                ('rank', models.FloatField(default=0.0, null=True)),
-                ('override_threshold', models.BooleanField(default=False)),
-                ('timestamp', models.DateTimeField(auto_now_add=True)),
-                ('contentnode_id', models.ForeignKey(blank=True, null=True,
-                                                     on_delete=django.db.models.deletion.SET_NULL,
-                                                     related_name='recommendations',
-                                                     to='kolibri_public.contentnode')),
-            ],
-        ),
-        migrations.AddIndex(
-            model_name='recommendationscache',
-            index=models.Index(fields=['request_hash'], name='request_hash_idx'),
-        ),
-        migrations.AddIndex(
-            model_name='recommendationscache',
-            index=models.Index(fields=['contentnode_id'], name='contentnode_id_idx'),
-        ),
-        migrations.AddIndex(
-            model_name='recommendationscache',
-            index=models.Index(fields=['request_hash', 'contentnode_id'],
-                               name='request_hash_contentnode_idx'),
-        ),
-        migrations.AlterUniqueTogether(
-            name='recommendationscache',
-            unique_together={('request_hash', 'contentnode_id')},
-        ),
-    ]
diff --git a/contentcuration/automation/models.py b/contentcuration/automation/models.py
index dc518eeda9..9925efb20f 100644
--- a/contentcuration/automation/models.py
+++ b/contentcuration/automation/models.py
@@ -6,7 +6,6 @@
 
 REQUEST_HASH_INDEX_NAME = "request_hash_idx"
 CONTENTNODE_ID_INDEX_NAME = "contentnode_id_idx"
-REQUEST_CONTENTNODE_INDEX_NAME = "request_hash_contentnode_idx"
 
 
 class RecommendationsCache(models.Model):
@@ -28,8 +27,4 @@ class Meta:
         indexes = [
             models.Index(fields=['request_hash'], name=REQUEST_HASH_INDEX_NAME),
             models.Index(fields=['contentnode_id'], name=CONTENTNODE_ID_INDEX_NAME),
-            models.Index(
-                fields=['request_hash', 'contentnode_id'],
-                name=REQUEST_CONTENTNODE_INDEX_NAME,
-            ),
         ]
diff --git a/contentcuration/contentcuration/utils/recommendations.py b/contentcuration/contentcuration/utils/recommendations.py
index 33e3bcaaf0..7748a7f622 100644
--- a/contentcuration/contentcuration/utils/recommendations.py
+++ b/contentcuration/contentcuration/utils/recommendations.py
@@ -13,10 +13,16 @@
 from automation.utils.appnexus.base import BackendFactory
 from automation.utils.appnexus.base import BackendRequest
 from automation.utils.appnexus.base import BackendResponse
+from django.db.models import F
+from django.db.models import OuterRef
+from django.db.models import Subquery
+from django.db.models import Value
+from django.db.models.functions import Replace
 from kolibri_public.models import ContentNode as PublicContentNode
 from le_utils.constants import content_kinds
 from le_utils.constants import format_presets
 
+from contentcuration.models import Channel
 from contentcuration.models import ContentNode as ContentNode
 from contentcuration.models import File
 
@@ -99,11 +105,11 @@ def response_exists(self, request) -> Union[EmbeddingsResponse, None]:
         """
         try:
             request_hash = self._generate_request_hash(request)
-            data = list(
-                RecommendationsCache.objects.filter(request_hash=request_hash)
-                .order_by('rank')
-                .values('contentnode_id', 'rank')
-            )
+            override_threshold = self._extract_override_threshold(request)
+            data = list(RecommendationsCache.objects
+                        .filter(request_hash=request_hash, override_threshold=override_threshold)
+                        .order_by('override_threshold', 'rank')
+                        .values('contentnode_id', 'rank'))
             if len(data) > 0:
                 return EmbeddingsResponse(data=data)
             else:
@@ -203,7 +209,21 @@ def get_recommendations(self, topic: Dict[str, Any],
         nodes = self._extract_data(response)
         if len(nodes) > 0:
             node_ids = [node['contentnode_id'] for node in nodes]
-            recommendations = list(ContentNode.objects.filter(id__in=node_ids))
+
+            # Get the channel_id from PublicContentNode based on matching node_id from ContentNode
+            channel_id_subquery = PublicContentNode.objects.filter(
+                self._normalize_uuid(F('id')) == self._normalize_uuid(OuterRef('node_id'))
+            ).values('channel_id')[:1]
+
+            # Get main_tree_id from Channel based on channel_id obtained from channel_id_subquery
+            main_tree_id_subquery = Channel.objects.filter(
+                self._normalize_uuid(F('id')) == self._normalize_uuid(Subquery(channel_id_subquery))
+            ).values('main_tree_id')[:1]
+
+            # Annotate main_tree_id onto ContentNode
+            recommendations = ContentNode.objects.filter(id__in=node_ids).annotate(
+                main_tree_id=Subquery(main_tree_id_subquery)
+            ).values('id', 'node_id', 'main_tree_id', 'parent_id')
 
         return RecommendationsResponse(results=recommendations)
 
@@ -228,6 +248,15 @@ def _extract_data(self, response: BackendResponse) -> List[Dict[str, Any]]:
         """
         return response.data if not response.data else []
 
+    def _normalize_uuid(self, field):
+        """
+        Removes hyphens from a UUID field.
+
+        :param field: The field (such as F() object or OuterRef) whose value needs normalization.
+        :return: The normalized field expression without hyphens.
+        """
+        return Replace(field, Value('-'), Value(''))
+
     def embed_content(self, channel_id: str,
                       nodes: List[Union[ContentNode, PublicContentNode]]) -> bool:
         """

From 374da3fe5f181aef2321895b6b18080f1ad49c26 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Thu, 1 Aug 2024 16:53:46 +0300
Subject: [PATCH 16/21] Reruns migrations

---
 .../automation/migrations/0001_initial.py     | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 contentcuration/automation/migrations/0001_initial.py

diff --git a/contentcuration/automation/migrations/0001_initial.py b/contentcuration/automation/migrations/0001_initial.py
new file mode 100644
index 0000000000..96e54e69c9
--- /dev/null
+++ b/contentcuration/automation/migrations/0001_initial.py
@@ -0,0 +1,44 @@
+# Generated by Django 3.2.24 on 2024-08-01 13:52
+import uuid
+
+import django.db.models.deletion
+from django.db import migrations
+from django.db import models
+
+
+class Migration(migrations.Migration):
+    initial = True
+
+    dependencies = [
+        ('kolibri_public', '0003_alter_file_preset'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='RecommendationsCache',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True,
+                                        serialize=False)),
+                ('request_hash', models.CharField(max_length=32, null=True)),
+                ('rank', models.FloatField(default=0.0, null=True)),
+                ('override_threshold', models.BooleanField(default=False)),
+                ('timestamp', models.DateTimeField(auto_now_add=True)),
+                ('contentnode_id', models.ForeignKey(blank=True, null=True,
+                                                     on_delete=django.db.models.deletion.SET_NULL,
+                                                     related_name='recommendations',
+                                                     to='kolibri_public.contentnode')),
+            ],
+        ),
+        migrations.AddIndex(
+            model_name='recommendationscache',
+            index=models.Index(fields=['request_hash'], name='request_hash_idx'),
+        ),
+        migrations.AddIndex(
+            model_name='recommendationscache',
+            index=models.Index(fields=['contentnode_id'], name='contentnode_id_idx'),
+        ),
+        migrations.AlterUniqueTogether(
+            name='recommendationscache',
+            unique_together={('request_hash', 'contentnode_id')},
+        ),
+    ]

From d70e97817962551b244314af56b8d9927c486187 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Wed, 7 Aug 2024 17:17:45 +0300
Subject: [PATCH 17/21] adds more tests

---
 .../automation/migrations/0001_initial.py     |  14 +-
 contentcuration/automation/models.py          |  10 +-
 .../tests/test_recommendations_cache_model.py |  64 ++++
 .../tests/utils/test_recommendations.py       | 335 ++++++++++++++----
 .../contentcuration/utils/recommendations.py  |  72 ++--
 5 files changed, 395 insertions(+), 100 deletions(-)
 create mode 100644 contentcuration/automation/tests/test_recommendations_cache_model.py

diff --git a/contentcuration/automation/migrations/0001_initial.py b/contentcuration/automation/migrations/0001_initial.py
index 96e54e69c9..6152c0f4b2 100644
--- a/contentcuration/automation/migrations/0001_initial.py
+++ b/contentcuration/automation/migrations/0001_initial.py
@@ -1,4 +1,4 @@
-# Generated by Django 3.2.24 on 2024-08-01 13:52
+# Generated by Django 3.2.24 on 2024-08-05 21:23
 import uuid
 
 import django.db.models.deletion
@@ -23,10 +23,10 @@ class Migration(migrations.Migration):
                 ('rank', models.FloatField(default=0.0, null=True)),
                 ('override_threshold', models.BooleanField(default=False)),
                 ('timestamp', models.DateTimeField(auto_now_add=True)),
-                ('contentnode_id', models.ForeignKey(blank=True, null=True,
-                                                     on_delete=django.db.models.deletion.SET_NULL,
-                                                     related_name='recommendations',
-                                                     to='kolibri_public.contentnode')),
+                ('contentnode', models.ForeignKey(blank=True, null=True,
+                                                  on_delete=django.db.models.deletion.CASCADE,
+                                                  related_name='recommendations',
+                                                  to='kolibri_public.contentnode')),
             ],
         ),
         migrations.AddIndex(
@@ -35,10 +35,10 @@ class Migration(migrations.Migration):
         ),
         migrations.AddIndex(
             model_name='recommendationscache',
-            index=models.Index(fields=['contentnode_id'], name='contentnode_id_idx'),
+            index=models.Index(fields=['contentnode'], name='contentnode_idx'),
         ),
         migrations.AlterUniqueTogether(
             name='recommendationscache',
-            unique_together={('request_hash', 'contentnode_id')},
+            unique_together={('request_hash', 'contentnode')},
         ),
     ]
diff --git a/contentcuration/automation/models.py b/contentcuration/automation/models.py
index 9925efb20f..7000ddf420 100644
--- a/contentcuration/automation/models.py
+++ b/contentcuration/automation/models.py
@@ -5,26 +5,26 @@
 
 
 REQUEST_HASH_INDEX_NAME = "request_hash_idx"
-CONTENTNODE_ID_INDEX_NAME = "contentnode_id_idx"
+CONTENTNODE_INDEX_NAME = "contentnode_idx"
 
 
 class RecommendationsCache(models.Model):
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
     request_hash = models.CharField(max_length=32, null=True)
-    contentnode_id = models.ForeignKey(
+    contentnode = models.ForeignKey(
         ContentNode,
         null=True,
         blank=True,
         related_name='recommendations',
-        on_delete=models.SET_NULL,
+        on_delete=models.CASCADE,
     )
     rank = models.FloatField(default=0.0, null=True)
     override_threshold = models.BooleanField(default=False)
     timestamp = models.DateTimeField(auto_now_add=True)
 
     class Meta:
-        unique_together = ('request_hash', 'contentnode_id')
+        unique_together = ('request_hash', 'contentnode')
         indexes = [
             models.Index(fields=['request_hash'], name=REQUEST_HASH_INDEX_NAME),
-            models.Index(fields=['contentnode_id'], name=CONTENTNODE_ID_INDEX_NAME),
+            models.Index(fields=['contentnode'], name=CONTENTNODE_INDEX_NAME),
         ]
diff --git a/contentcuration/automation/tests/test_recommendations_cache_model.py b/contentcuration/automation/tests/test_recommendations_cache_model.py
new file mode 100644
index 0000000000..60caaca95e
--- /dev/null
+++ b/contentcuration/automation/tests/test_recommendations_cache_model.py
@@ -0,0 +1,64 @@
+import uuid
+
+from automation.models import RecommendationsCache
+from django.db import IntegrityError
+from kolibri_public.models import ContentNode
+
+from contentcuration.tests.base import StudioTestCase
+
+
+class TestRecommendationsCache(StudioTestCase):
+
+    def setUp(self):
+        self.content_node = ContentNode.objects.create(
+            id=uuid.uuid4(),
+            title='Test Content Node',
+            content_id=uuid.uuid4(),
+            channel_id=uuid.uuid4(),
+        )
+        self.cache = RecommendationsCache.objects.create(
+            request_hash='test_hash',
+            contentnode_id=self.content_node,
+            rank=1.0,
+            override_threshold=False
+        )
+
+    def test_cache_creation(self):
+        self.assertIsInstance(self.cache, RecommendationsCache)
+        self.assertEqual(self.cache.request_hash, 'test_hash')
+        self.assertEqual(self.cache.contentnode_id, self.content_node)
+        self.assertEqual(self.cache.rank, 1.0)
+        self.assertFalse(self.cache.override_threshold)
+
+    def test_cache_retrieval(self):
+        retrieved_cache = RecommendationsCache.objects.get(request_hash='test_hash')
+        self.assertEqual(retrieved_cache, self.cache)
+
+    def test_cache_uniqueness(self):
+        with self.assertRaises(IntegrityError):
+            RecommendationsCache.objects.create(
+                request_hash='test_hash',
+                contentnode_id=self.content_node,
+                rank=2.0,
+                override_threshold=True
+            )
+
+    def test_bulk_create_ignore_conflicts_true(self):
+        initial_count = RecommendationsCache.objects.count()
+        try:
+            RecommendationsCache.objects.bulk_create(
+                [self.cache, self.cache],
+                ignore_conflicts=True
+            )
+        except IntegrityError:
+            self.fail("bulk_create raised IntegrityError unexpectedly!")
+
+        final_count = RecommendationsCache.objects.count()
+        self.assertEqual(initial_count, final_count)
+
+    def test_bulk_create_ignore_conflicts_false(self):
+        with self.assertRaises(IntegrityError):
+            RecommendationsCache.objects.bulk_create(
+                [self.cache, self.cache],
+                ignore_conflicts=False
+            )
diff --git a/contentcuration/contentcuration/tests/utils/test_recommendations.py b/contentcuration/contentcuration/tests/utils/test_recommendations.py
index f395005b63..c6e9796d4a 100644
--- a/contentcuration/contentcuration/tests/utils/test_recommendations.py
+++ b/contentcuration/contentcuration/tests/utils/test_recommendations.py
@@ -1,8 +1,16 @@
+import copy
+import uuid
+
+from automation.models import RecommendationsCache
 from automation.utils.appnexus import errors
+from automation.utils.appnexus.base import BackendResponse
 from django.test import TestCase
+from kolibri_public.models import ContentNode as PublicContentNode
 from mock import MagicMock
 from mock import patch
 
+from contentcuration.models import Channel
+from contentcuration.models import ContentKind
 from contentcuration.models import ContentNode
 from contentcuration.utils.recommendations import EmbeddingsResponse
 from contentcuration.utils.recommendations import EmbedTopicsRequest
@@ -19,9 +27,10 @@ def test_backend_initialization(self):
 
 
 class RecommendationsAdapterTestCase(TestCase):
-    def setUp(self):
-        self.adapter = RecommendationsAdapter(MagicMock())
-        self.topic = {
+    @classmethod
+    def setUpTestData(cls):
+        cls.adapter = RecommendationsAdapter(MagicMock())
+        cls.topic = {
             'id': 'topic_id',
             'title': 'topic_title',
             'description': 'topic_description',
@@ -34,101 +43,297 @@ def setUp(self):
                 }
             ]
         }
-        self.channel_id = 'test_channel_id'
-        self.resources = [
-            MagicMock(spec=ContentNode),
+        cls.channel_id = 'test_channel_id'
+        cls.resources = [MagicMock(spec=ContentNode)]
+
+        cls.request = EmbedTopicsRequest(
+            method='POST',
+            url='http://test.com',
+            path='/test/path',
+            params={'override_threshold': False},
+            json=cls.topic
+        )
+        cls.api_response = BackendResponse(data=[
+            {'contentnode_id': '1234567890abcdef1234567890abcdef', 'rank': 0.9},
+            {'contentnode_id': 'abcdef1234567890abcdef1234567890', 'rank': 0.8}
+        ])
+        cls.recommendations_response = [
+            {
+                'id': '1234567890abcdef1234567890abcdef',
+                'node_id': '1234567890abcdef1234567890abcdef',
+                'parent_id': None,
+                'main_tree_id': '4387374055304864a731f3e705d64639'
+            },
+            {
+                'id': 'abcdef1234567890abcdef1234567890',
+                'node_id': 'abcdef1234567890abcdef1234567890',
+                'parent_id': None,
+                'main_tree_id': '0548a548dda8487b8ac2f81145737cfc'
+            }
         ]
 
+        cls.content_kind = ContentKind.objects.create(kind='topic')
+
+        cls.channel_1 = Channel.objects.create(
+            id='ddec09d74e834241a580c480ee37879c',
+            name='Channel 1',
+            main_tree=ContentNode.objects.create(
+                id='4387374055304864a731f3e705d64639',
+                title='Main tree 1',
+                content_id=uuid.uuid4(),
+                node_id='e947222469504e789476cf3ffc5e3801',
+                kind=cls.content_kind
+            )
+        )
+        cls.channel_2 = Channel.objects.create(
+            id='84fcaec1e0514b62899d7f436384c401',
+            name='Channel 2',
+            main_tree=ContentNode.objects.create(
+                id='0548a548dda8487b8ac2f81145737cfc',
+                title='Main tree 2',
+                content_id=uuid.uuid4(),
+                node_id='f7547941d75d4712a53f566b4bf93250',
+                kind=cls.content_kind
+            )
+        )
+
+        cls.public_content_node_1 = PublicContentNode.objects.create(
+            id=uuid.UUID('1234567890abcdef1234567890abcdef'),
+            title='Public Content Node 1',
+            content_id=uuid.uuid4(),
+            channel_id=uuid.UUID('ddec09d74e834241a580c480ee37879c'),
+        )
+        cls.public_content_node_2 = PublicContentNode.objects.create(
+            id=uuid.UUID('abcdef1234567890abcdef1234567890'),
+            title='Public Content Node 2',
+            content_id=uuid.uuid4(),
+            channel_id=uuid.UUID('84fcaec1e0514b62899d7f436384c401'),
+        )
+
+        cls.content_node_1 = ContentNode.objects.create(
+            id='1234567890abcdef1234567890abcdef',
+            title='Content Node 1',
+            content_id=uuid.uuid4(),
+            node_id='1234567890abcdef1234567890abcdef',
+            kind=cls.content_kind
+        )
+        cls.content_node_2 = ContentNode.objects.create(
+            id='abcdef1234567890abcdef1234567890',
+            title='Content Node 2',
+            content_id=uuid.uuid4(),
+            node_id='abcdef1234567890abcdef1234567890',
+            kind=cls.content_kind
+        )
+
+        cls.cache = RecommendationsCache.objects.create(
+            request_hash=cls.adapter._generate_request_hash(cls.request),
+            contentnode=cls.public_content_node_1,
+            rank=1.0,
+            override_threshold=False
+        )
+
+    def assert_backend_call(self, mock_response_exists, response_exists_value, connect_value,
+                            make_request_value, method, *args):
+        mock_response_exists.return_value = response_exists_value
+        self.adapter.backend.connect.return_value = connect_value
+        self.adapter.backend.make_request.return_value = make_request_value
+
+        if response_exists_value:
+            result = method(*args)
+            mock_response_exists.assert_called_once()
+            self.adapter.backend.connect.assert_not_called()
+            self.adapter.backend.make_request.assert_not_called()
+            return result
+        else:
+            if connect_value:
+                result = method(*args)
+                self.adapter.backend.connect.assert_called_once()
+                self.adapter.backend.make_request.assert_called_once()
+                return result
+            else:
+                with self.assertRaises(errors.ConnectionError):
+                    method(*args)
+                self.adapter.backend.connect.assert_called_once()
+                self.adapter.backend.make_request.assert_not_called()
+
     def test_adapter_initialization(self):
         self.assertIsNotNone(self.adapter)
         self.assertIsInstance(self.adapter, RecommendationsAdapter)
 
-    def test_generate_embeddings_connect_failure(self):
+    @patch('contentcuration.utils.recommendations.RecommendationsAdapter.response_exists')
+    def test_generate_embeddings_connect_failure(self, mock_response_exists):
         mock_response = MagicMock(spec=EmbeddingsResponse)
+        self.assert_backend_call(mock_response_exists, None, False, mock_response,
+                                 self.adapter.generate_embeddings, self.request)
 
-        self.adapter.backend.connect.return_value = False
-        self.adapter.backend.make_request.return_value = mock_response
-        with self.assertRaises(errors.ConnectionError):
-            self.adapter.generate_embeddings(EmbedTopicsRequest(
-                method='POST',
-                url='http://test.com',
-                path='/test/path',
-            ))
-        self.adapter.backend.connect.assert_called_once()
-        self.adapter.backend.make_request.assert_not_called()
-
-    def test_generate_embeddings(self):
+    @patch('contentcuration.utils.recommendations.RecommendationsAdapter.response_exists')
+    def test_generate_embeddings(self, mock_response_exists):
         mock_response = MagicMock(spec=EmbeddingsResponse)
         mock_response.error = None
-
-        self.adapter.backend.connect.return_value = True
-        self.adapter.backend.make_request.return_value = mock_response
-        response = self.adapter.generate_embeddings(EmbedTopicsRequest(
-            method='POST',
-            url='http://test.com',
-            path='/test/path',
-        ))
-        self.adapter.backend.connect.assert_called_once()
-        self.adapter.backend.make_request.assert_called_once()
+        response = self.assert_backend_call(mock_response_exists, None, True, mock_response,
+                                            self.adapter.generate_embeddings, self.request)
         self.assertIsInstance(response, EmbeddingsResponse)
 
-    def test_generate_embeddings_failure(self):
+    @patch('contentcuration.utils.recommendations.RecommendationsAdapter.response_exists')
+    def test_generate_embeddings_failure(self, mock_response_exists):
         mock_response = MagicMock(spec=EmbeddingsResponse)
         mock_response.error = {}
+        response = self.assert_backend_call(mock_response_exists, None, True, mock_response,
+                                            self.adapter.generate_embeddings, self.request)
+        self.assertIsInstance(response, EmbeddingsResponse)
+        self.assertIsNotNone(response.error)
+
+    def test_response_exists(self):
+        response = self.adapter.response_exists(self.request)
+        public_content_node_id = str(self.public_content_node_1.id).replace('-', '')
 
-        self.adapter.backend.connect.return_value = True
-        self.adapter.backend.make_request.return_value = mock_response
-        response = self.adapter.generate_embeddings(EmbedTopicsRequest(
+        self.assertIsNotNone(response)
+        self.assertIsInstance(response, EmbeddingsResponse)
+        self.assertEqual(len(response.data), 1)
+        self.assertEqual(response.data[0]['contentnode_id'], public_content_node_id)
+        self.assertEqual(response.data[0]['rank'], 1.0)
+
+    def test_response_does_not_exist(self):
+        new_request = EmbedTopicsRequest(
             method='POST',
             url='http://test.com',
             path='/test/path',
-        ))
-        self.adapter.backend.connect.assert_called_once()
-        self.adapter.backend.make_request.assert_called_once()
-        self.assertIsInstance(response, EmbeddingsResponse)
-        self.assertIsNotNone(response.error)
+            params={'override_threshold': True},
+            json={'topic': 'new_test_topic'}
+        )
+        response = self.adapter.response_exists(new_request)
+        self.assertIsNone(response)
+
+    def cache_request_test_helper(self, request_json, response_data, expected_count):
+        new_request = copy.deepcopy(self.request)
+        new_request.json = request_json
+        response_copy = copy.deepcopy(self.api_response)
+        response_copy.data = response_data
+
+        result = self.adapter.cache_embeddings_request(new_request, response_copy)
+        self.assertTrue(result)
+
+        cached_items = RecommendationsCache.objects.filter(
+            request_hash=self.adapter._generate_request_hash(new_request)
+        )
+        self.assertEqual(cached_items.count(), expected_count)
+
+    def test_cache_embeddings_request_success(self):
+        self.cache_request_test_helper({'topic': 'new_test_topic_1'}, self.api_response.data, 2)
+
+    def test_cache_embeddings_request_empty_data(self):
+        self.cache_request_test_helper({'topic': 'new_test_topic_2'}, [], 0)
 
+    def test_cache_embeddings_request_ignore_duplicates(self):
+        duplicate_data = [
+            {'contentnode_id': '1234567890abcdef1234567890abcdef', 'rank': 0.9},
+            {'contentnode_id': '1234567890abcdef1234567890abcdef', 'rank': 0.9}
+        ]
+        self.cache_request_test_helper({'topic': 'new_test_topic_3'}, duplicate_data, 1)
+
+    @patch('contentcuration.utils.recommendations.RecommendationsAdapter.cache_embeddings_request')
+    @patch('contentcuration.utils.recommendations.RecommendationsAdapter.generate_embeddings')
+    @patch('contentcuration.utils.recommendations.RecommendationsAdapter.response_exists')
     @patch('contentcuration.utils.recommendations.EmbedTopicsRequest')
-    def test_get_recommendations_backend_connect_success(self, get_recommendations_request_mock):
-        mock_response = MagicMock(spec=RecommendationsResponse)
-        mock_response.data = []
+    def test_get_recommendations_success(self, mock_embed_topics_request, mock_response_exists,
+                                         mock_generate_embeddings, mock_cache_embeddings_request):
+        mock_response_exists.return_value = None
+        mock_response = MagicMock(spec=EmbeddingsResponse)
+        mock_response.data = copy.deepcopy(self.api_response.data)
         mock_response.error = None
+        mock_generate_embeddings.return_value = mock_response
 
-        self.adapter.backend.connect.return_value = True
-        self.adapter.backend.make_request.return_value = mock_response
         response = self.adapter.get_recommendations(self.topic)
-        self.adapter.backend.connect.assert_called_once()
-        self.adapter.backend.make_request.assert_called_once()
+
+        mock_response_exists.assert_called_once()
+        mock_generate_embeddings.assert_called_once()
         self.assertIsInstance(response, RecommendationsResponse)
+        self.assertListEqual(list(response.results), self.recommendations_response)
+        self.assertEqual(len(response.results), 2)
 
+    @patch('contentcuration.utils.recommendations.RecommendationsAdapter._extract_data')
+    @patch('contentcuration.utils.recommendations.RecommendationsAdapter.response_exists')
     @patch('contentcuration.utils.recommendations.EmbedTopicsRequest')
-    def test_get_recommendations_backend_connect_failure(self, embed_topics_request_mock):
+    def test_get_recommendations_failure(self, mock_embed_topics_request, mock_response_exists,
+                                         mock_extract_data):
         mock_request_instance = MagicMock(spec=EmbedTopicsRequest)
-        embed_topics_request_mock.return_value = mock_request_instance
+        mock_embed_topics_request.return_value = mock_request_instance
 
-        self.adapter.backend.connect.return_value = False
-        with self.assertRaises(errors.ConnectionError):
-            self.adapter.get_recommendations(self.topic)
-        self.adapter.backend.connect.assert_called_once()
-        self.adapter.backend.make_request.assert_not_called()
+        self.assert_backend_call(mock_response_exists, None, False, None,
+                                 self.adapter.get_recommendations, self.topic)
 
+    @patch('contentcuration.utils.recommendations.RecommendationsAdapter._extract_data')
+    @patch('contentcuration.utils.recommendations.RecommendationsAdapter.response_exists')
     @patch('contentcuration.utils.recommendations.EmbedContentRequest')
-    def test_embed_content_backend_connect_success(self, embed_content_request_mock):
+    def test_embed_content_success(self, mock_embed_topics_request, mock_response_exists,
+                                   mock_extract_data):
         mock_response = MagicMock(spec=EmbeddingsResponse)
         mock_response.error = None
-
-        self.adapter.backend.connect.return_value = True
-        self.adapter.backend.make_request.return_value = mock_response
-        response = self.adapter.embed_content(self.channel_id, self.resources)
-        self.adapter.backend.connect.assert_called_once()
-        self.adapter.backend.make_request.assert_called_once()
+        response = self.assert_backend_call(mock_response_exists, None, True, mock_response,
+                                            self.adapter.embed_content, self.channel_id,
+                                            self.resources)
         self.assertIsInstance(response, bool)
         self.assertTrue(response)
 
+    @patch('contentcuration.utils.recommendations.RecommendationsAdapter.response_exists')
     @patch('contentcuration.utils.recommendations.EmbedContentRequest')
-    def test_embed_content_backend_connect_failure(self, embed_content_request_mock):
-        self.adapter.backend.connect.return_value = False
-        with self.assertRaises(errors.ConnectionError):
-            self.adapter.embed_content(self.channel_id, self.resources)
-        self.adapter.backend.connect.assert_called_once()
-        self.adapter.backend.make_request.assert_not_called()
+    def test_embed_content_failure(self, mock_embed_topics_request, mock_response_exists):
+        response = self.assert_backend_call(mock_response_exists, None, False,
+                                            None, self.adapter.embed_content,
+                                            self.channel_id,
+                                            self.resources)
+
+        self.assertIsNone(response)
+
+    def extract_content_test_helper(self, mock_node, file_return_value, expected_result):
+        with patch('contentcuration.utils.recommendations.File.objects.filter',
+                   return_value=file_return_value):
+            result = self.adapter.extract_content(mock_node)
+            self.assertEqual(result, expected_result)
+
+    def test_extract_content(self):
+        mock_node = MagicMock(spec=ContentNode)
+        mock_node.node_id = '1234567890abcdef1234567890abcdef'
+        mock_node.title = 'Sample Title'
+        mock_node.description = 'Sample Description'
+        mock_node.language.lang_code = 'en'
+        mock_node.kind.kind = 'video'
+
+        mock_file_instance = MagicMock()
+        mock_file_instance.file_on_disk = 'path/to/file.mp4'
+        mock_file_instance.preset_id = 'video_high_res'
+        mock_file_instance.language.lang_code = 'en'
+
+        expected_result = {
+            "id": '1234567890abcdef1234567890abcdef',
+            "title": 'Sample Title',
+            "description": 'Sample Description',
+            "text": "",
+            "language": 'en',
+            "files": [
+                {
+                    'url': 'path/to/file.mp4',
+                    'preset': 'video_high_res',
+                    'language': 'en',
+                }
+            ],
+        }
+        self.extract_content_test_helper(mock_node, [mock_file_instance], expected_result)
+
+    def test_extract_content_no_files(self):
+        mock_node = MagicMock(spec=ContentNode)
+        mock_node.node_id = '1234567890abcdef1234567890abcdef'
+        mock_node.title = 'Sample Title'
+        mock_node.description = 'Sample Description'
+        mock_node.language.lang_code = 'en'
+        mock_node.kind.kind = 'video'
+
+        expected_result = {
+            "id": '1234567890abcdef1234567890abcdef',
+            "title": 'Sample Title',
+            "description": 'Sample Description',
+            "text": "",
+            "language": 'en',
+            "files": [],
+        }
+        self.extract_content_test_helper(mock_node, [], expected_result)
diff --git a/contentcuration/contentcuration/utils/recommendations.py b/contentcuration/contentcuration/utils/recommendations.py
index 7748a7f622..2ea5f7b611 100644
--- a/contentcuration/contentcuration/utils/recommendations.py
+++ b/contentcuration/contentcuration/utils/recommendations.py
@@ -16,8 +16,8 @@
 from django.db.models import F
 from django.db.models import OuterRef
 from django.db.models import Subquery
-from django.db.models import Value
-from django.db.models.functions import Replace
+from django.db.models import UUIDField
+from django.db.models.functions import Cast
 from kolibri_public.models import ContentNode as PublicContentNode
 from le_utils.constants import content_kinds
 from le_utils.constants import format_presets
@@ -153,9 +153,8 @@ def cache_embeddings_request(self, request: BackendRequest, response: BackendRes
 
         try:
             nodes = self._extract_data(response)
+            valid_nodes = self._validate_nodes(nodes)
             request_hash = self._generate_request_hash(request)
-            existing_cache = set(RecommendationsCache.objects.filter(request_hash=request_hash)
-                                 .values_list('contentnode_id', flat=True))
             override_threshold = self._extract_override_threshold(request)
             new_cache = [
                 RecommendationsCache(
@@ -163,7 +162,7 @@ def cache_embeddings_request(self, request: BackendRequest, response: BackendRes
                     contentnode_id=node['contentnode_id'],
                     rank=node['rank'],
                     override_threshold=override_threshold,
-                ) for node in nodes if node['contentnode_id'] not in existing_cache
+                ) for node in valid_nodes
             ]
             RecommendationsCache.objects.bulk_create(new_cache, ignore_conflicts=True)
             return True
@@ -172,7 +171,7 @@ def cache_embeddings_request(self, request: BackendRequest, response: BackendRes
             return False
 
     def _extract_override_threshold(self, request) -> bool:
-        """
+        """\
         Extracts the override_threshold parameter from the request safely.
 
         :param request: The request containing the parameters.
@@ -208,21 +207,25 @@ def get_recommendations(self, topic: Dict[str, Any],
 
         nodes = self._extract_data(response)
         if len(nodes) > 0:
-            node_ids = [node['contentnode_id'] for node in nodes]
+            node_ids = self._extract_node_ids(nodes)
 
-            # Get the channel_id from PublicContentNode based on matching node_id from ContentNode
-            channel_id_subquery = PublicContentNode.objects.filter(
-                self._normalize_uuid(F('id')) == self._normalize_uuid(OuterRef('node_id'))
-            ).values('channel_id')[:1]
+            # Get Channel.main_tree_id using the PublicContentNode.channel_id
+            channel_subquery = Channel.objects.annotate(
+                channel_id=self._cast_to_uuid(F('id'))
+            ).filter(
+                channel_id=OuterRef('channel_id')
+            ).values('main_tree_id')[:1]
 
-            # Get main_tree_id from Channel based on channel_id obtained from channel_id_subquery
-            main_tree_id_subquery = Channel.objects.filter(
-                self._normalize_uuid(F('id')) == self._normalize_uuid(Subquery(channel_id_subquery))
+            # Get the PublicContentNode.channel_id based on ContentNode.node_id
+            public_contentnode_subquery = PublicContentNode.objects.filter(
+                id=self._cast_to_uuid(OuterRef('node_id'))
+            ).annotate(
+                main_tree_id=Subquery(channel_subquery)
             ).values('main_tree_id')[:1]
 
-            # Annotate main_tree_id onto ContentNode
-            recommendations = ContentNode.objects.filter(id__in=node_ids).annotate(
-                main_tree_id=Subquery(main_tree_id_subquery)
+            # Annotate `main_tree_id` onto ContentNode
+            recommendations = ContentNode.objects.filter(node_id__in=node_ids).annotate(
+                main_tree_id=Subquery(public_contentnode_subquery),
             ).values('id', 'node_id', 'main_tree_id', 'parent_id')
 
         return RecommendationsResponse(results=recommendations)
@@ -246,16 +249,39 @@ def _extract_data(self, response: BackendResponse) -> List[Dict[str, Any]]:
         :return: The extracted data.
         :rtype: List[Dict[str, Any]]
         """
-        return response.data if not response.data else []
+        return response.data if response.data else []
+
+    def _validate_nodes(self, nodes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """
+        Validates the recommended nodes by checking if they exist in the database.
+
+        :param nodes: The nodes to validate.
+        :return: A list of valid recommended nodes that exist in the database.
+        :rtype: List[Dict[str, Any]]
+        """
+        node_ids = self._extract_node_ids(nodes)
+        existing_node_ids = set(
+            PublicContentNode.objects.filter(id__in=node_ids).values_list('id', flat=True))
+        return [node for node in nodes if node['contentnode_id'] in existing_node_ids]
+
+    def _extract_node_ids(self, nodes: List[Dict[str, Any]]) -> List[str]:
+        """
+        Extracts the node IDs from the given nodes.
+
+        :param nodes: The nodes from which to extract the node IDs.
+        :return: A list of node IDs.
+        :rtype: List[str]
+        """
+        return [node['contentnode_id'] for node in nodes]
 
-    def _normalize_uuid(self, field):
+    def _cast_to_uuid(self, field):
         """
-        Removes hyphens from a UUID field.
+        Casts the given field to a UUIDField.
 
-        :param field: The field (such as F() object or OuterRef) whose value needs normalization.
-        :return: The normalized field expression without hyphens.
+        :param field: The field (such as F() object or OuterRef) to cast.
+        :return: The field cast to a UUIDField.
         """
-        return Replace(field, Value('-'), Value(''))
+        return Cast(field, output_field=UUIDField())
 
     def embed_content(self, channel_id: str,
                       nodes: List[Union[ContentNode, PublicContentNode]]) -> bool:

From bd5c01896f2ae60bb14ccfc437e6356269cf25c7 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Wed, 7 Aug 2024 17:24:15 +0300
Subject: [PATCH 18/21] Fixes failing tests

---
 .../automation/tests/test_recommendations_cache_model.py    | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/contentcuration/automation/tests/test_recommendations_cache_model.py b/contentcuration/automation/tests/test_recommendations_cache_model.py
index 60caaca95e..9cb2e96c9a 100644
--- a/contentcuration/automation/tests/test_recommendations_cache_model.py
+++ b/contentcuration/automation/tests/test_recommendations_cache_model.py
@@ -18,7 +18,7 @@ def setUp(self):
         )
         self.cache = RecommendationsCache.objects.create(
             request_hash='test_hash',
-            contentnode_id=self.content_node,
+            contentnode=self.content_node,
             rank=1.0,
             override_threshold=False
         )
@@ -26,7 +26,7 @@ def setUp(self):
     def test_cache_creation(self):
         self.assertIsInstance(self.cache, RecommendationsCache)
         self.assertEqual(self.cache.request_hash, 'test_hash')
-        self.assertEqual(self.cache.contentnode_id, self.content_node)
+        self.assertEqual(self.cache.contentnode, self.content_node)
         self.assertEqual(self.cache.rank, 1.0)
         self.assertFalse(self.cache.override_threshold)
 
@@ -38,7 +38,7 @@ def test_cache_uniqueness(self):
         with self.assertRaises(IntegrityError):
             RecommendationsCache.objects.create(
                 request_hash='test_hash',
-                contentnode_id=self.content_node,
+                contentnode=self.content_node,
                 rank=2.0,
                 override_threshold=True
             )

From fa5253b481bbe298f23de4bf0fc389c1e3873df4 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Wed, 7 Aug 2024 17:47:18 +0300
Subject: [PATCH 19/21] adds more tests

---
 .../contentcuration/tests/utils/test_recommendations.py     | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/contentcuration/contentcuration/tests/utils/test_recommendations.py b/contentcuration/contentcuration/tests/utils/test_recommendations.py
index c6e9796d4a..f63ee53668 100644
--- a/contentcuration/contentcuration/tests/utils/test_recommendations.py
+++ b/contentcuration/contentcuration/tests/utils/test_recommendations.py
@@ -231,6 +231,12 @@ def test_cache_embeddings_request_ignore_duplicates(self):
         ]
         self.cache_request_test_helper({'topic': 'new_test_topic_3'}, duplicate_data, 1)
 
+    def test_cache_embeddings_request_invalid_data(self):
+        invalid_data = [
+            {'contentnode_id': '1234567890abcdef1234567890abcdee', 'rank': 0.6}
+        ]
+        self.cache_request_test_helper({'topic': 'new_test_topic_4'}, invalid_data, 0)
+
     @patch('contentcuration.utils.recommendations.RecommendationsAdapter.cache_embeddings_request')
     @patch('contentcuration.utils.recommendations.RecommendationsAdapter.generate_embeddings')
     @patch('contentcuration.utils.recommendations.RecommendationsAdapter.response_exists')

From cb2f47cf8fdb100cd3057f1cc29ad51f93685bb2 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Thu, 8 Aug 2024 18:25:10 +0300
Subject: [PATCH 20/21] cte implementation initial commit

---
 .../tests/utils/test_recommendations.py       | 24 ++++-----
 .../contentcuration/utils/recommendations.py  | 54 ++++++++++++-------
 2 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/contentcuration/contentcuration/tests/utils/test_recommendations.py b/contentcuration/contentcuration/tests/utils/test_recommendations.py
index f63ee53668..8cb64e5929 100644
--- a/contentcuration/contentcuration/tests/utils/test_recommendations.py
+++ b/contentcuration/contentcuration/tests/utils/test_recommendations.py
@@ -54,8 +54,8 @@ def setUpTestData(cls):
             json=cls.topic
         )
         cls.api_response = BackendResponse(data=[
-            {'contentnode_id': '1234567890abcdef1234567890abcdef', 'rank': 0.9},
-            {'contentnode_id': 'abcdef1234567890abcdef1234567890', 'rank': 0.8}
+            {'contentnode_id': 'f0ab32ce7dee4ee8a2d8e3dc2cf8a4a3', 'rank': 0.9},
+            {'contentnode_id': '98bd9283c7d24e02b338d7f52eabf9f6', 'rank': 0.8}
         ])
         cls.recommendations_response = [
             {
@@ -82,7 +82,7 @@ def setUpTestData(cls):
                 title='Main tree 1',
                 content_id=uuid.uuid4(),
                 node_id='e947222469504e789476cf3ffc5e3801',
-                kind=cls.content_kind
+                kind=cls.content_kind,
             )
         )
         cls.channel_2 = Channel.objects.create(
@@ -93,36 +93,36 @@ def setUpTestData(cls):
                 title='Main tree 2',
                 content_id=uuid.uuid4(),
                 node_id='f7547941d75d4712a53f566b4bf93250',
-                kind=cls.content_kind
+                kind=cls.content_kind,
             )
         )
 
         cls.public_content_node_1 = PublicContentNode.objects.create(
-            id=uuid.UUID('1234567890abcdef1234567890abcdef'),
+            id=uuid.UUID('f0ab32ce7dee4ee8a2d8e3dc2cf8a4a3'),
             title='Public Content Node 1',
             content_id=uuid.uuid4(),
             channel_id=uuid.UUID('ddec09d74e834241a580c480ee37879c'),
         )
         cls.public_content_node_2 = PublicContentNode.objects.create(
-            id=uuid.UUID('abcdef1234567890abcdef1234567890'),
+            id=uuid.UUID('98bd9283c7d24e02b338d7f52eabf9f6'),
             title='Public Content Node 2',
             content_id=uuid.uuid4(),
             channel_id=uuid.UUID('84fcaec1e0514b62899d7f436384c401'),
         )
 
         cls.content_node_1 = ContentNode.objects.create(
-            id='1234567890abcdef1234567890abcdef',
+            id='3c4d5847dd2f45568ce9ecd53e843a76',
             title='Content Node 1',
             content_id=uuid.uuid4(),
-            node_id='1234567890abcdef1234567890abcdef',
-            kind=cls.content_kind
+            node_id='f0ab32ce7dee4ee8a2d8e3dc2cf8a4a3',
+            kind=cls.content_kind,
         )
         cls.content_node_2 = ContentNode.objects.create(
-            id='abcdef1234567890abcdef1234567890',
+            id='b41b9289c1e84ea4869a8fbbf85b9a15',
             title='Content Node 2',
             content_id=uuid.uuid4(),
-            node_id='abcdef1234567890abcdef1234567890',
-            kind=cls.content_kind
+            node_id='98bd9283c7d24e02b338d7f52eabf9f6',
+            kind=cls.content_kind,
         )
 
         cls.cache = RecommendationsCache.objects.create(
diff --git a/contentcuration/contentcuration/utils/recommendations.py b/contentcuration/contentcuration/utils/recommendations.py
index 2ea5f7b611..be2daae66d 100644
--- a/contentcuration/contentcuration/utils/recommendations.py
+++ b/contentcuration/contentcuration/utils/recommendations.py
@@ -1,6 +1,7 @@
 import hashlib
 import json
 import logging
+import uuid
 from typing import Any
 from typing import Dict
 from typing import List
@@ -13,11 +14,12 @@
 from automation.utils.appnexus.base import BackendFactory
 from automation.utils.appnexus.base import BackendRequest
 from automation.utils.appnexus.base import BackendResponse
+from django.db.models import Exists
 from django.db.models import F
 from django.db.models import OuterRef
-from django.db.models import Subquery
 from django.db.models import UUIDField
 from django.db.models.functions import Cast
+from django_cte import With
 from kolibri_public.models import ContentNode as PublicContentNode
 from le_utils.constants import content_kinds
 from le_utils.constants import format_presets
@@ -208,25 +210,37 @@ def get_recommendations(self, topic: Dict[str, Any],
         nodes = self._extract_data(response)
         if len(nodes) > 0:
             node_ids = self._extract_node_ids(nodes)
-
-            # Get Channel.main_tree_id using the PublicContentNode.channel_id
-            channel_subquery = Channel.objects.annotate(
-                channel_id=self._cast_to_uuid(F('id'))
-            ).filter(
-                channel_id=OuterRef('channel_id')
-            ).values('main_tree_id')[:1]
-
-            # Get the PublicContentNode.channel_id based on ContentNode.node_id
-            public_contentnode_subquery = PublicContentNode.objects.filter(
-                id=self._cast_to_uuid(OuterRef('node_id'))
-            ).annotate(
-                main_tree_id=Subquery(channel_subquery)
-            ).values('main_tree_id')[:1]
-
-            # Annotate `main_tree_id` onto ContentNode
-            recommendations = ContentNode.objects.filter(node_id__in=node_ids).annotate(
-                main_tree_id=Subquery(public_contentnode_subquery),
-            ).values('id', 'node_id', 'main_tree_id', 'parent_id')
+            cast_node_ids = [uuid.UUID(node_id) for node_id in node_ids]
+
+            channel_cte = With(
+                Channel.objects.annotate(
+                    channel_id=self._cast_to_uuid(F('id'))
+                ).filter(
+                    Exists(
+                        PublicContentNode.objects.filter(
+                            id__in=cast_node_ids,
+                            channel_id=OuterRef('channel_id')
+                        )
+                    )
+                ).values(
+                    'main_tree_id',
+                    tree_id=F('main_tree__tree_id'),
+                ).distinct()
+            )
+            print(list(Channel.objects.all()))
+
+            recommendations = channel_cte.join(
+                ContentNode.objects.filter(node_id__in=node_ids),
+                tree_id=channel_cte.col.tree_id
+            ).with_cte(channel_cte) .annotate(
+                main_tree_id=channel_cte.col.main_tree_id
+            ).values(
+                'id',
+                'node_id',
+                'main_tree_id',
+                'parent_id',
+            )
+            print(list(recommendations))
 
         return RecommendationsResponse(results=recommendations)
 

From 902875eb55228831e87a3dbe4f734bce64490479 Mon Sep 17 00:00:00 2001
From: Samson Akol <akolson2010@gmail.com>
Date: Sat, 10 Aug 2024 02:01:11 +0300
Subject: [PATCH 21/21] Implements CTE to get recommendations

---
 .../tests/utils/test_recommendations.py       | 96 +++++++------------
 .../contentcuration/utils/recommendations.py  |  4 +-
 2 files changed, 34 insertions(+), 66 deletions(-)

diff --git a/contentcuration/contentcuration/tests/utils/test_recommendations.py b/contentcuration/contentcuration/tests/utils/test_recommendations.py
index 8cb64e5929..92c730cded 100644
--- a/contentcuration/contentcuration/tests/utils/test_recommendations.py
+++ b/contentcuration/contentcuration/tests/utils/test_recommendations.py
@@ -9,9 +9,9 @@
 from mock import MagicMock
 from mock import patch
 
-from contentcuration.models import Channel
-from contentcuration.models import ContentKind
 from contentcuration.models import ContentNode
+from contentcuration.tests import testdata
+from contentcuration.tests.base import StudioTestCase
 from contentcuration.utils.recommendations import EmbeddingsResponse
 from contentcuration.utils.recommendations import EmbedTopicsRequest
 from contentcuration.utils.recommendations import Recommendations
@@ -26,7 +26,8 @@ def test_backend_initialization(self):
         self.assertIsInstance(recommendations, Recommendations)
 
 
-class RecommendationsAdapterTestCase(TestCase):
+class RecommendationsAdapterTestCase(StudioTestCase):
+
     @classmethod
     def setUpTestData(cls):
         cls.adapter = RecommendationsAdapter(MagicMock())
@@ -54,77 +55,25 @@ def setUpTestData(cls):
             json=cls.topic
         )
         cls.api_response = BackendResponse(data=[
-            {'contentnode_id': 'f0ab32ce7dee4ee8a2d8e3dc2cf8a4a3', 'rank': 0.9},
-            {'contentnode_id': '98bd9283c7d24e02b338d7f52eabf9f6', 'rank': 0.8}
+            {'contentnode_id': '1234567890abcdef1234567890abcdef', 'rank': 0.9},
+            {'contentnode_id': 'abcdef1234567890abcdef1234567890', 'rank': 0.8},
+            {'contentnode_id': '00000000000000000000000000000003', 'rank': 0.8},
+            {'contentnode_id': '00000000000000000000000000000005', 'rank': 0.8},
         ])
-        cls.recommendations_response = [
-            {
-                'id': '1234567890abcdef1234567890abcdef',
-                'node_id': '1234567890abcdef1234567890abcdef',
-                'parent_id': None,
-                'main_tree_id': '4387374055304864a731f3e705d64639'
-            },
-            {
-                'id': 'abcdef1234567890abcdef1234567890',
-                'node_id': 'abcdef1234567890abcdef1234567890',
-                'parent_id': None,
-                'main_tree_id': '0548a548dda8487b8ac2f81145737cfc'
-            }
-        ]
-
-        cls.content_kind = ContentKind.objects.create(kind='topic')
-
-        cls.channel_1 = Channel.objects.create(
-            id='ddec09d74e834241a580c480ee37879c',
-            name='Channel 1',
-            main_tree=ContentNode.objects.create(
-                id='4387374055304864a731f3e705d64639',
-                title='Main tree 1',
-                content_id=uuid.uuid4(),
-                node_id='e947222469504e789476cf3ffc5e3801',
-                kind=cls.content_kind,
-            )
-        )
-        cls.channel_2 = Channel.objects.create(
-            id='84fcaec1e0514b62899d7f436384c401',
-            name='Channel 2',
-            main_tree=ContentNode.objects.create(
-                id='0548a548dda8487b8ac2f81145737cfc',
-                title='Main tree 2',
-                content_id=uuid.uuid4(),
-                node_id='f7547941d75d4712a53f566b4bf93250',
-                kind=cls.content_kind,
-            )
-        )
 
         cls.public_content_node_1 = PublicContentNode.objects.create(
-            id=uuid.UUID('f0ab32ce7dee4ee8a2d8e3dc2cf8a4a3'),
+            id=uuid.UUID('1234567890abcdef1234567890abcdef'),
             title='Public Content Node 1',
             content_id=uuid.uuid4(),
             channel_id=uuid.UUID('ddec09d74e834241a580c480ee37879c'),
         )
         cls.public_content_node_2 = PublicContentNode.objects.create(
-            id=uuid.UUID('98bd9283c7d24e02b338d7f52eabf9f6'),
+            id=uuid.UUID('abcdef1234567890abcdef1234567890'),
             title='Public Content Node 2',
             content_id=uuid.uuid4(),
             channel_id=uuid.UUID('84fcaec1e0514b62899d7f436384c401'),
         )
 
-        cls.content_node_1 = ContentNode.objects.create(
-            id='3c4d5847dd2f45568ce9ecd53e843a76',
-            title='Content Node 1',
-            content_id=uuid.uuid4(),
-            node_id='f0ab32ce7dee4ee8a2d8e3dc2cf8a4a3',
-            kind=cls.content_kind,
-        )
-        cls.content_node_2 = ContentNode.objects.create(
-            id='b41b9289c1e84ea4869a8fbbf85b9a15',
-            title='Content Node 2',
-            content_id=uuid.uuid4(),
-            node_id='98bd9283c7d24e02b338d7f52eabf9f6',
-            kind=cls.content_kind,
-        )
-
         cls.cache = RecommendationsCache.objects.create(
             request_hash=cls.adapter._generate_request_hash(cls.request),
             contentnode=cls.public_content_node_1,
@@ -216,6 +165,7 @@ def cache_request_test_helper(self, request_json, response_data, expected_count)
         cached_items = RecommendationsCache.objects.filter(
             request_hash=self.adapter._generate_request_hash(new_request)
         )
+        print(list(cached_items.values('request_hash', 'contentnode', 'rank', 'override_threshold')))
         self.assertEqual(cached_items.count(), expected_count)
 
     def test_cache_embeddings_request_success(self):
@@ -243,6 +193,23 @@ def test_cache_embeddings_request_invalid_data(self):
     @patch('contentcuration.utils.recommendations.EmbedTopicsRequest')
     def test_get_recommendations_success(self, mock_embed_topics_request, mock_response_exists,
                                          mock_generate_embeddings, mock_cache_embeddings_request):
+        channel = testdata.channel('Public Channel')
+        channel.public = True
+        channel.save()
+
+        public_node_1 = PublicContentNode.objects.create(
+            id=uuid.UUID('00000000000000000000000000000003'),
+            title='Video 1',
+            content_id=uuid.uuid4(),
+            channel_id=uuid.UUID(channel.id),
+        )
+        public_node_2 = PublicContentNode.objects.create(
+            id=uuid.UUID('00000000000000000000000000000005'),
+            title='Exercise 1',
+            content_id=uuid.uuid4(),
+            channel_id=uuid.UUID(channel.id),
+        )
+
         mock_response_exists.return_value = None
         mock_response = MagicMock(spec=EmbeddingsResponse)
         mock_response.data = copy.deepcopy(self.api_response.data)
@@ -250,12 +217,15 @@ def test_get_recommendations_success(self, mock_embed_topics_request, mock_respo
         mock_generate_embeddings.return_value = mock_response
 
         response = self.adapter.get_recommendations(self.topic)
+        results = list(response.results)
+        expected_node_ids = [public_node_1.id.hex, public_node_2.id.hex]
+        actual_node_ids = [result["node_id"] for result in results]
 
         mock_response_exists.assert_called_once()
         mock_generate_embeddings.assert_called_once()
         self.assertIsInstance(response, RecommendationsResponse)
-        self.assertListEqual(list(response.results), self.recommendations_response)
-        self.assertEqual(len(response.results), 2)
+        self.assertListEqual(expected_node_ids, actual_node_ids)
+        self.assertEqual(len(results), 2)
 
     @patch('contentcuration.utils.recommendations.RecommendationsAdapter._extract_data')
     @patch('contentcuration.utils.recommendations.RecommendationsAdapter.response_exists')
diff --git a/contentcuration/contentcuration/utils/recommendations.py b/contentcuration/contentcuration/utils/recommendations.py
index be2daae66d..20c6cb05f2 100644
--- a/contentcuration/contentcuration/utils/recommendations.py
+++ b/contentcuration/contentcuration/utils/recommendations.py
@@ -227,12 +227,11 @@ def get_recommendations(self, topic: Dict[str, Any],
                     tree_id=F('main_tree__tree_id'),
                 ).distinct()
             )
-            print(list(Channel.objects.all()))
 
             recommendations = channel_cte.join(
                 ContentNode.objects.filter(node_id__in=node_ids),
                 tree_id=channel_cte.col.tree_id
-            ).with_cte(channel_cte) .annotate(
+            ).with_cte(channel_cte).annotate(
                 main_tree_id=channel_cte.col.main_tree_id
             ).values(
                 'id',
@@ -240,7 +239,6 @@ def get_recommendations(self, topic: Dict[str, Any],
                 'main_tree_id',
                 'parent_id',
             )
-            print(list(recommendations))
 
         return RecommendationsResponse(results=recommendations)