Skip to content

Commit

Permalink
Merge pull request #4604 from akolson/embed-content-logic
Browse files Browse the repository at this point in the history
Extract content urls
  • Loading branch information
akolson authored Aug 12, 2024
2 parents b96d249 + 902875e commit 3d2f924
Show file tree
Hide file tree
Showing 10 changed files with 843 additions and 255 deletions.
44 changes: 44 additions & 0 deletions contentcuration/automation/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Generated by Django 3.2.24 on 2024-08-05 21:23
import uuid

import django.db.models.deletion
from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    """Initial migration for the ``automation`` app.

    Creates the ``RecommendationsCache`` table, adds single-column indexes on
    ``request_hash`` and ``contentnode``, and makes the
    (``request_hash``, ``contentnode``) pair unique.
    """

    initial = True

    dependencies = [
        ('kolibri_public', '0003_alter_file_preset'),
    ]

    operations = [
        migrations.CreateModel(
            name='RecommendationsCache',
            fields=[
                (
                    'id',
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ('request_hash', models.CharField(max_length=32, null=True)),
                ('rank', models.FloatField(default=0.0, null=True)),
                ('override_threshold', models.BooleanField(default=False)),
                ('timestamp', models.DateTimeField(auto_now_add=True)),
                (
                    'contentnode',
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='recommendations',
                        to='kolibri_public.contentnode',
                    ),
                ),
            ],
        ),
        # Single-column lookup indexes.
        migrations.AddIndex(
            model_name='recommendationscache',
            index=models.Index(fields=['request_hash'], name='request_hash_idx'),
        ),
        migrations.AddIndex(
            model_name='recommendationscache',
            index=models.Index(fields=['contentnode'], name='contentnode_idx'),
        ),
        # A content node may appear at most once per request hash.
        migrations.AlterUniqueTogether(
            name='recommendationscache',
            unique_together={('request_hash', 'contentnode')},
        ),
    ]
31 changes: 29 additions & 2 deletions contentcuration/automation/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,30 @@
# from django.db import models
import uuid

# Create your models here.
from django.db import models
from kolibri_public.models import ContentNode


REQUEST_HASH_INDEX_NAME = "request_hash_idx"
CONTENTNODE_INDEX_NAME = "contentnode_idx"


class RecommendationsCache(models.Model):
    """One cached recommendation: a content node stored against a request hash.

    The (``request_hash``, ``contentnode``) pair is unique, and each of the
    two columns also has its own named index.
    """

    # Random UUID primary key, not editable.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    request_hash = models.CharField(max_length=32, null=True)
    # Deleting the referenced content node cascades to its cache rows.
    contentnode = models.ForeignKey(
        ContentNode,
        null=True,
        blank=True,
        related_name='recommendations',
        on_delete=models.CASCADE,
    )
    rank = models.FloatField(default=0.0, null=True)
    override_threshold = models.BooleanField(default=False)
    # Set once, when the row is first created.
    timestamp = models.DateTimeField(auto_now_add=True)

    class Meta:
        unique_together = ('request_hash', 'contentnode')
        indexes = [
            models.Index(
                fields=['request_hash'],
                name=REQUEST_HASH_INDEX_NAME,
            ),
            models.Index(
                fields=['contentnode'],
                name=CONTENTNODE_INDEX_NAME,
            ),
        ]
121 changes: 50 additions & 71 deletions contentcuration/automation/tests/appnexus/test_base.py
Original file line number Diff line number Diff line change
@@ -1,69 +1,26 @@
import time
import pytest
import requests
from unittest.mock import patch

from automation.utils.appnexus.base import Adapter
import mock
import pytest
import requests
from automation.utils.appnexus.base import Backend
from automation.utils.appnexus.base import BackendRequest
from automation.utils.appnexus.base import BackendResponse
from automation.utils.appnexus.base import SessionWithMaxConnectionAge
from automation.utils.appnexus.errors import ConnectionError
from automation.utils.appnexus.errors import InvalidResponse


class MockBackend(Backend):
    """``Backend`` subclass aimed at the Kolibri dev server's ``/status`` endpoint.

    Both overrides simply delegate to the parent implementation.
    """

    base_url = 'https://kolibri-dev.learningequality.org'
    connect_endpoint = '/status'

    # NOTE(review): annotated ``-> None`` although the parent's result is
    # returned and test_backend_connect compares it with True - confirm.
    def connect(self) -> None:
        outcome = super().connect()
        return outcome

    def make_request(self, request):
        response = super().make_request(request)
        return response

class ErrorBackend(Backend):
    """``Backend`` subclass configured with ``https://bad-url.com`` as its base URL.

    Used by the error-path tests; both overrides delegate to the parent.
    """

    base_url = 'https://bad-url.com'
    connect_endpoint = '/status'

    def connect(self) -> None:
        outcome = super().connect()
        return outcome

    def make_request(self, request):
        response = super().make_request(request)
        return response


class MockAdapter(Adapter):
    """Minimal ``Adapter`` subclass exposing a single no-op operation."""

    def mockoperation(self):
        """Do nothing."""
        return None


def test_backend_singleton():
    """Two ``MockBackend()`` calls must hand back the very same object."""
    first = MockBackend()
    second = MockBackend()
    assert first is second


def test_adapter_creation():
    """A ``MockAdapter`` can be constructed and is an ``Adapter``."""
    # NOTE(review): the backend *class* is passed here, whereas the other
    # adapter tests pass an instance - confirm this is intended.
    adapter = MockAdapter(backend=MockBackend)
    assert isinstance(adapter, Adapter)


def test_adapter_backend_default():
    """The backend stored on an ``Adapter`` is a ``Backend`` instance."""
    adapter = Adapter(backend=MockBackend())
    assert isinstance(adapter.backend, Backend)


def test_adapter_backend_custom():
    """An ``Adapter`` keeps the exact backend object it was given."""
    supplied_backend = MockBackend()
    adapter = Adapter(backend=supplied_backend)
    assert adapter.backend is supplied_backend

def test_session_with_max_connection_age_request():
    """One ``request`` call on the session reaches ``requests.Session.request`` once."""
    request_patch = patch.object(requests.Session, 'request')
    with request_patch as mock_request:
        SessionWithMaxConnectionAge().request('GET', 'https://example.com')
        assert mock_request.call_count == 1


def test_session_with_max_connection_age_not_closing_connections():
with patch.object(requests.Session, 'close') as mock_close,\
patch.object(requests.Session, 'request') as mock_request:
with patch.object(requests.Session, 'close') as mock_close, patch.object(requests.Session, 'request') as mock_request:
session = SessionWithMaxConnectionAge(60)
session.request('GET', 'https://example.com')
time.sleep(0.1)
Expand All @@ -72,9 +29,9 @@ def test_session_with_max_connection_age_not_closing_connections():
assert mock_close.call_count == 0
assert mock_request.call_count == 2


def test_session_with_max_connection_age_closing_connections():
with patch.object(requests.Session, 'close') as mock_close,\
patch.object(requests.Session, 'request') as mock_request:
with patch.object(requests.Session, 'close') as mock_close, patch.object(requests.Session, 'request') as mock_request:
session = SessionWithMaxConnectionAge(1)
session.request('GET', 'https://example.com')
time.sleep(2)
Expand All @@ -83,33 +40,55 @@ def test_session_with_max_connection_age_closing_connections():
assert mock_close.call_count == 1
assert mock_request.call_count == 2

def test_backend_connect():
    """``MockBackend.connect()`` is expected to report True."""
    # NOTE(review): MockBackend points at a real host, so this presumably
    # needs network access - confirm.
    assert MockBackend().connect() is True
@mock.patch("automation.utils.appnexus.base.Backend.connect")
def test_backend_connect(mock_connect):
    """``Backend.connect`` is invoked exactly once and its True result is returned."""
    # NOTE(review): the method under test is itself patched, so this checks
    # the mock wiring rather than the real connect logic.
    mock_connect.return_value = True

    outcome = Backend().connect()

    mock_connect.assert_called_once()
    assert outcome is True

def test_backend_connect_error():
    """``ErrorBackend.connect()`` is expected to report False."""
    assert ErrorBackend().connect() is False
@mock.patch("automation.utils.appnexus.base.Backend.connect")
def test_backend_connect_error(mock_connect):
mock_connect.side_effect = [ConnectionError("Failed to connect"), False]

def test_backend_request():
request = BackendRequest('GET', '/api/public/info')
backend = Backend()

backend = MockBackend()
with pytest.raises(ConnectionError) as exc_info:
backend.connect()
assert str(exc_info.value) == "Failed to connect"

result = backend.connect()
assert result is False

assert mock_connect.call_count == 2


@mock.patch("automation.utils.appnexus.base.Backend.make_request")
def test_backend_request(mock_make_request):
mock_response = BackendResponse(data=[{"key": "value"}])
mock_make_request.return_value = mock_response

backend = Backend()
request = BackendRequest(method="GET", path="/api/test")
response = backend.make_request(request)

assert response.status_code == 200
assert len(response.__dict__) > 0
assert response == mock_response
mock_make_request.assert_called_once_with(request)


def test_backend_request_error():
request = BackendRequest('GET', '/api/public/info')
@mock.patch("automation.utils.appnexus.base.Backend.make_request")
def test_backend_request_error(mock_make_request):
mock_make_request.side_effect = InvalidResponse("Request failed")

backend = ErrorBackend()
backend = Backend()
request = BackendRequest(method="GET", path="/api/test")

with pytest.raises(ConnectionError) as error:
with pytest.raises(InvalidResponse) as exc_info:
backend.make_request(request)

assert "Unable to connect to" in str(error.value)
assert str(exc_info.value) == "Request failed"
mock_make_request.assert_called_once_with(request)
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import uuid

from automation.models import RecommendationsCache
from django.db import IntegrityError
from kolibri_public.models import ContentNode

from contentcuration.tests.base import StudioTestCase


class TestRecommendationsCache(StudioTestCase):
    """Creation, lookup and uniqueness behaviour of ``RecommendationsCache`` rows."""

    def setUp(self):
        """Create one content node and one cache row that points at it."""
        node_fields = dict(
            id=uuid.uuid4(),
            title='Test Content Node',
            content_id=uuid.uuid4(),
            channel_id=uuid.uuid4(),
        )
        self.content_node = ContentNode.objects.create(**node_fields)
        self.cache = RecommendationsCache.objects.create(
            request_hash='test_hash',
            contentnode=self.content_node,
            rank=1.0,
            override_threshold=False,
        )

    def test_cache_creation(self):
        """The row built in ``setUp`` carries the values it was created with."""
        entry = self.cache
        self.assertIsInstance(entry, RecommendationsCache)
        self.assertEqual(entry.request_hash, 'test_hash')
        self.assertEqual(entry.contentnode, self.content_node)
        self.assertEqual(entry.rank, 1.0)
        self.assertFalse(entry.override_threshold)

    def test_cache_retrieval(self):
        """Looking the row up by its request hash returns the same row."""
        fetched = RecommendationsCache.objects.get(request_hash='test_hash')
        self.assertEqual(fetched, self.cache)

    def test_cache_uniqueness(self):
        """A second row with the same hash and content node is rejected."""
        duplicate_fields = dict(
            request_hash='test_hash',
            contentnode=self.content_node,
            rank=2.0,
            override_threshold=True,
        )
        with self.assertRaises(IntegrityError):
            RecommendationsCache.objects.create(**duplicate_fields)

    def test_bulk_create_ignore_conflicts_true(self):
        """With ``ignore_conflicts=True`` duplicates are skipped without an error."""
        rows_before = RecommendationsCache.objects.count()
        duplicates = [self.cache, self.cache]
        try:
            RecommendationsCache.objects.bulk_create(duplicates, ignore_conflicts=True)
        except IntegrityError:
            self.fail("bulk_create raised IntegrityError unexpectedly!")

        rows_after = RecommendationsCache.objects.count()
        self.assertEqual(rows_before, rows_after)

    def test_bulk_create_ignore_conflicts_false(self):
        """With ``ignore_conflicts=False`` duplicates raise ``IntegrityError``."""
        duplicates = [self.cache, self.cache]
        with self.assertRaises(IntegrityError):
            RecommendationsCache.objects.bulk_create(duplicates, ignore_conflicts=False)
10 changes: 4 additions & 6 deletions contentcuration/automation/utils/appnexus/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ def __init__(

class BackendResponse(object):
    """Base class for backend responses; specific backends should subclass it.

    Every keyword argument given to the constructor is stored on the instance
    as an attribute of the same name. Nothing is set implicitly: an attribute
    such as ``error`` exists only when it was passed in explicitly.
    """

    def __init__(self, **kwargs):
        """Copy each ``name=value`` keyword argument onto the instance."""
        # setattr (rather than updating __dict__) so that subclasses defining
        # properties or descriptors still see the assignment.
        for key, value in kwargs.items():
            setattr(self, key, value)

Expand Down Expand Up @@ -169,11 +168,10 @@ def connect(self, **kwargs):

def make_request(self, request):
""" Make a request to the backend service. """
response = self._make_request(request)
try:
info = response.json()
info.update({"status_code": response.status_code})
return BackendResponse(**info)
response = self._make_request(request)
response_body = dict(data=response.json())
return BackendResponse(**response_body)
except ValueError as e:
logging.exception(e)
raise errors.InvalidResponse("Invalid response from backend")
Expand Down
1 change: 1 addition & 0 deletions contentcuration/contentcuration/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
'django.contrib.postgres',
'django_celery_results',
'kolibri_public',
'automation',
)

SESSION_ENGINE = "django.contrib.sessions.backends.cached_db"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import unittest
from unittest.mock import MagicMock

from contentcuration.utils.automation_manager import AutomationManager

Expand All @@ -11,30 +10,3 @@ def setUp(self):
def test_creation(self):
    """The fixture attribute holds an ``AutomationManager`` instance."""
    manager = self.automation_manager
    self.assertIsInstance(manager, AutomationManager)

def test_generate_embedding(self):
    """``generate_embedding`` returns something other than None for a text input."""
    # The adapter's generate_embedding is not implemented yet, so it is
    # replaced with a stub that returns a fixed vector.
    adapter = self.automation_manager.recommendations_backend_adapter
    adapter.generate_embedding = MagicMock(return_value=[0.1, 0.2, 0.3])

    sample_text = "Some text that needs to be embedded"
    result = self.automation_manager.generate_embedding(sample_text)
    self.assertIsNotNone(result)

def test_embedding_exists(self):
    """``embedding_exists`` reports True for a sample vector."""
    # No real implementation exists yet, so embedding_exists currently
    # returns True regardless of its input.
    sample_vector = [0.1, 0.2, 0.3]
    found = self.automation_manager.embedding_exists(sample_vector)
    self.assertTrue(found)

def test_load_recommendations(self):
    """``load_recommendations`` returns a list when the adapter is stubbed."""
    sample_vector = [0.1, 0.2, 0.3]
    adapter = self.automation_manager.recommendations_backend_adapter
    adapter.get_recommendations = MagicMock(return_value=["item1", "item2"])

    loaded = self.automation_manager.load_recommendations(sample_vector)
    self.assertIsInstance(loaded, list)

def test_cache_embeddings(self):
    """``cache_embeddings`` reports success for a one-vector list."""
    # The function currently returns True unconditionally.
    vectors = [[0.1, 0.2, 0.3]]
    outcome = self.automation_manager.cache_embeddings(vectors)
    self.assertTrue(outcome)
Loading

0 comments on commit 3d2f924

Please sign in to comment.