Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract content urls #4604

Merged
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions contentcuration/automation/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Generated by Django 3.2.24 on 2024-08-01 13:52
import uuid

import django.db.models.deletion
from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    """Initial schema for the ``automation`` app.

    Creates the ``RecommendationsCache`` table, adds single-column indexes on
    ``request_hash`` and ``contentnode_id``, and makes the
    ``(request_hash, contentnode_id)`` pair unique.
    """

    initial = True

    # The foreign key below targets kolibri_public.contentnode, so this
    # migration depends on that app's migration.
    dependencies = [
        ('kolibri_public', '0003_alter_file_preset'),
    ]

    operations = [
        migrations.CreateModel(
            name='RecommendationsCache',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True,
                                        serialize=False)),
                ('request_hash', models.CharField(max_length=32, null=True)),
                ('rank', models.FloatField(default=0.0, null=True)),
                ('override_threshold', models.BooleanField(default=False)),
                ('timestamp', models.DateTimeField(auto_now_add=True)),
                ('contentnode_id', models.ForeignKey(blank=True, null=True,
                                                     on_delete=django.db.models.deletion.SET_NULL,
                                                     related_name='recommendations',
                                                     to='kolibri_public.contentnode')),
            ],
        ),
        migrations.AddIndex(
            model_name='recommendationscache',
            index=models.Index(fields=['request_hash'], name='request_hash_idx'),
        ),
        migrations.AddIndex(
            model_name='recommendationscache',
            index=models.Index(fields=['contentnode_id'], name='contentnode_id_idx'),
        ),
        migrations.AlterUniqueTogether(
            name='recommendationscache',
            unique_together={('request_hash', 'contentnode_id')},
        ),
    ]
31 changes: 29 additions & 2 deletions contentcuration/automation/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,30 @@
# from django.db import models
import uuid

# Create your models here.
from django.db import models
from kolibri_public.models import ContentNode


# Names given to the two database indexes declared in
# RecommendationsCache.Meta.indexes.
REQUEST_HASH_INDEX_NAME = "request_hash_idx"
CONTENTNODE_ID_INDEX_NAME = "contentnode_id_idx"


class RecommendationsCache(models.Model):
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
request_hash = models.CharField(max_length=32, null=True)
contentnode_id = models.ForeignKey(
ContentNode,
null=True,
blank=True,
related_name='recommendations',
on_delete=models.SET_NULL,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reasoning for setting this to null on deletion of the node?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In light of your comment here, I guess it defeats the purpose to set it to null and keep the record. I think a better approach would be to cascade the deletion?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I think cascade deletion should be fine.

)
rank = models.FloatField(default=0.0, null=True)
override_threshold = models.BooleanField(default=False)
timestamp = models.DateTimeField(auto_now_add=True)

class Meta:
unique_together = ('request_hash', 'contentnode_id')
indexes = [
models.Index(fields=['request_hash'], name=REQUEST_HASH_INDEX_NAME),
models.Index(fields=['contentnode_id'], name=CONTENTNODE_ID_INDEX_NAME),
]
121 changes: 50 additions & 71 deletions contentcuration/automation/tests/appnexus/test_base.py
Original file line number Diff line number Diff line change
@@ -1,69 +1,26 @@
import time
import pytest
import requests
from unittest.mock import patch

from automation.utils.appnexus.base import Adapter
import mock
import pytest
import requests
from automation.utils.appnexus.base import Backend
from automation.utils.appnexus.base import BackendRequest
from automation.utils.appnexus.base import BackendResponse
from automation.utils.appnexus.base import SessionWithMaxConnectionAge
from automation.utils.appnexus.errors import ConnectionError
from automation.utils.appnexus.errors import InvalidResponse


class MockBackend(Backend):
    """``Backend`` subclass bound to ``https://kolibri-dev.learningequality.org``.

    Both methods simply forward to the parent implementation.
    """

    base_url = 'https://kolibri-dev.learningequality.org'
    connect_endpoint = '/status'

    def connect(self) -> bool:
        """Forward to ``Backend.connect`` and return its result.

        Annotated ``bool`` rather than ``None``: the tests in this module
        assert that the returned value ``is True``.
        """
        return super().connect()

    def make_request(self, request):
        """Forward ``request`` to ``Backend.make_request`` and return its result."""
        return super().make_request(request)

class ErrorBackend(Backend):
    """``Backend`` subclass bound to ``https://bad-url.com``.

    Used by the failure-path tests in this module; both methods simply
    forward to the parent implementation.
    """

    base_url = 'https://bad-url.com'
    connect_endpoint = '/status'

    def connect(self) -> bool:
        """Forward to ``Backend.connect`` and return its result.

        Annotated ``bool`` rather than ``None``: the tests in this module
        assert that the returned value ``is False``.
        """
        return super().connect()

    def make_request(self, request):
        """Forward ``request`` to ``Backend.make_request`` and return its result."""
        return super().make_request(request)


class MockAdapter(Adapter):
    """Minimal ``Adapter`` subclass exposing a single no-op operation."""

    def mockoperation(self):
        """Do nothing and return ``None``."""
        return None


def test_backend_singleton():
    """Two ``MockBackend()`` calls must yield the very same object."""
    first = MockBackend()
    second = MockBackend()
    assert first is second


def test_adapter_creation():
    """A ``MockAdapter`` built with ``MockBackend`` is an ``Adapter`` instance."""
    # NOTE(review): the MockBackend class itself is passed here, whereas the
    # sibling adapter tests pass an instance - confirm this is intentional.
    adapter = MockAdapter(backend=MockBackend)
    assert isinstance(adapter, Adapter)


def test_adapter_backend_default():
    """``Adapter.backend`` is a ``Backend`` when built with a ``MockBackend`` instance."""
    adapter = Adapter(backend=MockBackend())
    assert isinstance(adapter.backend, Backend)


def test_adapter_backend_custom():
    """``Adapter.backend`` is the exact backend object handed to the constructor."""
    supplied_backend = MockBackend()
    adapter = Adapter(backend=supplied_backend)
    assert adapter.backend is supplied_backend

def test_session_with_max_connection_age_request():
    """One ``session.request`` call reaches ``requests.Session.request`` exactly once."""
    # requests.Session.request is patched for the duration of the block; the
    # test only counts calls made to the patched method.
    with patch.object(requests.Session, 'request') as request_mock:
        session = SessionWithMaxConnectionAge()
        session.request('GET', 'https://example.com')
        assert request_mock.call_count == 1


def test_session_with_max_connection_age_not_closing_connections():
with patch.object(requests.Session, 'close') as mock_close,\
patch.object(requests.Session, 'request') as mock_request:
with patch.object(requests.Session, 'close') as mock_close, patch.object(requests.Session, 'request') as mock_request:
session = SessionWithMaxConnectionAge(60)
session.request('GET', 'https://example.com')
time.sleep(0.1)
Expand All @@ -72,9 +29,9 @@ def test_session_with_max_connection_age_not_closing_connections():
assert mock_close.call_count == 0
assert mock_request.call_count == 2


def test_session_with_max_connection_age_closing_connections():
with patch.object(requests.Session, 'close') as mock_close,\
patch.object(requests.Session, 'request') as mock_request:
with patch.object(requests.Session, 'close') as mock_close, patch.object(requests.Session, 'request') as mock_request:
session = SessionWithMaxConnectionAge(1)
session.request('GET', 'https://example.com')
time.sleep(2)
Expand All @@ -83,33 +40,55 @@ def test_session_with_max_connection_age_closing_connections():
assert mock_close.call_count == 1
assert mock_request.call_count == 2

def test_backend_connect():
    """``MockBackend().connect()`` must return exactly ``True``."""
    assert MockBackend().connect() is True
@mock.patch("automation.utils.appnexus.base.Backend.connect")
def test_backend_connect(mock_connect):
    """A patched ``Backend.connect`` is called once and its ``True`` result is returned."""
    # NOTE(review): connect itself is the patched attribute, so this checks
    # the mock wiring rather than any real connection logic.
    mock_connect.return_value = True

    connected = Backend().connect()

    mock_connect.assert_called_once()
    assert connected is True

def test_backend_connect_error():
    """``ErrorBackend().connect()`` must return exactly ``False``."""
    assert ErrorBackend().connect() is False
@mock.patch("automation.utils.appnexus.base.Backend.connect")
def test_backend_connect_error(mock_connect):
mock_connect.side_effect = [ConnectionError("Failed to connect"), False]

def test_backend_request():
request = BackendRequest('GET', '/api/public/info')
backend = Backend()

backend = MockBackend()
with pytest.raises(ConnectionError) as exc_info:
backend.connect()
assert str(exc_info.value) == "Failed to connect"

result = backend.connect()
assert result is False

assert mock_connect.call_count == 2


@mock.patch("automation.utils.appnexus.base.Backend.make_request")
def test_backend_request(mock_make_request):
mock_response = BackendResponse(data=[{"key": "value"}])
mock_make_request.return_value = mock_response

backend = Backend()
request = BackendRequest(method="GET", path="/api/test")
response = backend.make_request(request)

assert response.status_code == 200
assert len(response.__dict__) > 0
assert response == mock_response
mock_make_request.assert_called_once_with(request)


def test_backend_request_error():
request = BackendRequest('GET', '/api/public/info')
@mock.patch("automation.utils.appnexus.base.Backend.make_request")
def test_backend_request_error(mock_make_request):
mock_make_request.side_effect = InvalidResponse("Request failed")

backend = ErrorBackend()
backend = Backend()
request = BackendRequest(method="GET", path="/api/test")

with pytest.raises(ConnectionError) as error:
with pytest.raises(InvalidResponse) as exc_info:
backend.make_request(request)

assert "Unable to connect to" in str(error.value)
assert str(exc_info.value) == "Request failed"
mock_make_request.assert_called_once_with(request)
10 changes: 4 additions & 6 deletions contentcuration/automation/utils/appnexus/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ def __init__(

class BackendResponse(object):
""" Class that should be inherited by specific backend for its responses"""
def __init__(self, error=None, **kwargs):
self.error = error
def __init__(self, **kwargs):
for key, value in kwargs.items():
setattr(self, key, value)

Expand Down Expand Up @@ -169,11 +168,10 @@ def connect(self, **kwargs):

def make_request(self, request):
""" Make a request to the backend service. """
response = self._make_request(request)
try:
info = response.json()
info.update({"status_code": response.status_code})
return BackendResponse(**info)
response = self._make_request(request)
response_body = dict(data=response.json())
return BackendResponse(**response_body)
except ValueError as e:
logging.exception(e)
raise errors.InvalidResponse("Invalid response from backend")
Expand Down
1 change: 1 addition & 0 deletions contentcuration/contentcuration/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
'django.contrib.postgres',
'django_celery_results',
'kolibri_public',
'automation',
)

SESSION_ENGINE = "django.contrib.sessions.backends.cached_db"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import unittest
from unittest.mock import MagicMock

from contentcuration.utils.automation_manager import AutomationManager

Expand All @@ -11,30 +10,3 @@ def setUp(self):
def test_creation(self):
    """The ``automation_manager`` attribute on the test case is an ``AutomationManager``."""
    manager = self.automation_manager
    self.assertIsInstance(manager, AutomationManager)

def test_generate_embedding(self):
    """``generate_embedding`` yields a non-``None`` result when the adapter call is stubbed."""
    # The adapter's generate_embedding is replaced with a MagicMock because
    # its real implementation is yet to be done.
    adapter = self.automation_manager.recommendations_backend_adapter
    adapter.generate_embedding = MagicMock(return_value=[0.1, 0.2, 0.3])

    result = self.automation_manager.generate_embedding("Some text that needs to be embedded")

    self.assertIsNotNone(result)

def test_embedding_exists(self):
    """``embedding_exists`` reports a truthy value for a sample vector."""
    # No solid implementation exists for this yet, so the embedding_exists
    # function returns true regardless of its input.
    self.assertTrue(self.automation_manager.embedding_exists([0.1, 0.2, 0.3]))

def test_load_recommendations(self):
    """``load_recommendations`` returns a list when the adapter lookup is stubbed."""
    # Stub the adapter so the manager receives a fixed two-item list.
    adapter = self.automation_manager.recommendations_backend_adapter
    adapter.get_recommendations = MagicMock(return_value=["item1", "item2"])

    result = self.automation_manager.load_recommendations([0.1, 0.2, 0.3])

    self.assertIsInstance(result, list)

def test_cache_embeddings(self):
    """``cache_embeddings`` reports a truthy value for a one-vector list."""
    # Currently the function returns true regardless of its input.
    outcome = self.automation_manager.cache_embeddings([[0.1, 0.2, 0.3]])
    self.assertTrue(outcome)
Loading