Merge pull request #4824 from bjester/far-fetching-sql-details
Update the node details query to use a CTE and add more robust tests
rtibbles authored Nov 15, 2024
2 parents 78b3b71 + dad41bb commit e349e81
Showing 5 changed files with 185 additions and 38 deletions.
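
For context on the query change: the new code relies on django-cte's With wrapper -- build a narrow base queryset, wrap it in With(...), then query the CTE via cte.queryset().with_cte(cte). Below is a minimal sketch of that pattern, assuming a ContentNode model with the usual MPTT fields (tree_id, parent_id, lft, rght) plus modified/changed columns; the helper name is illustrative and not part of this diff.

from django.db.models import Max
from django_cte import With

from contentcuration.models import ContentNode


def latest_descendant_update(node):
    # Illustrative helper (not part of this PR): find the most recent
    # `modified` timestamp among a node's changed descendants via a CTE.
    # Scope the CTE to the node's tree first; tree_id is indexed, so the
    # base query stays cheap before the MPTT range filter is applied.
    cte = With(
        ContentNode.objects.filter(tree_id=node.tree_id)
        .values("id", "modified", "changed", "tree_id", "parent_id", "lft", "rght")
        .order_by()
    )
    qs = cte.queryset().with_cte(cte).filter(changed=True)
    if node.parent_id is not None:
        # Descendants of a non-root node sit strictly inside its lft/rght interval.
        qs = qs.filter(lft__gte=node.lft + 1, rght__lte=node.rght - 1)
    else:
        # For a root node, exclude the root itself but keep everything below it.
        qs = qs.filter(parent_id__isnull=False)
    return qs.aggregate(latest_update=Max("modified"))["latest_update"]
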
@@ -21,10 +21,10 @@
</VBtn>
</template>
<VList>
<VListTile @click="generatePdf">
<VListTile @click="generatePDF">
<VListTileTitle>{{ $tr('downloadPDF') }}</VListTileTitle>
</VListTile>
<VListTile data-test="dl-csv" @click="generateChannelsCSV([channelWithDetails])">
<VListTile data-test="dl-csv" @click="generateCSV">
<VListTileTitle>{{ $tr('downloadCSV') }}</VListTileTitle>
</VListTile>
</VList>
@@ -114,14 +114,26 @@
},
mounted() {
this.load();
this.$analytics.trackAction('channel_details', 'View', {
id: this.channelId,
});
},
methods: {
...mapActions('channel', ['loadChannel', 'loadChannelDetails']),
async generatePdf() {
async generatePDF() {
this.$analytics.trackEvent('channel_details', 'Download PDF', {
id: this.channelId,
});
this.loading = true;
await this.generateChannelsPDF([this.channelWithDetails]);
this.loading = false;
},
generateCSV() {
this.$analytics.trackEvent('channel_details', 'Download CSV', {
id: this.channelId,
});
this.generateChannelsCSV([this.channelWithDetails]);
},
load() {
this.loading = true;
const channelPromise = this.loadChannel(this.channelId);
@@ -493,6 +493,14 @@
return this.categories.join(', ');
},
},
mounted() {
if (!this.isChannel) {
// Track node details view when not a channel
this.$analytics.trackAction('node_details', 'View', {
id: this._details.id,
});
}
},
methods: {
channelUrl(channel) {
return window.Urls.channel(channel.id);
6 changes: 2 additions & 4 deletions contentcuration/contentcuration/models.py
@@ -1556,7 +1556,7 @@ def get_nodes_with_title(cls, title, limit_to_children_of=None):
return root.get_descendants().filter(title=title)
return cls.objects.filter(title=title)

def get_details(self, channel_id=None):
def get_details(self, channel=None):
"""
Returns information about the node and its children, including total size, languages, files, etc.
@@ -1578,9 +1578,7 @@ def get_details(self, channel_id=None):
# Get resources
resources = descendants.exclude(kind=content_kinds.TOPIC).order_by()

if channel_id:
channel = Channel.objects.filter(id=channel_id)[0]
else:
if not channel:
channel = self.get_channel()

if not resources.exists():
142 changes: 129 additions & 13 deletions contentcuration/contentcuration/tests/views/test_nodes.py
@@ -1,5 +1,6 @@
import datetime
import json
from contextlib import contextmanager

import pytz
from django.conf import settings
@@ -8,11 +9,16 @@
from mock import Mock
from mock import patch

from contentcuration.models import ContentNode
from contentcuration.tasks import generatenodediff_task
from contentcuration.tests.base import BaseAPITestCase


class NodesViewsTestCase(BaseAPITestCase):
def tearDown(self):
super().tearDown()
cache.clear()

def test_get_node_diff__missing_contentnode(self):
response = self.get(reverse("get_node_diff", kwargs=dict(updated_id="abc123", original_id="def456")))
self.assertEqual(response.status_code, 404)
@@ -32,29 +38,139 @@ def test_get_node_diff__task_processing(self, mock_find_incomplete_ids):
response = self.get(reverse("get_node_diff", kwargs=dict(updated_id=pk, original_id=pk)))
self.assertEqual(response.status_code, 302)

def test_get_channel_details(self):
url = reverse('get_channel_details', kwargs={"channel_id": self.channel.id})
response = self.get(url)

class DetailsTestCase(BaseAPITestCase):
def setUp(self):
super().setUp()
self.default_details = {
"resource_count": 5,
"resource_size": 100,
"kind_count": {"document": 3, "video": 2}
}
# see tree.json for where this comes from
self.node = ContentNode.objects.get(node_id="00000000000000000000000000000001")
# required by get_node_details
self.channel.make_public()

def tearDown(self):
super().tearDown()
cache.clear()

def _set_cache(self, node, last_update=None):
data = self.default_details.copy()
if last_update is not None:
data.update(last_update=pytz.utc.localize(last_update).strftime(settings.DATE_TIME_FORMAT))

cache_key = "details_{}".format(node.node_id)
cache.set(cache_key, json.dumps(data))

@contextmanager
def _check_details(self, node=None):
endpoint = "get_channel_details" if node is None else "get_node_details"
param = {"channel_id": self.channel.id} \
if endpoint == "get_channel_details" \
else {"node_id": node.id}
url = reverse(endpoint, kwargs=param)
response = self.get(url)
print(response.content)
details = json.loads(response.content)
assert details['resource_count'] > 0
assert details['resource_size'] > 0
assert len(details['kind_count']) > 0
yield details

def assertDetailsEqual(self, details, expected):
self.assertEqual(details['resource_count'], expected['resource_count'])
self.assertEqual(details['resource_size'], expected['resource_size'])
self.assertEqual(details['kind_count'], expected['kind_count'])

@patch("contentcuration.models.ContentNode.get_details")
def test_get_channel_details__uncached(self, mock_get_details):
mock_get_details.return_value = {
"resource_count": 7,
"resource_size": 200,
"kind_count": {"document": 33, "video": 22}
}
with self._check_details() as details:
self.assertDetailsEqual(details, mock_get_details.return_value)

def test_get_channel_details_cached(self):
cache_key = "details_{}".format(self.channel.main_tree.id)
mock_get_details.assert_called_once_with(channel=self.channel)

@patch("contentcuration.views.nodes.getnodedetails_task")
def test_get_channel_details__cached(self, task_mock):
# force the cache to update by adding a very old cache entry. Since Celery tasks run sync in the test suite,
# get_channel_details will return an updated cache value rather than generate it async.
data = {"last_update": pytz.utc.localize(datetime.datetime(1990, 1, 1)).strftime(settings.DATE_TIME_FORMAT)}
cache.set(cache_key, json.dumps(data))
self._set_cache(self.channel.main_tree, last_update=datetime.datetime(1990, 1, 1))

with patch("contentcuration.views.nodes.getnodedetails_task") as task_mock:
url = reverse('get_channel_details', kwargs={"channel_id": self.channel.id})
self.get(url)
with self._check_details() as details:
# check cache was returned
self.assertDetailsEqual(details, self.default_details)
# Check that the outdated cache prompts an asynchronous cache update
task_mock.enqueue.assert_called_once_with(self.user, node_id=self.channel.main_tree.id)

@patch("contentcuration.views.nodes.getnodedetails_task")
def test_get_channel_details__cached__not_updated__no_enqueue(self, task_mock):
# nothing changed
self.channel.main_tree.get_descendants(include_self=False).update(changed=False)
self._set_cache(self.channel.main_tree, last_update=datetime.datetime(1990, 1, 1))

with self._check_details() as details:
# check cache was returned
self.assertDetailsEqual(details, self.default_details)
task_mock.enqueue.assert_not_called()

@patch("contentcuration.views.nodes.getnodedetails_task")
def test_get_channel_details__cached__no_enqueue(self, task_mock):
# test last update handling
self._set_cache(self.channel.main_tree, last_update=datetime.datetime(2099, 1, 1))

with self._check_details() as details:
# check cache was returned
self.assertDetailsEqual(details, self.default_details)
task_mock.enqueue.assert_not_called()

@patch("contentcuration.models.ContentNode.get_details")
def test_get_node_details__uncached(self, mock_get_details):
mock_get_details.return_value = {
"resource_count": 7,
"resource_size": 200,
"kind_count": {"document": 33, "video": 22}
}
with self._check_details(node=self.node) as details:
self.assertDetailsEqual(details, mock_get_details.return_value)

mock_get_details.assert_called_once_with(channel=self.channel)

@patch("contentcuration.views.nodes.getnodedetails_task")
def test_get_node_details__cached(self, task_mock):
# force the cache to update by adding a very old cache entry. Since Celery tasks run sync in the test suite,
# get_node_details will return an updated cache value rather than generate it async.
self._set_cache(self.node, last_update=datetime.datetime(1990, 1, 1))

with self._check_details(node=self.node) as details:
# check cache was returned
self.assertDetailsEqual(details, self.default_details)
# Check that the outdated cache prompts an asynchronous cache update
task_mock.enqueue.assert_called_once_with(self.user, node_id=self.node.pk)

@patch("contentcuration.views.nodes.getnodedetails_task")
def test_get_node_details__cached__not_updated__no_enqueue(self, task_mock):
# nothing changed
self.channel.main_tree.get_descendants(include_self=False).update(changed=False)
self._set_cache(self.node, last_update=datetime.datetime(1990, 1, 1))

with self._check_details(node=self.node) as details:
# check cache was returned
self.assertDetailsEqual(details, self.default_details)
task_mock.enqueue.assert_not_called()

@patch("contentcuration.views.nodes.getnodedetails_task")
def test_get_node_details__cached__no_enqueue(self, task_mock):
# test last update handling
self._set_cache(self.node, last_update=datetime.datetime(2099, 1, 1))

with self._check_details(node=self.node) as details:
# check cache was returned
self.assertDetailsEqual(details, self.default_details)
task_mock.enqueue.assert_not_called()


class ChannelDetailsEndpointTestCase(BaseAPITestCase):
def test_200_post(self):
49 changes: 31 additions & 18 deletions contentcuration/contentcuration/views/nodes.py
@@ -8,6 +8,7 @@
from django.http import HttpResponse
from django.http import HttpResponseNotFound
from django.shortcuts import get_object_or_404
from django_cte import With
from rest_framework import status
from rest_framework.authentication import SessionAuthentication
from rest_framework.authentication import TokenAuthentication
@@ -36,7 +37,7 @@ def get_channel_details(request, channel_id):
channel = get_object_or_404(Channel.filter_view_queryset(Channel.objects.all(), request.user), id=channel_id)
if not channel.main_tree:
raise Http404
data = get_node_details_cached(request.user, channel.main_tree, channel_id=channel_id)
data = get_node_details_cached(request.user, channel.main_tree, channel)
return HttpResponse(json.dumps(data))


@@ -47,29 +48,41 @@ def get_node_details(request, node_id):
channel = node.get_channel()
if channel and not channel.public:
return HttpResponseNotFound("No topic found for {}".format(node_id))
data = get_node_details_cached(request.user, node)
data = get_node_details_cached(request.user, node, channel)
return HttpResponse(json.dumps(data))


def get_node_details_cached(user, node, channel_id=None):
def get_node_details_cached(user, node, channel):
cached_data = cache.get("details_{}".format(node.node_id))
if cached_data:
descendants = (
node.get_descendants()
.prefetch_related("children", "files", "tags")
.select_related("license", "language")
)
channel = node.get_channel()

if cached_data:
# If channel is a sushi chef channel, use date created for faster query
# Otherwise, find the last time anything was updated in the channel
last_update = (
channel.main_tree.created
if channel and channel.ricecooker_version
else descendants.filter(changed=True)
.aggregate(latest_update=Max("modified"))
.get("latest_update")
)
if channel and channel.ricecooker_version:
last_update = channel.main_tree.created
else:
# The query should never span across multiple trees, so we can filter by tree_id first.
# Since the tree_id is indexed, this should be a fast query, and a perfect candidate
# for the CTE select query.
cte = With(
ContentNode.objects.filter(tree_id=node.tree_id)
.values("id", "modified", "changed", "tree_id", "parent_id", "lft", "rght")
.order_by()
)
last_update_qs = cte.queryset().with_cte(cte).filter(changed=True)

# If the node is not the root node, the intent is to calculate node details for the
# descendants of this node
if node.parent_id is not None:
# See MPTTModel.get_descendants for details on the lft/rght query
last_update_qs = last_update_qs.filter(
lft__gte=node.lft + 1, rght__lte=node.rght - 1
)
else:
# Maintain that the query should not 'include_self'
last_update_qs = last_update_qs.filter(parent_id__isnull=False)

last_update = last_update_qs.aggregate(latest_update=Max("modified")).get("latest_update")

if last_update:
last_cache_update = datetime.strptime(
@@ -80,7 +93,7 @@ def get_node_details_cached(user, node, channel_id=None):
getnodedetails_task.enqueue(user, node_id=node.pk)
return json.loads(cached_data)

return node.get_details(channel_id=channel_id)
return node.get_details(channel=channel)


@api_view(["GET"])
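A note on the lft/rght filter above: for a non-root node it reproduces what MPTTModel.get_descendants(include_self=False) selects, just expressed against the CTE so the concrete table is only scanned once. A rough equivalent against the full table (with the tree_id filter written out explicitly, since the sketch is not already tree-scoped) could look like the following; this is illustrative only and not part of this diff.

from contentcuration.models import ContentNode


def descendants_via_range(node):
    # Illustrative only: selects the same rows as
    # node.get_descendants(include_self=False) for a valid MPTT tree,
    # spelled out as the raw lft/rght interval filter.
    return ContentNode.objects.filter(
        tree_id=node.tree_id,      # stay within the node's tree
        lft__gte=node.lft + 1,     # strictly inside the node's left edge
        rght__lte=node.rght - 1,   # strictly inside the node's right edge
    )
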
