feat: add admin action for dump to coursegraph

This introduces two admin actions:
* Dump to CourseGraph (respect cache), and
* Dump to CourseGraph (override cache)

which allow admins to select a collection of courses from Django
admin and dump them to the Neo4j instance specified by
settings.COURSEGRAPH_CONNECTION, with or without respecting
the cache (that is: whether the course has already been dumped
since its last publishing).
This commit is contained in:
Kyle McCormick
2022-01-24 11:52:49 -05:00
committed by Julia Eskew
parent d75a32c009
commit d16fe9d427
4 changed files with 371 additions and 0 deletions

View File

@@ -0,0 +1,123 @@
"""
Admin site bindings for coursegraph
"""
import logging
from django.contrib import admin, messages
from django.utils.translation import gettext as _
from edx_django_utils.admin.mixins import ReadOnlyAdminMixin
from .models import CourseGraphCourseDump
from .tasks import ModuleStoreSerializer
log = logging.getLogger(__name__)
@admin.action(
permissions=['change'],
description=_("Dump courses to CourseGraph (respect cache)"),
)
def dump_courses(modeladmin, request, queryset):
"""
Admin action to enqueue Dump-to-CourseGraph tasks for a set of courses,
excluding courses that haven't been published since they were last dumped.
queryset is a QuerySet of CourseGraphCourseDump objects, which are just
CourseOverview objects under the hood.
"""
all_course_keys = queryset.values_list('id', flat=True)
serializer = ModuleStoreSerializer(all_course_keys)
try:
submitted, skipped = serializer.dump_courses_to_neo4j()
# Unfortunately there is no unified base class for the reasonable
# exceptions we could expect from py2neo (connection unavailable, bolt protocol
# error, and so on), so we just catch broadly, show a generic error banner,
# and then log the exception for site operators to look at.
except Exception as err: # pylint: disable=broad-except
log.exception(
"Failed to enqueue CourseGraph dumps to Neo4j (respecting cache): %s",
", ".join(str(course_key) for course_key in all_course_keys),
)
modeladmin.message_user(
request,
_("Error enqueueing dumps for {} course(s): {}").format(
len(all_course_keys), str(err)
),
level=messages.ERROR,
)
return
if submitted:
modeladmin.message_user(
request,
_(
"Enqueued dumps for {} course(s). Skipped {} unchanged course(s)."
).format(len(submitted), len(skipped)),
level=messages.SUCCESS,
)
else:
modeladmin.message_user(
request,
_(
"Skipped all {} course(s), as they were unchanged.",
).format(len(skipped)),
level=messages.WARNING,
)
@admin.action(
permissions=['change'],
description=_("Dump courses to CourseGraph (override cache)")
)
def dump_courses_overriding_cache(modeladmin, request, queryset):
"""
Admin action to enqueue Dump-to-CourseGraph tasks for a set of courses
(whether or not they have been published recently).
queryset is a QuerySet of CourseGraphCourseDump objects, which are just
CourseOverview objects under the hood.
"""
all_course_keys = queryset.values_list('id', flat=True)
serializer = ModuleStoreSerializer(all_course_keys)
try:
submitted, _skipped = serializer.dump_courses_to_neo4j(override_cache=True)
# Unfortunately there is no unified base class for the reasonable
# exceptions we could expect from py2neo (connection unavailable, bolt protocol
# error, and so on), so we just catch broadly, show a generic error banner,
# and then log the exception for site operators to look at.
except Exception as err: # pylint: disable=broad-except
log.exception(
"Failed to enqueue CourseGraph Neo4j course dumps (overriding cache): %s",
", ".join(str(course_key) for course_key in all_course_keys),
)
modeladmin.message_user(
request,
_("Error enqueueing dumps for {} course(s): {}").format(
len(all_course_keys), str(err)
),
level=messages.ERROR,
)
return
modeladmin.message_user(
request,
_("Enqueued dumps for {} course(s).").format(len(submitted)),
level=messages.SUCCESS,
)
@admin.register(CourseGraphCourseDump)
class CourseGraphCourseDumpAdmin(ReadOnlyAdminMixin, admin.ModelAdmin):
"""
Model admin for "Course graph course dumps".
Just a read-only table with some useful metadata, allowing admin users to
select courses to be dumped to CourseGraph.
"""
list_display = [
'id',
'display_name',
'modified',
'enrollment_start',
'enrollment_end',
]
search_fields = ['id', 'display_name']
actions = [dump_courses, dump_courses_overriding_cache]

View File

@@ -0,0 +1,21 @@
"""
(Proxy) models supporting CourseGraph.
"""
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
class CourseGraphCourseDump(CourseOverview):
"""
Proxy model for CourseOverview.
Does *not* create/update/delete CourseOverview objects - only reads the objects.
Uses the course IDs of the CourseOverview objects to determine which courses
can be dumped to CourseGraph.
"""
class Meta:
proxy = True
def __str__(self):
"""Represent ourselves with the course key."""
return str(self.id)

View File

@@ -0,0 +1,227 @@
"""
Shallow tests for CourseGraph dump-queueing Django admin interface.
See ..management.commands.tests.test_dump_to_neo4j for more comprehensive
tests of dump_course_to_neo4j.
"""
from unittest import mock
import py2neo
from django.test import TestCase
from django.test.utils import override_settings
from freezegun import freeze_time
from openedx.core.djangoapps.content.course_overviews.tests.factories import CourseOverviewFactory
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from .. import admin, tasks
_coursegraph_connection = {
"protocol": "bolt",
"secure": True,
"host": "example.edu",
"port": 7687,
"user": "neo4j",
"password": "fake-coursegraph-password",
}
_configure_coursegraph_connection = override_settings(
COURSEGRAPH_CONNECTION=_coursegraph_connection,
)
_patch_log_exception = mock.patch.object(
admin.log, 'exception', autospec=True
)
_patch_apply_dump_task = mock.patch.object(
tasks.dump_course_to_neo4j, 'apply_async'
)
_pretend_last_course_dump_was_may_2020 = mock.patch.object(
tasks,
'get_command_last_run',
new=(lambda _key, _graph: "2020-05-01"),
)
_patch_neo4j_graph = mock.patch.object(
tasks, 'Graph', autospec=True
)
_make_neo4j_graph_raise = mock.patch.object(
tasks, 'Graph', side_effect=py2neo.ConnectionUnavailable(
'we failed to connect or something!'
)
)
class CourseGraphAdminActionsTestCase(TestCase):
"""
Test CourseGraph Django admin actions.
"""
@classmethod
def setUpTestData(cls):
"""
Make course overviews with varying modification dates.
"""
super().setUpTestData()
cls.course_updated_in_april = CourseOverviewFactory(run='april_update')
cls.course_updated_in_june = CourseOverviewFactory(run='june_update')
cls.course_updated_in_july = CourseOverviewFactory(run='july_update')
cls.course_updated_in_august = CourseOverviewFactory(run='august_update')
# For each course overview, make an arbitrary update and then save()
# so that its `.modified` date is set.
with freeze_time("2020-04-01"):
cls.course_updated_in_april.marketing_url = "https://example.org"
cls.course_updated_in_april.save()
with freeze_time("2020-06-01"):
cls.course_updated_in_june.marketing_url = "https://example.org"
cls.course_updated_in_june.save()
with freeze_time("2020-07-01"):
cls.course_updated_in_july.marketing_url = "https://example.org"
cls.course_updated_in_july.save()
with freeze_time("2020-08-01"):
cls.course_updated_in_august.marketing_url = "https://example.org"
cls.course_updated_in_august.save()
@_configure_coursegraph_connection
@_pretend_last_course_dump_was_may_2020
@_patch_neo4j_graph
@_patch_apply_dump_task
@_patch_log_exception
def test_dump_courses(self, mock_log_exception, mock_apply_dump_task, mock_neo4j_graph):
"""
Test that dump_courses admin action dumps requested courses iff they have
been modified since the last dump to coursegraph.
"""
modeladmin_mock = mock.MagicMock()
# Request all courses except the August-updated one
requested_course_keys = {
str(self.course_updated_in_april.id),
str(self.course_updated_in_june.id),
str(self.course_updated_in_july.id),
}
admin.dump_courses(
modeladmin=modeladmin_mock,
request=mock.MagicMock(),
queryset=CourseOverview.objects.filter(id__in=requested_course_keys),
)
# User should have been messaged
assert modeladmin_mock.message_user.call_count == 1
assert modeladmin_mock.message_user.call_args.args[1] == (
"Enqueued dumps for 2 course(s). Skipped 1 unchanged course(s)."
)
# For enqueueing, graph should've been authenticated once, using configured settings.
assert mock_neo4j_graph.call_count == 1
assert mock_neo4j_graph.call_args.args == ()
assert mock_neo4j_graph.call_args.kwargs == _coursegraph_connection
# No errors should've been logged.
assert mock_log_exception.call_count == 0
# April course should have been skipped because the command was last run in May.
# Dumps for June and July courses should have been enqueued.
assert mock_apply_dump_task.call_count == 2
actual_dumped_course_keys = {
call_args.kwargs['kwargs']['course_key_string']
for call_args in mock_apply_dump_task.call_args_list
}
expected_dumped_course_keys = {
str(self.course_updated_in_june.id),
str(self.course_updated_in_july.id),
}
assert actual_dumped_course_keys == expected_dumped_course_keys
@_configure_coursegraph_connection
@_pretend_last_course_dump_was_may_2020
@_patch_neo4j_graph
@_patch_apply_dump_task
@_patch_log_exception
def test_dump_courses_overriding_cache(self, mock_log_exception, mock_apply_dump_task, mock_neo4j_graph):
"""
Test that dump_coursese_overriding_cach admin action dumps requested courses
whether or not they been modified since the last dump to coursegraph.
"""
modeladmin_mock = mock.MagicMock()
# Request all courses except the August-updated one
requested_course_keys = {
str(self.course_updated_in_april.id),
str(self.course_updated_in_june.id),
str(self.course_updated_in_july.id),
}
admin.dump_courses_overriding_cache(
modeladmin=modeladmin_mock,
request=mock.MagicMock(),
queryset=CourseOverview.objects.filter(id__in=requested_course_keys),
)
# User should have been messaged
assert modeladmin_mock.message_user.call_count == 1
assert modeladmin_mock.message_user.call_args.args[1] == (
"Enqueued dumps for 3 course(s)."
)
# For enqueueing, graph should've been authenticated once, using configured settings.
assert mock_neo4j_graph.call_count == 1
assert mock_neo4j_graph.call_args.args == ()
assert mock_neo4j_graph.call_args.kwargs == _coursegraph_connection
# No errors should've been logged.
assert mock_log_exception.call_count == 0
# April, June, and July courses should have all been dumped.
assert mock_apply_dump_task.call_count == 3
actual_dumped_course_keys = {
call_args.kwargs['kwargs']['course_key_string']
for call_args in mock_apply_dump_task.call_args_list
}
expected_dumped_course_keys = {
str(self.course_updated_in_april.id),
str(self.course_updated_in_june.id),
str(self.course_updated_in_july.id),
}
assert actual_dumped_course_keys == expected_dumped_course_keys
@_configure_coursegraph_connection
@_pretend_last_course_dump_was_may_2020
@_make_neo4j_graph_raise
@_patch_apply_dump_task
@_patch_log_exception
def test_dump_courses_error(self, mock_log_exception, mock_apply_dump_task, mock_neo4j_graph):
"""
Test that the dump_courses admin action dumps messages the user if an error
occurs when trying to enqueue course dumps.
"""
modeladmin_mock = mock.MagicMock()
# Request dump of all four courses.
admin.dump_courses(
modeladmin=modeladmin_mock,
request=mock.MagicMock(),
queryset=CourseOverview.objects.all()
)
# Admin user should have been messaged about failure.
assert modeladmin_mock.message_user.call_count == 1
assert modeladmin_mock.message_user.call_args.args[1] == (
"Error enqueueing dumps for 4 course(s): we failed to connect or something!"
)
# For enqueueing, graph should've been authenticated once, using configured settings.
assert mock_neo4j_graph.call_count == 1
assert mock_neo4j_graph.call_args.args == ()
assert mock_neo4j_graph.call_args.kwargs == _coursegraph_connection
# Exception should have been logged.
assert mock_log_exception.call_count == 1
assert "Failed to enqueue" in mock_log_exception.call_args.args[0]
# No courses should have been dumped.
assert mock_apply_dump_task.call_count == 0