diff --git a/cms/envs/common.py b/cms/envs/common.py index 6f4725f805..9c54fac20d 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -878,6 +878,9 @@ INSTALLED_APPS = ( 'openedx.core.djangoapps.content.course_structures.apps.CourseStructuresConfig', 'openedx.core.djangoapps.content.block_structure.apps.BlockStructureConfig', + # Coursegraph + 'openedx.core.djangoapps.coursegraph.apps.CoursegraphConfig', + # Credit courses 'openedx.core.djangoapps.credit', diff --git a/lms/envs/common.py b/lms/envs/common.py index 0f27c61456..bfeb9a3036 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -2034,6 +2034,9 @@ INSTALLED_APPS = ( 'openedx.core.djangoapps.content.block_structure.apps.BlockStructureConfig', 'lms.djangoapps.course_blocks', + # Coursegraph + 'openedx.core.djangoapps.coursegraph.apps.CoursegraphConfig', + # Old course structure API 'course_structure_api', diff --git a/openedx/core/djangoapps/coursegraph/__init__.py b/openedx/core/djangoapps/coursegraph/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openedx/core/djangoapps/coursegraph/apps.py b/openedx/core/djangoapps/coursegraph/apps.py new file mode 100644 index 0000000000..1c49ead924 --- /dev/null +++ b/openedx/core/djangoapps/coursegraph/apps.py @@ -0,0 +1,20 @@ +""" +Coursegraph Application Configuration + +Signal handlers are connected here. +""" + +from django.apps import AppConfig + + +class CoursegraphConfig(AppConfig): + """ + AppConfig for courseware app + """ + name = 'openedx.core.djangoapps.coursegraph' + + def ready(self): + """ + Import signals on startup + """ + from openedx.core.djangoapps.coursegraph import signals # pylint: disable=unused-variable diff --git a/openedx/core/djangoapps/coursegraph/management/__init__.py b/openedx/core/djangoapps/coursegraph/management/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openedx/core/djangoapps/coursegraph/management/commands/__init__.py b/openedx/core/djangoapps/coursegraph/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lms/djangoapps/courseware/management/commands/dump_to_neo4j.py b/openedx/core/djangoapps/coursegraph/management/commands/dump_to_neo4j.py similarity index 71% rename from lms/djangoapps/courseware/management/commands/dump_to_neo4j.py rename to openedx/core/djangoapps/coursegraph/management/commands/dump_to_neo4j.py index 726af44a9e..65d9fe30a5 100644 --- a/lms/djangoapps/courseware/management/commands/dump_to_neo4j.py +++ b/openedx/core/djangoapps/coursegraph/management/commands/dump_to_neo4j.py @@ -2,18 +2,23 @@ This file contains a management command for exporting the modulestore to neo4j, a graph database. """ -from __future__ import unicode_literals +from __future__ import unicode_literals, print_function import logging -from django.conf import settings -from django.core.management.base import BaseCommand, CommandError +from django.core.management.base import BaseCommand from django.utils import six +from opaque_keys.edx.keys import CourseKey from py2neo import Graph, Node, Relationship, authenticate from py2neo.compat import integer, string, unicode as neo4j_unicode from request_cache.middleware import RequestCache from xmodule.modulestore.django import modulestore -from opaque_keys.edx.keys import CourseKey + +from openedx.core.djangoapps.coursegraph.utils import ( + CommandLastRunCache, + CourseLastPublishedCache, +) + log = logging.getLogger(__name__) @@ -25,13 +30,17 @@ bolt_log.setLevel(logging.ERROR) ITERABLE_NEO4J_TYPES = (tuple, list, set, frozenset) PRIMITIVE_NEO4J_TYPES = (integer, string, neo4j_unicode, float, bool) +COMMAND_LAST_RUN_CACHE = CommandLastRunCache() +COURSE_LAST_PUBLISHED_CACHE = CourseLastPublishedCache() + class ModuleStoreSerializer(object): """ Class with functionality to serialize a modulestore into subgraphs, one graph per course. """ - def load_course_keys(self, courses=None): + + def __init__(self, courses=None): """ Sets the object's course_keys attribute from the `courses` parameter. If that parameter isn't furnished, loads all course_keys from the @@ -148,7 +157,6 @@ class ModuleStoreSerializer(object): return coerced_value - @staticmethod def add_to_transaction(neo4j_entities, transaction): """ @@ -159,12 +167,40 @@ class ModuleStoreSerializer(object): for entity in neo4j_entities: transaction.create(entity) + @staticmethod + def should_dump_course(course_key): + """ + Only dump the course if it's been changed since the last time it's been + dumped. + :param course_key: a CourseKey object. + :return: bool. Whether or not this course should be dumped to neo4j. + """ - def dump_courses_to_neo4j(self, graph): + last_this_command_was_run = COMMAND_LAST_RUN_CACHE.get(course_key) + last_course_had_published_event = COURSE_LAST_PUBLISHED_CACHE.get( + course_key + ) + + # if we have no record of this course being serialized, serialize it + if last_this_command_was_run is None: + return True + + # if we've serialized the course recently and we have no published + # events, we can skip re-serializing it + if last_this_command_was_run and last_course_had_published_event is None: + return False + + # otherwise, serialize if the command was run before the course's last + # published event + return last_this_command_was_run < last_course_had_published_event + + def dump_courses_to_neo4j(self, graph, override_cache=False): """ Parameters ---------- graph: py2neo graph object + override_cache: serialize the courses even if they'be been recently + serialized Returns two lists: one of the courses that were successfully written to neo4j, and one of courses that were not. @@ -185,6 +221,11 @@ class ModuleStoreSerializer(object): index + 1, total_number_of_courses, ) + + if not (override_cache or self.should_dump_course(course_key)): + log.info("skipping dumping %s, since it hasn't changed", course_key) + continue + nodes, relationships = self.serialize_course(course_key) log.info( "%d nodes and %d relationships in %s", @@ -217,6 +258,7 @@ class ModuleStoreSerializer(object): unsuccessful_courses.append(course_string) else: + COMMAND_LAST_RUN_CACHE.set(course_key) successful_courses.append(course_string) return successful_courses, unsuccessful_courses @@ -228,20 +270,34 @@ class Command(BaseCommand): Takes the following named arguments: host: the host of the neo4j server - port: the port on the server that accepts https requests + https_port: the port on the neo4j server that accepts https requests + http_port: the port on the neo4j server that accepts http requests + secure: if set, connects to server over https, otherwise uses http user: the username for the neo4j user password: the user's password + courses: list of course key strings to serialize. If not specified, all + courses in the modulestore are serialized. + override: if true, dump all--or all specified--courses, regardless of when + they were last dumped. If false, or not set, only dump those courses that + were updated since the last time the command was run. Example usage: - python manage.py lms dump_to_neo4j --host localhost --port 7473 \ - --user user --password password --settings=aws + python manage.py lms dump_to_neo4j --host localhost --https_port 7473 \ + --secure --user user --password password --settings=aws """ def add_arguments(self, parser): parser.add_argument('--host', type=unicode) - parser.add_argument('--port', type=int) + parser.add_argument('--https_port', type=int, default=7473) + parser.add_argument('--http_port', type=int, default=7474) + parser.add_argument('--secure', action='store_true') parser.add_argument('--user', type=unicode) parser.add_argument('--password', type=unicode) parser.add_argument('--courses', type=unicode, nargs='*') + parser.add_argument( + '--override', + action='store_true', + help='dump all--or all specified--courses, ignoring cache', + ) def handle(self, *args, **options): # pylint: disable=unused-argument """ @@ -249,12 +305,14 @@ class Command(BaseCommand): those graphs to neo4j. """ host = options['host'] - port = options['port'] + https_port = options['https_port'] + http_port = options['http_port'] + secure = options['secure'] neo4j_user = options['user'] neo4j_password = options['password'] authenticate( - "{host}:{port}".format(host=host, port=port), + "{host}:{port}".format(host=host, port=https_port if secure else http_port), neo4j_user, neo4j_password, ) @@ -263,20 +321,26 @@ class Command(BaseCommand): bolt=True, password=neo4j_password, user=neo4j_user, - https_port=port, + https_port=https_port, + http_port=http_port, host=host, - secure=True + secure=secure, ) - mss = ModuleStoreSerializer() - mss.load_course_keys(options['courses']) + mss = ModuleStoreSerializer(options['courses']) - successful_courses, unsuccessful_courses = mss.dump_courses_to_neo4j(graph) + successful_courses, unsuccessful_courses = mss.dump_courses_to_neo4j( + graph, override_cache=options['override'] + ) + + if not successful_courses and not unsuccessful_courses: + print("No courses exported to neo4j at all!") + return if successful_courses: print( "These courses exported to neo4j successfully:\n\t" + - "\n\t".join(successful_courses) + "\n\t".join(successful_courses) ) else: print("No courses exported to neo4j successfully.") diff --git a/openedx/core/djangoapps/coursegraph/management/commands/tests/__init__.py b/openedx/core/djangoapps/coursegraph/management/commands/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lms/djangoapps/courseware/management/commands/tests/test_dump_to_neo4j.py b/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py similarity index 62% rename from lms/djangoapps/courseware/management/commands/tests/test_dump_to_neo4j.py rename to openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py index c08b0af297..275e54cd1d 100644 --- a/lms/djangoapps/courseware/management/commands/tests/test_dump_to_neo4j.py +++ b/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py @@ -4,18 +4,21 @@ Tests for the dump_to_neo4j management command. """ from __future__ import unicode_literals +from datetime import datetime + import ddt import mock -from courseware.management.commands.dump_to_neo4j import ( - ModuleStoreSerializer, - ITERABLE_NEO4J_TYPES, -) from django.core.management import call_command -from django.core.management.base import CommandError from django.utils import six from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory +from openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j import ( + ModuleStoreSerializer, + ITERABLE_NEO4J_TYPES, +) +from openedx.core.djangoapps.coursegraph.signals import _listen_for_course_publish + class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase): """ @@ -44,7 +47,7 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): Tests for the dump to neo4j management command """ - @mock.patch('courseware.management.commands.dump_to_neo4j.Graph') + @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.Graph') @ddt.data(1, 2) def test_dump_specific_courses(self, number_of_courses, mock_graph_class): """ @@ -59,7 +62,7 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): 'dump_to_neo4j', courses=self.course_strings[:number_of_courses], host='mock_host', - port=7473, + http_port=7474, user='mock_user', password='mock_password', ) @@ -68,7 +71,7 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): self.assertEqual(mock_transaction.commit.call_count, number_of_courses) self.assertEqual(mock_transaction.commit.rollback.call_count, 0) - @mock.patch('courseware.management.commands.dump_to_neo4j.Graph') + @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.Graph') def test_dump_all_courses(self, mock_graph_class): """ Test if you don't specify which courses to dump, then you'll dump @@ -82,7 +85,7 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): call_command( 'dump_to_neo4j', host='mock_host', - port=7473, + http_port=7474, user='mock_user', password='mock_password', ) @@ -97,13 +100,17 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): """ Tests for the ModuleStoreSerializer """ + @classmethod + def setUpClass(cls): + """Any ModuleStore course/content operations can go here.""" + super(TestModuleStoreSerializer, cls).setUpClass() + cls.mss = ModuleStoreSerializer() + def test_serialize_item(self): """ Tests the serialize_item method. """ - mss = ModuleStoreSerializer() - mss.load_course_keys() - fields, label = mss.serialize_item(self.course) + fields, label = self.mss.serialize_item(self.course) self.assertEqual(label, "course") self.assertIn("edited_on", fields.keys()) self.assertIn("display_name", fields.keys()) @@ -117,9 +124,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): """ Tests the serialize_course method. """ - mss = ModuleStoreSerializer() - mss.load_course_keys() - nodes, relationships = mss.serialize_course( + nodes, relationships = self.mss.serialize_course( self.course.id ) self.assertEqual(len(nodes), 9) @@ -154,7 +159,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): """ Tests the coerce_types helper for the neo4j base types """ - coerced_value = ModuleStoreSerializer().coerce_types(original_value) + coerced_value = self.mss.coerce_types(original_value) self.assertEqual(coerced_value, coerced_expected) def test_dump_to_neo4j(self): @@ -166,10 +171,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): mock_transaction = mock.Mock() mock_graph.begin.return_value = mock_transaction - mss = ModuleStoreSerializer() - mss.load_course_keys() - - successful, unsuccessful = mss.dump_courses_to_neo4j(mock_graph) + successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) self.assertEqual(mock_graph.begin.call_count, 2) self.assertEqual(mock_transaction.commit.call_count, 2) @@ -193,9 +195,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): mock_graph.begin.return_value = mock_transaction mock_transaction.run.side_effect = ValueError('Something went wrong!') - mss = ModuleStoreSerializer() - mss.load_course_keys() - successful, unsuccessful = mss.dump_courses_to_neo4j(mock_graph) + successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) self.assertEqual(mock_graph.begin.call_count, 2) self.assertEqual(mock_transaction.commit.call_count, 0) @@ -203,3 +203,76 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): self.assertEqual(len(successful), 0) self.assertItemsEqual(unsuccessful, self.course_strings) + + @ddt.data( + (True, 2), + (False, 0), + ) + @ddt.unpack + def test_dump_to_neo4j_cache(self, override_cache, expected_number_courses): + """ + Tests the caching mechanism and override to make sure we only publish + recently updated courses. + """ + mock_graph = mock.Mock() + + # run once to warm the cache + successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) + self.assertEqual(len(successful + unsuccessful), len(self.course_strings)) + + # when run the second time, only dump courses if the cache override + # is enabled + successful, unsuccessful = self.mss.dump_courses_to_neo4j( + mock_graph, override_cache=override_cache + ) + self.assertEqual(len(successful + unsuccessful), expected_number_courses) + + def test_dump_to_neo4j_published(self): + """ + Tests that we only dump those courses that have been published after + the last time the command was been run. + """ + mock_graph = mock.Mock() + + # run once to warm the cache + successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) + self.assertEqual(len(successful + unsuccessful), len(self.course_strings)) + + # simulate one of the courses being published + _listen_for_course_publish(None, self.course.id) + + # make sure only the published course was dumped + successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph) + self.assertEqual(len(unsuccessful), 0) + self.assertEqual(len(successful), 1) + self.assertEqual(successful[0], unicode(self.course.id)) + + @ddt.data( + (datetime(2016, 3, 30), datetime(2016, 3, 31), True), + (datetime(2016, 3, 31), datetime(2016, 3, 30), False), + (datetime(2016, 3, 31), None, False), + (None, datetime(2016, 3, 30), True), + (None, None, True), + ) + @ddt.unpack + @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.COMMAND_LAST_RUN_CACHE') + @mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.COURSE_LAST_PUBLISHED_CACHE') + def test_should_dump_course( + self, + last_command_run, + last_course_published, + should_dump, + mock_course_last_published_cache, + mock_command_last_run_cache, + ): + """ + Tests whether a course should be dumped given the last time it was + dumped and the last time it was published. + """ + mock_command_last_run_cache.get.return_value = last_command_run + mock_course_last_published_cache.get.return_value = last_course_published + mock_course_key = mock.Mock + self.assertEqual( + self.mss.should_dump_course(mock_course_key), + should_dump + ) diff --git a/openedx/core/djangoapps/coursegraph/signals.py b/openedx/core/djangoapps/coursegraph/signals.py new file mode 100644 index 0000000000..19522b5eb8 --- /dev/null +++ b/openedx/core/djangoapps/coursegraph/signals.py @@ -0,0 +1,15 @@ +""" +Signal handlers for the CourseGraph application +""" +from django.dispatch.dispatcher import receiver +from xmodule.modulestore.django import SignalHandler + +from openedx.core.djangoapps.coursegraph.utils import CourseLastPublishedCache + + +@receiver(SignalHandler.course_published) +def _listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable=unused-argument + """ + Register when the course was published on a course publish event + """ + CourseLastPublishedCache().set(course_key) diff --git a/openedx/core/djangoapps/coursegraph/tests/__init__.py b/openedx/core/djangoapps/coursegraph/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openedx/core/djangoapps/coursegraph/tests/test_signals.py b/openedx/core/djangoapps/coursegraph/tests/test_signals.py new file mode 100644 index 0000000000..838405e468 --- /dev/null +++ b/openedx/core/djangoapps/coursegraph/tests/test_signals.py @@ -0,0 +1,27 @@ +""" +Tests for coursegraph's signal handler on course publish +""" +from __future__ import unicode_literals + +from opaque_keys.edx.keys import CourseKey + +from openedx.core.djangoapps.coursegraph.signals import _listen_for_course_publish +from openedx.core.djangoapps.coursegraph.utils import CourseLastPublishedCache +from openedx.core.djangolib.testing.utils import CacheIsolationTestCase + + +class TestCourseGraphSignalHandler(CacheIsolationTestCase): + """ + Tests for the course publish course handler + """ + ENABLED_CACHES = ['default'] + + def test_cache_set_on_course_publish(self): + """ + Tests that the last published cache is set on course publish + """ + course_key = CourseKey.from_string('course-v1:org+course+run') + last_published_cache = CourseLastPublishedCache() + self.assertIsNone(last_published_cache.get(course_key)) + _listen_for_course_publish(None, course_key) + self.assertIsNotNone(last_published_cache.get(course_key)) diff --git a/openedx/core/djangoapps/coursegraph/utils.py b/openedx/core/djangoapps/coursegraph/utils.py new file mode 100644 index 0000000000..d55743f924 --- /dev/null +++ b/openedx/core/djangoapps/coursegraph/utils.py @@ -0,0 +1,52 @@ +""" +Helpers for the CourseGraph app +""" +from django.core.cache import cache +from django.utils import timezone + + +class TimeRecordingCacheBase(object): + """ + A base class for caching the current time for some key. + """ + # cache_prefix should be defined in children classes + cache_prefix = None + _cache = cache + + def _key(self, course_key): + """ + Make a cache key from the prefix and a course_key + :param course_key: CourseKey object + :return: a cache key + """ + return self.cache_prefix + unicode(course_key) + + def get(self, course_key): + """ + Gets the time value associated with the CourseKey. + :param course_key: a CourseKey object. + :return: the time the key was last set. + """ + return self._cache.get(self._key(course_key)) + + def set(self, course_key): + """ + Sets the current time for a CourseKey key. + :param course_key: a CourseKey object. + """ + return self._cache.set(self._key(course_key), timezone.now()) + + +class CourseLastPublishedCache(TimeRecordingCacheBase): + """ + Used to record the last time that a course had a publish event run on it. + """ + cache_prefix = u'course_last_published' + + +class CommandLastRunCache(TimeRecordingCacheBase): + """ + Used to record the last time that the dump_to_neo4j command was run on a + course. + """ + cache_prefix = u'dump_to_neo4j_command_last_run'