From 01a0f6d6eee411935cd9178709c3dbe450a94bf4 Mon Sep 17 00:00:00 2001 From: Adam Palay Date: Wed, 31 Aug 2016 17:18:47 -0400 Subject: [PATCH] allow ability to specify which courses to write to neo4j --- .../management/commands/dump_to_neo4j.py | 153 ++++++++++++------ .../commands/tests/test_dump_to_neo4j.py | 127 ++++++++++----- 2 files changed, 191 insertions(+), 89 deletions(-) diff --git a/lms/djangoapps/courseware/management/commands/dump_to_neo4j.py b/lms/djangoapps/courseware/management/commands/dump_to_neo4j.py index 3de7b7862e..726af44a9e 100644 --- a/lms/djangoapps/courseware/management/commands/dump_to_neo4j.py +++ b/lms/djangoapps/courseware/management/commands/dump_to_neo4j.py @@ -13,6 +13,7 @@ from py2neo import Graph, Node, Relationship, authenticate from py2neo.compat import integer, string, unicode as neo4j_unicode from request_cache.middleware import RequestCache from xmodule.modulestore.django import modulestore +from opaque_keys.edx.keys import CourseKey log = logging.getLogger(__name__) @@ -30,8 +31,20 @@ class ModuleStoreSerializer(object): Class with functionality to serialize a modulestore into subgraphs, one graph per course. """ - def __init__(self): - self.all_courses = modulestore().get_course_summaries() + def load_course_keys(self, courses=None): + """ + Sets the object's course_keys attribute from the `courses` parameter. + If that parameter isn't furnished, loads all course_keys from the + modulestore. 
+ :param courses: list of string serializations of course keys + """ + if courses: + course_keys = [CourseKey.from_string(course.strip()) for course in courses] + else: + course_keys = [ + course.id for course in modulestore().get_course_summaries() + ] + self.course_keys = course_keys @staticmethod def serialize_item(item): @@ -136,6 +149,79 @@ class ModuleStoreSerializer(object): return coerced_value + @staticmethod + def add_to_transaction(neo4j_entities, transaction): + """ + Args: + neo4j_entities: a list of Nodes or Relationships + transaction: a neo4j transaction + """ + for entity in neo4j_entities: + transaction.create(entity) + + + def dump_courses_to_neo4j(self, graph): + """ + Parameters + ---------- + graph: py2neo graph object + + Returns two lists: one of the courses that were successfully written + to neo4j, and one of courses that were not. + ------- + """ + total_number_of_courses = len(self.course_keys) + + successful_courses = [] + unsuccessful_courses = [] + + for index, course_key in enumerate(self.course_keys): + # first, clear the request cache to prevent memory leaks + RequestCache.clear_request_cache() + + log.info( + "Now exporting %s to neo4j: course %d of %d total courses", + course_key, + index + 1, + total_number_of_courses, + ) + nodes, relationships = self.serialize_course(course_key) + log.info( + "%d nodes and %d relationships in %s", + len(nodes), + len(relationships), + course_key, + ) + + transaction = graph.begin() + course_string = six.text_type(course_key) + try: + # first, delete existing course + transaction.run( + "MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format( + course_string + ) + ) + + # now, re-add it + self.add_to_transaction(nodes, transaction) + self.add_to_transaction(relationships, transaction) + transaction.commit() + + except Exception: # pylint: disable=broad-except + log.exception( + "Error trying to dump course %s to neo4j, rolling back", + course_string + ) + transaction.rollback() +
unsuccessful_courses.append(course_string) + + else: + successful_courses.append(course_string) + + return successful_courses, unsuccessful_courses + + class Command(BaseCommand): """ Command to dump modulestore data to neo4j @@ -155,16 +241,7 @@ class Command(BaseCommand): parser.add_argument('--port', type=int) parser.add_argument('--user', type=unicode) parser.add_argument('--password', type=unicode) - - @staticmethod - def add_to_transaction(neo4j_entities, transaction): - """ - Args: - neo4j_entities: a list of Nodes or Relationships - transaction: a neo4j transaction - """ - for entity in neo4j_entities: - transaction.create(entity) + parser.add_argument('--courses', type=unicode, nargs='*') def handle(self, *args, **options): # pylint: disable=unused-argument """ @@ -192,44 +269,22 @@ class Command(BaseCommand): ) mss = ModuleStoreSerializer() + mss.load_course_keys(options['courses']) - total_number_of_courses = len(mss.all_courses) + successful_courses, unsuccessful_courses = mss.dump_courses_to_neo4j(graph) - for index, course in enumerate(mss.all_courses): - # first, clear the request cache to prevent memory leaks - RequestCache.clear_request_cache() - - log.info( - "Now exporting %s to neo4j: course %d of %d total courses", - course.id, - index + 1, - total_number_of_courses + if successful_courses: + print( + "These courses exported to neo4j successfully:\n\t" + + "\n\t".join(successful_courses) ) - nodes, relationships = mss.serialize_course(course.id) - log.info( - "%d nodes and %d relationships in %s", - len(nodes), - len(relationships), - course.id + else: + print("No courses exported to neo4j successfully.") + + if unsuccessful_courses: + print( + "These courses did not export to neo4j successfully:\n\t" + + "\n\t".join(unsuccessful_courses) ) - - transaction = graph.begin() - try: - # first, delete existing course - transaction.run( - "MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format( - six.text_type(course.id) - ) - ) - - # now, 
re-add it - self.add_to_transaction(nodes, transaction) - self.add_to_transaction(relationships, transaction) - transaction.commit() - - except Exception: # pylint: disable=broad-except - log.exception( - "Error trying to dump course %s to neo4j, rolling back", - six.text_type(course.id) - ) - transaction.rollback() + else: + print("All courses exported to neo4j successfully.") diff --git a/lms/djangoapps/courseware/management/commands/tests/test_dump_to_neo4j.py b/lms/djangoapps/courseware/management/commands/tests/test_dump_to_neo4j.py index 1afb2e39a7..c08b0af297 100644 --- a/lms/djangoapps/courseware/management/commands/tests/test_dump_to_neo4j.py +++ b/lms/djangoapps/courseware/management/commands/tests/test_dump_to_neo4j.py @@ -35,18 +35,46 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase): cls.course2 = CourseFactory.create() + cls.course_strings = [six.text_type(cls.course.id), six.text_type(cls.course2.id)] + @ddt.ddt class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): """ Tests for the dump to neo4j management command """ + @mock.patch('courseware.management.commands.dump_to_neo4j.Graph') - def test_dump_to_neo4j(self, mock_graph_class): + @ddt.data(1, 2) + def test_dump_specific_courses(self, number_of_courses, mock_graph_class): """ - Tests the dump_to_neo4j management command works against a mock - py2neo Graph + Test that you can specify which courses you want to dump. 
""" + + mock_graph = mock_graph_class.return_value + mock_transaction = mock.Mock() + mock_graph.begin.return_value = mock_transaction + + call_command( + 'dump_to_neo4j', + courses=self.course_strings[:number_of_courses], + host='mock_host', + port=7473, + user='mock_user', + password='mock_password', + ) + + self.assertEqual(mock_graph.begin.call_count, number_of_courses) + self.assertEqual(mock_transaction.commit.call_count, number_of_courses) + self.assertEqual(mock_transaction.commit.rollback.call_count, 0) + + @mock.patch('courseware.management.commands.dump_to_neo4j.Graph') + def test_dump_all_courses(self, mock_graph_class): + """ + Test if you don't specify which courses to dump, then you'll dump + all of them. + """ + mock_graph = mock_graph_class.return_value mock_transaction = mock.Mock() mock_graph.begin.return_value = mock_transaction @@ -61,35 +89,7 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): self.assertEqual(mock_graph.begin.call_count, 2) self.assertEqual(mock_transaction.commit.call_count, 2) - self.assertEqual(mock_transaction.rollback.call_count, 0) - - # 7 nodes + 9 relationships from the first course - # 2 nodes and no relationships from the second - self.assertEqual(mock_transaction.create.call_count, 18) - self.assertEqual(mock_transaction.run.call_count, 2) - - @mock.patch('courseware.management.commands.dump_to_neo4j.Graph') - def test_dump_to_neo4j_rollback(self, mock_graph_class): - """ - Tests that the management command handles the case where there's - an exception trying to write to the neo4j database. 
- """ - mock_graph = mock_graph_class.return_value - mock_transaction = mock.Mock() - mock_graph.begin.return_value = mock_transaction - mock_transaction.run.side_effect = ValueError('Something went wrong!') - - call_command( - 'dump_to_neo4j', - host='mock_host', - port=7473, - user='mock_user', - password='mock_password', - ) - - self.assertEqual(mock_graph.begin.call_count, 2) - self.assertEqual(mock_transaction.commit.call_count, 0) - self.assertEqual(mock_transaction.rollback.call_count, 2) + self.assertEqual(mock_transaction.commit.rollback.call_count, 0) @ddt.ddt @@ -97,15 +97,13 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): """ Tests for the ModuleStoreSerializer """ - def setUp(self): - super(TestModuleStoreSerializer, self).setUp() - self.modulestore_serializer = ModuleStoreSerializer() - def test_serialize_item(self): """ Tests the serialize_item method. """ - fields, label = self.modulestore_serializer.serialize_item(self.course) + mss = ModuleStoreSerializer() + mss.load_course_keys() + fields, label = mss.serialize_item(self.course) self.assertEqual(label, "course") self.assertIn("edited_on", fields.keys()) self.assertIn("display_name", fields.keys()) @@ -119,7 +117,9 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): """ Tests the serialize_course method. 
""" - nodes, relationships = self.modulestore_serializer.serialize_course( + mss = ModuleStoreSerializer() + mss.load_course_keys() + nodes, relationships = mss.serialize_course( self.course.id ) self.assertEqual(len(nodes), 9) @@ -135,7 +135,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): # each element in the iterable is not unicode: self.assertFalse(any(isinstance(tab, six.text_type) for tab in example_iterable)) # but after they are coerced, they are: - coerced = self.modulestore_serializer.coerce_types(example_iterable) + coerced = ModuleStoreSerializer().coerce_types(example_iterable) self.assertTrue(all(isinstance(tab, six.text_type) for tab in coerced)) # finally, make sure we haven't changed the type: self.assertEqual(type(coerced), iterable_type) @@ -154,5 +154,52 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): """ Tests the coerce_types helper for the neo4j base types """ - coerced_value = self.modulestore_serializer.coerce_types(original_value) + coerced_value = ModuleStoreSerializer().coerce_types(original_value) self.assertEqual(coerced_value, coerced_expected) + + def test_dump_to_neo4j(self): + """ + Tests the dump_to_neo4j method works against a mock + py2neo Graph + """ + mock_graph = mock.Mock() + mock_transaction = mock.Mock() + mock_graph.begin.return_value = mock_transaction + + mss = ModuleStoreSerializer() + mss.load_course_keys() + + successful, unsuccessful = mss.dump_courses_to_neo4j(mock_graph) + + self.assertEqual(mock_graph.begin.call_count, 2) + self.assertEqual(mock_transaction.commit.call_count, 2) + self.assertEqual(mock_transaction.rollback.call_count, 0) + + # 7 nodes + 9 relationships from the first course + # 2 nodes and no relationships from the second + self.assertEqual(mock_transaction.create.call_count, 18) + self.assertEqual(mock_transaction.run.call_count, 2) + + self.assertEqual(len(unsuccessful), 0) + self.assertItemsEqual(successful, self.course_strings) + + def 
test_dump_to_neo4j_rollback(self): + """ + Tests that the dump_courses_to_neo4j method handles the case where there's + an exception trying to write to the neo4j database. + """ + mock_graph = mock.Mock() + mock_transaction = mock.Mock() + mock_graph.begin.return_value = mock_transaction + mock_transaction.run.side_effect = ValueError('Something went wrong!') + + mss = ModuleStoreSerializer() + mss.load_course_keys() + successful, unsuccessful = mss.dump_courses_to_neo4j(mock_graph) + + self.assertEqual(mock_graph.begin.call_count, 2) + self.assertEqual(mock_transaction.commit.call_count, 0) + self.assertEqual(mock_transaction.rollback.call_count, 2) + + self.assertEqual(len(successful), 0) + self.assertItemsEqual(unsuccessful, self.course_strings)