Allow specifying which courses to write to neo4j

This commit is contained in:
Adam Palay
2016-08-31 17:18:47 -04:00
parent cf2c48ab8f
commit 01a0f6d6ee
2 changed files with 191 additions and 89 deletions

View File

@@ -13,6 +13,7 @@ from py2neo import Graph, Node, Relationship, authenticate
from py2neo.compat import integer, string, unicode as neo4j_unicode
from request_cache.middleware import RequestCache
from xmodule.modulestore.django import modulestore
from opaque_keys.edx.keys import CourseKey
log = logging.getLogger(__name__)
@@ -30,8 +31,20 @@ class ModuleStoreSerializer(object):
Class with functionality to serialize a modulestore into subgraphs,
one graph per course.
"""
def __init__(self):
    """
    Store the modulestore's course summaries on ``self.all_courses``.
    """
    course_summaries = modulestore().get_course_summaries()
    self.all_courses = course_summaries
def load_course_keys(self, courses=None):
    """
    Populate the object's ``course_keys`` attribute.

    :param courses: optional iterable of serialized course key strings.
        Each entry is stripped of surrounding whitespace and parsed with
        ``CourseKey.from_string``. When this is omitted (or empty), the id
        of every course summary in the modulestore is used instead.
    """
    if not courses:
        # Nothing was requested explicitly: fall back to every course.
        self.course_keys = [
            summary.id for summary in modulestore().get_course_summaries()
        ]
        return

    self.course_keys = [
        CourseKey.from_string(serialized_key.strip())
        for serialized_key in courses
    ]
@staticmethod
def serialize_item(item):
@@ -136,6 +149,79 @@ class ModuleStoreSerializer(object):
return coerced_value
@staticmethod
def add_to_transaction(neo4j_entities, transaction):
    """
    Call ``transaction.create`` once for each entity, in order.

    Args:
        neo4j_entities: a list of Nodes or Relationships
        transaction: a neo4j transaction
    """
    for graph_item in neo4j_entities:
        transaction.create(graph_item)
def dump_courses_to_neo4j(self, graph):
    """
    Write every course in ``self.course_keys`` to neo4j, using a separate
    transaction for each course.

    Parameters
    ----------
    graph: py2neo graph object

    Returns
    -------
    Two lists of course key strings: one of the courses that were
    successfully written to neo4j, and one of courses that were not.
    """
    course_total = len(self.course_keys)
    exported = []
    failed = []

    for position, course_key in enumerate(self.course_keys, 1):
        # Clear the request cache before each course to prevent memory leaks.
        RequestCache.clear_request_cache()

        log.info(
            "Now exporting %s to neo4j: course %d of %d total courses",
            course_key,
            position,
            course_total,
        )
        nodes, relationships = self.serialize_course(course_key)
        log.info(
            "%d nodes and %d relationships in %s",
            len(nodes),
            len(relationships),
            course_key,
        )

        transaction = graph.begin()
        course_string = six.text_type(course_key)

        try:
            # Replace rather than merge: delete whatever is stored for this
            # course, then re-create it within the same transaction.
            # NOTE(review): the course key is interpolated into the Cypher
            # text; consider passing it as a query parameter instead.
            delete_existing = (
                "MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n"
            ).format(course_string)
            transaction.run(delete_existing)

            for entities in (nodes, relationships):
                self.add_to_transaction(entities, transaction)

            transaction.commit()
        except Exception:  # pylint: disable=broad-except
            log.exception(
                "Error trying to dump course %s to neo4j, rolling back",
                course_string
            )
            transaction.rollback()
            failed.append(course_string)
            continue

        exported.append(course_string)

    return exported, failed
class Command(BaseCommand):
"""
Command to dump modulestore data to neo4j
@@ -155,16 +241,7 @@ class Command(BaseCommand):
parser.add_argument('--port', type=int)
parser.add_argument('--user', type=unicode)
parser.add_argument('--password', type=unicode)
@staticmethod
def add_to_transaction(neo4j_entities, transaction):
    """
    Call ``transaction.create`` once for each entity, in order.

    Args:
        neo4j_entities: a list of Nodes or Relationships
        transaction: a neo4j transaction
    """
    for graph_item in neo4j_entities:
        transaction.create(graph_item)
parser.add_argument('--courses', type=unicode, nargs='*')
def handle(self, *args, **options): # pylint: disable=unused-argument
"""
@@ -192,44 +269,22 @@ class Command(BaseCommand):
)
mss = ModuleStoreSerializer()
mss.load_course_keys(options['courses'])
total_number_of_courses = len(mss.all_courses)
successful_courses, unsuccessful_courses = mss.dump_courses_to_neo4j(graph)
for index, course in enumerate(mss.all_courses):
# first, clear the request cache to prevent memory leaks
RequestCache.clear_request_cache()
log.info(
"Now exporting %s to neo4j: course %d of %d total courses",
course.id,
index + 1,
total_number_of_courses
if successful_courses:
print(
"These courses exported to neo4j successfully:\n\t" +
"\n\t".join(successful_courses)
)
nodes, relationships = mss.serialize_course(course.id)
log.info(
"%d nodes and %d relationships in %s",
len(nodes),
len(relationships),
course.id
else:
print("No courses exported to neo4j successfully.")
if unsuccessful_courses:
print(
"These courses did not export to neo4j successfully:\n\t" +
"\n\t".join(unsuccessful_courses)
)
transaction = graph.begin()
try:
# first, delete existing course
transaction.run(
"MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format(
six.text_type(course.id)
)
)
# now, re-add it
self.add_to_transaction(nodes, transaction)
self.add_to_transaction(relationships, transaction)
transaction.commit()
except Exception: # pylint: disable=broad-except
log.exception(
"Error trying to dump course %s to neo4j, rolling back",
six.text_type(course.id)
)
transaction.rollback()
else:
print("All courses exported to neo4j successfully.")

View File

@@ -35,18 +35,46 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
cls.course2 = CourseFactory.create()
cls.course_strings = [six.text_type(cls.course.id), six.text_type(cls.course2.id)]
@ddt.ddt
class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
"""
Tests for the dump to neo4j management command
"""
@mock.patch('courseware.management.commands.dump_to_neo4j.Graph')
@ddt.data(1, 2)
def test_dump_specific_courses(self, number_of_courses, mock_graph_class):
    """
    Test that you can specify which courses you want to dump.

    Runs the management command with the first ``number_of_courses``
    course strings and checks that exactly that many transactions are
    begun and committed, and that none is rolled back.
    """
    mock_graph = mock_graph_class.return_value
    mock_transaction = mock.Mock()
    mock_graph.begin.return_value = mock_transaction

    call_command(
        'dump_to_neo4j',
        courses=self.course_strings[:number_of_courses],
        host='mock_host',
        port=7473,
        user='mock_user',
        password='mock_password',
    )

    self.assertEqual(mock_graph.begin.call_count, number_of_courses)
    self.assertEqual(mock_transaction.commit.call_count, number_of_courses)
    # Assert on the transaction's own rollback. `commit.rollback` would be
    # an auto-created child mock that nothing ever calls, so a check on it
    # could never fail.
    self.assertEqual(mock_transaction.rollback.call_count, 0)
@mock.patch('courseware.management.commands.dump_to_neo4j.Graph')
def test_dump_all_courses(self, mock_graph_class):
"""
Test if you don't specify which courses to dump, then you'll dump
all of them.
"""
mock_graph = mock_graph_class.return_value
mock_transaction = mock.Mock()
mock_graph.begin.return_value = mock_transaction
@@ -61,35 +89,7 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
self.assertEqual(mock_graph.begin.call_count, 2)
self.assertEqual(mock_transaction.commit.call_count, 2)
self.assertEqual(mock_transaction.rollback.call_count, 0)
# 7 nodes + 9 relationships from the first course
# 2 nodes and no relationships from the second
self.assertEqual(mock_transaction.create.call_count, 18)
self.assertEqual(mock_transaction.run.call_count, 2)
@mock.patch('courseware.management.commands.dump_to_neo4j.Graph')
def test_dump_to_neo4j_rollback(self, mock_graph_class):
"""
Tests that the management command handles the case where there's
an exception trying to write to the neo4j database.
"""
mock_graph = mock_graph_class.return_value
mock_transaction = mock.Mock()
mock_graph.begin.return_value = mock_transaction
mock_transaction.run.side_effect = ValueError('Something went wrong!')
call_command(
'dump_to_neo4j',
host='mock_host',
port=7473,
user='mock_user',
password='mock_password',
)
self.assertEqual(mock_graph.begin.call_count, 2)
self.assertEqual(mock_transaction.commit.call_count, 0)
self.assertEqual(mock_transaction.rollback.call_count, 2)
self.assertEqual(mock_transaction.commit.rollback.call_count, 0)
@ddt.ddt
@@ -97,15 +97,13 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
"""
Tests for the ModuleStoreSerializer
"""
def setUp(self):
    """
    Create a ModuleStoreSerializer for each test and keep it on
    ``self.modulestore_serializer``.
    """
    super(TestModuleStoreSerializer, self).setUp()
    serializer = ModuleStoreSerializer()
    self.modulestore_serializer = serializer
def test_serialize_item(self):
"""
Tests the serialize_item method.
"""
fields, label = self.modulestore_serializer.serialize_item(self.course)
mss = ModuleStoreSerializer()
mss.load_course_keys()
fields, label = mss.serialize_item(self.course)
self.assertEqual(label, "course")
self.assertIn("edited_on", fields.keys())
self.assertIn("display_name", fields.keys())
@@ -119,7 +117,9 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
"""
Tests the serialize_course method.
"""
nodes, relationships = self.modulestore_serializer.serialize_course(
mss = ModuleStoreSerializer()
mss.load_course_keys()
nodes, relationships = mss.serialize_course(
self.course.id
)
self.assertEqual(len(nodes), 9)
@@ -135,7 +135,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
# each element in the iterable is not unicode:
self.assertFalse(any(isinstance(tab, six.text_type) for tab in example_iterable))
# but after they are coerced, they are:
coerced = self.modulestore_serializer.coerce_types(example_iterable)
coerced = ModuleStoreSerializer().coerce_types(example_iterable)
self.assertTrue(all(isinstance(tab, six.text_type) for tab in coerced))
# finally, make sure we haven't changed the type:
self.assertEqual(type(coerced), iterable_type)
@@ -154,5 +154,52 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
"""
Tests the coerce_types helper for the neo4j base types
"""
coerced_value = self.modulestore_serializer.coerce_types(original_value)
coerced_value = ModuleStoreSerializer().coerce_types(original_value)
self.assertEqual(coerced_value, coerced_expected)
def test_dump_to_neo4j(self):
    """
    Checks that dump_courses_to_neo4j works against a mock py2neo Graph
    and reports every course as successfully exported.
    """
    graph = mock.Mock()
    transaction = mock.Mock()
    graph.begin.return_value = transaction

    serializer = ModuleStoreSerializer()
    serializer.load_course_keys()
    successful, unsuccessful = serializer.dump_courses_to_neo4j(graph)

    # One transaction per course, all committed, none rolled back.
    self.assertEqual(graph.begin.call_count, 2)
    self.assertEqual(transaction.commit.call_count, 2)
    self.assertEqual(transaction.rollback.call_count, 0)

    # 7 nodes + 9 relationships from the first course
    # 2 nodes and no relationships from the second
    self.assertEqual(transaction.create.call_count, 18)
    self.assertEqual(transaction.run.call_count, 2)

    self.assertEqual(len(unsuccessful), 0)
    self.assertItemsEqual(successful, self.course_strings)
def test_dump_to_neo4j_rollback(self):
    """
    Checks that dump_courses_to_neo4j handles the case where there's an
    exception trying to write to the neo4j database: every course is
    rolled back and reported as unsuccessful.
    """
    graph = mock.Mock()
    transaction = mock.Mock()
    graph.begin.return_value = transaction
    # Make the first statement run in each transaction blow up.
    transaction.run.side_effect = ValueError('Something went wrong!')

    serializer = ModuleStoreSerializer()
    serializer.load_course_keys()
    successful, unsuccessful = serializer.dump_courses_to_neo4j(graph)

    self.assertEqual(graph.begin.call_count, 2)
    self.assertEqual(transaction.commit.call_count, 0)
    self.assertEqual(transaction.rollback.call_count, 2)

    self.assertEqual(len(successful), 0)
    self.assertItemsEqual(unsuccessful, self.course_strings)