add precedes relationship to coursegraph (EDUCATOR-484)
This commit is contained in:
@@ -33,16 +33,25 @@ class ModuleStoreSerializer(object):
|
||||
one graph per course.
|
||||
"""
|
||||
|
||||
def __init__(self, courses=None, skip=None):
|
||||
def __init__(self, course_keys):
|
||||
self.course_keys = course_keys
|
||||
|
||||
@classmethod
|
||||
def create(cls, courses=None, skip=None):
|
||||
"""
|
||||
Sets the object's course_keys attribute from the `courses` parameter.
|
||||
If that parameter isn't furnished, loads all course_keys from the
|
||||
modulestore.
|
||||
|
||||
Filters out course_keys in the `skip` parameter, if provided.
|
||||
Args:
|
||||
|
||||
Arguments:
|
||||
courses: A list of string serializations of course keys.
|
||||
For example, ["course-v1:org+course+run"].
|
||||
skip: Also a list of string serializations of course keys.
|
||||
|
||||
Returns:
|
||||
a ModulestoreSerializer instance
|
||||
"""
|
||||
if courses:
|
||||
course_keys = [CourseKey.from_string(course.strip()) for course in courses]
|
||||
@@ -51,14 +60,14 @@ class ModuleStoreSerializer(object):
|
||||
course.id for course in modulestore().get_course_summaries()
|
||||
]
|
||||
if skip is not None:
|
||||
skip_keys = [CourseKey.from_string(course.strip()) for course in skip]
|
||||
skip_keys = set([CourseKey.from_string(course.strip()) for course in skip])
|
||||
course_keys = [course_key for course_key in course_keys if course_key not in skip_keys]
|
||||
self.course_keys = course_keys
|
||||
return cls(course_keys)
|
||||
|
||||
@staticmethod
|
||||
def serialize_item(item):
|
||||
"""
|
||||
Args:
|
||||
Arguments:
|
||||
item: an XBlock
|
||||
|
||||
Returns:
|
||||
@@ -100,7 +109,7 @@ class ModuleStoreSerializer(object):
|
||||
def serialize_course(self, course_id):
|
||||
"""
|
||||
Serializes a course into py2neo Nodes and Relationships
|
||||
Args:
|
||||
Arguments:
|
||||
course_id: CourseKey of the course we want to serialize
|
||||
|
||||
Returns:
|
||||
@@ -125,6 +134,7 @@ class ModuleStoreSerializer(object):
|
||||
# create relationships
|
||||
relationships = []
|
||||
for item in items:
|
||||
previous_child_node = None
|
||||
for index, child_loc in enumerate(item.get_children()):
|
||||
parent_node = location_to_node.get(item.location)
|
||||
child_node = location_to_node.get(child_loc.location)
|
||||
@@ -133,13 +143,20 @@ class ModuleStoreSerializer(object):
|
||||
relationship = Relationship(parent_node, "PARENT_OF", child_node)
|
||||
relationships.append(relationship)
|
||||
|
||||
if previous_child_node:
|
||||
ordering_relationship = Relationship(
|
||||
previous_child_node, "PRECEDES", child_node
|
||||
)
|
||||
relationships.append(ordering_relationship)
|
||||
previous_child_node = child_node
|
||||
|
||||
nodes = location_to_node.values()
|
||||
return nodes, relationships
|
||||
|
||||
@staticmethod
|
||||
def coerce_types(value):
|
||||
"""
|
||||
Args:
|
||||
Arguments:
|
||||
value: the value of an xblock's field
|
||||
|
||||
Returns: either the value, a text version of the value, or, if the
|
||||
@@ -159,7 +176,7 @@ class ModuleStoreSerializer(object):
|
||||
@staticmethod
|
||||
def add_to_transaction(neo4j_entities, transaction):
|
||||
"""
|
||||
Args:
|
||||
Arguments:
|
||||
neo4j_entities: a list of Nodes or Relationships
|
||||
transaction: a neo4j transaction
|
||||
"""
|
||||
@@ -170,7 +187,7 @@ class ModuleStoreSerializer(object):
|
||||
def get_command_last_run(course_key, graph):
|
||||
"""
|
||||
This information is stored on the course node of a course in neo4j
|
||||
Args:
|
||||
Arguments:
|
||||
course_key: a CourseKey
|
||||
graph: a py2neo Graph
|
||||
|
||||
@@ -195,7 +212,7 @@ class ModuleStoreSerializer(object):
|
||||
"""
|
||||
We use the CourseStructure table to get when this course was last
|
||||
published.
|
||||
Args:
|
||||
Arguments:
|
||||
course_key: a CourseKey
|
||||
|
||||
Returns: The datetime the course was last published at, converted into
|
||||
@@ -214,7 +231,7 @@ class ModuleStoreSerializer(object):
|
||||
"""
|
||||
Only dump the course if it's been changed since the last time it's been
|
||||
dumped.
|
||||
Args:
|
||||
Arguments:
|
||||
course_key: a CourseKey object.
|
||||
graph: a py2neo Graph object.
|
||||
|
||||
@@ -240,11 +257,50 @@ class ModuleStoreSerializer(object):
|
||||
# before the course's last published event
|
||||
return last_this_command_was_run < course_last_published_date
|
||||
|
||||
def dump_course_to_neo4j(self, course_key, graph):
|
||||
"""
|
||||
Serializes a course and writes it to neo4j.
|
||||
|
||||
Arguments:
|
||||
course_key: course key for the course to be exported
|
||||
graph: py2neo graph object
|
||||
"""
|
||||
|
||||
nodes, relationships = self.serialize_course(course_key)
|
||||
log.info(
|
||||
"%d nodes and %d relationships in %s",
|
||||
len(nodes),
|
||||
len(relationships),
|
||||
course_key,
|
||||
)
|
||||
|
||||
transaction = graph.begin()
|
||||
course_string = six.text_type(course_key)
|
||||
try:
|
||||
# first, delete existing course
|
||||
transaction.run(
|
||||
"MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format(
|
||||
course_string
|
||||
)
|
||||
)
|
||||
|
||||
# now, re-add it
|
||||
self.add_to_transaction(nodes, transaction)
|
||||
self.add_to_transaction(relationships, transaction)
|
||||
transaction.commit()
|
||||
|
||||
except Exception: # pylint: disable=broad-except
|
||||
log.exception(
|
||||
"Error trying to dump course %s to neo4j, rolling back",
|
||||
course_string
|
||||
)
|
||||
transaction.rollback()
|
||||
|
||||
def dump_courses_to_neo4j(self, graph, override_cache=False):
|
||||
"""
|
||||
Method that iterates through a list of courses in a modulestore,
|
||||
serializes them, then writes them to neo4j
|
||||
Args:
|
||||
serializes them, then submits tasks to write them to neo4j.
|
||||
Arguments:
|
||||
graph: py2neo graph object
|
||||
override_cache: serialize the courses even if they'be been recently
|
||||
serialized
|
||||
@@ -255,8 +311,8 @@ class ModuleStoreSerializer(object):
|
||||
|
||||
total_number_of_courses = len(self.course_keys)
|
||||
|
||||
successful_courses = []
|
||||
unsuccessful_courses = []
|
||||
submitted_courses = []
|
||||
skipped_courses = []
|
||||
|
||||
for index, course_key in enumerate(self.course_keys):
|
||||
# first, clear the request cache to prevent memory leaks
|
||||
@@ -271,43 +327,13 @@ class ModuleStoreSerializer(object):
|
||||
|
||||
if not (override_cache or self.should_dump_course(course_key, graph)):
|
||||
log.info("skipping dumping %s, since it hasn't changed", course_key)
|
||||
continue
|
||||
|
||||
nodes, relationships = self.serialize_course(course_key)
|
||||
log.info(
|
||||
"%d nodes and %d relationships in %s",
|
||||
len(nodes),
|
||||
len(relationships),
|
||||
course_key,
|
||||
)
|
||||
|
||||
transaction = graph.begin()
|
||||
course_string = six.text_type(course_key)
|
||||
try:
|
||||
# first, delete existing course
|
||||
transaction.run(
|
||||
"MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format(
|
||||
course_string
|
||||
)
|
||||
)
|
||||
|
||||
# now, re-add it
|
||||
self.add_to_transaction(nodes, transaction)
|
||||
self.add_to_transaction(relationships, transaction)
|
||||
transaction.commit()
|
||||
|
||||
except Exception: # pylint: disable=broad-except
|
||||
log.exception(
|
||||
"Error trying to dump course %s to neo4j, rolling back",
|
||||
course_string
|
||||
)
|
||||
transaction.rollback()
|
||||
unsuccessful_courses.append(course_string)
|
||||
skipped_courses.append(unicode(course_key))
|
||||
|
||||
else:
|
||||
successful_courses.append(course_string)
|
||||
self.dump_course_to_neo4j(course_key, graph)
|
||||
submitted_courses.append(unicode(course_key))
|
||||
|
||||
return successful_courses, unsuccessful_courses
|
||||
return submitted_courses, skipped_courses
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
@@ -374,28 +400,24 @@ class Command(BaseCommand):
|
||||
secure=secure,
|
||||
)
|
||||
|
||||
mss = ModuleStoreSerializer(options['courses'], options['skip'])
|
||||
mss = ModuleStoreSerializer.create(options['courses'], options['skip'])
|
||||
|
||||
successful_courses, unsuccessful_courses = mss.dump_courses_to_neo4j(
|
||||
submitted_courses, skipped_courses = mss.dump_courses_to_neo4j(
|
||||
graph, override_cache=options['override']
|
||||
)
|
||||
|
||||
if not successful_courses and not unsuccessful_courses:
|
||||
log.info(
|
||||
"%d courses submitted for export to neo4j. %d courses skipped.",
|
||||
len(submitted_courses),
|
||||
len(skipped_courses),
|
||||
)
|
||||
|
||||
if not submitted_courses:
|
||||
print("No courses exported to neo4j at all!")
|
||||
return
|
||||
|
||||
if successful_courses:
|
||||
if submitted_courses:
|
||||
print(
|
||||
"These courses exported to neo4j successfully:\n\t" +
|
||||
"\n\t".join(successful_courses)
|
||||
"These courses were submitted for export to neo4j successfully:\n\t" +
|
||||
"\n\t".join(submitted_courses)
|
||||
)
|
||||
else:
|
||||
print("No courses exported to neo4j successfully.")
|
||||
|
||||
if unsuccessful_courses:
|
||||
print(
|
||||
"These courses did not export to neo4j successfully:\n\t" +
|
||||
"\n\t".join(unsuccessful_courses)
|
||||
)
|
||||
else:
|
||||
print("All courses exported to neo4j successfully.")
|
||||
|
||||
@@ -31,6 +31,26 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
|
||||
"""
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
"""
|
||||
Creates two courses; one that's just a course module, and one that
|
||||
looks like:
|
||||
course
|
||||
|
|
||||
chapter
|
||||
|
|
||||
sequential
|
||||
|
|
||||
vertical
|
||||
/ | \ \
|
||||
/ | \ ----------
|
||||
/ | \ \
|
||||
/ | --- \
|
||||
/ | \ \
|
||||
html -> problem -> video -> video2
|
||||
|
||||
The side-pointing arrows (->) are PRECEDES relationships; the more
|
||||
vertical lines are PARENT_OF relationships.
|
||||
"""
|
||||
super(TestDumpToNeo4jCommandBase, cls).setUpClass()
|
||||
cls.course = CourseFactory.create()
|
||||
cls.chapter = ItemFactory.create(parent=cls.course, category='chapter')
|
||||
@@ -51,7 +71,7 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
|
||||
Replaces the py2neo Graph object with a MockGraph; similarly replaces
|
||||
NodeSelector with MockNodeSelector.
|
||||
|
||||
Args:
|
||||
Arguments:
|
||||
mock_selector_class: a mocked NodeSelector class
|
||||
mock_graph_class: a mocked Graph class
|
||||
transaction_errors: a bool for whether we should get errors
|
||||
@@ -71,7 +91,7 @@ class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase):
|
||||
"""
|
||||
Asserts that we have the expected number of courses, commits, and
|
||||
rollbacks after we dump the modulestore to neo4j
|
||||
Args:
|
||||
Arguments:
|
||||
mock_graph: a MockGraph backend
|
||||
number_of_courses: number of courses we expect to find
|
||||
number_commits: number of commits we expect against the graph
|
||||
@@ -203,7 +223,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
|
||||
def setUpClass(cls):
|
||||
"""Any ModuleStore course/content operations can go here."""
|
||||
super(TestModuleStoreSerializer, cls).setUpClass()
|
||||
cls.mss = ModuleStoreSerializer()
|
||||
cls.mss = ModuleStoreSerializer.create()
|
||||
|
||||
def test_serialize_item(self):
|
||||
"""
|
||||
@@ -228,7 +248,81 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
|
||||
"""
|
||||
nodes, relationships = self.mss.serialize_course(self.course.id)
|
||||
self.assertEqual(len(nodes), 9)
|
||||
self.assertEqual(len(relationships), 7)
|
||||
# the course has 7 "PARENT_OF" relationships and 3 "PRECEDES"
|
||||
self.assertEqual(len(relationships), 10)
|
||||
|
||||
@staticmethod
|
||||
def _extract_relationship_pairs(relationships, relationship_type):
|
||||
"""
|
||||
Extracts a list of XBlock location tuples from a list of Relationships.
|
||||
|
||||
Arguments:
|
||||
relationships: list of py2neo `Relationship` objects
|
||||
relationship_type: the type of relationship to filter `relationships`
|
||||
by.
|
||||
Returns:
|
||||
List of tuples of the locations of of the relationships'
|
||||
constituent nodes.
|
||||
"""
|
||||
relationship_pairs = [
|
||||
tuple([node["location"] for node in rel.nodes()])
|
||||
for rel in relationships if rel.type() == relationship_type
|
||||
]
|
||||
return relationship_pairs
|
||||
|
||||
@staticmethod
|
||||
def _extract_location_pair(xblock1, xblock2):
|
||||
"""
|
||||
Returns a tuple of locations from two XBlocks.
|
||||
|
||||
Arguments:
|
||||
xblock1: an xblock
|
||||
xblock2: also an xblock
|
||||
|
||||
Returns:
|
||||
A tuple of the string representations of those XBlocks' locations.
|
||||
"""
|
||||
return (unicode(xblock1.location), unicode(xblock2.location))
|
||||
|
||||
def assertBlockPairIsRelationship(self, xblock1, xblock2, relationships, relationship_type):
|
||||
"""
|
||||
Helper assertion that a pair of xblocks have a certain kind of
|
||||
relationship with one another.
|
||||
"""
|
||||
relationship_pairs = self._extract_relationship_pairs(relationships, relationship_type)
|
||||
location_pair = self._extract_location_pair(xblock1, xblock2)
|
||||
self.assertIn(location_pair, relationship_pairs)
|
||||
|
||||
def assertBlockPairIsNotRelationship(self, xblock1, xblock2, relationships, relationship_type):
|
||||
"""
|
||||
The opposite of `assertBlockPairIsRelationship`: asserts that a pair
|
||||
of xblocks do NOT have a certain kind of relationship.
|
||||
"""
|
||||
relationship_pairs = self._extract_relationship_pairs(relationships, relationship_type)
|
||||
location_pair = self._extract_location_pair(xblock1, xblock2)
|
||||
self.assertNotIn(location_pair, relationship_pairs)
|
||||
|
||||
def test_precedes_relationship(self):
|
||||
"""
|
||||
Tests that two nodes that should have a precedes relationship have it.
|
||||
"""
|
||||
__, relationships = self.mss.serialize_course(self.course.id)
|
||||
self.assertBlockPairIsRelationship(self.video, self.video2, relationships, "PRECEDES")
|
||||
self.assertBlockPairIsNotRelationship(self.video2, self.video, relationships, "PRECEDES")
|
||||
self.assertBlockPairIsNotRelationship(self.vertical, self.video, relationships, "PRECEDES")
|
||||
self.assertBlockPairIsNotRelationship(self.html, self.video, relationships, "PRECEDES")
|
||||
|
||||
def test_parent_relationship(self):
|
||||
"""
|
||||
Test that two nodes that should have a parent_of relationship have it.
|
||||
"""
|
||||
__, relationships = self.mss.serialize_course(self.course.id)
|
||||
self.assertBlockPairIsRelationship(self.vertical, self.video, relationships, "PARENT_OF")
|
||||
self.assertBlockPairIsRelationship(self.vertical, self.html, relationships, "PARENT_OF")
|
||||
self.assertBlockPairIsRelationship(self.course, self.chapter, relationships, "PARENT_OF")
|
||||
self.assertBlockPairIsNotRelationship(self.course, self.video, relationships, "PARENT_OF")
|
||||
self.assertBlockPairIsNotRelationship(self.video, self.vertical, relationships, "PARENT_OF")
|
||||
self.assertBlockPairIsNotRelationship(self.video, self.html, relationships, "PARENT_OF")
|
||||
|
||||
def test_nodes_have_indices(self):
|
||||
"""
|
||||
@@ -277,7 +371,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
|
||||
mock_graph = MockGraph()
|
||||
mock_selector_class.return_value = MockNodeSelector(mock_graph)
|
||||
|
||||
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph)
|
||||
submitted, skipped = self.mss.dump_courses_to_neo4j(mock_graph)
|
||||
|
||||
self.assertCourseDump(
|
||||
mock_graph,
|
||||
@@ -290,9 +384,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
|
||||
# 2 nodes and no relationships from the second
|
||||
|
||||
self.assertEqual(len(mock_graph.nodes), 11)
|
||||
|
||||
self.assertEqual(len(unsuccessful), 0)
|
||||
self.assertItemsEqual(successful, self.course_strings)
|
||||
self.assertItemsEqual(submitted, self.course_strings)
|
||||
|
||||
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
|
||||
def test_dump_to_neo4j_rollback(self, mock_selector_class):
|
||||
@@ -303,7 +395,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
|
||||
mock_graph = MockGraph(transaction_errors=True)
|
||||
mock_selector_class.return_value = MockNodeSelector(mock_graph)
|
||||
|
||||
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph)
|
||||
submitted, skipped = self.mss.dump_courses_to_neo4j(mock_graph)
|
||||
|
||||
self.assertCourseDump(
|
||||
mock_graph,
|
||||
@@ -312,8 +404,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
|
||||
number_rollbacks=2,
|
||||
)
|
||||
|
||||
self.assertEqual(len(successful), 0)
|
||||
self.assertItemsEqual(unsuccessful, self.course_strings)
|
||||
self.assertItemsEqual(submitted, self.course_strings)
|
||||
|
||||
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
|
||||
@ddt.data((True, 2), (False, 0))
|
||||
@@ -333,10 +424,10 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
|
||||
|
||||
# when run the second time, only dump courses if the cache override
|
||||
# is enabled
|
||||
successful, unsuccessful = self.mss.dump_courses_to_neo4j(
|
||||
submitted, __ = self.mss.dump_courses_to_neo4j(
|
||||
mock_graph, override_cache=override_cache
|
||||
)
|
||||
self.assertEqual(len(successful + unsuccessful), expected_number_courses)
|
||||
self.assertEqual(len(submitted), expected_number_courses)
|
||||
|
||||
@mock.patch('openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j.NodeSelector')
|
||||
def test_dump_to_neo4j_published(self, mock_selector_class):
|
||||
@@ -348,17 +439,16 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
|
||||
mock_selector_class.return_value = MockNodeSelector(mock_graph)
|
||||
|
||||
# run once to warm the cache
|
||||
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph)
|
||||
self.assertEqual(len(successful + unsuccessful), len(self.course_strings))
|
||||
submitted, skipped = self.mss.dump_courses_to_neo4j(mock_graph)
|
||||
self.assertEqual(len(submitted), len(self.course_strings))
|
||||
|
||||
# simulate one of the courses being published
|
||||
listen_for_course_publish(None, self.course.id)
|
||||
|
||||
# make sure only the published course was dumped
|
||||
successful, unsuccessful = self.mss.dump_courses_to_neo4j(mock_graph)
|
||||
self.assertEqual(len(unsuccessful), 0)
|
||||
self.assertEqual(len(successful), 1)
|
||||
self.assertEqual(successful[0], unicode(self.course.id))
|
||||
submitted, __ = self.mss.dump_courses_to_neo4j(mock_graph)
|
||||
self.assertEqual(len(submitted), 1)
|
||||
self.assertEqual(submitted[0], unicode(self.course.id))
|
||||
|
||||
@ddt.data(
|
||||
(six.text_type(datetime(2016, 3, 30)), six.text_type(datetime(2016, 3, 31)), True),
|
||||
@@ -373,7 +463,7 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase):
|
||||
Tests whether a course should be dumped given the last time it was
|
||||
dumped and the last time it was published.
|
||||
"""
|
||||
mss = ModuleStoreSerializer()
|
||||
mss = ModuleStoreSerializer.create()
|
||||
mss.get_command_last_run = lambda course_key, graph: last_command_run
|
||||
mss.get_course_last_published = lambda course_key: last_course_published
|
||||
mock_course_key = mock.Mock
|
||||
|
||||
Reference in New Issue
Block a user