diff --git a/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py b/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py index 61349eda8a..0d03f56d5f 100644 --- a/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py +++ b/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py @@ -509,11 +509,29 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): @mock.patch('openedx.core.djangoapps.coursegraph.tasks.get_course_last_published') @mock.patch('openedx.core.djangoapps.coursegraph.tasks.get_command_last_run') @ddt.data( - (str(datetime(2016, 3, 30)), str(datetime(2016, 3, 31)), True), - (str(datetime(2016, 3, 31)), str(datetime(2016, 3, 30)), False), - (str(datetime(2016, 3, 31)), None, False), - (None, str(datetime(2016, 3, 30)), True), - (None, None, True), + ( + str(datetime(2016, 3, 30)), str(datetime(2016, 3, 31)), + (True, ( + 'course has been published since last neo4j update time - ' + 'update date 2016-03-30 00:00:00 < published date 2016-03-31 00:00:00' + )) + ), + ( + str(datetime(2016, 3, 31)), str(datetime(2016, 3, 30)), + (False, None) + ), + ( + str(datetime(2016, 3, 31)), None, + (False, None) + ), + ( + None, str(datetime(2016, 3, 30)), + (True, 'no record of the last neo4j update time for the course') + ), + ( + None, None, + (True, 'no record of the last neo4j update time for the course') + ), ) @ddt.unpack def test_should_dump_course( diff --git a/openedx/core/djangoapps/coursegraph/tasks.py b/openedx/core/djangoapps/coursegraph/tasks.py index f43384990f..bfca4f79b6 100644 --- a/openedx/core/djangoapps/coursegraph/tasks.py +++ b/openedx/core/djangoapps/coursegraph/tasks.py @@ -231,7 +231,9 @@ def should_dump_course(course_key, graph): course_key: a CourseKey object. graph: a py2neo Graph object. - Returns: bool of whether this course should be dumped to neo4j. + Returns: + - whether this course should be dumped to neo4j (bool) + - reason why course needs to be dumped (string, None if doesn't need to be dumped) """ last_this_command_was_run = get_command_last_run(course_key, graph) @@ -241,17 +243,27 @@ def should_dump_course(course_key, graph): # if we don't have a record of the last time this command was run, # we should serialize the course and dump it if last_this_command_was_run is None: - return True + return ( + True, + "no record of the last neo4j update time for the course" + ) # if we've serialized the course recently and we have no published # events, we will not dump it, and so we can skip serializing it # again here if last_this_command_was_run and course_last_published_date is None: - return False + return (False, None) # otherwise, serialize and dump the course if the command was run # before the course's last published event - return last_this_command_was_run < course_last_published_date + needs_update = last_this_command_was_run < course_last_published_date + update_reason = None + if needs_update: + update_reason = ( + f"course has been published since last neo4j update time - " + f"update date {last_this_command_was_run} < published date {course_last_published_date}" + ) + return (needs_update, update_reason) @shared_task @@ -366,11 +378,15 @@ class ModuleStoreSerializer: total_number_of_courses, ) - if not (override_cache or should_dump_course(course_key, graph)): + (needs_dump, reason) = should_dump_course(course_key, graph) + if not (override_cache or needs_dump): log.info("skipping submitting %s, since it hasn't changed", course_key) skipped_courses.append(str(course_key)) continue + if override_cache: + reason = "override_cache is True" + log.info("submitting %s, because %s", course_key, reason) dump_course_to_neo4j.apply_async( args=[str(course_key), credentials], )