From 9b59b5e92a35811e7991449af03aa3cfd54e4196 Mon Sep 17 00:00:00 2001 From: Julia Eskew Date: Fri, 18 Mar 2022 13:11:40 -0400 Subject: [PATCH] feat: Add detailed logging messages about each course updated in Neo4j (coursegraph). TNL owns coursegraph and we've seen 7000+ courses be submitted for update weekly. While log messages exist for each course not submitted, no log message currently exists for each submitted course. This commit adds logs for those submitted courses as well. --- .../commands/tests/test_dump_to_neo4j.py | 28 +++++++++++++++---- openedx/core/djangoapps/coursegraph/tasks.py | 26 +++++++++++++---- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py b/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py index 61349eda8a..0d03f56d5f 100644 --- a/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py +++ b/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py @@ -509,11 +509,29 @@ class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): @mock.patch('openedx.core.djangoapps.coursegraph.tasks.get_course_last_published') @mock.patch('openedx.core.djangoapps.coursegraph.tasks.get_command_last_run') @ddt.data( - (str(datetime(2016, 3, 30)), str(datetime(2016, 3, 31)), True), - (str(datetime(2016, 3, 31)), str(datetime(2016, 3, 30)), False), - (str(datetime(2016, 3, 31)), None, False), - (None, str(datetime(2016, 3, 30)), True), - (None, None, True), + ( + str(datetime(2016, 3, 30)), str(datetime(2016, 3, 31)), + (True, ( + 'course has been published since last neo4j update time - ' + 'update date 2016-03-30 00:00:00 < published date 2016-03-31 00:00:00' + )) + ), + ( + str(datetime(2016, 3, 31)), str(datetime(2016, 3, 30)), + (False, None) + ), + ( + str(datetime(2016, 3, 31)), None, + (False, None) + ), + ( + None, str(datetime(2016, 3, 30)), + (True, 'no record of 
the last neo4j update time for the course') + ), + ( + None, None, + (True, 'no record of the last neo4j update time for the course') + ), ) @ddt.unpack def test_should_dump_course( diff --git a/openedx/core/djangoapps/coursegraph/tasks.py b/openedx/core/djangoapps/coursegraph/tasks.py index f43384990f..bfca4f79b6 100644 --- a/openedx/core/djangoapps/coursegraph/tasks.py +++ b/openedx/core/djangoapps/coursegraph/tasks.py @@ -231,7 +231,9 @@ def should_dump_course(course_key, graph): course_key: a CourseKey object. graph: a py2neo Graph object. - Returns: bool of whether this course should be dumped to neo4j. + Returns: + - whether this course should be dumped to neo4j (bool) + - reason why course needs to be dumped (string, None if doesn't need to be dumped) """ last_this_command_was_run = get_command_last_run(course_key, graph) @@ -241,17 +243,27 @@ def should_dump_course(course_key, graph): # if we don't have a record of the last time this command was run, # we should serialize the course and dump it if last_this_command_was_run is None: - return True + return ( + True, + "no record of the last neo4j update time for the course" + ) # if we've serialized the course recently and we have no published # events, we will not dump it, and so we can skip serializing it # again here if last_this_command_was_run and course_last_published_date is None: - return False + return (False, None) # otherwise, serialize and dump the course if the command was run # before the course's last published event - return last_this_command_was_run < course_last_published_date + needs_update = last_this_command_was_run < course_last_published_date + update_reason = None + if needs_update: + update_reason = ( + f"course has been published since last neo4j update time - " + f"update date {last_this_command_was_run} < published date {course_last_published_date}" + ) + return (needs_update, update_reason) @shared_task @@ -366,11 +378,15 @@ class ModuleStoreSerializer: total_number_of_courses, ) 
- if not (override_cache or should_dump_course(course_key, graph)): + (needs_dump, reason) = should_dump_course(course_key, graph) + if not (override_cache or needs_dump): log.info("skipping submitting %s, since it hasn't changed", course_key) skipped_courses.append(str(course_key)) continue + if override_cache: + reason = "override_cache is True" + log.info("submitting %s, because %s", course_key, reason) dump_course_to_neo4j.apply_async( args=[str(course_key), credentials], )