refactor: read course publish date from overview, not block structure

The `get_course_last_published` function is used by CourseGraph to
determine whether or not a course should be dumped to Neo4j.
If the course hasn't been published since it was last dumped to
Neo4j, then it can be skipped (unless the override_cache option
is enabled).

The function was previously built using the BlockStructure
data model. While this worked fine in Production instances that
enable `block_structure.storage_backing_for_cache`, this
implementation did NOT work in development environments,
which do not use the BlockStructure model.

Instead, we switch to using CourseOverview.modified to
approximate when a course was last published. This method
has fewer moving parts and is universally available across
instances.
This commit is contained in:
Kyle McCormick
2022-02-07 16:13:53 -05:00
committed by Julia Eskew
parent 696984a2bd
commit d75a32c009
2 changed files with 21 additions and 19 deletions

View File

@@ -220,6 +220,7 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
number_rollbacks=0,
)
@mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher')
@mock.patch('cms.djangoapps.coursegraph.tasks.Graph', autospec=True)
@override_settings(
COURSEGRAPH_CONNECTION=dict(
@@ -231,12 +232,15 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
password="default-password",
)
)
def test_dump_to_neo4j_connection_defaults(self, mock_graph_class):
def test_dump_to_neo4j_connection_defaults(self, mock_graph_class, mock_matcher_class):
"""
Test that user can override individual settings.COURSEGRAPH_CONNECTION parameters
by passing them to `dump_to_neo4j`, whilst falling back to the ones that they
don't override.
"""
self.setup_mock_graph(
mock_matcher_class, mock_graph_class
)
call_command(
'dump_to_neo4j',
courses=self.course_strings[:1],
@@ -244,7 +248,8 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase):
secure=False,
password="overridden-password",
)
mock_graph_class.assert_called_once_with(
assert mock_graph_class.call_args.args == ()
assert mock_graph_class.call_args.kwargs == dict(
# From settings:
protocol='bolt',

View File

@@ -134,29 +134,26 @@ def get_command_last_run(course_key, graph):
def get_course_last_published(course_key):
"""
We use the CourseStructure table to get when this course was last
published.
Approximately when was a course last published?
We use the 'modified' column in the CourseOverview table as a quick and easy
(although perhaps inexact) way of determining when a course was last
published. This works because CourseOverview rows are re-written upon
course publish.
Args:
course_key: a CourseKey
Returns: The datetime the course was last published at, converted into
text, or None, if there's no record of the last time this course
was published.
Returns: The datetime the course was last published at, stringified.
Uses Python's default str(...) implementation for datetimes, which
is sortable and similar to ISO 8601:
https://docs.python.org/3/library/datetime.html#datetime.date.__str__
"""
# Import is placed here to avoid model import at project startup.
from xmodule.modulestore.django import modulestore
from openedx.core.djangoapps.content.block_structure.models import BlockStructureModel
from openedx.core.djangoapps.content.block_structure.exceptions import BlockStructureNotFound
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
store = modulestore()
course_usage_key = store.make_course_usage_key(course_key)
try:
structure = BlockStructureModel.get(course_usage_key)
course_last_published_date = str(structure.modified)
except BlockStructureNotFound:
course_last_published_date = None
return course_last_published_date
approx_last_published = CourseOverview.get_from_id(course_key).modified
return str(approx_last_published)
def strip_branch_and_version(location):