feat: cache course index queries (#29107)

* feat: store split modulestore's course indexes in Django/MySQL

Course outline generation is calling active_versions hundreds or even thousands of times on course_publish. In practice, the handling of a block requires a fetch of the course index. That requires a call to the active_versions table for each touching of a block. In larger courses, that can mean 1000s of calls to the db to retrieve them. A quick solution to this problem is to use a request cache, so we don't have to query the slow db every time and memoize the relevant data
[An example run can be seen here.](https://one.newrelic.com/launcher/nr1-core.explorer?platform[filters]=IihuYW1lIGxpa2UgJ3Byb2QtZWR4LWVkeGFwcCcgb3IgaWQgPSAncHJvZC1lZHgtZWR4YXBwJyBvciBkb21haW5JZCA9ICdwcm9kLWVkeC1lZHhhcHAnKSBhbmQgKG5hbWUgbGlrZSAnbG1zJyBvciBpZCA9ICdsbXMnIG9yIGRvbWFpbklkID0gJ2xtcycpIg==&platform[accountId]=88178&platform[timeRange][begin_time]=1626715880701&platform[timeRange][end_time]=1627320680701&pane=eyJuZXJkbGV0SWQiOiJhcG0tbmVyZGxldHMudHJhbnNhY3Rpb25zIiwiZW50aXR5R3VpZCI6Ik9EZ3hOemg4UVZCTmZFRlFVRXhKUTBGVVNVOU9mRFk1TVRNM05EUTROQSIsInNlbGVjdGVkU2VyaWVzIjoiZjAzYjNmNzY5OTQ0MjlmOTFhYWQ4MDBkNTEwZTU5MDM5OWNjMzNhMSIsImRyaWxsZG93biI6eyJ0cmFuc2FjdGlvbk5hbWUiOiJPdGhlclRyYW5zYWN0aW9uL0NlbGVyeS9jbXMuZGphbmdvYXBwcy5jb250ZW50c3RvcmUudGFza3MudXBkYXRlX291dGxpbmVfZnJvbV9tb2R1bGVzdG9yZV90YXNrIn19&cards[0]=eyJuZXJkbGV0SWQiOiJhcG0tbmVyZGxldHMudHJhbnNhY3Rpb24tdHJhY2UiLCJ0cmFjZUlkIjoiY2I2OGNkMDktZWI5Yi0xMWViLWJkY2QtMDI0MmFjMTEwMDBlXzI1MjU0MV8zMTgzMTMiLCJlbnRpdHlHdWlkIjoiT0RneE56aDhRVkJOZkVGUVVFeEpRMEZVU1U5T2ZEWTVNVE0zTkRRNE5BIn0=&sidebars[0]=eyJuZXJkbGV0SWQiOiJucjEtY29yZS5hY3Rpb25zIiwic2VsZWN0ZWROZXJkbGV0Ijp7Im5lcmRsZXRJZCI6ImFwbS1uZXJkbGV0cy50cmFuc2FjdGlvbnMifSwiZW50aXR5R3VpZCI6Ik9EZ3hOemg4UVZCTmZFRlFVRXhKUTBGVVNVOU9mRFk1TVRNM05EUTROQSJ9&state=d9946155-ea53-cb11-c1c0-cc873d6c7d39)

Useful information to include:
In theory, this should provide a minor performance boost to authors and learners, and be noticeable in the above function trace, once live on prod. Note, that in several places the cache must be invalidated, as to prevent a stale cache.


Co-authored-by: Braden MacDonald <braden@opencraft.com>
This commit is contained in:
connorhaugh
2021-10-27 13:53:10 -04:00
committed by GitHub
parent ec7205e679
commit b01e773d2b
12 changed files with 54 additions and 32 deletions

View File

@@ -25,7 +25,8 @@ from xmodule.exceptions import HeartbeatFailure
from xmodule.modulestore import BlockData
from xmodule.modulestore.split_mongo import BlockKey
from xmodule.mongo_utils import connect_to_mongodb, create_collection_index
from openedx.core.lib.cache_utils import request_cached
from edx_django_utils.cache import RequestCache
log = logging.getLogger(__name__)
@@ -261,6 +262,9 @@ class MongoPersistenceBackend:
# only before returning. Also makes pymongo report write errors.
kwargs['w'] = 1
#make sure the course index cache is fresh.
RequestCache(namespace="course_index_cache").clear()
self.database = connect_to_mongodb(
db, host,
port=port, tz_aware=tz_aware, user=user, password=password,
@@ -532,6 +536,7 @@ class MongoPersistenceBackend:
"""
Closes any open connections to the underlying databases
"""
RequestCache(namespace="course_index_cache").clear()
self.database.client.close()
def _drop_database(self, database=True, collections=True, connections=True):
@@ -546,6 +551,7 @@ class MongoPersistenceBackend:
If connections is True, then close the connection to the database as well.
"""
RequestCache(namespace="course_index_cache").clear()
connection = self.database.client
if database:
@@ -571,7 +577,13 @@ class DjangoFlexPersistenceBackend(MongoPersistenceBackend):
# Structures and definitions are only supported in MongoDB for now.
# Course indexes are read from MySQL and written to both MongoDB and MySQL
# Course indexes are cached within the process using their key and ignore_case atrributes as keys.
# This method is request cached. The keys to the cache are the arguements to the method.
# The `self` arguement is discarded as a key using an isinstance check.
# This is because the DjangoFlexPersistenceBackend could be different in reference to the same course key.
@request_cached(
"course_index_cache",
arg_map_function=lambda arg: str(arg) if not isinstance(arg, DjangoFlexPersistenceBackend) else "")
def get_course_index(self, key, ignore_case=False):
"""
Get the course_index from the persistence mechanism whose id is the given key
@@ -650,6 +662,10 @@ class DjangoFlexPersistenceBackend(MongoPersistenceBackend):
"""
Create the course_index in the db
"""
# clear the whole course_index request cache, required for sucessfully cloning a course.
# This is a relatively large hammer for the problem, but we mostly only use one course at a time.
RequestCache(namespace="course_index_cache").clear()
course_index['last_update'] = datetime.datetime.now(pytz.utc)
new_index = SplitModulestoreCourseIndex(**SplitModulestoreCourseIndex.fields_from_v1_schema(course_index))
new_index.save()
@@ -669,6 +685,7 @@ class DjangoFlexPersistenceBackend(MongoPersistenceBackend):
# "last_update not only tells us when this course was last updated but also helps prevent collisions"
# This code is just copying the behavior of the existing MongoPersistenceBackend
# See https://github.com/edx/edx-platform/pull/5200 for context
RequestCache(namespace="course_index_cache").clear()
course_index['last_update'] = datetime.datetime.now(pytz.utc)
# Find the SplitModulestoreCourseIndex entry that we'll be updating:
try:
@@ -730,6 +747,7 @@ class DjangoFlexPersistenceBackend(MongoPersistenceBackend):
"""
Delete the course_index from the persistence mechanism whose id is the given course_index
"""
RequestCache(namespace="course_index_cache").clear()
SplitModulestoreCourseIndex.objects.filter(course_id=course_key).delete()
# TEMP: Also write to MongoDB, so we can switch back to using it if this new MySQL version doesn't work well:
super().delete_course_index(course_key)
@@ -738,6 +756,7 @@ class DjangoFlexPersistenceBackend(MongoPersistenceBackend):
"""
Reset data for testing.
"""
RequestCache(namespace="course_index_cache").clear()
try:
SplitModulestoreCourseIndex.objects.all().delete()
except TransactionManagementError as err:

View File

@@ -275,7 +275,7 @@ class ModuleStoreIsolationMixin(CacheIsolationMixin, SignalIsolationMixin):
"""
MODULESTORE = functools.partial(mixed_store_config, mkdtemp_clean(), {})
CONTENTSTORE = functools.partial(contentstore_config)
ENABLED_CACHES = ['default', 'mongo_metadata_inheritance', 'loc_cache']
ENABLED_CACHES = ['default', 'mongo_metadata_inheritance', 'loc_cache', 'course_index_cache']
# List of modulestore signals enabled for this test. Defaults to an empty
# list. The list of signals available is found on the SignalHandler class,

View File

@@ -368,7 +368,7 @@ class TestMixedModuleStore(CommonMixedModuleStoreSetup):
# fake: one w/ wildcard version
# split: has one lookup for the course and then one for the course items
# but the active_versions check is done in MySQL
@ddt.data((ModuleStoreEnum.Type.mongo, [1, 1], 0), (ModuleStoreEnum.Type.split, [2, 2], 0))
@ddt.data((ModuleStoreEnum.Type.mongo, [1, 1], 0), (ModuleStoreEnum.Type.split, [2, 1], 0))
@ddt.unpack
def test_has_item(self, default_ms, max_find, max_send):
self.initdb(default_ms)
@@ -391,7 +391,7 @@ class TestMixedModuleStore(CommonMixedModuleStoreSetup):
# split:
# problem: active_versions, structure
# non-existent problem: ditto
@ddt.data((ModuleStoreEnum.Type.mongo, 0, [3, 2], 0), (ModuleStoreEnum.Type.split, 0, [2, 2], 0))
@ddt.data((ModuleStoreEnum.Type.mongo, 0, [3, 2], 0), (ModuleStoreEnum.Type.split, 0, [2, 1], 0))
@ddt.unpack
def test_get_item(self, default_ms, num_mysql, max_find, max_send):
self.initdb(default_ms)
@@ -414,7 +414,7 @@ class TestMixedModuleStore(CommonMixedModuleStoreSetup):
# Split:
# mysql: fetch course's active version from SplitModulestoreCourseIndex, spurious refetch x2
# find: get structure
@ddt.data((ModuleStoreEnum.Type.mongo, 0, 14, 0), (ModuleStoreEnum.Type.split, 0, 4, 0))
@ddt.data((ModuleStoreEnum.Type.mongo, 0, 14, 0), (ModuleStoreEnum.Type.split, 0, 3, 0))
@ddt.unpack
def test_get_items(self, default_ms, num_mysql, max_find, max_send):
self.initdb(default_ms)
@@ -522,7 +522,7 @@ class TestMixedModuleStore(CommonMixedModuleStoreSetup):
# mysql: SplitModulestoreCourseIndex - select 2x (by course_id, by objectid), update, update historical record
# find: definitions (calculator field), structures
# sends: 2 sends to update index & structure (note, it would also be definition if a content field changed)
@ddt.data((ModuleStoreEnum.Type.mongo, 0, 7, 5), (ModuleStoreEnum.Type.split, 3, 3, 2))
@ddt.data((ModuleStoreEnum.Type.mongo, 0, 7, 5), (ModuleStoreEnum.Type.split, 3, 2, 2))
@ddt.unpack
def test_update_item(self, default_ms, num_mysql, max_find, max_send):
"""
@@ -1043,7 +1043,7 @@ class TestMixedModuleStore(CommonMixedModuleStoreSetup):
# executed twice, possibly unnecessarily)
# find: 2 reads of structure, definition (s/b lazy; so, unnecessary),
# plus 1 wildcard find in draft mongo which has none
@ddt.data((ModuleStoreEnum.Type.mongo, 0, 3, 0), (ModuleStoreEnum.Type.split, 0, 6, 0))
@ddt.data((ModuleStoreEnum.Type.mongo, 0, 3, 0), (ModuleStoreEnum.Type.split, 0, 5, 0))
@ddt.unpack
def test_get_courses(self, default_ms, num_mysql, max_find, max_send):
self.initdb(default_ms)
@@ -1638,7 +1638,7 @@ class TestMixedModuleStore(CommonMixedModuleStoreSetup):
# 8-9. get vertical, compute inheritance
# 10-11. get other vertical_x1b (why?) and compute inheritance
# Split: loading structure from mongo (also loads active version from MySQL, not tracked here)
@ddt.data((ModuleStoreEnum.Type.mongo, 0, [12, 3], 0), (ModuleStoreEnum.Type.split, 0, [3, 2], 0))
@ddt.data((ModuleStoreEnum.Type.mongo, 0, [12, 3], 0), (ModuleStoreEnum.Type.split, 0, [3, 1], 0))
@ddt.unpack
def test_path_to_location(self, default_ms, num_mysql, num_finds, num_sends):
"""
@@ -1802,6 +1802,7 @@ class TestMixedModuleStore(CommonMixedModuleStoreSetup):
# Find the version_guid of our course by diving into Split Mongo.
split = self._get_split_modulestore()
course_index = split.get_course_index(self.course.location.course_key)
log.warning(f"Banana course index: {course_index}")
original_version_guid = course_index["versions"]["published-branch"]
# Reset course to currently-published version.

View File

@@ -226,8 +226,8 @@ class TestGetBlocksQueryCounts(TestGetBlocksQueryCountsBase):
@ddt.data(
(ModuleStoreEnum.Type.mongo, 5, True, 24),
(ModuleStoreEnum.Type.mongo, 5, False, 14),
(ModuleStoreEnum.Type.split, 3, True, 24),
(ModuleStoreEnum.Type.split, 3, False, 14),
(ModuleStoreEnum.Type.split, 2, True, 24),
(ModuleStoreEnum.Type.split, 2, False, 14),
)
@ddt.unpack
def test_query_counts_uncached(self, store_type, expected_mongo_queries, with_storage_backing, num_sql_queries):

View File

@@ -1016,7 +1016,7 @@ class TestTOC(ModuleStoreTestCase):
# - 1 for 5 definitions
# Split makes 1 MySQL query to render the toc:
# - 1 MySQL for the active version at the start of the bulk operation (no mongo calls)
@ddt.data((ModuleStoreEnum.Type.mongo, 3, 0, 0), (ModuleStoreEnum.Type.split, 2, 0, 1))
@ddt.data((ModuleStoreEnum.Type.mongo, 3, 0, 0), (ModuleStoreEnum.Type.split, 2, 0, 0))
@ddt.unpack
def test_toc_toy_from_chapter(self, default_ms, setup_finds, setup_sends, toc_finds):
with self.store.default_store(default_ms):
@@ -1056,7 +1056,7 @@ class TestTOC(ModuleStoreTestCase):
# - 1 for 5 definitions
# Split makes 1 MySQL query to render the toc:
# - 1 MySQL for the active version at the start of the bulk operation (no mongo calls)
@ddt.data((ModuleStoreEnum.Type.mongo, 3, 0, 0), (ModuleStoreEnum.Type.split, 2, 0, 1))
@ddt.data((ModuleStoreEnum.Type.mongo, 3, 0, 0), (ModuleStoreEnum.Type.split, 2, 0, 0))
@ddt.unpack
def test_toc_toy_from_section(self, default_ms, setup_finds, setup_sends, toc_finds):
with self.store.default_store(default_ms):

View File

@@ -162,9 +162,9 @@ class RenderXBlockTestMixin(MasqueradeMixin, metaclass=ABCMeta):
@ddt.data(
('vertical_block', ModuleStoreEnum.Type.mongo, 13),
('vertical_block', ModuleStoreEnum.Type.split, 6),
('vertical_block', ModuleStoreEnum.Type.split, 5),
('html_block', ModuleStoreEnum.Type.mongo, 14),
('html_block', ModuleStoreEnum.Type.split, 6),
('html_block', ModuleStoreEnum.Type.split, 5),
)
@ddt.unpack
def test_courseware_html(self, block_name, default_store, mongo_calls):

View File

@@ -402,7 +402,7 @@ class ViewsQueryCountTestCase(
@ddt.data(
(ModuleStoreEnum.Type.mongo, 3, 4, 39),
(ModuleStoreEnum.Type.split, 3, 13, 39),
(ModuleStoreEnum.Type.split, 3, 11, 39),
)
@ddt.unpack
@count_queries
@@ -411,7 +411,7 @@ class ViewsQueryCountTestCase(
@ddt.data(
(ModuleStoreEnum.Type.mongo, 3, 3, 35),
(ModuleStoreEnum.Type.split, 3, 10, 35),
(ModuleStoreEnum.Type.split, 3, 9, 35),
)
@ddt.unpack
@count_queries

View File

@@ -483,15 +483,15 @@ class SingleThreadQueryCountTestCase(ForumsEnableMixin, ModuleStoreTestCase):
(ModuleStoreEnum.Type.mongo, False, 1, 5, 2, 21, 7),
(ModuleStoreEnum.Type.mongo, False, 50, 5, 2, 21, 7),
# split mongo: 3 queries, regardless of thread response size.
(ModuleStoreEnum.Type.split, False, 1, 3, 3, 21, 8),
(ModuleStoreEnum.Type.split, False, 50, 3, 3, 21, 8),
(ModuleStoreEnum.Type.split, False, 1, 2, 2, 21, 8),
(ModuleStoreEnum.Type.split, False, 50, 2, 2, 21, 8),
# Enabling Enterprise integration should have no effect on the number of mongo queries made.
(ModuleStoreEnum.Type.mongo, True, 1, 5, 2, 21, 7),
(ModuleStoreEnum.Type.mongo, True, 50, 5, 2, 21, 7),
# split mongo: 3 queries, regardless of thread response size.
(ModuleStoreEnum.Type.split, True, 1, 3, 3, 21, 8),
(ModuleStoreEnum.Type.split, True, 50, 3, 3, 21, 8),
(ModuleStoreEnum.Type.split, True, 1, 2, 2, 21, 8),
(ModuleStoreEnum.Type.split, True, 50, 2, 2, 21, 8),
)
@ddt.unpack
def test_number_of_mongo_queries(

View File

@@ -164,8 +164,9 @@ class RecalculateSubsectionGradeTest(HasCourseWithProblemsMixin, ModuleStoreTest
@ddt.data(
(ModuleStoreEnum.Type.mongo, 1, 40, True),
(ModuleStoreEnum.Type.mongo, 1, 40, False),
(ModuleStoreEnum.Type.split, 3, 40, True),
(ModuleStoreEnum.Type.split, 3, 40, False),
(ModuleStoreEnum.Type.split, 2, 40, True),
(ModuleStoreEnum.Type.split, 2, 40, False),
)
@ddt.unpack
def test_query_counts(self, default_store, num_mongo_calls, num_sql_calls, create_multiple_subsections):
@@ -178,7 +179,7 @@ class RecalculateSubsectionGradeTest(HasCourseWithProblemsMixin, ModuleStoreTest
@ddt.data(
(ModuleStoreEnum.Type.mongo, 1, 40),
(ModuleStoreEnum.Type.split, 3, 40),
(ModuleStoreEnum.Type.split, 2, 40),
)
@ddt.unpack
def test_query_counts_dont_change_with_more_content(self, default_store, num_mongo_calls, num_sql_calls):
@@ -224,7 +225,7 @@ class RecalculateSubsectionGradeTest(HasCourseWithProblemsMixin, ModuleStoreTest
@ddt.data(
(ModuleStoreEnum.Type.mongo, 1, 23),
(ModuleStoreEnum.Type.split, 3, 23),
(ModuleStoreEnum.Type.split, 2, 23),
)
@ddt.unpack
def test_persistent_grades_not_enabled_on_course(self, default_store, num_mongo_queries, num_sql_queries):
@@ -239,7 +240,7 @@ class RecalculateSubsectionGradeTest(HasCourseWithProblemsMixin, ModuleStoreTest
@ddt.data(
(ModuleStoreEnum.Type.mongo, 1, 41),
(ModuleStoreEnum.Type.split, 3, 41),
(ModuleStoreEnum.Type.split, 2, 41),
)
@ddt.unpack
def test_persistent_grades_enabled_on_course(self, default_store, num_mongo_queries, num_sql_queries):

View File

@@ -673,6 +673,6 @@ class TestInstructorDashboardPerformance(ModuleStoreTestCase, LoginEnrollmentTes
# check MongoDB calls count
url = reverse('spoc_gradebook', kwargs={'course_id': self.course.id})
with check_mongo_calls(9):
with check_mongo_calls(7):
response = self.client.get(url)
assert response.status_code == 200

View File

@@ -376,11 +376,12 @@ class BookmarkModelTests(BookmarksTestsBase):
# (ModuleStoreEnum.Type.mongo, 6, 3, 3), Too slow.
(ModuleStoreEnum.Type.mongo, 2, 4, 4),
# (ModuleStoreEnum.Type.mongo, 4, 4, 4),
(ModuleStoreEnum.Type.split, 2, 2, 2),
(ModuleStoreEnum.Type.split, 4, 2, 2),
(ModuleStoreEnum.Type.split, 2, 3, 2),
(ModuleStoreEnum.Type.split, 2, 2, 1),
(ModuleStoreEnum.Type.split, 4, 2, 1),
(ModuleStoreEnum.Type.split, 2, 3, 1),
# (ModuleStoreEnum.Type.split, 4, 3, 1),
(ModuleStoreEnum.Type.split, 2, 4, 2),
(ModuleStoreEnum.Type.split, 2, 4, 1),
)
@ddt.unpack
def test_get_path_queries(self, store_type, children_per_block, depth, expected_mongo_calls):

View File

@@ -378,7 +378,7 @@ class CourseOverviewTestCase(CatalogIntegrationMixin, ModuleStoreTestCase, Cache
course_overview = CourseOverview._create_or_update(course) # pylint: disable=protected-access
assert course_overview.lowest_passing_grade is None
@ddt.data((ModuleStoreEnum.Type.mongo, 4, 4), (ModuleStoreEnum.Type.split, 3, 3))
@ddt.data((ModuleStoreEnum.Type.mongo, 4, 4), (ModuleStoreEnum.Type.split, 2, 2))
@ddt.unpack
def test_versioning(self, modulestore_type, min_mongo_calls, max_mongo_calls):
"""