feat: store split modulestore's course indexes in Django/MySQL

Split modulestore persists data in three MongoDB "collections": course_index (list of courses and the current version of each), structure (outline of the courses, and some XBlock fields), and definition (other XBlock fields). While "structure" and "definition" data can get very large, which is one of the reasons MongoDB was chosen for modulestore, the course index data is very small.

By moving course index data to MySQL / a django model, we get these advantages:
* Full history of changes to the course index data is now preserved
* Includes a django admin view to inspect the list of courses and libraries
* It's much easier to "reset" a corrupted course to a known working state, by using the simple-history revert tools from the django admin.
* The remaining MongoDB collections (structure and definition) are essentially just used as key-value stores of large JSON data structures. This paves the way for future changes that allow migrating courses one at a time from MongoDB to S3, and thus eliminating any use of MongoDB by split modulestore, simplifying the stack.
This commit is contained in:
Braden MacDonald
2021-09-29 15:55:52 -07:00
committed by David Ormsbee
parent d20a769f73
commit 96e5ff8dce
52 changed files with 729 additions and 205 deletions

View File

@@ -276,11 +276,11 @@ class BookmarkModelTests(BookmarksTestsBase):
(ModuleStoreEnum.Type.mongo, 'sequential_1', ['chapter_1'], 4),
(ModuleStoreEnum.Type.mongo, 'vertical_1', ['chapter_1', 'sequential_1'], 6),
(ModuleStoreEnum.Type.mongo, 'html_1', ['chapter_1', 'sequential_2', 'vertical_2'], 7),
(ModuleStoreEnum.Type.split, 'course', [], 3),
(ModuleStoreEnum.Type.split, 'chapter_1', [], 2),
(ModuleStoreEnum.Type.split, 'sequential_1', ['chapter_1'], 2),
(ModuleStoreEnum.Type.split, 'vertical_1', ['chapter_1', 'sequential_1'], 2),
(ModuleStoreEnum.Type.split, 'html_1', ['chapter_1', 'sequential_2', 'vertical_2'], 2),
(ModuleStoreEnum.Type.split, 'course', [], 2),
(ModuleStoreEnum.Type.split, 'chapter_1', [], 1),
(ModuleStoreEnum.Type.split, 'sequential_1', ['chapter_1'], 1),
(ModuleStoreEnum.Type.split, 'vertical_1', ['chapter_1', 'sequential_1'], 1),
(ModuleStoreEnum.Type.split, 'html_1', ['chapter_1', 'sequential_2', 'vertical_2'], 1),
)
@ddt.unpack
def test_path_and_queries_on_create(self, store_type, block_to_bookmark, ancestors_attrs, expected_mongo_calls):
@@ -376,11 +376,11 @@ class BookmarkModelTests(BookmarksTestsBase):
# (ModuleStoreEnum.Type.mongo, 6, 3, 3), Too slow.
(ModuleStoreEnum.Type.mongo, 2, 4, 4),
# (ModuleStoreEnum.Type.mongo, 4, 4, 4),
(ModuleStoreEnum.Type.split, 2, 2, 2),
(ModuleStoreEnum.Type.split, 4, 2, 2),
(ModuleStoreEnum.Type.split, 2, 3, 2),
# (ModuleStoreEnum.Type.split, 4, 3, 2),
(ModuleStoreEnum.Type.split, 2, 4, 2),
(ModuleStoreEnum.Type.split, 2, 2, 1),
(ModuleStoreEnum.Type.split, 4, 2, 1),
(ModuleStoreEnum.Type.split, 2, 3, 1),
# (ModuleStoreEnum.Type.split, 4, 3, 1),
(ModuleStoreEnum.Type.split, 2, 4, 1),
)
@ddt.unpack
def test_get_path_queries(self, store_type, children_per_block, depth, expected_mongo_calls):

View File

@@ -108,10 +108,10 @@ class XBlockCacheTaskTests(BookmarksTestsBase):
(ModuleStoreEnum.Type.mongo, 4, 3, 5),
(ModuleStoreEnum.Type.mongo, 2, 4, 6),
# (ModuleStoreEnum.Type.mongo, 4, 4, 6), Too slow.
(ModuleStoreEnum.Type.split, 2, 2, 3),
(ModuleStoreEnum.Type.split, 4, 2, 3),
(ModuleStoreEnum.Type.split, 2, 3, 3),
(ModuleStoreEnum.Type.split, 2, 4, 3),
(ModuleStoreEnum.Type.split, 2, 2, 2),
(ModuleStoreEnum.Type.split, 4, 2, 2),
(ModuleStoreEnum.Type.split, 2, 3, 2),
(ModuleStoreEnum.Type.split, 2, 4, 2),
)
@ddt.unpack
def test_calculate_course_xblocks_data_queries(self, store_type, children_per_block, depth, expected_mongo_calls):

View File

@@ -378,7 +378,7 @@ class CourseOverviewTestCase(CatalogIntegrationMixin, ModuleStoreTestCase, Cache
course_overview = CourseOverview._create_or_update(course) # pylint: disable=protected-access
assert course_overview.lowest_passing_grade is None
@ddt.data((ModuleStoreEnum.Type.mongo, 4, 4), (ModuleStoreEnum.Type.split, 3, 3))
@ddt.data((ModuleStoreEnum.Type.mongo, 4, 4), (ModuleStoreEnum.Type.split, 2, 2))
@ddt.unpack
def test_versioning(self, modulestore_type, min_mongo_calls, max_mongo_calls):
"""

View File

@@ -743,7 +743,7 @@ class TestCohortsAndPartitionGroups(ModuleStoreTestCase):
self.partition_id,
self.group1_id,
)
with pytest.raises(IntegrityError):
with pytest.raises(IntegrityError), self.allow_transaction_exception():
self._link_cohort_partition_group(
self.first_cohort,
self.partition_id,

View File

@@ -396,7 +396,15 @@ class SequenceApiTestViews(MasqueradeMixin, BaseCoursewareTests):
def test_hidden_after_due(self, is_past_due, masquerade_config, expected_hidden, expected_banner):
"""Validate the metadata when hide-after-due is set for a sequence"""
due = datetime.now() + timedelta(days=-1 if is_past_due else 1)
sequence = ItemFactory(parent=self.chapter, category='sequential', hide_after_due=True, due=due)
sequence = ItemFactory(
parent_location=self.chapter.location,
# ^ It is very important that we use parent_location=self.chapter.location (and not parent=self.chapter), as
# chapter is a class attribute and passing it by value will update its .children=[] which will then leak
# into other tests and cause errors if the children no longer exist.
category='sequential',
hide_after_due=True,
due=due,
)
CourseEnrollment.enroll(self.user, self.course.id)

View File

@@ -23,7 +23,8 @@ class UserPreferenceModelTest(ModuleStoreTestCase):
def test_duplicate_user_key(self):
user = UserFactory.create()
UserPreferenceFactory.create(user=user, key="testkey", value="first")
pytest.raises(IntegrityError, UserPreferenceFactory.create)
with self.allow_transaction_exception():
pytest.raises(IntegrityError, UserPreferenceFactory.create)
def test_arbitrary_values(self):
user = UserFactory.create()

View File

@@ -205,8 +205,8 @@ class TestCourseHomePage(CourseHomePageTestCase): # lint-amnesty, pylint: disab
# Fetch the view and verify the query counts
# TODO: decrease query count as part of REVO-28
with self.assertNumQueries(72, table_blacklist=QUERY_COUNT_TABLE_BLACKLIST):
with check_mongo_calls(4):
with self.assertNumQueries(73, table_blacklist=QUERY_COUNT_TABLE_BLACKLIST):
with check_mongo_calls(3):
url = course_home_url(self.course)
self.client.get(url)

View File

@@ -49,7 +49,7 @@ class TestCourseUpdatesPage(BaseCourseUpdatesTestCase):
# Fetch the view and verify that the query counts haven't changed
# TODO: decrease query count as part of REVO-28
with self.assertNumQueries(50, table_blacklist=QUERY_COUNT_TABLE_BLACKLIST):
with check_mongo_calls(4):
with self.assertNumQueries(51, table_blacklist=QUERY_COUNT_TABLE_BLACKLIST):
with check_mongo_calls(3):
url = course_updates_url(self.course)
self.client.get(url)

View File

@@ -62,8 +62,8 @@ class SubmitCompletionTestCase(CompletionSetUpMixin, TestCase):
self.set_up_completion()
def test_changed_value(self):
with self.assertNumQueries(SELECT + UPDATE + 2 * SAVEPOINT + 2 * OTHER):
# OTHER = user exists, completion exists
with self.assertNumQueries(SELECT + UPDATE + 2 * SAVEPOINT + 4 * OTHER):
# OTHER = user exists, completion exists, 2x look up course in splitmodulestorecourseindex
completion, isnew = models.BlockCompletion.objects.submit_completion(
user=self.user,
block_key=self.block_key,
@@ -88,7 +88,7 @@ class SubmitCompletionTestCase(CompletionSetUpMixin, TestCase):
def test_new_user(self):
newuser = UserFactory()
with self.assertNumQueries(SELECT + UPDATE + 4 * SAVEPOINT):
with self.assertNumQueries(SELECT + UPDATE + 4 * SAVEPOINT + 2 * OTHER):
_, isnew = models.BlockCompletion.objects.submit_completion(
user=newuser,
block_key=self.block_key,
@@ -99,7 +99,7 @@ class SubmitCompletionTestCase(CompletionSetUpMixin, TestCase):
def test_new_block(self):
newblock = UsageKey.from_string('block-v1:edx+test+run+type@video+block@puppers')
with self.assertNumQueries(SELECT + UPDATE + 4 * SAVEPOINT):
with self.assertNumQueries(SELECT + UPDATE + 4 * SAVEPOINT + 2 * OTHER):
_, isnew = models.BlockCompletion.objects.submit_completion(
user=self.user,
block_key=newblock,