From 43bc683d80c620d9752e68a317d51891a00a060a Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 27 Oct 2021 14:51:55 -0700 Subject: [PATCH] feat: Add a data migration to copy all course index data into MySQL --- .../migrations/0002_data_migration.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 common/djangoapps/split_modulestore_django/migrations/0002_data_migration.py diff --git a/common/djangoapps/split_modulestore_django/migrations/0002_data_migration.py b/common/djangoapps/split_modulestore_django/migrations/0002_data_migration.py new file mode 100644 index 0000000000..3d33e53ab5 --- /dev/null +++ b/common/djangoapps/split_modulestore_django/migrations/0002_data_migration.py @@ -0,0 +1,61 @@ +from django.db import migrations, models +from django.db.utils import IntegrityError + + +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.django import modulestore + +from ..models import SplitModulestoreCourseIndex as SplitModulestoreCourseIndex_Real + + +def forwards_func(apps, schema_editor): + """ + Copy all course index data from MongoDB to MySQL, unless it's already present in MySQL. + + This migration is used as part of an upgrade path from storing course indexes in MongoDB to storing them in MySQL. + On edX.org, we began writing to MySQL+MongoDB before we deployed this migration, so some courses are already in + MySQL. But any courses that haven't been modified recently would only be in MongoDB and need to be copied over to + MySQL before we can switch reading course indexes to MySQL. + """ + db_alias = schema_editor.connection.alias + SplitModulestoreCourseIndex = apps.get_model("split_modulestore_django", "SplitModulestoreCourseIndex") + split_modulestore = modulestore()._get_modulestore_by_type(ModuleStoreEnum.Type.split) + + for course_index in split_modulestore.db_connection.find_matching_course_indexes(force_mongo=True): + data = SplitModulestoreCourseIndex_Real.fields_from_v1_schema(course_index) + course_id = data["course_id"] + + try: + mysql_entry = SplitModulestoreCourseIndex.objects.get(course_id=course_id) + # This course index ("active version") already exists in MySQL. + # Let's just make sure it's the latest version. If the MongoDB somehow contains a newer version, something + # has gone wrong and we should investigate to ensure we're not losing any data. + if mysql_entry.edited_on < data["edited_on"]: + raise ValueError( + f"Course {course_id} already exists in MySQL but the MongoDB version is newer. " + "That's unexpected because since the course index table was added to MySQL, there has never been a " + "time when we would write course_indexes updates only to MongoDB without also writing to MySQL." + ) + except SplitModulestoreCourseIndex.DoesNotExist: + # This course exists in MongoDB but hasn't yet been migrated to MySQL. Do that now. + SplitModulestoreCourseIndex(**data).save(using=db_alias) + +def reverse_func(apps, schema_editor): + """ + Reversing the data migration is a no-op, because edX.org used a migration path path that started with writing to + both MySQL+MongoDB while still reading from MongoDB, then later executed this data migration, then later cut over to + reading from MySQL only. If we reversed this by deleting all entries, it would undo any writes that took place + before this data migration, which are unrelated. + """ + pass + + +class Migration(migrations.Migration): + + dependencies = [ + ('split_modulestore_django', '0001_initial'), + ] + + operations = [ + migrations.RunPython(forwards_func, reverse_func), + ]