fix!: split modulestore's has_course(ignore_case=True) was not working (#38044)

BREAKING CHANGE: this forces course IDs in modulestore to be unique (case insensitive). This was always supposed to be the case, but it wasn't working properly on MySQL. Upgrading past this commit may cause a migration failure if you have conflicting course IDs - see the migration 0004 docstring for details.
This commit is contained in:
Braden MacDonald
2026-02-19 15:12:41 -08:00
committed by Braden MacDonald
parent 3e522d5272
commit 12e9af4f5c
5 changed files with 117 additions and 12 deletions

View File

@@ -1275,6 +1275,7 @@ class ContentStoreTest(ContentStoreTestCase):
resp = self.client.ajax_post('/course/', self.course_data)
self.assertEqual(resp.status_code, 200)
data = parse_json(resp)
assert 'ErrMsg' in data, "Expected the course creation to fail"
self.assertRegex(data['ErrMsg'], error_message)
if test_enrollment:
# One test case involves trying to create the same course twice. Hence for that course,

View File

@@ -0,0 +1,67 @@
# Generated by Django 5.2.11 on 2026-02-19 23:23
import django.db.models.functions.text
from django.db import migrations, models
import opaque_keys.edx.django.models
class Migration(migrations.Migration):
"""
Force course_id to be case-insensitively unique.
Note: due to a bug we had for several years, it's possible that this
migration will fail because you have some duplicate entries in the
split_modulestore_django_splitmodulestorecourseindex database table that
differ only in case. Note that other parts of the system (like
CourseOverview) are case-insensitive so only allow one version of the course
ID to be stored, but this table would allow multiple if they differ in case.
Such courses would likely be broken/buggy because so many other parts of the
system use a case-insensitive course_id.
So if you encounter this issue and the migration fails due to a duplicate:
Try to figure out which is the "correct" ID (e.g. the one used in the
CourseOverview, CourseEnrollment, and CoursewareStudentModule tables) and
then delete the "incorrect" ID from this table. You can easily do
this from the Django admin at:
(studio)/admin/split_modulestore_django/splitmodulestorecourseindex/
Be sure to take a screenshot or record the `objectid`, `draft_version`,
`published_version`, and `wiki_slug` fields before you do so, so that you
can reverse the deletion if needed.
Deleting rows from this table will NOT delete any actual course content from
MongoDB nor cascade the deletion to any other rows in other MySQL tables; it
just removes a duplicate record that points to [unused?] course data.
"""
dependencies = [
("split_modulestore_django", "0003_alter_historicalsplitmodulestorecourseindex_options"),
]
operations = [
# Make 'course_id' case-insensitively unique (in addition to its existing case-sensitively unique index)
migrations.AddConstraint(
model_name="splitmodulestorecourseindex",
constraint=models.UniqueConstraint(
django.db.models.functions.text.Lower("course_id"),
name="splitmodulestorecourseindex_courseid_unique_ci",
),
),
# Mark the 'course_id' field as 'case_sensitive=True' now that LearningContextKeyField supports it upstream.
# This part of the migration should have no effect on the database, as migration 0001 already explicitly set a
# case-sensitive collation. We just now have a way to indicate that in the model field definition.
migrations.AlterField(
model_name="splitmodulestorecourseindex",
name="course_id",
field=opaque_keys.edx.django.models.LearningContextKeyField(
case_sensitive=True, max_length=255, unique=True
),
),
migrations.AlterField(
model_name="historicalsplitmodulestorecourseindex",
name="course_id",
field=opaque_keys.edx.django.models.LearningContextKeyField(
case_sensitive=True, db_index=True, max_length=255
),
),
]

View File

@@ -34,8 +34,13 @@ class SplitModulestoreCourseIndex(models.Model):
# For compatibility with MongoDB, each course index must have an ObjectId. We still have an integer primary key too.
objectid = models.CharField(max_length=24, null=False, blank=False, unique=True)
# The ID of this course (or library). Must start with "course-v1:" or "library-v1:"
course_id = LearningContextKeyField(max_length=255, db_index=True, unique=True, null=False)
# course_id: The ID of this course (or library). Must start with "course-v1:" or "library-v1:"
# This is case-sensitive; however, many other parts of the system aren't case sensitive, so we add an explicit index
# on Lower(course_id) to make this case-insensitively unique as well.
# So: (1) queries of course_id by default are case-sensitive. (2) queries that want to be case-insensitive need to
# explicitly compare Lower(course_id) with the lowercase key in question. (3) Course IDs that differ only in case
# are prohibited.
course_id = LearningContextKeyField(case_sensitive=True, unique=True, null=False)
# Extract the "org" value from the course_id key so that we can search by org.
# This gets set automatically by clean()
org = models.CharField(max_length=255, db_index=True)
@@ -81,6 +86,13 @@ class SplitModulestoreCourseIndex(models.Model):
class Meta:
ordering = ["course_id"]
verbose_name_plural = "Split modulestore course indexes"
constraints = [
# Explicitly force "course_id" to be case-insensitively unique
models.UniqueConstraint(
models.functions.Lower("course_id"),
name="splitmodulestorecourseindex_courseid_unique_ci",
),
]
def as_v1_schema(self):
""" Return in the same format as was stored in MongoDB """
@@ -160,6 +172,6 @@ class SplitModulestoreCourseIndex(models.Model):
def save(self, *args, **kwargs):
""" Save this model """
# Override to ensure that full_clean()/clean() is always called, so that the checks in clean() above are run.
# But don't validate_unique(), it just runs extra queries and the database enforces it anyways.
self.full_clean(validate_unique=False)
# But don't run validations; they just run extra queries and the database enforces them anyways.
self.full_clean(validate_unique=False, validate_constraints=False)
return super().save(*args, **kwargs)

View File

@@ -2,8 +2,10 @@
from datetime import datetime
from bson.objectid import ObjectId
from django.db import IntegrityError, transaction
from django.test import TestCase
from opaque_keys.edx.keys import CourseKey
import pytest
from common.djangoapps.split_modulestore_django.models import SplitModulestoreCourseIndex
from xmodule.modulestore import ModuleStoreEnum # lint-amnesty, pylint: disable=wrong-import-order
@@ -16,12 +18,22 @@ class SplitModulestoreCourseIndexTest(TestCase):
"""
Make sure the course_id column is case sensitive.
2021 note:
Although the platform code generally tries to prevent having two courses whose IDs differ only by case
(e.g. https://git.io/J6voR , note `ignore_case=True`), we found at least one pair of courses on stage that
differs only by case in its `org` ID (`edx` vs `edX`). So for backwards compatibility with MongoDB and to avoid
issues for anyone else with similar course IDs that differ only by case, we've made the new version case
sensitive too. The system still tries to prevent creation of courses that differ only by course (that hasn't
changed), but now the MySQL version won't break if that has somehow happened.
2026 note:
Due to a serious bug where the system was NOT preventing duplicate courses from being created on MySQL, we
decided to tighten this up and introduce a new UNIQUE(LOWER(course_id)) index to prevent duplicates at the
database level. This does mean that the migration will fail if anyone has such duplicates in their system, but
those duplicates are going to have other bugs anyways; in such cases we recommend deleting one of the duplicate
SplitModulestoreCourseIndex entries, or changing its course_id to a dummy value.
"""
course_index_common = {
"course": "TL101",
@@ -38,8 +50,16 @@ class SplitModulestoreCourseIndexTest(TestCase):
data1 = SplitModulestoreCourseIndex.fields_from_v1_schema(course_index_1)
data2 = SplitModulestoreCourseIndex.fields_from_v1_schema(course_index_2)
SplitModulestoreCourseIndex(**data1).save()
# This next line will fail if the course_id column is not case-sensitive:
SplitModulestoreCourseIndex(**data2).save()
# Also check deletion, to ensure the course_id historical record is not unique or case sensitive:
SplitModulestoreCourseIndex.objects.get(course_id=CourseKey.from_string("course-v1:edx+TL101+2015")).delete()
SplitModulestoreCourseIndex.objects.get(course_id=CourseKey.from_string("course-v1:edX+TL101+2015")).delete()
# This next line should fail, because the database itself prevents duplicates:
with pytest.raises(IntegrityError):
with transaction.atomic():
SplitModulestoreCourseIndex(**data2).save()
id_1 = "course-v1:edx+TL101+2015"
id_2 = "course-v1:edX+TL101+2015"
# Retrieving a course by its exact course_id should work:
SplitModulestoreCourseIndex.objects.get(course_id=CourseKey.from_string(id_1))
# but retrieving a course with the wrong case should throw an error:
with pytest.raises(SplitModulestoreCourseIndex.DoesNotExist):
SplitModulestoreCourseIndex.objects.get(course_id=CourseKey.from_string(id_2))

View File

@@ -15,6 +15,9 @@ from zoneinfo import ZoneInfo
from ccx_keys.locator import CCXLocator
from django.core.cache import caches, InvalidCacheBackendError
from django.db.models import F
from django.db.models.functions import Lower
from django.db.models.lookups import Exact
from django.db.transaction import TransactionManagementError
import pymongo
# Import this just to export it
@@ -659,13 +662,15 @@ class DjangoFlexPersistenceBackend(MongoPersistenceBackend):
# We never include the branch or the version in the course key in the SplitModulestoreCourseIndex table:
key = key.for_branch(None).version_agnostic()
if not ignore_case:
query = {"course_id": key}
query_expr = Exact(F("course_id"), str(key))
else:
# Case insensitive search is important when creating courses to reject course IDs that differ only by
# capitalization.
query = {"course_id__iexact": key}
# WARNING: 'course_id__iexact=key' does not work on this table as it uses a case-sensitive collation.
# We need to use the following explicit lowercase comparison in order to correctly query:
query_expr = Exact(Lower("course_id"), str(key).lower())
try:
return SplitModulestoreCourseIndex.objects.get(**query).as_v1_schema()
return SplitModulestoreCourseIndex.objects.get(query_expr).as_v1_schema()
except SplitModulestoreCourseIndex.DoesNotExist:
# The mongo implementation does not retrieve by string key; it retrieves by (org, course, run) tuple.
# As a result, it will handle read requests for a CCX key like