feat: Models for import_from_modulestore (#36515)

A new application has been created, described in this ADR:
https://github.com/openedx/edx-platform/pull/36545

The Import model has been created, as well as related models for mapping original content and
new content created during the import process. Python and Django APIs, as well
as a Django admin interface, will soon follow.
This commit is contained in:
Ivan Niedielnitsev
2025-04-17 22:03:46 +03:00
committed by GitHub
parent fd7ce0e273
commit 505b4f466c
14 changed files with 246 additions and 5 deletions

View File

@@ -238,6 +238,7 @@
"cms/djangoapps/cms_user_tasks/",
"cms/djangoapps/course_creators/",
"cms/djangoapps/export_course_metadata/",
"cms/djangoapps/import_from_modulestore/",
"cms/djangoapps/maintenance/",
"cms/djangoapps/models/",
"cms/djangoapps/pipeline_js/",

View File

@@ -0,0 +1,31 @@
========================
Import from Modulestore
========================
The new Django application `import_from_modulestore` is designed to
automate the process of importing legacy course OLX content from Modulestore
to Content Libraries. The application allows users to easily and quickly
migrate existing course content, minimizing the manual work and potential
errors associated with manual migration.
The new app makes the import process automated and easy to manage.
The main problems solved by the application:
* Reducing the time to import course content.
* Ensuring data integrity during the transfer.
* Ability to choose which content to import before the final import.
------------------------------
Import from Modulestore Usage
------------------------------
* Import course elements at the level of sections, subsections, units,
and xblocks into the Content Libraries.
* Choose the structure of this import, whether it will be only xblocks
from a particular course or full sections/subsections/units.
* Store the history of imports.
* Synchronize the course content with the library content (when re-importing,
the blocks can be updated according to changes in the original course).
* The new import mechanism ensures data integrity at the time of importing
by saving the course in StagedContent.
* Import legacy library content into the new Content Libraries.

View File

@@ -0,0 +1,35 @@
"""
This module contains the admin configuration for the Import model.
"""
from django.contrib import admin
from .models import Import, PublishableEntityImport, PublishableEntityMapping
class ImportAdmin(admin.ModelAdmin):
    """
    Admin configuration for the Import model.

    Lists the main attributes of every import, allows filtering by status,
    and keeps ``status`` read-only on the change form.
    """

    list_display = (
        'uuid',
        'created',
        'status',
        'source_key',
        'target_change',
    )
    list_filter = (
        'status',
    )
    # The admin search box applies ``icontains`` to each entry. Django rejects that
    # lookup on a bare ForeignKey ("Related Field got invalid lookup"), so the
    # ``target_change`` relation is searched through its primary key instead
    # (assumes the related model uses the default ``id`` primary key).
    search_fields = (
        'source_key',
        'target_change__id',
    )
    # Render the user relation as a raw ID input instead of a select box.
    raw_id_fields = ('user',)
    readonly_fields = ('status',)
# Import uses its customised admin class; the two remaining models are
# registered together with Django's default ModelAdmin.
admin.site.register(Import, ImportAdmin)
admin.site.register([PublishableEntityImport, PublishableEntityMapping])

View File

@@ -0,0 +1,13 @@
"""
App configuration for the tools that import content from the modulestore.
"""
from django.apps import AppConfig
class ImportFromModulestoreConfig(AppConfig):
    """
    Django application configuration for the import_from_modulestore app,
    which imports legacy content from the modulestore.
    """

    # Full dotted Python path of the application package.
    name = 'cms.djangoapps.import_from_modulestore'

View File

@@ -0,0 +1,20 @@
"""
This module contains the data models for the import_from_modulestore app.
"""
from django.db.models import TextChoices
from django.utils.translation import gettext_lazy as _
class ImportStatus(TextChoices):
    """
    The status of this modulestore-to-learning-core import.

    Each member is declared as a ``(stored value, human-readable label)`` pair.
    The stored value is what gets persisted in the database; the label is a
    lazily translated string shown to users.
    """

    NOT_STARTED = 'not_started', _('Waiting to stage content')
    STAGING = 'staging', _('Staging content for import')
    # An explicit stored value is required here as well: with only a label, the
    # stored value would be taken from the label text instead of a stable key.
    STAGING_FAILED = 'staging_failed', _('Failed to stage content')
    STAGED = 'staged', _('Content is staged and ready for import')
    IMPORTING = 'importing', _('Importing staged content')
    IMPORTING_FAILED = 'importing_failed', _('Failed to import staged content')
    IMPORTED = 'imported', _('Successfully imported content')
    CANCELED = 'canceled', _('Canceled')

View File

@@ -0,0 +1,140 @@
"""
Models for the course to library import app.
"""
import uuid as uuid_tools
from django.contrib.auth import get_user_model
from django.db import models
from django.utils.translation import gettext_lazy as _
from model_utils.models import TimeStampedModel
from opaque_keys.edx.django.models import (
LearningContextKeyField,
UsageKeyField,
)
from openedx_learning.api.authoring_models import LearningPackage, PublishableEntity
from .data import ImportStatus
User = get_user_model()
class Import(TimeStampedModel):
    """
    Represents the action of a user importing a modulestore-based course or legacy
    library into a learning-core based learning package (today, that is always a content library).
    """

    uuid = models.UUIDField(default=uuid_tools.uuid4, editable=False, unique=True)
    status = models.CharField(
        max_length=100,
        choices=ImportStatus.choices,
        default=ImportStatus.NOT_STARTED,
        db_index=True
    )
    user = models.ForeignKey(User, on_delete=models.CASCADE)

    # Note: For now, this will always be a course key. In the future, it may be a legacy library key.
    source_key = LearningContextKeyField(help_text=_('The modulestore course'), max_length=255, db_index=True)
    target_change = models.ForeignKey(to='oel_publishing.DraftChangeLog', on_delete=models.SET_NULL, null=True)

    class Meta:
        verbose_name = _('Import from modulestore')
        verbose_name_plural = _('Imports from modulestore')

    def __str__(self) -> str:
        # Separate the two parts so the source and the target remain distinguishable.
        return f'{self.source_key} -> {self.target_change}'

    def set_status(self, status: ImportStatus) -> None:
        """
        Set import status.

        The new status is saved first. When the import has reached the
        IMPORTED or CANCELED status, the related staged content is then deleted.
        """
        self.status = status
        self.save()
        if status in (ImportStatus.IMPORTED, ImportStatus.CANCELED):
            self.clean_related_staged_content()

    def clean_related_staged_content(self) -> None:
        """
        Clean related staged content.

        Deletes the staged content referenced by every StagedContentForImport
        row linked to this import.
        """
        # select_related loads each staged content together with its link row,
        # avoiding one extra query per row inside the loop.
        for staged_content_for_import in self.staged_content_for_import.select_related('staged_content'):
            staged_content_for_import.staged_content.delete()
class PublishableEntityMapping(TimeStampedModel):
    """
    Represents a mapping between a source usage key and a target publishable entity.

    A given source usage key can be mapped at most once per target learning package.
    """

    source_usage_key = UsageKeyField(
        max_length=255,
        help_text=_('Original usage key/ID of the thing that has been imported.'),
    )
    target_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE)
    target_entity = models.ForeignKey(PublishableEntity, on_delete=models.CASCADE)

    class Meta:
        unique_together = ('source_usage_key', 'target_package')

    def __str__(self) -> str:
        # Separate the two parts so the source and the target remain distinguishable.
        return f'{self.source_usage_key} -> {self.target_entity}'
class PublishableEntityImport(TimeStampedModel):
    """
    Represents a publishableentity version that has been imported into a learning package (e.g. content library)

    This is a many-to-many relationship between a container version and a course to library import.
    """

    import_event = models.ForeignKey(Import, on_delete=models.CASCADE)
    resulting_mapping = models.ForeignKey(PublishableEntityMapping, on_delete=models.SET_NULL, null=True, blank=True)
    resulting_change = models.OneToOneField(
        to='oel_publishing.DraftChangeLogRecord',
        # a changelog record can be pruned, which would set this to NULL, but not delete the
        # entire import record
        null=True,
        on_delete=models.SET_NULL,
    )

    class Meta:
        unique_together = (
            ('import_event', 'resulting_mapping'),
        )

    def __str__(self) -> str:
        # Separate the two parts so the import and its mapping remain distinguishable.
        return f'{self.import_event} -> {self.resulting_mapping}'
class StagedContentForImport(TimeStampedModel):
    """
    Represents m2m relationship between an import and staged content created for that import.

    Each row links one Import to one StagedContent; a given StagedContent can
    be linked from at most one row (one-to-one relation).
    """

    import_event = models.ForeignKey(
        Import,
        on_delete=models.CASCADE,
        related_name='staged_content_for_import',
    )
    staged_content = models.OneToOneField(
        to='content_staging.StagedContent',
        on_delete=models.CASCADE,
        related_name='staged_content_for_import',
    )
    # Since StagedContent stores all the keys of the saved blocks, this field was added to optimize search.
    source_usage_key = UsageKeyField(
        max_length=255,
        help_text=_(
            'The original Usage key of the highest-level component that was saved in StagedContent.'
        ),
    )

    class Meta:
        unique_together = (
            ('import_event', 'staged_content'),
        )

    def __str__(self) -> str:
        # Separate the two parts so the import and its staged content remain distinguishable.
        return f'{self.import_event} -> {self.staged_content}'

View File

@@ -1667,6 +1667,7 @@ INSTALLED_APPS = [
'openedx.core.djangoapps.course_groups', # not used in cms (yet), but tests run
'cms.djangoapps.xblock_config.apps.XBlockConfig',
'cms.djangoapps.export_course_metadata.apps.ExportCourseMetadataConfig',
'cms.djangoapps.import_from_modulestore.apps.ImportFromModulestoreConfig',
# New (Learning-Core-based) XBlock runtime
'openedx.core.djangoapps.xblock.apps.StudioXBlockAppConfig',

View File

@@ -112,7 +112,7 @@ numpy<2.0.0
# Date: 2023-09-18
# pinning this version to avoid updates while the library is being developed
# Issue for unpinning: https://github.com/openedx/edx-platform/issues/35269
openedx-learning==0.23.0
openedx-learning==0.23.1
# Date: 2023-11-29
# Open AI version 1.0.0 dropped support for openai.ChatCompletion which is currently in use in enterprise.

View File

@@ -820,7 +820,7 @@ openedx-filters==2.0.1
# ora2
openedx-forum==0.2.0
# via -r requirements/edx/kernel.in
openedx-learning==0.23.0
openedx-learning==0.23.1
# via
# -c requirements/edx/../constraints.txt
# -r requirements/edx/kernel.in

View File

@@ -1383,7 +1383,7 @@ openedx-forum==0.2.0
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
openedx-learning==0.23.0
openedx-learning==0.23.1
# via
# -c requirements/edx/../constraints.txt
# -r requirements/edx/doc.txt

View File

@@ -992,7 +992,7 @@ openedx-filters==2.0.1
# ora2
openedx-forum==0.2.0
# via -r requirements/edx/base.txt
openedx-learning==0.23.0
openedx-learning==0.23.1
# via
# -c requirements/edx/../constraints.txt
# -r requirements/edx/base.txt

View File

@@ -1050,7 +1050,7 @@ openedx-filters==2.0.1
# ora2
openedx-forum==0.2.0
# via -r requirements/edx/base.txt
openedx-learning==0.23.0
openedx-learning==0.23.1
# via
# -c requirements/edx/../constraints.txt
# -r requirements/edx/base.txt