Files
edx-platform/xmodule/modulestore/split_migrator.py
2022-06-20 18:20:06 +05:00

232 lines
12 KiB
Python

'''
Code for migrating from other modulestores to the split_mongo modulestore.
Exists at the top level of modulestore b/c it needs to know about and access each modulestore.
In general, its strategy is to treat the other modulestores as read-only and to never directly
manipulate storage but use existing APIs.
'''
import logging
from opaque_keys.edx.locator import CourseLocator
from xblock.fields import Reference, ReferenceList, ReferenceValueDict
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.exceptions import ItemNotFoundError
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
class SplitMigrator:
    """
    Copies courses from old mongo to split mongo and sets up location mapping so any references to the old
    name will be able to find the new elements.
    """

    def __init__(self, split_modulestore, source_modulestore):
        """
        Remember the two modulestores the migration works with.

        Args:
            split_modulestore: the split_mongo modulestore that migrated courses are written to.
            source_modulestore: the modulestore that courses are read from.
        """
        super().__init__()
        self.split_modulestore = split_modulestore
        self.source_modulestore = source_modulestore

    def migrate_mongo_course(
        self, source_course_key, user_id, new_org=None, new_course=None, new_run=None, fields=None, **kwargs
    ):
        """
        Create a new course in split_mongo representing the published and draft versions of the course from the
        original mongo store. And return the new CourseLocator

        If the new course already exists, this raises DuplicateItemError

        :param source_course_key: which course to migrate
        :param user_id: the user whose action is causing this migration
        :param new_org, new_course, new_run: (optional) identifiers for the new course. Defaults to
            the source_course_key's values.
        :param fields: (optional) dict of field values applied on top of the values copied from the
            source course block.
        :param kwargs: passed through unchanged to the modulestore calls.
        :raises ItemNotFoundError: if the source modulestore has no course for source_course_key.
        """
        # the only difference in data between the old and split_mongo xblocks are the locations;
        # so, any field which holds a location must change to a Locator; otherwise, the persistence
        # layer and kvs's know how to store it.
        # locations are in location, children, conditionals, course.tab

        # create the course: set fields to explicitly_set for each scope,
        # id_root = new_course_locator, master_branch = 'production'
        original_course = self.source_modulestore.get_course(source_course_key, **kwargs)
        if original_course is None:
            raise ItemNotFoundError(str(source_course_key))

        if new_org is None:
            new_org = source_course_key.org
        if new_course is None:
            new_course = source_course_key.course
        if new_run is None:
            new_run = source_course_key.run

        new_course_key = CourseLocator(new_org, new_course, new_run, branch=ModuleStoreEnum.BranchName.published)
        with self.split_modulestore.bulk_operations(new_course_key):
            new_fields = self._get_fields_translate_references(original_course, new_course_key, None)
            if fields:
                new_fields.update(fields)
            # Keep the created course block in its own name so the ``new_course`` string
            # parameter is not shadowed by an object of a different type.
            split_course = self.split_modulestore.create_course(
                new_org, new_course, new_run, user_id,
                fields=new_fields,
                master_branch=ModuleStoreEnum.BranchName.published,
                skip_auto_publish=True,
                **kwargs
            )

            self._copy_published_modules_to_course(
                split_course, original_course.location, source_course_key, user_id, **kwargs
            )

        # TODO: This should be merged back into the above transaction, but can't be until split.py
        # is refactored to have more coherent access patterns
        with self.split_modulestore.bulk_operations(new_course_key):
            # create a new version for the drafts
            self._add_draft_modules_to_course(split_course.location, source_course_key, user_id, **kwargs)

        return split_course.id

    def _copy_published_modules_to_course(self, new_course, old_course_loc, source_course_key, user_id, **kwargs):
        """
        Copy all of the modules from the 'direct' version of the course to the new split course.

        Args:
            new_course: the course block already created in the split modulestore.
            old_course_loc: location of the course block in the source store; it is skipped while copying.
            source_course_key: key of the course being read from the source modulestore.
            user_id: the user recorded against the created items.
            kwargs: passed through unchanged to the modulestore calls.
        """
        course_version_locator = new_course.id.version_agnostic()

        # iterate over published course elements. Wildcarding rather than descending b/c some
        # elements are orphaned (e.g., course about pages, conditionals)
        for module in self.source_modulestore.get_items(
            source_course_key, revision=ModuleStoreEnum.RevisionOption.published_only, **kwargs
        ):
            # don't copy the course again.
            if module.location == old_course_loc:
                continue
            # create split_xblock using split.create_item
            # NOTE: the below auto populates the children when it migrates the parent; so,
            # it doesn't need the parent as the first arg. That is, it translates and populates
            # the 'children' field as it goes.
            self.split_modulestore.create_item(
                user_id,
                course_version_locator,
                module.location.block_type,
                block_id=module.location.block_id,
                fields=self._get_fields_translate_references(
                    module, course_version_locator, new_course.location.block_id
                ),
                skip_auto_publish=True,
                **kwargs
            )

        # after done w/ published items, add version for DRAFT pointing to the published structure
        index_info = self.split_modulestore.get_course_index_info(course_version_locator)
        versions = index_info['versions']
        versions[ModuleStoreEnum.BranchName.draft] = versions[ModuleStoreEnum.BranchName.published]
        self.split_modulestore.update_course_index(course_version_locator, index_info)

        # clean up orphans in published version: in old mongo, parents pointed to the union of their
        # published and draft children which meant some pointers were to non-existent locations in 'direct'
        self.split_modulestore.fix_not_found(course_version_locator, user_id)

    def _add_draft_modules_to_course(self, published_course_usage_key, source_course_key, user_id, **kwargs):
        """
        update each draft. Create any which don't exist in published and attach to their parents.

        Args:
            published_course_usage_key: usage key of the new course block; its course key is switched
                to the draft branch and its block_id stands in for the source 'course' block id.
            source_course_key: key of the course being read from the source modulestore.
            user_id: the user recorded against the created/updated items.
            kwargs: passed through unchanged to the modulestore calls (except the final parent update).
        """
        # each true update below will trigger a new version of the structure. We may want to just have
        # one new version but that's for a later date.
        new_draft_course_loc = published_course_usage_key.course_key.for_branch(ModuleStoreEnum.BranchName.draft)
        # to prevent race conditions of grandchildren being added before their parents and thus having
        # no parent to add to
        awaiting_adoption = {}
        for module in self.source_modulestore.get_items(
            source_course_key, revision=ModuleStoreEnum.RevisionOption.draft_only, **kwargs
        ):
            new_locator = new_draft_course_loc.make_usage_key(module.category, module.location.block_id)
            if self.split_modulestore.has_item(new_locator):
                # was in 'direct' so draft is a new version
                split_module = self.split_modulestore.get_item(new_locator, **kwargs)
                # need to remove any no-longer-explicitly-set values and add/update any now set values.
                for name, field in split_module.fields.items():
                    if field.is_set_on(split_module) and not module.fields[name].is_set_on(module):
                        field.delete_from(split_module)
                # field_names=False: keys are field objects so each value can be written directly.
                translated = self._get_fields_translate_references(
                    module, new_draft_course_loc, published_course_usage_key.block_id, field_names=False
                )
                for field, value in translated.items():
                    field.write_to(split_module, value)

                self.split_modulestore.update_item(split_module, user_id, **kwargs)
            else:
                # only a draft version (aka, 'private').
                self.split_modulestore.create_item(
                    user_id, new_draft_course_loc,
                    new_locator.block_type,
                    block_id=new_locator.block_id,
                    fields=self._get_fields_translate_references(
                        module, new_draft_course_loc, published_course_usage_key.block_id
                    ),
                    **kwargs
                )
                awaiting_adoption[module.location] = new_locator

        for draft_location, new_locator in awaiting_adoption.items():
            parent_loc = self.source_modulestore.get_parent_location(
                draft_location, revision=ModuleStoreEnum.RevisionOption.draft_preferred, **kwargs
            )
            if parent_loc is None:
                log.warning('No parent found in source course for %s', draft_location)
                continue
            old_parent = self.source_modulestore.get_item(parent_loc, **kwargs)
            split_parent_loc = new_draft_course_loc.make_usage_key(
                parent_loc.block_type,
                parent_loc.block_id if parent_loc.block_type != 'course' else published_course_usage_key.block_id
            )
            new_parent = self.split_modulestore.get_item(split_parent_loc, **kwargs)
            # this only occurs if the parent was also awaiting adoption: skip this one, go to next
            if any(new_locator.block_id == child.block_id for child in new_parent.children):
                continue
            # find index for module: new_parent may be missing quite a few of old_parent's children
            insert_at = self._find_insertion_index(
                old_parent.children, new_parent.children, draft_location.block_id
            )
            new_parent.children.insert(insert_at, new_locator)
            self.split_modulestore.update_item(new_parent, user_id)

    @staticmethod
    def _find_insertion_index(old_children, new_children, target_block_id):
        """
        Return the index in new_children at which the block target_block_id should be inserted.

        Walks old_children in order until the target is reached. Each earlier sibling that is
        found in new_children (searching only from the current cursor onward) moves the cursor to
        just past it; siblings missing from new_children leave the cursor where it is.

        Args:
            old_children: sequence of objects with a ``block_id`` attribute (the source parent's children).
            new_children: sequence of objects with a ``block_id`` attribute (the new parent's children).
            target_block_id: block_id of the child being placed.

        Returns:
            int: the insertion index; 0 if no earlier sibling is present in new_children.
        """
        cursor = 0
        for old_child in old_children:
            if old_child.block_id == target_block_id:
                break  # moved cursor enough, insert it here
            # sibling may move cursor
            for idx in range(cursor, len(new_children)):
                if new_children[idx].block_id == old_child.block_id:
                    cursor = idx + 1
                    break  # skipped sibs enough, pick back up scan
        return cursor

    def _get_fields_translate_references(self, xblock, new_course_key, course_block_id, field_names=True):
        """
        Return a dictionary of field: value pairs for explicitly set fields
        but convert all references to their BlockUsageLocators

        A field that is explicitly set to None is copied as None whatever its type, so a None
        ReferenceList or ReferenceValueDict is not iterated.

        Args:
            xblock: the block whose explicitly set fields are read.
            new_course_key: key used to build the translated usage keys.
            course_block_id: block id substituted for references whose block_type is 'course'.
            field_names: if Truthy, the dictionary keys are the field names. If falsey, the keys are the
                field objects.
        """
        def get_translation(location):
            """
            Convert the location
            """
            return new_course_key.make_usage_key(
                location.block_type,
                location.block_id if location.block_type != 'course' else course_block_id
            )

        result = {}
        for field_name, field in xblock.fields.items():
            if not field.is_set_on(xblock):
                continue
            field_value = field.read_from(xblock)
            field_key = field_name if field_names else field
            if field_value is None:
                # Nothing to translate; also keeps None list/dict references from being iterated.
                result[field_key] = None
            elif isinstance(field, Reference):
                result[field_key] = get_translation(field_value)
            elif isinstance(field, ReferenceList):
                result[field_key] = [get_translation(ele) for ele in field_value]
            elif isinstance(field, ReferenceValueDict):
                result[field_key] = {
                    key: get_translation(subvalue)
                    for key, subvalue in field_value.items()
                }
            else:
                result[field_key] = field_value

        return result