232 lines
12 KiB
Python
232 lines
12 KiB
Python
'''
|
|
Code for migrating from other modulestores to the split_mongo modulestore.
|
|
|
|
Exists at the top level of modulestore b/c it needs to know about and access each modulestore.
|
|
|
|
In general, its strategy is to treat the other modulestores as read-only and to never directly
|
|
manipulate storage but use existing APIs.
|
|
'''
|
|
|
|
|
|
import logging
|
|
|
|
from opaque_keys.edx.locator import CourseLocator
|
|
from xblock.fields import Reference, ReferenceList, ReferenceValueDict
|
|
|
|
from xmodule.modulestore import ModuleStoreEnum
|
|
from xmodule.modulestore.exceptions import ItemNotFoundError
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
class SplitMigrator:
    """
    Copies a course from a source modulestore into the split_mongo modulestore.

    The source store is only read (``get_course``, ``get_items``, ``get_item``,
    ``get_parent_location``); every write goes through the split modulestore's
    own API. Each location-valued field is rewritten to a usage key of the new
    split course while copying.
    """

    def __init__(self, split_modulestore, source_modulestore):
        """
        :param split_modulestore: the destination (split_mongo) modulestore
        :param source_modulestore: the modulestore the course is read from
        """
        super().__init__()
        self.split_modulestore = split_modulestore
        self.source_modulestore = source_modulestore

    def migrate_mongo_course(
        self, source_course_key, user_id, new_org=None, new_course=None, new_run=None, fields=None, **kwargs
    ):
        """
        Create a new course in split_mongo representing the published and draft versions of the course from the
        original mongo store. And return the new CourseLocator

        If the new course already exists, this raises DuplicateItemError

        :param source_course_key: which course to migrate
        :param user_id: the user whose action is causing this migration
        :param new_org, new_course, new_run: (optional) identifiers for the new course. Defaults to
            the source_course_key's values.
        :param fields: (optional) dict of field values which override the ones translated from the
            source course when the new course is created
        :param kwargs: forwarded to the source and split modulestore calls made during the migration
        :raises ItemNotFoundError: if the source modulestore has no course for ``source_course_key``
        :return: the ``id`` of the course created in the split modulestore
        """
        # the only difference in data between the old and split_mongo xblocks are the locations;
        # so, any field which holds a location must change to a Locator; otherwise, the persistence
        # layer and kvs's know how to store it.
        # locations are in location, children, conditionals, course.tab

        # create the course: set fields to explicitly_set for each scope, id_root = new_course_locator,
        # master_branch = 'production'
        original_course = self.source_modulestore.get_course(source_course_key, **kwargs)
        if original_course is None:
            raise ItemNotFoundError(str(source_course_key))

        if new_org is None:
            new_org = source_course_key.org
        if new_course is None:
            new_course = source_course_key.course
        if new_run is None:
            new_run = source_course_key.run

        new_course_key = CourseLocator(new_org, new_course, new_run, branch=ModuleStoreEnum.BranchName.published)
        with self.split_modulestore.bulk_operations(new_course_key):
            new_fields = self._get_fields_translate_references(original_course, new_course_key, None)
            if fields:
                new_fields.update(fields)
            # kept in its own local so the ``new_course`` argument (an identifier string) is not shadowed
            split_course = self.split_modulestore.create_course(
                new_org, new_course, new_run, user_id,
                fields=new_fields,
                master_branch=ModuleStoreEnum.BranchName.published,
                skip_auto_publish=True,
                **kwargs
            )

            self._copy_published_modules_to_course(
                split_course, original_course.location, source_course_key, user_id, **kwargs
            )

        # TODO: This should be merged back into the above transaction, but can't be until split.py
        # is refactored to have more coherent access patterns
        with self.split_modulestore.bulk_operations(new_course_key):
            # create a new version for the drafts
            self._add_draft_modules_to_course(split_course.location, source_course_key, user_id, **kwargs)

        return split_course.id

    def _copy_published_modules_to_course(self, new_course, old_course_loc, source_course_key, user_id, **kwargs):
        """
        Copy all of the modules from the 'direct' version of the course to the new split course.

        :param new_course: the course block already created in the split modulestore
        :param old_course_loc: location of the course block in the source store; it is skipped
            because the course itself has already been created
        :param source_course_key: the course being read from the source modulestore
        :param user_id: the user whose action is causing this migration
        :param kwargs: forwarded to ``get_items`` on the source store and ``create_item`` on the split store
        """
        course_version_locator = new_course.id.version_agnostic()

        # iterate over published course elements. Wildcarding rather than descending b/c some elements
        # are orphaned (e.g., course about pages, conditionals)
        for module in self.source_modulestore.get_items(
            source_course_key, revision=ModuleStoreEnum.RevisionOption.published_only, **kwargs
        ):
            # don't copy the course again.
            if module.location == old_course_loc:
                continue
            # create split_xblock using split.create_item
            # NOTE: the below auto populates the children when it migrates the parent; so,
            # it doesn't need the parent as the first arg. That is, it translates and populates
            # the 'children' field as it goes.
            self.split_modulestore.create_item(
                user_id,
                course_version_locator,
                module.location.block_type,
                block_id=module.location.block_id,
                fields=self._get_fields_translate_references(
                    module, course_version_locator, new_course.location.block_id
                ),
                skip_auto_publish=True,
                **kwargs
            )

        # after done w/ published items, add version for DRAFT pointing to the published structure
        index_info = self.split_modulestore.get_course_index_info(course_version_locator)
        versions = index_info['versions']
        versions[ModuleStoreEnum.BranchName.draft] = versions[ModuleStoreEnum.BranchName.published]
        self.split_modulestore.update_course_index(course_version_locator, index_info)

        # clean up orphans in published version: in old mongo, parents pointed to the union of their
        # published and draft children which meant some pointers were to non-existent locations in 'direct'
        self.split_modulestore.fix_not_found(course_version_locator, user_id)

    def _add_draft_modules_to_course(self, published_course_usage_key, source_course_key, user_id, **kwargs):
        """
        update each draft. Create any which don't exist in published and attach to their parents.

        :param published_course_usage_key: usage key of the course block in the new split course
        :param source_course_key: the course being read from the source modulestore
        :param user_id: the user whose action is causing this migration
        :param kwargs: forwarded to the source and split modulestore calls
        """
        # each true update below will trigger a new version of the structure. We may want to just have
        # one new version but that's for a later date.
        new_draft_course_loc = published_course_usage_key.course_key.for_branch(ModuleStoreEnum.BranchName.draft)
        # to prevent race conditions of grandchildren being added before their parents and thus having
        # no parent to add to, newly created drafts are recorded here (old location -> new locator)
        # and attached to their parents in a second pass.
        awaiting_adoption = {}
        for module in self.source_modulestore.get_items(
            source_course_key, revision=ModuleStoreEnum.RevisionOption.draft_only, **kwargs
        ):
            new_locator = new_draft_course_loc.make_usage_key(module.category, module.location.block_id)
            if self.split_modulestore.has_item(new_locator):
                # was in 'direct' so draft is a new version
                split_module = self.split_modulestore.get_item(new_locator, **kwargs)
                # need to remove any no-longer-explicitly-set values and add/update any now set values.
                for name, field in split_module.fields.items():
                    if field.is_set_on(split_module) and not module.fields[name].is_set_on(module):
                        field.delete_from(split_module)
                # field_names=False: keys are the field objects themselves so they can write the value
                for field, value in self._get_fields_translate_references(
                    module, new_draft_course_loc, published_course_usage_key.block_id, field_names=False
                ).items():
                    field.write_to(split_module, value)

                self.split_modulestore.update_item(split_module, user_id, **kwargs)
            else:
                # only a draft version (aka, 'private').
                self.split_modulestore.create_item(
                    user_id, new_draft_course_loc,
                    new_locator.block_type,
                    block_id=new_locator.block_id,
                    fields=self._get_fields_translate_references(
                        module, new_draft_course_loc, published_course_usage_key.block_id
                    ),
                    **kwargs
                )
                awaiting_adoption[module.location] = new_locator

        for draft_location, new_locator in awaiting_adoption.items():
            parent_loc = self.source_modulestore.get_parent_location(
                draft_location, revision=ModuleStoreEnum.RevisionOption.draft_preferred, **kwargs
            )
            if parent_loc is None:
                log.warning('No parent found in source course for %s', draft_location)
                continue
            old_parent = self.source_modulestore.get_item(parent_loc, **kwargs)
            # the course block may have a different block_id in the split course, so substitute it
            split_parent_loc = new_draft_course_loc.make_usage_key(
                parent_loc.block_type,
                parent_loc.block_id if parent_loc.block_type != 'course' else published_course_usage_key.block_id
            )
            new_parent = self.split_modulestore.get_item(split_parent_loc, **kwargs)
            # this only occurs if the parent was also awaiting adoption: skip this one, go to next
            if any(new_locator.block_id == child.block_id for child in new_parent.children):
                continue
            # find index for module: new_parent may be missing quite a few of old_parent's children
            new_parent_cursor = 0
            for old_child_loc in old_parent.children:
                if old_child_loc.block_id == draft_location.block_id:
                    break  # moved cursor enough, insert it here
                # sibling may move cursor
                for idx in range(new_parent_cursor, len(new_parent.children)):
                    if new_parent.children[idx].block_id == old_child_loc.block_id:
                        new_parent_cursor = idx + 1
                        break  # skipped sibs enough, pick back up scan
            new_parent.children.insert(new_parent_cursor, new_locator)
            # NOTE(review): unlike the other store calls in this method, kwargs are not forwarded
            # here -- confirm whether that is intentional before changing it.
            self.split_modulestore.update_item(new_parent, user_id)

    def _get_fields_translate_references(self, xblock, new_course_key, course_block_id, field_names=True):
        """
        Return a dictionary of field: value pairs for explicitly set fields
        but convert all references to their BlockUsageLocators

        A field which is explicitly set to ``None`` is carried over as ``None`` whatever its type, so an
        unset-looking ReferenceList or ReferenceValueDict does not fail while being iterated.

        Args:
            xblock: the block whose explicitly set fields are read
            new_course_key: the course key used to build the translated usage keys
            course_block_id: the block_id to use in place of the source one when a reference points at
                a block of type 'course'
            field_names: if Truthy, the dictionary keys are the field names. If falsey, the keys are the
                field objects.
        """
        def get_translation(location):
            """
            Convert the location into a usage key of ``new_course_key``
            """
            return new_course_key.make_usage_key(
                location.block_type,
                location.block_id if location.block_type != 'course' else course_block_id
            )

        result = {}
        for field_name, field in xblock.fields.items():
            if not field.is_set_on(xblock):
                continue
            field_value = field.read_from(xblock)
            field_key = field_name if field_names else field
            if field_value is None:
                # nothing to translate; also keeps the container branches below from iterating None
                result[field_key] = None
            elif isinstance(field, Reference):
                result[field_key] = get_translation(field_value)
            elif isinstance(field, ReferenceList):
                result[field_key] = [get_translation(ele) for ele in field_value]
            elif isinstance(field, ReferenceValueDict):
                result[field_key] = {
                    key: get_translation(subvalue)
                    for key, subvalue in field_value.items()
                }
            else:
                result[field_key] = field_value

        return result
|