class SplitMigrator(object):
    """
    Copies courses from old mongo to split mongo and sets up location mapping so any references to the old
    name will be able to find the new elements.

    The old ('direct' and 'draft') modulestores are only read from; all writes go through the split
    modulestore and the location mapper.
    """
    def __init__(self, split_modulestore, direct_modulestore, draft_modulestore, loc_mapper):
        """
        :param split_modulestore: the destination split_mongo modulestore
        :param direct_modulestore: the old mongo modulestore holding the published ('direct') items
        :param draft_modulestore: the old mongo modulestore holding the draft items
        :param loc_mapper: maps old Locations to new Locators (and records new mappings)
        """
        super(SplitMigrator, self).__init__()
        self.split_modulestore = split_modulestore
        self.direct_modulestore = direct_modulestore
        self.draft_modulestore = draft_modulestore
        self.loc_mapper = loc_mapper

    def migrate_mongo_course(self, course_location, user_id, new_course_id=None):
        """
        Create a new course in split_mongo representing the published and draft versions of the course from the
        original mongo store. And return the new_course_id (which the caller can also get by calling
        self.loc_mapper.translate_location(old_course_location)).

        If the new course already exists, this raises DuplicateItemError

        :param course_location: a Location whose category is 'course' and points to the course
        :param user_id: the user whose action is causing this migration
        :param new_course_id: (optional) the Locator.course_id for the new course. Defaults to
            whatever self.loc_mapper.create_map_entry assigns
        :return: the course_id of the newly created split course
        """
        new_course_id = self.loc_mapper.create_map_entry(course_location, course_id=new_course_id)
        old_course_id = course_location.course_id
        # the only difference in data between the old and split_mongo xblocks are the locations;
        # so, any field which holds a location must change to a Locator; otherwise, the persistence
        # layer and kvs's know how to store it.
        # locations are in location, children, conditionals, course.tab

        # create the course: set fields to explicitly_set for each scope, id_root = new_course_id,
        # master_branch = 'production'
        original_course = self.direct_modulestore.get_item(course_location)
        new_course_root_locator = self.loc_mapper.translate_location(old_course_id, course_location)
        new_course = self.split_modulestore.create_course(
            course_location.org, original_course.display_name,
            user_id, id_root=new_course_id,
            fields=self._get_json_fields_translate_children(original_course, old_course_id, True),
            root_usage_id=new_course_root_locator.usage_id,
            master_branch=new_course_root_locator.branch
        )

        # published first: the draft pass updates/extends what the published pass created
        self._copy_published_modules_to_course(new_course, course_location, old_course_id, user_id)
        self._add_draft_modules_to_course(new_course_id, old_course_id, course_location, user_id)

        return new_course_id

    def _copy_published_modules_to_course(self, new_course, old_course_loc, old_course_id, user_id):
        """
        Copy all of the modules from the 'direct' version of the course to the new split course.

        :param new_course: the course xblock returned by split_modulestore.create_course
        :param old_course_loc: the Location of the course root in the old modulestore
        :param old_course_id: the old course's course_id
        :param user_id: the user whose action is causing this migration
        """
        course_version_locator = new_course.location.as_course_locator()

        # iterate over published course elements. Wildcarding rather than descending b/c some elements are
        # orphaned (e.g., course about pages, conditionals)
        for module in self.direct_modulestore.get_items(
            old_course_loc.replace(category=None, name=None, revision=None),
            old_course_id
        ):
            # don't copy the course again. No drafts should get here but check
            if module.location != old_course_loc and not getattr(module, 'is_draft', False):
                # create split_xblock using split.create_item
                # where usage_id is computed by translate_location
                new_locator = self.loc_mapper.translate_location(
                    old_course_id, module.location, True, add_entry_if_missing=True
                )
                self.split_modulestore.create_item(
                    course_version_locator, module.category, user_id,
                    usage_id=new_locator.usage_id,
                    fields=self._get_json_fields_translate_children(module, old_course_id, True),
                    continue_version=True
                )
        # after done w/ published items, add version for 'draft' pointing to the published structure
        index_info = self.split_modulestore.get_course_index_info(course_version_locator)
        versions = index_info['versions']
        versions['draft'] = versions['published']
        self.split_modulestore.update_course_index(
            course_version_locator, {'versions': versions}, update_versions=True
        )

        # clean up orphans in published version: in old mongo, parents pointed to the union of their published
        # and draft children which meant some pointers were to non-existent locations in 'direct'
        self.split_modulestore.internal_clean_children(course_version_locator)

    def _add_draft_modules_to_course(self, new_course_id, old_course_id, old_course_loc, user_id):
        """
        update each draft. Create any which don't exist in published and attach to their parents.

        :param new_course_id: the course_id of the split course being populated
        :param old_course_id: the old course's course_id
        :param old_course_loc: the Location of the course root in the old modulestore
        :param user_id: the user whose action is causing this migration
        """
        # each true update below will trigger a new version of the structure. We may want to just have one
        # new version but that's for a later date.
        new_draft_course_loc = CourseLocator(course_id=new_course_id, branch='draft')
        # to prevent race conditions of grandchildren being added before their parents and thus having no
        # parent to add to: maps old draft Location -> new usage_id
        awaiting_adoption = {}
        for module in self.draft_modulestore.get_items(
            old_course_loc.replace(category=None, name=None, revision=draft.DRAFT),
            old_course_id
        ):
            if not getattr(module, 'is_draft', False):
                continue
            new_locator = self.loc_mapper.translate_location(
                old_course_id, module.location, False, add_entry_if_missing=True
            )
            if self.split_modulestore.has_item(new_course_id, new_locator):
                # was in 'direct' so draft is a new version
                split_module = self.split_modulestore.get_item(new_locator)
                # need to remove any no-longer-explicitly-set values and add/update any now set values.
                for name, field in split_module.fields.iteritems():
                    if field.is_set_on(split_module) and not module.fields[name].is_set_on(module):
                        field.delete_from(split_module)
                for name, field in module.fields.iteritems():
                    # draft children will insert themselves and the others are here already; so, don't do it 2x
                    if name != 'children' and field.is_set_on(module):
                        field.write_to(split_module, field.read_from(module))

                self.split_modulestore.update_item(split_module, user_id)
            else:
                # only a draft version (aka, 'private'). parent needs to be updated too.
                # create a new course version just in case the current head is also the prod head
                self.split_modulestore.create_item(
                    new_draft_course_loc, module.category, user_id,
                    usage_id=new_locator.usage_id,
                    fields=self._get_json_fields_translate_children(module, old_course_id, True)
                )
                awaiting_adoption[module.location] = new_locator.usage_id

        for draft_location, new_usage_id in awaiting_adoption.iteritems():
            for parent_loc in self.draft_modulestore.get_parent_locations(draft_location, old_course_id):
                old_parent = self.draft_modulestore.get_item(parent_loc)
                new_parent = self.split_modulestore.get_item(
                    self.loc_mapper.translate_location(old_course_id, old_parent.location, False)
                )
                # this only occurs if the parent was also awaiting adoption. Skip just this parent: any
                # other parent may still be missing the child.
                if new_usage_id in new_parent.children:
                    continue
                # old children are stored as url strings. Bind the string to its own name: rebinding
                # draft_location here would make .url() blow up on the next parent of the same draft.
                draft_url = draft_location.url()
                insert_at = self._insertion_index(
                    old_parent.children, new_parent.children, draft_url,
                    lambda old_child_loc: self.loc_mapper.translate_location(
                        old_course_id, Location(old_child_loc), False
                    ).usage_id
                )
                new_parent.children.insert(insert_at, new_usage_id)
                self.split_modulestore.update_item(new_parent, user_id)

    @staticmethod
    def _insertion_index(old_children, new_children, draft_url, sibling_usage_id):
        """
        Find where in new_children a draft-only child belongs so it keeps its place among its siblings.

        new_children may be missing quite a few of old_children's members, so walk the old siblings which
        precede the draft and advance a cursor past each one that is found in new_children.

        :param old_children: the old parent's children (url strings), in order
        :param new_children: the new parent's children (usage_ids), in order
        :param draft_url: the url string of the draft child being placed
        :param sibling_usage_id: callable translating an old child url to its new usage_id
        :return: the index at which to insert the child into new_children
        """
        cursor = 0
        for old_child_loc in old_children:
            if old_child_loc == draft_url:
                break
            usage_id = sibling_usage_id(old_child_loc)
            # sibling may move cursor; only look forward so relative order is preserved
            for idx in range(cursor, len(new_children)):
                if new_children[idx] == usage_id:
                    cursor = idx + 1
                    break
        return cursor

    def _get_json_fields_translate_children(self, xblock, old_course_id, published):
        """
        Get the json repr of the fields explicitly set on xblock with any 'children' converted from old
        location urls to the usage_ids of the corresponding new locators.

        :param xblock: the old-modulestore xblock whose fields to export
        :param old_course_id: the old course's course_id (used for the location mapping lookup)
        :param published: passed through to loc_mapper.translate_location
        :return: dict of field name -> json value
        """
        fields = self.get_json_fields_explicitly_set(xblock)
        # this will too generously copy the children even for ones that don't exist in the published b/c the
        # old mongo had no way of not having parents point to draft only children :-(
        if 'children' in fields:
            fields['children'] = [
                self.loc_mapper.translate_location(
                    old_course_id, Location(child), published, add_entry_if_missing=True
                ).usage_id
                for child in fields['children']
            ]
        return fields

    def get_json_fields_explicitly_set(self, xblock):
        """
        Get the json repr for fields set on this specific xblock

        :param xblock: the xblock whose explicitly set fields to export
        :return: dict of field name -> json value for each field for which is_set_on(xblock) is true
        """
        return {field.name: field.read_json(xblock) for field in xblock.fields.itervalues() if field.is_set_on(xblock)}
+ + :param course_locator: the course to clean + """ + original_structure = self._lookup_course(course_locator) + for block in original_structure['blocks'].itervalues(): + if 'fields' in block and 'children' in block['fields']: + block['fields']["children"] = [ + usage_id for usage_id in block['fields']["children"] if usage_id in original_structure['blocks'] + ] + self.structures.update({'_id': original_structure['_id']}, original_structure) + def _block_matches(self, value, qualifiers): ''' diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_split_migrator.py b/common/lib/xmodule/xmodule/modulestore/tests/test_split_migrator.py new file mode 100644 index 0000000000..86c99f7015 --- /dev/null +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_split_migrator.py @@ -0,0 +1,263 @@ +""" +Created on Sep 10, 2013 + +@author: dmitchell + +Tests for split_migrator + +""" +import unittest +import uuid +import random +import mock +import datetime +from xmodule.fields import Date +from xmodule.modulestore import Location +from xmodule.modulestore.inheritance import InheritanceMixin +from xmodule.modulestore.loc_mapper_store import LocMapperStore +from xmodule.modulestore.mongo.draft import DraftModuleStore +from xmodule.modulestore.split_mongo.split import SplitMongoModuleStore +from xmodule.modulestore.mongo.base import MongoModuleStore +from xmodule.modulestore.split_migrator import SplitMigrator +from xmodule.modulestore.mongo import draft + + +class TestMigration(unittest.TestCase): + + # Snippet of what would be in the django settings envs file + modulestore_options = { + 'default_class': 'xmodule.raw_module.RawDescriptor', + 'host': 'localhost', + 'db': 'test_xmodule', + 'collection': 'modulestore{0}'.format(uuid.uuid4().hex), + 'fs_root': '', + 'render_template': mock.Mock(return_value=""), + 'xblock_mixins': (InheritanceMixin,) + } + + def setUp(self): + super(TestMigration, self).setUp() + self.loc_mapper = LocMapperStore(**self.modulestore_options) + 
self.old_mongo = MongoModuleStore(**self.modulestore_options) + self.draft_mongo = DraftModuleStore(**self.modulestore_options) + self.split_mongo = SplitMongoModuleStore( + loc_mapper=self.loc_mapper, **self.modulestore_options + ) + self.migrator = SplitMigrator(self.split_mongo, self.old_mongo, self.draft_mongo, self.loc_mapper) + self.course_location = None + self.create_source_course() + + def tearDown(self): + dbref = self.loc_mapper.db + dbref.drop_collection(self.loc_mapper.location_map) + split_db = self.split_mongo.db + split_db.drop_collection(split_db.course_index) + split_db.drop_collection(split_db.structures) + split_db.drop_collection(split_db.definitions) + # old_mongo doesn't give a db attr, but all of the dbs are the same + dbref.drop_collection(self.old_mongo.collection) + + dbref.connection.close() + + super(TestMigration, self).tearDown() + + def _create_and_get_item(self, store, location, data, metadata, runtime=None): + store.create_and_save_xmodule(location, data, metadata, runtime) + return store.get_item(location) + + def create_source_course(self): + """ + A course testing all of the conversion mechanisms: + * some inheritable settings + * sequences w/ draft and live intermixed children to ensure all get to the draft but + only the live ones get to published. Some are only draft, some are both, some are only live. 
+ * about, static_tab, and conditional documents + """ + location = Location('i4x', 'test_org', 'test_course', 'course', 'runid') + self.course_location = location + date_proxy = Date() + metadata = { + 'start': date_proxy.to_json(datetime.datetime(2000, 3, 13, 4)), + 'display_name': 'Migration test course', + } + data = { + 'wiki_slug': 'test_course_slug' + } + course_root = self._create_and_get_item(self.old_mongo, location, data, metadata) + runtime = course_root.runtime + # chapters + location = location.replace(category='chapter', name=uuid.uuid4().hex) + chapter1 = self._create_and_get_item(self.old_mongo, location, {}, {'display_name': 'Chapter 1'}, runtime) + course_root.children.append(chapter1.location.url()) + location = location.replace(category='chapter', name=uuid.uuid4().hex) + chapter2 = self._create_and_get_item(self.old_mongo, location, {}, {'display_name': 'Chapter 2'}, runtime) + course_root.children.append(chapter2.location.url()) + self.old_mongo.update_children(course_root.location, course_root.children) + # vertical in live only + location = location.replace(category='vertical', name=uuid.uuid4().hex) + live_vert = self._create_and_get_item(self.old_mongo, location, {}, {'display_name': 'Live vertical'}, runtime) + chapter1.children.append(live_vert.location.url()) + self.create_random_units(self.old_mongo, live_vert) + # vertical in both live and draft + location = location.replace(category='vertical', name=uuid.uuid4().hex) + both_vert = self._create_and_get_item( + self.old_mongo, location, {}, {'display_name': 'Both vertical'}, runtime + ) + draft_both = self._create_and_get_item( + self.draft_mongo, location, {}, {'display_name': 'Both vertical renamed'}, runtime + ) + chapter1.children.append(both_vert.location.url()) + self.create_random_units(self.old_mongo, both_vert, self.draft_mongo, draft_both) + # vertical in draft only (x2) + location = location.replace(category='vertical', name=uuid.uuid4().hex) + draft_vert = 
self._create_and_get_item(self.draft_mongo, + location, {}, {'display_name': 'Draft vertical'}, runtime) + chapter1.children.append(draft_vert.location.url()) + self.create_random_units(self.draft_mongo, draft_vert) + location = location.replace(category='vertical', name=uuid.uuid4().hex) + draft_vert = self._create_and_get_item(self.draft_mongo, + location, {}, {'display_name': 'Draft vertical2'}, runtime) + chapter1.children.append(draft_vert.location.url()) + self.create_random_units(self.draft_mongo, draft_vert) + # and finally one in live only (so published has to skip 2) + location = location.replace(category='vertical', name=uuid.uuid4().hex) + live_vert = self._create_and_get_item(self.old_mongo, + location, {}, {'display_name': 'Live vertical end'}, runtime) + chapter1.children.append(live_vert.location.url()) + self.create_random_units(self.old_mongo, live_vert) + + # update the chapter + self.old_mongo.update_children(chapter1.location, chapter1.children) + + # now the other one w/ the conditional + # first create some show children + indirect1 = self._create_and_get_item(self.old_mongo, + location.replace(category='discussion', name=uuid.uuid4().hex), + "", {'display_name': 'conditional show 1'}, runtime + ) + indirect2 = self._create_and_get_item(self.old_mongo, + location.replace(category='html', name=uuid.uuid4().hex), + "", {'display_name': 'conditional show 2'}, runtime + ) + location = location.replace(category='conditional', name=uuid.uuid4().hex) + metadata = { + 'xml_attributes' : { + 'sources': [live_vert.location.url(), ], + 'completed': True, + }, + } + data = { + 'show_tag_list': [indirect1.location.url(), indirect2.location.url()] + } + conditional = self._create_and_get_item(self.old_mongo, location, data, metadata, runtime) + conditional.children = [indirect1.location.url(), indirect2.location.url()] + # add direct children + self.create_random_units(self.old_mongo, conditional) + chapter2.children.append(conditional.location.url()) + 
self.old_mongo.update_children(chapter2.location, chapter2.children) + + # and the ancillary docs (not children) + location = location.replace(category='static_tab', name=uuid.uuid4().hex) + # the below automatically adds the tab to the course + _tab = self._create_and_get_item(self.old_mongo, location, "", {'display_name': 'Tab uno'}, runtime) + + location = location.replace(category='about', name='overview') + _overview = self._create_and_get_item(self.old_mongo, location, "
test
", {}, runtime) + location = location.replace(category='course_info', name='updates') + _overview = self._create_and_get_item(self.old_mongo, + location, "test