""" Each store has slightly different semantics wrt draft v published. XML doesn't officially recognize draft but does hold it in a subdir. Old mongo has a virtual but not physical draft for every unit in published state. Split mongo has a physical for every unit in every state. Given that, here's a table of semantics and behaviors where - means no record and letters indicate values. For xml, (-, x) means the item is published and can be edited. For split, it means the item's been deleted from draft and will be deleted from published the next time it gets published. old mongo can't represent that virtual state (2nd row in table) In the table body, the tuples represent virtual modulestore result. The row headers represent the pre-import modulestore state. Modulestore virtual | XML physical (draft, published) (draft, published) | (-, -) | (x, -) | (x, x) | (x, y) | (-, x) ----------------------+-------------------------------------------- (-, -) | (-, -) | (x, -) | (x, x) | (x, y) | (-, x) (-, a) | (-, a) | (x, a) | (x, x) | (x, y) | (-, x) : deleted from draft before import (a, -) | (a, -) | (x, -) | (x, x) | (x, y) | (a, x) (a, a) | (a, a) | (x, a) | (x, x) | (x, y) | (a, x) (a, b) | (a, b) | (x, b) | (x, x) | (x, y) | (a, x) """ import json import logging import mimetypes import os import re from abc import abstractmethod import xblock from django.core.exceptions import ObjectDoesNotExist from django.utils.translation import gettext as _ from lxml import etree from opaque_keys.edx.keys import UsageKey from opaque_keys.edx.locator import LibraryLocator from path import Path as path from xblock.core import XBlockMixin from xblock.fields import Reference, ReferenceList, ReferenceValueDict, Scope from xblock.runtime import DictKeyValueStore, KvsFieldData from common.djangoapps.util.monitoring import monitor_import_failure from xmodule.assetstore import AssetMetadata from xmodule.contentstore.content import StaticContent from xmodule.errortracker import 
make_error_tracker from xmodule.modulestore import ModuleStoreEnum from xmodule.modulestore.django import ASSET_IGNORE_REGEX from xmodule.modulestore.exceptions import DuplicateCourseError from xmodule.modulestore.mongo.base import MongoRevisionKey from xmodule.modulestore.store_utilities import draft_node_constructor, get_draft_subtree_roots from xmodule.modulestore.xml import ImportSystem, LibraryXMLModuleStore, XMLModuleStore from xmodule.tabs import CourseTabList from xmodule.util.misc import escape_invalid_characters from xmodule.x_module import XModuleMixin from openedx.core.djangoapps.content_tagging.api import import_course_tags_from_csv from .inheritance import own_metadata from .store_utilities import rewrite_nonportable_content_links log = logging.getLogger(__name__) DEFAULT_STATIC_CONTENT_SUBDIR = 'static' class CourseImportException(Exception): """ Base exception class for course import workflows. """ def __init__(self): super().__init__(self.description) # pylint: disable=no-member class ErrorReadingFileException(CourseImportException): """ Raised when error occurs while trying to read a file. """ MESSAGE_TEMPLATE = _('Error while reading {}. Check file for XML errors.') def __init__(self, filename, **kwargs): self.description = self.MESSAGE_TEMPLATE.format(filename) super().__init__(**kwargs) class BlockFailedToImport(CourseImportException): """ Raised when a block is failed to import. """ MESSAGE_TEMPLATE = _('Failed to import block: {} at location: {}') def __init__(self, display_name, location, **kwargs): self.description = self.MESSAGE_TEMPLATE.format(display_name, location) super().__init__(**kwargs) class LocationMixin(XBlockMixin): """ Adds a `location` property to an :class:`XBlock` so it is more compatible with old-style :class:`XModule` API. This is a simplified version of :class:`XModuleMixin`. """ @property def location(self): """ Get the UsageKey of this block. 
""" return self.scope_ids.usage_id @location.setter def location(self, value): """ Set the UsageKey of this block. """ assert isinstance(value, UsageKey) self.scope_ids = self.scope_ids._replace( def_id=value, usage_id=value, ) class StaticContentImporter: # lint-amnesty, pylint: disable=missing-class-docstring def __init__(self, static_content_store, course_data_path, target_id): self.static_content_store = static_content_store self.target_id = target_id self.course_data_path = course_data_path try: with open(course_data_path / 'policies/assets.json') as f: self.policy = json.load(f) except (OSError, ValueError) as err: # lint-amnesty, pylint: disable=unused-variable # xml backed courses won't have this file, only exported courses; # so, its absence is not really an exception. self.policy = {} mimetypes.add_type('application/octet-stream', '.sjson') mimetypes.add_type('application/octet-stream', '.srt') self.mimetypes_list = list(mimetypes.types_map.values()) def import_static_content_directory(self, content_subdir=DEFAULT_STATIC_CONTENT_SUBDIR, verbose=False): # lint-amnesty, pylint: disable=missing-function-docstring remap_dict = {} static_dir = self.course_data_path / content_subdir for dirname, _, filenames in os.walk(static_dir): for filename in filenames: file_path = os.path.join(dirname, filename) if re.match(ASSET_IGNORE_REGEX, filename): if verbose: log.debug('skipping static content %s...', file_path) continue if verbose: log.debug('importing static content %s...', file_path) imported_file_attrs = self.import_static_file(file_path, base_dir=static_dir) if imported_file_attrs: # store the remapping information which will be needed # to subsitute in the module data remap_dict[imported_file_attrs[0]] = imported_file_attrs[1] return remap_dict def import_static_file(self, full_file_path, base_dir): # lint-amnesty, pylint: disable=missing-function-docstring filename = os.path.basename(full_file_path) try: with open(full_file_path, 'rb') as f: data = f.read() 
except OSError: # OS X "companion files". See # http://www.diigo.com/annotated/0c936fda5da4aa1159c189cea227e174 if filename.startswith('._'): return None # Not a 'hidden file', then re-raise exception raise # strip away leading path from the name file_subpath = full_file_path.replace(base_dir, '') if file_subpath.startswith('/'): file_subpath = file_subpath[1:] asset_key = StaticContent.compute_location(self.target_id, file_subpath) policy_ele = self.policy.get(asset_key.path, {}) # During export display name is used to create files, strip away slashes from name displayname = escape_invalid_characters( name=policy_ele.get('displayname', filename), invalid_char_list=['/', '\\'] ) locked = policy_ele.get('locked', False) mime_type = policy_ele.get('contentType') # Check extracted contentType in list of all valid mimetypes if not mime_type or mime_type not in self.mimetypes_list: mime_type = mimetypes.guess_type(filename)[0] # Assign guessed mimetype content = StaticContent( asset_key, displayname, mime_type, data, import_path=file_subpath, locked=locked ) # first let's save a thumbnail so we can get back a thumbnail location thumbnail_content, thumbnail_location = self.static_content_store.generate_thumbnail(content) if thumbnail_content is not None: content.thumbnail_location = thumbnail_location # then commit the content try: self.static_content_store.save(content) except Exception as err: # lint-amnesty, pylint: disable=broad-except msg = f'Error importing {file_subpath}, error={err}' log.exception(f'Course import {self.target_id}: {msg}') monitor_import_failure(self.target_id, 'Updating', exception=err) return file_subpath, asset_key class ImportManager: """ Import xml-based courselikes from data_dir into modulestore. Returns: list of new courselike objects Args: store: a modulestore implementing ModuleStoreWriteBase in which to store the imported courselikes. data_dir: the root directory from which to find the xml courselikes. 
        source_dirs: If specified, the list of data_dir subdirectories to load. Otherwise, load
            all dirs

        target_id: is the Locator that all blocks should be remapped to
            after import off disk. NOTE: this only makes sense if importing only
            one courselike. If there are more than one courselike loaded from data_dir/source_dirs & you
            supply this id, an AssertException will be raised.

        static_content_store: the static asset store

        do_import_static: if True, then import the courselike's static files into static_content_store
            This can be employed for courselikes which have substantial
            unchanging static content, which is too inefficient to import every
            time the course is loaded. Static content for some courses may also be
            served directly by nginx, instead of going through django.

        do_import_python_lib: if True, import a courselike's python lib file into static_content_store
            if it exists. This can be useful if the static content import needs to be skipped
            (e.g.: for performance reasons), but the python lib still needs to be imported. If static
            content is imported, then the python lib file will be imported regardless of this value.

        create_if_not_present: If True, then a new courselike is created if it doesn't already exist.
            Otherwise, it throws an InvalidLocationError if the courselike does not exist.

        static_content_subdir: The subdirectory that contains static content.

        python_lib_filename: The filename of the courselike's python library. Course authors can
            optionally create this file to implement custom logic in their course.

        default_class, load_error_blocks: are arguments for constructing the XMLModuleStore (see its doc)
    """
    store_class = XMLModuleStore

    def __init__(
            self, store, user_id, data_dir, source_dirs=None,
            default_class='xmodule.hidden_block.HiddenBlock',
            load_error_blocks=True, static_content_store=None,
            target_id=None, verbose=False,
            do_import_static=True, do_import_python_lib=True,
            create_if_not_present=False, raise_on_failure=False,
            static_content_subdir=DEFAULT_STATIC_CONTENT_SUBDIR,
            python_lib_filename='python_lib.zip',
    ):
        self.store = store
        self.user_id = user_id
        self.data_dir = data_dir
        self.source_dirs = source_dirs
        self.load_error_blocks = load_error_blocks
        self.static_content_store = static_content_store
        self.target_id = target_id
        self.verbose = verbose
        self.static_content_subdir = static_content_subdir
        self.python_lib_filename = python_lib_filename
        self.do_import_static = do_import_static
        self.do_import_python_lib = do_import_python_lib
        self.create_if_not_present = create_if_not_present
        self.raise_on_failure = raise_on_failure
        # Eagerly load the courselike(s) from disk into a temporary,
        # in-memory XML modulestore which the import then copies from.
        self.xml_module_store = self.store_class(
            data_dir,
            default_class=default_class,
            source_dirs=source_dirs,
            load_error_blocks=load_error_blocks,
            xblock_mixins=store.xblock_mixins,
            xblock_select=store.xblock_select,
            target_course_id=target_id,
        )
        self.logger, self.errors = make_error_tracker()

    def preflight(self):
        """
        Perform any pre-import sanity checks.
        """
        # If we're going to remap the ID, then we can only do that with
        # a single target
        if self.target_id:
            assert len(self.xml_module_store.modules) == 1, 'Store unable to load course correctly.'

    def import_static(self, data_path, dest_id):
        """
        Import all static items into the content store.
        """
        if self.static_content_store is None:
            log.warning(
                f'Course import {self.target_id}: Static content store is None. Skipping static content import.'
            )
            return

        static_content_importer = StaticContentImporter(
            self.static_content_store,
            course_data_path=data_path,
            target_id=dest_id
        )
        if self.do_import_static:
            if self.verbose:
                log.info(f'Course import {self.target_id}: Importing static content and python library')
            # first pass to find everything in the static content directory
            static_content_importer.import_static_content_directory(
                content_subdir=self.static_content_subdir, verbose=self.verbose
            )
        elif self.do_import_python_lib and self.python_lib_filename:
            # Static import is skipped, but the python library alone is still
            # imported so custom course logic keeps working.
            if self.verbose:
                log.info(
                    f'Course import {self.target_id}: Skipping static content import, still importing python library'
                )
            python_lib_dir_path = data_path / self.static_content_subdir
            python_lib_full_path = python_lib_dir_path / self.python_lib_filename
            if os.path.isfile(python_lib_full_path):
                static_content_importer.import_static_file(
                    python_lib_full_path, base_dir=python_lib_dir_path
                )
        else:
            if self.verbose:
                log.info(f'Course import {self.target_id}: Skipping import of static content and python library')

        # No matter what do_import_static is, import "static_import" directory.
        # This is needed because the "about" pages (eg "overview") are
        # loaded via load_extra_content, and do not inherit the lms
        # metadata from the course block, and thus do not get
        # "static_content_store" properly defined. Static content
        # referenced in those extra pages thus need to come through the
        # c4x:// contentstore, unfortunately. Tell users to copy that
        # content into the "static_import" subdir.
        simport = 'static_import'
        if os.path.exists(data_path / simport):
            if self.verbose:
                log.info(f'Course import {self.target_id}: Importing {simport} directory')
            static_content_importer.import_static_content_directory(
                content_subdir=simport, verbose=self.verbose
            )

    def import_asset_metadata(self, data_dir, course_id):
        """
        Read in assets XML file, parse it, and add all asset metadata to the modulestore.
        """
        asset_dir = path(data_dir) / AssetMetadata.EXPORTED_ASSET_DIR
        assets_filename = AssetMetadata.EXPORTED_ASSET_FILENAME
        asset_xml_file = asset_dir / assets_filename

        def make_asset_id(course_id, asset_xml):
            """
            Construct an asset ID out of a complete asset XML section.
            """
            asset_type = None
            asset_name = None
            for child in asset_xml.iterchildren():
                if child.tag == AssetMetadata.ASSET_TYPE_ATTR:
                    asset_type = child.text
                elif child.tag == AssetMetadata.ASSET_BASENAME_ATTR:
                    asset_name = child.text
            return course_id.make_asset_key(asset_type, asset_name)

        all_assets = []
        try:
            xml_data = etree.parse(asset_xml_file).getroot()
            assert xml_data.tag == AssetMetadata.ALL_ASSETS_XML_TAG
            for asset in xml_data.iterchildren():
                if asset.tag == AssetMetadata.ASSET_XML_TAG:
                    # Construct the asset key.
                    asset_key = make_asset_id(course_id, asset)
                    asset_md = AssetMetadata(asset_key)
                    asset_md.from_xml(asset)
                    all_assets.append(asset_md)
        except OSError:
            # file does not exist.
            logging.info(f'Course import {course_id}: No {assets_filename} file present.')
            return
        except Exception as exc:  # pylint: disable=W0703
            # Any other failure (e.g. malformed XML) is fatal only when the
            # caller asked for failures to be raised; otherwise it is skipped.
            if self.raise_on_failure:  # lint-amnesty, pylint: disable=no-else-raise
                monitor_import_failure(course_id, 'Updating', exception=exc)
                logging.exception(f'Course import {course_id}: Error while parsing {assets_filename}.')
                raise ErrorReadingFileException(assets_filename)  # pylint: disable=raise-missing-from
            else:
                return

        # Now add all asset metadata to the modulestore.
        if len(all_assets) > 0:
            self.store.save_asset_metadata_list(all_assets, all_assets[0].edited_by, import_only=True)

    def import_courselike(self, runtime, courselike_key, dest_id, source_courselike):
        """
        Import the base block
        """
        if self.verbose:
            log.debug("Scanning %s for courselike block...", courselike_key)

        # Quick scan to get course block as we need some info from there.
        # Also we need to make sure that the course block is committed
        # first into the store
        course_data_path = path(self.data_dir) / source_courselike.data_dir

        log.debug('======> IMPORTING courselike %s', courselike_key)

        if not self.do_import_static:
            # for old-style xblock where this was actually linked to kvs
            source_courselike.static_asset_path = source_courselike.data_dir
            source_courselike.save()
            log.debug('course static_asset_path=%s', source_courselike.static_asset_path)

        log.debug('course data_dir=%s', source_courselike.data_dir)

        with self.store.branch_setting(ModuleStoreEnum.Branch.draft_preferred, dest_id):
            course = _update_and_import_block(
                source_courselike, self.store, self.user_id,
                courselike_key,
                dest_id,
                do_import_static=self.do_import_static,
                runtime=runtime,
            )
            self.static_updater(course, source_courselike, courselike_key, dest_id, runtime)
            self.store.update_item(course, self.user_id)

        return course, course_data_path

    @abstractmethod
    def static_updater(self, course, source_courselike, courselike_key, dest_id, runtime):
        """
        Updates any special static items, such as PDF coursebooks.
        """
        pass  # lint-amnesty, pylint: disable=unnecessary-pass

    @abstractmethod
    def get_dest_id(self, courselike_key):
        """
        Given a courselike_key, get the version of the key that will actually be used in the modulestore
        for import.
        """
        raise NotImplementedError

    @abstractmethod
    def get_courselike(self, courselike_key, runtime, dest_id):
        """
        Given a key, a runtime, and an intended destination key, get the block for the courselike
        we'll be importing into.
        """
        raise NotImplementedError

    @abstractmethod
    def import_children(self, source_courselike, courselike, courselike_key, dest_id):
        """
        To be overloaded with a method that installs the child items into self.store.
        """
        raise NotImplementedError

    @abstractmethod
    def import_drafts(self, courselike, courselike_key, data_path, dest_id):
        """
        To be overloaded with a method that installs the draft items into self.store.
        """
        raise NotImplementedError

    @abstractmethod
    def import_tags(self, data_path, dest_id):
        """
        To be overloaded with a method that adds tags to already imported blocks
        """
        raise NotImplementedError

    def recursive_build(self, source_courselike, courselike, courselike_key, dest_id):
        """
        Recursively imports all child blocks from the temporary modulestore into the
        target modulestore.
        """
        all_locs = set(self.xml_module_store.modules[courselike_key].keys())
        all_locs.remove(source_courselike.location)

        def depth_first(subtree):
            """
            Import top down just so import code can make assumptions about parents always being available
            """
            if subtree.has_children:
                for child in subtree.get_children():
                    try:
                        all_locs.remove(child.location)
                    except KeyError:
                        # tolerate same child occurring under 2 parents such as in
                        # ContentStoreTest.test_image_import
                        pass
                    if self.verbose:
                        log.debug('importing block location %s', child.location)
                    try:
                        _update_and_import_block(
                            child,
                            self.store,
                            self.user_id,
                            courselike_key,
                            dest_id,
                            do_import_static=self.do_import_static,
                            runtime=courselike.runtime,
                        )
                    except Exception:
                        log.exception(
                            f'Course import {dest_id}: failed to import block location {child.location}'
                        )
                        raise BlockFailedToImport(child.display_name, child.location)  # pylint: disable=raise-missing-from
                    depth_first(child)

        depth_first(source_courselike)

        # Import any blocks the depth-first traversal did not reach from the
        # courselike root (i.e. orphaned locations).
        for leftover in all_locs:
            if self.verbose:
                log.debug('importing block location %s', leftover)
            try:
                _update_and_import_block(
                    self.xml_module_store.get_item(leftover),
                    self.store,
                    self.user_id,
                    courselike_key,
                    dest_id,
                    do_import_static=self.do_import_static,
                    runtime=courselike.runtime,
                )
            except Exception:
                log.exception(
                    f'Course import {dest_id}: failed to import block location {leftover}'
                )
                # NOTE(review): `leftover` comes from a set of location keys,
                # not blocks, so `leftover.display_name` looks like it would
                # itself raise AttributeError here — verify; the fetched item
                # from get_item() is probably what was intended.
                # pylint: disable=raise-missing-from
                raise BlockFailedToImport(leftover.display_name, leftover.location)

    def run_imports(self):
        """
        Iterate over the given directories and yield courses.
""" self.preflight() for courselike_key in self.xml_module_store.modules.keys(): try: dest_id, runtime = self.get_dest_id(courselike_key) except DuplicateCourseError: continue # This bulk operation wraps all the operations to populate the published branch. with self.store.bulk_operations(dest_id): # Retrieve the course itself. source_courselike, courselike, data_path = self.get_courselike(courselike_key, runtime, dest_id) # Import all static pieces. self.import_static(data_path, dest_id) # Import asset metadata stored in XML. self.import_asset_metadata(data_path, dest_id) # Import all children self.import_children(source_courselike, courselike, courselike_key, dest_id) # This bulk operation wraps all the operations to populate the draft branch with any items # from the /drafts subdirectory. # Drafts must be imported in a separate bulk operation from published items to import properly, # due to the recursive_build() above creating a draft item for each course block # and then publishing it. with self.store.bulk_operations(dest_id): # Import all draft items into the courselike. courselike = self.import_drafts(courselike, courselike_key, data_path, dest_id) with self.store.bulk_operations(dest_id): try: self.import_tags(data_path, dest_id) except FileNotFoundError: logging.info(f'Course import {dest_id}: No tags.csv file present.') except ValueError as e: logging.info(f'Course import {dest_id}: {str(e)}') yield courselike class CourseImportManager(ImportManager): """ Import manager for Courses. """ store_class = XMLModuleStore def get_courselike(self, courselike_key, runtime, dest_id): """ Given a key, runtime, and target key, get the version of the course from the temporary modulestore. 
""" source_course = self.xml_module_store.get_course(courselike_key) # STEP 1: find and import course block course, course_data_path = self.import_courselike( runtime, courselike_key, dest_id, source_course, ) return source_course, course, course_data_path def get_dest_id(self, courselike_key): """ Get the course key that will be used for the target modulestore. """ if self.target_id is not None: dest_id = self.target_id else: # Note that dest_course_id will be in the format for the default modulestore. dest_id = self.store.make_course_key(courselike_key.org, courselike_key.course, courselike_key.run) existing_id = self.store.has_course(dest_id, ignore_case=True) # store.has_course will return the course_key in the format for the modulestore in which it was found. # This may be different from dest_course_id, so correct to the format found. if existing_id: dest_id = existing_id runtime = None # Creates a new course if it doesn't already exist if self.create_if_not_present and not existing_id: try: new_course = self.store.create_course( dest_id.org, dest_id.course, dest_id.run, self.user_id ) runtime = new_course.runtime except DuplicateCourseError: log.debug( "Skipping import of course with id, %s, " "since it collides with an existing one", dest_id ) raise return dest_id, runtime def static_updater(self, course, source_courselike, courselike_key, dest_id, runtime): """ Update special static assets, such as PDF textbooks and wiki resources. """ for entry in course.pdf_textbooks: for chapter in entry.get('chapters', []): if StaticContent.is_c4x_path(chapter.get('url', '')): asset_key = StaticContent.get_location_from_path(chapter['url']) chapter['url'] = StaticContent.get_static_path_from_location(asset_key) # Original wiki_slugs had value location.course. To make them unique this was changed to 'org.course.name'. 
# If we are importing into a course with a different course_id and wiki_slug is equal to either of these default # values then remap it so that the wiki does not point to the old wiki. if courselike_key != course.id: original_unique_wiki_slug = '{}.{}.{}'.format( courselike_key.org, courselike_key.course, courselike_key.run ) if course.wiki_slug in (original_unique_wiki_slug, courselike_key.course): course.wiki_slug = '{}.{}.{}'.format( course.id.org, course.id.course, course.id.run, ) # cdodge: more hacks (what else). Seems like we have a # problem when importing a course (like 6.002) which # does not have any tabs defined in the policy file. # The import goes fine and then displays fine in LMS, # but if someone tries to add a new tab in the CMS, then # the LMS barfs because it expects that -- if there are # *any* tabs -- then there at least needs to be # some predefined ones if course.tabs is None or len(course.tabs) == 0: CourseTabList.initialize_default(course) def import_children(self, source_courselike, courselike, courselike_key, dest_id): """ Imports all children into the desired store. """ # The branch setting of published_only forces an overwrite of all draft modules # during the course import. with self.store.branch_setting(ModuleStoreEnum.Branch.published_only, dest_id): self.recursive_build(source_courselike, courselike, courselike_key, dest_id) def import_drafts(self, courselike, courselike_key, data_path, dest_id): """ Imports all drafts into the desired store. """ # Import any draft items with self.store.branch_setting(ModuleStoreEnum.Branch.draft_preferred, dest_id): _import_course_draft( self.xml_module_store, self.store, self.user_id, data_path, courselike_key, dest_id, courselike.runtime ) # Importing the drafts potentially triggered a new structure version. # If so, the HEAD version_guid of the passed-in courselike will be out-of-date. # Fetch the course to return the most recent course version. 
        return self.store.get_course(courselike.id.replace(branch=None, version_guid=None))

    def import_tags(self, data_path, dest_id):
        """
        Imports tags into course blocks.
        """
        csv_path = path(data_path) / 'tags.csv'
        import_course_tags_from_csv(csv_path, dest_id)


class LibraryImportManager(ImportManager):
    """
    Import manager for Libraries
    """
    store_class = LibraryXMLModuleStore

    def get_dest_id(self, courselike_key):
        """
        Get the LibraryLocator that will be used in the target modulestore.
        """
        if self.target_id is not None:
            dest_id = self.target_id
        else:
            # NOTE(review): this branch only runs when self.target_id is None,
            # so self.target_id.org / .library would raise AttributeError —
            # presumably this was meant to use courselike_key; verify before
            # relying on imports without an explicit target_id.
            dest_id = LibraryLocator(self.target_id.org, self.target_id.library)

        existing_lib = self.store.get_library(dest_id, ignore_case=True)

        runtime = None

        if existing_lib:
            dest_id = existing_lib.location.library_key
            runtime = existing_lib.runtime

        if self.create_if_not_present and not existing_lib:
            try:
                library = self.store.create_library(
                    org=self.target_id.org,
                    library=self.target_id.library,
                    user_id=self.user_id,
                    fields={"display_name": ""},
                )
                runtime = library.runtime
            except DuplicateCourseError:
                log.debug(
                    "Skipping import of Library with id %s, "
                    "since it collides with an existing one", dest_id
                )
                raise

        return dest_id, runtime

    def get_courselike(self, courselike_key, runtime, dest_id):
        """
        Get the block of the library from the XML import modulestore.
        """
        source_library = self.xml_module_store.get_library(courselike_key)
        library, library_data_path = self.import_courselike(
            runtime, courselike_key, dest_id, source_library,
        )

        return source_library, library, library_data_path

    def static_updater(self, course, source_courselike, courselike_key, dest_id, runtime):
        """
        Libraries have no special static items to import.
        """
        pass  # lint-amnesty, pylint: disable=unnecessary-pass

    def import_children(self, source_courselike, courselike, courselike_key, dest_id):
        """
        Imports all children into the desired store.
        """
        self.recursive_build(source_courselike, courselike, courselike_key, dest_id)

    def import_drafts(self, courselike, courselike_key, data_path, dest_id):
        """
        Imports all drafts into the desired store.
        """
        # Libraries have no draft branch; nothing to import.
        return courselike

    def import_tags(self, data_path, dest_id):
        """
        Imports tags into library blocks
        """
        # We don't support tags in v1 libraries, and v2 libraries don't have
        # an import/export format defined yet. No action needed here for now.


def import_course_from_xml(*args, **kwargs):
    """
    Thin wrapper for the Course Import Manager. See ImportManager for details.
    """
    manager = CourseImportManager(*args, **kwargs)
    return list(manager.run_imports())


def import_library_from_xml(*args, **kwargs):
    """
    Thin wrapper for the Library Import Manager. See ImportManager for details.
    """
    manager = LibraryImportManager(*args, **kwargs)
    return list(manager.run_imports())


def _update_and_import_block(  # pylint: disable=too-many-statements
        block, store, user_id, source_course_id, dest_course_id,
        do_import_static=True, runtime=None):
    """
    Update all the block reference fields to the destination course id,
    then import the block into the destination course.
    """
    logging.debug('processing import of blocks %s...', str(block.location))

    def _update_block_references(block, source_course_id, dest_course_id):
        """
        Move the block to a new course.
        """
        def _convert_ref_fields_to_new_namespace(reference):
            """
            Convert a reference to the new namespace, but only
            if the original namespace matched the original course.

            Otherwise, returns the input value.
""" assert isinstance(reference, UsageKey) if source_course_id == reference.course_key: return reference.map_into_course(dest_course_id) else: return reference fields = {} for field_name, field in block.fields.items(): if field.scope != Scope.parent and field.is_set_on(block): if isinstance(field, Reference): value = field.read_from(block) if value is None: fields[field_name] = None else: fields[field_name] = _convert_ref_fields_to_new_namespace(field.read_from(block)) elif isinstance(field, ReferenceList): references = field.read_from(block) fields[field_name] = [_convert_ref_fields_to_new_namespace(reference) for reference in references] elif isinstance(field, ReferenceValueDict): reference_dict = field.read_from(block) fields[field_name] = { key: _convert_ref_fields_to_new_namespace(reference) for key, reference in reference_dict.items() } elif field_name == 'xml_attributes': value = field.read_from(block) # remove any export/import only xml_attributes # which are used to wire together draft imports if 'parent_url' in value: del value['parent_url'] if 'parent_sequential_url' in value: del value['parent_sequential_url'] if 'index_in_children_list' in value: del value['index_in_children_list'] fields[field_name] = value else: fields[field_name] = field.read_from(block) return fields if do_import_static and 'data' in block.fields and isinstance(block.fields['data'], xblock.fields.String): # we want to convert all 'non-portable' links in the module_data # (if it is a string) to portable strings (e.g. 
/static/) block.data = rewrite_nonportable_content_links( source_course_id, dest_course_id, block.data ) fields = _update_block_references(block, source_course_id, dest_course_id) asides = block.get_asides() if isinstance(block, XModuleMixin) else None if block.location.block_type == 'library_content': with store.branch_setting(branch_setting=ModuleStoreEnum.Branch.published_only): lib_content_block_already_published = store.has_item(block.location) block = store.import_xblock( user_id, dest_course_id, block.location.block_type, block.location.block_id, fields, runtime, asides=asides ) # TODO: Move this code once the following condition is met. # Get to the point where XML import is happening inside the # modulestore that is eventually going to store the data. # Ticket: https://openedx.atlassian.net/browse/PLAT-1046 # Special case handling for library content blocks. The fact that this is # in Modulestore code is _bad_ and breaks abstraction barriers, but is too # much work to factor out at this point. if block.location.block_type == 'library_content': # If library exists, update source_library_version and children # according to this existing library and library content block. if block.source_library_id and store.get_library(block.source_library_key): # If the library content block is already in the course, then don't # sync the children when we re-import it. This lets us address # TNL-7507 (Randomized Content Block Settings Lost in Course Import) # while still avoiding AA-310, where the IDs of the children for an # existing library_content block might be altered, losing student # user state. # # Note that while this method is run on import, it's also run when # adding the library content from Studio for the first time. # # TLDR: When importing, we only copy the default values from content # in a library the first time that library_content block is created. # Future imports ignore what's in the library so as not to disrupt # course state. 
            # You _can_ still update to the library via the Studio
            # UI for updating to the latest version of a library for this block.
            if lib_content_block_already_published:
                return block
            try:
                # Update library content block's children on draft branch
                with store.branch_setting(branch_setting=ModuleStoreEnum.Branch.draft_preferred):
                    try:
                        block.sync_from_library()
                    except ObjectDoesNotExist:
                        # If the source library does not exist, that's OK, the library content will still kinda work.
                        # Unfortunately, any setting defaults that are set in the library will be missing.
                        # TODO save library default settings to course's OLX and then load them here if available:
                        # https://github.com/openedx/edx-platform/issues/33742
                        pass
            except ValueError as err:
                # The specified library version does not exist.
                log.error(err)
            else:
                # Publish it if importing the course for branch setting published_only.
                if store.get_branch_setting() == ModuleStoreEnum.Branch.published_only:
                    store.publish(block.location, user_id)

    return block


def _import_course_draft(
        xml_module_store,
        store,
        user_id,
        course_data_path,
        source_course_id,
        target_id,
        mongo_runtime
):
    """
    This method will import all the content inside of the 'drafts' folder, if content exists.

    NOTE: This is not a full course import! In our current application, only verticals
    (and blocks beneath) can be in draft. Therefore, different call points into the import
    process_xml are used as the XMLModuleStore() constructor cannot simply be called
    (as is done for importing public content).

    Args:
        xml_module_store: the XMLModuleStore the exported course was parsed into;
            provides `data_dir` and `xblock_mixins`.
        store: destination modulestore that receives the draft blocks.
        user_id: id of the user performing the import (recorded on writes).
        course_data_path: filesystem path of the course's extracted data directory.
        source_course_id: course key of the course as it appears in the exported XML.
        target_id: course key the content is being imported into.
        mongo_runtime: runtime passed through to `_update_and_import_block` for each draft.
    """
    draft_dir = course_data_path + "/drafts"
    if not os.path.exists(draft_dir):
        # No drafts folder in the export: nothing to do.
        return

    # create a new 'System' object which will manage the importing
    errorlog = make_error_tracker()

    # The course_dir as passed to ImportSystem is expected to just be relative, not
    # the complete path including data_dir. ImportSystem will concatenate the two together.
    data_dir = xml_module_store.data_dir
    # Whether or not data_dir ends with a "/" differs in production vs. test.
    if not data_dir.endswith("/"):
        data_dir += "/"
    # Remove absolute path, leaving relative /drafts.
    draft_course_dir = draft_dir.replace(data_dir, '', 1)

    system = ImportSystem(
        xmlstore=xml_module_store,
        course_id=source_course_id,
        course_dir=draft_course_dir,
        error_tracker=errorlog.tracker,
        # Draft imports are partial; error blocks would pollute the course.
        load_error_blocks=False,
        mixins=xml_module_store.xblock_mixins,
        services={'field-data': KvsFieldData(kvs=DictKeyValueStore())},
        target_course_id=target_id,
    )

    def _import_block(block):
        # Recursively import one draft block and all of its children into `store`.
        # IMPORTANT: Be sure to update the block location in the NEW namespace
        block_location = block.location.map_into_course(target_id)
        # Update the block's location to DRAFT revision
        # We need to call this method (instead of updating the location directly)
        # to ensure that pure XBlock field data is updated correctly.
        _update_block_location(block, block_location.replace(revision=MongoRevisionKey.draft))

        parent_url = get_parent_url(block)
        index = index_in_children_list(block)

        # make sure our parent has us in its list of children
        # this is to make sure private only blocks show up
        # in the list of children since they would have been
        # filtered out from the non-draft store export.
        if parent_url is not None and index is not None:
            course_key = block.location.course_key
            parent_location = UsageKey.from_string(parent_url).map_into_course(course_key)

            # IMPORTANT: Be sure to update the parent in the NEW namespace
            parent_location = parent_location.map_into_course(target_id)

            parent = store.get_item(parent_location, depth=0)
            non_draft_location = block.location.map_into_course(target_id)
            # Only insert the child if the parent doesn't already list it,
            # to avoid duplicating the reference on re-import.
            if not any(child.block_id == block.location.block_id for child in parent.children):
                parent.children.insert(index, non_draft_location)
                store.update_item(parent, user_id)

        _update_and_import_block(
            block,
            store,
            user_id,
            source_course_id,
            target_id,
            runtime=mongo_runtime,
        )
        for child in block.get_children():
            _import_block(child)

    # Now walk the /drafts directory.
    # Each file in the directory will be a draft copy of the vertical.

    # First it is necessary to order the draft items by their desired index in the child list,
    # since the order in which os.walk() returns the files is not guaranteed.
    drafts = []
    for rootdir, __, filenames in os.walk(draft_dir):
        for filename in filenames:
            if filename.startswith('._'):
                # Skip any OSX quarantine files, prefixed with a '._'.
                continue
            block_path = os.path.join(rootdir, filename)
            with open(block_path) as f:
                try:
                    xml = f.read()

                    # The process_xml() call below recursively processes all descendants. If
                    # we call this on all verticals in a course with verticals nested below
                    # the unit level, we try to import the same content twice, causing naming conflicts.
                    # Therefore only process verticals at the unit level, assuming that any other
                    # verticals must be descendants.
                    if 'index_in_children_list' in xml:
                        block = system.process_xml(xml)

                        # HACK: since we are doing partial imports of drafts
                        # the vertical doesn't have the 'url-name' set in the
                        # attributes (they are normally in the parent object,
                        # aka sequential), so we have to replace the location.name
                        # with the XML filename that is part of the pack
                        filename, __ = os.path.splitext(filename)
                        block.location = block.location.replace(name=filename)

                        index = index_in_children_list(block)
                        parent_url = get_parent_url(block, xml)
                        draft_url = str(block.location)

                        draft = draft_node_constructor(
                            block=block, url=draft_url, parent_url=parent_url, index=index
                        )
                        drafts.append(draft)
                except Exception:  # pylint: disable=broad-except
                    # Best-effort: a single malformed draft file should not abort the import.
                    # NOTE(review): this logs via the root logger (`logging`) instead of the
                    # module-level `log` — consider `log.exception` for consistency.
                    logging.exception('Error while parsing course drafts xml.')

    # Sort drafts by `index_in_children_list` attribute.
    drafts.sort(key=lambda x: x.index)

    # Import each draft subtree from its root; _import_block recurses into children.
    for draft in get_draft_subtree_roots(drafts):
        try:
            _import_block(draft.module)
        except Exception:  # pylint: disable=broad-except
            # Best-effort: log and continue with the remaining draft subtrees.
            logging.exception(f'Course import {source_course_id}: while importing draft block {draft.module}')


def allowed_metadata_by_category(category):
    """
    Return the list of metadata field names that may carry explicit values for
    blocks of `category`; the sentinel list `['*']` means everything is allowed.
    """
    # should this be in the descriptors?!?
    return {
        'vertical': [],
        'chapter': ['start'],
        'sequential': ['due', 'relative_weeks_due', 'format', 'start', 'graded']
    }.get(category, ['*'])


def check_block_metadata_editability(block):
    """
    Assert that there is no metadata within a particular block that
    we can't support editing. However we always allow 'display_name'
    and 'xml_attributes'

    Returns the number of errors found (0 or 1 for a single block).
    """
    allowed = allowed_metadata_by_category(block.location.block_type)
    if '*' in allowed:
        # everything is allowed
        return 0

    # These two fields are editable for every category.
    allowed = allowed + ['xml_attributes', 'display_name']
    err_cnt = 0
    # Explicitly-set metadata keys that are not in the allowed list.
    illegal_keys = set(own_metadata(block).keys()) - set(allowed)

    if len(illegal_keys) > 0:
        err_cnt = err_cnt + 1
        # NOTE(review): message intentionally reproduced byte-for-byte,
        # including the leading ": " — confirm whether a severity prefix was intended.
        print(
            ": found non-editable metadata on {url}. "
            "These metadata keys are not supported = {keys}".format(
                url=str(block.location), keys=illegal_keys
            )
        )

    return err_cnt


def get_parent_url(block, xml=None):
    """
    Get the parent_url, if any, from block using xml as an alternative source. If it finds it in
    xml but not on block, it modifies block so that the next call to this w/o the xml will get the parent url
    """
    if hasattr(block, 'xml_attributes'):
        return block.xml_attributes.get(
            # handle deprecated old attr
            'parent_url', block.xml_attributes.get('parent_sequential_url')
        )
    if xml is not None:
        # Populate block.xml_attributes from the raw XML, then retry.
        create_xml_attributes(block, xml)
        return get_parent_url(block)  # don't reparse xml b/c don't infinite recurse but retry above lines
    return None


def index_in_children_list(block, xml=None):
    """
    Get the index_in_children_list, if any, from block using xml as an alternative source. If it finds it in
    xml but not on block, it modifies block so that the next call to this w/o the xml will get the field.
    """
    if hasattr(block, 'xml_attributes'):
        val = block.xml_attributes.get('index_in_children_list')
        if val is not None:
            return int(val)
        return None
    if xml is not None:
        # Populate block.xml_attributes from the raw XML, then retry.
        create_xml_attributes(block, xml)
        return index_in_children_list(block)  # don't reparse xml b/c don't infinite recurse but retry above lines
    return None


def create_xml_attributes(block, xml):
    """
    Make up for blocks which don't define xml_attributes by creating them here and populating
    """
    xml_attrs = {}
    for attr, val in xml.attrib.items():
        # Only keep attributes that aren't already real XBlock fields.
        if attr not in block.fields:
            # translate obsolete attr
            if attr == 'parent_sequential_url':
                attr = 'parent_url'
            xml_attrs[attr] = val

    # now cache it on block where it's expected
    block.xml_attributes = xml_attrs


def validate_no_non_editable_metadata(module_store, course_id, category):  # lint-amnesty, pylint: disable=missing-function-docstring
    """
    Count metadata-editability errors for every block of `category` in the course.
    """
    err_cnt = 0
    for block_loc in module_store.modules[course_id]:
        block = module_store.modules[course_id][block_loc]
        if block.location.block_type == category:
            err_cnt = err_cnt + check_block_metadata_editability(block)

    return err_cnt


def validate_category_hierarchy(  # lint-amnesty, pylint: disable=missing-function-docstring
        module_store, course_id, parent_category, expected_child_category):
    """
    Verify that every child of every `parent_category` block has the
    `expected_child_category` block type; print and count each violation.
    """
    err_cnt = 0

    parents = []
    # get all blocks of parent_category
    for block in module_store.modules[course_id].values():
        if block.location.block_type == parent_category:
            parents.append(block)

    for parent in parents:
        for child_loc in parent.children:
            if child_loc.block_type != expected_child_category:
                err_cnt += 1
                print(
                    "ERROR: child {child} of parent {parent} was expected to be "
                    "category of {expected} but was {actual}".format(
                        child=child_loc, parent=parent.location,
                        expected=expected_child_category,
                        actual=child_loc.block_type
                    )
                )

    return err_cnt


def validate_data_source_path_existence(path, is_err=True, extra_msg=None):  # lint-amnesty, pylint: disable=missing-function-docstring, redefined-outer-name
    """
    Return 1 and print a message if `path` does not exist, else return 0.
    `is_err` selects the ERROR vs WARNING prefix; `extra_msg` is appended if given.
    """
    _cnt = 0
    if not os.path.exists(path):
        print(
            "{type}: Expected folder at {path}. {extra}".format(
                type='ERROR' if is_err else 'WARNING',
                path=path,
                extra=extra_msg or "",
            )
        )
        _cnt = 1
    return _cnt


def validate_data_source_paths(data_dir, course_dir):  # lint-amnesty, pylint: disable=missing-function-docstring
    """
    Validate that the course directory has the expected static asset folders.
    Returns a (error_count, warning_count) tuple.
    """
    # check that there is a '/static/' directory
    course_path = data_dir / course_dir
    err_cnt = 0
    warn_cnt = 0
    err_cnt += validate_data_source_path_existence(course_path / 'static')
    warn_cnt += validate_data_source_path_existence(
        course_path / 'static/subs', is_err=False,
        extra_msg='Video captions (if they are used) will not work unless they are static/subs.'
    )
    return err_cnt, warn_cnt


def validate_course_policy(module_store, course_id):
    """
    Validate that the course explicitly sets values for any fields
    whose defaults may have changed between the export and the import.

    Does not add to error count as these are just warnings.
    """
    # is there a reliable way to get the block location just given the course_id?
    warn_cnt = 0
    for block in module_store.modules[course_id].values():
        if block.location.block_type == 'course':
            # Check fields whose platform defaults changed over time; a course
            # that relied on the old default will silently change behavior.
            if not block._field_data.has(block, 'rerandomize'):  # lint-amnesty, pylint: disable=protected-access
                warn_cnt += 1
                print(
                    'WARN: course policy does not specify value for '
                    '"rerandomize" whose default is now "never". '
                    'The behavior of your course may change.'
                )
            if not block._field_data.has(block, 'showanswer'):  # lint-amnesty, pylint: disable=protected-access
                warn_cnt += 1
                print(
                    'WARN: course policy does not specify value for '
                    '"showanswer" whose default is now "finished". '
                    'The behavior of your course may change.'
                )
    return warn_cnt


def perform_xlint(  # lint-amnesty, pylint: disable=missing-function-docstring
        data_dir, source_dirs,
        default_class='xmodule.hidden_block.HiddenBlock',
        load_error_blocks=True,
        xblock_mixins=(LocationMixin, XModuleMixin)):
    """
    Load the given course dirs into an XMLModuleStore and run all validations,
    printing findings and a summary. Returns the total error count.
    """
    err_cnt = 0
    warn_cnt = 0

    module_store = XMLModuleStore(
        data_dir,
        default_class=default_class,
        source_dirs=source_dirs,
        load_error_blocks=load_error_blocks,
        xblock_mixins=xblock_mixins
    )

    # check all data source path information
    for course_dir in source_dirs:
        _err_cnt, _warn_cnt = validate_data_source_paths(path(data_dir), course_dir)
        err_cnt += _err_cnt
        warn_cnt += _warn_cnt

    # first count all errors and warnings as part of the XMLModuleStore import
    for err_log in module_store._course_errors.values():  # pylint: disable=protected-access
        for err_log_entry in err_log.errors:
            msg = err_log_entry[0]
            # Error-log entries are classified by their message prefix.
            if msg.startswith('ERROR:'):
                err_cnt += 1
            else:
                warn_cnt += 1

    # then count outright all courses that failed to load at all
    for err_log in module_store.errored_courses.values():
        for err_log_entry in err_log.errors:
            msg = err_log_entry[0]
            print(msg)
            if msg.startswith('ERROR:'):
                err_cnt += 1
            else:
                warn_cnt += 1

    for course_id in module_store.modules.keys():
        # constrain that courses only have 'chapter' children
        err_cnt += validate_category_hierarchy(
            module_store, course_id, "course", "chapter"
        )
        # constrain that chapters only have 'sequentials'
        err_cnt += validate_category_hierarchy(
            module_store, course_id, "chapter", "sequential"
        )
        # constrain that sequentials only have 'verticals'
        err_cnt += validate_category_hierarchy(
            module_store, course_id, "sequential", "vertical"
        )
        # validate the course policy overrides any defaults
        # which have changed over time
        warn_cnt += validate_course_policy(module_store, course_id)
        # don't allow metadata on verticals, since we can't edit them in studio
        err_cnt += validate_no_non_editable_metadata(
            module_store, course_id, "vertical"
        )
        # don't allow metadata on chapters, since we can't edit them in studio
        err_cnt
+= validate_no_non_editable_metadata(
            module_store, course_id, "chapter"
        )
        # don't allow metadata on sequences that we can't edit
        err_cnt += validate_no_non_editable_metadata(
            module_store, course_id, "sequential"
        )

        # check for a presence of a course marketing video
        if not module_store.has_item(course_id.make_usage_key('about', 'video')):
            print(
                "WARN: Missing course marketing video. It is recommended "
                "that every course have a marketing video."
            )
            warn_cnt += 1

    # Print a human-readable summary of everything found above.
    print("\n")
    print("------------------------------------------")
    print("VALIDATION SUMMARY: {err} Errors {warn} Warnings".format(
        err=err_cnt, warn=warn_cnt
    ))

    if err_cnt > 0:
        print(
            "This course is not suitable for importing. Please fix courseware "
            "according to specifications before importing."
        )
    elif warn_cnt > 0:
        print(
            "This course can be imported, but some errors may occur "
            "during the run of the course. It is recommend that you fix "
            "your courseware before importing"
        )
    else:
        print("This course can be imported successfully.")

    return err_cnt


def _update_block_location(block, new_location):
    """
    Update a block's location. If the block is a pure XBlock (not an XModule),
    then its field data keys will need to be updated to include the new location.

    Args:
        block (XModuleMixin): The block to update.
        new_location (Location): The new location of the block.

    Returns:
        None
    """
    # Retrieve the content and settings fields that have been explicitly set
    # to ensure that they are properly re-keyed in the XBlock field data.
    # (Children scope is included as well — see the list below.)
    rekey_fields = (
        list(block.get_explicitly_set_fields_by_scope(Scope.content).keys()) +
        list(block.get_explicitly_set_fields_by_scope(Scope.settings).keys()) +
        list(block.get_explicitly_set_fields_by_scope(Scope.children).keys())
    )

    block.location = new_location

    # Pure XBlocks store the field data in a key-value store
    # in which one component of the key is the XBlock's location (equivalent to "scope_ids").
    # Since we've changed the XBlock's location, we need to re-save
    # all the XBlock's fields so they will be stored using the new location in the key.
    # However, since XBlocks only save "dirty" fields, we need to call
    # XBlock's `force_save_fields_method`
    if len(rekey_fields) > 0:
        block.force_save_fields(rekey_fields)