From 23d649d7e39cee52fef0a93f12b10ae26a26df84 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Fri, 7 Feb 2020 15:19:54 -0800 Subject: [PATCH] REST API to export modulestore XBlocks as OLX. This was originally a separate plugin called openedx-olx-rest-api. It provides a Studio API that any user with course authoring permission can use to get the OLX of an individual XBlock or a unit. Without this, the only way to get an XBlock's OLX was to download the tarball of the entire course. Examples of usage (be logged in to Studio on devstack): Simple HTML XBlock: http://localhost:18010/api/olx-export/v1/xblock/block-v1:edX+DemoX+Demo_Course+type@html+block@030e35c4756a4ddc8d40b95fbbfff4d4/ Exporting a unit: http://localhost:18010/api/olx-export/v1/xblock/block-v1:edX+DemoX+Demo_Course+type@vertical+block@134df56c516a4a0dbb24dd5facef746e/ Example output for an HTML block: { "root_block_id":"block-v1:edX+DemoX+Demo_Course+type@html+block@030e35c4756a4ddc8d40b95fbbfff4d4", "blocks":{ "block-v1:edX+DemoX+Demo_Course+type@html+block@030e35c4756a4ddc8d40b95fbbfff4d4":{ "olx":"Welcome to the edX Demo Course Introduction.

\n]]>\n" } } } The code is designed primarily for use when importing content into Blockstore. So it will: * Export HTML blocks as a combined OLX/HTML file, with the HTML in a CDATA section * Convert vertical blocks to unit blocks (unit is like a vertical but has no UI elements) * Detect static files (such as images) used by the XBlock and list the absolute URL of each static file in the "static_files": {...} JSON element for each XBlock that has at least one static file usage. This can handle static files that are in mongo ("contentstore" / "Files & Uploads") as well as files generated on-the-fly during OLX serialization via the export_fs API (mostly this is video transcripts). --- .../core/djangoapps/olx_rest_api/__init__.py | 0 .../core/djangoapps/olx_rest_api/adapters.py | 129 ++++++++++++++ openedx/core/djangoapps/olx_rest_api/apps.py | 25 +++ .../olx_rest_api/block_serializer.py | 163 ++++++++++++++++++ .../djangoapps/olx_rest_api/test_adapters.py | 49 ++++++ .../djangoapps/olx_rest_api/test_views.py | 127 ++++++++++++++ openedx/core/djangoapps/olx_rest_api/urls.py | 14 ++ openedx/core/djangoapps/olx_rest_api/views.py | 117 +++++++++++++ setup.py | 1 + 9 files changed, 625 insertions(+) create mode 100644 openedx/core/djangoapps/olx_rest_api/__init__.py create mode 100644 openedx/core/djangoapps/olx_rest_api/adapters.py create mode 100644 openedx/core/djangoapps/olx_rest_api/apps.py create mode 100644 openedx/core/djangoapps/olx_rest_api/block_serializer.py create mode 100644 openedx/core/djangoapps/olx_rest_api/test_adapters.py create mode 100644 openedx/core/djangoapps/olx_rest_api/test_views.py create mode 100644 openedx/core/djangoapps/olx_rest_api/urls.py create mode 100644 openedx/core/djangoapps/olx_rest_api/views.py diff --git a/openedx/core/djangoapps/olx_rest_api/__init__.py b/openedx/core/djangoapps/olx_rest_api/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openedx/core/djangoapps/olx_rest_api/adapters.py 
b/openedx/core/djangoapps/olx_rest_api/adapters.py new file mode 100644 index 0000000000..97af88ecda --- /dev/null +++ b/openedx/core/djangoapps/olx_rest_api/adapters.py @@ -0,0 +1,129 @@ +""" +Helpers required to adapt to differing APIs +""" +from contextlib import contextmanager +import logging +import re + +from opaque_keys import InvalidKeyError +from opaque_keys.edx.keys import AssetKey, CourseKey +from fs.memoryfs import MemoryFS +from fs.wrapfs import WrapFS + +from static_replace import replace_static_urls +from xmodule.contentstore.content import StaticContent +from xmodule.assetstore.assetmgr import AssetManager +from xmodule.modulestore.django import modulestore as store +from xmodule.modulestore.exceptions import ItemNotFoundError +from xmodule.exceptions import NotFoundError +from xmodule.xml_module import XmlParserMixin + +log = logging.getLogger(__name__) + + +def get_block(usage_key): + """ + Return an XBlock from modulestore. + """ + return store().get_item(usage_key) + + +def get_asset_content_from_path(course_key, asset_path): + """ + Locate the given asset content, load it into memory, and return it. + Returns None if the asset is not found. + """ + try: + asset_key = StaticContent.get_asset_key_from_path(course_key, asset_path) + return AssetManager.find(asset_key) + except (ItemNotFoundError, NotFoundError): + return None + + +def rewrite_absolute_static_urls(text, course_id): + """ + Convert absolute URLs like + https://studio-site.opencraft.hosting/asset-v1:LabXchange+101+2019+type@asset+block@SCI_1.2_Image_.png + to the proper + /static/SCI_1.2_Image_.png + format for consistency and portability. + """ + assert isinstance(course_id, CourseKey) + asset_full_url_re = r'https?://[^/]+/(?P<maybe_asset_key>[^\s\'"&]+)' + + def check_asset_key(match_obj): + """ + If this URL's path part is an AssetKey from the same course, rewrite it.
+ """ + try: + asset_key = AssetKey.from_string(match_obj.group('maybe_asset_key')) + except InvalidKeyError: + return match_obj.group(0) # Not an asset key; do not rewrite + if asset_key.course_key == course_id: + return '/static/' + asset_key.path # Rewrite this to portable form + else: + return match_obj.group(0) # From a different course; do not rewrite + + return re.sub(asset_full_url_re, check_asset_key, text) + + +def collect_assets_from_text(text, course_id, include_content=False): + """ + Yield dicts of asset content and path from static asset paths found in the given text. + Make sure to have replaced the URLs with rewrite_absolute_static_urls first. + If include_content is True, the result will include a contentstore + StaticContent file object which wraps the actual binary content of the file. + """ + # Replace static urls like '/static/foo.png' + static_paths = [] + # Drag-and-drop-v2 has + # &quot;/static/blah.png&quot; + # which must be changed to "/static/blah.png" for replace_static_urls to work: + text2 = text.replace("&quot;", '"') + replace_static_urls(text=text2, course_id=course_id, static_paths_out=static_paths) + for (path, uri) in static_paths: + if path.startswith('/static/'): + path = path[8:] + info = { + 'path': path, + 'url': '/' + str(course_id.make_asset_key("asset", path)), + } + if include_content: + content = get_asset_content_from_path(course_id, path) + if content is None: + log.error("Static asset not found: (%s, %s)", path, uri) + else: + info['content'] = content + yield info + + +@contextmanager +def override_export_fs(block): + """ + Hack required for some legacy XBlocks which inherit + XModuleDescriptor.add_xml_to_node() instead of the usual + XmlSerializationMixin.add_xml_to_node() method. + This method temporarily replaces a block's runtime's + 'export_fs' system with an in-memory filesystem.
+ This method also abuses the XmlParserMixin.export_to_file() + API to prevent the XModule export code from exporting each + block as two files (one .olx pointing to one .xml file). + The export_to_file was meant to be used only by the + customtag XModule but it makes our lives here much easier. + """ + fs = WrapFS(MemoryFS()) + fs.makedir('course') + fs.makedir('course/static') # Video XBlock requires this directory to exist, to put srt files etc. + + old_export_fs = block.runtime.export_fs + block.runtime.export_fs = fs + if hasattr(block, 'export_to_file'): + old_export_to_file = block.export_to_file + block.export_to_file = lambda: False + old_global_export_to_file = XmlParserMixin.export_to_file + XmlParserMixin.export_to_file = lambda _: False # So this applies to child blocks that get loaded during export + try: # Always undo the patches, even if the caller's with-body raises, so the class-level override never leaks + yield fs + finally: + block.runtime.export_fs = old_export_fs + if hasattr(block, 'export_to_file'): + block.export_to_file = old_export_to_file + XmlParserMixin.export_to_file = old_global_export_to_file diff --git a/openedx/core/djangoapps/olx_rest_api/apps.py b/openedx/core/djangoapps/olx_rest_api/apps.py new file mode 100644 index 0000000000..da7fd95ca0 --- /dev/null +++ b/openedx/core/djangoapps/olx_rest_api/apps.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +""" +olx_rest_api Django application initialization. +""" +from django.apps import AppConfig + +from openedx.core.djangoapps.plugins.constants import PluginURLs, ProjectType + + +class OlxRestApiAppConfig(AppConfig): + """ + Configuration for the olx_rest_api Django plugin application. + See: https://github.com/edx/edx-platform/blob/master/openedx/core/djangoapps/plugins/README.rst + """ + + name = 'openedx.core.djangoapps.olx_rest_api' + verbose_name = 'Modulestore OLX REST API' + plugin_app = { + PluginURLs.CONFIG: { + ProjectType.CMS: { + # The namespace to provide to django's urls.include.
+ PluginURLs.NAMESPACE: 'olx_rest_api', + }, + }, + } diff --git a/openedx/core/djangoapps/olx_rest_api/block_serializer.py b/openedx/core/djangoapps/olx_rest_api/block_serializer.py new file mode 100644 index 0000000000..36ed6a59c7 --- /dev/null +++ b/openedx/core/djangoapps/olx_rest_api/block_serializer.py @@ -0,0 +1,163 @@ +""" +Code for serializing a modulestore XBlock to OLX suitable for import into +Blockstore. +""" +import logging +import os +from collections import namedtuple + +from lxml import etree + +from . import adapters + +log = logging.getLogger(__name__) + +# A static file required by an XBlock +StaticFile = namedtuple('StaticFile', ['name', 'url', 'data']) + + +def blockstore_def_key_from_modulestore_usage_key(usage_key): + """ + In modulestore, the "definition key" is a MongoDB ObjectID kept in split's + definitions table, which theoretically allows the same block to be used in + many places (each with a unique usage key). However, that functionality is + not exposed in Studio (other than via content libraries). So when we import + into Blockstore, we assume that each usage is unique, don't generate a usage + key, and create a new "definition key" from the original usage key. 
+ So modulestore usage key + block-v1:A+B+C+type@html+block@introduction + will become Blockstore definition key + html/introduction + """ + block_type = usage_key.block_type + if block_type == 'vertical': + # We transform <vertical> to <unit> + block_type = "unit" + return block_type + "/" + usage_key.block_id + + +class XBlockSerializer(object): + """ + This class will serialize an XBlock, producing: + (1) A new definition ID for use in Blockstore + (2) an XML string defining the XBlock and referencing the IDs of its + children (but not containing the actual XML of its children) + (3) a list of any static files required by the XBlock and their URL + """ + + def __init__(self, block): + """ + Serialize an XBlock to an OLX string + supporting files, and store the + resulting data in this object. + """ + self.orig_block_key = block.scope_ids.usage_id + self.static_files = [] + self.def_id = blockstore_def_key_from_modulestore_usage_key(self.orig_block_key) + + # Special cases: + if self.orig_block_key.block_type == 'html': + self.serialize_html_block(block) + else: + self.serialize_normal_block(block) + + course_key = self.orig_block_key.course_key + # Search the OLX for references to files stored in the course's + # "Files & Uploads" (contentstore): + self.olx_str = adapters.rewrite_absolute_static_urls(self.olx_str, course_key) + for asset in adapters.collect_assets_from_text(self.olx_str, course_key): + path = asset['path'] + if path not in [sf.name for sf in self.static_files]: + self.static_files.append(StaticFile(name=path, url=asset['url'], data=None)) + + def serialize_normal_block(self, block): + """ + Serialize an XBlock to XML. + + This method is used for every block type except HTML, which uses + serialize_html_block() instead.
+ """ + # Create an XML node to hold the exported data + olx_node = etree.Element("root") # The node name doesn't matter: add_xml_to_node will change it + # ^ Note: We could pass nsmap=xblock.core.XML_NAMESPACES here, but the + # resulting XML namespace attributes don't seem that useful? + with adapters.override_export_fs(block) as filesystem: # Needed for XBlocks that inherit XModuleDescriptor + # Tell the block to serialize itself as XML/OLX: + if not block.has_children: + block.add_xml_to_node(olx_node) + else: + # We don't want the children serialized at this time, because + # otherwise we can't tell which files in 'filesystem' belong to + # this block and which belong to its children. So, temporarily + # disable any children: + children = block.children + block.children = [] + try: # Put the children back even if serialization raises + block.add_xml_to_node(olx_node) + finally: + block.children = children + + # Now the block/module may have exported additional data as files in + # 'filesystem'. If so, store them: + for item in filesystem.walk(): # pylint: disable=not-callable + for unit_file in item.files: + file_path = os.path.join(item.path, unit_file.name) + with filesystem.open(file_path, 'rb') as fh: + data = fh.read() + self.static_files.append(StaticFile(name=unit_file.name, data=data, url=None)) + # Apply some transformations to the OLX: + self.transform_olx(olx_node, usage_id=block.scope_ids.usage_id) + # Add <xblock-include /> tags for each child (XBlock XML export + # normally puts children inline as e.g. <html> tags, but we want + # references to them only.) + if block.has_children: + for child_id in block.children: + # In modulestore, the "definition key" is a MongoDB ObjectID + # kept in split's definitions table, which theoretically allows + # the same block to be used in many places (each with a unique + # usage key). However, that functionality is not exposed in + # Studio (other than via content libraries).
So when we import + # into Blockstore, we assume that each usage is unique, don't + # generate a usage key, and create a new "definition key" from + # the original usage key. + # So modulestore usage key + # block-v1:A+B+C+type@html+block@introduction + # will become Blockstore definition key + # html/introduction + # + # If we needed the real definition key, we could get it via + # child = block.runtime.get_block(child_id) + # child_def_id = str(child.scope_ids.def_id) + # and then use + # <xblock-include definition={child_def_id} /> + def_id = blockstore_def_key_from_modulestore_usage_key(child_id) + olx_node.append(olx_node.makeelement("xblock-include", {"definition": def_id})) + # Store the resulting XML as a string: + self.olx_str = etree.tostring(olx_node, encoding="unicode", pretty_print=True) + + def serialize_html_block(self, block): + """ + Special case handling for HTML blocks + """ + olx_node = etree.Element("html") + if block.display_name: + olx_node.attrib["display_name"] = block.display_name + olx_node.text = etree.CDATA("\n" + block.data + "\n") + self.olx_str = etree.tostring(olx_node, encoding="unicode", pretty_print=True) + + def transform_olx(self, olx_node, usage_id): + """ + Apply transformations to the given OLX etree Node. + """ + # Remove 'url_name' - we store the definition key in the folder name + # that holds the OLX and the usage key elsewhere, so specifying it + # within the OLX file is redundant and can lead to issues if the file is + # copied and pasted elsewhere in the bundle with a new definition key.
+ olx_node.attrib.pop('url_name', None) + # Convert <vertical> to the new <unit> tag/block + if olx_node.tag == 'vertical': + olx_node.tag = 'unit' + for key in olx_node.attrib.keys(): + if key not in ('display_name', 'url_name'): + log.warning( + '<vertical> tag attribute "%s" will be ignored after conversion to <unit> (in %s)', + key, + str(usage_id) + ) diff --git a/openedx/core/djangoapps/olx_rest_api/test_adapters.py b/openedx/core/djangoapps/olx_rest_api/test_adapters.py new file mode 100644 index 0000000000..8a77d378b2 --- /dev/null +++ b/openedx/core/djangoapps/olx_rest_api/test_adapters.py @@ -0,0 +1,49 @@ +""" +Test the OLX REST API adapters code +""" +import unittest + +from opaque_keys.edx.keys import CourseKey + +from openedx.core.djangoapps.olx_rest_api import adapters + + +class TestAdapters(unittest.TestCase): + """ + Test the OLX REST API adapters code + """ + + def test_rewrite_absolute_static_urls(self): + """ + Test that rewrite_absolute_static_urls() can find and replace all uses + of absolute Studio URLs in a course. + + Some criteria: + - Rewriting only happens if the course ID is the same. If the absolute + URL points to a different course, the new /static/foo.png form won't + work. + """ + # Note that this doesn't have to be well-formed OLX + course_id = CourseKey.from_string("course-v1:TestCourse+101+2020") + olx_in = """ + + + + View a file with accented characters in the filename. + + Not an asset link. + + + """ + olx_expected = """ + + + + View a file with accented characters in the filename. + + Not an asset link. + + + """ + olx_out = adapters.rewrite_absolute_static_urls(olx_in, course_id) + self.assertEqual(olx_out, olx_expected) diff --git a/openedx/core/djangoapps/olx_rest_api/test_views.py b/openedx/core/djangoapps/olx_rest_api/test_views.py new file mode 100644 index 0000000000..4e01e5752f --- /dev/null +++ b/openedx/core/djangoapps/olx_rest_api/test_views.py @@ -0,0 +1,127 @@ +""" +Test for the OLX REST API app.
+""" +import re +from xml.dom import minidom + +from openedx.core.djangolib.testing.utils import skip_unless_cms +from student.roles import CourseStaffRole +from student.tests.factories import CourseEnrollmentFactory, UserFactory +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase +from xmodule.modulestore.tests.factories import ToyCourseFactory + + +@skip_unless_cms +class OlxRestApiTestCase(SharedModuleStoreTestCase): + """ + Test the views (and consequently all the other code) of the OLX REST API. + """ + + @classmethod + def setUpClass(cls): + """ + Set up a course for use in these tests + """ + super().setUpClass() + with cls.store.default_store(ModuleStoreEnum.Type.split): + cls.course = ToyCourseFactory.create(modulestore=cls.store) + assert str(cls.course.id).startswith("course-v1:"), "This test is for split mongo course exports only" + cls.unit_key = cls.course.id.make_usage_key('vertical', 'vertical_test') + + def setUp(self): + """ + Per-test setup + """ + super().setUp() + self.user = UserFactory.create(password='edx') + CourseEnrollmentFactory.create(user=self.user, course_id=self.course.id) + self.client.login(username=self.user.username, password='edx') + + # Helper methods: + + def assertXmlEqual(self, xml_str_a, xml_str_b): + """ + Assert that the given XML strings are equal, + ignoring attribute order and some whitespace variations. 
+ """ + def clean(xml_str): + # Collapse repeated whitespace: + xml_str = re.sub(r'(\s)\s+', r'\1', xml_str) + xml_bytes = xml_str.encode('utf8') + return minidom.parseString(xml_bytes).toprettyxml() + self.assertEqual(clean(xml_str_a), clean(xml_str_b)) + + def get_olx_response_for_block(self, block_id): + return self.client.get('/api/olx-export/v1/xblock/{}/'.format(block_id)) + + # Actual tests: + + def test_no_permission(self): + """ + A regular user enrolled in the course (but not part of the authoring + team) should not be able to use the API. + """ + response = self.get_olx_response_for_block(self.unit_key) + self.assertEqual(response.status_code, 403) + self.assertEqual( + response.json()['detail'], + 'You must be a member of the course team in Studio to export OLX using this API.' + ) + + def test_export(self): + """ + A staff user should be able to use this API to get the OLX of XBlocks in + the course. + """ + CourseStaffRole(self.course.id).add_users(self.user) + + response = self.get_olx_response_for_block(self.unit_key) + self.assertEqual(response.status_code, 200) + self.assertEqual( + response.json()['root_block_id'], + str(self.unit_key), + ) + blocks = response.json()['blocks'] + # Check the OLX of the root block: + self.assertXmlEqual( + blocks[str(self.unit_key)]['olx'], + '\n' + ' \n' + ' \n' + ' \n' + ' \n' + '\n' + ) + # Check the OLX of a video + self.assertXmlEqual( + blocks[str(self.course.id.make_usage_key('video', 'sample_video'))]['olx'], + '