Merge pull request #23068 from open-craft/olx-rest-api

REST API to export modulestore XBlocks as OLX
This commit is contained in:
David Ormsbee
2020-02-25 15:11:08 -05:00
committed by GitHub
9 changed files with 625 additions and 0 deletions

View File

@@ -0,0 +1,129 @@
"""
Helpers required to adapt to differing APIs
"""
from contextlib import contextmanager
import logging
import re
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import AssetKey, CourseKey
from fs.memoryfs import MemoryFS
from fs.wrapfs import WrapFS
from static_replace import replace_static_urls
from xmodule.contentstore.content import StaticContent
from xmodule.assetstore.assetmgr import AssetManager
from xmodule.modulestore.django import modulestore as store
from xmodule.modulestore.exceptions import ItemNotFoundError
from xmodule.exceptions import NotFoundError
from xmodule.xml_module import XmlParserMixin
log = logging.getLogger(__name__)
def get_block(usage_key):
    """
    Load and return the XBlock identified by `usage_key` from modulestore.
    """
    modulestore = store()
    return modulestore.get_item(usage_key)
def get_asset_content_from_path(course_key, asset_path):
    """
    Look up the asset stored at `asset_path` in the given course and load it
    into memory.

    Returns the asset content, or None if the asset is not found.
    """
    try:
        key = StaticContent.get_asset_key_from_path(course_key, asset_path)
        content = AssetManager.find(key)
    except (ItemNotFoundError, NotFoundError):
        # Either lookup step may report a missing asset; both mean "no content".
        content = None
    return content
def rewrite_absolute_static_urls(text, course_id):
    """
    Rewrite absolute asset URLs found in `text` to the portable /static/ form.

    For example
        https://studio-site.opencraft.hosting/asset-v1:LabXchange+101+2019+type@asset+block@SCI_1.2_Image_.png
    becomes
        /static/SCI_1.2_Image_.png

    Only URLs whose path parses as an AssetKey belonging to `course_id` are
    rewritten; every other URL is returned untouched.
    """
    assert isinstance(course_id, CourseKey)
    absolute_url_pattern = re.compile(r'https?://[^/]+/(?P<maybe_asset_key>[^\s\'"&]+)')

    def to_portable_form(match):
        """
        Return the /static/ form of a matched URL if it is an asset of this
        course, otherwise return the matched URL unchanged.
        """
        original_url = match.group(0)
        try:
            candidate = AssetKey.from_string(match.group('maybe_asset_key'))
        except InvalidKeyError:
            # The path is not an asset key at all; leave the URL alone
            return original_url
        if candidate.course_key == course_id:
            return '/static/' + candidate.path
        # The asset belongs to a different course; leave the URL alone
        return original_url

    return absolute_url_pattern.sub(to_portable_form, text)
def collect_assets_from_text(text, course_id, include_content=False):
    """
    Yield one dict per static asset path found in the given text.

    Each dict has a 'path' (with any leading '/static/' removed) and a 'url'
    (the asset key of that path in this course, prefixed with '/').

    Make sure to have replaced the URLs with rewrite_absolute_static_urls first.

    If include_content is True, each dict also gets a 'content' entry holding
    the contentstore StaticContent file object which wraps the actual binary
    content of the file. When the asset cannot be found, an error is logged
    and the dict is yielded without 'content'.
    """
    static_prefix = '/static/'
    # Drag-and-drop-v2 has
    #     &quot;/static/blah.png&quot;
    # which must be changed to "/static/blah.png" for replace_static_urls to work:
    unescaped_text = text.replace("&quot;", '"')
    # replace_static_urls() is called only to fill in the list of static paths
    # it encounters; its return value is not used.
    found_paths = []
    replace_static_urls(text=unescaped_text, course_id=course_id, static_paths_out=found_paths)
    for path, uri in found_paths:
        if path.startswith(static_prefix):
            path = path[len(static_prefix):]
        asset_url = '/' + str(course_id.make_asset_key("asset", path))
        info = {'path': path, 'url': asset_url}
        if include_content:
            content = get_asset_content_from_path(course_id, path)
            if content is not None:
                info['content'] = content
            else:
                log.error("Static asset not found: (%s, %s)", path, uri)
        yield info
@contextmanager
def override_export_fs(block):
    """
    Hack required for some legacy XBlocks which inherit
        XModuleDescriptor.add_xml_to_node()
    instead of the usual
        XmlSerializationMixin.add_xml_to_node()
    method.

    This method temporarily replaces a block's runtime's
    'export_fs' system with an in-memory filesystem.

    This method also abuses the
        XmlParserMixin.export_to_file()
    API to prevent the XModule export code from exporting each
    block as two files (one .olx pointing to one .xml file).
    The export_to_file was meant to be used only by the
    customtag XModule but it makes our lives here much easier.

    Yields the temporary in-memory filesystem. All overrides (including the
    class-level patch of XmlParserMixin.export_to_file) are undone when the
    'with' block exits, even if it exits with an exception.
    """
    fs = WrapFS(MemoryFS())
    fs.makedir('course')
    fs.makedir('course/static')  # Video XBlock requires this directory to exist, to put srt files etc.

    old_export_fs = block.runtime.export_fs
    block.runtime.export_fs = fs
    has_export_to_file = hasattr(block, 'export_to_file')
    if has_export_to_file:
        old_export_to_file = block.export_to_file
        block.export_to_file = lambda: False
    old_global_export_to_file = XmlParserMixin.export_to_file
    XmlParserMixin.export_to_file = lambda _: False  # So this applies to child blocks that get loaded during export
    try:
        yield fs
    finally:
        # Always restore: the XmlParserMixin patch is global, so leaving it in
        # place after a failed export would affect every later export.
        block.runtime.export_fs = old_export_fs
        if has_export_to_file:
            block.export_to_file = old_export_to_file
        XmlParserMixin.export_to_file = old_global_export_to_file

View File

@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
"""
olx_rest_api Django application initialization.
"""
from django.apps import AppConfig
from openedx.core.djangoapps.plugins.constants import PluginURLs, ProjectType
class OlxRestApiAppConfig(AppConfig):
    """
    Django plugin application configuration for olx_rest_api.

    See: https://github.com/edx/edx-platform/blob/master/openedx/core/djangoapps/plugins/README.rst
    """

    name = 'openedx.core.djangoapps.olx_rest_api'
    verbose_name = 'Modulestore OLX REST API'

    # Plugin settings: URL configuration is declared for the CMS project type only.
    plugin_app = {
        PluginURLs.CONFIG: {
            ProjectType.CMS: {
                # Passed to django's urls.include as the namespace.
                PluginURLs.NAMESPACE: 'olx_rest_api',
            },
        },
    }

View File

@@ -0,0 +1,163 @@
"""
Code for serializing a modulestore XBlock to OLX suitable for import into
Blockstore.
"""
import logging
import os
from collections import namedtuple
from lxml import etree
from . import adapters
log = logging.getLogger(__name__)
# Record describing one static file that an XBlock requires: its name, a URL
# for it, and its data.
StaticFile = namedtuple('StaticFile', 'name url data')
def blockstore_def_key_from_modulestore_usage_key(usage_key):
    """
    Derive a Blockstore "definition key" from a modulestore usage key.

    In modulestore, the "definition key" is a MongoDB ObjectID kept in split's
    definitions table, which theoretically allows the same block to be used in
    many places (each with a unique usage key). However, that functionality is
    not exposed in Studio (other than via content libraries). So when we import
    into Blockstore, we assume that each usage is unique, don't generate a usage
    key, and create a new "definition key" from the original usage key.

    So modulestore usage key
        block-v1:A+B+C+type@html+block@introduction
    will become Blockstore definition key
        html/introduction
    """
    # <vertical> is transformed to <unit>, so the key uses the new type name
    block_type = "unit" if usage_key.block_type == 'vertical' else usage_key.block_type
    return "/".join((block_type, usage_key.block_id))
class XBlockSerializer(object):
    """
    This class will serialize an XBlock, producing:
        (1) A new definition ID for use in Blockstore
        (2) an XML string defining the XBlock and referencing the IDs of its
            children (but not containing the actual XML of its children)
        (3) a list of any static files required by the XBlock and their URL
    """

    def __init__(self, block):
        """
        Serialize an XBlock to an OLX string + supporting files, and store the
        resulting data in this object.

        After construction these attributes are set:
            orig_block_key: the usage key of the serialized block
            def_id: the definition key derived from that usage key
            olx_str: the block's OLX, with absolute asset URLs of the same
                course rewritten to the /static/ form
            static_files: list of StaticFile tuples; files exported by the
                block itself carry 'data' and no 'url', files referenced from
                the course's contentstore carry 'url' and no 'data'
        """
        self.orig_block_key = block.scope_ids.usage_id
        self.static_files = []
        self.def_id = blockstore_def_key_from_modulestore_usage_key(self.orig_block_key)

        # Special cases:
        if self.orig_block_key.block_type == 'html':
            self.serialize_html_block(block)
        else:
            self.serialize_normal_block(block)

        course_key = self.orig_block_key.course_key
        # Search the OLX for references to files stored in the course's
        # "Files & Uploads" (contentstore):
        self.olx_str = adapters.rewrite_absolute_static_urls(self.olx_str, course_key)
        # Track the names already collected so each file is listed only once
        known_names = {static_file.name for static_file in self.static_files}
        for asset in adapters.collect_assets_from_text(self.olx_str, course_key):
            path = asset['path']
            if path not in known_names:
                known_names.add(path)
                self.static_files.append(StaticFile(name=path, url=asset['url'], data=None))

    def serialize_normal_block(self, block):
        """
        Serialize an XBlock to XML.

        This method is used for every block type except HTML, which uses
        serialize_html_block() instead.

        Sets self.olx_str and appends any files the block wrote to its export
        filesystem to self.static_files.
        """
        # Create an XML node to hold the exported data
        olx_node = etree.Element("root")  # The node name doesn't matter: add_xml_to_node will change it
        # ^ Note: We could pass nsmap=xblock.core.XML_NAMESPACES here, but the
        #   resulting XML namespace attributes don't seem that useful?
        with adapters.override_export_fs(block) as filesystem:  # Needed for XBlocks that inherit XModuleDescriptor
            # Tell the block to serialize itself as XML/OLX:
            if not block.has_children:
                block.add_xml_to_node(olx_node)
            else:
                # We don't want the children serialized at this time, because
                # otherwise we can't tell which files in 'filesystem' belong to
                # this block and which belong to its children. So, temporarily
                # disable any children:
                children = block.children
                block.children = []
                try:
                    block.add_xml_to_node(olx_node)
                finally:
                    # Restore the children even if serialization fails, so the
                    # block object is never left looking childless.
                    block.children = children

            # Now the block/module may have exported additional data as files in
            # 'filesystem'. If so, store them:
            for item in filesystem.walk():  # pylint: disable=not-callable
                for unit_file in item.files:
                    file_path = os.path.join(item.path, unit_file.name)
                    with filesystem.open(file_path, 'rb') as fh:
                        data = fh.read()
                    self.static_files.append(StaticFile(name=unit_file.name, data=data, url=None))

        # Apply some transformations to the OLX:
        self.transform_olx(olx_node, usage_id=block.scope_ids.usage_id)

        # Add <xblock-include /> tags for each child (XBlock XML export
        # normally puts children inline as e.g. <html> tags, but we want
        # references to them only.)
        if block.has_children:
            for child_id in block.children:
                # In modulestore, the "definition key" is a MongoDB ObjectID
                # kept in split's definitions table, which theoretically allows
                # the same block to be used in many places (each with a unique
                # usage key). However, that functionality is not exposed in
                # Studio (other than via content libraries). So when we import
                # into Blockstore, we assume that each usage is unique, don't
                # generate a usage key, and create a new "definition key" from
                # the original usage key.
                # So modulestore usage key
                #     block-v1:A+B+C+type@html+block@introduction
                # will become Blockstore definition key
                #     html/introduction
                #
                # If we needed the real definition key, we could get it via
                #     child = block.runtime.get_block(child_id)
                #     child_def_id = str(child.scope_ids.def_id)
                # and then use
                #     <xblock-include definition={child_def_id} usage={child_id.block_id} />
                def_id = blockstore_def_key_from_modulestore_usage_key(child_id)
                olx_node.append(olx_node.makeelement("xblock-include", {"definition": def_id}))

        # Store the resulting XML as a string:
        self.olx_str = etree.tostring(olx_node, encoding="unicode", pretty_print=True)

    def serialize_html_block(self, block):
        """
        Special case handling for HTML blocks: the block's data is stored as a
        CDATA section inside a single <html> element, and the result is saved
        in self.olx_str.
        """
        olx_node = etree.Element("html")
        if block.display_name:
            olx_node.attrib["display_name"] = block.display_name
        olx_node.text = etree.CDATA("\n" + block.data + "\n")
        self.olx_str = etree.tostring(olx_node, encoding="unicode", pretty_print=True)

    def transform_olx(self, olx_node, usage_id):
        """
        Apply transformations to the given OLX etree Node (modified in place).

        `usage_id` is only used to identify the block in log messages.
        """
        # Remove 'url_name' - we store the definition key in the folder name
        # that holds the OLX and the usage key elsewhere, so specifying it
        # within the OLX file is redundant and can lead to issues if the file is
        # copied and pasted elsewhere in the bundle with a new definition key.
        olx_node.attrib.pop('url_name', None)
        # Convert <vertical> to the new <unit> tag/block
        if olx_node.tag == 'vertical':
            olx_node.tag = 'unit'
            for key in olx_node.attrib:
                if key not in ('display_name', 'url_name'):
                    log.warning(
                        '<vertical> tag attribute "%s" will be ignored after conversion to <unit> (in %s)',
                        key,
                        str(usage_id)
                    )

View File

@@ -0,0 +1,49 @@
"""
Test the OLX REST API adapters code
"""
import unittest
from opaque_keys.edx.keys import CourseKey
from openedx.core.djangoapps.olx_rest_api import adapters
class TestAdapters(unittest.TestCase):
    """
    Tests for the helpers in the OLX REST API adapters module
    """

    def test_rewrite_absolute_static_urls(self):
        """
        rewrite_absolute_static_urls() should find and replace every absolute
        Studio asset URL that belongs to the given course.

        Some criteria:
        - Rewriting only happens if the course ID is the same. If the absolute
          URL points to a different course, the new /static/foo.png form won't
          work.
        - URLs that are not asset links are left as they are.
        """
        course_key = CourseKey.from_string("course-v1:TestCourse+101+2020")
        # Note that this doesn't have to be well-formed OLX
        original_olx = """
<problem>
<img src="https://studio.example.com/asset-v1:TestCourse+101+2020+type@asset+block@SCI_1.2_Image_.png">
<a href='https://studio.example.com/asset-v1:TestCourse+101+2020+type@asset+block@Québec.html'>
View a file with accented characters in the filename.
</a>
<a href="https://studio.example.com/xblock/block-v1:foo">Not an asset link</a>.
<img src="https://studio.example.com/asset-v1:OtherCourse+500+2020+type@asset+block@exclude_me.png">
</problem>
"""
        expected_olx = """
<problem>
<img src="/static/SCI_1.2_Image_.png">
<a href='/static/Québec.html'>
View a file with accented characters in the filename.
</a>
<a href="https://studio.example.com/xblock/block-v1:foo">Not an asset link</a>.
<img src="https://studio.example.com/asset-v1:OtherCourse+500+2020+type@asset+block@exclude_me.png">
</problem>
"""
        rewritten_olx = adapters.rewrite_absolute_static_urls(original_olx, course_key)
        self.assertEqual(rewritten_olx, expected_olx)

View File

@@ -0,0 +1,127 @@
"""
Test for the OLX REST API app.
"""
import re
from xml.dom import minidom
from openedx.core.djangolib.testing.utils import skip_unless_cms
from student.roles import CourseStaffRole
from student.tests.factories import CourseEnrollmentFactory, UserFactory
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase
from xmodule.modulestore.tests.factories import ToyCourseFactory
@skip_unless_cms
class OlxRestApiTestCase(SharedModuleStoreTestCase):
    """
    Exercise the OLX REST API views (and, through them, the rest of the app).
    """

    @classmethod
    def setUpClass(cls):
        """
        Create the split-mongo toy course shared by all tests in this class
        """
        super().setUpClass()
        with cls.store.default_store(ModuleStoreEnum.Type.split):
            cls.course = ToyCourseFactory.create(modulestore=cls.store)
            assert str(cls.course.id).startswith("course-v1:"), "This test is for split mongo course exports only"
            cls.unit_key = cls.course.id.make_usage_key('vertical', 'vertical_test')

    def setUp(self):
        """
        Create a user enrolled in the course and log them in
        """
        super().setUp()
        self.user = UserFactory.create(password='edx')
        CourseEnrollmentFactory.create(user=self.user, course_id=self.course.id)
        self.client.login(username=self.user.username, password='edx')

    # Helper methods:

    def assertXmlEqual(self, xml_str_a, xml_str_b):
        """
        Assert that the two XML strings are equal once normalized, i.e.
        ignoring attribute order and some whitespace variations.
        """
        def normalize(xml_str):
            """ Collapse repeated whitespace, then re-serialize via minidom """
            collapsed = re.sub(r'(\s)\s+', r'\1', xml_str)
            document = minidom.parseString(collapsed.encode('utf8'))
            return document.toprettyxml()

        self.assertEqual(normalize(xml_str_a), normalize(xml_str_b))

    def get_olx_response_for_block(self, block_id):
        """
        GET the OLX export endpoint for the given block and return the response
        """
        export_url = '/api/olx-export/v1/xblock/{}/'.format(block_id)
        return self.client.get(export_url)

    # Actual tests:

    def test_no_permission(self):
        """
        Being enrolled in the course is not enough: a regular user who is not
        part of the authoring team must be refused by the API.
        """
        response = self.get_olx_response_for_block(self.unit_key)
        self.assertEqual(response.status_code, 403)
        error_detail = response.json()['detail']
        self.assertEqual(
            error_detail,
            'You must be a member of the course team in Studio to export OLX using this API.'
        )

    def test_export(self):
        """
        A staff user should be able to use this API to get the OLX of XBlocks in
        the course.
        """
        CourseStaffRole(self.course.id).add_users(self.user)
        unit_id = str(self.unit_key)

        response = self.get_olx_response_for_block(self.unit_key)
        self.assertEqual(response.status_code, 200)
        payload = response.json()
        self.assertEqual(payload['root_block_id'], unit_id)
        blocks = payload['blocks']

        # Check the OLX of the root block:
        self.assertXmlEqual(
            blocks[unit_id]['olx'],
            '<unit>\n'
            ' <xblock-include definition="video/sample_video"/>\n'
            ' <xblock-include definition="video/separate_file_video"/>\n'
            ' <xblock-include definition="video/video_with_end_time"/>\n'
            ' <xblock-include definition="poll_question/T1_changemind_poll_foo_2"/>\n'
            '</unit>\n'
        )

        # Check the OLX of a video
        video_id = str(self.course.id.make_usage_key('video', 'sample_video'))
        self.assertXmlEqual(
            blocks[video_id]['olx'],
            '<video youtube="0.75:JMD_ifUUfsU,1.00:OEoXaMPEzfM,1.25:AKqURZnYqpk,1.50:DYpADpL7jAY" '
            'display_name="default" youtube_id_0_75="JMD_ifUUfsU" youtube_id_1_0="OEoXaMPEzfM" '
            'youtube_id_1_25="AKqURZnYqpk" youtube_id_1_5="DYpADpL7jAY"/>\n'
        )

    def test_html_with_static_asset(self):
        """
        HTML blocks should be exported with their content in CDATA, and the
        static assets they reference should be listed with a URL.
        """
        CourseStaffRole(self.course.id).add_users(self.user)
        block_id = str(self.course.id.make_usage_key('html', 'just_img'))

        response = self.get_olx_response_for_block(block_id)
        self.assertEqual(response.status_code, 200)
        payload = response.json()
        self.assertEqual(payload['root_block_id'], block_id)

        block_data = payload['blocks'][block_id]
        self.assertXmlEqual(
            block_data['olx'],
            '''
<html display_name="Text"><![CDATA[
<img src="/static/foo_bar.jpg" />
]]></html>
'''
        )
        self.assertIn('static_files', block_data)
        static_files = block_data['static_files']
        self.assertIn('foo_bar.jpg', static_files)
        url = static_files['foo_bar.jpg']['url']
        self.assertEqual(url, 'http://testserver/asset-v1:edX+toy+2012_Fall+type@asset+block@foo_bar.jpg')

View File

@@ -0,0 +1,14 @@
"""
Studio URL configuration for openedx-olx-rest-api.
"""
from django.conf.urls import include, url
from . import views
# Every pattern is anchored with '^': regex URL patterns are searched within
# the (remaining) path, so an unanchored nested pattern would also match
# unintended URLs such as 'api/olx-export/v1/anything/xblock/<key>/'.
urlpatterns = [
    url(r'^api/olx-export/v1/', include([
        # Get the OLX (and list of static files) of an XBlock and its descendants
        url(r'^xblock/(?P<usage_key_str>[^/]+)/$', views.get_block_olx),
        # Get a static file from an XBlock that's not part of contentstore/GridFS
        url(r'^xblock-export-file/(?P<usage_key_str>[^/]+)/(?P<path>.+)$', views.get_block_exportfs_file),
    ])),
]

View File

@@ -0,0 +1,117 @@
"""
REST API for getting modulestore XBlocks as OLX
"""
from django.http import HttpResponse
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locator import CourseLocator
from rest_framework.decorators import api_view
from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError
from rest_framework.response import Response
from student.auth import has_studio_read_access
from openedx.core.lib.api.view_utils import view_auth_classes
from . import adapters
from .block_serializer import XBlockSerializer
@api_view(['GET'])
@view_auth_classes()
def get_block_olx(request, usage_key_str):
    """
    Given a modulestore XBlock usage ID (block-v1:...), get its OLX and a list
    of any static asset files it uses.
    (There are other APIs for getting the OLX of Blockstore XBlocks.)

    The response contains the "root_block_id" plus a "blocks" mapping from each
    exported block's usage key to its "olx" and, when it has any, its
    "static_files" (file name -> {"url": ...}).

    Raises ValidationError for an invalid, too large, or non-course usage key,
    and PermissionDenied if the user lacks Studio read access to the course.
    """
    # Parse the usage key:
    try:
        usage_key = UsageKey.from_string(usage_key_str)
    except (ValueError, InvalidKeyError):
        raise ValidationError('Invalid usage key')
    if usage_key.block_type in ('course', 'chapter', 'sequential'):
        raise ValidationError('Requested XBlock tree is too large - export verticals or their children only')
    course_key = usage_key.context_key
    if not isinstance(course_key, CourseLocator):
        raise ValidationError('Invalid usage key: not a modulestore course')
    # Make sure the user has permission on that course
    if not has_studio_read_access(request.user, course_key):
        raise PermissionDenied("You must be a member of the course team in Studio to export OLX using this API.")

    # Step 1: Serialize the XBlocks to OLX files + static asset files

    serialized_blocks = {}  # Key is each XBlock's original usage key

    def collect(block_key):
        """ Serialize the given XBlock and then, recursively, its children """
        if block_key in serialized_blocks:
            return  # Already serialized
        block = adapters.get_block(block_key)
        serialized_blocks[block_key] = XBlockSerializer(block)
        if block.has_children:
            for child_key in block.children:
                collect(child_key)

    collect(usage_key)

    # Step 2: Build the response, one entry per exported XBlock

    blocks_out = {}
    for block_key, serialized in serialized_blocks.items():
        entry = {"olx": serialized.olx_str}
        for static_file in serialized.static_files:
            # Files that are not in GridFS have no URL of their own, so they are
            # served via our own get_block_exportfs_file API endpoint instead.
            location = static_file.url or (
                '/api/olx-export/v1/xblock-export-file/' + str(block_key) + '/' + static_file.name
            )
            file_info = {"url": request.build_absolute_uri(location)}
            # "static_files" is only present for blocks that have at least one file
            entry.setdefault("static_files", {})[static_file.name] = file_info
        blocks_out[str(serialized.orig_block_key)] = entry

    return Response({
        "root_block_id": str(usage_key),
        "blocks": blocks_out,
    })
@api_view(['GET'])
@view_auth_classes()
def get_block_exportfs_file(request, usage_key_str, path):
    """
    Serve a static file that got added to the XBlock's export_fs during XBlock
    serialization. Typically these would be video transcript files.

    `path` is the name of the file, as listed in the serialized block's static
    files. The file is returned as an 'application/octet-stream' attachment.

    Raises ValidationError for an invalid, too large, or non-course usage key,
    PermissionDenied if the user lacks Studio read access to the course, and
    NotFound if the block exported no file with that name.
    """
    # Parse the usage key:
    try:
        usage_key = UsageKey.from_string(usage_key_str)
    except (ValueError, InvalidKeyError):
        raise ValidationError('Invalid usage key')
    if usage_key.block_type in ('course', 'chapter', 'sequential'):
        raise ValidationError('Requested XBlock tree is too large - export verticals or their children only')
    course_key = usage_key.context_key
    if not isinstance(course_key, CourseLocator):
        raise ValidationError('Invalid usage key: not a modulestore course')
    # Make sure the user has permission on that course
    if not has_studio_read_access(request.user, course_key):
        raise PermissionDenied("You must be a member of the course team in Studio to export OLX using this API.")

    block = adapters.get_block(usage_key)
    serialized = XBlockSerializer(block)
    # Only files that were actually exported by the block carry data. Files
    # that live in contentstore are listed with a URL and data=None; they must
    # not be served from here, or the response body would be the text "None".
    static_file = next(
        (f for f in serialized.static_files if f.name == path and f.data is not None),
        None,
    )
    if static_file is None:
        raise NotFound

    response = HttpResponse(static_file.data, content_type='application/octet-stream')
    response['Content-Disposition'] = 'attachment; filename="{}"'.format(path)
    return response

View File

@@ -96,6 +96,7 @@ setup(
# consolidate the multiple discussions-related Django apps and
# either put them in the openedx/ dir, or in another repo entirely.
"discussion = lms.djangoapps.discussion.apps:DiscussionConfig",
"olx_rest_api = openedx.core.djangoapps.olx_rest_api.apps:OlxRestApiAppConfig",
"plugins = openedx.core.djangoapps.plugins.apps:PluginsConfig",
"schedules = openedx.core.djangoapps.schedules.apps:SchedulesConfig",
"theming = openedx.core.djangoapps.theming.apps:ThemingConfig",