From a84ef1b24369ebc1734a73750e5cf6bc96bbfa33 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Thu, 11 Oct 2012 12:10:28 -0400 Subject: [PATCH 1/5] wip: static asset import --- .../management/commands/import.py | 3 +- .../xmodule/xmodule/contentstore/content.py | 48 +++++++++++++++++++ .../xmodule/modulestore/xml_importer.py | 47 +++++++++++++++++- 3 files changed, 96 insertions(+), 2 deletions(-) diff --git a/cms/djangoapps/contentstore/management/commands/import.py b/cms/djangoapps/contentstore/management/commands/import.py index a8ec2c2685..d15abc53bd 100644 --- a/cms/djangoapps/contentstore/management/commands/import.py +++ b/cms/djangoapps/contentstore/management/commands/import.py @@ -5,6 +5,7 @@ from django.core.management.base import BaseCommand, CommandError from xmodule.modulestore.xml_importer import import_from_xml from xmodule.modulestore.django import modulestore +from xmodule.contentstore.django import contentstore unnamed_modules = 0 @@ -26,4 +27,4 @@ class Command(BaseCommand): print "Importing. Data_dir={data}, course_dirs={courses}".format( data=data_dir, courses=course_dirs) - import_from_xml(modulestore('direct'), data_dir, course_dirs, load_error_modules=False) + import_from_xml(modulestore('direct'), data_dir, course_dirs, load_error_modules=False,static_content_store=contentstore()) diff --git a/common/lib/xmodule/xmodule/contentstore/content.py b/common/lib/xmodule/xmodule/contentstore/content.py index 48ef078fd9..22b959cd59 100644 --- a/common/lib/xmodule/xmodule/contentstore/content.py +++ b/common/lib/xmodule/xmodule/contentstore/content.py @@ -5,7 +5,11 @@ XASSET_THUMBNAIL_TAIL_NAME = '.thumbnail.jpg' import os import logging +import StringIO + from xmodule.modulestore import Location +from .django import contentstore +from PIL import Image class StaticContent(object): def __init__(self, loc, name, content_type, data, last_modified_at=None): @@ -24,6 +28,10 @@ class StaticContent(object): @staticmethod def compute_location(org, course, name, revision=None): + # replace some illegal characters + # for example, when importing courseware static assets, typically the source repository has subdirectories. + # right now the content store does not support a hierarchy structure, so collapse those subpaths + name = name.replace('/', '_') return Location([XASSET_LOCATION_TAG, org, course, 'asset', name, revision]) def get_id(self): @@ -66,3 +74,43 @@ class ContentStore(object): def get_all_content_for_course(self, location): raise NotImplementedError + + def generate_thumbnail(self, content): + thumbnail_content = None + # if we're uploading an image, then let's generate a thumbnail so that we can + # serve it up when needed without having to rescale on the fly + if content.content_type is not None and content.content_type.split('/')[0] == 'image': + try: + # use PIL to do the thumbnail generation (http://www.pythonware.com/products/pil/) + # My understanding is that PIL will maintain aspect ratios while restricting + # the max-height/width to be whatever you pass in as 'size' + # @todo: move the thumbnail size to a configuration setting?!? + im = Image.open(StringIO.StringIO(content.data)) + + # I've seen some exceptions from the PIL library when trying to save palletted + # PNG files to JPEG. Per the google-universe, they suggest converting to RGB first. + im = im.convert('RGB') + size = 128, 128 + im.thumbnail(size, Image.ANTIALIAS) + thumbnail_file = StringIO.StringIO() + im.save(thumbnail_file, 'JPEG') + thumbnail_file.seek(0) + + # use a naming convention to associate originals with the thumbnail + thumbnail_name = content.generate_thumbnail_name() + + # then just store this thumbnail as any other piece of content + thumbnail_file_location = StaticContent.compute_location(content.location.org, content.location.course, + thumbnail_name) + thumbnail_content = StaticContent(thumbnail_file_location, thumbnail_name, + 'image/jpeg', thumbnail_file) + + contentstore().save(thumbnail_content) + except: + raise + + return thumbnail_content + + + + diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index 1522288896..0a8e986c14 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -1,14 +1,18 @@ import logging +import os +import mimetypes from .xml import XMLModuleStore from .exceptions import DuplicateItemError +from xmodule.modulestore import Location +from xmodule.contentstore.content import StaticContent log = logging.getLogger(__name__) def import_from_xml(store, data_dir, course_dirs=None, default_class='xmodule.raw_module.RawDescriptor', - load_error_modules=True): + load_error_modules=True, static_content_store=None): """ Import the specified xml data_dir into the "store" modulestore, using org and course as the location org and course. @@ -23,9 +27,16 @@ def import_from_xml(store, data_dir, course_dirs=None, course_dirs=course_dirs, load_error_modules=load_error_modules, ) + for course_id in module_store.modules.keys(): + course_data_dir = None + course_loc = None + for module in module_store.modules[course_id].itervalues(): + if module.category == 'course': + course_loc = module.location + if 'data' in module.definition: store.update_item(module.location, module.definition['data']) if 'children' in module.definition: @@ -33,5 +44,39 @@ def import_from_xml(store, data_dir, course_dirs=None, # NOTE: It's important to use own_metadata here to avoid writing # inherited metadata everywhere. store.update_metadata(module.location, dict(module.own_metadata)) + course_data_dir = module.metadata['data_dir'] + + if static_content_store is not None: + ''' + now import all static assets + ''' + static_dir = '{0}/{1}/static/'.format(data_dir, course_data_dir) + + for dirname, dirnames, filenames in os.walk(static_dir): + for filename in filenames: + + try: + content_path = os.path.join(dirname, filename) + fullname_with_subpath = content_path.replace(static_dir, '') # strip away leading path from the name + content_loc = StaticContent.compute_location(course_loc.org, course_loc.course, fullname_with_subpath) + mime_type = mimetypes.guess_type(filename)[0] + + print 'importing static asset {0} of mime-type {1} from path {2}'.format(content_loc, + mime_type, content_path) + + f = open(content_path, 'rb') + data = f.read() + f.close() + + content = StaticContent(content_loc, filename, mime_type, data) + + static_content_store.save(content) + + # this will be a NOP if content is not an image + thumbnail_content = static_content_store.generate_thumbnail(content) + + except: + raise + return module_store From ab1decd97b8bd3e7425f28486d61e46d07016586 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Thu, 11 Oct 2012 12:51:33 -0400 Subject: [PATCH 2/5] import static assets. Also consolidate asset import/thumbnail generation into a single code base --- cms/djangoapps/contentstore/views.py | 70 +++---------------- .../xmodule/xmodule/contentstore/content.py | 4 +- .../xmodule/modulestore/xml_importer.py | 10 +-- 3 files changed, 19 insertions(+), 65 deletions(-) diff --git a/cms/djangoapps/contentstore/views.py b/cms/djangoapps/contentstore/views.py index 26ed3c53d2..684f419936 100644 --- a/cms/djangoapps/contentstore/views.py +++ b/cms/djangoapps/contentstore/views.py @@ -632,74 +632,26 @@ def upload_asset(request, org, course, coursename): # nomenclature since we're using a FileSystem paradigm here. We're just imposing # the Location string formatting expectations to keep things a bit more consistent - name = request.FILES['file'].name + filename = request.FILES['file'].name mime_type = request.FILES['file'].content_type filedata = request.FILES['file'].read() - thumbnail_file_location = None + content_loc = StaticContent.compute_location(org, course, filename) + content = StaticContent(content_loc, filename, mime_type, filedata) - # if the upload asset is an image, we can generate a thumbnail from it - # let's do so now, so that we have the thumbnail location which we need - # so that the asset can point to it - if mime_type.split('/')[0] == 'image': - try: - # not sure if this is necessary, but let's rewind the stream just in case - request.FILES['file'].seek(0) + # first let's save a thumbnail so we can get back a thumbnail location + thumbnail_content = contentstore().generate_thumbnail(content) - # use PIL to do the thumbnail generation (http://www.pythonware.com/products/pil/) - # My understanding is that PIL will maintain aspect ratios while restricting - # the max-height/width to be whatever you pass in as 'size' - # @todo: move the thumbnail size to a configuration setting?!? - im = Image.open(request.FILES['file']) + if thumbnail_content is not None: + content.thumbnail_location = thumbnail_content.location + del_cached_content(thumbnail_content.location) - # I've seen some exceptions from the PIL library when trying to save palletted - # PNG files to JPEG. Per the google-universe, they suggest converting to RGB first. - im = im.convert('RGB') - size = 128, 128 - im.thumbnail(size, Image.ANTIALIAS) - thumbnail_file = StringIO.StringIO() - im.save(thumbnail_file, 'JPEG') - thumbnail_file.seek(0) - - # use a naming convention to associate originals with the thumbnail - thumbnail_name = StaticContent.generate_thumbnail_name(name) - - # then just store this thumbnail as any other piece of content - thumbnail_file_location = StaticContent.compute_location(org, course, - thumbnail_name, is_thumbnail=True) - thumbnail_content = StaticContent(thumbnail_file_location, thumbnail_name, - 'image/jpeg', thumbnail_file) - contentstore().save(thumbnail_content) - - # remove any cached content at this location, as thumbnails are treated just like any - # other bit of static content - del_cached_content(thumbnail_content.location) - - # not sure if this is necessary, but let's rewind the stream just in case - request.FILES['file'].seek(0) - except: - # catch, log, and continue as thumbnails are not a hard requirement - logging.error('Failed to generate thumbnail for {0}. Continuing...'.format(name)) - thumbnail_file_location = None - - - file_location = StaticContent.compute_location(org, course, name) - - # create a StaticContent entity and point to the thumbnail - content = StaticContent(file_location, name, mime_type, filedata, thumbnail_location = thumbnail_file_location) - - # first commit to the DB + #then commit the content contentstore().save(content) - - # then remove the cache so we're not serving up stale content - # NOTE: we're not re-populating the cache here as the DB owns the last-modified timestamp - # which is used when serving up static content. This integrity is needed for - # browser-side caching support. We *could* re-fetch the saved content so that we have the - # timestamp populated, but we might as well wait for the first real request to come in - # to re-populate the cache. del_cached_content(content.location) + response = HttpResponse('Upload completed') - response['asset_url'] = StaticContent.get_url_path_from_location(file_location) + response['asset_url'] = StaticContent.get_url_path_from_location(content.location) return response ''' diff --git a/common/lib/xmodule/xmodule/contentstore/content.py b/common/lib/xmodule/xmodule/contentstore/content.py index 953ecabaf9..7e70ced08e 100644 --- a/common/lib/xmodule/xmodule/contentstore/content.py +++ b/common/lib/xmodule/xmodule/contentstore/content.py @@ -118,11 +118,11 @@ class ContentStore(object): thumbnail_file.seek(0) # use a naming convention to associate originals with the thumbnail - thumbnail_name = content.generate_thumbnail_name() + thumbnail_name = StaticContent.generate_thumbnail_name(content.location.name) # then just store this thumbnail as any other piece of content thumbnail_file_location = StaticContent.compute_location(content.location.org, content.location.course, - thumbnail_name) + thumbnail_name, is_thumbnail = True) thumbnail_content = StaticContent(thumbnail_file_location, thumbnail_name, 'image/jpeg', thumbnail_file) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index 0a8e986c14..1e52e7e2bd 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -70,13 +70,15 @@ def import_from_xml(store, data_dir, course_dirs=None, content = StaticContent(content_loc, filename, mime_type, data) - static_content_store.save(content) - - # this will be a NOP if content is not an image + # first let's save a thumbnail so we can get back a thumbnail location thumbnail_content = static_content_store.generate_thumbnail(content) + if thumbnail_content is not None: + content.thumbnail_location = thumbnail_content.location + + #then commit the content + static_content_store.save(content) except: raise - return module_store From a521ea239fbee8e8468ccb6cc0d2a951c8a55933 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Thu, 11 Oct 2012 13:41:16 -0400 Subject: [PATCH 3/5] baseline working for importing courseware and static assets --- cms/envs/dev.py | 1 + .../lib/xmodule/xmodule/contentstore/mongo.py | 2 +- .../xmodule/modulestore/xml_importer.py | 115 ++++++++++++------ 3 files changed, 77 insertions(+), 41 deletions(-) diff --git a/cms/envs/dev.py b/cms/envs/dev.py index e00c52a13e..faf7dc8886 100644 --- a/cms/envs/dev.py +++ b/cms/envs/dev.py @@ -12,6 +12,7 @@ TEMPLATE_DEBUG = DEBUG LOGGING = get_logger_config(ENV_ROOT / "log", logging_env="dev", tracking_filename="tracking.log", + dev_env = True, debug=True) modulestore_options = { diff --git a/common/lib/xmodule/xmodule/contentstore/mongo.py b/common/lib/xmodule/xmodule/contentstore/mongo.py index 9a99006e61..bd8e9dc3ca 100644 --- a/common/lib/xmodule/xmodule/contentstore/mongo.py +++ b/common/lib/xmodule/xmodule/contentstore/mongo.py @@ -18,7 +18,7 @@ class MongoContentStore(ContentStore): logging.debug( 'Using MongoDB for static content serving at host={0} db={1}'.format(host,db)) _db = Connection(host=host, port=port, **kwargs)[db] - if self.user is not None and self.password is not None: + if user is not None and password is not None: _db.authenticate(user, password) self.fs = gridfs.GridFS(_db) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index 1e52e7e2bd..80ad9bf8bb 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -5,10 +5,67 @@ import mimetypes from .xml import XMLModuleStore from .exceptions import DuplicateItemError from xmodule.modulestore import Location -from xmodule.contentstore.content import StaticContent +from xmodule.contentstore.content import StaticContent, XASSET_SRCREF_PREFIX log = logging.getLogger(__name__) +def import_static_content(modules, data_dir, static_content_store): + + remap_dict = {} + + course_data_dir = None + course_loc = None + + # quick scan to find the course module and pull out the data_dir and location + # maybe there an easier way to look this up?!? + + for module in modules.itervalues(): + if module.category == 'course': + course_loc = module.location + course_data_dir = module.metadata['data_dir'] + + if course_data_dir is None or course_loc is None: + return remap_dict + + ''' + now import all static assets + ''' + static_dir = '{0}/{1}/static/'.format(data_dir, course_data_dir) + + for dirname, dirnames, filenames in os.walk(static_dir): + for filename in filenames: + + try: + content_path = os.path.join(dirname, filename) + fullname_with_subpath = content_path.replace(static_dir, '') # strip away leading path from the name + content_loc = StaticContent.compute_location(course_loc.org, course_loc.course, fullname_with_subpath) + mime_type = mimetypes.guess_type(filename)[0] + + print 'importing static asset {0} of mime-type {1} from path {2}'.format(content_loc, + mime_type, content_path) + + f = open(content_path, 'rb') + data = f.read() + f.close() + + content = StaticContent(content_loc, filename, mime_type, data) + + # first let's save a thumbnail so we can get back a thumbnail location + thumbnail_content = static_content_store.generate_thumbnail(content) + + if thumbnail_content is not None: + content.thumbnail_location = thumbnail_content.location + + #then commit the content + static_content_store.save(content) + + #store the remapping information which will be needed to subsitute in the module data + remap_dict[fullname_with_subpath] = content_loc.name + + except: + raise + + return remap_dict def import_from_xml(store, data_dir, course_dirs=None, default_class='xmodule.raw_module.RawDescriptor', @@ -29,56 +86,34 @@ def import_from_xml(store, data_dir, course_dirs=None, ) for course_id in module_store.modules.keys(): - course_data_dir = None - course_loc = None + + remap_dict = {} + if static_content_store is not None: + remap_dict = import_static_content(module_store.modules[course_id], data_dir, static_content_store) for module in module_store.modules[course_id].itervalues(): if module.category == 'course': course_loc = module.location + course_data_dir = module.metadata['data_dir'] if 'data' in module.definition: - store.update_item(module.location, module.definition['data']) + module_data = module.definition['data'] + + # cdodge: update any references to the static content paths + # This is a bit brute force - simple search/replace - but it's unlikely that such references to '/static/....' + # would occur naturally (in the wild) + if '/static/' in module_data: + for subkey in remap_dict.keys(): + module_data = module_data.replace('/static/' + subkey, 'xasset:' + remap_dict[subkey]) + logging.debug("was {0} now {1}".format(module.definition['data'], module_data)) + + store.update_item(module.location, module_data) if 'children' in module.definition: store.update_children(module.location, module.definition['children']) # NOTE: It's important to use own_metadata here to avoid writing # inherited metadata everywhere. store.update_metadata(module.location, dict(module.own_metadata)) - course_data_dir = module.metadata['data_dir'] - - if static_content_store is not None: - ''' - now import all static assets - ''' - static_dir = '{0}/{1}/static/'.format(data_dir, course_data_dir) - - for dirname, dirnames, filenames in os.walk(static_dir): - for filename in filenames: - - try: - content_path = os.path.join(dirname, filename) - fullname_with_subpath = content_path.replace(static_dir, '') # strip away leading path from the name - content_loc = StaticContent.compute_location(course_loc.org, course_loc.course, fullname_with_subpath) - mime_type = mimetypes.guess_type(filename)[0] - - print 'importing static asset {0} of mime-type {1} from path {2}'.format(content_loc, - mime_type, content_path) - - f = open(content_path, 'rb') - data = f.read() - f.close() - - content = StaticContent(content_loc, filename, mime_type, data) - - # first let's save a thumbnail so we can get back a thumbnail location - thumbnail_content = static_content_store.generate_thumbnail(content) - - if thumbnail_content is not None: - content.thumbnail_location = thumbnail_content.location - - #then commit the content - static_content_store.save(content) - except: - raise + return module_store From bf392e360a29aa503227ad006c19d25ac43907b6 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Thu, 11 Oct 2012 13:52:15 -0400 Subject: [PATCH 4/5] on import set course metadata to hide the progress tab since we don't yet support grading policies --- common/lib/xmodule/xmodule/modulestore/xml_importer.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index 80ad9bf8bb..1ab283e7f6 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -94,8 +94,8 @@ def import_from_xml(store, data_dir, course_dirs=None, for module in module_store.modules[course_id].itervalues(): if module.category == 'course': - course_loc = module.location - course_data_dir = module.metadata['data_dir'] + # HACK: for now we don't support progress tabs. There's a special metadata configuration setting for this. + module.metadata['hide_progress_tab'] = True if 'data' in module.definition: module_data = module.definition['data'] @@ -106,11 +106,13 @@ def import_from_xml(store, data_dir, course_dirs=None, if '/static/' in module_data: for subkey in remap_dict.keys(): module_data = module_data.replace('/static/' + subkey, 'xasset:' + remap_dict[subkey]) - logging.debug("was {0} now {1}".format(module.definition['data'], module_data)) - + store.update_item(module.location, module_data) + + if 'children' in module.definition: store.update_children(module.location, module.definition['children']) + # NOTE: It's important to use own_metadata here to avoid writing # inherited metadata everywhere. store.update_metadata(module.location, dict(module.own_metadata)) From 82abdd07dc4600f248fb2746d93dc8b19a966d53 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Thu, 11 Oct 2012 16:23:28 -0400 Subject: [PATCH 5/5] respond to some of Cale's comments --- .../xmodule/modulestore/xml_importer.py | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index 1ab283e7f6..7adbd0aaa6 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -27,9 +27,8 @@ def import_static_content(modules, data_dir, static_content_store): if course_data_dir is None or course_loc is None: return remap_dict - ''' - now import all static assets - ''' + + # now import all static assets static_dir = '{0}/{1}/static/'.format(data_dir, course_data_dir) for dirname, dirnames, filenames in os.walk(static_dir): @@ -85,8 +84,10 @@ def import_from_xml(store, data_dir, course_dirs=None, load_error_modules=load_error_modules, ) + # NOTE: the XmlModuleStore does not implement get_items() which would be a preferable means + # to enumerate the entire collection of course modules. It will be left as a TBD to implement that + # method on XmlModuleStore. for course_id in module_store.modules.keys(): - remap_dict = {} if static_content_store is not None: remap_dict = import_static_content(module_store.modules[course_id], data_dir, static_content_store) @@ -103,10 +104,15 @@ def import_from_xml(store, data_dir, course_dirs=None, # cdodge: update any references to the static content paths # This is a bit brute force - simple search/replace - but it's unlikely that such references to '/static/....' # would occur naturally (in the wild) - if '/static/' in module_data: - for subkey in remap_dict.keys(): - module_data = module_data.replace('/static/' + subkey, 'xasset:' + remap_dict[subkey]) - + # @TODO, sorry a bit of technical debt here. There are some helper methods in xmodule_modifiers.py and static_replace.py which could + # better do the url replace on the html rendering side rather than on the ingest side + try: + if '/static/' in module_data: + for subkey in remap_dict.keys(): + module_data = module_data.replace('/static/' + subkey, 'xasset:' + remap_dict[subkey]) + except: + pass # part of the techincal debt is that module_data might not be a string (e.g. ABTest) + store.update_item(module.location, module_data)