From 6642cdddae80aefcd275bc90e54a854b0d4551e3 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Mon, 20 May 2013 15:37:02 -0400 Subject: [PATCH] support uploading and referencing assets as streams rather than having to read everything into memory first --- cms/djangoapps/contentstore/views/assets.py | 37 +++++++++++++----- common/djangoapps/contentserver/middleware.py | 17 +++++--- .../xmodule/xmodule/contentstore/content.py | 39 ++++++++++++++++++- .../lib/xmodule/xmodule/contentstore/mongo.py | 36 +++++++++++++---- 4 files changed, 104 insertions(+), 25 deletions(-) diff --git a/cms/djangoapps/contentstore/views/assets.py b/cms/djangoapps/contentstore/views/assets.py index c85570fede..cf684ab3ba 100644 --- a/cms/djangoapps/contentstore/views/assets.py +++ b/cms/djangoapps/contentstore/views/assets.py @@ -89,11 +89,11 @@ def asset_index(request, org, course, name): }) -@login_required @ensure_csrf_cookie +@login_required def upload_asset(request, org, course, coursename): ''' - cdodge: this method allows for POST uploading of files into the course asset library, which will + This method allows for POST uploading of files into the course asset library, which will be supported by GridFS in MongoDB. ''' if request.method != 'POST': @@ -118,16 +118,25 @@ def upload_asset(request, org, course, coursename): # compute a 'filename' which is similar to the location formatting, we're using the 'filename' # nomenclature since we're using a FileSystem paradigm here. We're just imposing # the Location string formatting expectations to keep things a bit more consistent - - filename = request.FILES['file'].name - mime_type = request.FILES['file'].content_type - filedata = request.FILES['file'].read() + upload_file = request.FILES['file'] + filename = upload_file.name + mime_type = upload_file.content_type content_loc = StaticContent.compute_location(org, course, filename) - content = StaticContent(content_loc, filename, mime_type, filedata) + + chunked = upload_file.multiple_chunks() + if chunked: + content = StaticContent(content_loc, filename, mime_type, upload_file.chunks()) + else: + content = StaticContent(content_loc, filename, mime_type, upload_file.read()) + + thumbnail_content = None + thumbnail_location = None # first let's see if a thumbnail can be created - (thumbnail_content, thumbnail_location) = contentstore().generate_thumbnail(content) + (thumbnail_content, thumbnail_location) = contentstore().generate_thumbnail(content, + tempfile_path=None if not chunked else + upload_file.temporary_file_path()) # delete cached thumbnail even if one couldn't be created this time (else the old thumbnail will continue to show) del_cached_content(thumbnail_location) @@ -208,7 +217,9 @@ def remove_asset(request, org, course, name): @ensure_csrf_cookie @login_required def import_course(request, org, course, name): - + """ + This method will handle a POST request to upload and import a .tar.gz file into a specified course + """ location = get_location_and_verify_access(request, org, course, name) if request.method == 'POST': @@ -282,6 +293,10 @@ def import_course(request, org, course, name): @ensure_csrf_cookie @login_required def generate_export_course(request, org, course, name): + """ + This method will serialize out a course to a .tar.gz file which contains a XML-based representation of + the course + """ location = get_location_and_verify_access(request, org, course, name) loc = Location(location) @@ -312,7 +327,9 @@ def generate_export_course(request, org, course, name): @ensure_csrf_cookie @login_required def export_course(request, org, course, name): - + """ + This method serves up the 'Export Course' page + """ location = get_location_and_verify_access(request, org, course, name) course_module = modulestore().get_item(location) diff --git a/common/djangoapps/contentserver/middleware.py b/common/djangoapps/contentserver/middleware.py index 7deb0901aa..1f5e65b910 100644 --- a/common/djangoapps/contentserver/middleware.py +++ b/common/djangoapps/contentserver/middleware.py @@ -6,6 +6,7 @@ from xmodule.modulestore import InvalidLocationError from cache_toolbox.core import get_cached_content, set_cached_content from xmodule.exceptions import NotFoundError +import logging class StaticContentServer(object): def process_request(self, request): @@ -24,17 +25,21 @@ class StaticContentServer(object): if content is None: # nope, not in cache, let's fetch from DB try: - content = contentstore().find(loc) + content = contentstore().find(loc, as_stream=True) except NotFoundError: response = HttpResponse() response.status_code = 404 return response - # since we fetched it from DB, let's cache it going forward - set_cached_content(content) + # since we fetched it from DB, let's cache it going forward, but only if it's < 1MB + # this is because I haven't been able to find a means to stream data out of memcached + if content.length is not None: + if content.length < 1048576: + # since we've queried as a stream, let's read in the stream into memory to set in cache + content = content.copy_to_in_mem() + set_cached_content(content) else: - # @todo: we probably want to have 'cache hit' counters so we can - # measure the efficacy of our caches + # NOP here, but we may wish to add a "cache-hit" counter in the future pass # see if the last-modified at hasn't changed, if not return a 302 (Not Modified) @@ -50,7 +55,7 @@ class StaticContentServer(object): if if_modified_since == last_modified_at_str: return HttpResponseNotModified() - response = HttpResponse(content.data, content_type=content.content_type) + response = HttpResponse(content.stream_data(), content_type=content.content_type) response['Last-Modified'] = last_modified_at_str return response diff --git a/common/lib/xmodule/xmodule/contentstore/content.py b/common/lib/xmodule/xmodule/contentstore/content.py index b0d8c357a3..28a78ea8c1 100644 --- a/common/lib/xmodule/xmodule/contentstore/content.py +++ b/common/lib/xmodule/xmodule/contentstore/content.py @@ -14,11 +14,13 @@ from PIL import Image class StaticContent(object): - def __init__(self, loc, name, content_type, data, last_modified_at=None, thumbnail_location=None, import_path=None): + def __init__(self, loc, name, content_type, data, last_modified_at=None, thumbnail_location=None, import_path=None, + length=None): self.location = loc self.name = name # a display string which can be edited, and thus not part of the location which needs to be fixed self.content_type = content_type - self.data = data + self._data = data + self.length = length self.last_modified_at = last_modified_at self.thumbnail_location = Location(thumbnail_location) if thumbnail_location is not None else None # optional information about where this file was imported from. This is needed to support import/export @@ -45,6 +47,10 @@ class StaticContent(object): def get_url_path(self): return StaticContent.get_url_path_from_location(self.location) + @property + def data(self): + return self._data + @staticmethod def get_url_path_from_location(location): if location is not None: @@ -80,6 +86,35 @@ class StaticContent(object): loc = StaticContent.compute_location(course_namespace.org, course_namespace.course, path) return StaticContent.get_url_path_from_location(loc) + def stream_data(self): + yield self._data + + +class StaticContentStream(StaticContent): + def __init__(self, loc, name, content_type, stream, last_modified_at=None, thumbnail_location=None, import_path=None, + length=None): + super(StaticContentStream, self).__init__(loc, name, content_type, None, last_modified_at=last_modified_at, + thumbnail_location=thumbnail_location, import_path=import_path, + length=length) + self._stream = stream + + def stream_data(self): + while True: + chunk = self._stream.read(1024) + if len(chunk) == 0: + break + yield chunk + + def close(self): + self._stream.close() + + def copy_to_in_mem(self): + self._stream.seek(0) + content = StaticContent(self.location, self.name, self.content_type, self._stream.read(), + last_modified_at=self.last_modified_at, thumbnail_location=self.thumbnail_location, + import_path=self.import_path, length=self.length) + return content + class ContentStore(object): ''' diff --git a/common/lib/xmodule/xmodule/contentstore/mongo.py b/common/lib/xmodule/xmodule/contentstore/mongo.py index 8cb4143fd9..482ce585e6 100644 --- a/common/lib/xmodule/xmodule/contentstore/mongo.py +++ b/common/lib/xmodule/xmodule/contentstore/mongo.py @@ -8,7 +8,7 @@ from xmodule.contentstore.content import XASSET_LOCATION_TAG import logging -from .content import StaticContent, ContentStore +from .content import StaticContent, ContentStore, StaticContentStream from xmodule.exceptions import NotFoundError from fs.osfs import OSFS import os @@ -47,20 +47,42 @@ class MongoContentStore(ContentStore): if self.fs.exists({"_id": id}): self.fs.delete(id) - def find(self, location, throw_on_not_found=True): + def find(self, location, throw_on_not_found=True, as_stream=False): id = StaticContent.get_id_from_location(location) try: - with self.fs.get(id) as fp: - return StaticContent(location, fp.displayname, fp.content_type, fp.read(), - fp.uploadDate, - thumbnail_location=fp.thumbnail_location if hasattr(fp, 'thumbnail_location') else None, - import_path=fp.import_path if hasattr(fp, 'import_path') else None) + if as_stream: + fp = self.fs.get(id) + return StaticContentStream(location, fp.displayname, fp.content_type, fp, last_modified_at=fp.uploadDate, + thumbnail_location=fp.thumbnail_location if hasattr(fp, 'thumbnail_location') else None, + import_path=fp.import_path if hasattr(fp, 'import_path') else None, + length=fp.length) + else: + with self.fs.get(id) as fp: + return StaticContent(location, fp.displayname, fp.content_type, fp.read(), last_modified_at=fp.uploadDate, + thumbnail_location=fp.thumbnail_location if hasattr(fp, 'thumbnail_location') else None, + import_path=fp.import_path if hasattr(fp, 'import_path') else None, + length=fp.length) except NoFile: if throw_on_not_found: raise NotFoundError() else: return None + def get_stream(self, location): + id = StaticContent.get_id_from_location(location) + try: + handle = self.fs.get(id) + except NoFile: + raise NotFoundError() + + return handle + + def close_stream(self, handle): + try: + handle.close() + except: + pass + def export(self, location, output_directory): content = self.find(location)