From d991595ecb8e75382a43220d228d1d628101fd40 Mon Sep 17 00:00:00 2001 From: Julian Arni Date: Fri, 16 Aug 2013 13:57:02 -0400 Subject: [PATCH] Split import-export into new file --- .../contentstore/tests/test_assets.py | 72 ---- .../contentstore/tests/test_import_export.py | 85 +++++ cms/djangoapps/contentstore/views/__init__.py | 1 + cms/djangoapps/contentstore/views/assets.py | 248 +------------- .../contentstore/views/import_export.py | 309 ++++++++++++++++++ 5 files changed, 396 insertions(+), 319 deletions(-) create mode 100644 cms/djangoapps/contentstore/tests/test_import_export.py create mode 100644 cms/djangoapps/contentstore/views/import_export.py diff --git a/cms/djangoapps/contentstore/tests/test_assets.py b/cms/djangoapps/contentstore/tests/test_assets.py index 9add306d1d..9bde503b4d 100644 --- a/cms/djangoapps/contentstore/tests/test_assets.py +++ b/cms/djangoapps/contentstore/tests/test_assets.py @@ -75,78 +75,6 @@ class UploadTestCase(CourseTestCase): resp = self.client.get(self.url) self.assertEquals(resp.status_code, 405) -class ImportTestCase(CourseTestCase): - """ - Unit tests for importing a course - """ - - def setUp(self): - super(ImportTestCase, self).setUp() - self.url = reverse("import_course", kwargs={ - 'org': self.course.location.org, - 'course': self.course.location.course, - 'name': self.course.location.name, - }) - self.content_dir = tempfile.mkdtemp() - - def touch(name): - """ Equivalent to shell's 'touch'""" - with file(name, 'a'): - os.utime(name, None) - - # Create tar test files - good_dir = tempfile.mkdtemp(dir=self.content_dir) - os.makedirs(os.path.join(good_dir, "course")) - with open(os.path.join(good_dir, "course.xml") , "w+") as f: - f.write('') - - with open(os.path.join(good_dir, "course", "2013_Spring.xml"), "w+") as f: - f.write('') - - - self.good_tar = os.path.join(self.content_dir, "good.tar.gz") - with tarfile.open(self.good_tar, "w:gz") as gtar: - gtar.add(good_dir) - - bad_dir = tempfile.mkdtemp(dir=self.content_dir) - touch(os.path.join(bad_dir, "bad.xml")) - self.bad_tar = os.path.join(self.content_dir, "bad.tar.gz") - with tarfile.open(self.bad_tar, "w:gz") as btar: - btar.add(bad_dir) - - def tearDown(self): - shutil.rmtree(self.content_dir) - - def test_no_coursexml(self): - """ - Check that the response for a tar.gz import without a course.xml is - correct. - """ - with open(self.bad_tar) as btar: - resp = self.client.post( - self.url, - { - "name": self.bad_tar, - "course-data": [btar] - }) - self.assertEquals(resp.status_code, 415) - - def test_with_coursexml(self): - """ - Check that the response for a tar.gz import with a course.xml is - correct. - """ - with open(self.good_tar) as gtar: - resp = self.client.post( - self.url, - { - "name": self.good_tar, - "course-data": [gtar] - }) - self.assert2XX(resp.status_code) - - - class AssetsToJsonTestCase(TestCase): """ diff --git a/cms/djangoapps/contentstore/tests/test_import_export.py b/cms/djangoapps/contentstore/tests/test_import_export.py new file mode 100644 index 0000000000..d09f3a9715 --- /dev/null +++ b/cms/djangoapps/contentstore/tests/test_import_export.py @@ -0,0 +1,85 @@ +""" +Unit tests for course import and export +""" +import os +import shutil +import tarfile +import tempfile +from .utils import CourseTestCase +from django.core.urlresolvers import reverse + +from xmodule.modulestore import Location +from contentstore.views import import_export + + +class ImportTestCase(CourseTestCase): + """ + Unit tests for importing a course + """ + + def setUp(self): + super(ImportTestCase, self).setUp() + self.url = reverse("import_course", kwargs={ + 'org': self.course.location.org, + 'course': self.course.location.course, + 'name': self.course.location.name, + }) + self.content_dir = tempfile.mkdtemp() + + def touch(name): + """ Equivalent to shell's 'touch'""" + with file(name, 'a'): + os.utime(name, None) + + # Create tar test files ----------------------------------------------- + # OK course: + good_dir = tempfile.mkdtemp(dir=self.content_dir) + os.makedirs(os.path.join(good_dir, "course")) + with open(os.path.join(good_dir, "course.xml") , "w+") as f: + f.write('') + + with open(os.path.join(good_dir, "course", "2013_Spring.xml"), "w+") as f: + f.write('') + + self.good_tar = os.path.join(self.content_dir, "good.tar.gz") + with tarfile.open(self.good_tar, "w:gz") as gtar: + gtar.add(good_dir) + + # Bad course (no 'course.xml' file): + bad_dir = tempfile.mkdtemp(dir=self.content_dir) + touch(os.path.join(bad_dir, "bad.xml")) + self.bad_tar = os.path.join(self.content_dir, "bad.tar.gz") + with tarfile.open(self.bad_tar, "w:gz") as btar: + btar.add(bad_dir) + + def tearDown(self): + shutil.rmtree(self.content_dir) + + def test_no_coursexml(self): + """ + Check that the response for a tar.gz import without a course.xml is + correct. + """ + with open(self.bad_tar) as btar: + resp = self.client.post( + self.url, + { + "name": self.bad_tar, + "course-data": [btar] + }) + self.assertEquals(resp.status_code, 415) + + def test_with_coursexml(self): + """ + Check that the response for a tar.gz import with a course.xml is + correct. + """ + with open(self.good_tar) as gtar: + resp = self.client.post( + self.url, + { + "name": self.good_tar, + "course-data": [gtar] + }) + self.assert2XX(resp.status_code) + diff --git a/cms/djangoapps/contentstore/views/__init__.py b/cms/djangoapps/contentstore/views/__init__.py index 197c54ff36..10f6fb79a7 100644 --- a/cms/djangoapps/contentstore/views/__init__.py +++ b/cms/djangoapps/contentstore/views/__init__.py @@ -10,6 +10,7 @@ from .component import * from .course import * from .error import * from .item import * +from .import_export import * from .preview import * from .public import * from .user import * diff --git a/cms/djangoapps/contentstore/views/assets.py b/cms/djangoapps/contentstore/views/assets.py index 2ca55148fd..23f855d33c 100644 --- a/cms/djangoapps/contentstore/views/assets.py +++ b/cms/djangoapps/contentstore/views/assets.py @@ -36,8 +36,7 @@ from .access import get_location_and_verify_access from util.json_request import JsonResponse -__all__ = ['asset_index', 'upload_asset', 'import_course', - 'generate_export_course', 'export_course'] +__all__ = ['asset_index', 'upload_asset'] MAX_UP_LENGTH = 20000352 # Max chunk size @@ -265,248 +264,3 @@ def remove_asset(request, org, course, name): return HttpResponse() -@ensure_csrf_cookie -@require_http_methods(("GET", "POST", "PUT")) -@login_required -def import_course(request, org, course, name): - """ - This method will handle a POST request to upload and import a .tar.gz file - into a specified course - """ - location = get_location_and_verify_access(request, org, course, name) - - if request.method == 'POST': - - data_root = path(settings.GITHUB_REPO_ROOT) - course_subdir = "{0}-{1}-{2}".format(org, course, name) - course_dir = data_root / course_subdir - - filename = request.FILES['course-data'].name - if not filename.endswith('.tar.gz'): - return JsonResponse( - { 'ErrMsg': 'We only support uploading a .tar.gz file.' }, - status=415 - ) - temp_filepath = course_dir / filename - - if not course_dir.isdir(): - os.mkdir(course_dir) - - logging.debug('importing course to {0}'.format(temp_filepath)) - - # Get upload chunks byte ranges - try: - matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"]) - content_range = matches.groupdict() - except KeyError: # Single chunk - no Content-Range header - content_range = {'start': 0, 'stop': 9, 'end': 10} - - # stream out the uploaded files in chunks to disk - if int(content_range['start']) == 0: - mode = "wb+" - else: - mode = "ab+" - size = os.path.getsize(temp_filepath) - # Check to make sure we haven't missed a chunk - # This shouldn't happen, even if different instances are handling - # the same session, but it's always better to catch errors earlier. - if size != int(content_range['start']): - log.warning( - "Reported range %s does not match size downloaded so far %s", - size, - content_range['start'] - ) - return JsonResponse( - { 'ErrMsg': 'File upload corrupted. Please try again' }, - status=409 - ) - - - with open(temp_filepath, mode) as temp_file: - for chunk in request.FILES['course-data'].chunks(): - temp_file.write(chunk) - - size = os.path.getsize(temp_filepath) - - if int(content_range['stop']) != int(content_range['end']) - 1: - # More chunks coming - return JsonResponse({ - "files": [{ - "name": filename, - "size": size, - "deleteUrl": "", - "deleteType": "", - "url": reverse('import_course', kwargs={ - 'org': location.org, - 'course': location.course, - 'name': location.name - }), - "thumbnailUrl": "" - }] - }) - - else: #This was the last chunk. - - # 'Lock' with status info. - lock_filepath = data_root / (filename + ".lock") - - with open(lock_filepath, 'w+') as lf: - lf.write("Extracting") - - tar_file = tarfile.open(temp_filepath) - tar_file.extractall(course_dir + '/') - - with open(lock_filepath, 'w+') as lf: - lf.write("Verifying") - - # find the 'course.xml' file - dirpath = None - - coursexmls = ((d, f) for d, _, f in os.walk(course_dir) - if f.count('course.xml') > 0) - - try: - (dirpath, fname) = coursexmls.next() - except StopIteration: - return JsonResponse( - {'ErrMsg': 'Could not find the course.xml file in the package.' }, - status=415 - ) - - logging.debug('found course.xml at {0}'.format(dirpath)) - - if dirpath != course_dir: - for fname in os.listdir(dirpath): - shutil.move(dirpath / fname, course_dir) - - _module_store, course_items = import_from_xml( - modulestore('direct'), - settings.GITHUB_REPO_ROOT, - [course_subdir], - load_error_modules=False, - static_content_store=contentstore(), - target_location_namespace=location, - draft_store=modulestore() - ) - - # we can blow this away when we're done importing. - shutil.rmtree(course_dir) - - logging.debug('new course at {0}'.format(course_items[0].location)) - - with open(lock_filepath, 'w') as lf: - lf.write("Updating course") - - create_all_course_groups(request.user, course_items[0].location) - logging.debug('created all course groups at {0}'.format(course_items[0].location)) - - os.remove(lock_filepath) - - return JsonResponse({'Status': 'OK'}) - else: - course_module = modulestore().get_item(location) - - return render_to_response('import.html', { - 'context_course': course_module, - 'successful_import_redirect_url': reverse('course_index', kwargs={ - 'org': location.org, - 'course': location.course, - 'name': location.name, - }) - }) - - -@ensure_csrf_cookie -@login_required -def generate_export_course(request, org, course, name): - """ - This method will serialize out a course to a .tar.gz file which contains a - XML-based representation of the course - """ - location = get_location_and_verify_access(request, org, course, name) - course_module = modulestore().get_instance(location.course_id, location) - loc = Location(location) - export_file = NamedTemporaryFile(prefix=name + '.', suffix=".tar.gz") - - root_dir = path(mkdtemp()) - - try: - export_to_xml(modulestore('direct'), contentstore(), loc, root_dir, name, modulestore()) - except SerializationError, e: - logging.exception('There was an error exporting course {0}. {1}'.format(course_module.location, unicode(e))) - - unit = None - failed_item = None - parent = None - try: - failed_item = modulestore().get_instance(course_module.location.course_id, e.location) - parent_locs = modulestore().get_parent_locations(failed_item.location, course_module.location.course_id) - - if len(parent_locs) > 0: - parent = modulestore().get_item(parent_locs[0]) - if parent.location.category == 'vertical': - unit = parent - except: - # if we have a nested exception, then we'll show the more generic error message - pass - - return render_to_response('export.html', { - 'context_course': course_module, - 'successful_import_redirect_url': '', - 'in_err': True, - 'raw_err_msg': str(e), - 'failed_module': failed_item, - 'unit': unit, - 'edit_unit_url': reverse('edit_unit', kwargs={ - 'location': parent.location - }) if parent else '', - 'course_home_url': reverse('course_index', kwargs={ - 'org': org, - 'course': course, - 'name': name - }) - }) - except Exception, e: - logging.exception('There was an error exporting course {0}. {1}'.format(course_module.location, unicode(e))) - return render_to_response('export.html', { - 'context_course': course_module, - 'successful_import_redirect_url': '', - 'in_err': True, - 'unit': None, - 'raw_err_msg': str(e), - 'course_home_url': reverse('course_index', kwargs={ - 'org': org, - 'course': course, - 'name': name - }) - }) - - logging.debug('tar file being generated at {0}'.format(export_file.name)) - tar_file = tarfile.open(name=export_file.name, mode='w:gz') - tar_file.add(root_dir / name, arcname=name) - tar_file.close() - - # remove temp dir - shutil.rmtree(root_dir / name) - - wrapper = FileWrapper(export_file) - response = HttpResponse(wrapper, content_type='application/x-tgz') - response['Content-Disposition'] = 'attachment; filename=%s' % os.path.basename(export_file.name) - response['Content-Length'] = os.path.getsize(export_file.name) - return response - - -@ensure_csrf_cookie -@login_required -def export_course(request, org, course, name): - """ - This method serves up the 'Export Course' page - """ - location = get_location_and_verify_access(request, org, course, name) - - course_module = modulestore().get_item(location) - - return render_to_response('export.html', { - 'context_course': course_module, - 'successful_import_redirect_url': '' - }) diff --git a/cms/djangoapps/contentstore/views/import_export.py b/cms/djangoapps/contentstore/views/import_export.py new file mode 100644 index 0000000000..83bcdd402f --- /dev/null +++ b/cms/djangoapps/contentstore/views/import_export.py @@ -0,0 +1,309 @@ +""" +These views handle all actions in Studio related to import and exporting of courses +""" +import logging +import os +import tarfile +import shutil +import re +from tempfile import mkdtemp +from path import path + +from django.conf import settings +from django.http import HttpResponse +from django.contrib.auth.decorators import login_required +from django_future.csrf import ensure_csrf_cookie +from django.core.urlresolvers import reverse +from django.core.servers.basehttp import FileWrapper +from django.core.files.temp import NamedTemporaryFile +from django.views.decorators.http import require_http_methods + +from mitxmako.shortcuts import render_to_response +from cache_toolbox.core import del_cached_content +from auth.authz import create_all_course_groups + +from xmodule.modulestore.xml_importer import import_from_xml +from xmodule.contentstore.django import contentstore +from xmodule.modulestore.xml_exporter import export_to_xml +from xmodule.modulestore.django import modulestore +from xmodule.modulestore import Location +from xmodule.exceptions import SerializationError + +from .access import get_location_and_verify_access +from util.json_request import JsonResponse + + +__all__ = ['import_course', 'generate_export_course', 'export_course'] + +log = logging.getLogger(__name__) + + +MAX_UP_LENGTH = 20000352 # Max chunk size for uploads + +# Regex to capture Content-Range header ranges. +CONTENT_RE = re.compile(r"(?P\d{1,11})-(?P\d{1,11})/(?P\d{1,11})") + + +@ensure_csrf_cookie +@require_http_methods(("GET", "POST", "PUT")) +@login_required +def import_course(request, org, course, name): + """ + This method will handle a POST request to upload and import a .tar.gz file + into a specified course + """ + location = get_location_and_verify_access(request, org, course, name) + + if request.method == 'POST': + + data_root = path(settings.GITHUB_REPO_ROOT) + course_subdir = "{0}-{1}-{2}".format(org, course, name) + course_dir = data_root / course_subdir + + filename = request.FILES['course-data'].name + if not filename.endswith('.tar.gz'): + return JsonResponse( + { 'ErrMsg': 'We only support uploading a .tar.gz file.' }, + status=415 + ) + temp_filepath = course_dir / filename + + if not course_dir.isdir(): + os.mkdir(course_dir) + + logging.debug('importing course to {0}'.format(temp_filepath)) + + # Get upload chunks byte ranges + try: + matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"]) + content_range = matches.groupdict() + except KeyError: # Single chunk - no Content-Range header + content_range = {'start': 0, 'stop': 9, 'end': 10} + + # stream out the uploaded files in chunks to disk + if int(content_range['start']) == 0: + mode = "wb+" + else: + mode = "ab+" + size = os.path.getsize(temp_filepath) + # Check to make sure we haven't missed a chunk + # This shouldn't happen, even if different instances are handling + # the same session, but it's always better to catch errors earlier. + if size != int(content_range['start']): + log.warning( + "Reported range %s does not match size downloaded so far %s", + size, + content_range['start'] + ) + return JsonResponse( + { 'ErrMsg': 'File upload corrupted. Please try again' }, + status=409 + ) + + + with open(temp_filepath, mode) as temp_file: + for chunk in request.FILES['course-data'].chunks(): + temp_file.write(chunk) + + size = os.path.getsize(temp_filepath) + + if int(content_range['stop']) != int(content_range['end']) - 1: + # More chunks coming + return JsonResponse({ + "files": [{ + "name": filename, + "size": size, + "deleteUrl": "", + "deleteType": "", + "url": reverse('import_course', kwargs={ + 'org': location.org, + 'course': location.course, + 'name': location.name + }), + "thumbnailUrl": "" + }] + }) + + else: # This was the last chunk. + + # 'Lock' with status info. + lock_filepath = data_root / (filename + ".lock") + + with open(lock_filepath, 'w+') as lf: + lf.write("Extracting") + + tar_file = tarfile.open(temp_filepath) + tar_file.extractall(course_dir + '/') + + with open(lock_filepath, 'w+') as lf: + lf.write("Verifying") + + # find the 'course.xml' file + dirpath = None + + def get_all_files(directory): + """ + For each file in the directory, yield a 2-tuple of (file-name, + directory-path) + """ + for dirpath, _dirnames, filenames in os.walk(directory): + for filename in filenames: + yield (filename, dirpath) + + def get_dir_for_fname(directory, filename): + """ + Returns the dirpath for the first file found in the directory + with the given name. If there is no file in the directory with + the specified name, return None. + """ + for fname, dirpath in get_all_files(directory): + if fname == filename: + return dirpath + return None + + fname = "course.xml" + + dirpath = get_dir_for_fname(course_dir, fname) + + if not dirpath: + return JsonResponse( + {'ErrMsg': 'Could not find the course.xml file in the package.' }, + status=415 + ) + + logging.debug('found course.xml at {0}'.format(dirpath)) + + if dirpath != course_dir: + for fname in os.listdir(dirpath): + shutil.move(dirpath / fname, course_dir) + + _module_store, course_items = import_from_xml( + modulestore('direct'), + settings.GITHUB_REPO_ROOT, + [course_subdir], + load_error_modules=False, + static_content_store=contentstore(), + target_location_namespace=location, + draft_store=modulestore() + ) + + # we can blow this away when we're done importing. + shutil.rmtree(course_dir) + + logging.debug('new course at {0}'.format(course_items[0].location)) + + with open(lock_filepath, 'w') as lf: + lf.write("Updating course") + + create_all_course_groups(request.user, course_items[0].location) + logging.debug('created all course groups at {0}'.format(course_items[0].location)) + + os.remove(lock_filepath) + + return JsonResponse({'Status': 'OK'}) + else: + course_module = modulestore().get_item(location) + + return render_to_response('import.html', { + 'context_course': course_module, + 'successful_import_redirect_url': reverse('course_index', kwargs={ + 'org': location.org, + 'course': location.course, + 'name': location.name, + }) + }) + + +@ensure_csrf_cookie +@login_required +def generate_export_course(request, org, course, name): + """ + This method will serialize out a course to a .tar.gz file which contains a + XML-based representation of the course + """ + location = get_location_and_verify_access(request, org, course, name) + course_module = modulestore().get_instance(location.course_id, location) + loc = Location(location) + export_file = NamedTemporaryFile(prefix=name + '.', suffix=".tar.gz") + + root_dir = path(mkdtemp()) + + try: + export_to_xml(modulestore('direct'), contentstore(), loc, root_dir, name, modulestore()) + except SerializationError, e: + logging.exception('There was an error exporting course {0}. {1}'.format(course_module.location, unicode(e))) + unit = None + failed_item = None + parent = None + try: + failed_item = modulestore().get_instance(course_module.location.course_id, e.location) + parent_locs = modulestore().get_parent_locations(failed_item.location, course_module.location.course_id) + + if len(parent_locs) > 0: + parent = modulestore().get_item(parent_locs[0]) + if parent.location.category == 'vertical': + unit = parent + except: + # if we have a nested exception, then we'll show the more generic error message + pass + + return render_to_response('export.html', { + 'context_course': course_module, + 'successful_import_redirect_url': '', + 'in_err': True, + 'raw_err_msg': str(e), + 'failed_module': failed_item, + 'unit': unit, + 'edit_unit_url': reverse('edit_unit', kwargs={ + 'location': parent.location + }) if parent else '', + 'course_home_url': reverse('course_index', kwargs={ + 'org': org, + 'course': course, + 'name': name + }) + }) + except Exception, e: + logging.exception('There was an error exporting course {0}. {1}'.format(course_module.location, unicode(e))) + return render_to_response('export.html', { + 'context_course': course_module, + 'successful_import_redirect_url': '', + 'in_err': True, + 'unit': None, + 'raw_err_msg': str(e), + 'course_home_url': reverse('course_index', kwargs={ + 'org': org, + 'course': course, + 'name': name + }) + }) + + logging.debug('tar file being generated at {0}'.format(export_file.name)) + tar_file = tarfile.open(name=export_file.name, mode='w:gz') + tar_file.add(root_dir / name, arcname=name) + tar_file.close() + + # remove temp dir + shutil.rmtree(root_dir / name) + + wrapper = FileWrapper(export_file) + response = HttpResponse(wrapper, content_type='application/x-tgz') + response['Content-Disposition'] = 'attachment; filename=%s' % os.path.basename(export_file.name) + response['Content-Length'] = os.path.getsize(export_file.name) + return response + + +@ensure_csrf_cookie +@login_required +def export_course(request, org, course, name): + """ + This method serves up the 'Export Course' page + """ + location = get_location_and_verify_access(request, org, course, name) + + course_module = modulestore().get_item(location) + + return render_to_response('export.html', { + 'context_course': course_module, + 'successful_import_redirect_url': '' + })