From 83a451dc0b242bb7b6cf29e48c5a3d94e6501857 Mon Sep 17 00:00:00 2001 From: Jesse Zoldak Date: Mon, 30 Jan 2017 19:21:43 -0500 Subject: [PATCH] Revert "PLAT-1104 Import courses asynchronously" --- cms/djangoapps/contentstore/storage.py | 23 - cms/djangoapps/contentstore/tasks.py | 268 +----------- .../contentstore/views/import_export.py | 407 +++++++++++------- .../views/tests/test_import_export.py | 14 +- cms/envs/aws.py | 14 - cms/envs/common.py | 2 - lms/envs/common.py | 3 - pavelib/paver_tests/test_servers.py | 4 +- pavelib/servers.py | 4 +- requirements/edx/base.txt | 2 +- 10 files changed, 278 insertions(+), 463 deletions(-) delete mode 100644 cms/djangoapps/contentstore/storage.py diff --git a/cms/djangoapps/contentstore/storage.py b/cms/djangoapps/contentstore/storage.py deleted file mode 100644 index 2779b25809..0000000000 --- a/cms/djangoapps/contentstore/storage.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Storage backend for course import and export. -""" -from __future__ import absolute_import - -from django.conf import settings -from django.core.files.storage import get_storage_class - -from storages.backends.s3boto import S3BotoStorage -from storages.utils import setting - - -class ImportExportS3Storage(S3BotoStorage): # pylint: disable=abstract-method - """ - S3 backend for course import and export OLX files. - """ - - def __init__(self): - bucket = setting('COURSE_IMPORT_EXPORT_BUCKET', settings.AWS_STORAGE_BUCKET_NAME) - super(ImportExportS3Storage, self).__init__(bucket=bucket, querystring_auth=True) - -# pylint: disable=invalid-name -course_import_export_storage = get_storage_class(settings.COURSE_IMPORT_EXPORT_STORAGE)() diff --git a/cms/djangoapps/contentstore/tasks.py b/cms/djangoapps/contentstore/tasks.py index 242679318b..b67600e238 100644 --- a/cms/djangoapps/contentstore/tasks.py +++ b/cms/djangoapps/contentstore/tasks.py @@ -1,52 +1,24 @@ """ This file contains celery tasks for contentstore views """ -from __future__ import absolute_import - -import base64 import json import logging -import os -import shutil -import tarfile -from datetime import datetime - from celery.task import task from celery.utils.log import get_task_logger -from path import Path as path +from datetime import datetime from pytz import UTC -from six import iteritems, text_type -from django.conf import settings from django.contrib.auth.models import User -from django.core.exceptions import SuspiciousOperation -from django.test import RequestFactory -from django.utils.text import get_valid_filename -from django.utils.translation import ugettext as _ -from djcelery.common import respect_language -from user_tasks.tasks import UserTask - -import dogstats_wrapper as dog_stats_api from contentstore.courseware_index import CoursewareSearchIndexer, LibrarySearchIndexer, SearchIndexingError -from contentstore.storage import course_import_export_storage from contentstore.utils import initialize_permissions from course_action_state.models import CourseRerunState -from models.settings.course_metadata import CourseMetadata from opaque_keys.edx.keys import CourseKey -from opaque_keys.edx.locator import LibraryLocator -from openedx.core.lib.extract_tar import safetar_extractall -from student.auth import has_course_author_access -from xmodule.contentstore.django import contentstore from xmodule.course_module import CourseFields -from xmodule.modulestore import COURSE_ROOT, LIBRARY_ROOT from xmodule.modulestore.django import modulestore from xmodule.modulestore.exceptions import DuplicateCourseError, ItemNotFoundError -from xmodule.modulestore.xml_importer import import_course_from_xml, import_library_from_xml - LOGGER = get_task_logger(__name__) -FILE_READ_CHUNK = 1024 # bytes FULL_COURSE_REINDEX_THRESHOLD = 1 @@ -58,10 +30,10 @@ def rerun_course(source_course_key_string, destination_course_key_string, user_i # import here, at top level this import prevents the celery workers from starting up correctly from edxval.api import copy_course_videos - source_course_key = CourseKey.from_string(source_course_key_string) - destination_course_key = CourseKey.from_string(destination_course_key_string) try: # deserialize the payload + source_course_key = CourseKey.from_string(source_course_key_string) + destination_course_key = CourseKey.from_string(destination_course_key_string) fields = deserialize_fields(fields) if fields else None # use the split modulestore as the store for the rerun course, @@ -81,7 +53,7 @@ def rerun_course(source_course_key_string, destination_course_key_string, user_i return "succeeded" - except DuplicateCourseError: + except DuplicateCourseError as exc: # do NOT delete the original course, only update the status CourseRerunState.objects.failed(course_key=destination_course_key) logging.exception(u'Course Rerun Error') @@ -100,12 +72,12 @@ def rerun_course(source_course_key_string, destination_course_key_string, user_i # it's possible there was an error even before the course module was created pass - return u"exception: " + text_type(exc) + return "exception: " + unicode(exc) def deserialize_fields(json_fields): fields = json.loads(json_fields) - for field_name, value in iteritems(fields): + for field_name, value in fields.iteritems(): fields[field_name] = getattr(CourseFields, field_name).from_json(value) return fields @@ -127,9 +99,9 @@ def update_search_index(course_id, triggered_time_isoformat): CoursewareSearchIndexer.index(modulestore(), course_key, triggered_at=(_parse_time(triggered_time_isoformat))) except SearchIndexingError as exc: - LOGGER.error(u'Search indexing error for complete course %s - %s', course_id, text_type(exc)) + LOGGER.error('Search indexing error for complete course %s - %s', course_id, unicode(exc)) else: - LOGGER.debug(u'Search indexing successful for complete course %s', course_id) + LOGGER.debug('Search indexing successful for complete course %s', course_id) @task() @@ -140,9 +112,9 @@ def update_library_index(library_id, triggered_time_isoformat): LibrarySearchIndexer.index(modulestore(), library_key, triggered_at=(_parse_time(triggered_time_isoformat))) except SearchIndexingError as exc: - LOGGER.error(u'Search indexing error for library %s - %s', library_id, text_type(exc)) + LOGGER.error('Search indexing error for library %s - %s', library_id, unicode(exc)) else: - LOGGER.debug(u'Search indexing successful for library %s', library_id) + LOGGER.debug('Search indexing successful for library %s', library_id) @task() @@ -153,223 +125,3 @@ def push_course_update_task(course_key_string, course_subscription_id, course_di # TODO Use edx-notifications library instead (MA-638). from .push_notification import send_push_course_update send_push_course_update(course_key_string, course_subscription_id, course_display_name) - - -class CourseImportTask(UserTask): # pylint: disable=abstract-method - """ - Base class for course and library import tasks. - """ - - @staticmethod - def calculate_total_steps(arguments_dict): - """ - Get the number of in-progress steps in the import process, as shown in the UI. - - For reference, these are: - - 1. Unpacking - 2. Verifying - 3. Updating - - Note that the task does a little cleanup work after ``completed_steps`` - reaches its final value, so the task isn't truly finished until the - ``state`` field becomes "Succeeded". - """ - return 3 - - @classmethod - def generate_name(cls, arguments_dict): - """ - Create a name for this particular import task instance. - - Arguments: - arguments_dict (dict): The arguments given to the task function - - Returns: - text_type: The generated name - """ - key = arguments_dict[u'course_key_string'] - filename = arguments_dict[u'archive_name'] - return u'Import of {} from {}'.format(key, filename) - - -@task(base=CourseImportTask, bind=True) -def import_olx(self, user_id, course_key_string, archive_path, archive_name, language): - """ - Import a course or library from a provided OLX .tar.gz archive. - """ - courselike_key = CourseKey.from_string(course_key_string) - try: - user = User.objects.get(pk=user_id) - except User.DoesNotExist: - with respect_language(language): - self.status.fail(_(u'Unknown User ID: {0}').format(user_id)) - return - if not has_course_author_access(user, courselike_key): - with respect_language(language): - self.status.fail(_(u'Permission denied')) - return - - is_library = isinstance(courselike_key, LibraryLocator) - is_course = not is_library - if is_library: - root_name = LIBRARY_ROOT - courselike_module = modulestore().get_library(courselike_key) - import_func = import_library_from_xml - else: - root_name = COURSE_ROOT - courselike_module = modulestore().get_course(courselike_key) - import_func = import_course_from_xml - - # Locate the uploaded OLX archive (and download it from S3 if necessary) - # Do everything in a try-except block to make sure everything is properly cleaned up. - data_root = path(settings.GITHUB_REPO_ROOT) - subdir = base64.urlsafe_b64encode(repr(courselike_key)) - course_dir = data_root / subdir - try: - self.status.set_state(u'Unpacking') - - if not archive_name.endswith(u'.tar.gz'): - with respect_language(language): - self.status.fail(_(u'We only support uploading a .tar.gz file.')) - return - - temp_filepath = course_dir / get_valid_filename(archive_name) - if not course_dir.isdir(): # pylint: disable=no-value-for-parameter - os.mkdir(course_dir) - - LOGGER.debug(u'importing course to {0}'.format(temp_filepath)) - - # Copy the OLX archive from where it was uploaded to (S3, Swift, file system, etc.) - if not course_import_export_storage.exists(archive_path): - LOGGER.info(u'Course import %s: Uploaded file %s not found', courselike_key, archive_path) - with respect_language(language): - self.status.fail(_(u'Tar file not found')) - return - with course_import_export_storage.open(archive_path, 'rb') as source: - with open(temp_filepath, 'wb') as destination: - def read_chunk(): - """ - Read and return a sequence of bytes from the source file. - """ - return source.read(FILE_READ_CHUNK) - for chunk in iter(read_chunk, b''): - destination.write(chunk) - LOGGER.info(u'Course import %s: Download from storage complete', courselike_key) - # Delete from source location - course_import_export_storage.delete(archive_path) - - # If the course has an entrance exam then remove it and its corresponding milestone. - # current course state before import. - if is_course: - if courselike_module.entrance_exam_enabled: - fake_request = RequestFactory().get(u'/') - fake_request.user = user - from contentstore.views.entrance_exam import remove_entrance_exam_milestone_reference - # TODO: Is this really ok? Seems dangerous for a live course - remove_entrance_exam_milestone_reference(fake_request, courselike_key) - LOGGER.info( - u'entrance exam milestone content reference for course %s has been removed', - courselike_module.id - ) - # Send errors to client with stage at which error occurred. - except Exception as exception: # pylint: disable=broad-except - if course_dir.isdir(): # pylint: disable=no-value-for-parameter - shutil.rmtree(course_dir) - LOGGER.info(u'Course import %s: Temp data cleared', courselike_key) - - LOGGER.exception(u'Error importing course %s', courselike_key) - self.status.fail(text_type(exception)) - return - - # try-finally block for proper clean up after receiving file. - try: - tar_file = tarfile.open(temp_filepath) - try: - safetar_extractall(tar_file, (course_dir + u'/').encode(u'utf-8')) - except SuspiciousOperation as exc: - LOGGER.info(u'Course import %s: Unsafe tar file - %s', courselike_key, exc.args[0]) - with respect_language(language): - self.status.fail(_(u'Unsafe tar file. Aborting import.')) - return - finally: - tar_file.close() - - LOGGER.info(u'Course import %s: Uploaded file extracted', courselike_key) - self.status.set_state(u'Verifying') - self.status.increment_completed_steps() - - # find the 'course.xml' file - def get_all_files(directory): - """ - For each file in the directory, yield a 2-tuple of (file-name, - directory-path) - """ - for directory_path, _dirnames, filenames in os.walk(directory): - for filename in filenames: - yield (filename, directory_path) - - def get_dir_for_filename(directory, filename): - """ - Returns the directory path for the first file found in the directory - with the given name. If there is no file in the directory with - the specified name, return None. - """ - for name, directory_path in get_all_files(directory): - if name == filename: - return directory_path - return None - - dirpath = get_dir_for_filename(course_dir, root_name) - if not dirpath: - with respect_language(language): - self.status.fail(_(u'Could not find the {0} file in the package.').format(root_name)) - return - - dirpath = os.path.relpath(dirpath, data_root) - LOGGER.debug(u'found %s at %s', root_name, dirpath) - - LOGGER.info(u'Course import %s: Extracted file verified', courselike_key) - self.status.set_state(u'Updating') - self.status.increment_completed_steps() - - with dog_stats_api.timer( - u'courselike_import.time', - tags=[u"courselike:{}".format(courselike_key)] - ): - courselike_items = import_func( - modulestore(), user.id, - settings.GITHUB_REPO_ROOT, [dirpath], - load_error_modules=False, - static_content_store=contentstore(), - target_id=courselike_key - ) - - new_location = courselike_items[0].location - LOGGER.debug(u'new course at %s', new_location) - - LOGGER.info(u'Course import %s: Course import successful', courselike_key) - self.status.increment_completed_steps() - except Exception as exception: # pylint: disable=broad-except - LOGGER.exception(u'error importing course') - self.status.fail(text_type(exception)) - finally: - if course_dir.isdir(): # pylint: disable=no-value-for-parameter - shutil.rmtree(course_dir) - LOGGER.info(u'Course import %s: Temp data cleared', courselike_key) - - if self.status.completed_steps == 3 and is_course: - # Reload the course so we have the latest state - course = modulestore().get_course(courselike_key) - if course.entrance_exam_enabled: - entrance_exam_chapter = modulestore().get_items( - course.id, - qualifiers={u'category': u'chapter'}, - settings={u'is_entrance_exam': True} - )[0] - - metadata = {u'entrance_exam_id': text_type(entrance_exam_chapter.location)} - CourseMetadata.update_from_dict(metadata, course, user) - from contentstore.views.entrance_exam import add_entrance_exam_milestone - add_entrance_exam_milestone(course.id, entrance_exam_chapter) - LOGGER.info(u'Course %s Entrance exam imported', course.id) diff --git a/cms/djangoapps/contentstore/views/import_export.py b/cms/djangoapps/contentstore/views/import_export.py index aa19861f92..610279baff 100644 --- a/cms/djangoapps/contentstore/views/import_export.py +++ b/cms/djangoapps/contentstore/views/import_export.py @@ -11,35 +11,37 @@ import tarfile from path import Path as path from tempfile import mkdtemp -from six import text_type - from django.conf import settings from django.contrib.auth.decorators import login_required -from django.core.exceptions import PermissionDenied -from django.core.files import File +from django.core.exceptions import SuspiciousOperation, PermissionDenied from django.core.files.temp import NamedTemporaryFile from django.core.servers.basehttp import FileWrapper -from django.db import transaction from django.http import HttpResponse, HttpResponseNotFound, Http404 from django.utils.translation import ugettext as _ from django.views.decorators.csrf import ensure_csrf_cookie from django.views.decorators.http import require_http_methods, require_GET +import dogstats_wrapper as dog_stats_api from edxmako.shortcuts import render_to_response from xmodule.contentstore.django import contentstore from xmodule.exceptions import SerializationError from xmodule.modulestore.django import modulestore from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.locator import LibraryLocator -from user_tasks.models import UserTaskStatus +from xmodule.modulestore.xml_importer import import_course_from_xml, import_library_from_xml from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml +from xmodule.modulestore import COURSE_ROOT, LIBRARY_ROOT from student.auth import has_course_author_access +from openedx.core.lib.extract_tar import safetar_extractall from util.json_request import JsonResponse from util.views import ensure_valid_course_key -from contentstore.storage import course_import_export_storage -from contentstore.tasks import CourseImportTask, import_olx +from models.settings.course_metadata import CourseMetadata +from contentstore.views.entrance_exam import ( + add_entrance_exam_milestone, + remove_entrance_exam_milestone_reference +) from contentstore.utils import reverse_course_url, reverse_usage_url, reverse_library_url @@ -57,7 +59,6 @@ log = logging.getLogger(__name__) CONTENT_RE = re.compile(r"(?P\d{1,11})-(?P\d{1,11})/(?P\d{1,11})") -@transaction.non_atomic_requests @login_required @ensure_csrf_cookie @require_http_methods(("GET", "POST", "PUT")) @@ -75,13 +76,26 @@ def import_handler(request, course_key_string): courselike_key = CourseKey.from_string(course_key_string) library = isinstance(courselike_key, LibraryLocator) if library: + root_name = LIBRARY_ROOT successful_url = reverse_library_url('library_handler', courselike_key) context_name = 'context_library' courselike_module = modulestore().get_library(courselike_key) + import_func = import_library_from_xml else: + root_name = COURSE_ROOT successful_url = reverse_course_url('course_handler', courselike_key) context_name = 'context_course' courselike_module = modulestore().get_course(courselike_key) + import_func = import_course_from_xml + return _import_handler( + request, courselike_key, root_name, successful_url, context_name, courselike_module, import_func + ) + + +def _import_handler(request, courselike_key, root_name, successful_url, context_name, courselike_module, import_func): + """ + Parameterized function containing the meat of import_handler. + """ if not has_course_author_access(request.user, courselike_key): raise PermissionDenied() @@ -89,7 +103,235 @@ def import_handler(request, course_key_string): if request.method == 'GET': raise NotImplementedError('coming soon') else: - return _write_chunk(request, courselike_key) + # Do everything in a try-except block to make sure everything is properly cleaned up. + try: + data_root = path(settings.GITHUB_REPO_ROOT) + subdir = base64.urlsafe_b64encode(repr(courselike_key)) + course_dir = data_root / subdir + filename = request.FILES['course-data'].name + + # Use sessions to keep info about import progress + session_status = request.session.setdefault("import_status", {}) + courselike_string = unicode(courselike_key) + filename + _save_request_status(request, courselike_string, 0) + + # If the course has an entrance exam then remove it and its corresponding milestone. + # current course state before import. + if root_name == COURSE_ROOT: + if courselike_module.entrance_exam_enabled: + remove_entrance_exam_milestone_reference(request, courselike_key) + log.info( + "entrance exam milestone content reference for course %s has been removed", + courselike_module.id + ) + + if not filename.endswith('.tar.gz'): + _save_request_status(request, courselike_string, -1) + return JsonResponse( + { + 'ErrMsg': _('We only support uploading a .tar.gz file.'), + 'Stage': -1 + }, + status=415 + ) + + temp_filepath = course_dir / filename + if not course_dir.isdir(): + os.mkdir(course_dir) + + logging.debug('importing course to {0}'.format(temp_filepath)) + + # Get upload chunks byte ranges + try: + matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"]) + content_range = matches.groupdict() + except KeyError: # Single chunk + # no Content-Range header, so make one that will work + content_range = {'start': 0, 'stop': 1, 'end': 2} + + # stream out the uploaded files in chunks to disk + if int(content_range['start']) == 0: + mode = "wb+" + else: + mode = "ab+" + size = os.path.getsize(temp_filepath) + # Check to make sure we haven't missed a chunk + # This shouldn't happen, even if different instances are handling + # the same session, but it's always better to catch errors earlier. + if size < int(content_range['start']): + _save_request_status(request, courselike_string, -1) + log.warning( + "Reported range %s does not match size downloaded so far %s", + content_range['start'], + size + ) + return JsonResponse( + { + 'ErrMsg': _('File upload corrupted. Please try again'), + 'Stage': -1 + }, + status=409 + ) + # The last request sometimes comes twice. This happens because + # nginx sends a 499 error code when the response takes too long. + elif size > int(content_range['stop']) and size == int(content_range['end']): + return JsonResponse({'ImportStatus': 1}) + + with open(temp_filepath, mode) as temp_file: + for chunk in request.FILES['course-data'].chunks(): + temp_file.write(chunk) + + size = os.path.getsize(temp_filepath) + + if int(content_range['stop']) != int(content_range['end']) - 1: + # More chunks coming + return JsonResponse({ + "files": [{ + "name": filename, + "size": size, + "deleteUrl": "", + "deleteType": "", + "url": reverse_course_url('import_handler', courselike_key), + "thumbnailUrl": "" + }] + }) + # Send errors to client with stage at which error occurred. + except Exception as exception: # pylint: disable=broad-except + _save_request_status(request, courselike_string, -1) + if course_dir.isdir(): + shutil.rmtree(course_dir) + log.info("Course import %s: Temp data cleared", courselike_key) + + log.exception( + "error importing course" + ) + return JsonResponse( + { + 'ErrMsg': str(exception), + 'Stage': -1 + }, + status=400 + ) + + # try-finally block for proper clean up after receiving last chunk. + try: + # This was the last chunk. + log.info("Course import %s: Upload complete", courselike_key) + _save_request_status(request, courselike_string, 1) + + tar_file = tarfile.open(temp_filepath) + try: + safetar_extractall(tar_file, (course_dir + '/').encode('utf-8')) + except SuspiciousOperation as exc: + _save_request_status(request, courselike_string, -1) + return JsonResponse( + { + 'ErrMsg': 'Unsafe tar file. Aborting import.', + 'SuspiciousFileOperationMsg': exc.args[0], + 'Stage': -1 + }, + status=400 + ) + finally: + tar_file.close() + + log.info("Course import %s: Uploaded file extracted", courselike_key) + _save_request_status(request, courselike_string, 2) + + # find the 'course.xml' file + def get_all_files(directory): + """ + For each file in the directory, yield a 2-tuple of (file-name, + directory-path) + """ + for dirpath, _dirnames, filenames in os.walk(directory): + for filename in filenames: + yield (filename, dirpath) + + def get_dir_for_fname(directory, filename): + """ + Returns the dirpath for the first file found in the directory + with the given name. If there is no file in the directory with + the specified name, return None. + """ + for fname, dirpath in get_all_files(directory): + if fname == filename: + return dirpath + return None + + dirpath = get_dir_for_fname(course_dir, root_name) + if not dirpath: + _save_request_status(request, courselike_string, -2) + return JsonResponse( + { + 'ErrMsg': _('Could not find the {0} file in the package.').format(root_name), + 'Stage': -2 + }, + status=415 + ) + + dirpath = os.path.relpath(dirpath, data_root) + logging.debug('found %s at %s', root_name, dirpath) + + log.info("Course import %s: Extracted file verified", courselike_key) + _save_request_status(request, courselike_string, 3) + + with dog_stats_api.timer( + 'courselike_import.time', + tags=[u"courselike:{}".format(courselike_key)] + ): + courselike_items = import_func( + modulestore(), request.user.id, + settings.GITHUB_REPO_ROOT, [dirpath], + load_error_modules=False, + static_content_store=contentstore(), + target_id=courselike_key + ) + + new_location = courselike_items[0].location + logging.debug('new course at %s', new_location) + + log.info("Course import %s: Course import successful", courselike_key) + _save_request_status(request, courselike_string, 4) + + # Send errors to client with stage at which error occurred. + except Exception as exception: # pylint: disable=broad-except + log.exception( + "error importing course" + ) + return JsonResponse( + { + 'ErrMsg': str(exception), + 'Stage': -session_status[courselike_string] + }, + status=400 + ) + + finally: + if course_dir.isdir(): + shutil.rmtree(course_dir) + log.info("Course import %s: Temp data cleared", courselike_key) + # set failed stage number with negative sign in case of unsuccessful import + if session_status[courselike_string] != 4: + _save_request_status(request, courselike_string, -abs(session_status[courselike_string])) + + # status == 4 represents that course has been imported successfully. + if session_status[courselike_string] == 4 and root_name == COURSE_ROOT: + # Reload the course so we have the latest state + course = modulestore().get_course(courselike_key) + if course.entrance_exam_enabled: + entrance_exam_chapter = modulestore().get_items( + course.id, + qualifiers={'category': 'chapter'}, + settings={'is_entrance_exam': True} + )[0] + + metadata = {'entrance_exam_id': unicode(entrance_exam_chapter.location)} + CourseMetadata.update_from_dict(metadata, course, request.user) + add_entrance_exam_milestone(course.id, entrance_exam_chapter) + log.info("Course %s Entrance exam imported", course.id) + + return JsonResponse({'Status': 'OK'}) elif request.method == 'GET': # assume html status_url = reverse_course_url( "import_status_handler", courselike_key, kwargs={'filename': "fillerName"} @@ -116,122 +358,6 @@ def _save_request_status(request, key, status): request.session.save() -def _write_chunk(request, courselike_key): - """ - Write the OLX file data chunk from the given request to the local filesystem. - """ - # Upload .tar.gz to local filesystem for one-server installations not using S3 or Swift - data_root = path(settings.GITHUB_REPO_ROOT) - subdir = base64.urlsafe_b64encode(repr(courselike_key)) - course_dir = data_root / subdir - filename = request.FILES['course-data'].name - - courselike_string = text_type(courselike_key) + filename - # Do everything in a try-except block to make sure everything is properly cleaned up. - try: - # Use sessions to keep info about import progress - _save_request_status(request, courselike_string, 0) - - if not filename.endswith('.tar.gz'): - _save_request_status(request, courselike_string, -1) - return JsonResponse( - { - 'ErrMsg': _('We only support uploading a .tar.gz file.'), - 'Stage': -1 - }, - status=415 - ) - - temp_filepath = course_dir / filename - if not course_dir.isdir(): # pylint: disable=no-value-for-parameter - os.mkdir(course_dir) - - logging.debug('importing course to {0}'.format(temp_filepath)) - - # Get upload chunks byte ranges - try: - matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"]) - content_range = matches.groupdict() - except KeyError: # Single chunk - # no Content-Range header, so make one that will work - content_range = {'start': 0, 'stop': 1, 'end': 2} - - # stream out the uploaded files in chunks to disk - if int(content_range['start']) == 0: - mode = "wb+" - else: - mode = "ab+" - size = os.path.getsize(temp_filepath) - # Check to make sure we haven't missed a chunk - # This shouldn't happen, even if different instances are handling - # the same session, but it's always better to catch errors earlier. - if size < int(content_range['start']): - _save_request_status(request, courselike_string, -1) - log.warning( - "Reported range %s does not match size downloaded so far %s", - content_range['start'], - size - ) - return JsonResponse( - { - 'ErrMsg': _('File upload corrupted. Please try again'), - 'Stage': -1 - }, - status=409 - ) - # The last request sometimes comes twice. This happens because - # nginx sends a 499 error code when the response takes too long. - elif size > int(content_range['stop']) and size == int(content_range['end']): - return JsonResponse({'ImportStatus': 1}) - - with open(temp_filepath, mode) as temp_file: - for chunk in request.FILES['course-data'].chunks(): - temp_file.write(chunk) - - size = os.path.getsize(temp_filepath) - - if int(content_range['stop']) != int(content_range['end']) - 1: - # More chunks coming - return JsonResponse({ - "files": [{ - "name": filename, - "size": size, - "deleteUrl": "", - "deleteType": "", - "url": reverse_course_url('import_handler', courselike_key), - "thumbnailUrl": "" - }] - }) - - log.info("Course import %s: Upload complete", courselike_key) - with open(temp_filepath, 'rb') as local_file: - django_file = File(local_file) - storage_path = course_import_export_storage.save(u'olx_import/' + filename, django_file) - import_olx.delay( - request.user.id, text_type(courselike_key), storage_path, filename, request.LANGUAGE_CODE) - - # Send errors to client with stage at which error occurred. - except Exception as exception: # pylint: disable=broad-except - _save_request_status(request, courselike_string, -1) - if course_dir.isdir(): # pylint: disable=no-value-for-parameter - shutil.rmtree(course_dir) - log.info("Course import %s: Temp data cleared", courselike_key) - - log.exception( - "error importing course" - ) - return JsonResponse( - { - 'ErrMsg': str(exception), - 'Stage': -1 - }, - status=400 - ) - - return JsonResponse({'ImportStatus': 1}) - - -@transaction.non_atomic_requests @require_GET @ensure_csrf_cookie @login_required @@ -242,9 +368,9 @@ def import_status_handler(request, course_key_string, filename=None): -X : Import unsuccessful due to some error with X as stage [0-3] 0 : No status info found (import done or upload still in progress) - 1 : Unpacking - 2 : Verifying - 3 : Updating + 1 : Extracting file + 2 : Validating. + 3 : Importing to mongo 4 : Import successful """ @@ -252,23 +378,11 @@ def import_status_handler(request, course_key_string, filename=None): if not has_course_author_access(request.user, course_key): raise PermissionDenied() - # The task status record is authoritative once it's been created - args = {u'course_key_string': course_key_string, u'archive_name': filename} - name = CourseImportTask.generate_name(args) - task_status = UserTaskStatus.objects.filter(name=name).order_by(u'-created').first() - if task_status is None: - # The task hasn't been initialized yet; did we store info in the session already? - try: - session_status = request.session["import_status"] - status = session_status[course_key_string + filename] - except KeyError: - status = 0 - elif task_status.state == UserTaskStatus.SUCCEEDED: - status = 4 - elif task_status.state in (UserTaskStatus.FAILED, UserTaskStatus.CANCELED): - status = max(-(task_status.completed_steps + 1), -3) - else: - status = min(task_status.completed_steps + 1, 3) + try: + session_status = request.session["import_status"] + status = session_status[course_key_string + filename] + except KeyError: + status = 0 return JsonResponse({"ImportStatus": status}) @@ -342,7 +456,6 @@ def send_tarball(tarball): return response -@transaction.non_atomic_requests @ensure_csrf_cookie @login_required @require_http_methods(("GET",)) diff --git a/cms/djangoapps/contentstore/views/tests/test_import_export.py b/cms/djangoapps/contentstore/views/tests/test_import_export.py index aa65668810..ddbee107bf 100644 --- a/cms/djangoapps/contentstore/views/tests/test_import_export.py +++ b/cms/djangoapps/contentstore/views/tests/test_import_export.py @@ -184,7 +184,7 @@ class ImportTestCase(CourseTestCase): "name": self.bad_tar, "course-data": [btar] }) - self.assertEquals(resp.status_code, 200) + self.assertEquals(resp.status_code, 415) # Check that `import_status` returns the appropriate stage (i.e., the # stage at which import failed). resp_status = self.client.get( @@ -336,16 +336,8 @@ class ImportTestCase(CourseTestCase): with open(tarpath) as tar: args = {"name": tarpath, "course-data": [tar]} resp = self.client.post(self.url, args) - self.assertEquals(resp.status_code, 200) - resp = self.client.get( - reverse_course_url( - 'import_status_handler', - self.course.id, - kwargs={'filename': os.path.split(tarpath)[1]} - ) - ) - status = json.loads(resp.content)["ImportStatus"] - self.assertEqual(status, -1) + self.assertEquals(resp.status_code, 400) + self.assertIn("SuspiciousFileOperation", resp.content) try_tar(self._fifo_tar()) try_tar(self._symlink_tar()) diff --git a/cms/envs/aws.py b/cms/envs/aws.py index 16b7bacf39..bae96d6093 100644 --- a/cms/envs/aws.py +++ b/cms/envs/aws.py @@ -296,17 +296,10 @@ AWS_SECRET_ACCESS_KEY = AUTH_TOKENS["AWS_SECRET_ACCESS_KEY"] if AWS_SECRET_ACCESS_KEY == "": AWS_SECRET_ACCESS_KEY = None -AWS_STORAGE_BUCKET_NAME = AUTH_TOKENS.get('AWS_STORAGE_BUCKET_NAME', 'edxuploads') - # Disabling querystring auth instructs Boto to exclude the querystring parameters (e.g. signature, access key) it # normally appends to every returned URL. AWS_QUERYSTRING_AUTH = AUTH_TOKENS.get('AWS_QUERYSTRING_AUTH', True) -AWS_DEFAULT_ACL = 'private' -AWS_BUCKET_ACL = AWS_DEFAULT_ACL -AWS_QUERYSTRING_EXPIRE = 7 * 24 * 60 * 60 # 7 days -AWS_S3_CUSTOM_DOMAIN = AUTH_TOKENS.get('AWS_S3_CUSTOM_DOMAIN', 'edxuploads.s3.amazonaws.com') - if AUTH_TOKENS.get('DEFAULT_FILE_STORAGE'): DEFAULT_FILE_STORAGE = AUTH_TOKENS.get('DEFAULT_FILE_STORAGE') elif AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY: @@ -314,13 +307,6 @@ elif AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY: else: DEFAULT_FILE_STORAGE = 'django.core.files.storage.FileSystemStorage' -COURSE_IMPORT_EXPORT_BUCKET = ENV_TOKENS.get('COURSE_IMPORT_EXPORT_BUCKET', '') - -if COURSE_IMPORT_EXPORT_BUCKET: - COURSE_IMPORT_EXPORT_STORAGE = 'contentstore.storage.ImportExportS3Storage' -else: - COURSE_IMPORT_EXPORT_STORAGE = DEFAULT_FILE_STORAGE - DATABASES = AUTH_TOKENS['DATABASES'] # The normal database user does not have enough permissions to run migrations. diff --git a/cms/envs/common.py b/cms/envs/common.py index f38eda75c5..b04b16af6a 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -548,8 +548,6 @@ LOCALE_PATHS = (REPO_ROOT + '/conf/locale',) # edx-platform/conf/locale/ # Messages MESSAGE_STORAGE = 'django.contrib.messages.storage.session.SessionStorage' -COURSE_IMPORT_EXPORT_STORAGE = 'django.core.files.storage.FileSystemStorage' - ##### EMBARGO ##### EMBARGO_SITE_REDIRECT_URL = None diff --git a/lms/envs/common.py b/lms/envs/common.py index 2ffdb9d81d..f8504e55c5 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -2182,9 +2182,6 @@ CSRF_COOKIE_SECURE = False REST_FRAMEWORK = { 'DEFAULT_PAGINATION_CLASS': 'openedx.core.lib.api.paginators.DefaultPagination', - 'DEFAULT_RENDERER_CLASSES': ( - 'rest_framework.renderers.JSONRenderer', - ), 'PAGE_SIZE': 10, 'URL_FORMAT_OVERRIDE': None, } diff --git a/pavelib/paver_tests/test_servers.py b/pavelib/paver_tests/test_servers.py index 8bfcd8298a..bed8acf5e0 100644 --- a/pavelib/paver_tests/test_servers.py +++ b/pavelib/paver_tests/test_servers.py @@ -141,7 +141,7 @@ class TestPaverServerTasks(PaverTestCase): """ Test the "celery" task. """ - settings = options.get("settings", "devstack_with_worker") + settings = options.get("settings", "dev_with_worker") call_task("pavelib.servers.celery", options=options) self.assertEquals(self.task_messages, [EXPECTED_CELERY_COMMAND.format(settings=settings)]) @@ -292,7 +292,7 @@ class TestPaverServerTasks(PaverTestCase): port=8001, ) ) - expected_messages.append(EXPECTED_CELERY_COMMAND.format(settings="devstack_with_worker")) + expected_messages.append(EXPECTED_CELERY_COMMAND.format(settings="dev_with_worker")) self.assertEquals(self.task_messages, expected_messages) def expected_sass_commands(self, system=None, asset_settings=u"test_static_optimized"): diff --git a/pavelib/servers.py b/pavelib/servers.py index e2c3bbe234..cf4711f728 100644 --- a/pavelib/servers.py +++ b/pavelib/servers.py @@ -157,7 +157,7 @@ def celery(options): """ Runs Celery workers. """ - settings = getattr(options, 'settings', 'devstack_with_worker') + settings = getattr(options, 'settings', 'dev_with_worker') run_process(django_cmd('lms', settings, 'celery', 'worker', '--beat', '--loglevel=INFO', '--pythonpath=.')) @@ -187,7 +187,7 @@ def run_all_servers(options): """ settings = getattr(options, 'settings', DEFAULT_SETTINGS) asset_settings = getattr(options, 'asset_settings', settings) - worker_settings = getattr(options, 'worker_settings', 'devstack_with_worker') + worker_settings = getattr(options, 'worker_settings', 'dev_with_worker') fast = getattr(options, 'fast', False) optimized = getattr(options, 'optimized', False) diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index 058d14a7e5..d8d125fa85 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -34,7 +34,7 @@ django-simple-history==1.6.3 django-statici18n==1.1.5 django-storages==1.4.1 django-method-override==0.1.0 -django-user-tasks==0.1.4 +django-user-tasks==0.1.2 # We need a fix to DRF 3.2.x, for now use it from our own cherry-picked repo #djangorestframework>=3.1,<3.2 git+https://github.com/edx/django-rest-framework.git@3c72cb5ee5baebc4328947371195eae2077197b0#egg=djangorestframework==3.2.3