feat: add support for zip files to course and library import code (#34191)

Co-authored-by: Rodrigo Ferreira de Souza <rodfersou@gmail.com>
This commit is contained in:
Kshitij Sobti
2024-03-08 23:21:35 +05:30
committed by GitHub
parent c0aba8a805
commit c3c0adf978
11 changed files with 275 additions and 194 deletions

View File

@@ -19,6 +19,7 @@ from user_tasks.models import UserTaskStatus
from cms.djangoapps.contentstore.storage import course_import_export_storage
from cms.djangoapps.contentstore.tasks import CourseImportTask, import_olx
from cms.djangoapps.contentstore.utils import IMPORTABLE_FILE_TYPES
from openedx.core.lib.api.view_utils import DeveloperErrorViewMixin, view_auth_classes
from .utils import course_author_access_required
@@ -44,8 +45,8 @@ class CourseImportView(CourseImportExportViewMixin, GenericAPIView):
"""
**Use Case**
* Start an asynchronous task to import a course from a .tar.gz file into
the specified course ID, overwriting the existing course
* Start an asynchronous task to import a course from a .tar.gz or .zip
file into the specified course ID, overwriting the existing course
* Get a status on an asynchronous task import
**Example Requests**
@@ -59,7 +60,7 @@ class CourseImportView(CourseImportExportViewMixin, GenericAPIView):
* course_id: (required) A string representation of a Course ID,
e.g., course-v1:edX+DemoX+Demo_Course
* course_data: (required) The course .tar.gz file to import
* course_data: (required) The course .tar.gz or .zip file to import
**POST Response Values**
@@ -83,7 +84,7 @@ class CourseImportView(CourseImportExportViewMixin, GenericAPIView):
A GET request must include the following parameters.
* task_id: (required) The UUID of the task to check, e.g. "4b357bb3-2a1e-441d-9f6c-2210cf76606f"
* filename: (required) The filename of the uploaded course .tar.gz
* filename: (required) The filename of the uploaded course .tar.gz or .zip
**GET Response Values**
@@ -124,7 +125,7 @@ class CourseImportView(CourseImportExportViewMixin, GenericAPIView):
)
filename = request.FILES['course_data'].name
if not filename.endswith('.tar.gz'):
if not filename.endswith(IMPORTABLE_FILE_TYPES):
raise self.api_error(
status_code=status.HTTP_400_BAD_REQUEST,
developer_message='Parameter in the wrong format',

View File

@@ -14,5 +14,5 @@ PERMISSION_DENIED = _('Permission denied')
UNKNOWN_ERROR_IN_IMPORT = _('Unknown error while importing course.')
UNKNOWN_ERROR_IN_UNPACKING = _('An Unknown error occurred during the unpacking step.')
UNKNOWN_USER_ID = _('Unknown User ID: {0}')
UNSAFE_TAR_FILE = _('Unsafe tar file. Aborting import.')
UNSAFE_ARCHIVE_FILE = _('Unsafe archive file. Aborting import.')
USER_PERMISSION_DENIED = _('User permission denied.')

View File

@@ -5,7 +5,6 @@ Script for importing a content library from a tar.gz file
import base64
import os
import tarfile
from django.conf import settings
from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user
@@ -16,7 +15,7 @@ from opaque_keys.edx.locator import LibraryLocator
from path import Path
from cms.djangoapps.contentstore.utils import add_instructor
from openedx.core.lib.extract_tar import safetar_extractall
from openedx.core.lib.extract_archive import safe_extractall
from xmodule.contentstore.django import contentstore # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore import ModuleStoreEnum # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disable=wrong-import-order
@@ -47,13 +46,10 @@ class Command(BaseCommand):
course_dir = data_root / subdir
# Extract library archive
tar_file = tarfile.open(archive_path) # lint-amnesty, pylint: disable=consider-using-with
try:
safetar_extractall(tar_file, course_dir)
safe_extractall(archive_path, course_dir)
except SuspiciousOperation as exc:
raise CommandError(f'\n=== Course import {archive_path}: Unsafe tar file - {exc.args[0]}\n') # lint-amnesty, pylint: disable=raise-missing-from
finally:
tar_file.close()
raise CommandError(f'\n=== Course import {archive_path}: Unsafe tar file - {exc.args[0]}\n') from exc
# Paths to the library.xml file
abs_xml_path = os.path.join(course_dir, 'library')

View File

@@ -47,8 +47,13 @@ from cms.djangoapps.contentstore.courseware_index import (
SearchIndexingError
)
from cms.djangoapps.contentstore.storage import course_import_export_storage
from cms.djangoapps.contentstore.utils import delete_course # lint-amnesty, pylint: disable=wrong-import-order
from cms.djangoapps.contentstore.utils import initialize_permissions, reverse_usage_url, translation_language
from cms.djangoapps.contentstore.utils import (
IMPORTABLE_FILE_TYPES,
initialize_permissions,
reverse_usage_url,
translation_language,
delete_course
)
from cms.djangoapps.models.settings.course_metadata import CourseMetadata
from common.djangoapps.course_action_state.models import CourseRerunState
from common.djangoapps.student.auth import has_course_author_access
@@ -62,20 +67,15 @@ from openedx.core.djangoapps.discussions.models import DiscussionsConfiguration,
from openedx.core.djangoapps.discussions.tasks import update_unit_discussion_state_from_discussion_blocks
from openedx.core.djangoapps.embargo.models import CountryAccessRule, RestrictedCourse
from openedx.core.lib.blockstore_api import get_collection
from openedx.core.lib.extract_tar import safetar_extractall
from xmodule.contentstore.django import contentstore # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.course_block import CourseFields # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.exceptions import SerializationError # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore import ModuleStoreEnum # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore import COURSE_ROOT, LIBRARY_ROOT # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.exceptions import ItemNotFoundError # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.exceptions import DuplicateCourseError, InvalidProctoringProvider
from xmodule.modulestore.xml_exporter import export_library_to_xml # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.xml_exporter import export_course_to_xml
from xmodule.modulestore.xml_importer import import_library_from_xml # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.xml_importer import CourseImportException, import_course_from_xml
from openedx.core.lib.extract_archive import safe_extractall
from xmodule.contentstore.django import contentstore
from xmodule.course_block import CourseFields
from xmodule.exceptions import SerializationError
from xmodule.modulestore import COURSE_ROOT, LIBRARY_ROOT, ModuleStoreEnum
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import DuplicateCourseError, InvalidProctoringProvider, ItemNotFoundError
from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml
from xmodule.modulestore.xml_importer import CourseImportException, import_course_from_xml, import_library_from_xml
from .outlines import update_outline_from_modulestore
from .outlines_regenerate import CourseOutlineRegenerate
from .toggles import bypass_olx_failure_enabled
@@ -480,7 +480,7 @@ def sync_discussion_settings(course_key, user):
# lint-amnesty, pylint: disable=too-many-statements
def import_olx(self, user_id, course_key_string, archive_path, archive_name, language):
"""
Import a course or library from a provided OLX .tar.gz archive.
Import a course or library from a provided OLX .tar.gz or .zip archive.
"""
set_code_owner_attribute_from_module(__name__)
current_step = 'Unpacking'
@@ -517,7 +517,7 @@ def import_olx(self, user_id, course_key_string, archive_path, archive_name, lan
def file_is_supported():
"""Check if it is a supported file."""
file_is_valid = archive_name.endswith('.tar.gz')
file_is_valid = archive_name.endswith(IMPORTABLE_FILE_TYPES)
if not file_is_valid:
message = f'Unsupported file {archive_name}'
@@ -647,17 +647,14 @@ def import_olx(self, user_id, course_key_string, archive_path, archive_name, lan
# try-finally block for proper clean up after receiving file.
try:
tar_file = tarfile.open(temp_filepath) # lint-amnesty, pylint: disable=consider-using-with
try:
safetar_extractall(tar_file, (course_dir + '/'))
safe_extractall(temp_filepath, course_dir)
except SuspiciousOperation as exc:
with translation_language(language):
self.status.fail(UserErrors.UNSAFE_TAR_FILE)
LOGGER.error(f'{log_prefix}: Unsafe tar file')
self.status.fail(UserErrors.UNSAFE_ARCHIVE_FILE)
LOGGER.error(f'{log_prefix}: Unsafe archive file')
monitor_import_failure(courselike_key, current_step, exception=exc)
return
finally:
tar_file.close()
current_step = 'Verifying'
self.status.set_state(current_step)

View File

@@ -101,6 +101,7 @@ from xmodule.partitions.partitions_service import get_all_partitions_for_course
from xmodule.services import SettingsService, ConfigurationService, TeamsConfigurationService
IMPORTABLE_FILE_TYPES = ('.tar.gz', '.zip')
log = logging.getLogger(__name__)

View File

@@ -42,8 +42,13 @@ from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disa
from ..storage import course_import_export_storage
from ..tasks import CourseExportTask, CourseImportTask, export_olx, import_olx
from ..toggles import use_new_export_page, use_new_import_page
from ..utils import reverse_course_url, reverse_library_url, get_export_url, get_import_url
from ..utils import (
reverse_course_url,
reverse_library_url,
get_export_url,
get_import_url,
IMPORTABLE_FILE_TYPES,
)
__all__ = [
'import_handler', 'import_status_handler',
'export_handler', 'export_output_handler', 'export_status_handler',
@@ -70,7 +75,7 @@ def import_handler(request, course_key_string):
html: return html page for import page
json: not supported
POST or PUT
json: import a course via the .tar.gz file specified in request.FILES
json: import a course via the .tar.gz or .zip file specified in request.FILES
"""
courselike_key = CourseKey.from_string(course_key_string)
library = isinstance(courselike_key, LibraryLocator)
@@ -123,7 +128,7 @@ def _write_chunk(request, courselike_key): # lint-amnesty, pylint: disable=too-
"""
Write the OLX file data chunk from the given request to the local filesystem.
"""
# Upload .tar.gz to local filesystem for one-server installations not using S3 or Swift
# Upload .tar.gz or .zip to local filesystem for one-server installations not using S3 or Swift
data_root = path(settings.GITHUB_REPO_ROOT)
subdir = base64.urlsafe_b64encode(repr(courselike_key).encode('utf-8')).decode('utf-8')
course_dir = data_root / subdir
@@ -141,8 +146,8 @@ def _write_chunk(request, courselike_key): # lint-amnesty, pylint: disable=too-
# Use sessions to keep info about import progress
_save_request_status(request, courselike_string, 0)
if not filename.endswith('.tar.gz'):
error_message = _('We only support uploading a .tar.gz file.')
if not filename.endswith(IMPORTABLE_FILE_TYPES):
error_message = _('We support uploading files in one of the following formats: {IMPORTABLE_FILE_TYPES}')
_save_request_status(request, courselike_string, -1)
monitor_import_failure(courselike_key, current_step, message=error_message)
return error_response(error_message, 415, 0)

View File

@@ -13,6 +13,7 @@ import tempfile
from io import BytesIO
from unittest.mock import Mock, patch
from uuid import uuid4
from zipfile import ZipFile
import ddt
import lxml
@@ -37,20 +38,20 @@ from cms.djangoapps.models.settings.course_metadata import CourseMetadata
from common.djangoapps.student import auth
from common.djangoapps.student.roles import CourseInstructorRole, CourseStaffRole
from common.djangoapps.util import milestones_helpers
from openedx.core.lib.extract_tar import safetar_extractall
from xmodule.contentstore.django import contentstore # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore import LIBRARY_ROOT, ModuleStoreEnum # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.exceptions import DuplicateCourseError, InvalidProctoringProvider # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.tests.factories import CourseFactory, BlockFactory, LibraryFactory # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.tests.utils import SPLIT_MODULESTORE_SETUP, TEST_DATA_DIR, MongoContentstoreBuilder # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.xml_importer import ( # lint-amnesty, pylint: disable=wrong-import-order
from openedx.core.lib.extract_archive import safe_extractall
from xmodule.contentstore.django import contentstore
from xmodule.modulestore import LIBRARY_ROOT, ModuleStoreEnum
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import DuplicateCourseError, InvalidProctoringProvider
from xmodule.modulestore.tests.factories import BlockFactory, CourseFactory, LibraryFactory
from xmodule.modulestore.tests.utils import SPLIT_MODULESTORE_SETUP, TEST_DATA_DIR, MongoContentstoreBuilder
from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml
from xmodule.modulestore.xml_importer import (
BlockFailedToImport,
CourseImportManager,
ErrorReadingFileException,
import_course_from_xml,
import_library_from_xml,
BlockFailedToImport,
import_library_from_xml
)
TASK_LOGGER = 'cms.djangoapps.contentstore.tasks.LOGGER'
@@ -183,6 +184,18 @@ class ImportTestCase(CourseTestCase):
with tarfile.open(self.good_tar, "w:gz") as gtar:
gtar.add(good_dir)
self.good_zip = os.path.join(self.content_dir, "good.zip")
with ZipFile(self.good_zip, "w") as gzip:
for folder_name, subfolders, filenames in os.walk(good_dir):
for filename in filenames:
file_path = os.path.join(folder_name, filename)
gzip.write(file_path, file_path[len(good_dir) + 1:])
self.good_archives = {
'zip': self.good_zip,
'tar': self.good_tar,
}
# Bad course (no 'course.xml' file):
bad_dir = tempfile.mkdtemp(dir=self.content_dir)
touch(os.path.join(bad_dir, "bad.xml"))
@@ -190,6 +203,17 @@ class ImportTestCase(CourseTestCase):
with tarfile.open(self.bad_tar, "w:gz") as btar:
btar.add(bad_dir)
self.bad_zip = os.path.join(self.content_dir, "bad.zip")
with ZipFile(self.bad_zip, "w") as bzip:
for folder_name, subfolders, filenames in os.walk(bad_dir):
for filename in filenames:
file_path = os.path.join(folder_name, filename)
bzip.write(file_path, file_path[len(good_dir) + 1:])
self.bad_archives = {
'zip': self.bad_zip,
'tar': self.bad_tar,
}
self.unsafe_common_dir = path(tempfile.mkdtemp(dir=self.content_dir))
self.log_prefix = f"Course import {self.course.id}:"
@@ -211,53 +235,60 @@ class ImportTestCase(CourseTestCase):
if expected_message:
self.assertEqual(response['Message'], expected_message)
def get_import_status(self, course_id, tarfile_path):
def get_import_status(self, course_id, file_path):
"""Helper method to get course import status."""
resp = self.client.get(
reverse_course_url(
'import_status_handler',
course_id,
kwargs={'filename': os.path.split(tarfile_path)[1]}
kwargs={'filename': os.path.split(file_path)[1]}
)
)
return json.loads(resp.content)
def import_tarfile_in_course(self, tarfile_path):
"""Helper method to import provided tarfile in the course."""
with open(tarfile_path, 'rb') as gtar:
args = {"name": tarfile_path, "course-data": [gtar]}
def import_file_in_course(self, file_path):
"""Helper method to import provided file in the course."""
with open(file_path, 'rb') as file_data:
args = {"name": file_path, "course-data": [file_data]}
return self.client.post(self.url, args)
@patch(TASK_LOGGER)
def test_no_coursexml(self, mocked_log):
@ddt.data('tar', 'zip')
def test_no_coursexml(self, fmt, mocked_log):
"""
Check that the response for a tar.gz import without a course.xml is
Check that the response for a file import without a course.xml is
correct.
"""
bad_file = self.bad_archives[fmt]
error_msg = import_error.FILE_MISSING.format('course.xml')
expected_error_mesg = f'{self.log_prefix} {error_msg}'
response = self.import_tarfile_in_course(self.bad_tar)
response = self.import_file_in_course(bad_file)
self.assertEqual(response.status_code, 200)
mocked_log.error.assert_called_once_with(expected_error_mesg)
# Check that `import_status` returns the appropriate stage (i.e., the
# stage at which import failed).
resp_status = self.get_import_status(self.course.id, self.bad_tar)
resp_status = self.get_import_status(self.course.id, bad_file)
self.assertImportStatusResponse(resp_status, self.VerifyingError, error_msg)
def test_with_coursexml(self):
@ddt.data('zip', 'tar')
def test_with_coursexml(self, fmt):
"""
Check that the response for a tar.gz import with a course.xml is
Check that the response for a file import with a course.xml is
correct.
"""
response = self.import_tarfile_in_course(self.good_tar)
good_file = self.good_archives[fmt]
response = self.import_file_in_course(good_file)
self.assertEqual(response.status_code, 200)
def test_import_in_existing_course(self):
@ddt.data('zip', 'tar')
def test_import_in_existing_course(self, fmt):
"""
Check that course is imported successfully in existing course and users have their access roles
"""
good_file = self.good_archives[fmt]
# Create a non_staff user and add it to course staff only
__, nonstaff_user = self.create_non_staff_authed_user_client()
auth.add_users(self.user, CourseStaffRole(self.course.id), nonstaff_user)
@@ -267,7 +298,7 @@ class ImportTestCase(CourseTestCase):
display_name_before_import = course.display_name
# Check that global staff user can import course
response = self.import_tarfile_in_course(self.good_tar)
response = self.import_file_in_course(good_file)
self.assertEqual(response.status_code, 200)
course = self.store.get_course(self.course.id)
@@ -283,7 +314,7 @@ class ImportTestCase(CourseTestCase):
# Now course staff user can also successfully import course
self.client.login(username=nonstaff_user.username, password='foo')
resp = self.import_tarfile_in_course(self.good_tar)
resp = self.import_file_in_course(good_file)
self.assertEqual(resp.status_code, 200)
# Now check that non_staff user has his same role
@@ -375,7 +406,7 @@ class ImportTestCase(CourseTestCase):
def try_tar(tarpath):
""" Attempt to tar an unacceptable file """
resp = self.import_tarfile_in_course(tarpath)
resp = self.import_file_in_course(tarpath)
self.assertEqual(resp.status_code, 200)
resp = self.get_import_status(self.course.id, tarpath)
@@ -452,8 +483,7 @@ class ImportTestCase(CourseTestCase):
extract_dir_relative = path.relpath(extract_dir, settings.DATA_DIR)
try:
with tarfile.open(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz') as tar:
safetar_extractall(tar, extract_dir)
safe_extractall(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz', extract_dir)
library_items = import_library_from_xml(
self.store,
self.user.id,
@@ -497,8 +527,7 @@ class ImportTestCase(CourseTestCase):
extract_dir_relative = path.relpath(extract_dir, settings.DATA_DIR)
try:
with tarfile.open(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz') as tar:
safetar_extractall(tar, extract_dir)
safe_extractall(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz', extract_dir)
import_library_from_xml(
self.store,
self.user.id,
@@ -530,8 +559,7 @@ class ImportTestCase(CourseTestCase):
extract_dir_relative = path.relpath(extract_dir, settings.DATA_DIR)
try:
with tarfile.open(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz') as tar:
safetar_extractall(tar, extract_dir)
safe_extractall(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz', extract_dir)
import_library_from_xml(
source_store,
self.user.id,
@@ -547,72 +575,83 @@ class ImportTestCase(CourseTestCase):
shutil.rmtree(extract_dir)
@patch(TASK_LOGGER)
def test_import_failed_with_no_user_permission(self, mocked_log):
@ddt.data('zip', 'tar')
def test_import_failed_with_no_user_permission(self, fmt, mocked_log):
"""
Tests course import failure when user have no permission
"""
good_file = self.good_archives[fmt]
expected_error_mesg = f'{self.log_prefix} User permission denied: {self.user.username}'
with patch('cms.djangoapps.contentstore.tasks.has_course_author_access', Mock(return_value=False)):
response = self.import_tarfile_in_course(self.good_tar)
response = self.import_file_in_course(good_file)
self.assertEqual(response.status_code, 200)
mocked_log.error.assert_called_once_with(expected_error_mesg)
status_response = self.get_import_status(self.course.id, self.good_tar)
status_response = self.get_import_status(self.course.id, good_file)
self.assertImportStatusResponse(status_response, self.UnpackingError, import_error.COURSE_PERMISSION_DENIED)
@patch(TASK_LOGGER)
def test_import_failed_with_unknown_user(self, mocked_log):
@ddt.data('zip', 'tar')
def test_import_failed_with_unknown_user(self, fmt, mocked_log):
"""
Tests that course import failure with an unknown user id.
"""
expected_error_mesg = f'{self.log_prefix} Unknown User: {self.user.id}'
good_file = self.good_archives[fmt]
with patch('django.contrib.auth.models.User.objects.get', side_effect=User.DoesNotExist):
response = self.import_tarfile_in_course(self.good_tar)
response = self.import_file_in_course(good_file)
self.assertEqual(response.status_code, 200)
mocked_log.error.assert_called_once_with(expected_error_mesg)
status_response = self.get_import_status(self.course.id, self.good_tar)
status_response = self.get_import_status(self.course.id, good_file)
self.assertImportStatusResponse(status_response, self.UnpackingError, import_error.USER_PERMISSION_DENIED)
@patch(TASK_LOGGER)
def test_import_failed_with_unsafe_tarfile(self, mocked_log):
@ddt.data('zip', 'tar')
def test_import_failed_with_unsafe_file(self, fmt, mocked_log):
"""
Tests course import failure with unsafe tar file.
Tests course import failure with unsafe file.
"""
expected_error_mesg = f'{self.log_prefix} Unsafe tar file'
with patch('cms.djangoapps.contentstore.tasks.safetar_extractall', side_effect=SuspiciousOperation):
response = self.import_tarfile_in_course(self.good_tar)
good_file = self.good_archives[fmt]
expected_error_mesg = f'{self.log_prefix} Unsafe archive file'
with patch('cms.djangoapps.contentstore.tasks.safe_extractall', side_effect=SuspiciousOperation):
response = self.import_file_in_course(good_file)
self.assertEqual(response.status_code, 200)
mocked_log.error.assert_called_once_with(expected_error_mesg)
status_response = self.get_import_status(self.course.id, self.good_tar)
self.assertImportStatusResponse(status_response, self.UnpackingError, import_error.UNSAFE_TAR_FILE)
status_response = self.get_import_status(self.course.id, good_file)
self.assertImportStatusResponse(status_response, self.UnpackingError, import_error.UNSAFE_ARCHIVE_FILE)
@patch(TASK_LOGGER)
def test_import_failed_with_unknown_unpacking_error(self, mocked_log):
@ddt.data('zip', 'tar')
def test_import_failed_with_unknown_unpacking_error(self, fmt, mocked_log):
"""
Tests that course import failure for unknown error while unpacking
"""
good_file = self.good_archives[fmt]
expected_error_mesg = f'{self.log_prefix} Unknown error while unpacking'
with patch.object(course_import_export_storage, 'open', side_effect=Exception):
response = self.import_tarfile_in_course(self.good_tar)
response = self.import_file_in_course(good_file)
self.assertEqual(response.status_code, 200)
mocked_log.exception.assert_called_once_with(expected_error_mesg, exc_info=True)
status_response = self.get_import_status(self.course.id, self.good_tar)
status_response = self.get_import_status(self.course.id, good_file)
self.assertImportStatusResponse(status_response, self.UnpackingError, import_error.UNKNOWN_ERROR_IN_UNPACKING)
@patch(TASK_LOGGER)
@patch('olxcleaner.validate')
@patch('cms.djangoapps.contentstore.tasks.report_error_summary')
@patch('cms.djangoapps.contentstore.tasks.report_errors')
def test_import_failed_with_olx_validations(self, mocked_report, mocked_summary, mocked_validate, mocked_log):
@ddt.data('zip', 'tar')
def test_import_failed_with_olx_validations(self, fmt, mocked_report, mocked_summary, mocked_validate,
mocked_log):
"""
Tests that course import failure for unknown error while unpacking
"""
good_file = self.good_archives[fmt]
errors = [Mock(description='DuplicateURLNameError', level_val=3)]
mocked_summary.return_value = [f'ERROR {error.description} found in content' for error in errors]
mocked_report.return_value = [f'Errors: {len(errors)}']
@@ -621,12 +660,12 @@ class ImportTestCase(CourseTestCase):
]
expected_error_mesg = f'{self.log_prefix} CourseOlx validation failed.'
with patch.dict(settings.FEATURES, ENABLE_COURSE_OLX_VALIDATION=True):
response = self.import_tarfile_in_course(self.good_tar)
response = self.import_file_in_course(good_file)
self.assertEqual(response.status_code, 200)
mocked_log.error.assert_called_once_with(expected_error_mesg)
status_response = self.get_import_status(self.course.id, self.good_tar)
status_response = self.get_import_status(self.course.id, good_file)
self.assertImportStatusResponse(status_response, self.VerifyingError, import_error.OLX_VALIDATION_FAILED)
@patch(TASK_LOGGER)
@@ -645,14 +684,16 @@ class ImportTestCase(CourseTestCase):
"""
Test that when course import fails with a known failure, user get a descriptive error message.
"""
mocked_import.side_effect = exc
expected_exception_messages = f"{self.log_prefix} Error while importing course: {str(exc)}"
response = self.import_tarfile_in_course(self.good_tar)
self.assertEqual(response.status_code, 200)
mocked_log.exception.assert_called_once_with(expected_exception_messages)
for good_file in self.good_archives.values():
mocked_import.side_effect = exc
expected_exception_messages = f"{self.log_prefix} Error while importing course: {str(exc)}"
response = self.import_file_in_course(good_file)
self.assertEqual(response.status_code, 200)
mocked_log.exception.assert_called_once_with(expected_exception_messages)
mocked_log.reset_mock()
status_response = self.get_import_status(self.course.id, self.good_tar)
self.assertImportStatusResponse(status_response, self.UpdatingError, expected_mesg)
status_response = self.get_import_status(self.course.id, good_file)
self.assertImportStatusResponse(status_response, self.UpdatingError, expected_mesg)
@patch(TASK_LOGGER)
@patch.object(CourseImportManager, 'import_children')
@@ -665,22 +706,26 @@ class ImportTestCase(CourseTestCase):
"""
Test that import status and logged exception when course import fails with an unknown failure.
"""
mocked_import.side_effect = exception
expected_exc_mesg = f"{self.log_prefix} Error while importing course: {str(exception)}"
response = self.import_tarfile_in_course(self.good_tar)
self.assertEqual(response.status_code, 200)
mocked_log.exception.assert_called_once_with(expected_exc_mesg)
for good_file in self.good_archives.values():
mocked_import.side_effect = exception
expected_exc_mesg = f"{self.log_prefix} Error while importing course: {str(exception)}"
response = self.import_file_in_course(good_file)
self.assertEqual(response.status_code, 200)
mocked_log.exception.assert_called_once_with(expected_exc_mesg)
mocked_log.reset_mock()
status_response = self.get_import_status(self.course.id, self.good_tar)
self.assertImportStatusResponse(status_response, self.UpdatingError, import_error.UNKNOWN_ERROR_IN_IMPORT)
status_response = self.get_import_status(self.course.id, good_file)
self.assertImportStatusResponse(status_response, self.UpdatingError, import_error.UNKNOWN_ERROR_IN_IMPORT)
def test_import_status_response_is_not_cached(self):
@ddt.data('zip', 'tar')
def test_import_status_response_is_not_cached(self, fmt):
"""To test import_status endpoint response is not cached"""
good_file = self.good_archives[fmt]
resp = self.client.get(
reverse_course_url(
'import_status_handler',
self.course.id,
kwargs={'filename': os.path.split(self.good_tar)[1]}
kwargs={'filename': os.path.split(good_file)[1]}
)
)
self.assertEqual(resp.headers['Cache-Control'], 'no-cache, no-store, must-revalidate')

View File

@@ -9,6 +9,8 @@ define([
], function(domReady, Import, $, gettext) {
'use strict';
const IMPORTABLE_FILE_TYPES = /\.tar\.gz$|\.zip$/;
return {
Import: function(feedbackUrl, library) {
var dbError,
@@ -35,7 +37,7 @@ define([
var filepath = $(this).val(),
msg;
if (filepath.substr(filepath.length - 6, 6) === 'tar.gz') {
if (IMPORTABLE_FILE_TYPES.test(filepath)) {
$('.error-block').hide();
$('.file-name').text($(this).val().replace('C:\\fakepath\\', ''));
$('.file-name-block').show();
@@ -44,7 +46,7 @@ define([
$('.progress').show();
} else {
msg = gettext('File format not supported. Please upload a file with a {ext} extension.')
.replace('{ext}', '<code>tar.gz</code>');
.replace('{ext}', '<code>tar.gz or zip</code>');
$('.error-block').text(msg).show();
}
@@ -84,7 +86,7 @@ define([
file = data.files[0];
if (file.name.match(/tar\.gz$/)) {
if (IMPORTABLE_FILE_TYPES.test(file.name)) {
$submitBtn.click(function(event) {
event.preventDefault();

View File

@@ -45,13 +45,14 @@ else:
<article class="content-primary" role="main">
<div class="introduction">
## Translators: ".tar.gz" is a file extension, and files with that extension are called "gzipped tar files": these terms should not be translated
## Translators: ".zip" is a file extension, and files with that extension are called "zipped files": these terms should not be translated
%if library:
<p>${Text(_("Be sure you want to import a library before continuing. The contents of the imported library will replace the contents of the existing library. {em_start}You cannot undo a library import{em_end}. Before you proceed, we recommend that you export the current library, so that you have a backup copy of it.")).format(em_start=HTML('<strong>'), em_end=HTML("</strong>"))}</p>
<p>${_("The library that you import must be in a .tar.gz file (that is, a .tar file compressed with GNU Zip). This .tar.gz file must contain a library.xml file. It may also contain other files.")}</p>
<p>${_("The import process has five stages. During the first two stages, you must stay on this page. You can leave this page after the Unpacking stage has completed. We recommend, however, that you don't make important changes to your library until the import operation has completed.")}</p>
%else:
<p>${Text(_("Be sure you want to import a course before continuing. The contents of the imported course will replace the contents of the existing course. {em_start}You cannot undo a course import{em_end}. Before you proceed, we recommend that you export the current course, so that you have a backup copy of it.")).format(em_start=HTML('<strong>'), em_end=HTML("</strong>"))}</p>
<p>${_("The course that you import must be in a .tar.gz file (that is, a .tar file compressed with GNU Zip). This .tar.gz file must contain a course.xml file. It may also contain other files.")}</p>
<p>${_("The course that you import must be in a .tar.gz file (that is, a .tar file compressed with GNU Zip) or .zip (that is a compressed file). This .tar.gz or .zip file must contain a course.xml file. It may also contain other files.")}</p>
<p>${_("The import process has five stages. During the first two stages, you must stay on this page. You can leave this page after the Unpacking stage has completed. We recommend, however, that you don't make important changes to your course until the import operation has completed.")}</p>
%endif
@@ -60,11 +61,12 @@ else:
<form id="fileupload" method="post" enctype="multipart/form-data" class="import-form">
## Translators: ".tar.gz" is a file extension, and files with that extension are called "gzipped tar files": these terms should not be translated
## Translators: ".zip" is a file extension, and files with that extension are called "zipped files": these terms should not be translated
<h2 class="title">
%if library:
${_("Select a .tar.gz File to Replace Your Library Content")}
%else:
${_("Select a .tar.gz File to Replace Your Course Content")}
${_("Select a .tar.gz or .zip File to Replace Your Course Content")}
%endif
</h2>

View File

@@ -0,0 +1,109 @@
"""
Safe version of extractall which does not extract any files that would
be, or symlink to a file that is, outside of the directory extracted in.
Adapted from:
http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python
"""
import logging
from os.path import abspath, dirname
from os.path import join as joinpath
from os.path import realpath
from typing import List, Union
from zipfile import ZipFile, ZipInfo
from tarfile import TarFile, TarInfo
from django.conf import settings
from django.core.exceptions import SuspiciousOperation
log = logging.getLogger(__name__)
def resolved(rpath):
"""
Returns the canonical absolute path of `rpath`.
"""
return realpath(abspath(rpath))
def _is_bad_path(path, base):
"""
Is (the canonical absolute path of) `path` outside `base`?
"""
return not resolved(joinpath(base, path)).startswith(base)
def _is_bad_link(info, base):
"""
Does the file sym- or hard-link to files outside `base`?
"""
# Links are interpreted relative to the directory containing the link
tip = resolved(joinpath(base, dirname(info.name)))
return _is_bad_path(info.linkname, base=tip)
def _check_tarinfo(finfo: TarInfo, base: str):
"""
Checks a file in a tar archive (TarInfo object) for safety.
It ensures that the file isn't a hard link or symlink to a file pointing to
a path outside the archive and checks that the file isn't a device file.
Raises:
SuspiciousOperation: If the TarInfo object is found to be a
hard link, symlink, or a special device file.
"""
if finfo.issym() and _is_bad_link(finfo, base):
log.debug("File %r is blocked: Hard link to %r", finfo.name, finfo.linkname)
raise SuspiciousOperation("Hard link")
if finfo.islnk() and _is_bad_link(finfo, base):
log.debug("File %r is blocked: Symlink to %r", finfo.name, finfo.linkname)
raise SuspiciousOperation("Symlink")
if finfo.isdev():
log.debug("File %r is blocked: FIFO, device or character file", finfo.name)
raise SuspiciousOperation("Dev file")
def _checkmembers(members: Union[List[ZipInfo], List[TarInfo]], base: str):
"""
Check that all elements of the archive file are safe.
"""
base = resolved(base)
# check that we're not trying to import outside of the github_repo_root
if not base.startswith(resolved(settings.GITHUB_REPO_ROOT)):
raise SuspiciousOperation("Attempted to import course outside of data dir")
for finfo in members:
if isinstance(finfo, ZipInfo):
filename = finfo.filename
elif isinstance(finfo, TarInfo):
filename = finfo.name
_check_tarinfo(finfo, base)
if _is_bad_path(filename, base):
log.debug("File %r is blocked (illegal path)", filename)
raise SuspiciousOperation("Illegal path")
def safe_extractall(file_name, output_path):
"""
Extract Zip or Tar files
"""
archive = None
if not output_path.endswith("/"):
output_path += "/"
try:
if file_name.endswith(".zip"):
archive = ZipFile(file_name, "r")
members = archive.infolist()
elif file_name.endswith(".tar.gz"):
archive = TarFile.open(file_name)
members = archive.getmembers()
else:
raise ValueError("Unsupported archive format")
_checkmembers(members, output_path)
archive.extractall(output_path)
finally:
if archive:
archive.close()

View File

@@ -1,77 +0,0 @@
"""
Safe version of tarfile.extractall which does not extract any files that would
be, or symlink to a file that is, outside of the directory extracted in.
Adapted from:
http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python
"""
import logging
from os.path import join as joinpath
from os.path import abspath, dirname, realpath
from django.conf import settings
from django.core.exceptions import SuspiciousOperation
log = logging.getLogger(__name__)
def resolved(rpath):
"""
Returns the canonical absolute path of `rpath`.
"""
return realpath(abspath(rpath))
def _is_bad_path(path, base):
"""
Is (the canonical absolute path of) `path` outside `base`?
"""
return not resolved(joinpath(base, path)).startswith(base)
def _is_bad_link(info, base):
"""
Does the file sym- or hard-link to files outside `base`?
"""
# Links are interpreted relative to the directory containing the link
tip = resolved(joinpath(base, dirname(info.name)))
return _is_bad_path(info.linkname, base=tip)
def safemembers(members, base):
"""
Check that all elements of a tar file are safe.
"""
base = resolved(base)
# check that we're not trying to import outside of the github_repo_root
if not base.startswith(resolved(settings.GITHUB_REPO_ROOT)):
raise SuspiciousOperation("Attempted to import course outside of data dir")
for finfo in members:
if _is_bad_path(finfo.name, base): # lint-amnesty, pylint: disable=no-else-raise
log.debug("File %r is blocked (illegal path)", finfo.name)
raise SuspiciousOperation("Illegal path")
elif finfo.issym() and _is_bad_link(finfo, base):
log.debug("File %r is blocked: Hard link to %r", finfo.name, finfo.linkname)
raise SuspiciousOperation("Hard link")
elif finfo.islnk() and _is_bad_link(finfo, base):
log.debug("File %r is blocked: Symlink to %r", finfo.name,
finfo.linkname)
raise SuspiciousOperation("Symlink")
elif finfo.isdev():
log.debug("File %r is blocked: FIFO, device or character file",
finfo.name)
raise SuspiciousOperation("Dev file")
return members
def safetar_extractall(tar_file, path=".", members=None): # pylint: disable=unused-argument
"""
Safe version of `tar_file.extractall()`.
"""
path = str(path)
return tar_file.extractall(path, safemembers(tar_file, path))