feat: add support for zip files to course and library import code (#34191)
Co-authored-by: Rodrigo Ferreira de Souza <rodfersou@gmail.com>
This commit is contained in:
@@ -19,6 +19,7 @@ from user_tasks.models import UserTaskStatus
|
||||
|
||||
from cms.djangoapps.contentstore.storage import course_import_export_storage
|
||||
from cms.djangoapps.contentstore.tasks import CourseImportTask, import_olx
|
||||
from cms.djangoapps.contentstore.utils import IMPORTABLE_FILE_TYPES
|
||||
from openedx.core.lib.api.view_utils import DeveloperErrorViewMixin, view_auth_classes
|
||||
|
||||
from .utils import course_author_access_required
|
||||
@@ -44,8 +45,8 @@ class CourseImportView(CourseImportExportViewMixin, GenericAPIView):
|
||||
"""
|
||||
**Use Case**
|
||||
|
||||
* Start an asynchronous task to import a course from a .tar.gz file into
|
||||
the specified course ID, overwriting the existing course
|
||||
* Start an asynchronous task to import a course from a .tar.gz or .zip
|
||||
file into the specified course ID, overwriting the existing course
|
||||
* Get a status on an asynchronous task import
|
||||
|
||||
**Example Requests**
|
||||
@@ -59,7 +60,7 @@ class CourseImportView(CourseImportExportViewMixin, GenericAPIView):
|
||||
|
||||
* course_id: (required) A string representation of a Course ID,
|
||||
e.g., course-v1:edX+DemoX+Demo_Course
|
||||
* course_data: (required) The course .tar.gz file to import
|
||||
* course_data: (required) The course .tar.gz or .zip file to import
|
||||
|
||||
**POST Response Values**
|
||||
|
||||
@@ -83,7 +84,7 @@ class CourseImportView(CourseImportExportViewMixin, GenericAPIView):
|
||||
A GET request must include the following parameters.
|
||||
|
||||
* task_id: (required) The UUID of the task to check, e.g. "4b357bb3-2a1e-441d-9f6c-2210cf76606f"
|
||||
* filename: (required) The filename of the uploaded course .tar.gz
|
||||
* filename: (required) The filename of the uploaded course .tar.gz or .zip
|
||||
|
||||
**GET Response Values**
|
||||
|
||||
@@ -124,7 +125,7 @@ class CourseImportView(CourseImportExportViewMixin, GenericAPIView):
|
||||
)
|
||||
|
||||
filename = request.FILES['course_data'].name
|
||||
if not filename.endswith('.tar.gz'):
|
||||
if not filename.endswith(IMPORTABLE_FILE_TYPES):
|
||||
raise self.api_error(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
developer_message='Parameter in the wrong format',
|
||||
|
||||
@@ -14,5 +14,5 @@ PERMISSION_DENIED = _('Permission denied')
|
||||
UNKNOWN_ERROR_IN_IMPORT = _('Unknown error while importing course.')
|
||||
UNKNOWN_ERROR_IN_UNPACKING = _('An Unknown error occurred during the unpacking step.')
|
||||
UNKNOWN_USER_ID = _('Unknown User ID: {0}')
|
||||
UNSAFE_TAR_FILE = _('Unsafe tar file. Aborting import.')
|
||||
UNSAFE_ARCHIVE_FILE = _('Unsafe archive file. Aborting import.')
|
||||
USER_PERMISSION_DENIED = _('User permission denied.')
|
||||
|
||||
@@ -5,7 +5,6 @@ Script for importing a content library from a tar.gz file
|
||||
|
||||
import base64
|
||||
import os
|
||||
import tarfile
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user
|
||||
@@ -16,7 +15,7 @@ from opaque_keys.edx.locator import LibraryLocator
|
||||
from path import Path
|
||||
|
||||
from cms.djangoapps.contentstore.utils import add_instructor
|
||||
from openedx.core.lib.extract_tar import safetar_extractall
|
||||
from openedx.core.lib.extract_archive import safe_extractall
|
||||
from xmodule.contentstore.django import contentstore # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore import ModuleStoreEnum # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disable=wrong-import-order
|
||||
@@ -47,13 +46,10 @@ class Command(BaseCommand):
|
||||
course_dir = data_root / subdir
|
||||
|
||||
# Extract library archive
|
||||
tar_file = tarfile.open(archive_path) # lint-amnesty, pylint: disable=consider-using-with
|
||||
try:
|
||||
safetar_extractall(tar_file, course_dir)
|
||||
safe_extractall(archive_path, course_dir)
|
||||
except SuspiciousOperation as exc:
|
||||
raise CommandError(f'\n=== Course import {archive_path}: Unsafe tar file - {exc.args[0]}\n') # lint-amnesty, pylint: disable=raise-missing-from
|
||||
finally:
|
||||
tar_file.close()
|
||||
raise CommandError(f'\n=== Course import {archive_path}: Unsafe tar file - {exc.args[0]}\n') from exc
|
||||
|
||||
# Paths to the library.xml file
|
||||
abs_xml_path = os.path.join(course_dir, 'library')
|
||||
|
||||
@@ -47,8 +47,13 @@ from cms.djangoapps.contentstore.courseware_index import (
|
||||
SearchIndexingError
|
||||
)
|
||||
from cms.djangoapps.contentstore.storage import course_import_export_storage
|
||||
from cms.djangoapps.contentstore.utils import delete_course # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from cms.djangoapps.contentstore.utils import initialize_permissions, reverse_usage_url, translation_language
|
||||
from cms.djangoapps.contentstore.utils import (
|
||||
IMPORTABLE_FILE_TYPES,
|
||||
initialize_permissions,
|
||||
reverse_usage_url,
|
||||
translation_language,
|
||||
delete_course
|
||||
)
|
||||
from cms.djangoapps.models.settings.course_metadata import CourseMetadata
|
||||
from common.djangoapps.course_action_state.models import CourseRerunState
|
||||
from common.djangoapps.student.auth import has_course_author_access
|
||||
@@ -62,20 +67,15 @@ from openedx.core.djangoapps.discussions.models import DiscussionsConfiguration,
|
||||
from openedx.core.djangoapps.discussions.tasks import update_unit_discussion_state_from_discussion_blocks
|
||||
from openedx.core.djangoapps.embargo.models import CountryAccessRule, RestrictedCourse
|
||||
from openedx.core.lib.blockstore_api import get_collection
|
||||
from openedx.core.lib.extract_tar import safetar_extractall
|
||||
from xmodule.contentstore.django import contentstore # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.course_block import CourseFields # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.exceptions import SerializationError # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore import ModuleStoreEnum # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore import COURSE_ROOT, LIBRARY_ROOT # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.exceptions import ItemNotFoundError # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.exceptions import DuplicateCourseError, InvalidProctoringProvider
|
||||
from xmodule.modulestore.xml_exporter import export_library_to_xml # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.xml_exporter import export_course_to_xml
|
||||
from xmodule.modulestore.xml_importer import import_library_from_xml # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.xml_importer import CourseImportException, import_course_from_xml
|
||||
|
||||
from openedx.core.lib.extract_archive import safe_extractall
|
||||
from xmodule.contentstore.django import contentstore
|
||||
from xmodule.course_block import CourseFields
|
||||
from xmodule.exceptions import SerializationError
|
||||
from xmodule.modulestore import COURSE_ROOT, LIBRARY_ROOT, ModuleStoreEnum
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from xmodule.modulestore.exceptions import DuplicateCourseError, InvalidProctoringProvider, ItemNotFoundError
|
||||
from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml
|
||||
from xmodule.modulestore.xml_importer import CourseImportException, import_course_from_xml, import_library_from_xml
|
||||
from .outlines import update_outline_from_modulestore
|
||||
from .outlines_regenerate import CourseOutlineRegenerate
|
||||
from .toggles import bypass_olx_failure_enabled
|
||||
@@ -480,7 +480,7 @@ def sync_discussion_settings(course_key, user):
|
||||
# lint-amnesty, pylint: disable=too-many-statements
|
||||
def import_olx(self, user_id, course_key_string, archive_path, archive_name, language):
|
||||
"""
|
||||
Import a course or library from a provided OLX .tar.gz archive.
|
||||
Import a course or library from a provided OLX .tar.gz or .zip archive.
|
||||
"""
|
||||
set_code_owner_attribute_from_module(__name__)
|
||||
current_step = 'Unpacking'
|
||||
@@ -517,7 +517,7 @@ def import_olx(self, user_id, course_key_string, archive_path, archive_name, lan
|
||||
|
||||
def file_is_supported():
|
||||
"""Check if it is a supported file."""
|
||||
file_is_valid = archive_name.endswith('.tar.gz')
|
||||
file_is_valid = archive_name.endswith(IMPORTABLE_FILE_TYPES)
|
||||
|
||||
if not file_is_valid:
|
||||
message = f'Unsupported file {archive_name}'
|
||||
@@ -647,17 +647,14 @@ def import_olx(self, user_id, course_key_string, archive_path, archive_name, lan
|
||||
|
||||
# try-finally block for proper clean up after receiving file.
|
||||
try:
|
||||
tar_file = tarfile.open(temp_filepath) # lint-amnesty, pylint: disable=consider-using-with
|
||||
try:
|
||||
safetar_extractall(tar_file, (course_dir + '/'))
|
||||
safe_extractall(temp_filepath, course_dir)
|
||||
except SuspiciousOperation as exc:
|
||||
with translation_language(language):
|
||||
self.status.fail(UserErrors.UNSAFE_TAR_FILE)
|
||||
LOGGER.error(f'{log_prefix}: Unsafe tar file')
|
||||
self.status.fail(UserErrors.UNSAFE_ARCHIVE_FILE)
|
||||
LOGGER.error(f'{log_prefix}: Unsafe archive file')
|
||||
monitor_import_failure(courselike_key, current_step, exception=exc)
|
||||
return
|
||||
finally:
|
||||
tar_file.close()
|
||||
|
||||
current_step = 'Verifying'
|
||||
self.status.set_state(current_step)
|
||||
|
||||
@@ -101,6 +101,7 @@ from xmodule.partitions.partitions_service import get_all_partitions_for_course
|
||||
from xmodule.services import SettingsService, ConfigurationService, TeamsConfigurationService
|
||||
|
||||
|
||||
IMPORTABLE_FILE_TYPES = ('.tar.gz', '.zip')
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
@@ -42,8 +42,13 @@ from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disa
|
||||
from ..storage import course_import_export_storage
|
||||
from ..tasks import CourseExportTask, CourseImportTask, export_olx, import_olx
|
||||
from ..toggles import use_new_export_page, use_new_import_page
|
||||
from ..utils import reverse_course_url, reverse_library_url, get_export_url, get_import_url
|
||||
|
||||
from ..utils import (
|
||||
reverse_course_url,
|
||||
reverse_library_url,
|
||||
get_export_url,
|
||||
get_import_url,
|
||||
IMPORTABLE_FILE_TYPES,
|
||||
)
|
||||
__all__ = [
|
||||
'import_handler', 'import_status_handler',
|
||||
'export_handler', 'export_output_handler', 'export_status_handler',
|
||||
@@ -70,7 +75,7 @@ def import_handler(request, course_key_string):
|
||||
html: return html page for import page
|
||||
json: not supported
|
||||
POST or PUT
|
||||
json: import a course via the .tar.gz file specified in request.FILES
|
||||
json: import a course via the .tar.gz or .zip file specified in request.FILES
|
||||
"""
|
||||
courselike_key = CourseKey.from_string(course_key_string)
|
||||
library = isinstance(courselike_key, LibraryLocator)
|
||||
@@ -123,7 +128,7 @@ def _write_chunk(request, courselike_key): # lint-amnesty, pylint: disable=too-
|
||||
"""
|
||||
Write the OLX file data chunk from the given request to the local filesystem.
|
||||
"""
|
||||
# Upload .tar.gz to local filesystem for one-server installations not using S3 or Swift
|
||||
# Upload .tar.gz or .zip to local filesystem for one-server installations not using S3 or Swift
|
||||
data_root = path(settings.GITHUB_REPO_ROOT)
|
||||
subdir = base64.urlsafe_b64encode(repr(courselike_key).encode('utf-8')).decode('utf-8')
|
||||
course_dir = data_root / subdir
|
||||
@@ -141,8 +146,8 @@ def _write_chunk(request, courselike_key): # lint-amnesty, pylint: disable=too-
|
||||
# Use sessions to keep info about import progress
|
||||
_save_request_status(request, courselike_string, 0)
|
||||
|
||||
if not filename.endswith('.tar.gz'):
|
||||
error_message = _('We only support uploading a .tar.gz file.')
|
||||
if not filename.endswith(IMPORTABLE_FILE_TYPES):
|
||||
error_message = _('We support uploading files in one of the following formats: {IMPORTABLE_FILE_TYPES}')
|
||||
_save_request_status(request, courselike_string, -1)
|
||||
monitor_import_failure(courselike_key, current_step, message=error_message)
|
||||
return error_response(error_message, 415, 0)
|
||||
|
||||
@@ -13,6 +13,7 @@ import tempfile
|
||||
from io import BytesIO
|
||||
from unittest.mock import Mock, patch
|
||||
from uuid import uuid4
|
||||
from zipfile import ZipFile
|
||||
|
||||
import ddt
|
||||
import lxml
|
||||
@@ -37,20 +38,20 @@ from cms.djangoapps.models.settings.course_metadata import CourseMetadata
|
||||
from common.djangoapps.student import auth
|
||||
from common.djangoapps.student.roles import CourseInstructorRole, CourseStaffRole
|
||||
from common.djangoapps.util import milestones_helpers
|
||||
from openedx.core.lib.extract_tar import safetar_extractall
|
||||
from xmodule.contentstore.django import contentstore # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore import LIBRARY_ROOT, ModuleStoreEnum # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.exceptions import DuplicateCourseError, InvalidProctoringProvider # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.tests.factories import CourseFactory, BlockFactory, LibraryFactory # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.tests.utils import SPLIT_MODULESTORE_SETUP, TEST_DATA_DIR, MongoContentstoreBuilder # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.xml_importer import ( # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from openedx.core.lib.extract_archive import safe_extractall
|
||||
from xmodule.contentstore.django import contentstore
|
||||
from xmodule.modulestore import LIBRARY_ROOT, ModuleStoreEnum
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from xmodule.modulestore.exceptions import DuplicateCourseError, InvalidProctoringProvider
|
||||
from xmodule.modulestore.tests.factories import BlockFactory, CourseFactory, LibraryFactory
|
||||
from xmodule.modulestore.tests.utils import SPLIT_MODULESTORE_SETUP, TEST_DATA_DIR, MongoContentstoreBuilder
|
||||
from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml
|
||||
from xmodule.modulestore.xml_importer import (
|
||||
BlockFailedToImport,
|
||||
CourseImportManager,
|
||||
ErrorReadingFileException,
|
||||
import_course_from_xml,
|
||||
import_library_from_xml,
|
||||
BlockFailedToImport,
|
||||
import_library_from_xml
|
||||
)
|
||||
|
||||
TASK_LOGGER = 'cms.djangoapps.contentstore.tasks.LOGGER'
|
||||
@@ -183,6 +184,18 @@ class ImportTestCase(CourseTestCase):
|
||||
with tarfile.open(self.good_tar, "w:gz") as gtar:
|
||||
gtar.add(good_dir)
|
||||
|
||||
self.good_zip = os.path.join(self.content_dir, "good.zip")
|
||||
with ZipFile(self.good_zip, "w") as gzip:
|
||||
for folder_name, subfolders, filenames in os.walk(good_dir):
|
||||
for filename in filenames:
|
||||
file_path = os.path.join(folder_name, filename)
|
||||
gzip.write(file_path, file_path[len(good_dir) + 1:])
|
||||
|
||||
self.good_archives = {
|
||||
'zip': self.good_zip,
|
||||
'tar': self.good_tar,
|
||||
}
|
||||
|
||||
# Bad course (no 'course.xml' file):
|
||||
bad_dir = tempfile.mkdtemp(dir=self.content_dir)
|
||||
touch(os.path.join(bad_dir, "bad.xml"))
|
||||
@@ -190,6 +203,17 @@ class ImportTestCase(CourseTestCase):
|
||||
with tarfile.open(self.bad_tar, "w:gz") as btar:
|
||||
btar.add(bad_dir)
|
||||
|
||||
self.bad_zip = os.path.join(self.content_dir, "bad.zip")
|
||||
with ZipFile(self.bad_zip, "w") as bzip:
|
||||
for folder_name, subfolders, filenames in os.walk(bad_dir):
|
||||
for filename in filenames:
|
||||
file_path = os.path.join(folder_name, filename)
|
||||
bzip.write(file_path, file_path[len(good_dir) + 1:])
|
||||
|
||||
self.bad_archives = {
|
||||
'zip': self.bad_zip,
|
||||
'tar': self.bad_tar,
|
||||
}
|
||||
self.unsafe_common_dir = path(tempfile.mkdtemp(dir=self.content_dir))
|
||||
self.log_prefix = f"Course import {self.course.id}:"
|
||||
|
||||
@@ -211,53 +235,60 @@ class ImportTestCase(CourseTestCase):
|
||||
if expected_message:
|
||||
self.assertEqual(response['Message'], expected_message)
|
||||
|
||||
def get_import_status(self, course_id, tarfile_path):
|
||||
def get_import_status(self, course_id, file_path):
|
||||
"""Helper method to get course import status."""
|
||||
resp = self.client.get(
|
||||
reverse_course_url(
|
||||
'import_status_handler',
|
||||
course_id,
|
||||
kwargs={'filename': os.path.split(tarfile_path)[1]}
|
||||
kwargs={'filename': os.path.split(file_path)[1]}
|
||||
)
|
||||
)
|
||||
return json.loads(resp.content)
|
||||
|
||||
def import_tarfile_in_course(self, tarfile_path):
|
||||
"""Helper method to import provided tarfile in the course."""
|
||||
with open(tarfile_path, 'rb') as gtar:
|
||||
args = {"name": tarfile_path, "course-data": [gtar]}
|
||||
def import_file_in_course(self, file_path):
|
||||
"""Helper method to import provided file in the course."""
|
||||
with open(file_path, 'rb') as file_data:
|
||||
args = {"name": file_path, "course-data": [file_data]}
|
||||
return self.client.post(self.url, args)
|
||||
|
||||
@patch(TASK_LOGGER)
|
||||
def test_no_coursexml(self, mocked_log):
|
||||
@ddt.data('tar', 'zip')
|
||||
def test_no_coursexml(self, fmt, mocked_log):
|
||||
"""
|
||||
Check that the response for a tar.gz import without a course.xml is
|
||||
Check that the response for a file import without a course.xml is
|
||||
correct.
|
||||
"""
|
||||
bad_file = self.bad_archives[fmt]
|
||||
error_msg = import_error.FILE_MISSING.format('course.xml')
|
||||
expected_error_mesg = f'{self.log_prefix} {error_msg}'
|
||||
response = self.import_tarfile_in_course(self.bad_tar)
|
||||
response = self.import_file_in_course(bad_file)
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
mocked_log.error.assert_called_once_with(expected_error_mesg)
|
||||
|
||||
# Check that `import_status` returns the appropriate stage (i.e., the
|
||||
# stage at which import failed).
|
||||
resp_status = self.get_import_status(self.course.id, self.bad_tar)
|
||||
resp_status = self.get_import_status(self.course.id, bad_file)
|
||||
self.assertImportStatusResponse(resp_status, self.VerifyingError, error_msg)
|
||||
|
||||
def test_with_coursexml(self):
|
||||
@ddt.data('zip', 'tar')
|
||||
def test_with_coursexml(self, fmt):
|
||||
"""
|
||||
Check that the response for a tar.gz import with a course.xml is
|
||||
Check that the response for a file import with a course.xml is
|
||||
correct.
|
||||
"""
|
||||
response = self.import_tarfile_in_course(self.good_tar)
|
||||
good_file = self.good_archives[fmt]
|
||||
response = self.import_file_in_course(good_file)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
def test_import_in_existing_course(self):
|
||||
@ddt.data('zip', 'tar')
|
||||
def test_import_in_existing_course(self, fmt):
|
||||
"""
|
||||
Check that course is imported successfully in existing course and users have their access roles
|
||||
"""
|
||||
good_file = self.good_archives[fmt]
|
||||
|
||||
# Create a non_staff user and add it to course staff only
|
||||
__, nonstaff_user = self.create_non_staff_authed_user_client()
|
||||
auth.add_users(self.user, CourseStaffRole(self.course.id), nonstaff_user)
|
||||
@@ -267,7 +298,7 @@ class ImportTestCase(CourseTestCase):
|
||||
display_name_before_import = course.display_name
|
||||
|
||||
# Check that global staff user can import course
|
||||
response = self.import_tarfile_in_course(self.good_tar)
|
||||
response = self.import_file_in_course(good_file)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
course = self.store.get_course(self.course.id)
|
||||
@@ -283,7 +314,7 @@ class ImportTestCase(CourseTestCase):
|
||||
|
||||
# Now course staff user can also successfully import course
|
||||
self.client.login(username=nonstaff_user.username, password='foo')
|
||||
resp = self.import_tarfile_in_course(self.good_tar)
|
||||
resp = self.import_file_in_course(good_file)
|
||||
self.assertEqual(resp.status_code, 200)
|
||||
|
||||
# Now check that non_staff user has his same role
|
||||
@@ -375,7 +406,7 @@ class ImportTestCase(CourseTestCase):
|
||||
|
||||
def try_tar(tarpath):
|
||||
""" Attempt to tar an unacceptable file """
|
||||
resp = self.import_tarfile_in_course(tarpath)
|
||||
resp = self.import_file_in_course(tarpath)
|
||||
self.assertEqual(resp.status_code, 200)
|
||||
|
||||
resp = self.get_import_status(self.course.id, tarpath)
|
||||
@@ -452,8 +483,7 @@ class ImportTestCase(CourseTestCase):
|
||||
extract_dir_relative = path.relpath(extract_dir, settings.DATA_DIR)
|
||||
|
||||
try:
|
||||
with tarfile.open(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz') as tar:
|
||||
safetar_extractall(tar, extract_dir)
|
||||
safe_extractall(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz', extract_dir)
|
||||
library_items = import_library_from_xml(
|
||||
self.store,
|
||||
self.user.id,
|
||||
@@ -497,8 +527,7 @@ class ImportTestCase(CourseTestCase):
|
||||
extract_dir_relative = path.relpath(extract_dir, settings.DATA_DIR)
|
||||
|
||||
try:
|
||||
with tarfile.open(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz') as tar:
|
||||
safetar_extractall(tar, extract_dir)
|
||||
safe_extractall(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz', extract_dir)
|
||||
import_library_from_xml(
|
||||
self.store,
|
||||
self.user.id,
|
||||
@@ -530,8 +559,7 @@ class ImportTestCase(CourseTestCase):
|
||||
extract_dir_relative = path.relpath(extract_dir, settings.DATA_DIR)
|
||||
|
||||
try:
|
||||
with tarfile.open(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz') as tar:
|
||||
safetar_extractall(tar, extract_dir)
|
||||
safe_extractall(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz', extract_dir)
|
||||
import_library_from_xml(
|
||||
source_store,
|
||||
self.user.id,
|
||||
@@ -547,72 +575,83 @@ class ImportTestCase(CourseTestCase):
|
||||
shutil.rmtree(extract_dir)
|
||||
|
||||
@patch(TASK_LOGGER)
|
||||
def test_import_failed_with_no_user_permission(self, mocked_log):
|
||||
@ddt.data('zip', 'tar')
|
||||
def test_import_failed_with_no_user_permission(self, fmt, mocked_log):
|
||||
"""
|
||||
Tests course import failure when user have no permission
|
||||
"""
|
||||
good_file = self.good_archives[fmt]
|
||||
expected_error_mesg = f'{self.log_prefix} User permission denied: {self.user.username}'
|
||||
with patch('cms.djangoapps.contentstore.tasks.has_course_author_access', Mock(return_value=False)):
|
||||
response = self.import_tarfile_in_course(self.good_tar)
|
||||
response = self.import_file_in_course(good_file)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
mocked_log.error.assert_called_once_with(expected_error_mesg)
|
||||
|
||||
status_response = self.get_import_status(self.course.id, self.good_tar)
|
||||
status_response = self.get_import_status(self.course.id, good_file)
|
||||
self.assertImportStatusResponse(status_response, self.UnpackingError, import_error.COURSE_PERMISSION_DENIED)
|
||||
|
||||
@patch(TASK_LOGGER)
|
||||
def test_import_failed_with_unknown_user(self, mocked_log):
|
||||
@ddt.data('zip', 'tar')
|
||||
def test_import_failed_with_unknown_user(self, fmt, mocked_log):
|
||||
"""
|
||||
Tests that course import failure with an unknown user id.
|
||||
"""
|
||||
expected_error_mesg = f'{self.log_prefix} Unknown User: {self.user.id}'
|
||||
|
||||
good_file = self.good_archives[fmt]
|
||||
with patch('django.contrib.auth.models.User.objects.get', side_effect=User.DoesNotExist):
|
||||
response = self.import_tarfile_in_course(self.good_tar)
|
||||
response = self.import_file_in_course(good_file)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
mocked_log.error.assert_called_once_with(expected_error_mesg)
|
||||
|
||||
status_response = self.get_import_status(self.course.id, self.good_tar)
|
||||
status_response = self.get_import_status(self.course.id, good_file)
|
||||
self.assertImportStatusResponse(status_response, self.UnpackingError, import_error.USER_PERMISSION_DENIED)
|
||||
|
||||
@patch(TASK_LOGGER)
|
||||
def test_import_failed_with_unsafe_tarfile(self, mocked_log):
|
||||
@ddt.data('zip', 'tar')
|
||||
def test_import_failed_with_unsafe_file(self, fmt, mocked_log):
|
||||
"""
|
||||
Tests course import failure with unsafe tar file.
|
||||
Tests course import failure with unsafe file.
|
||||
"""
|
||||
expected_error_mesg = f'{self.log_prefix} Unsafe tar file'
|
||||
with patch('cms.djangoapps.contentstore.tasks.safetar_extractall', side_effect=SuspiciousOperation):
|
||||
response = self.import_tarfile_in_course(self.good_tar)
|
||||
good_file = self.good_archives[fmt]
|
||||
expected_error_mesg = f'{self.log_prefix} Unsafe archive file'
|
||||
with patch('cms.djangoapps.contentstore.tasks.safe_extractall', side_effect=SuspiciousOperation):
|
||||
response = self.import_file_in_course(good_file)
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
mocked_log.error.assert_called_once_with(expected_error_mesg)
|
||||
|
||||
status_response = self.get_import_status(self.course.id, self.good_tar)
|
||||
self.assertImportStatusResponse(status_response, self.UnpackingError, import_error.UNSAFE_TAR_FILE)
|
||||
status_response = self.get_import_status(self.course.id, good_file)
|
||||
self.assertImportStatusResponse(status_response, self.UnpackingError, import_error.UNSAFE_ARCHIVE_FILE)
|
||||
|
||||
@patch(TASK_LOGGER)
|
||||
def test_import_failed_with_unknown_unpacking_error(self, mocked_log):
|
||||
@ddt.data('zip', 'tar')
|
||||
def test_import_failed_with_unknown_unpacking_error(self, fmt, mocked_log):
|
||||
"""
|
||||
Tests that course import failure for unknown error while unpacking
|
||||
"""
|
||||
good_file = self.good_archives[fmt]
|
||||
expected_error_mesg = f'{self.log_prefix} Unknown error while unpacking'
|
||||
with patch.object(course_import_export_storage, 'open', side_effect=Exception):
|
||||
response = self.import_tarfile_in_course(self.good_tar)
|
||||
response = self.import_file_in_course(good_file)
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
mocked_log.exception.assert_called_once_with(expected_error_mesg, exc_info=True)
|
||||
|
||||
status_response = self.get_import_status(self.course.id, self.good_tar)
|
||||
status_response = self.get_import_status(self.course.id, good_file)
|
||||
self.assertImportStatusResponse(status_response, self.UnpackingError, import_error.UNKNOWN_ERROR_IN_UNPACKING)
|
||||
|
||||
@patch(TASK_LOGGER)
|
||||
@patch('olxcleaner.validate')
|
||||
@patch('cms.djangoapps.contentstore.tasks.report_error_summary')
|
||||
@patch('cms.djangoapps.contentstore.tasks.report_errors')
|
||||
def test_import_failed_with_olx_validations(self, mocked_report, mocked_summary, mocked_validate, mocked_log):
|
||||
@ddt.data('zip', 'tar')
|
||||
def test_import_failed_with_olx_validations(self, fmt, mocked_report, mocked_summary, mocked_validate,
|
||||
mocked_log):
|
||||
"""
|
||||
Tests that course import failure for unknown error while unpacking
|
||||
"""
|
||||
good_file = self.good_archives[fmt]
|
||||
errors = [Mock(description='DuplicateURLNameError', level_val=3)]
|
||||
mocked_summary.return_value = [f'ERROR {error.description} found in content' for error in errors]
|
||||
mocked_report.return_value = [f'Errors: {len(errors)}']
|
||||
@@ -621,12 +660,12 @@ class ImportTestCase(CourseTestCase):
|
||||
]
|
||||
expected_error_mesg = f'{self.log_prefix} CourseOlx validation failed.'
|
||||
with patch.dict(settings.FEATURES, ENABLE_COURSE_OLX_VALIDATION=True):
|
||||
response = self.import_tarfile_in_course(self.good_tar)
|
||||
response = self.import_file_in_course(good_file)
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
mocked_log.error.assert_called_once_with(expected_error_mesg)
|
||||
|
||||
status_response = self.get_import_status(self.course.id, self.good_tar)
|
||||
status_response = self.get_import_status(self.course.id, good_file)
|
||||
self.assertImportStatusResponse(status_response, self.VerifyingError, import_error.OLX_VALIDATION_FAILED)
|
||||
|
||||
@patch(TASK_LOGGER)
|
||||
@@ -645,14 +684,16 @@ class ImportTestCase(CourseTestCase):
|
||||
"""
|
||||
Test that when course import fails with a known failure, user get a descriptive error message.
|
||||
"""
|
||||
mocked_import.side_effect = exc
|
||||
expected_exception_messages = f"{self.log_prefix} Error while importing course: {str(exc)}"
|
||||
response = self.import_tarfile_in_course(self.good_tar)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
mocked_log.exception.assert_called_once_with(expected_exception_messages)
|
||||
for good_file in self.good_archives.values():
|
||||
mocked_import.side_effect = exc
|
||||
expected_exception_messages = f"{self.log_prefix} Error while importing course: {str(exc)}"
|
||||
response = self.import_file_in_course(good_file)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
mocked_log.exception.assert_called_once_with(expected_exception_messages)
|
||||
mocked_log.reset_mock()
|
||||
|
||||
status_response = self.get_import_status(self.course.id, self.good_tar)
|
||||
self.assertImportStatusResponse(status_response, self.UpdatingError, expected_mesg)
|
||||
status_response = self.get_import_status(self.course.id, good_file)
|
||||
self.assertImportStatusResponse(status_response, self.UpdatingError, expected_mesg)
|
||||
|
||||
@patch(TASK_LOGGER)
|
||||
@patch.object(CourseImportManager, 'import_children')
|
||||
@@ -665,22 +706,26 @@ class ImportTestCase(CourseTestCase):
|
||||
"""
|
||||
Test that import status and logged exception when course import fails with an unknown failure.
|
||||
"""
|
||||
mocked_import.side_effect = exception
|
||||
expected_exc_mesg = f"{self.log_prefix} Error while importing course: {str(exception)}"
|
||||
response = self.import_tarfile_in_course(self.good_tar)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
mocked_log.exception.assert_called_once_with(expected_exc_mesg)
|
||||
for good_file in self.good_archives.values():
|
||||
mocked_import.side_effect = exception
|
||||
expected_exc_mesg = f"{self.log_prefix} Error while importing course: {str(exception)}"
|
||||
response = self.import_file_in_course(good_file)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
mocked_log.exception.assert_called_once_with(expected_exc_mesg)
|
||||
mocked_log.reset_mock()
|
||||
|
||||
status_response = self.get_import_status(self.course.id, self.good_tar)
|
||||
self.assertImportStatusResponse(status_response, self.UpdatingError, import_error.UNKNOWN_ERROR_IN_IMPORT)
|
||||
status_response = self.get_import_status(self.course.id, good_file)
|
||||
self.assertImportStatusResponse(status_response, self.UpdatingError, import_error.UNKNOWN_ERROR_IN_IMPORT)
|
||||
|
||||
def test_import_status_response_is_not_cached(self):
|
||||
@ddt.data('zip', 'tar')
|
||||
def test_import_status_response_is_not_cached(self, fmt):
|
||||
"""To test import_status endpoint response is not cached"""
|
||||
good_file = self.good_archives[fmt]
|
||||
resp = self.client.get(
|
||||
reverse_course_url(
|
||||
'import_status_handler',
|
||||
self.course.id,
|
||||
kwargs={'filename': os.path.split(self.good_tar)[1]}
|
||||
kwargs={'filename': os.path.split(good_file)[1]}
|
||||
)
|
||||
)
|
||||
self.assertEqual(resp.headers['Cache-Control'], 'no-cache, no-store, must-revalidate')
|
||||
|
||||
@@ -9,6 +9,8 @@ define([
|
||||
], function(domReady, Import, $, gettext) {
|
||||
'use strict';
|
||||
|
||||
const IMPORTABLE_FILE_TYPES = /\.tar\.gz$|\.zip$/;
|
||||
|
||||
return {
|
||||
Import: function(feedbackUrl, library) {
|
||||
var dbError,
|
||||
@@ -35,7 +37,7 @@ define([
|
||||
var filepath = $(this).val(),
|
||||
msg;
|
||||
|
||||
if (filepath.substr(filepath.length - 6, 6) === 'tar.gz') {
|
||||
if (IMPORTABLE_FILE_TYPES.test(filepath)) {
|
||||
$('.error-block').hide();
|
||||
$('.file-name').text($(this).val().replace('C:\\fakepath\\', ''));
|
||||
$('.file-name-block').show();
|
||||
@@ -44,7 +46,7 @@ define([
|
||||
$('.progress').show();
|
||||
} else {
|
||||
msg = gettext('File format not supported. Please upload a file with a {ext} extension.')
|
||||
.replace('{ext}', '<code>tar.gz</code>');
|
||||
.replace('{ext}', '<code>tar.gz or zip</code>');
|
||||
|
||||
$('.error-block').text(msg).show();
|
||||
}
|
||||
@@ -84,7 +86,7 @@ define([
|
||||
|
||||
file = data.files[0];
|
||||
|
||||
if (file.name.match(/tar\.gz$/)) {
|
||||
if (IMPORTABLE_FILE_TYPES.test(file.name)) {
|
||||
$submitBtn.click(function(event) {
|
||||
event.preventDefault();
|
||||
|
||||
|
||||
@@ -45,13 +45,14 @@ else:
|
||||
<article class="content-primary" role="main">
|
||||
<div class="introduction">
|
||||
## Translators: ".tar.gz" is a file extension, and files with that extension are called "gzipped tar files": these terms should not be translated
|
||||
## Translators: ".zip" is a file extension, and files with that extension are called "zipped files": these terms should not be translated
|
||||
%if library:
|
||||
<p>${Text(_("Be sure you want to import a library before continuing. The contents of the imported library will replace the contents of the existing library. {em_start}You cannot undo a library import{em_end}. Before you proceed, we recommend that you export the current library, so that you have a backup copy of it.")).format(em_start=HTML('<strong>'), em_end=HTML("</strong>"))}</p>
|
||||
<p>${_("The library that you import must be in a .tar.gz file (that is, a .tar file compressed with GNU Zip). This .tar.gz file must contain a library.xml file. It may also contain other files.")}</p>
|
||||
<p>${_("The import process has five stages. During the first two stages, you must stay on this page. You can leave this page after the Unpacking stage has completed. We recommend, however, that you don't make important changes to your library until the import operation has completed.")}</p>
|
||||
%else:
|
||||
<p>${Text(_("Be sure you want to import a course before continuing. The contents of the imported course will replace the contents of the existing course. {em_start}You cannot undo a course import{em_end}. Before you proceed, we recommend that you export the current course, so that you have a backup copy of it.")).format(em_start=HTML('<strong>'), em_end=HTML("</strong>"))}</p>
|
||||
<p>${_("The course that you import must be in a .tar.gz file (that is, a .tar file compressed with GNU Zip). This .tar.gz file must contain a course.xml file. It may also contain other files.")}</p>
|
||||
<p>${_("The course that you import must be in a .tar.gz file (that is, a .tar file compressed with GNU Zip) or .zip (that is a compressed file). This .tar.gz or .zip file must contain a course.xml file. It may also contain other files.")}</p>
|
||||
<p>${_("The import process has five stages. During the first two stages, you must stay on this page. You can leave this page after the Unpacking stage has completed. We recommend, however, that you don't make important changes to your course until the import operation has completed.")}</p>
|
||||
%endif
|
||||
|
||||
@@ -60,11 +61,12 @@ else:
|
||||
<form id="fileupload" method="post" enctype="multipart/form-data" class="import-form">
|
||||
|
||||
## Translators: ".tar.gz" is a file extension, and files with that extension are called "gzipped tar files": these terms should not be translated
|
||||
## Translators: ".zip" is a file extension, and files with that extension are called "zipped files": these terms should not be translated
|
||||
<h2 class="title">
|
||||
%if library:
|
||||
${_("Select a .tar.gz File to Replace Your Library Content")}
|
||||
%else:
|
||||
${_("Select a .tar.gz File to Replace Your Course Content")}
|
||||
${_("Select a .tar.gz or .zip File to Replace Your Course Content")}
|
||||
%endif
|
||||
</h2>
|
||||
|
||||
|
||||
109
openedx/core/lib/extract_archive.py
Normal file
109
openedx/core/lib/extract_archive.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
Safe version of extractall which does not extract any files that would
|
||||
be, or symlink to a file that is, outside of the directory extracted in.
|
||||
|
||||
Adapted from:
|
||||
http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python
|
||||
"""
|
||||
|
||||
import logging
|
||||
from os.path import abspath, dirname
|
||||
from os.path import join as joinpath
|
||||
from os.path import realpath
|
||||
from typing import List, Union
|
||||
from zipfile import ZipFile, ZipInfo
|
||||
from tarfile import TarFile, TarInfo
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import SuspiciousOperation
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def resolved(rpath):
|
||||
"""
|
||||
Returns the canonical absolute path of `rpath`.
|
||||
"""
|
||||
return realpath(abspath(rpath))
|
||||
|
||||
|
||||
def _is_bad_path(path, base):
|
||||
"""
|
||||
Is (the canonical absolute path of) `path` outside `base`?
|
||||
"""
|
||||
return not resolved(joinpath(base, path)).startswith(base)
|
||||
|
||||
|
||||
def _is_bad_link(info, base):
|
||||
"""
|
||||
Does the file sym- or hard-link to files outside `base`?
|
||||
"""
|
||||
# Links are interpreted relative to the directory containing the link
|
||||
tip = resolved(joinpath(base, dirname(info.name)))
|
||||
return _is_bad_path(info.linkname, base=tip)
|
||||
|
||||
|
||||
def _check_tarinfo(finfo: TarInfo, base: str):
|
||||
"""
|
||||
Checks a file in a tar archive (TarInfo object) for safety.
|
||||
|
||||
It ensures that the file isn't a hard link or symlink to a file pointing to
|
||||
a path outside the archive and checks that the file isn't a device file.
|
||||
|
||||
Raises:
|
||||
SuspiciousOperation: If the TarInfo object is found to be a
|
||||
hard link, symlink, or a special device file.
|
||||
"""
|
||||
if finfo.issym() and _is_bad_link(finfo, base):
|
||||
log.debug("File %r is blocked: Hard link to %r", finfo.name, finfo.linkname)
|
||||
raise SuspiciousOperation("Hard link")
|
||||
if finfo.islnk() and _is_bad_link(finfo, base):
|
||||
log.debug("File %r is blocked: Symlink to %r", finfo.name, finfo.linkname)
|
||||
raise SuspiciousOperation("Symlink")
|
||||
if finfo.isdev():
|
||||
log.debug("File %r is blocked: FIFO, device or character file", finfo.name)
|
||||
raise SuspiciousOperation("Dev file")
|
||||
|
||||
|
||||
def _checkmembers(members: Union[List[ZipInfo], List[TarInfo]], base: str):
|
||||
"""
|
||||
Check that all elements of the archive file are safe.
|
||||
"""
|
||||
base = resolved(base)
|
||||
|
||||
# check that we're not trying to import outside of the github_repo_root
|
||||
if not base.startswith(resolved(settings.GITHUB_REPO_ROOT)):
|
||||
raise SuspiciousOperation("Attempted to import course outside of data dir")
|
||||
|
||||
for finfo in members:
|
||||
if isinstance(finfo, ZipInfo):
|
||||
filename = finfo.filename
|
||||
elif isinstance(finfo, TarInfo):
|
||||
filename = finfo.name
|
||||
_check_tarinfo(finfo, base)
|
||||
if _is_bad_path(filename, base):
|
||||
log.debug("File %r is blocked (illegal path)", filename)
|
||||
raise SuspiciousOperation("Illegal path")
|
||||
|
||||
|
||||
def safe_extractall(file_name, output_path):
|
||||
"""
|
||||
Extract Zip or Tar files
|
||||
"""
|
||||
archive = None
|
||||
if not output_path.endswith("/"):
|
||||
output_path += "/"
|
||||
try:
|
||||
if file_name.endswith(".zip"):
|
||||
archive = ZipFile(file_name, "r")
|
||||
members = archive.infolist()
|
||||
elif file_name.endswith(".tar.gz"):
|
||||
archive = TarFile.open(file_name)
|
||||
members = archive.getmembers()
|
||||
else:
|
||||
raise ValueError("Unsupported archive format")
|
||||
_checkmembers(members, output_path)
|
||||
archive.extractall(output_path)
|
||||
finally:
|
||||
if archive:
|
||||
archive.close()
|
||||
@@ -1,77 +0,0 @@
|
||||
"""
|
||||
Safe version of tarfile.extractall which does not extract any files that would
|
||||
be, or symlink to a file that is, outside of the directory extracted in.
|
||||
|
||||
Adapted from:
|
||||
http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python
|
||||
"""
|
||||
|
||||
import logging
|
||||
from os.path import join as joinpath
|
||||
from os.path import abspath, dirname, realpath
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import SuspiciousOperation
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def resolved(rpath):
|
||||
"""
|
||||
Returns the canonical absolute path of `rpath`.
|
||||
"""
|
||||
return realpath(abspath(rpath))
|
||||
|
||||
|
||||
def _is_bad_path(path, base):
|
||||
"""
|
||||
Is (the canonical absolute path of) `path` outside `base`?
|
||||
"""
|
||||
return not resolved(joinpath(base, path)).startswith(base)
|
||||
|
||||
|
||||
def _is_bad_link(info, base):
|
||||
"""
|
||||
Does the file sym- or hard-link to files outside `base`?
|
||||
"""
|
||||
# Links are interpreted relative to the directory containing the link
|
||||
tip = resolved(joinpath(base, dirname(info.name)))
|
||||
return _is_bad_path(info.linkname, base=tip)
|
||||
|
||||
|
||||
def safemembers(members, base):
|
||||
"""
|
||||
Check that all elements of a tar file are safe.
|
||||
"""
|
||||
|
||||
base = resolved(base)
|
||||
|
||||
# check that we're not trying to import outside of the github_repo_root
|
||||
if not base.startswith(resolved(settings.GITHUB_REPO_ROOT)):
|
||||
raise SuspiciousOperation("Attempted to import course outside of data dir")
|
||||
|
||||
for finfo in members:
|
||||
if _is_bad_path(finfo.name, base): # lint-amnesty, pylint: disable=no-else-raise
|
||||
log.debug("File %r is blocked (illegal path)", finfo.name)
|
||||
raise SuspiciousOperation("Illegal path")
|
||||
elif finfo.issym() and _is_bad_link(finfo, base):
|
||||
log.debug("File %r is blocked: Hard link to %r", finfo.name, finfo.linkname)
|
||||
raise SuspiciousOperation("Hard link")
|
||||
elif finfo.islnk() and _is_bad_link(finfo, base):
|
||||
log.debug("File %r is blocked: Symlink to %r", finfo.name,
|
||||
finfo.linkname)
|
||||
raise SuspiciousOperation("Symlink")
|
||||
elif finfo.isdev():
|
||||
log.debug("File %r is blocked: FIFO, device or character file",
|
||||
finfo.name)
|
||||
raise SuspiciousOperation("Dev file")
|
||||
|
||||
return members
|
||||
|
||||
|
||||
def safetar_extractall(tar_file, path=".", members=None): # pylint: disable=unused-argument
|
||||
"""
|
||||
Safe version of `tar_file.extractall()`.
|
||||
"""
|
||||
path = str(path)
|
||||
return tar_file.extractall(path, safemembers(tar_file, path))
|
||||
Reference in New Issue
Block a user