diff --git a/cms/djangoapps/contentstore/tests/test_import_export.py b/cms/djangoapps/contentstore/tests/test_import_export.py index 05f2b0b7b9..267bdb15b1 100644 --- a/cms/djangoapps/contentstore/tests/test_import_export.py +++ b/cms/djangoapps/contentstore/tests/test_import_export.py @@ -6,6 +6,7 @@ import shutil import tarfile import tempfile import copy +from path import path from uuid import uuid4 from pymongo import MongoClient @@ -19,6 +20,7 @@ from xmodule.contentstore.django import _CONTENTSTORE TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE) TEST_DATA_CONTENTSTORE['OPTIONS']['db'] = 'test_xcontent_%s' % uuid4().hex + @override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE) class ImportTestCase(CourseTestCase): """ @@ -32,7 +34,7 @@ class ImportTestCase(CourseTestCase): 'course': self.course.location.course, 'name': self.course.location.name, }) - self.content_dir = tempfile.mkdtemp() + self.content_dir = path(tempfile.mkdtemp()) def touch(name): """ Equivalent to shell's 'touch'""" @@ -60,11 +62,15 @@ class ImportTestCase(CourseTestCase): with tarfile.open(self.bad_tar, "w:gz") as btar: btar.add(bad_dir) + self.unsafe_common_dir = path(tempfile.mkdtemp(dir=self.content_dir)) + + def tearDown(self): shutil.rmtree(self.content_dir) MongoClient().drop_database(TEST_DATA_CONTENTSTORE['OPTIONS']['db']) _CONTENTSTORE.clear() + def test_no_coursexml(self): """ Check that the response for a tar.gz import without a course.xml is @@ -92,3 +98,88 @@ class ImportTestCase(CourseTestCase): "course-data": [gtar] }) self.assertEquals(resp.status_code, 200) + + ## Unsafe tar methods ##################################################### + # Each of these methods creates a tarfile with a single type of unsafe + # content. + + def _fifo_tar(self): + """ + Tar file with FIFO + """ + fifop = self.unsafe_common_dir / "fifo.file" + fifo_tar = self.unsafe_common_dir / "fifo.tar.gz" + os.mkfifo(fifop) + with tarfile.open(fifo_tar, "w:gz") as tar: + tar.add(fifop) + + return fifo_tar + + def _symlink_tar(self): + """ + Tarfile with symlink to path outside directory. + """ + outsidep = self.unsafe_common_dir / "unsafe_file.txt" + symlinkp = self.unsafe_common_dir / "symlink.txt" + symlink_tar = self.unsafe_common_dir / "symlink.tar.gz" + outsidep.symlink(symlinkp) + with tarfile.open(symlink_tar, "w:gz" ) as tar: + tar.add(symlinkp) + + return symlink_tar + + def _outside_tar(self): + """ + Tarfile with file that extracts to outside directory. + + Extracting this tarfile in directory will put its contents + directly in (rather than ). + """ + outside_tar = self.unsafe_common_dir / "unsafe_file.tar.gz" + with tarfile.open(outside_tar, "w:gz") as tar: + tar.addfile(tarfile.TarInfo(str(self.content_dir / "a_file"))) + + return outside_tar + + def _outside_tar2(self): + """ + Tarfile with file that extracts to outside directory. + + The path here matches the basename (`self.unsafe_common_dir`), but + then "cd's out". E.g. "/usr/../etc" == "/etc", but the naive basename + of the first (but not the second) is "/usr" + + Extracting this tarfile in directory will also put its contents + directly in (rather than ). + """ + outside_tar = self.unsafe_common_dir / "unsafe_file.tar.gz" + with tarfile.open(outside_tar, "w:gz") as tar: + tar.addfile(tarfile.TarInfo(str(self.unsafe_common_dir / "../a_file"))) + + return outside_tar + + def test_unsafe_tar(self): + """ + Check that safety measure work. + + This includes: + 'tarbombs' which include files or symlinks with paths + outside or directly in the working directory, + 'special files' (character device, block device or FIFOs), + + all raise exceptions/400s. + """ + + def try_tar(tarpath): + with open(tarpath) as tar: + resp = self.client.post( + self.url, + { "name": tarpath, "course-data": [tar] } + ) + self.assertEquals(resp.status_code, 400) + self.assertTrue("SuspiciousFileOperation" in resp.content) + + try_tar(self._fifo_tar()) + try_tar(self._symlink_tar()) + try_tar(self._outside_tar()) + try_tar(self._outside_tar2()) diff --git a/cms/djangoapps/contentstore/views/import_export.py b/cms/djangoapps/contentstore/views/import_export.py index df6d960bba..7d459f3b41 100644 --- a/cms/djangoapps/contentstore/views/import_export.py +++ b/cms/djangoapps/contentstore/views/import_export.py @@ -19,6 +19,7 @@ from django.core.urlresolvers import reverse from django.core.servers.basehttp import FileWrapper from django.core.files.temp import NamedTemporaryFile from django.views.decorators.http import require_http_methods +from django.core.exceptions import SuspiciousOperation from mitxmako.shortcuts import render_to_response from auth.authz import create_all_course_groups @@ -32,6 +33,7 @@ from xmodule.exceptions import SerializationError from .access import get_location_and_verify_access from util.json_request import JsonResponse +from extract_tar import safetar_extractall __all__ = ['import_course', 'generate_export_course', 'export_course'] @@ -154,7 +156,16 @@ def import_course(request, org, course, name): sf.write("Extracting") tar_file = tarfile.open(temp_filepath) - tar_file.extractall((course_dir + '/').encode('utf-8')) + try: + safetar_extractall(tar_file, (course_dir + '/').encode('utf-8')) + except SuspiciousOperation as exc: + return JsonResponse( + { + 'ErrMsg': 'Unsafe tar file. Aborting import.', + 'SuspiciousFileOperationMsg': exc.args[0] + }, + status=400 + ) with open(status_file, 'w+') as sf: sf.write("Verifying") diff --git a/common/lib/extract_tar.py b/common/lib/extract_tar.py new file mode 100644 index 0000000000..78a210fac4 --- /dev/null +++ b/common/lib/extract_tar.py @@ -0,0 +1,63 @@ +""" +Safe version of tarfile.extractall which does not extract any files that would +be, or symlink to a file that is, outside of the directory extracted in. + +Adapted from: +http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python +""" +from os.path import abspath, realpath, dirname, join as joinpath +from django.core.exceptions import SuspiciousOperation +import logging + +log = logging.getLogger(__name__) #pylint: disable=C0103 + +def resolved(rpath): + """ + Returns the canonical absolute path of `rpath`. + """ + return realpath(abspath(rpath)) + +def _is_bad_path(path, base): + """ + Is (the canonical absolute path of) `path` outside `base`? + """ + return not resolved(joinpath(base, path)).startswith(base) + +def _is_bad_link(info, base): + """ + Does the file sym- ord hard-link to files outside `base`? + """ + # Links are interpreted relative to the directory containing the link + tip = resolved(joinpath(base, dirname(info.name))) + return _is_bad_path(info.linkname, base=tip) + +def safemembers(members): + """ + Check that all elements of a tar file are safe. + """ + + base = resolved(".") + + for finfo in members: + if _is_bad_path(finfo.name, base): + log.debug("File %r is blocked (illegal path)", finfo.name) + raise SuspiciousOperation("Illegal path") + elif finfo.issym() and _is_bad_link(finfo, base): + log.debug( "File %r is blocked: Hard link to %r", finfo.name, finfo.linkname) + raise SuspiciousOperation("Hard link") + elif finfo.islnk() and _is_bad_link(finfo, base): + log.debug("File %r is blocked: Symlink to %r", finfo.name, + finfo.linkname) + raise SuspiciousOperation("Symlink") + elif finfo.isdev(): + log.debug("File %r is blocked: FIFO, device or character file", + finfo.name) + raise SuspiciousOperation("Dev file") + + return members + +def safetar_extractall(tarf, *args, **kwargs): + """ + Safe version of `tarf.extractall()`. + """ + return tarf.extractall(members=safemembers(tarf), *args, **kwargs)