diff --git a/cms/djangoapps/contentstore/tests/test_import_export.py b/cms/djangoapps/contentstore/tests/test_import_export.py
index 05f2b0b7b9..267bdb15b1 100644
--- a/cms/djangoapps/contentstore/tests/test_import_export.py
+++ b/cms/djangoapps/contentstore/tests/test_import_export.py
@@ -6,6 +6,7 @@ import shutil
import tarfile
import tempfile
import copy
+from path import path
from uuid import uuid4
from pymongo import MongoClient
@@ -19,6 +20,7 @@ from xmodule.contentstore.django import _CONTENTSTORE
TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
TEST_DATA_CONTENTSTORE['OPTIONS']['db'] = 'test_xcontent_%s' % uuid4().hex
+
@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE)
class ImportTestCase(CourseTestCase):
"""
@@ -32,7 +34,7 @@ class ImportTestCase(CourseTestCase):
'course': self.course.location.course,
'name': self.course.location.name,
})
- self.content_dir = tempfile.mkdtemp()
+ self.content_dir = path(tempfile.mkdtemp())
def touch(name):
""" Equivalent to shell's 'touch'"""
@@ -60,11 +62,15 @@ class ImportTestCase(CourseTestCase):
with tarfile.open(self.bad_tar, "w:gz") as btar:
btar.add(bad_dir)
+ self.unsafe_common_dir = path(tempfile.mkdtemp(dir=self.content_dir))
+
+
def tearDown(self):
shutil.rmtree(self.content_dir)
MongoClient().drop_database(TEST_DATA_CONTENTSTORE['OPTIONS']['db'])
_CONTENTSTORE.clear()
+
def test_no_coursexml(self):
"""
Check that the response for a tar.gz import without a course.xml is
@@ -92,3 +98,88 @@ class ImportTestCase(CourseTestCase):
"course-data": [gtar]
})
self.assertEquals(resp.status_code, 200)
+
+ ## Unsafe tar methods #####################################################
+ # Each of these methods creates a tarfile with a single type of unsafe
+ # content.
+
+ def _fifo_tar(self):
+ """
+ Tar file with FIFO
+ """
+ fifop = self.unsafe_common_dir / "fifo.file"
+ fifo_tar = self.unsafe_common_dir / "fifo.tar.gz"
+ os.mkfifo(fifop)
+ with tarfile.open(fifo_tar, "w:gz") as tar:
+ tar.add(fifop)
+
+ return fifo_tar
+
+ def _symlink_tar(self):
+ """
+ Tarfile with symlink to path outside directory.
+ """
+ outsidep = self.unsafe_common_dir / "unsafe_file.txt"
+ symlinkp = self.unsafe_common_dir / "symlink.txt"
+ symlink_tar = self.unsafe_common_dir / "symlink.tar.gz"
+ outsidep.symlink(symlinkp)
+ with tarfile.open(symlink_tar, "w:gz" ) as tar:
+ tar.add(symlinkp)
+
+ return symlink_tar
+
+ def _outside_tar(self):
+ """
+ Tarfile with file that extracts to outside directory.
+
+ Extracting this tarfile in directory
will put its contents
+ directly in (rather than ).
+ """
+ outside_tar = self.unsafe_common_dir / "unsafe_file.tar.gz"
+ with tarfile.open(outside_tar, "w:gz") as tar:
+ tar.addfile(tarfile.TarInfo(str(self.content_dir / "a_file")))
+
+ return outside_tar
+
+ def _outside_tar2(self):
+ """
+ Tarfile with file that extracts to outside directory.
+
+ The path here matches the basename (`self.unsafe_common_dir`), but
+ then "cd's out". E.g. "/usr/../etc" == "/etc", but the naive basename
+ of the first (but not the second) is "/usr"
+
+ Extracting this tarfile in directory will also put its contents
+ directly in (rather than ).
+ """
+ outside_tar = self.unsafe_common_dir / "unsafe_file.tar.gz"
+ with tarfile.open(outside_tar, "w:gz") as tar:
+ tar.addfile(tarfile.TarInfo(str(self.unsafe_common_dir / "../a_file")))
+
+ return outside_tar
+
+ def test_unsafe_tar(self):
+ """
+ Check that safety measure work.
+
+ This includes:
+ 'tarbombs' which include files or symlinks with paths
+ outside or directly in the working directory,
+ 'special files' (character device, block device or FIFOs),
+
+ all raise exceptions/400s.
+ """
+
+ def try_tar(tarpath):
+ with open(tarpath) as tar:
+ resp = self.client.post(
+ self.url,
+ { "name": tarpath, "course-data": [tar] }
+ )
+ self.assertEquals(resp.status_code, 400)
+ self.assertTrue("SuspiciousFileOperation" in resp.content)
+
+ try_tar(self._fifo_tar())
+ try_tar(self._symlink_tar())
+ try_tar(self._outside_tar())
+ try_tar(self._outside_tar2())
diff --git a/cms/djangoapps/contentstore/views/import_export.py b/cms/djangoapps/contentstore/views/import_export.py
index df6d960bba..7d459f3b41 100644
--- a/cms/djangoapps/contentstore/views/import_export.py
+++ b/cms/djangoapps/contentstore/views/import_export.py
@@ -19,6 +19,7 @@ from django.core.urlresolvers import reverse
from django.core.servers.basehttp import FileWrapper
from django.core.files.temp import NamedTemporaryFile
from django.views.decorators.http import require_http_methods
+from django.core.exceptions import SuspiciousOperation
from mitxmako.shortcuts import render_to_response
from auth.authz import create_all_course_groups
@@ -32,6 +33,7 @@ from xmodule.exceptions import SerializationError
from .access import get_location_and_verify_access
from util.json_request import JsonResponse
+from extract_tar import safetar_extractall
__all__ = ['import_course', 'generate_export_course', 'export_course']
@@ -154,7 +156,16 @@ def import_course(request, org, course, name):
sf.write("Extracting")
tar_file = tarfile.open(temp_filepath)
- tar_file.extractall((course_dir + '/').encode('utf-8'))
+ try:
+ safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
+ except SuspiciousOperation as exc:
+ return JsonResponse(
+ {
+ 'ErrMsg': 'Unsafe tar file. Aborting import.',
+ 'SuspiciousFileOperationMsg': exc.args[0]
+ },
+ status=400
+ )
with open(status_file, 'w+') as sf:
sf.write("Verifying")
diff --git a/common/lib/extract_tar.py b/common/lib/extract_tar.py
new file mode 100644
index 0000000000..78a210fac4
--- /dev/null
+++ b/common/lib/extract_tar.py
@@ -0,0 +1,63 @@
+"""
+Safe version of tarfile.extractall which does not extract any files that would
+be, or symlink to a file that is, outside of the directory extracted in.
+
+Adapted from:
+http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python
+"""
+from os.path import abspath, realpath, dirname, join as joinpath
+from django.core.exceptions import SuspiciousOperation
+import logging
+
+log = logging.getLogger(__name__) #pylint: disable=C0103
+
+def resolved(rpath):
+ """
+ Returns the canonical absolute path of `rpath`.
+ """
+ return realpath(abspath(rpath))
+
+def _is_bad_path(path, base):
+ """
+ Is (the canonical absolute path of) `path` outside `base`?
+ """
+ return not resolved(joinpath(base, path)).startswith(base)
+
+def _is_bad_link(info, base):
+ """
+ Does the file sym- ord hard-link to files outside `base`?
+ """
+ # Links are interpreted relative to the directory containing the link
+ tip = resolved(joinpath(base, dirname(info.name)))
+ return _is_bad_path(info.linkname, base=tip)
+
+def safemembers(members):
+ """
+ Check that all elements of a tar file are safe.
+ """
+
+ base = resolved(".")
+
+ for finfo in members:
+ if _is_bad_path(finfo.name, base):
+ log.debug("File %r is blocked (illegal path)", finfo.name)
+ raise SuspiciousOperation("Illegal path")
+ elif finfo.issym() and _is_bad_link(finfo, base):
+ log.debug( "File %r is blocked: Hard link to %r", finfo.name, finfo.linkname)
+ raise SuspiciousOperation("Hard link")
+ elif finfo.islnk() and _is_bad_link(finfo, base):
+ log.debug("File %r is blocked: Symlink to %r", finfo.name,
+ finfo.linkname)
+ raise SuspiciousOperation("Symlink")
+ elif finfo.isdev():
+ log.debug("File %r is blocked: FIFO, device or character file",
+ finfo.name)
+ raise SuspiciousOperation("Dev file")
+
+ return members
+
+def safetar_extractall(tarf, *args, **kwargs):
+ """
+ Safe version of `tarf.extractall()`.
+ """
+ return tarf.extractall(members=safemembers(tarf), *args, **kwargs)