Files
edx-platform/openedx/core/lib/extract_archive.py
2024-03-08 18:51:35 +01:00

110 lines
3.5 KiB
Python

"""
Safe version of extractall which does not extract any files that would
be, or symlink to a file that is, outside of the directory extracted in.
Adapted from:
http://stackoverflow.com/questions/10060069/safely-extract-zip-or-tar-using-python
"""
import logging
from os.path import abspath, dirname
from os.path import join as joinpath
from os.path import realpath
from typing import List, Union
from zipfile import ZipFile, ZipInfo
from tarfile import TarFile, TarInfo
from django.conf import settings
from django.core.exceptions import SuspiciousOperation
log = logging.getLogger(__name__)
def resolved(rpath):
"""
Returns the canonical absolute path of `rpath`.
"""
return realpath(abspath(rpath))
def _is_bad_path(path, base):
"""
Is (the canonical absolute path of) `path` outside `base`?
"""
return not resolved(joinpath(base, path)).startswith(base)
def _is_bad_link(info, base):
"""
Does the file sym- or hard-link to files outside `base`?
"""
# Links are interpreted relative to the directory containing the link
tip = resolved(joinpath(base, dirname(info.name)))
return _is_bad_path(info.linkname, base=tip)
def _check_tarinfo(finfo: TarInfo, base: str):
"""
Checks a file in a tar archive (TarInfo object) for safety.
It ensures that the file isn't a hard link or symlink to a file pointing to
a path outside the archive and checks that the file isn't a device file.
Raises:
SuspiciousOperation: If the TarInfo object is found to be a
hard link, symlink, or a special device file.
"""
if finfo.issym() and _is_bad_link(finfo, base):
log.debug("File %r is blocked: Hard link to %r", finfo.name, finfo.linkname)
raise SuspiciousOperation("Hard link")
if finfo.islnk() and _is_bad_link(finfo, base):
log.debug("File %r is blocked: Symlink to %r", finfo.name, finfo.linkname)
raise SuspiciousOperation("Symlink")
if finfo.isdev():
log.debug("File %r is blocked: FIFO, device or character file", finfo.name)
raise SuspiciousOperation("Dev file")
def _checkmembers(members: Union[List[ZipInfo], List[TarInfo]], base: str):
"""
Check that all elements of the archive file are safe.
"""
base = resolved(base)
# check that we're not trying to import outside of the github_repo_root
if not base.startswith(resolved(settings.GITHUB_REPO_ROOT)):
raise SuspiciousOperation("Attempted to import course outside of data dir")
for finfo in members:
if isinstance(finfo, ZipInfo):
filename = finfo.filename
elif isinstance(finfo, TarInfo):
filename = finfo.name
_check_tarinfo(finfo, base)
if _is_bad_path(filename, base):
log.debug("File %r is blocked (illegal path)", filename)
raise SuspiciousOperation("Illegal path")
def safe_extractall(file_name, output_path):
"""
Extract Zip or Tar files
"""
archive = None
if not output_path.endswith("/"):
output_path += "/"
try:
if file_name.endswith(".zip"):
archive = ZipFile(file_name, "r")
members = archive.infolist()
elif file_name.endswith(".tar.gz"):
archive = TarFile.open(file_name)
members = archive.getmembers()
else:
raise ValueError("Unsupported archive format")
_checkmembers(members, output_path)
archive.extractall(output_path)
finally:
if archive:
archive.close()