Convert between export formats.
STUD-1196
This commit is contained in:
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
Script for converting a tar.gz file representing an exported course
|
||||
to the archive format used by a different version of export.
|
||||
|
||||
Sample invocation: ./manage.py export_convert_format mycourse.tar.gz ~/newformat/
|
||||
"""
|
||||
import os
|
||||
from path import path
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
from tempfile import mkdtemp
|
||||
import tarfile
|
||||
import shutil
|
||||
from extract_tar import safetar_extractall
|
||||
|
||||
from xmodule.modulestore.xml_exporter import convert_between_versions
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """
    Convert a course export archive between export format versions.

    The command extracts a tar.gz course export, converts the extracted
    directory structure with convert_between_versions, and writes the result
    as a new tar.gz archive inside the requested output directory.
    """
    help = 'Convert between versions 0 and 1 of the course export format'
    args = '<tar.gz archive file> <output path>'

    def handle(self, *args, **options):
        """
        Execute the command.

        Expects exactly two positional arguments: the source tar.gz archive
        and the directory in which the converted archive is created. The new
        archive is named "<source name>_version_<new version>.tar.gz", where
        <source name> is the source file name up to its first '.'.

        Raises CommandError if the number of arguments is wrong, or if a
        ValueError is raised while extracting, converting or archiving.
        """
        if len(args) != 2:
            raise CommandError("export requires two arguments: <tar.gz file> <output path>")

        source_archive, output_path = args

        # Create temp directories to extract the source and create the target archive.
        temp_source_dir = mkdtemp()
        temp_target_dir = mkdtemp()
        try:
            extract_source(source_archive, temp_source_dir)

            desired_version = convert_between_versions(temp_source_dir, temp_target_dir)

            # Now zip up the target directory.
            parts = os.path.basename(source_archive).split('.')
            archive_name = path(output_path) / "{source_name}_version_{desired_version}.tar.gz".format(
                source_name=parts[0], desired_version=desired_version
            )
            # tarfile.open creates (or truncates) the archive on its own, so no
            # separate open() of the same path is needed.
            with tarfile.open(archive_name, mode='w:gz') as tar_file:
                for item in os.listdir(temp_target_dir):
                    # arcname keeps the temp directory prefix out of the archive.
                    tar_file.add(path(temp_target_dir) / item, arcname=item)

            print("Created archive {0}".format(archive_name))

        except ValueError as err:
            raise CommandError(err)

        finally:
            shutil.rmtree(temp_source_dir)
            shutil.rmtree(temp_target_dir)
|
||||
|
||||
|
||||
def extract_source(source_archive, target):
    """
    Unpack the tar archive at source_archive into the target directory.

    The archive is opened with tarfile.open and the extraction itself is
    delegated to safetar_extractall; the archive is closed afterwards.
    """
    archive = tarfile.open(source_archive)
    try:
        safetar_extractall(archive, target)
    finally:
        archive.close()
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,65 @@
|
||||
"""
|
||||
Test for export_convert_format.
|
||||
"""
|
||||
from unittest import TestCase
|
||||
from django.core.management import call_command, CommandError
|
||||
from tempfile import mkdtemp
|
||||
import shutil
|
||||
from path import path
|
||||
from contentstore.management.commands.export_convert_format import Command, extract_source
|
||||
from xmodule.tests.helpers import directories_equal
|
||||
|
||||
|
||||
class ConvertExportFormat(TestCase):
    """
    Tests for the export_convert_format management command.
    """
    def setUp(self):
        """ Create an output directory and locate the sample archives. """
        self.temp_dir = mkdtemp()
        self.data_dir = path(__file__).realpath().parent / 'data'
        self.version0 = self.data_dir / "Version0_drafts.tar.gz"
        self.version1 = self.data_dir / "Version1_drafts.tar.gz"

        self.command = Command()

    def tearDown(self):
        """ Remove the output directory created in setUp. """
        shutil.rmtree(self.temp_dir)

    def test_no_args(self):
        """ Calling the command without arguments must raise CommandError. """
        with self.assertRaisesRegexp(CommandError, "export requires two arguments"):
            self.command.handle()

    def test_version1_archive(self):
        """
        Smoke test: a version 0 archive is converted into a version 1 archive.
        """
        call_command('export_convert_format', self.version0, self.temp_dir)
        converted = path(self.temp_dir) / 'Version0_drafts_version_1.tar.gz'
        archives_match = self._verify_archive_equality(converted, self.version1)
        self.assertTrue(archives_match)

    def test_version0_archive(self):
        """
        Smoke test: a version 1 archive is converted into a version 0 archive.
        """
        call_command('export_convert_format', self.version1, self.temp_dir)
        converted = path(self.temp_dir) / 'Version1_drafts_version_0.tar.gz'
        archives_match = self._verify_archive_equality(converted, self.version0)
        self.assertTrue(archives_match)

    def _verify_archive_equality(self, file1, file2):
        """
        Extract both archives into scratch directories and report whether
        the extracted directory trees are equal.
        """
        first_dir = mkdtemp()
        second_dir = mkdtemp()
        try:
            for archive, destination in ((file1, first_dir), (file2, second_dir)):
                extract_source(archive, destination)
            return directories_equal(first_dir, second_dir)
        finally:
            shutil.rmtree(first_dir)
            shutil.rmtree(second_dir)
|
||||
@@ -9,7 +9,14 @@ from fs.osfs import OSFS
|
||||
from json import dumps
|
||||
import json
|
||||
import datetime
|
||||
import os
|
||||
from path import path
|
||||
import shutil
|
||||
|
||||
DRAFT_DIR = "drafts"
|
||||
PUBLISHED_DIR = "published"
|
||||
EXPORT_VERSION_FILE = "format.json"
|
||||
EXPORT_VERSION_KEY = "export_format"
|
||||
|
||||
class EdxJSONEncoder(json.JSONEncoder):
|
||||
"""
|
||||
@@ -95,7 +102,7 @@ def export_to_xml(modulestore, contentstore, course_location, root_dir, course_d
|
||||
draft_verticals = draft_modulestore.get_items([None, course_location.org, course_location.course,
|
||||
'vertical', None, 'draft'])
|
||||
if len(draft_verticals) > 0:
|
||||
draft_course_dir = export_fs.makeopendir('drafts')
|
||||
draft_course_dir = export_fs.makeopendir(DRAFT_DIR)
|
||||
for draft_vertical in draft_verticals:
|
||||
parent_locs = draft_modulestore.get_parent_locations(draft_vertical.location, course.location.course_id)
|
||||
# Don't try to export orphaned items.
|
||||
@@ -117,3 +124,90 @@ def export_extra_content(export_fs, modulestore, course_id, course_location, cat
|
||||
for item in items:
|
||||
with item_dir.open(item.location.name + file_suffix, 'w') as item_file:
|
||||
item_file.write(item.data.encode('utf8'))
|
||||
|
||||
|
||||
def convert_between_versions(source_dir, target_dir):
    """
    Converts a version 0 export format to version 1, and vice versa.

    @param source_dir: the directory structure with the course export that should be converted.
       It must contain exactly one top-level entry (the course directory).
       The contents of source_dir will not be altered.
    @param target_dir: the directory where the converted export should be written.
       The converted course is created as target_dir/<course directory name>.
    @return: the version number of the converted export.
    @raise ValueError: if source_dir does not hold a single top-level course
       directory, if the source version is neither 0 nor 1, or if a version 1
       source has no published directory.
    """
    def convert_to_version_1():
        """ Convert a version 0 archive to version 1 """
        os.mkdir(copy_root)
        with open(copy_root / EXPORT_VERSION_FILE, 'w') as f:
            f.write('{{"{export_key}": 1}}\n'.format(export_key=EXPORT_VERSION_KEY))

        # If a drafts folder exists, copy it over.
        copy_drafts()

        # Now copy everything into the published directory
        published_dir = copy_root / PUBLISHED_DIR
        shutil.copytree(path(source_dir) / course_name, published_dir)
        # And delete the nested drafts directory, if it exists.
        nested_drafts_dir = published_dir / DRAFT_DIR
        if nested_drafts_dir.isdir():
            shutil.rmtree(nested_drafts_dir)

    def convert_to_version_0():
        """ Convert a version 1 archive to version 0 """
        # Copy everything in "published" up to the top level.
        published_dir = path(source_dir) / course_name / PUBLISHED_DIR
        if not published_dir.isdir():
            raise ValueError("a version 1 archive must contain a published branch")

        shutil.copytree(published_dir, copy_root)

        # If there is a "draft" branch, copy it. All other branches are ignored.
        copy_drafts()

    def copy_drafts():
        """
        Copy drafts directory from the old archive structure to the new.
        """
        draft_dir = path(source_dir) / course_name / DRAFT_DIR
        if draft_dir.isdir():
            shutil.copytree(draft_dir, copy_root / DRAFT_DIR)

    root = os.listdir(source_dir)
    if len(root) != 1 or (path(source_dir) / root[0]).isfile():
        raise ValueError("source archive does not have single course directory at top level")

    course_name = root[0]

    # For this version of the script, we simply convert back and forth between version 0 and 1.
    original_version = get_version(path(source_dir) / course_name)
    if original_version not in [0, 1]:
        raise ValueError("unknown version: " + str(original_version))
    # Compare by value: identity ("is") on integers only works by accident of
    # the interpreter's small-integer caching.
    desired_version = 1 if original_version == 0 else 0

    # The helpers above read course_name and copy_root from this scope.
    copy_root = path(target_dir) / course_name

    if desired_version == 1:
        convert_to_version_1()
    else:
        convert_to_version_0()

    return desired_version
|
||||
|
||||
|
||||
def get_version(course_path):
    """
    Return the export format version number for the given
    archive directory structure (represented as a path instance).

    A directory without a version file is treated as version 0. If the
    version file exists but does not contain the version key, None is
    returned; otherwise the value stored under that key is returned.
    """
    version_file = course_path / EXPORT_VERSION_FILE
    if version_file.isfile():
        with open(version_file, "r") as handle:
            contents = json.load(handle)
            return contents[EXPORT_VERSION_KEY] if EXPORT_VERSION_KEY in contents else None

    # No version file at all: this is the original (version 0) layout.
    return 0
|
||||
|
||||
BIN
common/lib/xmodule/xmodule/tests/data/EmptyCourse.tar.gz
Normal file
BIN
common/lib/xmodule/xmodule/tests/data/EmptyCourse.tar.gz
Normal file
Binary file not shown.
BIN
common/lib/xmodule/xmodule/tests/data/NoVersionNumber.tar.gz
Normal file
BIN
common/lib/xmodule/xmodule/tests/data/NoVersionNumber.tar.gz
Normal file
Binary file not shown.
BIN
common/lib/xmodule/xmodule/tests/data/Version0_drafts.tar.gz
Normal file
BIN
common/lib/xmodule/xmodule/tests/data/Version0_drafts.tar.gz
Normal file
Binary file not shown.
BIN
common/lib/xmodule/xmodule/tests/data/Version0_nodrafts.tar.gz
Normal file
BIN
common/lib/xmodule/xmodule/tests/data/Version0_nodrafts.tar.gz
Normal file
Binary file not shown.
BIN
common/lib/xmodule/xmodule/tests/data/Version1_drafts.tar.gz
Normal file
BIN
common/lib/xmodule/xmodule/tests/data/Version1_drafts.tar.gz
Normal file
Binary file not shown.
Binary file not shown.
BIN
common/lib/xmodule/xmodule/tests/data/Version1_nodrafts.tar.gz
Normal file
BIN
common/lib/xmodule/xmodule/tests/data/Version1_nodrafts.tar.gz
Normal file
Binary file not shown.
Binary file not shown.
26
common/lib/xmodule/xmodule/tests/helpers.py
Normal file
26
common/lib/xmodule/xmodule/tests/helpers.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""
|
||||
Utility methods for unit tests.
|
||||
"""
|
||||
|
||||
import filecmp
|
||||
from path import path
|
||||
|
||||
|
||||
def directories_equal(directory1, directory2):
    """
    Returns True if the 2 directories have equal content, else False.

    The directories are compared recursively with filecmp.dircmp. They are
    considered different if any entry exists on one side only, if a common
    name has a different type on each side (for example a file versus a
    directory), if a common file differs, or if a file could not be compared.
    """
    def is_match(comparison):
        """ Return True if this dircmp and all of its sub-comparisons found no difference. """
        mismatches = (
            comparison.left_only,
            comparison.right_only,
            # Names present on both sides whose types differ (or that could
            # not be stat'ed); these appear in neither diff_files nor subdirs.
            comparison.common_funny,
            comparison.funny_files,
            comparison.diff_files,
        )
        if any(mismatches):
            return False
        # subdirs maps each common subdirectory name to its own dircmp object.
        return all(is_match(sub_comparison) for sub_comparison in comparison.subdirs.values())

    return is_match(filecmp.dircmp(directory1, directory2))
|
||||
@@ -12,11 +12,17 @@ import mock
|
||||
import pytz
|
||||
from fs.osfs import OSFS
|
||||
from path import path
|
||||
import uuid
|
||||
import tarfile
|
||||
import os
|
||||
|
||||
from xmodule.modulestore import Location
|
||||
from xmodule.modulestore.xml import XMLModuleStore
|
||||
from xmodule.modulestore.xml_exporter import EdxJSONEncoder
|
||||
from xmodule.modulestore.xml_exporter import (
|
||||
EdxJSONEncoder, convert_between_versions, get_version
|
||||
)
|
||||
from xmodule.tests import DATA_DIR
|
||||
from xmodule.tests.helpers import directories_equal
|
||||
|
||||
|
||||
def strip_filenames(descriptor):
|
||||
@@ -195,3 +201,132 @@ class TestEdxJsonEncoder(unittest.TestCase):
|
||||
|
||||
with self.assertRaises(TypeError):
|
||||
self.encoder.default({})
|
||||
|
||||
|
||||
class ConvertExportFormat(unittest.TestCase):
    """
    Tests converting between export formats.

    Each test works on sample archives from the "data" directory next to this
    module; setUp expands them into per-test temporary directories.
    """
    def setUp(self):
        """ Common setup: expand the sample archives and create a result directory. """

        # NOTE(review): mkdtemp and shutil are not among the imports visible in
        # this hunk -- confirm they are imported at the top of the module.

        # Directory for expanding all the test archives
        self.temp_dir = mkdtemp()

        # Directory where new archive will be created
        self.result_dir = path(self.temp_dir) / uuid.uuid4().hex
        os.mkdir(self.result_dir)

        # Expand all the test archives and store their paths.
        self.data_dir = path(__file__).realpath().parent / 'data'
        self.version0_nodrafts = self._expand_archive('Version0_nodrafts.tar.gz')
        self.version1_nodrafts = self._expand_archive('Version1_nodrafts.tar.gz')
        self.version0_drafts = self._expand_archive('Version0_drafts.tar.gz')
        self.version1_drafts = self._expand_archive('Version1_drafts.tar.gz')
        self.version1_drafts_extra_branch = self._expand_archive('Version1_drafts_extra_branch.tar.gz')
        self.no_version = self._expand_archive('NoVersionNumber.tar.gz')

    def tearDown(self):
        """ Common cleanup: remove the temporary directory and everything expanded into it. """
        shutil.rmtree(self.temp_dir)

    def _expand_archive(self, name):
        """ Expand archive into a directory and return the directory. """
        # Each archive gets its own uniquely named directory under temp_dir.
        target = path(self.temp_dir) / uuid.uuid4().hex
        os.mkdir(target)
        with tarfile.open(self.data_dir / name) as tar_file:
            tar_file.extractall(path=target)

        return target

    def test_no_version(self):
        """ Test error condition of no version number specified. """
        errstring = "unknown version"
        with self.assertRaisesRegexp(ValueError, errstring):
            convert_between_versions(self.no_version, self.result_dir)

    def test_no_published(self):
        """ Test error condition of a version 1 archive with no published branch. """
        errstring = "version 1 archive must contain a published branch"
        no_published = self._expand_archive('Version1_nopublished.tar.gz')
        with self.assertRaisesRegexp(ValueError, errstring):
            convert_between_versions(no_published, self.result_dir)

    def test_empty_course(self):
        """ Test error condition of an archive without a single course directory at top level. """
        errstring = "source archive does not have single course directory at top level"
        empty_course = self._expand_archive('EmptyCourse.tar.gz')
        with self.assertRaisesRegexp(ValueError, errstring):
            convert_between_versions(empty_course, self.result_dir)

    def test_convert_to_1_nodrafts(self):
        """
        Test for converting from version 0 of export format to version 1 in a course with no drafts.
        """
        self._verify_conversion(self.version0_nodrafts, self.version1_nodrafts)

    def test_convert_to_1_drafts(self):
        """
        Test for converting from version 0 of export format to version 1 in a course with drafts.
        """
        self._verify_conversion(self.version0_drafts, self.version1_drafts)

    def test_convert_to_0_nodrafts(self):
        """
        Test for converting from version 1 of export format to version 0 in a course with no drafts.
        """
        self._verify_conversion(self.version1_nodrafts, self.version0_nodrafts)

    def test_convert_to_0_drafts(self):
        """
        Test for converting from version 1 of export format to version 0 in a course with drafts.
        """
        self._verify_conversion(self.version1_drafts, self.version0_drafts)

    def test_convert_to_0_extra_branch(self):
        """
        Test for converting from version 1 of export format to version 0 in a course
        with drafts and an extra branch.

        The expected result is the same as for the archive without the extra branch.
        """
        self._verify_conversion(self.version1_drafts_extra_branch, self.version0_drafts)

    def test_equality_function(self):
        """
        Check equality function returns False for unequal directories.
        """
        self.assertFalse(directories_equal(self.version1_nodrafts, self.version0_nodrafts))
        self.assertFalse(directories_equal(self.version1_drafts_extra_branch, self.version1_drafts))

    def test_version_0(self):
        """
        Check that get_version correctly identifies a version 0 archive (old format).
        """
        self.assertEqual(0, self._version_test(self.version0_nodrafts))

    def test_version_1(self):
        """
        Check that get_version correctly identifies a version 1 archive (new format).
        """
        self.assertEqual(1, self._version_test(self.version1_nodrafts))

    def test_version_missing(self):
        """
        Check that get_version returns None if no version number is specified,
        and the archive is not version 0.
        """
        self.assertIsNone(self._version_test(self.no_version))

    def _version_test(self, archive_dir):
        """
        Helper function for version tests.

        Returns get_version for the first entry listed in the expanded archive directory.
        """
        root = os.listdir(archive_dir)
        course_directory = archive_dir / root[0]
        return get_version(course_directory)

    def _verify_conversion(self, source_archive, comparison_archive):
        """
        Helper function for conversion tests.

        Converts the expanded source archive into result_dir and asserts that
        the result equals the expanded comparison archive.
        """
        convert_between_versions(source_archive, self.result_dir)
        self.assertTrue(directories_equal(self.result_dir, comparison_archive))
|
||||
|
||||
Reference in New Issue
Block a user