class Command(BaseCommand):
    """
    Convert between export formats.

    Reads a tar.gz course export, converts its directory layout to the other
    export format version (0 <-> 1) and writes the result as a new tar.gz
    archive into the requested output directory.
    """
    help = 'Convert between versions 0 and 1 of the course export format'
    args = '<tar.gz archive file> <output path>'

    def handle(self, *args, **options):
        """
        Execute the command.

        Positional arguments:
            args[0]: path of the source tar.gz archive.
            args[1]: directory in which the converted archive is created.

        The new archive is named "<source_name>_version_<N>.tar.gz", where
        <source_name> is the source file's base name up to its first '.' and
        <N> is the version returned by convert_between_versions.

        Raises CommandError when the argument count is wrong or when the
        conversion raises ValueError.
        """
        if len(args) != 2:
            raise CommandError(
                "export requires two arguments: <tar.gz archive file> <output path>"
            )

        source_archive, output_path = args

        # Create temp directories to extract the source and create the target archive.
        temp_source_dir = mkdtemp()
        temp_target_dir = mkdtemp()
        try:
            extract_source(source_archive, temp_source_dir)

            desired_version = convert_between_versions(temp_source_dir, temp_target_dir)

            # Now zip up the target directory.
            source_name = os.path.basename(source_archive).split('.')[0]
            archive_name = os.path.join(
                output_path,
                "{source_name}_version_{desired_version}.tar.gz".format(
                    source_name=source_name, desired_version=desired_version
                )
            )
            # tarfile creates the output file itself; opening it separately
            # beforehand would only hold a second, unused handle on it.
            with tarfile.open(archive_name, mode='w:gz') as tar_file:
                for item in os.listdir(temp_target_dir):
                    # arcname keeps entries relative to the archive root
                    # rather than embedding the temp directory path.
                    tar_file.add(os.path.join(temp_target_dir, item), arcname=item)

            print("Created archive {0}".format(archive_name))

        except ValueError as err:
            raise CommandError(err)

        finally:
            shutil.rmtree(temp_source_dir)
            shutil.rmtree(temp_target_dir)


def extract_source(source_archive, target):
    """
    Extract the archive into the given target directory.

    Extraction is delegated to safetar_extractall (imported from extract_tar)
    rather than calling TarFile.extractall directly.
    NOTE(review): presumably that helper rejects unsafe member paths -- confirm.
    """
    with tarfile.open(source_archive) as tar_file:
        safetar_extractall(tar_file, target)
+""" +from unittest import TestCase +from django.core.management import call_command, CommandError +from tempfile import mkdtemp +import shutil +from path import path +from contentstore.management.commands.export_convert_format import Command, extract_source +from xmodule.tests.helpers import directories_equal + + +class ConvertExportFormat(TestCase): + """ + Tests converting between export formats. + """ + def setUp(self): + """ Common setup. """ + self.temp_dir = mkdtemp() + self.data_dir = path(__file__).realpath().parent / 'data' + self.version0 = self.data_dir / "Version0_drafts.tar.gz" + self.version1 = self.data_dir / "Version1_drafts.tar.gz" + + self.command = Command() + + def tearDown(self): + """ Common cleanup. """ + shutil.rmtree(self.temp_dir) + + def test_no_args(self): + """ Test error condition of no arguments. """ + errstring = "export requires two arguments" + with self.assertRaisesRegexp(CommandError, errstring): + self.command.handle() + + def test_version1_archive(self): + """ + Smoke test for creating a version 1 archive from a version 0. + """ + call_command('export_convert_format', self.version0, self.temp_dir) + output = path(self.temp_dir) / 'Version0_drafts_version_1.tar.gz' + self.assertTrue(self._verify_archive_equality(output, self.version1)) + + def test_version0_archive(self): + """ + Smoke test for creating a version 0 archive from a version 1. + """ + call_command('export_convert_format', self.version1, self.temp_dir) + output = path(self.temp_dir) / 'Version1_drafts_version_0.tar.gz' + self.assertTrue(self._verify_archive_equality(output, self.version0)) + + def _verify_archive_equality(self, file1, file2): + """ + Helper function for determining if 2 archives are equal. 
DRAFT_DIR = "drafts"
PUBLISHED_DIR = "published"
EXPORT_VERSION_FILE = "format.json"
EXPORT_VERSION_KEY = "export_format"


def convert_between_versions(source_dir, target_dir):
    """
    Converts a version 0 export format to version 1, and vice versa.

    @param source_dir: the directory structure with the course export that should be converted.
       The contents of source_dir will not be altered. It must contain exactly one
       entry, the course directory.
    @param target_dir: the directory where the converted export should be written.
    @return: the version number of the converted export.
    @raise ValueError: if source_dir does not hold a single course directory, if the
       source version is not 0 or 1, or if a version 1 source has no published branch.
    """
    def convert_to_version_1():
        """ Convert a version 0 archive to version 1 """
        os.mkdir(copy_root)
        with open(os.path.join(copy_root, EXPORT_VERSION_FILE), 'w') as f:
            f.write('{{"{export_key}": 1}}\n'.format(export_key=EXPORT_VERSION_KEY))

        # If a drafts folder exists, copy it over.
        copy_drafts()

        # Now copy everything into the published directory
        published_dir = os.path.join(copy_root, PUBLISHED_DIR)
        shutil.copytree(course_dir, published_dir)
        # And delete the nested drafts directory, if it exists; drafts already
        # live next to "published" in the version 1 layout.
        nested_drafts_dir = os.path.join(published_dir, DRAFT_DIR)
        if os.path.isdir(nested_drafts_dir):
            shutil.rmtree(nested_drafts_dir)

    def convert_to_version_0():
        """ Convert a version 1 archive to version 0 """
        # Copy everything in "published" up to the top level.
        published_dir = os.path.join(course_dir, PUBLISHED_DIR)
        if not os.path.isdir(published_dir):
            raise ValueError("a version 1 archive must contain a published branch")

        shutil.copytree(published_dir, copy_root)

        # If there is a "draft" branch, copy it. All other branches are ignored.
        copy_drafts()

    def copy_drafts():
        """
        Copy drafts directory from the old archive structure to the new.
        """
        draft_dir = os.path.join(course_dir, DRAFT_DIR)
        if os.path.isdir(draft_dir):
            shutil.copytree(draft_dir, os.path.join(copy_root, DRAFT_DIR))

    root = os.listdir(source_dir)
    if len(root) != 1 or os.path.isfile(os.path.join(source_dir, root[0])):
        raise ValueError("source archive does not have single course directory at top level")

    course_name = root[0]
    course_dir = os.path.join(source_dir, course_name)

    # For this version of the script, we simply convert back and forth between version 0 and 1.
    original_version = get_version(course_dir)
    if original_version not in [0, 1]:
        raise ValueError("unknown version: " + str(original_version))
    # Compare by value: identity ("is 0") is only true for the cached int object,
    # so a value that passes the membership test above could still pick the wrong branch.
    desired_version = 1 if original_version == 0 else 0

    copy_root = os.path.join(target_dir, course_name)

    if desired_version == 1:
        convert_to_version_1()
    else:
        convert_to_version_0()

    return desired_version


def get_version(course_path):
    """
    Return the export format version number for the given
    archive directory structure (a path instance or plain string).

    A course directory without a format file is treated as version 0.
    If a format file exists but is not a JSON object containing the
    version key, None will be returned.
    """
    format_file = os.path.join(course_path, EXPORT_VERSION_FILE)
    if not os.path.isfile(format_file):
        return 0
    with open(format_file, "r") as f:
        data = json.load(f)
    # Guard against JSON documents that are not objects (e.g. a bare string),
    # where a key lookup would raise TypeError instead of signalling "unknown".
    if isinstance(data, dict) and EXPORT_VERSION_KEY in data:
        return data[EXPORT_VERSION_KEY]

    return None
def directories_equal(directory1, directory2):
    """
    Returns True if the 2 directories have equal content, else false.

    Two directories are equal when they contain the same entry names, every
    common name has the same kind on both sides (file vs. directory), every
    common file has identical bytes, and every common subdirectory is
    recursively equal. Names on filecmp.dircmp's default ignore/hide lists
    are not considered.
    """
    import os

    def compare_dirs(dir1, dir2):
        """ Compare directories for equality. """
        comparison = filecmp.dircmp(dir1, dir2)
        if comparison.left_only or comparison.right_only:
            return False
        # Names present on both sides but of different kinds (or that could
        # not be stat'ed) land here, not in diff_files or funny_files.
        if comparison.common_funny:
            return False
        # Compare file contents byte-for-byte; dircmp's own diff_files relies
        # on a shallow stat-signature comparison.
        _, mismatches, errors = filecmp.cmpfiles(
            dir1, dir2, comparison.common_files, shallow=False
        )
        if mismatches or errors:
            return False
        for subdir in comparison.common_dirs:
            if not compare_dirs(os.path.join(dir1, subdir), os.path.join(dir2, subdir)):
                return False

        return True

    return compare_dirs(directory1, directory2)
+ self.data_dir = path(__file__).realpath().parent / 'data' + self.version0_nodrafts = self._expand_archive('Version0_nodrafts.tar.gz') + self.version1_nodrafts = self._expand_archive('Version1_nodrafts.tar.gz') + self.version0_drafts = self._expand_archive('Version0_drafts.tar.gz') + self.version1_drafts = self._expand_archive('Version1_drafts.tar.gz') + self.version1_drafts_extra_branch = self._expand_archive('Version1_drafts_extra_branch.tar.gz') + self.no_version = self._expand_archive('NoVersionNumber.tar.gz') + + def tearDown(self): + """ Common cleanup. """ + shutil.rmtree(self.temp_dir) + + def _expand_archive(self, name): + """ Expand archive into a directory and return the directory. """ + target = path(self.temp_dir) / uuid.uuid4().hex + os.mkdir(target) + with tarfile.open(self.data_dir / name) as tar_file: + tar_file.extractall(path=target) + + return target + + def test_no_version(self): + """ Test error condition of no version number specified. """ + errstring = "unknown version" + with self.assertRaisesRegexp(ValueError, errstring): + convert_between_versions(self.no_version, self.result_dir) + + def test_no_published(self): + """ Test error condition of a version 1 archive with no published branch. """ + errstring = "version 1 archive must contain a published branch" + no_published = self._expand_archive('Version1_nopublished.tar.gz') + with self.assertRaisesRegexp(ValueError, errstring): + convert_between_versions(no_published, self.result_dir) + + def test_empty_course(self): + """ Test error condition of a version 1 archive with no published branch. """ + errstring = "source archive does not have single course directory at top level" + empty_course = self._expand_archive('EmptyCourse.tar.gz') + with self.assertRaisesRegexp(ValueError, errstring): + convert_between_versions(empty_course, self.result_dir) + + def test_convert_to_1_nodrafts(self): + """ + Test for converting from version 0 of export format to version 1 in a course with no drafts. 
+ """ + self._verify_conversion(self.version0_nodrafts, self.version1_nodrafts) + + def test_convert_to_1_drafts(self): + """ + Test for converting from version 0 of export format to version 1 in a course with drafts. + """ + self._verify_conversion(self.version0_drafts, self.version1_drafts) + + def test_convert_to_0_nodrafts(self): + """ + Test for converting from version 1 of export format to version 0 in a course with no drafts. + """ + self._verify_conversion(self.version1_nodrafts, self.version0_nodrafts) + + def test_convert_to_0_drafts(self): + """ + Test for converting from version 1 of export format to version 0 in a course with drafts. + """ + self._verify_conversion(self.version1_drafts, self.version0_drafts) + + def test_convert_to_0_extra_branch(self): + """ + Test for converting from version 1 of export format to version 0 in a course + with drafts and an extra branch. + """ + self._verify_conversion(self.version1_drafts_extra_branch, self.version0_drafts) + + def test_equality_function(self): + """ + Check equality function returns False for unequal directories. + """ + self.assertFalse(directories_equal(self.version1_nodrafts, self.version0_nodrafts)) + self.assertFalse(directories_equal(self.version1_drafts_extra_branch, self.version1_drafts)) + + def test_version_0(self): + """ + Check that get_version correctly identifies a version 0 archive (old format). + """ + self.assertEqual(0, self._version_test(self.version0_nodrafts)) + + def test_version_1(self): + """ + Check that get_version correctly identifies a version 1 archive (new format). + """ + self.assertEqual(1, self._version_test(self.version1_nodrafts)) + + def test_version_missing(self): + """ + Check that get_version returns None if no version number is specified, + and the archive is not version 0. + """ + self.assertIsNone(self._version_test(self.no_version)) + + def _version_test(self, archive_dir): + """ + Helper function for version tests. 
+ """ + root = os.listdir(archive_dir) + course_directory = archive_dir / root[0] + return get_version(course_directory) + + def _verify_conversion(self, source_archive, comparison_archive): + """ + Helper function for conversion tests. + """ + convert_between_versions(source_archive, self.result_dir) + self.assertTrue(directories_equal(self.result_dir, comparison_archive))