From 1813b22ab0e110004e33d656c5f52ca85e0a6e0a Mon Sep 17 00:00:00 2001 From: Julian Arni Date: Thu, 29 Aug 2013 16:12:56 -0400 Subject: [PATCH 1/8] Fix import errors with unicode filenames --- cms/djangoapps/contentstore/views/import_export.py | 2 +- common/lib/xmodule/xmodule/modulestore/xml_importer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cms/djangoapps/contentstore/views/import_export.py b/cms/djangoapps/contentstore/views/import_export.py index 8d7f1aac76..df6d960bba 100644 --- a/cms/djangoapps/contentstore/views/import_export.py +++ b/cms/djangoapps/contentstore/views/import_export.py @@ -154,7 +154,7 @@ def import_course(request, org, course, name): sf.write("Extracting") tar_file = tarfile.open(temp_filepath) - tar_file.extractall(course_dir + '/') + tar_file.extractall((course_dir + '/').encode('utf-8')) with open(status_file, 'w+') as sf: sf.write("Verifying") diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index a9a7601e33..352258d93f 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -31,7 +31,7 @@ def import_static_content(modules, course_loc, course_data_path, static_content_ try: content_path = os.path.join(dirname, filename) if verbose: - log.debug('importing static content {0}...'.format(content_path)) + log.debug('importing static content {0}...'.format(content_path.encode("utf-8"))) fullname_with_subpath = content_path.replace(static_dir, '') # strip away leading path from the name if fullname_with_subpath.startswith('/'): From 1cfad39f1e6eb0b7262d483bafd41091f2832932 Mon Sep 17 00:00:00 2001 From: Julian Arni Date: Wed, 4 Sep 2013 13:33:08 -0400 Subject: [PATCH 2/8] Add tests for imports with unicode filenames --- .../xmodule/modulestore/tests/test_mongo.py | 30 +++++++++++--- .../lib/xmodule/xmodule/tests/test_import.py | 39 +++++++++++++++++++ .../data/test_unicode/about/end_date.html | 1 + .../chapter/handout_container.xml | 3 ++ .../data/test_unicode/chapter/poll_test.xml | 7 ++++ .../test_unicode/chapter/secret/magic.xml | 3 ++ common/test/data/test_unicode/course.xml | 1 + .../data/test_unicode/course/2012_Fall.xml | 20 ++++++++++ .../test/data/test_unicode/info/handouts.html | 1 + .../data/test_unicode/policies/2012_Fall.json | 33 ++++++++++++++++ .../sequential/vertical_sequential.xml | 4 ++ .../static/handouts/sample_handout.txt | 0 .../test_unicode/static/sample_static.txt | 0 .../test/data/test_unicode/static/unicø∂é.txt | 13 +++++++ .../static_import/∫hould_be_imπorted.html | 1 + .../test/data/test_unicode/tabs/syllabus.html | 1 + .../data/test_unicode/tabs/®esources.html | 1 + .../test_unicode/vertical/vertical_test.xml | 3 ++ .../test/data/test_unicode/video/a_video.xml | 1 + 19 files changed, 157 insertions(+), 5 deletions(-) create mode 100644 common/test/data/test_unicode/about/end_date.html create mode 100644 common/test/data/test_unicode/chapter/handout_container.xml create mode 100644 common/test/data/test_unicode/chapter/poll_test.xml create mode 100644 common/test/data/test_unicode/chapter/secret/magic.xml create mode 100644 common/test/data/test_unicode/course.xml create mode 100644 common/test/data/test_unicode/course/2012_Fall.xml create mode 100644 common/test/data/test_unicode/info/handouts.html create mode 100644 common/test/data/test_unicode/policies/2012_Fall.json create mode 100644 common/test/data/test_unicode/sequential/vertical_sequential.xml create mode 100644 common/test/data/test_unicode/static/handouts/sample_handout.txt create mode 100644 common/test/data/test_unicode/static/sample_static.txt create mode 100644 common/test/data/test_unicode/static/unicø∂é.txt create mode 100644 common/test/data/test_unicode/static_import/∫hould_be_imπorted.html create mode 100644 common/test/data/test_unicode/tabs/syllabus.html create mode 100644 common/test/data/test_unicode/tabs/®esources.html create mode 100644 common/test/data/test_unicode/vertical/vertical_test.xml create mode 100644 common/test/data/test_unicode/video/a_video.xml diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py index 6f3276bf44..74ba7aa2e1 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py @@ -4,6 +4,7 @@ from nose.tools import assert_equals, assert_raises, \ assert_not_equals, assert_false # pylint: enable=E0611 import pymongo +import logging from uuid import uuid4 from xblock.fields import Scope @@ -19,6 +20,7 @@ from xmodule.contentstore.mongo import MongoContentStore from xmodule.modulestore.tests.test_modulestore import check_path_to_location +log = logging.getLogger(__name__) HOST = 'localhost' PORT = 27017 @@ -59,7 +61,7 @@ class TestMongoModuleStore(object): # draft_store = DraftModuleStore(HOST, DB, COLLECTION, FS_ROOT, RENDER_TEMPLATE, default_class=DEFAULT_CLASS) # Explicitly list the courses to load (don't want the big one) - courses = ['toy', 'simple', 'simple_with_draft'] + courses = ['toy', 'simple', 'simple_with_draft', 'test_unicode'] import_from_xml(store, DATA_DIR, courses, draft_store=draft_store, static_content_store=content_store) # also test a course with no importing of static content @@ -100,12 +102,14 @@ class TestMongoModuleStore(object): def test_get_courses(self): '''Make sure the course objects loaded properly''' courses = self.store.get_courses() - assert_equals(len(courses), 4) + assert_equals(len(courses), 5) courses.sort(key=lambda c: c.id) assert_equals(courses[0].id, 'edX/simple/2012_Fall') assert_equals(courses[1].id, 'edX/simple_with_draft/2012_Fall') assert_equals(courses[2].id, 'edX/test_import_course/2012_Fall') - assert_equals(courses[3].id, 'edX/toy/2012_Fall') + assert_equals(courses[3].id, 'edX/test_unicode/2012_Fall') + assert_equals(courses[4].id, 'edX/toy/2012_Fall') + log.debug(str(courses)) def test_loads(self): assert_not_equals( @@ -120,6 +124,22 @@ class TestMongoModuleStore(object): self.store.get_item("i4x://edX/toy/video/Welcome"), None) + def test_unicode_loads(self): + assert_not_equals( + self.store.get_item("i4x://edX/test_unicode/course/2012_Fall"), + None) + # All items with ascii-only filenames should load properly. + assert_not_equals( + self.store.get_item("i4x://edX/test_unicode/video/Welcome"), + None) + assert_not_equals( + self.store.get_item("i4x://edX/test_unicode/video/Welcome"), + None) + assert_not_equals( + self.store.get_item("i4x://edX/test_unicode/chapter/Overview"), + None) + + def test_find_one(self): assert_not_equals( self.store._find_one(Location("i4x://edX/toy/course/2012_Fall")), @@ -159,9 +179,9 @@ class TestMongoModuleStore(object): """ Helper function for pulling out the name of a given static tab. - Assumes the information is desired for courses[1] ('toy' course). + Assumes the information is desired for courses[4] ('toy' course). """ - return courses[2].tabs[index]['name'] + return courses[4].tabs[index]['name'] # There was a bug where model.save was not getting called after the static tab name # was set set for tabs that have a URL slug. 'Syllabus' and 'Resources' fall into that diff --git a/common/lib/xmodule/xmodule/tests/test_import.py b/common/lib/xmodule/xmodule/tests/test_import.py index 8f24112bc6..3c841aab50 100644 --- a/common/lib/xmodule/xmodule/tests/test_import.py +++ b/common/lib/xmodule/xmodule/tests/test_import.py @@ -368,6 +368,45 @@ class ImportTestCase(BaseCourseTestCase): html = modulestore.get_instance(course_id, loc) self.assertEquals(html.display_name, "Toy lab") + def test_unicode(self): + """Check that courses with unicode characters in filenames and in + org/course/name import properly. Currently, this means: (a) Having + files with unicode names does not prevent import; (b) if files are not + loaded because of unicode filenames, there are appropriate + exceptions/errors to that effect.""" + + print("Starting import") + modulestore = XMLModuleStore(DATA_DIR, course_dirs=['test_unicode']) + courses = modulestore.get_courses() + self.assertEquals(len(courses), 1) + course = courses[0] + course_id = course.id + + print("course errors:") + + # Expect to find an error/exception about characters in "®esources" + found = False + expect = "Invalid characters in '®esources'" + for (msg, err) in modulestore.get_item_errors(course.location): + msg_u = msg.encode("utf-8") + err_u = err.encode("utf-8") + print(msg_u) + print(err_u) + if max(msg_u.find(expect), err_u.find(expect)) >= 1: + found = True + + self.assertTrue(found) + chapters = course.get_children() + self.assertEquals(len(chapters), 5) + + ch2 = chapters[1] + self.assertEquals(ch2.url_name, "secret:magic") + + print("Ch2 location: ", ch2.location) + + also_ch2 = modulestore.get_instance(course_id, ch2.location) + self.assertEquals(ch2, also_ch2) + def test_url_name_mangling(self): """ Make sure that url_names are only mangled once. diff --git a/common/test/data/test_unicode/about/end_date.html b/common/test/data/test_unicode/about/end_date.html new file mode 100644 index 0000000000..a0990367ef --- /dev/null +++ b/common/test/data/test_unicode/about/end_date.html @@ -0,0 +1 @@ +TBD diff --git a/common/test/data/test_unicode/chapter/handout_container.xml b/common/test/data/test_unicode/chapter/handout_container.xml new file mode 100644 index 0000000000..0be4e30bbb --- /dev/null +++ b/common/test/data/test_unicode/chapter/handout_container.xml @@ -0,0 +1,3 @@ + + handouts + \ No newline at end of file diff --git a/common/test/data/test_unicode/chapter/poll_test.xml b/common/test/data/test_unicode/chapter/poll_test.xml new file mode 100644 index 0000000000..bbb340e936 --- /dev/null +++ b/common/test/data/test_unicode/chapter/poll_test.xml @@ -0,0 +1,7 @@ + + +

Have you changed your mind?

+ Yes + No +
+
\ No newline at end of file diff --git a/common/test/data/test_unicode/chapter/secret/magic.xml b/common/test/data/test_unicode/chapter/secret/magic.xml new file mode 100644 index 0000000000..fe7adb7967 --- /dev/null +++ b/common/test/data/test_unicode/chapter/secret/magic.xml @@ -0,0 +1,3 @@ + + diff --git a/common/test/data/test_unicode/course.xml b/common/test/data/test_unicode/course.xml new file mode 100644 index 0000000000..0375e11d3a --- /dev/null +++ b/common/test/data/test_unicode/course.xml @@ -0,0 +1 @@ + diff --git a/common/test/data/test_unicode/course/2012_Fall.xml b/common/test/data/test_unicode/course/2012_Fall.xml new file mode 100644 index 0000000000..4eb9d66171 --- /dev/null +++ b/common/test/data/test_unicode/course/2012_Fall.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + diff --git a/common/test/data/test_unicode/info/handouts.html b/common/test/data/test_unicode/info/handouts.html new file mode 100644 index 0000000000..85fa34d71d --- /dev/null +++ b/common/test/data/test_unicode/info/handouts.html @@ -0,0 +1 @@ +Sample \ No newline at end of file diff --git a/common/test/data/test_unicode/policies/2012_Fall.json b/common/test/data/test_unicode/policies/2012_Fall.json new file mode 100644 index 0000000000..c35877b8f5 --- /dev/null +++ b/common/test/data/test_unicode/policies/2012_Fall.json @@ -0,0 +1,33 @@ +{ + "course/2012_Fall": { + "graceperiod": "2 days 5 hours 59 minutes 59 seconds", + "start": "2015-07-17T12:00", + "display_name": "Toy Course", + "graded": "true", + "tabs": [ + {"type": "courseware"}, + {"type": "course_info", "name": "Course Info"}, + {"type": "static_tab", "url_slug": "syllabus", "name": "ßyllabus"}, + {"type": "static_tab", "url_slug": "resources", "name": "Resources"}, + {"type": "discussion", "name": "∂iscussion"}, + {"type": "wiki", "name": "∑iki"}, + {"type": "progress", "name": "πrogress"} + ] + }, + "chapter/Overview": { + "display_name": "O√erview" + }, + "videosequence/Toy_Videos": { + "display_name": "†oy Videos", + "format": "Lecture Sequence" + }, + "html/secret:toylab": { + "display_name": "Toy lab" + }, + "video/Video_Resources": { + "display_name": "Video Resources" + }, + "video/Welcome": { + "display_name": "Welcome" + } +} diff --git a/common/test/data/test_unicode/sequential/vertical_sequential.xml b/common/test/data/test_unicode/sequential/vertical_sequential.xml new file mode 100644 index 0000000000..695e640243 --- /dev/null +++ b/common/test/data/test_unicode/sequential/vertical_sequential.xml @@ -0,0 +1,4 @@ + + + … + \ No newline at end of file diff --git a/common/test/data/test_unicode/static/handouts/sample_handout.txt b/common/test/data/test_unicode/static/handouts/sample_handout.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/common/test/data/test_unicode/static/sample_static.txt b/common/test/data/test_unicode/static/sample_static.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/common/test/data/test_unicode/static/unicø∂é.txt b/common/test/data/test_unicode/static/unicø∂é.txt new file mode 100644 index 0000000000..248ebeafb7 --- /dev/null +++ b/common/test/data/test_unicode/static/unicø∂é.txt @@ -0,0 +1,13 @@ +μῆνιν ἄειδε θεὰ Πηληϊάδεω Ἀχιλῆος +οὐλομένην, ἣ μυρί᾽ Ἀχαιοῖς ἄλγε᾽ ἔθηκε, +πολλὰς δ᾽ ἰφθίμους ψυχὰς Ἄϊδι προΐαψεν +ἡρώων, αὐτοὺς δὲ ἑλώρια τεῦχε κύνεσσιν +5οἰωνοῖσί τε πᾶσι, Διὸς δ᾽ ἐτελείετο βουλή, +ἐξ οὗ δὴ τὰ πρῶτα διαστήτην ἐρίσαντε +Ἀτρεΐδης τε ἄναξ ἀνδρῶν καὶ δῖος Ἀχιλλεύς. +τίς τ᾽ ἄρ σφωε θεῶν ἔριδι ξυνέηκε μάχεσθαι; +Λητοῦς καὶ Διὸς υἱός: ὃ γὰρ βασιλῆϊ χολωθεὶς +10νοῦσον ἀνὰ στρατὸν ὄρσε κακήν, ὀλέκοντο δὲ λαοί, +οὕνεκα τὸν Χρύσην ἠτίμασεν ἀρητῆρα +Ἀτρεΐδης: ὃ γὰρ ἦλθε θοὰς ἐπὶ νῆας Ἀχαιῶν +λυσόμενός τε θύγατρα φέρων τ᾽ ἀπερείσι᾽ ἄποινα, diff --git a/common/test/data/test_unicode/static_import/∫hould_be_imπorted.html b/common/test/data/test_unicode/static_import/∫hould_be_imπorted.html new file mode 100644 index 0000000000..2ab33a3a03 --- /dev/null +++ b/common/test/data/test_unicode/static_import/∫hould_be_imπorted.html @@ -0,0 +1 @@ +

this ƒîlë should be in the contentstore

diff --git a/common/test/data/test_unicode/tabs/syllabus.html b/common/test/data/test_unicode/tabs/syllabus.html new file mode 100644 index 0000000000..45c2a7fc8f --- /dev/null +++ b/common/test/data/test_unicode/tabs/syllabus.html @@ -0,0 +1 @@ +

This is a syllabus

diff --git a/common/test/data/test_unicode/tabs/®esources.html b/common/test/data/test_unicode/tabs/®esources.html new file mode 100644 index 0000000000..0f6bdf0984 --- /dev/null +++ b/common/test/data/test_unicode/tabs/®esources.html @@ -0,0 +1 @@ +
resources!
diff --git a/common/test/data/test_unicode/vertical/vertical_test.xml b/common/test/data/test_unicode/vertical/vertical_test.xml new file mode 100644 index 0000000000..82df156dfe --- /dev/null +++ b/common/test/data/test_unicode/vertical/vertical_test.xml @@ -0,0 +1,3 @@ + + diff --git a/common/test/data/test_unicode/video/a_video.xml b/common/test/data/test_unicode/video/a_video.xml new file mode 100644 index 0000000000..b90ea9d8c4 --- /dev/null +++ b/common/test/data/test_unicode/video/a_video.xml @@ -0,0 +1 @@ +