From 0ed06539560278c270e73fa53db39c3f5776761f Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Sun, 11 Aug 2013 13:03:02 -0400 Subject: [PATCH 1/3] On import, when enumerating through the 'draft content', OSX environments may contain hidden, binary-encoded quarantine files. We don't want to process these, because they raise an exception when we try to parse them. The exception is harmless - the import goes on fine - but it adds a lot of clutter to the output logs. --- .../xmodule/modulestore/xml_importer.py | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index fa9228bed3..0a20433d17 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -242,9 +242,37 @@ def import_course_draft(xml_module_store, store, draft_store, course_data_path, for dirname, dirnames, filenames in os.walk(draft_dir + "/vertical"): for filename in filenames: module_path = os.path.join(dirname, filename) - with open(module_path) as f: + with open(module_path, 'r') as f: try: - xml = f.read().decode('utf-8') + # note, on local dev it seems like OSX will put some extra files in + # the directory with "quarantine" information. These files are + # binary files and will throw execptions when we try to parse + # the file as an XML string. Let's make sure we're + # dealing with a string before ingesting + data = f.read() + + try: + xml = data.decode('utf-8') + except UnicodeDecodeError, e: + # seems like on OSX localdev, the OS is making quarantine files + # in the unzip directory when importing courses + # so if we blindly try to enumerate through the directory, we'll try + # to process a bunch of binary quarantine files (which are prefixed with a '._' character + # which will dump a bunch of exceptions to the output, although they are harmless. 
+ # + # Reading online docs there doesn't seem to be a good means to detect a 'hidden' + # file that works well across all OS environments. So for now, I'm using + # OSX's utilization of a leading '.' in the filename to indicate a system hidden + # file. + # + # Better yet would be a way to figure out if this is a binary file, but I + # haven't found a good way to do this yet. + # + if filename.startswith('.'): + continue + # Not a 'hidden file', then re-raise exception + raise e + descriptor = system.process_xml(xml) def _import_module(module): From 6a423af7ad420183fd6c6436455ed15ddb56a72a Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Mon, 19 Aug 2013 23:53:53 -0400 Subject: [PATCH 2/3] PR feedback --- common/lib/xmodule/xmodule/modulestore/xml_importer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index 0a20433d17..becc512e71 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -246,7 +246,7 @@ def import_course_draft(xml_module_store, store, draft_store, course_data_path, try: # note, on local dev it seems like OSX will put some extra files in # the directory with "quarantine" information. These files are - # binary files and will throw execptions when we try to parse + # binary files and will throw exceptions when we try to parse # the file as an XML string. Let's make sure we're # dealing with a string before ingesting data = f.read() @@ -268,7 +268,7 @@ def import_course_draft(xml_module_store, store, draft_store, course_data_path, # Better yet would be a way to figure out if this is a binary file, but I # haven't found a good way to do this yet. 
# - if filename.startswith('.'): + if filename.startswith('._'): continue # Not a 'hidden file', then re-raise exception raise e From e576c7be15567c202cec825f314ced09e14c7490 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Tue, 20 Aug 2013 00:40:20 -0400 Subject: [PATCH 3/3] pylint error fix --- common/lib/xmodule/xmodule/modulestore/xml_importer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index becc512e71..7bea0fdcac 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -253,7 +253,7 @@ def import_course_draft(xml_module_store, store, draft_store, course_data_path, try: xml = data.decode('utf-8') - except UnicodeDecodeError, e: + except UnicodeDecodeError, err: # seems like on OSX localdev, the OS is making quarantine files # in the unzip directory when importing courses # so if we blindly try to enumerate through the directory, we'll try @@ -271,7 +271,7 @@ def import_course_draft(xml_module_store, store, draft_store, course_data_path, if filename.startswith('._'): continue # Not a 'hidden file', then re-raise exception - raise e + raise err descriptor = system.process_xml(xml)