class Command(BaseCommand):
    """
    Remove all Mac OS related redundant files for all courses in contentstore.

    The matching and deletion itself is delegated to the content store's
    ``remove_redundant_content_for_courses``; this command only invokes it and
    reports the outcome through the module-level logger.
    """
    help = 'Remove all Mac OS related redundant file/files for all courses in contentstore'

    def handle(self, *args, **options):
        """
        Execute the command.

        Any exception raised by the cleanup is logged (at ERROR level, with its
        traceback) and not re-raised, so the command still returns normally
        when the cleanup fails. On success the number of deleted assets
        reported by the content store is logged.
        """
        content_store = contentstore()

        log.info(u"-" * 80)
        log.info(u"Cleaning up assets for all courses")
        try:
            # Remove all redundant Mac OS metadata files
            assets_deleted = content_store.remove_redundant_content_for_courses()
        except Exception:  # pylint: disable=broad-except
            # Top-level boundary of the command: keep the original best-effort
            # behaviour (do not propagate), but record the failure as an error
            # together with the traceback instead of as plain INFO lines.
            log.exception(u"=" * 30 + u"> failed to cleanup")
        else:
            log.info(u"=" * 80)
            log.info(u"Total number of assets deleted: {0}".format(assets_deleted))
class ExportAllCourses(ModuleStoreTestCase):
    """
    Exercises the ``cleanup_assets`` management command against an imported course.
    """
    def setUp(self):
        """ Grab the content store and module store used by the test. """
        self.content_store = contentstore()
        self.module_store = modulestore()

    def _check_course_assets(self, course_key, expected_names):
        """
        Assert that the content store reports exactly ``expected_names`` (same
        count, same order) for ``course_key`` and return the asset documents.
        """
        assets, total = self.content_store.get_all_content_for_course(course_key)
        self.assertEqual(total, len(expected_names))
        for position, expected in enumerate(expected_names):
            self.assertEqual(assets[position]['_id']['name'], expected)
        return assets

    def test_export_all_courses(self):
        """
        Redundant Mac metadata assets ('._example_test.txt', '.DS_Store') that are
        present in the contentstore are removed by the cleanup_assets command,
        while the proper assets of the course are left in place.
        """
        import_from_xml(
            self.module_store,
            '**replace_user**',
            'common/test/data/',
            ['dot-underscore'],
            static_content_store=self.content_store,
            do_import_static=True,
            verbose=True
        )

        course_key = SlashSeparatedCourseKey('edX', 'dot-underscore', '2014_Fall')
        course = self.module_store.get_course(course_key)
        self.assertIsNotNone(course)

        # the import itself leaves only the two proper assets in the contentstore
        proper_names = [u'.example.txt', u'example.txt']
        imported_assets = self._check_course_assets(course.id, proper_names)

        # clone an existing asset document under the redundant names
        # (a file starting with "._" and a ".DS_Store" file)
        redundant_names = [u'._example_test.txt', u'.DS_Store']
        query = location_to_query(
            course.id.make_asset_key("asset", None),
            wildcard=True,
            tag=XASSET_LOCATION_TAG
        )
        query['_id.name'] = imported_assets[0]['_id']['name']
        template_doc = self.content_store.fs_files.find_one(query)
        for redundant_name in redundant_names:
            template_doc['_id']['name'] = redundant_name
            self.content_store.fs_files.insert(template_doc)

        # the course now has four assets: the proper ones followed by the redundant ones
        self._check_course_assets(course.id, proper_names + redundant_names)

        # after the cleanup command only the two proper assets remain
        call_command('cleanup_assets')
        self._check_course_assets(course.id, proper_names)
def remove_redundant_content_for_courses(self):
    """
    Find and remove all redundant asset files for all courses.

    An asset is redundant when its name matches ``ASSET_IGNORE_REGEX``
    (e.g. Mac OS metadata files named ".DS_Store" or starting with "._").
    Two passes are made because the asset location fields are looked up
    under either the ``_id`` or the ``content_son`` sub-document of a
    files-collection entry.

    Returns:
        int: the number of asset files that were deleted.
    """
    assets_deleted = 0
    for prefix in ['_id', 'content_son']:
        query = SON([
            ('{}.tag'.format(prefix), XASSET_LOCATION_TAG),
            ('{}.category'.format(prefix), 'asset'),
            ('{}.name'.format(prefix), {'$regex': ASSET_IGNORE_REGEX}),
        ])
        for asset in self.fs_files.find(query):
            # GridFS identifies a stored file by the ``_id`` of its files
            # document, whichever sub-document the query matched on. Passing
            # ``asset['content_son']`` here would not match any file, so its
            # chunks would be left behind once the metadata is removed below.
            self.fs.delete(asset['_id'])
            assets_deleted += 1

        # Drop any matching metadata documents that are still present.
        self.fs_files.remove(query)
    return assets_deleted
diff --git a/common/lib/xmodule/xmodule/modulestore/django.py b/common/lib/xmodule/xmodule/modulestore/django.py index 926b54bc6b..84f694fd4e 100644 --- a/common/lib/xmodule/xmodule/modulestore/django.py +++ b/common/lib/xmodule/xmodule/modulestore/django.py @@ -8,6 +8,8 @@ from __future__ import absolute_import from importlib import import_module from django.conf import settings +if not settings.configured: + settings.configure() from django.core.cache import get_cache, InvalidCacheBackendError import django.utils @@ -25,6 +27,8 @@ try: except ImportError: HAS_REQUEST_CACHE = False +ASSET_IGNORE_REGEX = getattr(settings, "ASSET_IGNORE_REGEX", r"(^\._.*$)|(^\.DS_Store$)|(^.*~$)") + def load_function(path): """ diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index 158c9001d8..21be802a9d 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -3,6 +3,7 @@ import os import mimetypes from path import path import json +import re from .xml import XMLModuleStore, ImportSystem, ParentTracker from xblock.runtime import KvsFieldData, DictKeyValueStore @@ -15,6 +16,7 @@ from xmodule.errortracker import make_error_tracker from .store_utilities import rewrite_nonportable_content_links import xblock from xmodule.tabs import CourseTabList +from xmodule.modulestore.django import ASSET_IGNORE_REGEX from xmodule.modulestore.exceptions import InvalidLocationError from xmodule.modulestore.mongo.base import MongoRevisionKey from xmodule.modulestore import ModuleStoreEnum @@ -49,7 +51,7 @@ def import_static_content( content_path = os.path.join(dirname, filename) - if filename.endswith('~'): + if re.match(ASSET_IGNORE_REGEX, filename): if verbose: log.debug('skipping static content %s...', content_path) continue diff --git a/common/lib/xmodule/xmodule/tests/test_import_static.py b/common/lib/xmodule/xmodule/tests/test_import_static.py 
def test_ignore_dot_underscore_static_files(self):
    """
    Mac OS metadata files ("._*" and ".DS_Store") must be skipped when the
    static content of the "dot-underscore" course is imported, while regular
    files -- including ordinary dot-files -- are still saved.
    """
    store = Mock()
    store.generate_thumbnail.return_value = ("content", "location")

    import_static_content(
        DATA_DIR / "dot-underscore",
        store,
        SlashSeparatedCourseKey("edX", "dot-underscore", "2014_Fall"),
    )

    # map every saved static content object's name to its data
    data_by_name = {}
    for save_call in store.save.call_args_list:
        saved = save_call[0][0]
        data_by_name[saved.name] = saved.data

    for kept_name in ("example.txt", ".example.txt"):
        self.assertIn(kept_name, data_by_name)
    for skipped_name in ("._example.txt", ".DS_Store"):
        self.assertNotIn(skipped_name, data_by_name)
    for marker, file_name in (("GREEN", "example.txt"), ("BLUE", ".example.txt")):
        self.assertIn(marker, data_by_name[file_name])
diff --git a/common/test/data/dot-underscore/about/index.html b/common/test/data/dot-underscore/about/index.html new file mode 100644 index 0000000000..d991f425fb --- /dev/null +++ b/common/test/data/dot-underscore/about/index.html @@ -0,0 +1 @@ +GREEN diff --git a/common/test/data/dot-underscore/course.xml b/common/test/data/dot-underscore/course.xml new file mode 100644 index 0000000000..9cec88e74f --- /dev/null +++ b/common/test/data/dot-underscore/course.xml @@ -0,0 +1 @@ + diff --git a/common/test/data/dot-underscore/course/2014_Fall.xml b/common/test/data/dot-underscore/course/2014_Fall.xml new file mode 100644 index 0000000000..c9d2e8702d --- /dev/null +++ b/common/test/data/dot-underscore/course/2014_Fall.xml @@ -0,0 +1,2 @@ + + diff --git a/common/test/data/dot-underscore/static/.DS_Store b/common/test/data/dot-underscore/static/.DS_Store new file mode 100644 index 0000000000..e69de29bb2 diff --git a/common/test/data/dot-underscore/static/._example.txt b/common/test/data/dot-underscore/static/._example.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/common/test/data/dot-underscore/static/.example.txt b/common/test/data/dot-underscore/static/.example.txt new file mode 100644 index 0000000000..cb9e9c6ba5 --- /dev/null +++ b/common/test/data/dot-underscore/static/.example.txt @@ -0,0 +1 @@ +BLUE \ No newline at end of file diff --git a/common/test/data/dot-underscore/static/example.txt b/common/test/data/dot-underscore/static/example.txt new file mode 100644 index 0000000000..d991f425fb --- /dev/null +++ b/common/test/data/dot-underscore/static/example.txt @@ -0,0 +1 @@ +GREEN diff --git a/lms/envs/aws.py b/lms/envs/aws.py index 9421097531..654567f7f5 100644 --- a/lms/envs/aws.py +++ b/lms/envs/aws.py @@ -253,6 +253,8 @@ for name, value in ENV_TOKENS.get("CODE_JAIL", {}).items(): COURSES_WITH_UNSAFE_CODE = ENV_TOKENS.get("COURSES_WITH_UNSAFE_CODE", []) +ASSET_IGNORE_REGEX = ENV_TOKENS.get('ASSET_IGNORE_REGEX', ASSET_IGNORE_REGEX) + # Event 
Tracking if "TRACKING_IGNORE_URL_PATTERNS" in ENV_TOKENS: TRACKING_IGNORE_URL_PATTERNS = ENV_TOKENS.get("TRACKING_IGNORE_URL_PATTERNS") diff --git a/lms/envs/common.py b/lms/envs/common.py index e5b37a0a8f..9979a93a3f 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -266,6 +266,9 @@ FEATURES = { } +# Ignore static asset files on import which match this pattern +ASSET_IGNORE_REGEX = r"(^\._.*$)|(^\.DS_Store$)|(^.*~$)" + # Used for A/B testing DEFAULT_GROUPS = []