diff --git a/cms/djangoapps/contentstore/management/commands/cleanup_assets.py b/cms/djangoapps/contentstore/management/commands/cleanup_assets.py
new file mode 100644
index 0000000000..7896aca3ec
--- /dev/null
+++ b/cms/djangoapps/contentstore/management/commands/cleanup_assets.py
@@ -0,0 +1,40 @@
+"""
+Script for removing all redundant Mac OS metadata files (with filename ".DS_Store"
+or with filename which starts with "._") for all courses
+"""
+import logging
+
+from django.core.management.base import BaseCommand
+from xmodule.contentstore.django import contentstore
+
+
+log = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    """
+    Remove all Mac OS related redundant files for all courses in contentstore
+    """
+    help = 'Remove all Mac OS related redundant file/files for all courses in contentstore'
+
+    def handle(self, *args, **options):
+        """
+        Delete the redundant Mac OS metadata assets of every course and log
+        how many were removed.
+
+        A failure during cleanup is logged with its traceback at ERROR level
+        and is not re-raised.
+        """
+        content_store = contentstore()
+
+        log.info(u"-" * 80)
+        log.info(u"Cleaning up assets for all courses")
+        try:
+            # Remove all redundant Mac OS metadata files
+            assets_deleted = content_store.remove_redundant_content_for_courses()
+        except Exception:  # pylint: disable=broad-except
+            # log.exception records the message at ERROR level with the traceback
+            log.exception(u"=" * 30 + u"> failed to cleanup")
+        else:
+            log.info(u"=" * 80)
+            log.info(u"Total number of assets deleted: {0}".format(assets_deleted))
diff --git a/cms/djangoapps/contentstore/management/commands/tests/test_cleanup_assets.py b/cms/djangoapps/contentstore/management/commands/tests/test_cleanup_assets.py
new file mode 100644
index 0000000000..748884a7bb
--- /dev/null
+++ b/cms/djangoapps/contentstore/management/commands/tests/test_cleanup_assets.py
@@ -0,0 +1,72 @@
+"""
+Test for assets cleanup of courses for Mac OS metadata files (with filename ".DS_Store"
+or with filename which starts with "._")
+"""
+from django.core.management import call_command
+
+from opaque_keys.edx.locations import SlashSeparatedCourseKey
+from xmodule.contentstore.content import XASSET_LOCATION_TAG
+from xmodule.contentstore.django import contentstore
+from xmodule.modulestore.django import modulestore
+from xmodule.modulestore.mongo.base import location_to_query
+from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
+from xmodule.modulestore.xml_importer import import_from_xml
+
+
+class ExportAllCourses(ModuleStoreTestCase):
+    """
+    Tests assets cleanup for all courses.
+    """
+    def setUp(self):
+        """ Common setup. """
+        super(ExportAllCourses, self).setUp()
+        self.content_store = contentstore()
+        self.module_store = modulestore()
+
+    def test_export_all_courses(self):
+        """
+        This test validates that redundant Mac metadata files ('._example.txt', '.DS_Store') are
+        skipped on import and that the cleanup_assets command removes ones already in contentstore
+        """
+        import_from_xml(
+            self.module_store,
+            '**replace_user**',
+            'common/test/data/',
+            ['dot-underscore'],
+            static_content_store=self.content_store,
+            do_import_static=True,
+            verbose=True
+        )
+        course = self.module_store.get_course(SlashSeparatedCourseKey('edX', 'dot-underscore', '2014_Fall'))
+        self.assertIsNotNone(course)
+
+        # check that there are two assets ['example.txt', '.example.txt'] in contentstore for imported course
+        all_assets, count = self.content_store.get_all_content_for_course(course.id)
+        self.assertEqual(count, 2)
+        self.assertEqual(all_assets[0]['_id']['name'], u'.example.txt')
+        self.assertEqual(all_assets[1]['_id']['name'], u'example.txt')
+
+        # manually add redundant assets (file ".DS_Store" and filename starts with "._")
+        course_filter = course.id.make_asset_key("asset", None)
+        query = location_to_query(course_filter, wildcard=True, tag=XASSET_LOCATION_TAG)
+        query['_id.name'] = all_assets[0]['_id']['name']
+        asset_doc = self.content_store.fs_files.find_one(query)
+        asset_doc['_id']['name'] = u'._example_test.txt'
+        self.content_store.fs_files.insert(asset_doc)
+        asset_doc['_id']['name'] = u'.DS_Store'
+        self.content_store.fs_files.insert(asset_doc)
+
+        # check that now course has four assets
+        all_assets, count = self.content_store.get_all_content_for_course(course.id)
+        self.assertEqual(count, 4)
+        self.assertEqual(all_assets[0]['_id']['name'], u'.example.txt')
+        self.assertEqual(all_assets[1]['_id']['name'], u'example.txt')
+        self.assertEqual(all_assets[2]['_id']['name'], u'._example_test.txt')
+        self.assertEqual(all_assets[3]['_id']['name'], u'.DS_Store')
+
+        # now call the cleanup_assets command and check that only the two proper assets remain for the course
+        call_command('cleanup_assets')
+        all_assets, count = self.content_store.get_all_content_for_course(course.id)
+        self.assertEqual(count, 2)
+        self.assertEqual(all_assets[0]['_id']['name'], u'.example.txt')
+        self.assertEqual(all_assets[1]['_id']['name'], u'example.txt')
diff --git a/cms/envs/aws.py b/cms/envs/aws.py
index 63605461c2..5cc7eff1f1 100644
--- a/cms/envs/aws.py
+++ b/cms/envs/aws.py
@@ -151,6 +151,8 @@ TECH_SUPPORT_EMAIL = ENV_TOKENS.get('TECH_SUPPORT_EMAIL', TECH_SUPPORT_EMAIL)
COURSES_WITH_UNSAFE_CODE = ENV_TOKENS.get("COURSES_WITH_UNSAFE_CODE", [])
+ASSET_IGNORE_REGEX = ENV_TOKENS.get('ASSET_IGNORE_REGEX', ASSET_IGNORE_REGEX)
+
# Theme overrides
THEME_NAME = ENV_TOKENS.get('THEME_NAME', None)
diff --git a/cms/envs/common.py b/cms/envs/common.py
index f605d4296a..05a2da47c2 100644
--- a/cms/envs/common.py
+++ b/cms/envs/common.py
@@ -31,7 +31,7 @@ import lms.envs.common
# Although this module itself may not use these imported variables, other dependent modules may.
from lms.envs.common import (
USE_TZ, TECH_SUPPORT_EMAIL, PLATFORM_NAME, BUGS_EMAIL, DOC_STORE_CONFIG, ALL_LANGUAGES, WIKI_ENABLED, MODULESTORE,
- update_module_store_settings
+ update_module_store_settings, ASSET_IGNORE_REGEX
)
from path import path
from warnings import simplefilter
diff --git a/common/lib/xmodule/xmodule/contentstore/mongo.py b/common/lib/xmodule/xmodule/contentstore/mongo.py
index c3e7571efa..cf8e5e03b5 100644
--- a/common/lib/xmodule/xmodule/contentstore/mongo.py
+++ b/common/lib/xmodule/xmodule/contentstore/mongo.py
@@ -13,6 +13,7 @@ import os
import json
from bson.son import SON
from opaque_keys.edx.keys import AssetKey
+from xmodule.modulestore.django import ASSET_IGNORE_REGEX
class MongoContentStore(ContentStore):
@@ -170,6 +171,26 @@ class MongoContentStore(ContentStore):
course_key, start=start, maxresults=maxresults, get_thumbnails=False, sort=sort
)
+    def remove_redundant_content_for_courses(self):
+        """
+        Finds and removes, for all courses, every asset whose name matches ASSET_IGNORE_REGEX
+        (by default ".DS_Store", "._*" and "*~" files). Returns the number of matched files.
+        """
+        assets_to_delete = 0
+        for prefix in ['_id', 'content_son']:
+            query = SON([
+                ('{}.tag'.format(prefix), XASSET_LOCATION_TAG),
+                ('{}.category'.format(prefix), 'asset'),
+                ('{}.name'.format(prefix), {'$regex': ASSET_IGNORE_REGEX}),
+            ])
+            items = self.fs_files.find(query)
+            assets_to_delete = assets_to_delete + items.count()
+            for asset in items:
+                # GridFS deletes by the file's '_id', whichever key the query matched on
+                self.fs.delete(asset['_id'])
+            self.fs_files.remove(query)
+        return assets_to_delete
+
def _get_all_content_for_course(self, course_key, get_thumbnails=False, start=0, maxresults=-1, sort=None):
'''
Returns a list of all static assets for a course. The return format is a list of asset data dictionary elements.
diff --git a/common/lib/xmodule/xmodule/modulestore/django.py b/common/lib/xmodule/xmodule/modulestore/django.py
index 926b54bc6b..84f694fd4e 100644
--- a/common/lib/xmodule/xmodule/modulestore/django.py
+++ b/common/lib/xmodule/xmodule/modulestore/django.py
@@ -8,6 +8,8 @@ from __future__ import absolute_import
from importlib import import_module
from django.conf import settings
+if not settings.configured:
+ settings.configure()
from django.core.cache import get_cache, InvalidCacheBackendError
import django.utils
@@ -25,6 +27,8 @@ try:
except ImportError:
HAS_REQUEST_CACHE = False
+ASSET_IGNORE_REGEX = getattr(settings, "ASSET_IGNORE_REGEX", r"(^\._.*$)|(^\.DS_Store$)|(^.*~$)")
+
def load_function(path):
"""
diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py
index 158c9001d8..21be802a9d 100644
--- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py
+++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py
@@ -3,6 +3,7 @@ import os
import mimetypes
from path import path
import json
+import re
from .xml import XMLModuleStore, ImportSystem, ParentTracker
from xblock.runtime import KvsFieldData, DictKeyValueStore
@@ -15,6 +16,7 @@ from xmodule.errortracker import make_error_tracker
from .store_utilities import rewrite_nonportable_content_links
import xblock
from xmodule.tabs import CourseTabList
+from xmodule.modulestore.django import ASSET_IGNORE_REGEX
from xmodule.modulestore.exceptions import InvalidLocationError
from xmodule.modulestore.mongo.base import MongoRevisionKey
from xmodule.modulestore import ModuleStoreEnum
@@ -49,7 +51,7 @@ def import_static_content(
content_path = os.path.join(dirname, filename)
- if filename.endswith('~'):
+ if re.match(ASSET_IGNORE_REGEX, filename):
if verbose:
log.debug('skipping static content %s...', content_path)
continue
diff --git a/common/lib/xmodule/xmodule/tests/test_import_static.py b/common/lib/xmodule/xmodule/tests/test_import_static.py
index 831d389dec..10de01d2a1 100644
--- a/common/lib/xmodule/xmodule/tests/test_import_static.py
+++ b/common/lib/xmodule/xmodule/tests/test_import_static.py
@@ -21,3 +21,21 @@ class IgnoredFilesTestCase(unittest.TestCase):
self.assertIn("example.txt", name_val)
self.assertNotIn("example.txt~", name_val)
self.assertIn("GREEN", name_val["example.txt"])
+
+    def test_ignore_dot_underscore_static_files(self):
+        """
+        Mac OS metadata files ("._*" names and ".DS_Store") are skipped by the static import
+        """
+        store = Mock()
+        store.generate_thumbnail.return_value = ("content", "location")
+        import_static_content(
+            DATA_DIR / "dot-underscore", store, SlashSeparatedCourseKey("edX", "dot-underscore", "2014_Fall")
+        )
+        # the first positional argument of each save() call is the content that was saved
+        data_by_name = {args[0].name: args[0].data for args, _kwargs in store.save.call_args_list}
+        self.assertIn("example.txt", data_by_name)
+        self.assertIn(".example.txt", data_by_name)
+        self.assertNotIn("._example.txt", data_by_name)
+        self.assertNotIn(".DS_Store", data_by_name)
+        self.assertIn("GREEN", data_by_name["example.txt"])
+        self.assertIn("BLUE", data_by_name[".example.txt"])
diff --git a/common/test/data/dot-underscore/README.md b/common/test/data/dot-underscore/README.md
new file mode 100644
index 0000000000..dc54082345
--- /dev/null
+++ b/common/test/data/dot-underscore/README.md
@@ -0,0 +1,6 @@
+IGNORE MAC METADATA FILES
+
+This course simulates an import of a course from Mac OS that has some unnecessary
+metadata files (filename starts with ._) in assets (static/._example.txt). These
+files do not belong with the content, so skip them on import and also clean up
+any such assets that were already added.
diff --git a/common/test/data/dot-underscore/about/index.html b/common/test/data/dot-underscore/about/index.html
new file mode 100644
index 0000000000..d991f425fb
--- /dev/null
+++ b/common/test/data/dot-underscore/about/index.html
@@ -0,0 +1 @@
+GREEN
diff --git a/common/test/data/dot-underscore/course.xml b/common/test/data/dot-underscore/course.xml
new file mode 100644
index 0000000000..9cec88e74f
--- /dev/null
+++ b/common/test/data/dot-underscore/course.xml
@@ -0,0 +1 @@
+
diff --git a/common/test/data/dot-underscore/course/2014_Fall.xml b/common/test/data/dot-underscore/course/2014_Fall.xml
new file mode 100644
index 0000000000..c9d2e8702d
--- /dev/null
+++ b/common/test/data/dot-underscore/course/2014_Fall.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/common/test/data/dot-underscore/static/.DS_Store b/common/test/data/dot-underscore/static/.DS_Store
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/common/test/data/dot-underscore/static/._example.txt b/common/test/data/dot-underscore/static/._example.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/common/test/data/dot-underscore/static/.example.txt b/common/test/data/dot-underscore/static/.example.txt
new file mode 100644
index 0000000000..cb9e9c6ba5
--- /dev/null
+++ b/common/test/data/dot-underscore/static/.example.txt
@@ -0,0 +1 @@
+BLUE
\ No newline at end of file
diff --git a/common/test/data/dot-underscore/static/example.txt b/common/test/data/dot-underscore/static/example.txt
new file mode 100644
index 0000000000..d991f425fb
--- /dev/null
+++ b/common/test/data/dot-underscore/static/example.txt
@@ -0,0 +1 @@
+GREEN
diff --git a/lms/envs/aws.py b/lms/envs/aws.py
index 9421097531..654567f7f5 100644
--- a/lms/envs/aws.py
+++ b/lms/envs/aws.py
@@ -253,6 +253,8 @@ for name, value in ENV_TOKENS.get("CODE_JAIL", {}).items():
COURSES_WITH_UNSAFE_CODE = ENV_TOKENS.get("COURSES_WITH_UNSAFE_CODE", [])
+ASSET_IGNORE_REGEX = ENV_TOKENS.get('ASSET_IGNORE_REGEX', ASSET_IGNORE_REGEX)
+
# Event Tracking
if "TRACKING_IGNORE_URL_PATTERNS" in ENV_TOKENS:
TRACKING_IGNORE_URL_PATTERNS = ENV_TOKENS.get("TRACKING_IGNORE_URL_PATTERNS")
diff --git a/lms/envs/common.py b/lms/envs/common.py
index e5b37a0a8f..9979a93a3f 100644
--- a/lms/envs/common.py
+++ b/lms/envs/common.py
@@ -266,6 +266,9 @@ FEATURES = {
}
+# Ignore static asset files on import which match this pattern
+ASSET_IGNORE_REGEX = r"(^\._.*$)|(^\.DS_Store$)|(^.*~$)"
+
# Used for A/B testing
DEFAULT_GROUPS = []