Merge pull request #4025 from edx/zub/bugfix/std1725-duplicatefilesonimport
ignore MAC meta files on import and also remove any such files from the ...
This commit is contained in:
@@ -0,0 +1,40 @@
|
||||
"""
|
||||
Script for removing all redundant Mac OS metadata files (with filename ".DS_Store"
|
||||
or with filename which starts with "._") for all courses
|
||||
"""
|
||||
import logging
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from xmodule.contentstore.django import contentstore
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """
    Remove all Mac OS related redundant files for all courses in contentstore
    """
    help = 'Remove all Mac OS related redundant file/files for all courses in contentstore'

    def handle(self, *args, **options):
        """
        Execute the command.

        Asks the contentstore to remove the redundant Mac OS metadata assets
        and logs how many were deleted. A failure is logged (with traceback)
        rather than propagated, so the command itself never raises.
        """
        content_store = contentstore()

        log.info(u"-" * 80)
        log.info(u"Cleaning up assets for all courses")
        try:
            # Remove all redundant Mac OS metadata files
            assets_deleted = content_store.remove_redundant_content_for_courses()
        except Exception:  # pylint: disable=broad-except
            # Top-level boundary of the command: record the failure at ERROR
            # level together with the traceback instead of burying it at INFO.
            log.exception(u"=" * 30 + u"> failed to cleanup")
        else:
            log.info(u"=" * 80)
            log.info(u"Total number of assets deleted: {0}".format(assets_deleted))
|
||||
@@ -0,0 +1,72 @@
|
||||
"""
|
||||
Test for assets cleanup of courses for Mac OS metadata files (with filename ".DS_Store"
|
||||
or with filename which starts with "._")
|
||||
"""
|
||||
from django.core.management import call_command
|
||||
|
||||
from opaque_keys.edx.locations import SlashSeparatedCourseKey
|
||||
from xmodule.contentstore.content import XASSET_LOCATION_TAG
|
||||
from xmodule.contentstore.django import contentstore
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from xmodule.modulestore.mongo.base import location_to_query
|
||||
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
|
||||
from xmodule.modulestore.xml_importer import import_from_xml
|
||||
|
||||
|
||||
class ExportAllCourses(ModuleStoreTestCase):
    """
    Exercises the ``cleanup_assets`` management command against an imported course.
    """
    def setUp(self):
        """ Grab the content store and module store used by the test. """
        self.content_store = contentstore()
        self.module_store = modulestore()

    def _check_asset_names(self, course_key, expected_names):
        """
        Fetch every asset of `course_key` and assert that the reported count
        and the asset names (compared position by position) match
        `expected_names`. Returns the fetched asset list.
        """
        assets, total = self.content_store.get_all_content_for_course(course_key)
        self.assertEqual(total, len(expected_names))
        for position, expected in enumerate(expected_names):
            self.assertEqual(assets[position]['_id']['name'], expected)
        return assets

    def test_export_all_courses(self):
        """
        Redundant Mac metadata assets ('._example_test.txt', '.DS_Store') that are
        present in the contentstore must be removed by the cleanup command, while
        the regular assets of the course are left in place.
        """
        import_from_xml(
            self.module_store,
            '**replace_user**',
            'common/test/data/',
            ['dot-underscore'],
            static_content_store=self.content_store,
            do_import_static=True,
            verbose=True
        )

        course_key = SlashSeparatedCourseKey('edX', 'dot-underscore', '2014_Fall')
        course = self.module_store.get_course(course_key)
        self.assertIsNotNone(course)

        # the import itself must have kept only the two legitimate assets
        imported = self._check_asset_names(course.id, [u'.example.txt', u'example.txt'])

        # clone an existing asset document under the two redundant file names
        asset_filter = course.id.make_asset_key("asset", None)
        lookup = location_to_query(asset_filter, wildcard=True, tag=XASSET_LOCATION_TAG)
        lookup['_id.name'] = imported[0]['_id']['name']
        template_doc = self.content_store.fs_files.find_one(lookup)
        for junk_name in (u'._example_test.txt', u'.DS_Store'):
            template_doc['_id']['name'] = junk_name
            self.content_store.fs_files.insert(template_doc)

        # the course now reports four assets, the redundant ones last
        self._check_asset_names(
            course.id,
            [u'.example.txt', u'example.txt', u'._example_test.txt', u'.DS_Store']
        )

        # after the cleanup command only the two proper assets may remain
        call_command('cleanup_assets')
        self._check_asset_names(course.id, [u'.example.txt', u'example.txt'])
|
||||
@@ -151,6 +151,8 @@ TECH_SUPPORT_EMAIL = ENV_TOKENS.get('TECH_SUPPORT_EMAIL', TECH_SUPPORT_EMAIL)
|
||||
|
||||
COURSES_WITH_UNSAFE_CODE = ENV_TOKENS.get("COURSES_WITH_UNSAFE_CODE", [])
|
||||
|
||||
ASSET_IGNORE_REGEX = ENV_TOKENS.get('ASSET_IGNORE_REGEX', ASSET_IGNORE_REGEX)
|
||||
|
||||
# Theme overrides
|
||||
THEME_NAME = ENV_TOKENS.get('THEME_NAME', None)
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ import lms.envs.common
|
||||
# Although this module itself may not use these imported variables, other dependent modules may.
|
||||
from lms.envs.common import (
|
||||
USE_TZ, TECH_SUPPORT_EMAIL, PLATFORM_NAME, BUGS_EMAIL, DOC_STORE_CONFIG, ALL_LANGUAGES, WIKI_ENABLED, MODULESTORE,
|
||||
update_module_store_settings
|
||||
update_module_store_settings, ASSET_IGNORE_REGEX
|
||||
)
|
||||
from path import path
|
||||
from warnings import simplefilter
|
||||
|
||||
@@ -13,6 +13,7 @@ import os
|
||||
import json
|
||||
from bson.son import SON
|
||||
from opaque_keys.edx.keys import AssetKey
|
||||
from xmodule.modulestore.django import ASSET_IGNORE_REGEX
|
||||
|
||||
|
||||
class MongoContentStore(ContentStore):
|
||||
@@ -170,6 +171,26 @@ class MongoContentStore(ContentStore):
|
||||
course_key, start=start, maxresults=maxresults, get_thumbnails=False, sort=sort
|
||||
)
|
||||
|
||||
def remove_redundant_content_for_courses(self):
    """
    Finds and removes all redundant files (Mac OS metadata files with filename ".DS_Store"
    or filename starts with "._") for all courses.

    Asset documents are matched on their tag, category and name, with the
    name tested against ASSET_IGNORE_REGEX. The lookup is done twice: once
    against the fields under `_id` and once against the fields under
    `content_son`.

    Returns the sum of the match counts of both lookups.
    """
    assets_to_delete = 0
    for prefix in ['_id', 'content_son']:
        query = SON([
            ('{}.tag'.format(prefix), XASSET_LOCATION_TAG),
            ('{}.category'.format(prefix), 'asset'),
            ('{}.name'.format(prefix), {'$regex': ASSET_IGNORE_REGEX}),
        ])
        items = self.fs_files.find(query)
        assets_to_delete = assets_to_delete + items.count()
        for asset in items:
            # GridFS addresses a stored file by the `_id` of its files
            # document, whichever field the query matched on. Passing
            # asset['content_son'] would only work when it happens to equal
            # `_id`, and would otherwise leave the file's chunks behind.
            self.fs.delete(asset['_id'])

        # Drop any matching files documents that are still present.
        self.fs_files.remove(query)
    return assets_to_delete
|
||||
|
||||
def _get_all_content_for_course(self, course_key, get_thumbnails=False, start=0, maxresults=-1, sort=None):
|
||||
'''
|
||||
Returns a list of all static assets for a course. The return format is a list of asset data dictionary elements.
|
||||
|
||||
@@ -8,6 +8,8 @@ from __future__ import absolute_import
|
||||
|
||||
from importlib import import_module
|
||||
from django.conf import settings
|
||||
if not settings.configured:
|
||||
settings.configure()
|
||||
from django.core.cache import get_cache, InvalidCacheBackendError
|
||||
import django.utils
|
||||
|
||||
@@ -25,6 +27,8 @@ try:
|
||||
except ImportError:
|
||||
HAS_REQUEST_CACHE = False
|
||||
|
||||
ASSET_IGNORE_REGEX = getattr(settings, "ASSET_IGNORE_REGEX", r"(^\._.*$)|(^\.DS_Store$)|(^.*~$)")
|
||||
|
||||
|
||||
def load_function(path):
|
||||
"""
|
||||
|
||||
@@ -3,6 +3,7 @@ import os
|
||||
import mimetypes
|
||||
from path import path
|
||||
import json
|
||||
import re
|
||||
|
||||
from .xml import XMLModuleStore, ImportSystem, ParentTracker
|
||||
from xblock.runtime import KvsFieldData, DictKeyValueStore
|
||||
@@ -15,6 +16,7 @@ from xmodule.errortracker import make_error_tracker
|
||||
from .store_utilities import rewrite_nonportable_content_links
|
||||
import xblock
|
||||
from xmodule.tabs import CourseTabList
|
||||
from xmodule.modulestore.django import ASSET_IGNORE_REGEX
|
||||
from xmodule.modulestore.exceptions import InvalidLocationError
|
||||
from xmodule.modulestore.mongo.base import MongoRevisionKey
|
||||
from xmodule.modulestore import ModuleStoreEnum
|
||||
@@ -49,7 +51,7 @@ def import_static_content(
|
||||
|
||||
content_path = os.path.join(dirname, filename)
|
||||
|
||||
if filename.endswith('~'):
|
||||
if re.match(ASSET_IGNORE_REGEX, filename):
|
||||
if verbose:
|
||||
log.debug('skipping static content %s...', content_path)
|
||||
continue
|
||||
|
||||
@@ -21,3 +21,21 @@ class IgnoredFilesTestCase(unittest.TestCase):
|
||||
self.assertIn("example.txt", name_val)
|
||||
self.assertNotIn("example.txt~", name_val)
|
||||
self.assertIn("GREEN", name_val["example.txt"])
|
||||
|
||||
def test_ignore_dot_underscore_static_files(self):
    """
    Static import must skip Mac OS metadata files ("._*" and ".DS_Store")
    while still saving ordinary files, including regular dot-files.
    """
    store = Mock()
    store.generate_thumbnail.return_value = ("content", "location")
    import_static_content(
        DATA_DIR / "dot-underscore",
        store,
        SlashSeparatedCourseKey("edX", "dot-underscore", "2014_Fall"),
    )

    # map each saved asset's name to its data, taken from the first
    # positional argument of every content_store.save() call
    data_by_name = {}
    for save_call in store.save.call_args_list:
        saved = save_call[0][0]
        data_by_name[saved.name] = saved.data

    for kept in ("example.txt", ".example.txt"):
        self.assertIn(kept, data_by_name)
    for skipped in ("._example.txt", ".DS_Store"):
        self.assertNotIn(skipped, data_by_name)
    for asset_name, marker in (("example.txt", "GREEN"), (".example.txt", "BLUE")):
        self.assertIn(marker, data_by_name[asset_name])
|
||||
|
||||
6
common/test/data/dot-underscore/README.md
Normal file
6
common/test/data/dot-underscore/README.md
Normal file
@@ -0,0 +1,6 @@
|
||||
IGNORE MAC METADATA FILES
|
||||
|
||||
This course simulates an import of a course from a Mac OS that has some unnecessary
|
||||
metadata files (filename starts with ._) in assets (static/._example.txt). These
|
||||
files do not belong with the content so skip them on import and also do a
|
||||
cleanup for such already added assets.
|
||||
1
common/test/data/dot-underscore/about/index.html
Normal file
1
common/test/data/dot-underscore/about/index.html
Normal file
@@ -0,0 +1 @@
|
||||
GREEN
|
||||
1
common/test/data/dot-underscore/course.xml
Normal file
1
common/test/data/dot-underscore/course.xml
Normal file
@@ -0,0 +1 @@
|
||||
<course org="edX" course="dot-underscore" slug="2014_Fall"/>
|
||||
2
common/test/data/dot-underscore/course/2014_Fall.xml
Normal file
2
common/test/data/dot-underscore/course/2014_Fall.xml
Normal file
@@ -0,0 +1,2 @@
|
||||
<course>
|
||||
</course>
|
||||
0
common/test/data/dot-underscore/static/.DS_Store
vendored
Normal file
0
common/test/data/dot-underscore/static/.DS_Store
vendored
Normal file
1
common/test/data/dot-underscore/static/.example.txt
Normal file
1
common/test/data/dot-underscore/static/.example.txt
Normal file
@@ -0,0 +1 @@
|
||||
BLUE
|
||||
1
common/test/data/dot-underscore/static/example.txt
Normal file
1
common/test/data/dot-underscore/static/example.txt
Normal file
@@ -0,0 +1 @@
|
||||
GREEN
|
||||
@@ -253,6 +253,8 @@ for name, value in ENV_TOKENS.get("CODE_JAIL", {}).items():
|
||||
|
||||
COURSES_WITH_UNSAFE_CODE = ENV_TOKENS.get("COURSES_WITH_UNSAFE_CODE", [])
|
||||
|
||||
ASSET_IGNORE_REGEX = ENV_TOKENS.get('ASSET_IGNORE_REGEX', ASSET_IGNORE_REGEX)
|
||||
|
||||
# Event Tracking
|
||||
if "TRACKING_IGNORE_URL_PATTERNS" in ENV_TOKENS:
|
||||
TRACKING_IGNORE_URL_PATTERNS = ENV_TOKENS.get("TRACKING_IGNORE_URL_PATTERNS")
|
||||
|
||||
@@ -266,6 +266,9 @@ FEATURES = {
|
||||
|
||||
}
|
||||
|
||||
# Ignore static asset files on import which match this pattern
|
||||
ASSET_IGNORE_REGEX = r"(^\._.*$)|(^\.DS_Store$)|(^.*~$)"
|
||||
|
||||
# Used for A/B testing
|
||||
DEFAULT_GROUPS = []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user