From 4029ee183783bccceedcd4c76081f005c22c0c81 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Fri, 22 Dec 2017 12:35:25 -0500 Subject: [PATCH] Ignore certain patterns during collectstatic. There are symlinks and file types that are not necessary to copy over to the actual static files output, either because they are source files that get compiled out, or because they exist only for tests. FEDX-448. With themes enabled, this reduces the unoptimized static asset build size from 343M to 212M. The optimized version goes from 838M to 500M. --- pavelib/assets.py | 23 ++++++++++++++++++- pavelib/paver_tests/test_assets.py | 34 ++++++++++++----------------- pavelib/paver_tests/test_servers.py | 6 ++++- 3 files changed, 41 insertions(+), 22 deletions(-) diff --git a/pavelib/assets.py b/pavelib/assets.py index 44f30fd564..049684f5a0 100644 --- a/pavelib/assets.py +++ b/pavelib/assets.py @@ -700,10 +700,31 @@ def collect_assets(systems, settings, **kwargs): `settings` is the Django settings module to use. `**kwargs` include arguments for using a log directory for collectstatic output. Defaults to /dev/null. """ + ignore_patterns = [ + # Karma test related files... + "fixtures", + "karma_*.js", + "spec", + "spec_helpers", + "spec-helpers", + "xmodule_js", # symlink for tests + + # Geo-IP data, only accessed in Python + "geoip", + + # We compile these out, don't need the source files in staticfiles + "sass", + "*.coffee", + ] + + ignore_args = " ".join( + '--ignore "{}"'.format(pattern) for pattern in ignore_patterns + ) for sys in systems: collectstatic_stdout_str = _collect_assets_cmd(sys, **kwargs) - sh(django_cmd(sys, settings, "collectstatic --noinput {logfile_str}".format( + sh(django_cmd(sys, settings, "collectstatic {ignore_args} --noinput {logfile_str}".format( + ignore_args=ignore_args, logfile_str=collectstatic_stdout_str ))) print("\t\tFinished collecting {} assets.".format(sys)) diff --git a/pavelib/paver_tests/test_assets.py b/pavelib/paver_tests/test_assets.py index 7877f1e5ca..c5f9bb3486 100644 --- a/pavelib/paver_tests/test_assets.py +++ b/pavelib/paver_tests/test_assets.py @@ -127,7 +127,6 @@ class TestCollectAssets(PaverTestCase): specified_log_dict = specified_log_loc log_loc = options.get("expected_log_location", "> /dev/null") systems = options.get("systems", ["lms"]) - expected_messages = self._set_expected_messages(log_location=log_loc, systems=systems) if specified_log_loc is None: collect_assets( systems, @@ -139,7 +138,7 @@ class TestCollectAssets(PaverTestCase): Env.DEVSTACK_SETTINGS, **specified_log_dict ) - self.assertEqual(self.task_messages, expected_messages) + self._assert_correct_messages(log_location=log_loc, systems=systems) def test_collect_assets_debug(self): """ @@ -149,27 +148,22 @@ class TestCollectAssets(PaverTestCase): expected_log_loc = "" systems = ["lms"] kwargs = {COLLECTSTATIC_LOG_DIR_ARG: None} - expected_messages = self._set_expected_messages(log_location=expected_log_loc, systems=systems) collect_assets(systems, Env.DEVSTACK_SETTINGS, **kwargs) - self.assertEqual(self.task_messages, expected_messages) + self._assert_correct_messages(log_location=expected_log_loc, systems=systems) - def _set_expected_messages(self, log_location, systems): - """ - Returns a list of messages that are expected to be sent from paver - to the commandline for collectstatic functions. This list is constructed - based on the log location and systems being passed in. + def _assert_correct_messages(self, log_location, systems): """ + Asserts that the expected commands were run. - expected_messages = [] - for sys in systems: - expected_messages.append( - 'python manage.py {system} --settings={settings} collectstatic --noinput {log_loc}'.format( - system=sys, - settings=Env.DEVSTACK_SETTINGS, - log_loc=log_location - ) - ) - return expected_messages + We just extract the pieces we care about here instead of specifying an + exact command, so that small arg changes don't break this test. + """ + for i, sys in enumerate(systems): + msg = self.task_messages[i] + self.assertTrue(msg.startswith('python manage.py {}'.format(sys))) + self.assertIn(' collectstatic '.format(Env.DEVSTACK_SETTINGS), msg) + self.assertIn('--settings={}'.format(Env.DEVSTACK_SETTINGS), msg) + self.assertTrue(msg.endswith(' {}'.format(log_location))) @ddt.ddt @@ -181,7 +175,7 @@ class TestUpdateAssetsTask(PaverTestCase): @ddt.data( [{"expected_substring": "> /dev/null"}], # go to /dev/null by default - [{"cmd_args": ["--debug"], "expected_substring": "collectstatic --noinput "}] # TODO: make this regex + [{"cmd_args": ["--debug"], "expected_substring": "collectstatic"}] # TODO: make this regex ) @ddt.unpack def test_update_assets_task_collectstatic_log_arg(self, options): diff --git a/pavelib/paver_tests/test_servers.py b/pavelib/paver_tests/test_servers.py index 93e894b38b..087f712f2b 100644 --- a/pavelib/paver_tests/test_servers.py +++ b/pavelib/paver_tests/test_servers.py @@ -30,7 +30,11 @@ EXPECTED_CMS_SASS_COMMAND = [ u"python manage.py cms --settings={asset_settings} compile_sass cms ", ] EXPECTED_COLLECT_STATIC_COMMAND = ( - u"python manage.py {system} --settings={asset_settings} collectstatic --noinput {log_string}" + u'python manage.py {system} --settings={asset_settings} collectstatic ' + u'--ignore "fixtures" --ignore "karma_*.js" --ignore "spec" ' + u'--ignore "spec_helpers" --ignore "spec-helpers" --ignore "xmodule_js" ' + u'--ignore "geoip" --ignore "sass" --ignore "*.coffee" ' + u'--noinput {log_string}' ) EXPECTED_CELERY_COMMAND = ( u"python manage.py lms --settings={settings} celery worker --beat --loglevel=INFO --pythonpath=."