From 1f3f0e2fb97c5b7be814a3abf8b2d1793f5e030c Mon Sep 17 00:00:00 2001 From: Jeremy Bowman Date: Wed, 18 Apr 2018 17:55:54 -0400 Subject: [PATCH] TE-2547 Add dependency analysis scripts --- cms/pytest.ini | 2 +- requirements/edx/development.in | 3 ++ requirements/edx/development.txt | 1 + requirements/edx/testing.in | 1 + scripts/dependencies/development.py | 48 +++++++++++++++++ scripts/dependencies/enumerate.sh | 37 +++++++++++++ scripts/dependencies/from_package.py | 38 ++++++++++++++ scripts/dependencies/on_package.py | 35 +++++++++++++ scripts/dependencies/testing.py | 78 ++++++++++++++++++++++++++++ 9 files changed, 242 insertions(+), 1 deletion(-) create mode 100755 scripts/dependencies/development.py create mode 100755 scripts/dependencies/enumerate.sh create mode 100755 scripts/dependencies/from_package.py create mode 100755 scripts/dependencies/on_package.py create mode 100755 scripts/dependencies/testing.py diff --git a/cms/pytest.ini b/cms/pytest.ini index 7749d15b2c..fef2586afa 100644 --- a/cms/pytest.ini +++ b/cms/pytest.ini @@ -10,4 +10,4 @@ filterwarnings = ignore::xblock.exceptions.FieldDataDeprecationWarning norecursedirs = envs python_classes = -python_files = tests.py test_*.py *_tests.py +python_files = test.py tests.py test_*.py *_tests.py diff --git a/requirements/edx/development.in b/requirements/edx/development.in index fe07765993..806bc23ada 100644 --- a/requirements/edx/development.in +++ b/requirements/edx/development.in @@ -5,6 +5,8 @@ # # pip install -r requirements/edx/development.txt # +# When adding a new dependency which is imported from edx-platform code as a library, +# update scripts/dependencies/development.txt accordingly. -r pip-tools.txt # pip-tools and its dependencies, for managing requirements files -r testing.txt # Dependencies for running the various test suites @@ -13,6 +15,7 @@ click # Used for perf_tests utilities in modulesto django-debug-toolbar==1.8 # A set of panels that display debug information about the current request/response edx-sphinx-theme # Documentation theme pyinotify # More efficient checking for runserver reload trigger events +snakefood # Lists dependencies between Python modules, used in scripts/dependencies/* sphinx # Developer documentation builder # Performance timer used in modulestore/perf_tests/test_asset_import_export.py diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index 0ef21b047c..1111e4f0ba 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -291,6 +291,7 @@ simplejson==3.13.2 singledispatch==3.4.0.3 six==1.11.0 slumber==0.7.1 +snakefood==1.4 snowballstemmer==1.2.1 # via sphinx social-auth-app-django==1.2.0 social-auth-core==1.4.0 diff --git a/requirements/edx/testing.in b/requirements/edx/testing.in index 599558c8e4..c556c93687 100644 --- a/requirements/edx/testing.in +++ b/requirements/edx/testing.in @@ -10,6 +10,7 @@ # * verify that the dependency has a license compatible with AGPLv3 # * confirm that it has no system requirements beyond what we already install # * run "make upgrade" to update the detailed requirements files +# * add an appropriate pattern to scripts/dependencies/testing.py -r base.txt # Core edx-platform production dependencies -r coverage.txt # Utilities for calculating test coverage diff --git a/scripts/dependencies/development.py b/scripts/dependencies/development.py new file mode 100755 index 0000000000..546dd21b3e --- /dev/null +++ b/scripts/dependencies/development.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +""" +List any dependencies on development utilities in edx-platform from +non-development modules. Generally, there shouldn't be any; such a dependency +could result in ImportErrors in production or tests where development packages +aren't installed. + +This script counts on scripts/dependencies/enumerate.sh having already +been run in order to generate a dependency data file to work from. +""" +from __future__ import absolute_import, print_function + +import os +import re +import sys + +# Enumerate all the Python modules that should only be imported from development utilities +pattern_fragments = [ + # Development utility modules within edx-platform + r'^xmodule/modulestore/perf_tests' # modulestore performance tests + + # Development-only package dependencies + r'^code_block_timer', # code_block_timer + r'^debug_toolbar', # django-debug-toolbar +] + +dev_pattern = re.compile('|'.join(pattern_fragments)) + +data_path = 'reports/dependencies/dependencies.txt' +if not os.path.exists(data_path): + print('The dependencies data file is unavailable; run scripts/dependencies/enumerate.sh first.') + sys.exit(1) +exit_status = 0 +with open(data_path, 'r') as f: + for dep in map(eval, f): + (from_root, from_name), (to_root, to_name) = dep + if to_name is None: + continue + if dev_pattern.search(to_name) and not dev_pattern.search(from_name): + # We usually don't care about dependencies between modules in site-packages + if from_root.endswith(u'site-packages') and to_root.endswith(u'site-packages'): + continue + # The django-debug-toolbar URL imports are safely behind conditions on INSTALLED_APPS + if from_name in {u'cms/urls.py', u'lms/urls.py'} and to_name == u'debug_toolbar': + continue + print(dep) + exit_status = 1 +sys.exit(exit_status) diff --git a/scripts/dependencies/enumerate.sh b/scripts/dependencies/enumerate.sh new file mode 100755 index 0000000000..1ea2ae970e --- /dev/null +++ b/scripts/dependencies/enumerate.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -e + +############################################################################ +# +# enumerate.sh +# +# Enumerates all dependencies (imports) from Python modules in +# edx-platform. The resulting data file generated at +# reports/dependencies/dependencies.txt can then be used by other scripts +# to detect inappropriate imports, such as: +# +# * Imports of test modules or testing packages from core application code +# * Imports of development-only packages from core or test code +# * Imports from a package we want to stop using as a dependency +# * Imports of other edx-platform modules from a module we want to move to +# a separate package in its own repository +# +# This script can take a while to run (a few minutes), so it should be run +# independently of the other scripts which use this data. +# +# While running, a number of warnings such as "Could not import module +# 'assert_equal'" may be generated. This is normal; the snakefood utility +# can't really distinguish between the import of a module and the import of +# an object within a module, so it prints a warning on all instances of the +# latter just in case it actually was an attempt to import a module which +# it couldn't find in the current PYTHONPATH. If you do see some modules +# listed which you think should be findable, you may need to run +# "make requirements" or update the ROOTS variable in this script. +# +############################################################################ + +OUTPUT_DIR="reports/dependencies" +mkdir -p ${OUTPUT_DIR} +DEPENDENCIES=${OUTPUT_DIR}/dependencies.txt +ROOTS=cms/djangoapps:common/djangoapps:lms/djangoapps:scripts/xsslint +PYTHONPATH=${ROOTS} sfood cms common lms openedx pavelib scripts manage.py pavement.py > ${DEPENDENCIES} diff --git a/scripts/dependencies/from_package.py b/scripts/dependencies/from_package.py new file mode 100755 index 0000000000..614fa49bd9 --- /dev/null +++ b/scripts/dependencies/from_package.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +""" +List any modules that are imported from the given package. This can be used +to determine what needs to be refactored to allow a package to be broken out +into a separately installed package. The package argument to the script +should be formatted as shown in these examples: + +* scripts/dependencies/from_package.py xmodule +* scripts/dependencies/from_package.py openedx/features/course_experience +* scripts/dependencies/from_package.py cms/djangoapps/verify_student + +This script counts on scripts/dependencies/enumerate.sh having already +been run in order to generate a dependency data file to work from. +""" +from __future__ import absolute_import, print_function + +import os +import re +import sys + +pattern = re.compile(u'^{}'.format(sys.argv[1])) + +data_path = 'reports/dependencies/dependencies.txt' +if not os.path.exists(data_path): + print('The dependencies data file is unavailable; run scripts/dependencies/enumerate.sh first.') +with open(data_path, 'r') as f: + for dep in map(eval, f): + (from_root, from_name), (to_root, to_name) = dep + if to_name is None: + continue + if pattern.search(from_name) and not pattern.search(to_name): + # We usually don't care about dependencies between modules in site-packages + if from_root.endswith(u'site-packages') and to_root.endswith(u'site-packages'): + continue + # We don't really care about dependencies on the standard library + if to_root.startswith('/usr/lib/python') or to_root.endswith('lib/python2.7'): + continue + print(dep) diff --git a/scripts/dependencies/on_package.py b/scripts/dependencies/on_package.py new file mode 100755 index 0000000000..77ad4e9c1f --- /dev/null +++ b/scripts/dependencies/on_package.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +""" +List any modules that import code from the given package. This can be used +to determine if the package can be safely removed, or just to understand +what context it's used in. The package argument to the script should be +formatted as shown in these examples: + +* scripts/dependencies/on_package.py nose +* scripts/dependencies/on_package.py third_parth_auth +* scripts/dependencies/on_package.py cms/djangoapps/verify_student + +This script counts on scripts/dependencies/enumerate.sh having already +been run in order to generate a dependency data file to work from. +""" +from __future__ import absolute_import, print_function + +import os +import re +import sys + +pattern = re.compile(u'^{}'.format(sys.argv[1])) + +data_path = 'reports/dependencies/dependencies.txt' +if not os.path.exists(data_path): + print('The dependencies data file is unavailable; run scripts/dependencies/enumerate.sh first.') +with open(data_path, 'r') as f: + for dep in map(eval, f): + (from_root, from_name), (to_root, to_name) = dep + if to_name is None: + continue + if pattern.search(to_name) and not pattern.search(from_name): + # We usually don't care about dependencies between modules in site-packages + if from_root.endswith(u'site-packages') and to_root.endswith(u'site-packages'): + continue + print(dep) diff --git a/scripts/dependencies/testing.py b/scripts/dependencies/testing.py new file mode 100755 index 0000000000..8d98bc2822 --- /dev/null +++ b/scripts/dependencies/testing.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +""" +List any dependencies on test modules in edx-platform from non-test modules. +Generally, there shouldn't be any; such a dependency could result in +ImportErrors in production where testing packages aren't installed. + +This script counts on scripts/dependencies/enumerate.sh having already +been run in order to generate a dependency data file to work from. +""" +from __future__ import absolute_import, print_function + +import os +import re +import sys + +# Enumerate all the Python modules that should only be imported during test runs +pattern_fragments = [ + # Test modules within edx-platform + r'/tests?\.py', # test.py, tests.py + r'/tests?_[^/]*\.py', # test_*.py, tests_*.py + r'/[^/]*_tests\.py', # *_tests.py + r'/tests?/', # */test/*, */tests/* + r'[cl]ms/.*/features/', # cms/*/features/*, lms/*/features/* + r'/testing\.py', # testing.py + r'/testutils\.py', # testutils.py + r'/tests$', # tests/__init__.py + r'conftest\.py', # conftest.py + r'/envs/acceptance\.py', # cms/envs/acceptance.py, lms/envs/acceptance.py + r'/envs/acceptance_docker\.py', # cms/envs/acceptance.py, lms/envs/acceptance.py + r'/factories\.py', # factories.py + r'^terrain', # terrain/* + r'/setup_models_to_send_test_emails\.py', # setup_models_to_send_test_emails management command + + # Testing-only package dependencies + r'^bs4', # beautifulsoup4 + r'^before_after$', # before_after + r'^bok_choy', # bok-choy + r'^cssselect', # cssselect + r'^factory', # factory_boy + r'^freezegun', # freezegun + r'^httpretty', # httpretty + r'^moto', # moto + r'^nose', # nose + r'^pyquery', # pyquery + r'^pytest.py$', # pytest + r'^selenium', # selenium + r'^singledispatch', # singledispatch + r'^testfixtures', # testfixtures +] + +test_pattern = re.compile('|'.join(pattern_fragments)) + +data_path = 'reports/dependencies/dependencies.txt' +if not os.path.exists(data_path): + print('The dependencies data file is unavailable; run scripts/dependencies/enumerate.sh first.') + sys.exit(1) +exit_status = 0 +with open(data_path, 'r') as f: + for dep in map(eval, f): + (from_root, from_name), (to_root, to_name) = dep + if to_name is None: + continue + if test_pattern.search(to_name) and not test_pattern.search(from_name): + # snakefood sometimes picks a weird place to split the root path and filename + if from_root.endswith('/tests'): + continue + # We usually don't care about dependencies between modules in site-packages + if from_root.endswith(u'site-packages') and to_root.endswith(u'site-packages'): + continue + # Dependencies on django.test and waffle.testutils are ok + if to_name.startswith(u'django/test') or to_name == u'waffle/testutils.py': + continue + # Dependencies within pavelib are ok + if from_name.startswith(u'pavelib') and to_name.startswith(u'pavelib'): + continue + print(dep) + exit_status = 1 +sys.exit(exit_status)