From 59e0f6efcf2a297806918f8e0020255c1f59ea5f Mon Sep 17 00:00:00 2001 From: Robert Raposa Date: Mon, 1 Jun 2020 12:27:38 -0400 Subject: [PATCH] ARCHBOM-1244: Add code_owner custom metric (#24084) * includes ADR for Monitoring by Code Owner * add monitoring middleware to add the following custom metrics: - code_owner: The owning team mapped to the current view. - code_owner_mapping_error: If there are any errors when trying to perform the mapping. - view_func_module: The __module__ of the view_func, which can be used to find missing mappings. * add script to generate `settings.CODE_OWNER_MAPPINGS` from a csv file. ARCHBOM-1244 --- lms/djangoapps/monitoring/__init__.py | 3 + lms/djangoapps/monitoring/apps.py | 11 ++ .../0001-monitoring-by-code-owner.rst | 50 ++++++ lms/djangoapps/monitoring/middleware.py | 149 ++++++++++++++++++ .../scripts/generate_code_owner_mappings.py | 89 +++++++++++ .../monitoring/tests/test_middleware.py | 135 ++++++++++++++++ lms/envs/common.py | 4 + 7 files changed, 441 insertions(+) create mode 100644 lms/djangoapps/monitoring/__init__.py create mode 100644 lms/djangoapps/monitoring/apps.py create mode 100644 lms/djangoapps/monitoring/docs/decisions/0001-monitoring-by-code-owner.rst create mode 100644 lms/djangoapps/monitoring/middleware.py create mode 100644 lms/djangoapps/monitoring/scripts/generate_code_owner_mappings.py create mode 100644 lms/djangoapps/monitoring/tests/test_middleware.py diff --git a/lms/djangoapps/monitoring/__init__.py b/lms/djangoapps/monitoring/__init__.py new file mode 100644 index 0000000000..89f1b2b547 --- /dev/null +++ b/lms/djangoapps/monitoring/__init__.py @@ -0,0 +1,3 @@ +""" +LMS specific monitoring helpers. +""" diff --git a/lms/djangoapps/monitoring/apps.py b/lms/djangoapps/monitoring/apps.py new file mode 100644 index 0000000000..1ae13624c4 --- /dev/null +++ b/lms/djangoapps/monitoring/apps.py @@ -0,0 +1,11 @@ +""" +Configuration for the monitoring Django application. +""" +from django.apps import AppConfig + + +class MonitoringConfig(AppConfig): + """ + Configuration class for the monitoring Django application. + """ + name = 'monitoring' diff --git a/lms/djangoapps/monitoring/docs/decisions/0001-monitoring-by-code-owner.rst b/lms/djangoapps/monitoring/docs/decisions/0001-monitoring-by-code-owner.rst new file mode 100644 index 0000000000..fefb466403 --- /dev/null +++ b/lms/djangoapps/monitoring/docs/decisions/0001-monitoring-by-code-owner.rst @@ -0,0 +1,50 @@ +Monitoring by Code Owner +************************ + +Status +====== + +Accepted + +Context +======= + +It is currently difficult for different teams to have team-based on-calls rotations, alerting and monitoring for various parts of the edx-platform (specifically LMS). + +Decision +======== + +We will implement a custom metric "code_owner" that can be used in NewRelic (or other monitoring solutions that are made pluggable). + +The new custom metric makes it simple to query NewRelic for all Transactions or TransactionErrors that are associated with requests with a specific owner. This enables a team to quickly identify data that they own, for use in NewRelic alerts or NewRelic dashboards. + +To minimize maintenance, the value of the "code_owner" metric will be populated using the source-of-truth of ownership of various parts of edx-platform. + +See `Rejected Alternatives`_ for details of the decision **not** to split the NewRelic application into multiple NewRelic applications. + +Note: "owner" is a MySql reserved word, which NewRelic cautions against using, so we are using "code_owner". + +Consequences +============ + +This metric should be quickly available for use with custom alerts and custom dashboards. + +In the future, this metric could potentially be added to logging as well. + +Rejected Alternatives +===================== + +Splitting the NewRelic application +---------------------------------- + +The edx-platform (LMS) NewRelic application could have been split into multiple applications. This would have had the benefit of getting the out-of-the-box APM Dashboards for free. + +We decided against this alternative because: + +* To enable this solution, we would need to disable gunicorn instrumentation, and this instrumentation has proved to be valuable for understanding certain types of production issues. +* Splitting the app may make it more difficult to pinpoint the source of any problem that affects multiple applications. +* The application splitting depends on a slight "hack" from NewRelic, by resetting the application name for each path. + + * This "hack" goes against the grain of NewRelic's typical recommendations, so there could be unknown pitfalls. + * The mapping of request path to owner would require additional maintenance. + * Processing time is **not** accounted for in the NewRelic transaction time, because any processing required takes place before the NewRelic transaction gets started. diff --git a/lms/djangoapps/monitoring/middleware.py b/lms/djangoapps/monitoring/middleware.py new file mode 100644 index 0000000000..342db5b7f4 --- /dev/null +++ b/lms/djangoapps/monitoring/middleware.py @@ -0,0 +1,149 @@ +""" +Middleware for monitoring the LMS +""" +import logging +import re +from django.conf import settings +from edx_django_utils.monitoring import set_custom_metric + +log = logging.getLogger(__name__) + + +class CodeOwnerMetricMiddleware: + """ + Django middleware object to set custom metrics for the owner of each view. + + Custom metrics set: + - code_owner: The owning team mapped to the current view. + - code_owner_mapping_error: If there are any errors when trying to perform the mapping. + - view_func_module: The __module__ of the view_func, which can be used to + find missing mappings. + + """ + def __init__(self, get_response): + self.get_response = get_response + + def __call__(self, request): + response = self.get_response(request) + return response + + def process_view(self, request, view_func, view_args, view_kwargs): + """ + Set custom metric for the code_owner of the view if configured to do so. + """ + if not _PATH_TO_CODE_OWNER_MAPPINGS: + return + + try: + view_func_module = _get_view_func_module(view_func) + set_custom_metric('view_func_module', view_func_module) + code_owner = self._find_code_owner(view_func_module) + if code_owner: + set_custom_metric('code_owner', code_owner) + except Exception as e: + set_custom_metric('code_owner_mapping_error', e) + + def _find_code_owner(self, view_func_module): + """ + Attempts lookup of code_owner based on the view_func's __module__, + finding the most specific match. If no match, returns None. + + For example, if the module were 'openedx.features.discounts.views', + this lookup would match on 'openedx.features.discounts' before + 'openedx.features', because the former is more specific. + + """ + if _PATH_TO_CODE_OWNER_MAPPINGS is _INVALID_CODE_OWNER_MAPPING: + raise Exception('CODE_OWNER_MAPPINGS django setting set with invalid configuration. See logs for details.') + + view_func_module_parts = view_func_module.split('.') + # To make the most specific match, start with the max number of parts + for number_of_parts in range(len(view_func_module_parts), 0, -1): + partial_path = '.'.join(view_func_module_parts[0:number_of_parts]) + if partial_path in _PATH_TO_CODE_OWNER_MAPPINGS: + code_owner = _PATH_TO_CODE_OWNER_MAPPINGS[partial_path] + return code_owner + return None + + +def _process_code_owner_mappings(): + """ + Processes the CODE_OWNER_MAPPINGS Django Setting and returns a dict optimized + for efficient lookup by path. + + Returns: + (dict): optimized dict for success processing, None if there are no + configured mappings, or _INVALID_CODE_OWNER_MAPPING if there is an + error processing the setting. + + Example CODE_OWNER_MAPPINGS Django Setting:: + + CODE_OWNER_MAPPINGS = { + 'team-red': [ + 'xblock_django', + 'openedx.core.djangoapps.xblock', + ], + 'team-blue': [ + 'badges', + ], + } + + Example return value:: + + { + 'xblock_django': 'team-red', + 'openedx.core.djangoapps.xblock': 'team-red', + 'badges': 'team-blue', + } + + """ + _CODE_OWNER_MAPPINGS = getattr(settings, 'CODE_OWNER_MAPPINGS', None) + if not _CODE_OWNER_MAPPINGS: + return None + + try: + path_to_code_owner_mappings = {} + for code_owner in _CODE_OWNER_MAPPINGS: + path_list = _CODE_OWNER_MAPPINGS[code_owner] + for path in path_list: + path_to_code_owner_mappings[path] = code_owner + optional_module_prefix_match = _OPTIONAL_MODULE_PREFIX_PATTERN.match(path) + # if path has an optional prefix, also add the module name without the prefix + if optional_module_prefix_match: + path_without_prefix = path[optional_module_prefix_match.end():] + path_to_code_owner_mappings[path_without_prefix] = code_owner + + return path_to_code_owner_mappings + except Exception as e: + log.exception('Error processing code_owner_mappings. {}'.format(e)) + # errors should be unlikely due do scripting the setting values. + # this will trigger an error custom metric that can be alerted on. + return _INVALID_CODE_OWNER_MAPPING + +# .. toggle_name: CODE_OWNER_MAPPINGS +# .. toggle_implementation: DjangoSetting +# .. toggle_default: None +# .. toggle_description: Used to set monitoring custom metrics for owner. Dict with keys of code owner and value as list of dotted path module names owned by code owner. +# .. toggle_category: monitoring +# .. toggle_use_cases: open_edx +# .. toggle_creation_date: 2020-05-29 +# .. toggle_expiration_date: None +# .. toggle_tickets: None +# .. toggle_status: supported +# .. toggle_warnings: None +# Set to settings.CODE_OWNER_MAPPINGS during processing for simpler testing +_CODE_OWNER_MAPPINGS = None + +# The following module prefixes are optional in a func_view's reported module: +# 'common.djangoapps.', 'lms.djangoapps.', 'openedx.core.djangoapps.' +_OPTIONAL_MODULE_PREFIX_PATTERN = re.compile(r'^(lms|common|openedx\.core)\.djangoapps\.') +_INVALID_CODE_OWNER_MAPPING = 'invalid-code-owner-mapping' +# lookup table for code owner given a module path +_PATH_TO_CODE_OWNER_MAPPINGS = _process_code_owner_mappings() + + +def _get_view_func_module(view_func): + """ + Returns the view_func's __module__. Enables simpler testing via mocking. + """ + return view_func.__module__ diff --git a/lms/djangoapps/monitoring/scripts/generate_code_owner_mappings.py b/lms/djangoapps/monitoring/scripts/generate_code_owner_mappings.py new file mode 100644 index 0000000000..99046f5e18 --- /dev/null +++ b/lms/djangoapps/monitoring/scripts/generate_code_owner_mappings.py @@ -0,0 +1,89 @@ +""" +This script generates code owner mappings for monitoring LMS. + +Sample usage:: + + python lms/djangoapps/monitoring/scripts/generate_code_owner_mappings.py --app-csv "edx-platform Apps Ownership.csv" + +Sample CSV input:: + + Path,owner.squad + ./common/djangoapps/xblock_django,team-red + ./openedx/core/djangoapps/xblock,team-red + ./lms/djangoapps/badges,team-blue + +Sample output:: + + # Copy results into appropriate config yml file. + CODE_OWNER_MAPPINGS: + team-blue: + - badges + team-red: + - xblock_django + - openedx.core.djangoapps.xblock + +""" +import os +import re +import csv +import click + + +@click.command() +@click.option( + '--app-csv', + help="File name of .csv file with edx-platform app ownership details.", + required=True +) +def main(app_csv): + """ + Reads CSV of ownership data and outputs config.yml setting to system.out. + + Expected CSV format: + + Path,owner.squad\n + ./lms/templates/oauth2_provider,team-red\n + ./openedx/core/djangoapps/user_authn,team-blue\n + ... + + Final output only includes paths which might contain views. + + """ + csv_data = None + with open(app_csv, 'r') as file: + csv_data = file.read() + reader = csv.DictReader(csv_data.splitlines()) + + team_to_paths_map = {} + for row in reader: + path = row.get('Path') + team = row.get('owner.squad') + + # add paths that may have views + may_have_views = re.match(r'.*djangoapps', path) or re.match(r'[./]*openedx\/features', path) + # remove cms (studio) paths and tests + may_have_views = may_have_views and not re.match(r'.*(\/tests\b|cms\/).*', path) + + if may_have_views: + path = path.replace('./', '') # remove ./ from beginning of path + path = path.replace('/', '.') # convert path to dotted module name + + # skip catch-alls to ensure everything is properly mapped + if path in ('common,djangoapps', 'lms.djangoapps', 'openedx.core.djangoapps', 'openedx.features'): + continue + + if team not in team_to_paths_map: + team_to_paths_map[team] = [] + team_to_paths_map[team].append(path) + + print('# Do not hand edit CODE_OWNER_MAPPINGS. Generated by {}'.format(os.path.basename(__file__))) + print('CODE_OWNER_MAPPINGS:') + for team, path_list in sorted(team_to_paths_map.items()): + print(" {}:".format(team)) + path_list.sort() + for path in path_list: + print(" - {}".format(path)) + + +if __name__ == "__main__": + main() # pylint: disable=no-value-for-parameter diff --git a/lms/djangoapps/monitoring/tests/test_middleware.py b/lms/djangoapps/monitoring/tests/test_middleware.py new file mode 100644 index 0000000000..c795c65453 --- /dev/null +++ b/lms/djangoapps/monitoring/tests/test_middleware.py @@ -0,0 +1,135 @@ +""" +Tests for the LMS monitoring middleware +""" +import ddt +import timeit +from django.test import override_settings +from mock import call, patch, Mock +from unittest import TestCase +from unittest.mock import ANY + +from lms.djangoapps.monitoring.middleware import _process_code_owner_mappings, CodeOwnerMetricMiddleware + + +def _mock_get_view_func_module(view_func): + """ + Enables mocking/overriding a private function that normally gets the view_func.__module__ + because it was too difficult to mock the __module__ method. + """ + return view_func.mock_module + + +@ddt.ddt +class CodeOwnerMetricMiddlewareTests(TestCase): + """ + Tests for the LMS monitoring utility functions + """ + def setUp(self): + super().setUp() + self.mock_get_response = Mock() + self.middleware = CodeOwnerMetricMiddleware(self.mock_get_response) + + def test_init(self): + self.assertEqual(self.middleware.get_response, self.mock_get_response) + + def test_request_call(self): + self.mock_get_response.return_value = 'test-response' + request = Mock() + self.assertEqual(self.middleware(request), 'test-response') + + @override_settings(CODE_OWNER_MAPPINGS={ + 'team-red': [ + 'openedx.core.djangoapps.xblock', + 'lms.djangoapps.grades', + ], + 'team-blue': [ + 'common.djangoapps.xblock_django', + ], + }) + @patch('lms.djangoapps.monitoring.middleware.set_custom_metric') + @patch('lms.djangoapps.monitoring.middleware._get_view_func_module', side_effect=_mock_get_view_func_module) + @ddt.data( + ('xbl', None), + ('xblock_2', None), + ('xblock', 'team-red'), + ('openedx.core.djangoapps', None), + ('openedx.core.djangoapps.xblock', 'team-red'), + ('openedx.core.djangoapps.xblock.views', 'team-red'), + ('grades', 'team-red'), + ('lms.djangoapps.grades', 'team-red'), + ('xblock_django', 'team-blue'), + ('common.djangoapps.xblock_django', 'team-blue'), + ) + @ddt.unpack + def test_code_owner_mapping_hits_and_misses( + self, view_func_module, expected_owner, mock_get_view_func_module, mock_set_custom_metric + ): + with patch('lms.djangoapps.monitoring.middleware._PATH_TO_CODE_OWNER_MAPPINGS', _process_code_owner_mappings()): + mock_view_func = self._create_view_func_mock(view_func_module) + self.middleware.process_view(None, mock_view_func, None, None) + self._assert_code_owner_custom_metrics( + view_func_module, mock_set_custom_metric, expected_code_owner=expected_owner + ) + + @patch('lms.djangoapps.monitoring.middleware.set_custom_metric') + def test_code_owner_no_mappings(self, mock_set_custom_metric): + mock_view_func = self._create_view_func_mock('xblock') + self.middleware.process_view(None, mock_view_func, None, None) + mock_set_custom_metric.assert_not_called() + + @override_settings(CODE_OWNER_MAPPINGS=['invalid_setting_as_list']) + @patch('lms.djangoapps.monitoring.middleware.set_custom_metric') + @patch('lms.djangoapps.monitoring.middleware._get_view_func_module', side_effect=_mock_get_view_func_module) + def test_load_config_with_invalid_dict(self, mock_get_view_func_module, mock_set_custom_metric): + with patch('lms.djangoapps.monitoring.middleware._PATH_TO_CODE_OWNER_MAPPINGS', _process_code_owner_mappings()): + mock_view_func = self._create_view_func_mock('xblock') + self.middleware.process_view(None, mock_view_func, None, None) + self._assert_code_owner_custom_metrics( + 'xblock', mock_set_custom_metric, has_error=True + ) + + @patch('lms.djangoapps.monitoring.middleware.set_custom_metric') + @patch('lms.djangoapps.monitoring.middleware._get_view_func_module', side_effect=_mock_get_view_func_module) + def test_mapping_performance(self, mock_get_view_func_module, mock_set_custom_metric): + code_owner_mappings = { + 'team-red': [] + } + # create a long list of mappings that are nearly identical + for n in range(1, 200): + path = 'openedx.core.djangoapps.{}'.format(n) + code_owner_mappings['team-red'].append(path) + with override_settings(CODE_OWNER_MAPPINGS=code_owner_mappings): + with patch( + 'lms.djangoapps.monitoring.middleware._PATH_TO_CODE_OWNER_MAPPINGS', _process_code_owner_mappings() + ): + # test a module that matches nearly to the end, but doesn't actually match + mock_view_func = self._create_view_func_mock('openedx.core.djangoapps.XXX.views') + call_iterations = 100 + time = timeit.timeit( + lambda: self.middleware.process_view(None, mock_view_func, None, None), number=call_iterations + ) + average_time = time / call_iterations + self.assertTrue(average_time < 0.0005, 'Mapping takes {}s which is too slow.'.format(average_time)) + expected_calls = [] + for n in range(1, call_iterations): + expected_calls.append(call('view_func_module', 'openedx.core.djangoapps.XXX.views')) + mock_set_custom_metric.assert_has_calls(expected_calls) + + def _assert_code_owner_custom_metrics( + self, view_func_module, mock_set_custom_metric, expected_code_owner=None, has_error=False, + ): + call_list = [] + call_list.append(call('view_func_module', view_func_module)) + if expected_code_owner: + call_list.append(call('code_owner', expected_code_owner)) + if has_error: + call_list.append(call('code_owner_mapping_error', ANY)) + mock_set_custom_metric.assert_has_calls(call_list) + + def _create_view_func_mock(self, module): + """ + Mock view function where __module__ returns 'test_middleware' + """ + view_func = Mock() + view_func.mock_module = module + return view_func diff --git a/lms/envs/common.py b/lms/envs/common.py index c4d73d0d7b..8e24adbce0 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -1576,6 +1576,7 @@ MIDDLEWARE = [ # Outputs monitoring metrics for a request. 'edx_rest_framework_extensions.middleware.RequestMetricsMiddleware', + 'lms.djangoapps.monitoring.middleware.CodeOwnerMetricMiddleware', 'edx_rest_framework_extensions.auth.jwt.middleware.EnsureJWTAuthSettingsMiddleware', @@ -2562,6 +2563,9 @@ INSTALLED_APPS = [ # Management of per-user schedules 'openedx.core.djangoapps.schedules', 'rest_framework_jwt', + + # Monitoring utilities for LMS + 'lms.djangoapps.monitoring.apps.MonitoringConfig' ] ######################### CSRF #########################################