ARCHBOM-1244: Add code_owner custom metric (#24084)
* includes ADR for Monitoring by Code Owner
* add monitoring middleware to add the following custom metrics:
- code_owner: The owning team mapped to the current view.
- code_owner_mapping_error: If there are any errors when trying to
perform the mapping.
- view_func_module: The __module__ of the view_func, which can
be used to find missing mappings.
* add script to generate `settings.CODE_OWNER_MAPPINGS` from
a csv file.
ARCHBOM-1244
This commit is contained in:
3
lms/djangoapps/monitoring/__init__.py
Normal file
3
lms/djangoapps/monitoring/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
LMS specific monitoring helpers.
|
||||
"""
|
||||
11
lms/djangoapps/monitoring/apps.py
Normal file
11
lms/djangoapps/monitoring/apps.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
Configuration for the monitoring Django application.
|
||||
"""
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class MonitoringConfig(AppConfig):
|
||||
"""
|
||||
Configuration class for the monitoring Django application.
|
||||
"""
|
||||
name = 'monitoring'
|
||||
@@ -0,0 +1,50 @@
|
||||
Monitoring by Code Owner
|
||||
************************
|
||||
|
||||
Status
|
||||
======
|
||||
|
||||
Accepted
|
||||
|
||||
Context
|
||||
=======
|
||||
|
||||
It is currently difficult for different teams to have team-based on-calls rotations, alerting and monitoring for various parts of the edx-platform (specifically LMS).
|
||||
|
||||
Decision
|
||||
========
|
||||
|
||||
We will implement a custom metric "code_owner" that can be used in NewRelic (or other monitoring solutions that are made pluggable).
|
||||
|
||||
The new custom metric makes it simple to query NewRelic for all Transactions or TransactionErrors that are associated with requests with a specific owner. This enables a team to quickly identify data that they own, for use in NewRelic alerts or NewRelic dashboards.
|
||||
|
||||
To minimize maintenance, the value of the "code_owner" metric will be populated using the source-of-truth of ownership of various parts of edx-platform.
|
||||
|
||||
See `Rejected Alternatives`_ for details of the decision **not** to split the NewRelic application into multiple NewRelic applications.
|
||||
|
||||
Note: "owner" is a MySql reserved word, which NewRelic cautions against using, so we are using "code_owner".
|
||||
|
||||
Consequences
|
||||
============
|
||||
|
||||
This metric should be quickly available for use with custom alerts and custom dashboards.
|
||||
|
||||
In the future, this metric could potentially be added to logging as well.
|
||||
|
||||
Rejected Alternatives
|
||||
=====================
|
||||
|
||||
Splitting the NewRelic application
|
||||
----------------------------------
|
||||
|
||||
The edx-platform (LMS) NewRelic application could have been split into multiple applications. This would have had the benefit of getting the out-of-the-box APM Dashboards for free.
|
||||
|
||||
We decided against this alternative because:
|
||||
|
||||
* To enable this solution, we would need to disable gunicorn instrumentation, and this instrumentation has proved to be valuable for understanding certain types of production issues.
|
||||
* Splitting the app may make it more difficult to pinpoint the source of any problem that affects multiple applications.
|
||||
* The application splitting depends on a slight "hack" from NewRelic, by resetting the application name for each path.
|
||||
|
||||
* This "hack" goes against the grain of NewRelic's typical recommendations, so there could be unknown pitfalls.
|
||||
* The mapping of request path to owner would require additional maintenance.
|
||||
* Processing time is **not** accounted for in the NewRelic transaction time, because any processing required takes place before the NewRelic transaction gets started.
|
||||
149
lms/djangoapps/monitoring/middleware.py
Normal file
149
lms/djangoapps/monitoring/middleware.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""
|
||||
Middleware for monitoring the LMS
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
from django.conf import settings
|
||||
from edx_django_utils.monitoring import set_custom_metric
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CodeOwnerMetricMiddleware:
|
||||
"""
|
||||
Django middleware object to set custom metrics for the owner of each view.
|
||||
|
||||
Custom metrics set:
|
||||
- code_owner: The owning team mapped to the current view.
|
||||
- code_owner_mapping_error: If there are any errors when trying to perform the mapping.
|
||||
- view_func_module: The __module__ of the view_func, which can be used to
|
||||
find missing mappings.
|
||||
|
||||
"""
|
||||
def __init__(self, get_response):
|
||||
self.get_response = get_response
|
||||
|
||||
def __call__(self, request):
|
||||
response = self.get_response(request)
|
||||
return response
|
||||
|
||||
def process_view(self, request, view_func, view_args, view_kwargs):
|
||||
"""
|
||||
Set custom metric for the code_owner of the view if configured to do so.
|
||||
"""
|
||||
if not _PATH_TO_CODE_OWNER_MAPPINGS:
|
||||
return
|
||||
|
||||
try:
|
||||
view_func_module = _get_view_func_module(view_func)
|
||||
set_custom_metric('view_func_module', view_func_module)
|
||||
code_owner = self._find_code_owner(view_func_module)
|
||||
if code_owner:
|
||||
set_custom_metric('code_owner', code_owner)
|
||||
except Exception as e:
|
||||
set_custom_metric('code_owner_mapping_error', e)
|
||||
|
||||
def _find_code_owner(self, view_func_module):
|
||||
"""
|
||||
Attempts lookup of code_owner based on the view_func's __module__,
|
||||
finding the most specific match. If no match, returns None.
|
||||
|
||||
For example, if the module were 'openedx.features.discounts.views',
|
||||
this lookup would match on 'openedx.features.discounts' before
|
||||
'openedx.features', because the former is more specific.
|
||||
|
||||
"""
|
||||
if _PATH_TO_CODE_OWNER_MAPPINGS is _INVALID_CODE_OWNER_MAPPING:
|
||||
raise Exception('CODE_OWNER_MAPPINGS django setting set with invalid configuration. See logs for details.')
|
||||
|
||||
view_func_module_parts = view_func_module.split('.')
|
||||
# To make the most specific match, start with the max number of parts
|
||||
for number_of_parts in range(len(view_func_module_parts), 0, -1):
|
||||
partial_path = '.'.join(view_func_module_parts[0:number_of_parts])
|
||||
if partial_path in _PATH_TO_CODE_OWNER_MAPPINGS:
|
||||
code_owner = _PATH_TO_CODE_OWNER_MAPPINGS[partial_path]
|
||||
return code_owner
|
||||
return None
|
||||
|
||||
|
||||
def _process_code_owner_mappings():
|
||||
"""
|
||||
Processes the CODE_OWNER_MAPPINGS Django Setting and returns a dict optimized
|
||||
for efficient lookup by path.
|
||||
|
||||
Returns:
|
||||
(dict): optimized dict for success processing, None if there are no
|
||||
configured mappings, or _INVALID_CODE_OWNER_MAPPING if there is an
|
||||
error processing the setting.
|
||||
|
||||
Example CODE_OWNER_MAPPINGS Django Setting::
|
||||
|
||||
CODE_OWNER_MAPPINGS = {
|
||||
'team-red': [
|
||||
'xblock_django',
|
||||
'openedx.core.djangoapps.xblock',
|
||||
],
|
||||
'team-blue': [
|
||||
'badges',
|
||||
],
|
||||
}
|
||||
|
||||
Example return value::
|
||||
|
||||
{
|
||||
'xblock_django': 'team-red',
|
||||
'openedx.core.djangoapps.xblock': 'team-red',
|
||||
'badges': 'team-blue',
|
||||
}
|
||||
|
||||
"""
|
||||
_CODE_OWNER_MAPPINGS = getattr(settings, 'CODE_OWNER_MAPPINGS', None)
|
||||
if not _CODE_OWNER_MAPPINGS:
|
||||
return None
|
||||
|
||||
try:
|
||||
path_to_code_owner_mappings = {}
|
||||
for code_owner in _CODE_OWNER_MAPPINGS:
|
||||
path_list = _CODE_OWNER_MAPPINGS[code_owner]
|
||||
for path in path_list:
|
||||
path_to_code_owner_mappings[path] = code_owner
|
||||
optional_module_prefix_match = _OPTIONAL_MODULE_PREFIX_PATTERN.match(path)
|
||||
# if path has an optional prefix, also add the module name without the prefix
|
||||
if optional_module_prefix_match:
|
||||
path_without_prefix = path[optional_module_prefix_match.end():]
|
||||
path_to_code_owner_mappings[path_without_prefix] = code_owner
|
||||
|
||||
return path_to_code_owner_mappings
|
||||
except Exception as e:
|
||||
log.exception('Error processing code_owner_mappings. {}'.format(e))
|
||||
# errors should be unlikely due do scripting the setting values.
|
||||
# this will trigger an error custom metric that can be alerted on.
|
||||
return _INVALID_CODE_OWNER_MAPPING
|
||||
|
||||
# .. toggle_name: CODE_OWNER_MAPPINGS
|
||||
# .. toggle_implementation: DjangoSetting
|
||||
# .. toggle_default: None
|
||||
# .. toggle_description: Used to set monitoring custom metrics for owner. Dict with keys of code owner and value as list of dotted path module names owned by code owner.
|
||||
# .. toggle_category: monitoring
|
||||
# .. toggle_use_cases: open_edx
|
||||
# .. toggle_creation_date: 2020-05-29
|
||||
# .. toggle_expiration_date: None
|
||||
# .. toggle_tickets: None
|
||||
# .. toggle_status: supported
|
||||
# .. toggle_warnings: None
|
||||
# Set to settings.CODE_OWNER_MAPPINGS during processing for simpler testing
|
||||
_CODE_OWNER_MAPPINGS = None
|
||||
|
||||
# The following module prefixes are optional in a func_view's reported module:
|
||||
# 'common.djangoapps.', 'lms.djangoapps.', 'openedx.core.djangoapps.'
|
||||
_OPTIONAL_MODULE_PREFIX_PATTERN = re.compile(r'^(lms|common|openedx\.core)\.djangoapps\.')
|
||||
_INVALID_CODE_OWNER_MAPPING = 'invalid-code-owner-mapping'
|
||||
# lookup table for code owner given a module path
|
||||
_PATH_TO_CODE_OWNER_MAPPINGS = _process_code_owner_mappings()
|
||||
|
||||
|
||||
def _get_view_func_module(view_func):
|
||||
"""
|
||||
Returns the view_func's __module__. Enables simpler testing via mocking.
|
||||
"""
|
||||
return view_func.__module__
|
||||
@@ -0,0 +1,89 @@
|
||||
"""
|
||||
This script generates code owner mappings for monitoring LMS.
|
||||
|
||||
Sample usage::
|
||||
|
||||
python lms/djangoapps/monitoring/scripts/generate_code_owner_mappings.py --app-csv "edx-platform Apps Ownership.csv"
|
||||
|
||||
Sample CSV input::
|
||||
|
||||
Path,owner.squad
|
||||
./common/djangoapps/xblock_django,team-red
|
||||
./openedx/core/djangoapps/xblock,team-red
|
||||
./lms/djangoapps/badges,team-blue
|
||||
|
||||
Sample output::
|
||||
|
||||
# Copy results into appropriate config yml file.
|
||||
CODE_OWNER_MAPPINGS:
|
||||
team-blue:
|
||||
- badges
|
||||
team-red:
|
||||
- xblock_django
|
||||
- openedx.core.djangoapps.xblock
|
||||
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import csv
|
||||
import click
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option(
|
||||
'--app-csv',
|
||||
help="File name of .csv file with edx-platform app ownership details.",
|
||||
required=True
|
||||
)
|
||||
def main(app_csv):
|
||||
"""
|
||||
Reads CSV of ownership data and outputs config.yml setting to system.out.
|
||||
|
||||
Expected CSV format:
|
||||
|
||||
Path,owner.squad\n
|
||||
./lms/templates/oauth2_provider,team-red\n
|
||||
./openedx/core/djangoapps/user_authn,team-blue\n
|
||||
...
|
||||
|
||||
Final output only includes paths which might contain views.
|
||||
|
||||
"""
|
||||
csv_data = None
|
||||
with open(app_csv, 'r') as file:
|
||||
csv_data = file.read()
|
||||
reader = csv.DictReader(csv_data.splitlines())
|
||||
|
||||
team_to_paths_map = {}
|
||||
for row in reader:
|
||||
path = row.get('Path')
|
||||
team = row.get('owner.squad')
|
||||
|
||||
# add paths that may have views
|
||||
may_have_views = re.match(r'.*djangoapps', path) or re.match(r'[./]*openedx\/features', path)
|
||||
# remove cms (studio) paths and tests
|
||||
may_have_views = may_have_views and not re.match(r'.*(\/tests\b|cms\/).*', path)
|
||||
|
||||
if may_have_views:
|
||||
path = path.replace('./', '') # remove ./ from beginning of path
|
||||
path = path.replace('/', '.') # convert path to dotted module name
|
||||
|
||||
# skip catch-alls to ensure everything is properly mapped
|
||||
if path in ('common,djangoapps', 'lms.djangoapps', 'openedx.core.djangoapps', 'openedx.features'):
|
||||
continue
|
||||
|
||||
if team not in team_to_paths_map:
|
||||
team_to_paths_map[team] = []
|
||||
team_to_paths_map[team].append(path)
|
||||
|
||||
print('# Do not hand edit CODE_OWNER_MAPPINGS. Generated by {}'.format(os.path.basename(__file__)))
|
||||
print('CODE_OWNER_MAPPINGS:')
|
||||
for team, path_list in sorted(team_to_paths_map.items()):
|
||||
print(" {}:".format(team))
|
||||
path_list.sort()
|
||||
for path in path_list:
|
||||
print(" - {}".format(path))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main() # pylint: disable=no-value-for-parameter
|
||||
135
lms/djangoapps/monitoring/tests/test_middleware.py
Normal file
135
lms/djangoapps/monitoring/tests/test_middleware.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""
|
||||
Tests for the LMS monitoring middleware
|
||||
"""
|
||||
import ddt
|
||||
import timeit
|
||||
from django.test import override_settings
|
||||
from mock import call, patch, Mock
|
||||
from unittest import TestCase
|
||||
from unittest.mock import ANY
|
||||
|
||||
from lms.djangoapps.monitoring.middleware import _process_code_owner_mappings, CodeOwnerMetricMiddleware
|
||||
|
||||
|
||||
def _mock_get_view_func_module(view_func):
|
||||
"""
|
||||
Enables mocking/overriding a private function that normally gets the view_func.__module__
|
||||
because it was too difficult to mock the __module__ method.
|
||||
"""
|
||||
return view_func.mock_module
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
class CodeOwnerMetricMiddlewareTests(TestCase):
|
||||
"""
|
||||
Tests for the LMS monitoring utility functions
|
||||
"""
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
self.mock_get_response = Mock()
|
||||
self.middleware = CodeOwnerMetricMiddleware(self.mock_get_response)
|
||||
|
||||
def test_init(self):
|
||||
self.assertEqual(self.middleware.get_response, self.mock_get_response)
|
||||
|
||||
def test_request_call(self):
|
||||
self.mock_get_response.return_value = 'test-response'
|
||||
request = Mock()
|
||||
self.assertEqual(self.middleware(request), 'test-response')
|
||||
|
||||
@override_settings(CODE_OWNER_MAPPINGS={
|
||||
'team-red': [
|
||||
'openedx.core.djangoapps.xblock',
|
||||
'lms.djangoapps.grades',
|
||||
],
|
||||
'team-blue': [
|
||||
'common.djangoapps.xblock_django',
|
||||
],
|
||||
})
|
||||
@patch('lms.djangoapps.monitoring.middleware.set_custom_metric')
|
||||
@patch('lms.djangoapps.monitoring.middleware._get_view_func_module', side_effect=_mock_get_view_func_module)
|
||||
@ddt.data(
|
||||
('xbl', None),
|
||||
('xblock_2', None),
|
||||
('xblock', 'team-red'),
|
||||
('openedx.core.djangoapps', None),
|
||||
('openedx.core.djangoapps.xblock', 'team-red'),
|
||||
('openedx.core.djangoapps.xblock.views', 'team-red'),
|
||||
('grades', 'team-red'),
|
||||
('lms.djangoapps.grades', 'team-red'),
|
||||
('xblock_django', 'team-blue'),
|
||||
('common.djangoapps.xblock_django', 'team-blue'),
|
||||
)
|
||||
@ddt.unpack
|
||||
def test_code_owner_mapping_hits_and_misses(
|
||||
self, view_func_module, expected_owner, mock_get_view_func_module, mock_set_custom_metric
|
||||
):
|
||||
with patch('lms.djangoapps.monitoring.middleware._PATH_TO_CODE_OWNER_MAPPINGS', _process_code_owner_mappings()):
|
||||
mock_view_func = self._create_view_func_mock(view_func_module)
|
||||
self.middleware.process_view(None, mock_view_func, None, None)
|
||||
self._assert_code_owner_custom_metrics(
|
||||
view_func_module, mock_set_custom_metric, expected_code_owner=expected_owner
|
||||
)
|
||||
|
||||
@patch('lms.djangoapps.monitoring.middleware.set_custom_metric')
|
||||
def test_code_owner_no_mappings(self, mock_set_custom_metric):
|
||||
mock_view_func = self._create_view_func_mock('xblock')
|
||||
self.middleware.process_view(None, mock_view_func, None, None)
|
||||
mock_set_custom_metric.assert_not_called()
|
||||
|
||||
@override_settings(CODE_OWNER_MAPPINGS=['invalid_setting_as_list'])
|
||||
@patch('lms.djangoapps.monitoring.middleware.set_custom_metric')
|
||||
@patch('lms.djangoapps.monitoring.middleware._get_view_func_module', side_effect=_mock_get_view_func_module)
|
||||
def test_load_config_with_invalid_dict(self, mock_get_view_func_module, mock_set_custom_metric):
|
||||
with patch('lms.djangoapps.monitoring.middleware._PATH_TO_CODE_OWNER_MAPPINGS', _process_code_owner_mappings()):
|
||||
mock_view_func = self._create_view_func_mock('xblock')
|
||||
self.middleware.process_view(None, mock_view_func, None, None)
|
||||
self._assert_code_owner_custom_metrics(
|
||||
'xblock', mock_set_custom_metric, has_error=True
|
||||
)
|
||||
|
||||
@patch('lms.djangoapps.monitoring.middleware.set_custom_metric')
|
||||
@patch('lms.djangoapps.monitoring.middleware._get_view_func_module', side_effect=_mock_get_view_func_module)
|
||||
def test_mapping_performance(self, mock_get_view_func_module, mock_set_custom_metric):
|
||||
code_owner_mappings = {
|
||||
'team-red': []
|
||||
}
|
||||
# create a long list of mappings that are nearly identical
|
||||
for n in range(1, 200):
|
||||
path = 'openedx.core.djangoapps.{}'.format(n)
|
||||
code_owner_mappings['team-red'].append(path)
|
||||
with override_settings(CODE_OWNER_MAPPINGS=code_owner_mappings):
|
||||
with patch(
|
||||
'lms.djangoapps.monitoring.middleware._PATH_TO_CODE_OWNER_MAPPINGS', _process_code_owner_mappings()
|
||||
):
|
||||
# test a module that matches nearly to the end, but doesn't actually match
|
||||
mock_view_func = self._create_view_func_mock('openedx.core.djangoapps.XXX.views')
|
||||
call_iterations = 100
|
||||
time = timeit.timeit(
|
||||
lambda: self.middleware.process_view(None, mock_view_func, None, None), number=call_iterations
|
||||
)
|
||||
average_time = time / call_iterations
|
||||
self.assertTrue(average_time < 0.0005, 'Mapping takes {}s which is too slow.'.format(average_time))
|
||||
expected_calls = []
|
||||
for n in range(1, call_iterations):
|
||||
expected_calls.append(call('view_func_module', 'openedx.core.djangoapps.XXX.views'))
|
||||
mock_set_custom_metric.assert_has_calls(expected_calls)
|
||||
|
||||
def _assert_code_owner_custom_metrics(
|
||||
self, view_func_module, mock_set_custom_metric, expected_code_owner=None, has_error=False,
|
||||
):
|
||||
call_list = []
|
||||
call_list.append(call('view_func_module', view_func_module))
|
||||
if expected_code_owner:
|
||||
call_list.append(call('code_owner', expected_code_owner))
|
||||
if has_error:
|
||||
call_list.append(call('code_owner_mapping_error', ANY))
|
||||
mock_set_custom_metric.assert_has_calls(call_list)
|
||||
|
||||
def _create_view_func_mock(self, module):
|
||||
"""
|
||||
Mock view function where __module__ returns 'test_middleware'
|
||||
"""
|
||||
view_func = Mock()
|
||||
view_func.mock_module = module
|
||||
return view_func
|
||||
@@ -1576,6 +1576,7 @@ MIDDLEWARE = [
|
||||
|
||||
# Outputs monitoring metrics for a request.
|
||||
'edx_rest_framework_extensions.middleware.RequestMetricsMiddleware',
|
||||
'lms.djangoapps.monitoring.middleware.CodeOwnerMetricMiddleware',
|
||||
|
||||
'edx_rest_framework_extensions.auth.jwt.middleware.EnsureJWTAuthSettingsMiddleware',
|
||||
|
||||
@@ -2562,6 +2563,9 @@ INSTALLED_APPS = [
|
||||
# Management of per-user schedules
|
||||
'openedx.core.djangoapps.schedules',
|
||||
'rest_framework_jwt',
|
||||
|
||||
# Monitoring utilities for LMS
|
||||
'lms.djangoapps.monitoring.apps.MonitoringConfig'
|
||||
]
|
||||
|
||||
######################### CSRF #########################################
|
||||
|
||||
Reference in New Issue
Block a user