From ece785ff926533c467440238772a9168d786f08b Mon Sep 17 00:00:00 2001 From: Troy Sankey Date: Thu, 3 Nov 2016 10:18:17 -0400 Subject: [PATCH 1/2] Implement the newrelic_custom_metrics middleware Use this middleware and its helpers to accumulate custom New Relic metrics on a per-request basis. If, while handling a request, the app does not accumulate any metrics, this middleware is a very lightweight no-op. This commit also enables the middleware! PERF-354 --- .../newrelic_custom_metrics/__init__.py | 43 +++++++++++++ .../newrelic_custom_metrics/middleware.py | 62 +++++++++++++++++++ .../newrelic_custom_metrics/tests.py | 46 ++++++++++++++ lms/envs/common.py | 1 + 4 files changed, 152 insertions(+) create mode 100644 common/djangoapps/newrelic_custom_metrics/__init__.py create mode 100644 common/djangoapps/newrelic_custom_metrics/middleware.py create mode 100644 common/djangoapps/newrelic_custom_metrics/tests.py diff --git a/common/djangoapps/newrelic_custom_metrics/__init__.py b/common/djangoapps/newrelic_custom_metrics/__init__.py new file mode 100644 index 0000000000..cd920c807b --- /dev/null +++ b/common/djangoapps/newrelic_custom_metrics/__init__.py @@ -0,0 +1,43 @@ +""" +This is an interface to the newrelic_custom_metrics middleware. Functions +defined in this module can be used to report custom metrics to New Relic. For +example: + + import newrelic_custom_metrics + ... + newrelic_custom_metrics.accumulate('xb_user_state.get_many.num_items', 4) + +There is no need to do anything else. The metrics are automatically cleared +before the next request. +""" + +from newrelic_custom_metrics import middleware + + +def accumulate(name, value): + """ + Queue up a custom New Relic metric for the current request. At the end of + the request, the newrelic_custom_metrics middleware will batch report all + queued metrics to NR. + + Q: What style of names should I use? + A: Metric names should be comma delimited, becoming more specific from left + to right. + + Q: What type can values be? + A: numbers only. + + Q: What happens when I call this multiple times with the same name? + A: Like-named metrics will be accumulated using the sum. + """ + middleware.NewRelicCustomMetrics.accumulate_metric(name, value) + + +def increment(name): + """ + Increment a custom New Relic metric representing a counter. + + Here we simply accumulate a new custom metric with a value of 1, and the + middleware should automatically aggregate this metric. + """ + accumulate(name, 1) diff --git a/common/djangoapps/newrelic_custom_metrics/middleware.py b/common/djangoapps/newrelic_custom_metrics/middleware.py new file mode 100644 index 0000000000..3af8cb5ef1 --- /dev/null +++ b/common/djangoapps/newrelic_custom_metrics/middleware.py @@ -0,0 +1,62 @@ +""" +Middleware for handling the storage, aggregation, and reporing of custom New +Relic metrics. + +This middleware will only call on the newrelic agent if there are any metrics +to report for this request, so it will not incur any processing overhead for +request handlers which do not record custom metrics. +""" +import newrelic.agent +import request_cache + +REQUEST_CACHE_KEY = 'newrelic_custom_metrics' + + +class NewRelicCustomMetrics(object): + """ + The middleware class. Make sure to add below the request cache in + MIDDLEWARE_CLASSES. + """ + + @classmethod + def _get_metrics_cache(cls): + """ + Get a reference to the part of the request cache wherein we store New + Relic custom metrics related to the current request. + """ + return request_cache.get_cache(name=REQUEST_CACHE_KEY) + + @classmethod + def accumulate_metric(cls, name, value): + """ + Accumulate a custom metric (name and value) in the metrics cache. + """ + metrics_cache = cls._get_metrics_cache() + metrics_cache.setdefault(name, 0) + metrics_cache[name] += value + + @classmethod + def _batch_report(cls): + """ + Report the collected custom metrics to New Relic. + """ + metrics_cache = cls._get_metrics_cache() + for metric_name, metric_value in metrics_cache.iteritems(): + newrelic.agent.add_custom_parameter(metric_name, metric_value) + + # Whether or not there was an exception, report any custom NR metrics that + # may have been collected. + + def process_response(self, request, response): # pylint: disable=unused-argument + """ + Django middleware handler to process a response + """ + self._batch_report() + return response + + def process_exception(self, request, exception): # pylint: disable=unused-argument + """ + Django middleware handler to process an exception + """ + self._batch_report() + return None diff --git a/common/djangoapps/newrelic_custom_metrics/tests.py b/common/djangoapps/newrelic_custom_metrics/tests.py new file mode 100644 index 0000000000..b1fc931b1a --- /dev/null +++ b/common/djangoapps/newrelic_custom_metrics/tests.py @@ -0,0 +1,46 @@ +""" +Tests for newrelic custom metrics. +""" +from django.test import TestCase +from mock import patch, call + +import newrelic_custom_metrics + + +class TestNewRelicCustomMetrics(TestCase): + """ + Test the newrelic_custom_metrics middleware and helpers + """ + + @patch('newrelic.agent') + def test_cache_normal_contents(self, mock_newrelic_agent): + """ + Test normal usage of collecting and reporting custom New Relic metrics + """ + newrelic_custom_metrics.accumulate('hello', 10) + newrelic_custom_metrics.accumulate('world', 10) + newrelic_custom_metrics.accumulate('world', 10) + newrelic_custom_metrics.increment('foo') + newrelic_custom_metrics.increment('foo') + + # based on the metric data above, we expect the following calls to newrelic: + nr_agent_calls_expected = [ + call('hello', 10), + call('world', 20), + call('foo', 2), + ] + + # fake a response to trigger metrics reporting + newrelic_custom_metrics.middleware.NewRelicCustomMetrics().process_response( + 'fake request', + 'fake response', + ) + + # Assert call counts to newrelic.agent.add_custom_parameter() + expected_call_count = len(nr_agent_calls_expected) + measured_call_count = mock_newrelic_agent.add_custom_parameter.call_count + self.assertEqual(expected_call_count, measured_call_count) + + # Assert call args to newrelic.agent.add_custom_parameter(). Due to + # the nature of python dicts, call order is undefined. + mock_newrelic_agent.add_custom_parameter.has_calls(nr_agent_calls_expected, any_order=True) diff --git a/lms/envs/common.py b/lms/envs/common.py index d4623ac53c..52a9349f7c 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -1096,6 +1096,7 @@ MIDDLEWARE_CLASSES = ( 'crum.CurrentRequestUserMiddleware', 'request_cache.middleware.RequestCache', + 'newrelic_custom_metrics.middleware.NewRelicCustomMetrics', 'mobile_api.middleware.AppVersionUpgrade', 'openedx.core.djangoapps.header_control.middleware.HeaderControlMiddleware', From 076c63bcf54851447893bf47ce13c26bd6db7dfe Mon Sep 17 00:00:00 2001 From: Troy Sankey Date: Thu, 3 Nov 2016 10:23:53 -0400 Subject: [PATCH 2/2] Instrument DjangoXBlockUserStateClient with New Relic Report to New Relic certain per-request details about the DjangoXBlockUserStateClient. The following metrics are reported for the get_many() call: xb_user_state.get_many.calls xb_user_state.get_many.duration xb_user_state.get_many.blocks_requested xb_user_state.get_many.blocks_out xb_user_state.get_many.size xb_user_state.get_many..blocks_requested xb_user_state.get_many..blocks_out xb_user_state.get_many..size Similarly, for the set_many() call: xb_user_state.set_many.calls xb_user_state.set_many.duration xb_user_state.set_many.blocks_created xb_user_state.set_many.blocks_updated xb_user_state.set_many.size xb_user_state.set_many..blocks_created xb_user_state.set_many..blocks_updated xb_user_state.set_many..size Where is one of "chapter", "course", "problem", "video", etc. PERF-354 --- .../courseware/user_state_client.py | 93 ++++++++++++++++--- 1 file changed, 81 insertions(+), 12 deletions(-) diff --git a/lms/djangoapps/courseware/user_state_client.py b/lms/djangoapps/courseware/user_state_client.py index 1731b2e2e7..c1dfe42b82 100644 --- a/lms/djangoapps/courseware/user_state_client.py +++ b/lms/djangoapps/courseware/user_state_client.py @@ -12,6 +12,7 @@ try: except ImportError: import json +import newrelic_custom_metrics import dogstats_wrapper as dog_stats_api from django.contrib.auth.models import User from django.db import transaction @@ -120,6 +121,51 @@ class DjangoXBlockUserStateClient(XBlockUserStateClient): sample_rate=self.API_DATADOG_SAMPLE_RATE, ) + def _nr_metric_name(self, function_name, stat_name, block_type=None): + """ + Return a metric name (string) representing the provided descriptors. + The return value is directly usable for custom NR metrics. + """ + if block_type is None: + metric_name_parts = ['xb_user_state', function_name, stat_name] + else: + metric_name_parts = ['xb_user_state', function_name, block_type, stat_name] + return '.'.join(metric_name_parts) + + def _nr_stat_accumulate(self, function_name, stat_name, value): + """ + Accumulate arbitrary NR stats (not specific to block types). + """ + newrelic_custom_metrics.accumulate( + self._nr_metric_name(function_name, stat_name), + value + ) + + def _nr_stat_increment(self, function_name, stat_name, count=1): + """ + Increment arbitrary NR stats (not specific to block types). + """ + self._nr_stat_accumulate(function_name, stat_name, count) + + def _nr_block_stat_accumulate(self, function_name, block_type, stat_name, value): + """ + Accumulate NR stats related to block types. + """ + newrelic_custom_metrics.accumulate( + self._nr_metric_name(function_name, stat_name), + value, + ) + newrelic_custom_metrics.accumulate( + self._nr_metric_name(function_name, stat_name, block_type=block_type), + value, + ) + + def _nr_block_stat_increment(self, function_name, block_type, stat_name, count=1): + """ + Increment NR stats related to block types. + """ + self._nr_block_stat_accumulate(function_name, block_type, stat_name, count) + def get_many(self, username, block_keys, scope=Scope.user_state, fields=None): """ Retrieve the stored XBlock state for the specified XBlock usages. @@ -137,10 +183,15 @@ class DjangoXBlockUserStateClient(XBlockUserStateClient): if scope != Scope.user_state: raise ValueError("Only Scope.user_state is supported, not {}".format(scope)) - block_count = state_length = 0 + total_block_count = 0 evt_time = time() + # count how many times this function gets called + self._nr_stat_increment('get_many', 'calls') + + # keep track of blocks requested self._ddog_histogram(evt_time, 'get_many.blks_requested', len(block_keys)) + self._nr_stat_accumulate('get_many', 'blocks_requested', len(block_keys)) modules = self._get_student_modules(username, block_keys) for module, usage_key in modules: @@ -149,29 +200,38 @@ class DjangoXBlockUserStateClient(XBlockUserStateClient): continue state = json.loads(module.state) - state_length += len(module.state) + state_length = len(module.state) - self._ddog_histogram(evt_time, 'get_many.block_size', len(module.state)) + # record this metric before the check for empty state, so that we + # have some visibility into empty blocks. + self._ddog_histogram(evt_time, 'get_many.block_size', state_length) # If the state is the empty dict, then it has been deleted, and so # conformant UserStateClients should treat it as if it doesn't exist. if state == {}: continue + # collect statistics for metric reporting + self._nr_block_stat_increment('get_many', usage_key.block_type, 'blocks_out') + self._nr_block_stat_accumulate('get_many', usage_key.block_type, 'size', state_length) + total_block_count += 1 + + # filter state on fields if fields is not None: state = { field: state[field] for field in fields if field in state } - block_count += 1 yield XBlockUserState(username, usage_key, state, module.modified, scope) - # The rest of this method exists only to submit DataDog events. - # Remove it once we're no longer interested in the data. + # The rest of this method exists only to report metrics. finish_time = time() - self._ddog_histogram(evt_time, 'get_many.blks_out', block_count) - self._ddog_histogram(evt_time, 'get_many.response_time', (finish_time - evt_time) * 1000) + duration = (finish_time - evt_time) * 1000 # milliseconds + + self._ddog_histogram(evt_time, 'get_many.blks_out', total_block_count) + self._ddog_histogram(evt_time, 'get_many.response_time', duration) + self._nr_stat_accumulate('get_many', 'duration', duration) def set_many(self, username, block_keys_to_state, scope=Scope.user_state): """ @@ -188,6 +248,9 @@ class DjangoXBlockUserStateClient(XBlockUserStateClient): if scope != Scope.user_state: raise ValueError("Only Scope.user_state is supported") + # count how many times this function gets called + self._nr_stat_increment('set_many', 'calls') + # We do a find_or_create for every block (rather than re-using field objects # that were queried in get_many) so that if the score has # been changed by some other piece of the code, we don't overwrite @@ -240,14 +303,18 @@ class DjangoXBlockUserStateClient(XBlockUserStateClient): len(block_keys_to_state), block_keys_to_state.keys() )) - # The rest of this method exists only to submit DataDog events. - # Remove it once we're no longer interested in the data. - # + # DataDog and New Relic reporting + + # record the size of state modifications + self._nr_block_stat_accumulate('set_many', usage_key.block_type, 'size', len(student_module.state)) + # Record whether a state row has been created or updated. if created: self._ddog_increment(evt_time, 'set_many.state_created') + self._nr_block_stat_increment('set_many', usage_key.block_type, 'blocks_created') else: self._ddog_increment(evt_time, 'set_many.state_updated') + self._nr_block_stat_increment('set_many', usage_key.block_type, 'blocks_updated') # Event to record number of fields sent in to set/set_many. self._ddog_histogram(evt_time, 'set_many.fields_in', len(state)) @@ -262,8 +329,10 @@ class DjangoXBlockUserStateClient(XBlockUserStateClient): # Events for the entire set_many call. finish_time = time() + duration = (finish_time - evt_time) * 1000 # milliseconds self._ddog_histogram(evt_time, 'set_many.blks_updated', len(block_keys_to_state)) - self._ddog_histogram(evt_time, 'set_many.response_time', (finish_time - evt_time) * 1000) + self._ddog_histogram(evt_time, 'set_many.response_time', duration) + self._nr_stat_accumulate('set_many', 'duration', duration) def delete_many(self, username, block_keys, scope=Scope.user_state, fields=None): """