From fce9344ddcaaef451fcd302d3596636d9e8c05ff Mon Sep 17 00:00:00 2001 From: Gabe Mulley Date: Fri, 13 Oct 2017 18:18:29 -0400 Subject: [PATCH] new relic tracing for ace --- lms/djangoapps/discussion/views.py | 46 ++++------- .../djangoapps/monitoring_utils/__init__.py | 15 ++++ openedx/core/djangoapps/schedules/tasks.py | 77 +++++++++++++------ 3 files changed, 82 insertions(+), 56 deletions(-) diff --git a/lms/djangoapps/discussion/views.py b/lms/djangoapps/discussion/views.py index 00c718e69c..d414634977 100644 --- a/lms/djangoapps/discussion/views.py +++ b/lms/djangoapps/discussion/views.py @@ -3,7 +3,6 @@ Views handling read (GET) requests for the Discussion tab and inline discussions """ import logging -from contextlib import contextmanager from functools import wraps from sets import Set @@ -46,15 +45,12 @@ from django_comment_client.utils import ( ) from django_comment_common.utils import ThreadContext, get_course_discussion_settings, set_course_discussion_settings from openedx.core.djangoapps.plugin_api.views import EdxFragmentView +from openedx.core.djangoapps.monitoring_utils import function_trace from student.models import CourseEnrollment from util.json_request import JsonResponse, expect_json from xmodule.modulestore.django import modulestore log = logging.getLogger("edx.discussions") -try: - import newrelic.agent -except ImportError: - newrelic = None # pylint: disable=invalid-name THREADS_PER_PAGE = 20 @@ -62,20 +58,6 @@ INLINE_THREADS_PER_PAGE = 20 PAGES_NEARBY_DELTA = 2 -@contextmanager -def newrelic_function_trace(function_name): - """ - A wrapper context manager newrelic.agent.FunctionTrace to no-op if the - newrelic package is not installed - """ - if newrelic: - nr_transaction = newrelic.agent.current_transaction() - with newrelic.agent.FunctionTrace(nr_transaction, function_name): - yield - else: - yield - - def make_course_settings(course, user): """ Generate a JSON-serializable model for course settings, which will be used to initialize a @@ -216,12 +198,12 @@ def inline_discussion(request, course_key, discussion_id): except ValueError: return HttpResponseServerError("Invalid group_id") - with newrelic_function_trace("get_metadata_for_threads"): + with function_trace("get_metadata_for_threads"): annotated_content_info = utils.get_metadata_for_threads(course_key, threads, request.user, user_info) is_staff = has_permission(request.user, 'openclose_thread', course.id) threads = [utils.prepare_content(thread, course_key, is_staff) for thread in threads] - with newrelic_function_trace("add_courseware_context"): + with function_trace("add_courseware_context"): add_courseware_context(threads, course, request.user) return utils.JsonResponse({ @@ -256,10 +238,10 @@ def forum_form_discussion(request, course_key): except ValueError: return HttpResponseServerError("Invalid group_id") - with newrelic_function_trace("get_metadata_for_threads"): + with function_trace("get_metadata_for_threads"): annotated_content_info = utils.get_metadata_for_threads(course_key, threads, request.user, user_info) - with newrelic_function_trace("add_courseware_context"): + with function_trace("add_courseware_context"): add_courseware_context(threads, course, request.user) return utils.JsonResponse({ @@ -300,7 +282,7 @@ def single_thread(request, course_key, discussion_id, thread_id): raise_event=True, ) - with newrelic_function_trace("get_annotated_content_infos"): + with function_trace("get_annotated_content_infos"): annotated_content_info = utils.get_annotated_content_infos( course_key, thread, @@ -309,7 +291,7 @@ def single_thread(request, course_key, discussion_id, thread_id): ) content = utils.prepare_content(thread.to_dict(), course_key, is_staff) - with newrelic_function_trace("add_courseware_context"): + with function_trace("add_courseware_context"): add_courseware_context([content], course, request.user) return utils.JsonResponse({ @@ -457,13 +439,13 @@ def _create_discussion_board_context(request, base_context, thread=None): is_staff = has_permission(user, 'openclose_thread', course.id) threads = [utils.prepare_content(thread, course_key, is_staff) for thread in threads] - with newrelic_function_trace("get_metadata_for_threads"): + with function_trace("get_metadata_for_threads"): annotated_content_info = utils.get_metadata_for_threads(course_key, threads, user, user_info) - with newrelic_function_trace("add_courseware_context"): + with function_trace("add_courseware_context"): add_courseware_context(threads, course, user) - with newrelic_function_trace("get_cohort_info"): + with function_trace("get_cohort_info"): course_discussion_settings = get_course_discussion_settings(course_key) user_group_id = get_group_id_for_user(user, course_discussion_settings) @@ -529,13 +511,13 @@ def user_profile(request, course_key, user_id): query_params['page'] = page query_params['num_pages'] = num_pages - with newrelic_function_trace("get_metadata_for_threads"): + with function_trace("get_metadata_for_threads"): user_info = cc.User.from_django_user(request.user).to_dict() annotated_content_info = utils.get_metadata_for_threads(course_key, threads, request.user, user_info) is_staff = has_permission(request.user, 'openclose_thread', course.id) threads = [utils.prepare_content(thread, course_key, is_staff) for thread in threads] - with newrelic_function_trace("add_courseware_context"): + with function_trace("add_courseware_context"): add_courseware_context(threads, course, request.user) if request.is_ajax(): return utils.JsonResponse({ @@ -549,7 +531,7 @@ def user_profile(request, course_key, user_id): course_id=course.id ).order_by("name").values_list("name", flat=True).distinct() - with newrelic_function_trace("get_cohort_info"): + with function_trace("get_cohort_info"): course_discussion_settings = get_course_discussion_settings(course_key) user_group_id = get_group_id_for_user(request.user, course_discussion_settings) @@ -618,7 +600,7 @@ def followed_threads(request, course_key, user_id): query_params['num_pages'] = paginated_results.num_pages user_info = cc.User.from_django_user(request.user).to_dict() - with newrelic_function_trace("get_metadata_for_threads"): + with function_trace("get_metadata_for_threads"): annotated_content_info = utils.get_metadata_for_threads( course_key, paginated_results.collection, diff --git a/openedx/core/djangoapps/monitoring_utils/__init__.py b/openedx/core/djangoapps/monitoring_utils/__init__.py index bd581daf52..ed65327478 100644 --- a/openedx/core/djangoapps/monitoring_utils/__init__.py +++ b/openedx/core/djangoapps/monitoring_utils/__init__.py @@ -20,6 +20,7 @@ At this time, these custom metrics will only be reported to New Relic. TODO: supply additional public functions for storing strings and booleans. """ +from contextlib import contextmanager from . import middleware try: @@ -95,3 +96,17 @@ def set_monitoring_transaction_name(name, group=None, priority=None): if not newrelic: return newrelic.agent.set_transaction_name(name, group, priority) + + +@contextmanager +def function_trace(function_name): + """ + Wraps a chunk of code that we want to appear as a separate, explicit, + segment in our monitoring tools. + """ + if newrelic: + nr_transaction = newrelic.agent.current_transaction() + with newrelic.agent.FunctionTrace(nr_transaction, function_name): + yield + else: + yield diff --git a/openedx/core/djangoapps/schedules/tasks.py b/openedx/core/djangoapps/schedules/tasks.py index f42b60e3d4..0a4478936f 100644 --- a/openedx/core/djangoapps/schedules/tasks.py +++ b/openedx/core/djangoapps/schedules/tasks.py @@ -9,10 +9,9 @@ from django.contrib.sites.models import Site from django.contrib.staticfiles.templatetags.staticfiles import static from django.core.exceptions import ValidationError from django.core.urlresolvers import reverse -from django.db.models import F, Min, Q +from django.db.models import F, Q from django.db.utils import DatabaseError from django.utils.formats import dateformat, get_format -import pytz from edx_ace import ace from edx_ace.message import Message @@ -21,21 +20,15 @@ from edx_ace.utils.date import deserialize from opaque_keys.edx.keys import CourseKey from courseware.date_summary import verified_upgrade_deadline_link, verified_upgrade_link_is_valid +from openedx.core.djangoapps.monitoring_utils import set_custom_metric, function_trace +from request_cache.middleware import request_cached +from xmodule.modulestore.django import modulestore -from edxmako.shortcuts import marketing_link from openedx.core.djangoapps.schedules.config import COURSE_UPDATE_WAFFLE_FLAG from openedx.core.djangoapps.schedules.exceptions import CourseUpdateDoesNotExist from openedx.core.djangoapps.schedules.message_type import ScheduleMessageType from openedx.core.djangoapps.schedules.models import Schedule, ScheduleConfig -from openedx.core.djangoapps.schedules.template_context import ( - absolute_url, - encode_url, - encode_urls_in_dict, - get_base_template_context -) -from request_cache.middleware import request_cached -from xmodule.modulestore.django import modulestore - +from openedx.core.djangoapps.schedules.template_context import absolute_url, get_base_template_context LOG = logging.getLogger(__name__) @@ -82,6 +75,10 @@ def _recurring_nudge_schedule_send(site_id, msg_str): return msg = Message.from_string(msg_str) + # A unique identifier for this batch of messages being sent. + set_custom_metric('send_uuid', msg.send_uuid) + # A unique identifier for this particular message. + set_custom_metric('uuid', msg.uuid) LOG.debug('Recurring Nudge: Sending message = %s', msg_str) ace.send(msg) @@ -94,9 +91,12 @@ def recurring_nudge_schedule_bin( # TODO: in the next refactor of this task, pass in current_datetime instead of reproducing it here current_datetime = target_datetime - datetime.timedelta(days=day_offset) msg_type = RecurringNudge(abs(day_offset)) + site = Site.objects.get(id=site_id) + + _annotate_for_monitoring(msg_type, site, bin_num, target_day_str, day_offset) for (user, language, context) in _recurring_nudge_schedules_for_bin( - Site.objects.get(id=site_id), + site, current_datetime, target_datetime, bin_num, @@ -111,10 +111,28 @@ def recurring_nudge_schedule_bin( language, context, ) - _recurring_nudge_schedule_send.apply_async((site_id, str(msg)), retry=False) + with function_trace('enqueue_send_task'): + _recurring_nudge_schedule_send.apply_async((site_id, str(msg)), retry=False) + + +def _annotate_for_monitoring(message_type, site, bin_num, target_day_str, day_offset): + # This identifies the type of message being sent, for example: schedules.recurring_nudge3. + set_custom_metric('message_name', '{0}.{1}'.format(message_type.app_label, message_type.name)) + # The domain name of the site we are sending the message for. + set_custom_metric('site', site.domain) + # This is the "bin" of data being processed. We divide up the work into chunks so that we don't tie up celery + # workers for too long. This could help us identify particular bins that are problematic. + set_custom_metric('bin', bin_num) + # The date we are processing data for. + set_custom_metric('target_day', target_day_str) + # The number of days relative to the current date to process data for. + set_custom_metric('day_offset', day_offset) + # A unique identifier for this batch of messages being sent. + set_custom_metric('send_uuid', message_type.uuid) def _recurring_nudge_schedules_for_bin(site, current_datetime, target_datetime, bin_num, org_list, exclude_orgs=False): + schedules = get_schedules_with_target_date_by_bin_and_orgs( schedule_date_field='start', current_datetime=current_datetime, @@ -125,8 +143,6 @@ def _recurring_nudge_schedules_for_bin(site, current_datetime, target_datetime, exclude_orgs=exclude_orgs, ) - LOG.debug('Recurring Nudge: Query = %r', schedules.query.sql_with_params()) - for (user, user_schedules) in groupby(schedules, lambda s: s.enrollment.user): user_schedules = list(user_schedules) course_id_strs = [str(schedule.enrollment.course_id) for schedule in user_schedules] @@ -161,9 +177,12 @@ def upgrade_reminder_schedule_bin( # TODO: in the next refactor of this task, pass in current_datetime instead of reproducing it here current_datetime = target_datetime - datetime.timedelta(days=day_offset) msg_type = UpgradeReminder() + site = Site.objects.get(id=site_id) + + _annotate_for_monitoring(msg_type, site, bin_num, target_day_str, day_offset) for (user, language, context) in _upgrade_reminder_schedules_for_bin( - Site.objects.get(id=site_id), + site, current_datetime, target_datetime, bin_num, @@ -178,7 +197,8 @@ def upgrade_reminder_schedule_bin( language, context, ) - _upgrade_reminder_schedule_send.apply_async((site_id, str(msg)), retry=False) + with function_trace('enqueue_send_task'): + _upgrade_reminder_schedule_send.apply_async((site_id, str(msg)), retry=False) @task(ignore_result=True, routing_key=ROUTING_KEY) @@ -202,8 +222,6 @@ def _upgrade_reminder_schedules_for_bin(site, current_datetime, target_datetime, exclude_orgs=exclude_orgs, ) - LOG.debug('Upgrade Reminder: Query = %r', schedules.query.sql_with_params()) - for schedule in schedules: enrollment = schedule.enrollment user = enrollment.user @@ -244,9 +262,12 @@ def course_update_schedule_bin( # TODO: in the next refactor of this task, pass in current_datetime instead of reproducing it here current_datetime = target_datetime - datetime.timedelta(days=day_offset) msg_type = CourseUpdate() + site = Site.objects.get(id=site_id) + + _annotate_for_monitoring(msg_type, site, bin_num, target_day_str, day_offset) for (user, language, context) in _course_update_schedules_for_bin( - Site.objects.get(id=site_id), + site, current_datetime, target_datetime, day_offset, @@ -262,7 +283,8 @@ def course_update_schedule_bin( language, context, ) - _course_update_schedule_send.apply_async((site_id, str(msg)), retry=False) + with function_trace('enqueue_send_task'): + _course_update_schedule_send.apply_async((site_id, str(msg)), retry=False) @task(ignore_result=True, routing_key=ROUTING_KEY) @@ -289,8 +311,6 @@ def _course_update_schedules_for_bin(site, current_datetime, target_datetime, da order_by='enrollment__course', ) - LOG.debug('Course Update: Query = %r', schedules.query.sql_with_params()) - for schedule in schedules: enrollment = schedule.enrollment try: @@ -382,6 +402,15 @@ def get_schedules_with_target_date_by_bin_and_orgs(schedule_date_field, current_ if "read_replica" in settings.DATABASES: schedules = schedules.using("read_replica") + LOG.debug('Query = %r', schedules.query.sql_with_params()) + + with function_trace('schedule_query_set_evaluation'): + # This will run the query and cache all of the results in memory. + num_schedules = len(schedules) + + # This should give us a sense of the volume of data being processed by each task. + set_custom_metric('num_schedules', num_schedules) + return schedules