diff --git a/cms/envs/common.py b/cms/envs/common.py index 05a1b63f5f..1048570070 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -384,6 +384,7 @@ INSTALLED_APPS = ( # Tracking 'track', + 'eventtracking.django', # Monitoring 'datadog', @@ -438,3 +439,4 @@ TRACKING_BACKENDS = { # We're already logging events, and we don't want to capture user # names/passwords. Heartbeat events are likely not interesting. TRACKING_IGNORE_URL_PATTERNS = [r'^/event', r'^/login', r'^/heartbeat'] +TRACKING_ENABLED = True diff --git a/common/djangoapps/track/contexts.py b/common/djangoapps/track/contexts.py new file mode 100644 index 0000000000..d87d2bbf3b --- /dev/null +++ b/common/djangoapps/track/contexts.py @@ -0,0 +1,31 @@ +"""Generates common contexts""" + +import re + + +COURSE_REGEX = re.compile(r'^.*?/courses/(?P<course_id>(?P<org_id>[^/]+)/[^/]+/[^/]+)') + + +def course_context_from_url(url): + """ + Extracts the course_id from the given `url.` + + Example Returned Context:: + + { + 'course_id': 'org/course/run', + 'org_id': 'org' + } + + """ + url = url or '' + + context = { + 'course_id': '', + 'org_id': '' + } + match = COURSE_REGEX.match(url) + if match: + context.update(match.groupdict()) + + return context diff --git a/common/djangoapps/track/middleware.py b/common/djangoapps/track/middleware.py index 1da051582d..c5bba2e10c 100644 --- a/common/djangoapps/track/middleware.py +++ b/common/djangoapps/track/middleware.py @@ -3,13 +3,20 @@ import re from django.conf import settings -import views +from track import views +from track import contexts +from eventtracking import tracker + + +COURSE_CONTEXT_NAME = 'edx.course' class TrackMiddleware(object): def process_request(self, request): try: - if not self._should_process_request(request): + self.enter_course_context(request) + + if not self.should_process_request(request): return # Removes passwords from the tracking logs @@ -47,7 +54,8 @@ class TrackMiddleware(object): except: pass - def _should_process_request(self, request): + def 
should_process_request(self, request): + """Don't track requests to the specified URL patterns""" path = request.META['PATH_INFO'] ignored_url_patterns = getattr(settings, 'TRACKING_IGNORE_URL_PATTERNS', []) @@ -57,3 +65,22 @@ class TrackMiddleware(object): if re.match(pattern, path): return False return True + + def enter_course_context(self, request): + """ + Extract course information from the request and add it to the + tracking context. + """ + tracker.get_tracker().enter_context( + COURSE_CONTEXT_NAME, + contexts.course_context_from_url(request.build_absolute_uri()) + ) + + def process_response(self, request, response): # pylint: disable=unused-argument + """Exit the course context if it exists.""" + try: + tracker.get_tracker().exit_context(COURSE_CONTEXT_NAME) + except: # pylint: disable=bare-except + pass + + return response diff --git a/common/djangoapps/track/tests/test_contexts.py b/common/djangoapps/track/tests/test_contexts.py new file mode 100644 index 0000000000..0ade3544b5 --- /dev/null +++ b/common/djangoapps/track/tests/test_contexts.py @@ -0,0 +1,44 @@ +# pylint: disable=missing-docstring,maybe-no-member + +from unittest import TestCase + +from track import contexts + + +class TestContexts(TestCase): + + COURSE_ID = 'test/course_name/course_run' + ORG_ID = 'test' + + def test_course_id_from_url(self): + self.assert_parses_course_id_from_url('http://foo.bar.com/courses/{course_id}/more/stuff') + + def assert_parses_course_id_from_url(self, format_string): + self.assertEquals( + contexts.course_context_from_url(format_string.format(course_id=self.COURSE_ID)), + { + 'course_id': self.COURSE_ID, + 'org_id': self.ORG_ID + } + ) + + def test_no_course_id_in_url(self): + self.assert_empty_context_for_url('http://foo.bar.com/dashboard') + + def assert_empty_context_for_url(self, url): + self.assertEquals( + contexts.course_context_from_url(url), + { + 'course_id': '', + 'org_id': '' + } + ) + + def test_malformed_course_id(self): + 
self.assert_empty_context_for_url('http://foo.bar.com/courses/test') + + def test_course_id_later_in_url(self): + self.assert_parses_course_id_from_url('http://foo.bar.com/x/y/z/courses/{course_id}') + + def test_no_url(self): + self.assert_empty_context_for_url(None) diff --git a/common/djangoapps/track/tests/test_middleware.py b/common/djangoapps/track/tests/test_middleware.py index 4bfb843634..68cc7585f6 100644 --- a/common/djangoapps/track/tests/test_middleware.py +++ b/common/djangoapps/track/tests/test_middleware.py @@ -6,41 +6,61 @@ from django.test import TestCase from django.test.client import RequestFactory from django.test.utils import override_settings +from eventtracking import tracker from track.middleware import TrackMiddleware -@patch('track.views.server_track') class TrackMiddlewareTestCase(TestCase): def setUp(self): self.track_middleware = TrackMiddleware() self.request_factory = RequestFactory() - def test_normal_request(self, mock_server_track): + patcher = patch('track.views.server_track') + self.mock_server_track = patcher.start() + self.addCleanup(patcher.stop) + + def test_normal_request(self): request = self.request_factory.get('/somewhere') self.track_middleware.process_request(request) - self.assertTrue(mock_server_track.called) + self.assertTrue(self.mock_server_track.called) - def test_default_filters_do_not_render_view(self, mock_server_track): + def test_default_filters_do_not_render_view(self): for url in ['/event', '/event/1', '/login', '/heartbeat']: request = self.request_factory.get(url) self.track_middleware.process_request(request) - self.assertFalse(mock_server_track.called) - mock_server_track.reset_mock() + self.assertFalse(self.mock_server_track.called) + self.mock_server_track.reset_mock() @override_settings(TRACKING_IGNORE_URL_PATTERNS=[]) - def test_reading_filtered_urls_from_settings(self, mock_server_track): + def test_reading_filtered_urls_from_settings(self): request = self.request_factory.get('/event') 
self.track_middleware.process_request(request) - self.assertTrue(mock_server_track.called) + self.assertTrue(self.mock_server_track.called) @override_settings(TRACKING_IGNORE_URL_PATTERNS=[r'^/some/excluded.*']) - def test_anchoring_of_patterns_at_beginning(self, mock_server_track): + def test_anchoring_of_patterns_at_beginning(self): request = self.request_factory.get('/excluded') self.track_middleware.process_request(request) - self.assertTrue(mock_server_track.called) - mock_server_track.reset_mock() + self.assertTrue(self.mock_server_track.called) + self.mock_server_track.reset_mock() request = self.request_factory.get('/some/excluded/url') self.track_middleware.process_request(request) - self.assertFalse(mock_server_track.called) + self.assertFalse(self.mock_server_track.called) + + def test_request_in_course_context(self): + request = self.request_factory.get('/courses/test_org/test_course/test_run/foo') + self.track_middleware.process_request(request) + self.assertEquals( + tracker.get_tracker().resolve_context(), + { + 'course_id': 'test_org/test_course/test_run', + 'org_id': 'test_org' + } + ) + self.track_middleware.process_response(request, None) + self.assertEquals( + tracker.get_tracker().resolve_context(), + {} + ) diff --git a/common/djangoapps/track/tests/test_views.py b/common/djangoapps/track/tests/test_views.py new file mode 100644 index 0000000000..c05d4f2f4d --- /dev/null +++ b/common/djangoapps/track/tests/test_views.py @@ -0,0 +1,112 @@ +# pylint: disable=missing-docstring,maybe-no-member + +from datetime import datetime + +from mock import patch +from mock import sentinel +from pytz import UTC + +from django.test import TestCase +from django.test.client import RequestFactory + +from track import views + + +class TestTrackViews(TestCase): + + def setUp(self): + self.request_factory = RequestFactory() + + patcher = patch('track.views.tracker') + self.mock_tracker = patcher.start() + self.addCleanup(patcher.stop) + + self._expected_timestamp = 
datetime.now(UTC) + self._datetime_patcher = patch('track.views.datetime') + self.addCleanup(self._datetime_patcher.stop) + mock_datetime_mod = self._datetime_patcher.start() + mock_datetime_mod.datetime.now.return_value = self._expected_timestamp # pylint: disable=maybe-no-member + + self.path_with_course = '/courses/foo/bar/baz/xmod/' + self.url_with_course = 'http://www.edx.org' + self.path_with_course + + self.event = { + sentinel.key: sentinel.value + } + + def test_user_track(self): + request = self.request_factory.get('/event', { + 'page': self.url_with_course, + 'event_type': sentinel.event_type, + 'event': {} + }) + views.user_track(request) + + expected_event = { + 'username': 'anonymous', + 'session': '', + 'ip': '127.0.0.1', + 'event_source': 'browser', + 'event_type': str(sentinel.event_type), + 'event': '{}', + 'agent': '', + 'page': self.url_with_course, + 'time': self._expected_timestamp, + 'host': 'testserver', + 'context': { + 'course_id': 'foo/bar/baz', + 'org_id': 'foo', + }, + } + self.mock_tracker.send.assert_called_once_with(expected_event) + + def test_server_track(self): + request = self.request_factory.get(self.path_with_course) + views.server_track(request, str(sentinel.event_type), '{}') + + expected_event = { + 'username': 'anonymous', + 'ip': '127.0.0.1', + 'event_source': 'server', + 'event_type': str(sentinel.event_type), + 'event': '{}', + 'agent': '', + 'page': None, + 'time': self._expected_timestamp, + 'host': 'testserver', + 'context': {}, + } + self.mock_tracker.send.assert_called_once_with(expected_event) + + def test_task_track(self): + request_info = { + 'username': 'anonymous', + 'ip': '127.0.0.1', + 'agent': 'agent', + 'host': 'testserver', + } + + task_info = { + sentinel.task_key: sentinel.task_value + } + expected_event_data = dict(task_info) + expected_event_data.update(self.event) + + views.task_track(request_info, task_info, str(sentinel.event_type), self.event) + + expected_event = { + 'username': 'anonymous', + 
'ip': '127.0.0.1', + 'event_source': 'task', + 'event_type': str(sentinel.event_type), + 'event': expected_event_data, + 'agent': 'agent', + 'page': None, + 'time': self._expected_timestamp, + 'host': 'testserver', + 'context': { + 'course_id': '', + 'org_id': '' + }, + } + self.mock_tracker.send.assert_called_once_with(expected_event) diff --git a/common/djangoapps/track/views.py b/common/djangoapps/track/views.py index 9fe04c7602..e493babe5d 100644 --- a/common/djangoapps/track/views.py +++ b/common/djangoapps/track/views.py @@ -12,7 +12,9 @@ from django_future.csrf import ensure_csrf_cookie from mitxmako.shortcuts import render_to_response from track import tracker +from track import contexts from track.models import TrackingLog +from eventtracking import tracker as eventtracker def log_event(event): @@ -43,18 +45,22 @@ def user_track(request): except: agent = '' - event = { - "username": username, - "session": scookie, - "ip": request.META['REMOTE_ADDR'], - "event_source": "browser", - "event_type": request.REQUEST['event_type'], - "event": request.REQUEST['event'], - "agent": agent, - "page": request.REQUEST['page'], - "time": datetime.datetime.now(UTC), - "host": request.META['SERVER_NAME'], - } + page = request.REQUEST['page'] + + with eventtracker.get_tracker().context('edx.course.browser', contexts.course_context_from_url(page)): + event = { + "username": username, + "session": scookie, + "ip": request.META['REMOTE_ADDR'], + "event_source": "browser", + "event_type": request.REQUEST['event_type'], + "event": request.REQUEST['event'], + "agent": agent, + "page": page, + "time": datetime.datetime.now(UTC), + "host": request.META['SERVER_NAME'], + "context": eventtracker.get_tracker().resolve_context(), + } log_event(event) @@ -83,6 +89,7 @@ def server_track(request, event_type, event, page=None): "page": page, "time": datetime.datetime.now(UTC), "host": request.META['SERVER_NAME'], + "context": eventtracker.get_tracker().resolve_context(), } if 
event_type.startswith("/event_logs") and request.user.is_staff: @@ -118,17 +125,19 @@ def task_track(request_info, task_info, event_type, event, page=None): # All fields must be specified, in case the tracking information is # also saved to the TrackingLog model. Get values from the task-level # information, or just add placeholder values. - event = { - "username": request_info.get('username', 'unknown'), - "ip": request_info.get('ip', 'unknown'), - "event_source": "task", - "event_type": event_type, - "event": full_event, - "agent": request_info.get('agent', 'unknown'), - "page": page, - "time": datetime.datetime.now(UTC), - "host": request_info.get('host', 'unknown') - } + with eventtracker.get_tracker().context('edx.course.task', contexts.course_context_from_url(page)): + event = { + "username": request_info.get('username', 'unknown'), + "ip": request_info.get('ip', 'unknown'), + "event_source": "task", + "event_type": event_type, + "event": full_event, + "agent": request_info.get('agent', 'unknown'), + "page": page, + "time": datetime.datetime.now(UTC), + "host": request_info.get('host', 'unknown'), + "context": eventtracker.get_tracker().resolve_context(), + } log_event(event) diff --git a/lms/djangoapps/courseware/features/events.py b/lms/djangoapps/courseware/features/events.py index 6fa1d5d190..14ebc90207 100644 --- a/lms/djangoapps/courseware/features/events.py +++ b/lms/djangoapps/courseware/features/events.py @@ -47,16 +47,9 @@ def event_is_emitted(_step, event_type, event_source): event = cursor.next() - # These fields should be present in the event, but we won't bother - # validating them since it is difficult to predict their values. 
- for expected_field in ['host', 'time', 'agent', 'ip', 'event_source', 'event', 'page']: - assert_in(expected_field, event, msg='Expected field {} not found in event'.format(expected_field)) - expected_field_values = { "username": world.scenario_dict['USER'].username, "event_type": event_type, } for key, value in expected_field_values.iteritems(): assert_equals(event[key], value) - - # Note that the event may contain other fields, which is fine! diff --git a/lms/envs/common.py b/lms/envs/common.py index db66b821ca..646d789f67 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -357,6 +357,7 @@ if MITX_FEATURES.get('ENABLE_SQL_TRACKING_LOGS'): # We're already logging events, and we don't want to capture user # names/passwords. Heartbeat events are likely not interesting. TRACKING_IGNORE_URL_PATTERNS = [r'^/event', r'^/login', r'^/heartbeat'] +TRACKING_ENABLED = True ######################## subdomain specific settings ########################### COURSE_LISTINGS = {} @@ -882,6 +883,7 @@ INSTALLED_APPS = ( 'static_template_view', 'staticbook', 'track', + 'eventtracking.django', 'util', 'certificates', 'instructor', diff --git a/requirements/edx/github.txt b/requirements/edx/github.txt index 01fb717ae5..2d86689eea 100644 --- a/requirements/edx/github.txt +++ b/requirements/edx/github.txt @@ -20,3 +20,4 @@ -e git+https://github.com/edx/diff-cover.git@v0.2.6#egg=diff_cover -e git+https://github.com/edx/js-test-tool.git@v0.1.3#egg=js_test_tool -e git+https://github.com/edx/django-waffle.git@823a102e48#egg=django-waffle +-e git+https://github.com/edx/event-tracking.git@f0211d702d#egg=event-tracking