diff --git a/common/djangoapps/track/middleware.py b/common/djangoapps/track/middleware.py index 4a6507b0ac..ea5cdd51f1 100644 --- a/common/djangoapps/track/middleware.py +++ b/common/djangoapps/track/middleware.py @@ -1,8 +1,17 @@ -import hmac +""" +This is a middleware layer which keeps a log of all requests made +to the server. It is responsible for removing security tokens and +similar from such events, and relaying them to the event tracking +framework. +""" + + import hashlib +import hmac import json -import re import logging +import re +import sys from django.conf import settings @@ -18,7 +27,11 @@ META_KEY_TO_CONTEXT_KEY = { 'REMOTE_ADDR': 'ip', 'SERVER_NAME': 'host', 'HTTP_USER_AGENT': 'agent', - 'PATH_INFO': 'path' + 'PATH_INFO': 'path', + # Not a typo. See: + # http://en.wikipedia.org/wiki/HTTP_referer#Origin_of_the_term_referer + 'HTTP_REFERER': 'referer', + 'HTTP_ACCEPT_LANGUAGE': 'accept_language', } @@ -70,7 +83,27 @@ class TrackMiddleware(object): views.server_track(request, request.META['PATH_INFO'], event) except: - pass + ## Why do we have the overly broad except? + ## + ## I added instrumentation so if we drop events on the + ## floor, we at least know about it. However, we really + ## should just return a 500 here: (1) This will translate + ## to much more insidious user-facing bugs if we make any + ## decisions based on incorrect data. (2) If the system + ## is down, we should fail and fix it. + event = {'event-type': 'exception', 'exception': repr(sys.exc_info()[0])} + try: + views.server_track(request, request.META['PATH_INFO'], event) + except: + # At this point, things are really broken. We really + # should fail return a 500 to the user here. However, + # the interim decision is to just fail in order to be + # consistent with current policy, and expedite the PR. + # This version of the code makes no compromises + # relative to the code before, while a proper failure + # here would involve shifting compromises and + # discussion. + pass def should_process_request(self, request): """Don't track requests to the specified URL patterns""" @@ -139,6 +172,11 @@ class TrackMiddleware(object): # django.contrib.sessions.backends.base._hash() but use MD5 # instead of SHA1 so that the result has the same length (32) # as the original session_key. + + # TODO: Switch to SHA224, which is secure. + # If necessary, drop the last little bit of the hash to make it the same length. + # Using a known-insecure hash to shorten is silly. + # Also, why do we need same length? key_salt = "common.djangoapps.track" + self.__class__.__name__ key = hashlib.md5(key_salt + settings.SECRET_KEY).digest() encrypted_session_key = hmac.new(key, msg=session_key, digestmod=hashlib.md5).hexdigest() diff --git a/common/djangoapps/track/shim.py b/common/djangoapps/track/shim.py index 8d2984cc3b..7960b75330 100644 --- a/common/djangoapps/track/shim.py +++ b/common/djangoapps/track/shim.py @@ -14,7 +14,9 @@ CONTEXT_FIELDS_TO_INCLUDE = [ 'session', 'ip', 'agent', - 'host' + 'host', + 'referer', + 'accept_language' ] diff --git a/common/djangoapps/track/tests/__init__.py b/common/djangoapps/track/tests/__init__.py index 3efbb4343c..f394d143a2 100644 --- a/common/djangoapps/track/tests/__init__.py +++ b/common/djangoapps/track/tests/__init__.py @@ -31,6 +31,22 @@ class InMemoryBackend(object): self.events.append(event) +def unicode_flatten(tree): + """ + Test cases have funny issues where some strings are unicode, and + some are not. This does not cause test failures, but causes test + output diffs to show many more difference than actually occur in the + data. This will convert everything to a common form. + """ + if isinstance(tree, basestring): + return unicode(tree) + elif isinstance(tree, list): + return map(unicode_flatten, list) + elif isinstance(tree, dict): + return dict([(unicode_flatten(key), unicode_flatten(value)) for key, value in tree.iteritems()]) + return tree + + @freeze_time(FROZEN_TIME) @override_settings( EVENT_TRACKING_BACKENDS=IN_MEMORY_BACKEND_CONFIG @@ -67,3 +83,7 @@ class EventTrackingTestCase(TestCase): def assert_events_emitted(self): """Ensure at least one event has been emitted at this point in the test.""" self.assertGreaterEqual(len(self.backend.events), 1) + + def assertEqualUnicode(self, tree_a, tree_b): + """Like assertEqual, but give nicer errors for unicode vs. non-unicode""" + self.assertEqual(unicode_flatten(tree_a), unicode_flatten(tree_b)) diff --git a/common/djangoapps/track/tests/test_middleware.py b/common/djangoapps/track/tests/test_middleware.py index 40513cf8f7..f641f8c087 100644 --- a/common/djangoapps/track/tests/test_middleware.py +++ b/common/djangoapps/track/tests/test_middleware.py @@ -53,6 +53,8 @@ class TrackMiddlewareTestCase(TestCase): def test_default_request_context(self): context = self.get_context_for_path('/courses/') self.assertEquals(context, { + 'accept_language': '', + 'referer': '', 'user_id': '', 'session': '', 'username': '', diff --git a/common/djangoapps/track/tests/test_shim.py b/common/djangoapps/track/tests/test_shim.py index b30731fc72..d28777f6bb 100644 --- a/common/djangoapps/track/tests/test_shim.py +++ b/common/djangoapps/track/tests/test_shim.py @@ -23,6 +23,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase): data = {sentinel.key: sentinel.value} context = { + 'accept_language': sentinel.accept_language, + 'referer': sentinel.referer, 'username': sentinel.username, 'session': sentinel.session, 'ip': sentinel.ip, @@ -40,6 +42,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase): emitted_event = self.get_event() expected_event = { + 'accept_language': sentinel.accept_language, + 'referer': sentinel.referer, 'event_type': sentinel.name, 'name': sentinel.name, 'context': { @@ -58,7 +62,7 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase): 'page': None, 'session': sentinel.session, } - self.assertEqual(expected_event, emitted_event) + self.assertEqualUnicode(expected_event, emitted_event) @override_settings( EVENT_TRACKING_PROCESSORS=LEGACY_SHIM_PROCESSOR, @@ -69,6 +73,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase): emitted_event = self.get_event() expected_event = { + 'accept_language': '', + 'referer': '', 'event_type': sentinel.name, 'name': sentinel.name, 'context': {}, @@ -82,4 +88,4 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase): 'page': None, 'session': '', } - self.assertEqual(expected_event, emitted_event) + self.assertEqualUnicode(expected_event, emitted_event) diff --git a/common/djangoapps/track/views/__init__.py b/common/djangoapps/track/views/__init__.py index 39a9014d7c..a7aaa5045f 100644 --- a/common/djangoapps/track/views/__init__.py +++ b/common/djangoapps/track/views/__init__.py @@ -58,6 +58,8 @@ def user_track(request): "username": username, "session": context.get('session', ''), "ip": _get_request_header(request, 'REMOTE_ADDR'), + "referer": _get_request_header(request, 'HTTP_REFERER'), + "accept_language": _get_request_header(request, 'HTTP_ACCEPT_LANGUAGE'), "event_source": "browser", "event_type": _get_request_value(request, 'event_type'), "event": _get_request_value(request, 'event'), @@ -95,6 +97,8 @@ def server_track(request, event_type, event, page=None): event = { "username": username, "ip": _get_request_header(request, 'REMOTE_ADDR'), + "referer": _get_request_header(request, 'HTTP_REFERER'), + "accept_language": _get_request_header(request, 'HTTP_ACCEPT_LANGUAGE'), "event_source": "server", "event_type": event_type, "event": event, diff --git a/common/djangoapps/track/views/tests/test_segmentio.py b/common/djangoapps/track/views/tests/test_segmentio.py index 78ffc0eb18..83beee46c2 100644 --- a/common/djangoapps/track/views/tests/test_segmentio.py +++ b/common/djangoapps/track/views/tests/test_segmentio.py @@ -53,6 +53,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): def setUp(self): super(SegmentIOTrackingTestCase, self).setUp() + self.maxDiff = None # pylint: disable=invalid-name self.request_factory = RequestFactory() def test_get_request(self): @@ -189,6 +190,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): self.assertEquals(response.status_code, 200) expected_event = { + 'accept_language': '', + 'referer': '', 'username': str(sentinel.username), 'ip': '', 'session': '', @@ -207,7 +210,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): }, 'user_id': USER_ID, 'course_id': course_id, - 'org_id': 'foo', + 'org_id': u'foo', 'path': ENDPOINT, 'client': { 'library': { @@ -224,7 +227,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): finally: middleware.process_response(request, None) - self.assertEquals(self.get_event(), expected_event) + self.assertEqualUnicode(self.get_event(), expected_event) def test_invalid_course_id(self): request = self.create_request( @@ -352,6 +355,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): self.assertEquals(response.status_code, 200) expected_event_without_payload = { + 'accept_language': '', + 'referer': '', 'username': str(sentinel.username), 'ip': '', 'session': '', @@ -397,5 +402,5 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase): actual_event = dict(self.get_event()) payload = json.loads(actual_event.pop('event')) - self.assertEquals(actual_event, expected_event_without_payload) - self.assertEquals(payload, expected_payload) + self.assertEqualUnicode(actual_event, expected_event_without_payload) + self.assertEqualUnicode(payload, expected_payload) diff --git a/common/djangoapps/track/views/tests/test_views.py b/common/djangoapps/track/views/tests/test_views.py index 1eb756cd49..d8d5a81007 100644 --- a/common/djangoapps/track/views/tests/test_views.py +++ b/common/djangoapps/track/views/tests/test_views.py @@ -41,6 +41,8 @@ class TestTrackViews(TestCase): views.user_track(request) expected_event = { + 'accept_language': '', + 'referer': '', 'username': 'anonymous', 'session': sentinel.session, 'ip': '127.0.0.1', @@ -65,6 +67,8 @@ class TestTrackViews(TestCase): views.user_track(request) expected_event = { + 'accept_language': '', + 'referer': '', 'username': 'anonymous', 'session': sentinel.session, 'ip': '127.0.0.1', @@ -95,6 +99,8 @@ class TestTrackViews(TestCase): views.user_track(request) expected_event = { + 'accept_language': '', + 'referer': '', 'username': 'anonymous', 'session': '', 'ip': '127.0.0.1', @@ -123,6 +129,8 @@ class TestTrackViews(TestCase): views.server_track(request, str(sentinel.event_type), '{}') expected_event = { + 'accept_language': '', + 'referer': '', 'username': 'anonymous', 'ip': '127.0.0.1', 'event_source': 'server', @@ -147,6 +155,8 @@ class TestTrackViews(TestCase): views.server_track(request, str(sentinel.event_type), '{}') expected_event = { + 'accept_language': '', + 'referer': '', 'username': 'anonymous', 'ip': '127.0.0.1', 'event_source': 'server', @@ -180,6 +190,8 @@ class TestTrackViews(TestCase): views.server_track(request, str(sentinel.event_type), '{}') expected_event = { + 'accept_language': '', + 'referer': '', 'username': 'anonymous', 'ip': '127.0.0.1', 'event_source': 'server', @@ -207,6 +219,8 @@ class TestTrackViews(TestCase): views.server_track(request, str(sentinel.event_type), '{}') expected_event = { + 'accept_language': '', + 'referer': '', 'username': 'anonymous', 'ip': '', 'event_source': 'server', @@ -223,6 +237,8 @@ class TestTrackViews(TestCase): @freeze_time(expected_time) def test_task_track(self): request_info = { + 'accept_language': '', + 'referer': '', 'username': 'anonymous', 'ip': '127.0.0.1', 'agent': 'agent',