Add referer and accept_language to tracking logs

3 line patch, together with a world of pain due to buggy code, broken abstractions in tests, etc.
This commit is contained in:
Piotr Mitros
2015-02-02 14:55:47 -05:00
parent d644da9ecd
commit cf719a40ba
8 changed files with 104 additions and 11 deletions

View File

@@ -1,8 +1,17 @@
import hmac
"""
This is a middleware layer which keeps a log of all requests made
to the server. It is responsible for removing security tokens and
similar from such events, and relaying them to the event tracking
framework.
"""
import hashlib
import hmac
import json
import re
import logging
import re
import sys
from django.conf import settings
@@ -18,7 +27,11 @@ META_KEY_TO_CONTEXT_KEY = {
'REMOTE_ADDR': 'ip',
'SERVER_NAME': 'host',
'HTTP_USER_AGENT': 'agent',
'PATH_INFO': 'path'
'PATH_INFO': 'path',
# Not a typo. See:
# http://en.wikipedia.org/wiki/HTTP_referer#Origin_of_the_term_referer
'HTTP_REFERER': 'referer',
'HTTP_ACCEPT_LANGUAGE': 'accept_language',
}
@@ -70,7 +83,27 @@ class TrackMiddleware(object):
views.server_track(request, request.META['PATH_INFO'], event)
except:
pass
## Why do we have the overly broad except?
##
## I added instrumentation so if we drop events on the
## floor, we at least know about it. However, we really
## should just return a 500 here: (1) This will translate
## to much more insidious user-facing bugs if we make any
## decisions based on incorrect data. (2) If the system
## is down, we should fail and fix it.
event = {'event-type': 'exception', 'exception': repr(sys.exc_info()[0])}
try:
views.server_track(request, request.META['PATH_INFO'], event)
except:
# At this point, things are really broken. We really
# should fail return a 500 to the user here. However,
# the interim decision is to just fail in order to be
# consistent with current policy, and expedite the PR.
# This version of the code makes no compromises
# relative to the code before, while a proper failure
# here would involve shifting compromises and
# discussion.
pass
def should_process_request(self, request):
"""Don't track requests to the specified URL patterns"""
@@ -139,6 +172,11 @@ class TrackMiddleware(object):
# django.contrib.sessions.backends.base._hash() but use MD5
# instead of SHA1 so that the result has the same length (32)
# as the original session_key.
# TODO: Switch to SHA224, which is secure.
# If necessary, drop the last little bit of the hash to make it the same length.
# Using a known-insecure hash to shorten is silly.
# Also, why do we need same length?
key_salt = "common.djangoapps.track" + self.__class__.__name__
key = hashlib.md5(key_salt + settings.SECRET_KEY).digest()
encrypted_session_key = hmac.new(key, msg=session_key, digestmod=hashlib.md5).hexdigest()

View File

@@ -14,7 +14,9 @@ CONTEXT_FIELDS_TO_INCLUDE = [
'session',
'ip',
'agent',
'host'
'host',
'referer',
'accept_language'
]

View File

@@ -31,6 +31,22 @@ class InMemoryBackend(object):
self.events.append(event)
def unicode_flatten(tree):
"""
Test cases have funny issues where some strings are unicode, and
some are not. This does not cause test failures, but causes test
output diffs to show many more difference than actually occur in the
data. This will convert everything to a common form.
"""
if isinstance(tree, basestring):
return unicode(tree)
elif isinstance(tree, list):
return map(unicode_flatten, list)
elif isinstance(tree, dict):
return dict([(unicode_flatten(key), unicode_flatten(value)) for key, value in tree.iteritems()])
return tree
@freeze_time(FROZEN_TIME)
@override_settings(
EVENT_TRACKING_BACKENDS=IN_MEMORY_BACKEND_CONFIG
@@ -67,3 +83,7 @@ class EventTrackingTestCase(TestCase):
def assert_events_emitted(self):
"""Ensure at least one event has been emitted at this point in the test."""
self.assertGreaterEqual(len(self.backend.events), 1)
def assertEqualUnicode(self, tree_a, tree_b):
"""Like assertEqual, but give nicer errors for unicode vs. non-unicode"""
self.assertEqual(unicode_flatten(tree_a), unicode_flatten(tree_b))

View File

@@ -53,6 +53,8 @@ class TrackMiddlewareTestCase(TestCase):
def test_default_request_context(self):
context = self.get_context_for_path('/courses/')
self.assertEquals(context, {
'accept_language': '',
'referer': '',
'user_id': '',
'session': '',
'username': '',

View File

@@ -23,6 +23,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
data = {sentinel.key: sentinel.value}
context = {
'accept_language': sentinel.accept_language,
'referer': sentinel.referer,
'username': sentinel.username,
'session': sentinel.session,
'ip': sentinel.ip,
@@ -40,6 +42,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
emitted_event = self.get_event()
expected_event = {
'accept_language': sentinel.accept_language,
'referer': sentinel.referer,
'event_type': sentinel.name,
'name': sentinel.name,
'context': {
@@ -58,7 +62,7 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
'page': None,
'session': sentinel.session,
}
self.assertEqual(expected_event, emitted_event)
self.assertEqualUnicode(expected_event, emitted_event)
@override_settings(
EVENT_TRACKING_PROCESSORS=LEGACY_SHIM_PROCESSOR,
@@ -69,6 +73,8 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
emitted_event = self.get_event()
expected_event = {
'accept_language': '',
'referer': '',
'event_type': sentinel.name,
'name': sentinel.name,
'context': {},
@@ -82,4 +88,4 @@ class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase):
'page': None,
'session': '',
}
self.assertEqual(expected_event, emitted_event)
self.assertEqualUnicode(expected_event, emitted_event)

View File

@@ -58,6 +58,8 @@ def user_track(request):
"username": username,
"session": context.get('session', ''),
"ip": _get_request_header(request, 'REMOTE_ADDR'),
"referer": _get_request_header(request, 'HTTP_REFERER'),
"accept_language": _get_request_header(request, 'HTTP_ACCEPT_LANGUAGE'),
"event_source": "browser",
"event_type": _get_request_value(request, 'event_type'),
"event": _get_request_value(request, 'event'),
@@ -95,6 +97,8 @@ def server_track(request, event_type, event, page=None):
event = {
"username": username,
"ip": _get_request_header(request, 'REMOTE_ADDR'),
"referer": _get_request_header(request, 'HTTP_REFERER'),
"accept_language": _get_request_header(request, 'HTTP_ACCEPT_LANGUAGE'),
"event_source": "server",
"event_type": event_type,
"event": event,

View File

@@ -53,6 +53,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
def setUp(self):
super(SegmentIOTrackingTestCase, self).setUp()
self.maxDiff = None # pylint: disable=invalid-name
self.request_factory = RequestFactory()
def test_get_request(self):
@@ -189,6 +190,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
self.assertEquals(response.status_code, 200)
expected_event = {
'accept_language': '',
'referer': '',
'username': str(sentinel.username),
'ip': '',
'session': '',
@@ -207,7 +210,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
},
'user_id': USER_ID,
'course_id': course_id,
'org_id': 'foo',
'org_id': u'foo',
'path': ENDPOINT,
'client': {
'library': {
@@ -224,7 +227,7 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
finally:
middleware.process_response(request, None)
self.assertEquals(self.get_event(), expected_event)
self.assertEqualUnicode(self.get_event(), expected_event)
def test_invalid_course_id(self):
request = self.create_request(
@@ -352,6 +355,8 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
self.assertEquals(response.status_code, 200)
expected_event_without_payload = {
'accept_language': '',
'referer': '',
'username': str(sentinel.username),
'ip': '',
'session': '',
@@ -397,5 +402,5 @@ class SegmentIOTrackingTestCase(EventTrackingTestCase):
actual_event = dict(self.get_event())
payload = json.loads(actual_event.pop('event'))
self.assertEquals(actual_event, expected_event_without_payload)
self.assertEquals(payload, expected_payload)
self.assertEqualUnicode(actual_event, expected_event_without_payload)
self.assertEqualUnicode(payload, expected_payload)

View File

@@ -41,6 +41,8 @@ class TestTrackViews(TestCase):
views.user_track(request)
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'session': sentinel.session,
'ip': '127.0.0.1',
@@ -65,6 +67,8 @@ class TestTrackViews(TestCase):
views.user_track(request)
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'session': sentinel.session,
'ip': '127.0.0.1',
@@ -95,6 +99,8 @@ class TestTrackViews(TestCase):
views.user_track(request)
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'session': '',
'ip': '127.0.0.1',
@@ -123,6 +129,8 @@ class TestTrackViews(TestCase):
views.server_track(request, str(sentinel.event_type), '{}')
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'ip': '127.0.0.1',
'event_source': 'server',
@@ -147,6 +155,8 @@ class TestTrackViews(TestCase):
views.server_track(request, str(sentinel.event_type), '{}')
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'ip': '127.0.0.1',
'event_source': 'server',
@@ -180,6 +190,8 @@ class TestTrackViews(TestCase):
views.server_track(request, str(sentinel.event_type), '{}')
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'ip': '127.0.0.1',
'event_source': 'server',
@@ -207,6 +219,8 @@ class TestTrackViews(TestCase):
views.server_track(request, str(sentinel.event_type), '{}')
expected_event = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'ip': '',
'event_source': 'server',
@@ -223,6 +237,8 @@ class TestTrackViews(TestCase):
@freeze_time(expected_time)
def test_task_track(self):
request_info = {
'accept_language': '',
'referer': '',
'username': 'anonymous',
'ip': '127.0.0.1',
'agent': 'agent',