From 0d6e0ac779d53de3d298fa33e68967fc4c208ed3 Mon Sep 17 00:00:00 2001 From: Syed Hassan Raza Date: Thu, 25 Feb 2016 22:30:45 +0500 Subject: [PATCH] Add latin1 decoding to HTTP Headers --- common/djangoapps/track/middleware.py | 9 ++++----- .../djangoapps/track/tests/test_middleware.py | 19 +++++++++++++++---- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/common/djangoapps/track/middleware.py b/common/djangoapps/track/middleware.py index dadd3d771e..d08f8df2c8 100644 --- a/common/djangoapps/track/middleware.py +++ b/common/djangoapps/track/middleware.py @@ -140,12 +140,11 @@ class TrackMiddleware(object): 'ip': self.get_request_ip_address(request), } for header_name, context_key in META_KEY_TO_CONTEXT_KEY.iteritems(): - context[context_key] = request.META.get(header_name, '') + # HTTP headers may contain Latin1 characters. Decoding using Latin1 encoding here + # avoids encountering UnicodeDecodeError exceptions when these header strings are + # output to tracking logs. + context[context_key] = request.META.get(header_name, '').decode('latin1') - # HTTP_USER_AGENT user might can contain the information that include latin1 characters - # decoding this using latin1 scheme will prevent to raise UnicodeDecodeError when using - # json.dumps for tracking purpose. - context['agent'] = context['agent'].decode('latin1') # Google Analytics uses the clientId to keep track of unique visitors. A GA cookie looks like # this: _ga=GA1.2.1033501218.1368477899. The clientId is this part: 1033501218.1368477899. 
google_analytics_cookie = request.COOKIES.get('_ga') diff --git a/common/djangoapps/track/tests/test_middleware.py b/common/djangoapps/track/tests/test_middleware.py index 4c83824966..1609b96357 100644 --- a/common/djangoapps/track/tests/test_middleware.py +++ b/common/djangoapps/track/tests/test_middleware.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- """Tests for tracking middleware.""" +import ddt from mock import patch from mock import sentinel @@ -13,7 +14,9 @@ from eventtracking import tracker from track.middleware import TrackMiddleware +@ddt.ddt class TrackMiddlewareTestCase(TestCase): + """ Class for checking tracking requests """ def setUp(self): super(TrackMiddlewareTestCase, self).setUp() @@ -29,17 +32,25 @@ class TrackMiddlewareTestCase(TestCase): self.track_middleware.process_request(request) self.assertTrue(self.mock_server_track.called) - def test_request_with_latin1_characters(self): + @ddt.unpack + @ddt.data( + ('HTTP_USER_AGENT', 'agent'), + ('PATH_INFO', 'path'), + ('HTTP_REFERER', 'referer'), + ('HTTP_ACCEPT_LANGUAGE', 'accept_language'), + ) + def test_request_with_latin1_characters(self, meta_key, context_key): """ - When HTTP_USER_AGENT in request.META contains latin1 characters. + When HTTP headers contain latin1 characters. """ request = self.request_factory.get('/somewhere') - request.META['HTTP_USER_AGENT'] = 'test latin1 \xd3 \xe9 \xf1' # pylint: disable=no-member + # pylint: disable=no-member + request.META[meta_key] = 'test latin1 \xd3 \xe9 \xf1' # pylint: disable=no-member context = self.get_context_for_request(request) # The bytes in the string on the right are utf8 encoded in the source file, so we decode them to construct # a valid unicode string. - self.assertEqual(context['agent'], 'test latin1 Ó é ñ'.decode('utf8')) + self.assertEqual(context[context_key], 'test latin1 Ó é ñ'.decode('utf8')) def test_default_filters_do_not_render_view(self): for url in ['/event', '/event/1', '/login', '/heartbeat']: