From e0e03dec5fe2059ce8dc5a95de1da88fabed433f Mon Sep 17 00:00:00 2001 From: Tim McCormack Date: Wed, 27 Jan 2021 14:13:42 +0000 Subject: [PATCH] Use more appropriate hash when making tracking ID; document SECRET_KEY use (#26134) - Make it easier to rotate `SECRET_KEY` by documenting this usage for both sensitivity and rotation process. (ARCHBOM-1676). - Just use a hash of the secret rather than HMAC + MD5. We're not authenticating a message, so HMAC isn't really needed -- it just needs to be unique, deterministic, and irreversible. SHAKE allows generation of an arbitrary length hash without needing to truncate. Also, rename tracking session ID generator for clarity -- there's no encryption happening here. Add additional test for existing claim of uniqueness. --- common/djangoapps/track/middleware.py | 63 +++++++++++++------ .../djangoapps/track/tests/test_middleware.py | 16 +++-- 2 files changed, 54 insertions(+), 25 deletions(-) diff --git a/common/djangoapps/track/middleware.py b/common/djangoapps/track/middleware.py index 4bdc1627b1..925d2349be 100644 --- a/common/djangoapps/track/middleware.py +++ b/common/djangoapps/track/middleware.py @@ -7,7 +7,6 @@ framework. import hashlib -import hmac import json import logging import re @@ -164,31 +163,55 @@ class TrackMiddleware(MiddlewareMixin): ) def get_session_key(self, request): - """ Gets and encrypts the Django session key from the request or an empty string if it isn't found.""" + """ + Gets a key suitable for representing this Django session for tracking purposes. + + Returns an empty string if there is no active session. + """ try: - return self.encrypt_session_key(request.session.session_key) + return self.substitute_session_key(request.session.session_key) except AttributeError: + # NB: This can hide a missing SECRET_KEY return '' - def encrypt_session_key(self, session_key): - """Encrypts a Django session key to another 32-character hex value.""" + def substitute_session_key(self, session_key): + """ + Deterministically generate a tracking session key from the real one. + + If a session key is not provided, returns empty string. + + The tracking session ID is a 32-character hexadecimal string (matching + Django session key format for convenience, and in case something + downstream makes assumptions.) The tracking ID does not allow recovery + of the original session key but will always be the same unless server + secrets are changed, and will be unique for each session key. + """ if not session_key: return '' - - # Follow the model of django.utils.crypto.salted_hmac() and - # django.contrib.sessions.backends.base._hash() but use MD5 - # instead of SHA1 so that the result has the same length (32) - # as the original session_key. - - # TODO: Switch to SHA224, which is secure. - # If necessary, drop the last little bit of the hash to make it the same length. - # Using a known-insecure hash to shorten is silly. - # Also, why do we need same length? - key_salt = "common.djangoapps.track" + self.__class__.__name__ - key_bytes = (key_salt + settings.SECRET_KEY).encode('utf-8') - key = hashlib.md5(key_bytes).digest() - encrypted_session_key = hmac.new(key, msg=session_key.encode('utf-8'), digestmod=hashlib.md5).hexdigest() - return encrypted_session_key + # Prevent confirmation attacks by using SECRET_KEY as a pepper (see + # ADR: docs/decisions/0008-secret-key-usage.rst). + # Tracking ID and session key will only be linkable + # by someone in possession of the pepper. + # + # This assumes that session_key is high-entropy and unpredictable (which + # it should be anyway.) + # + # Use SHAKE256 from SHA-3 hash family to generate a hash of arbitrary + # length. + hasher = hashlib.shake_128() + # This is one of several uses of SECRET_KEY. + # + # Impact of exposure: Could result in identifying the tracking data for + # users if their actual session keys are already known. + # + # Rotation process: Can be rotated at will. Results in a one-time + # discontinuity in tracking metrics and should be accompanied by a + # heads-up to data researchers. + hasher.update(settings.SECRET_KEY.encode()) + hasher.update(session_key.encode()) + # pylint doesn't know that SHAKE's hexdigest takes an arg: + # https://github.com/PyCQA/pylint/issues/4039 + return hasher.hexdigest(16) # pylint: disable=too-many-function-args def get_user_primary_key(self, request): """Gets the primary key of the logged in Django user""" diff --git a/common/djangoapps/track/tests/test_middleware.py b/common/djangoapps/track/tests/test_middleware.py index 62f910b9cd..10d3aea4c9 100644 --- a/common/djangoapps/track/tests/test_middleware.py +++ b/common/djangoapps/track/tests/test_middleware.py @@ -173,7 +173,7 @@ class TrackMiddlewareTestCase(TestCase): SessionMiddleware().process_request(request) request.session.save() session_key = request.session.session_key - expected_session_key = self.track_middleware.encrypt_session_key(session_key) + expected_session_key = self.track_middleware.substitute_session_key(session_key) self.assertEqual(len(session_key), len(expected_session_key)) context = self.get_context_for_request(request) self.assert_dict_subset(context, { @@ -181,11 +181,17 @@ class TrackMiddlewareTestCase(TestCase): }) @override_settings(SECRET_KEY='85920908f28904ed733fe576320db18cabd7b6cd') - def test_session_key_encryption(self): + def test_session_key_substitution(self): session_key = '665924b49a93e22b46ee9365abf28c2a' - expected_session_key = '3b81f559d14130180065d635a4f35dd2' - encrypted_session_key = self.track_middleware.encrypt_session_key(session_key) - self.assertEqual(encrypted_session_key, expected_session_key) + # Output value pinned to alert on unintended changes to generator + expected_session_key = 'b4103566fc80d20da1970cbb4380bccd' + substitute_session_key = self.track_middleware.substitute_session_key(session_key) + self.assertEqual(substitute_session_key, expected_session_key) + + # Confirm that we get *different* outputs for different inputs + expected_session_key_2 = "6f0c784c1087c6bc4624b7eac982fedf" + substitute_session_key_2 = self.track_middleware.substitute_session_key(session_key + "different") + self.assertEqual(expected_session_key_2, substitute_session_key_2) def test_request_headers(self): ip_address = '10.0.0.0'