From fdd27a1f71285e383a209bd447426c651641a8f9 Mon Sep 17 00:00:00 2001 From: Tim McCormack Date: Thu, 4 Nov 2021 19:34:44 +0000 Subject: [PATCH] feat: Log session ID from various sources after safe-session mismatch (#29185) Add logging in case a safe-session user mismatch is related to wrong session being retrieved from cache. This additional logging should reveal any such mismatch (without revealing the actual session ID in logs). Send to metrics as custom attributes as well. Also: - Compute "session_id_changed" based on all three session IDs (and send as custom attribute) - Put all _verify_user logs into one (multiline) log line - Accordingly, change logging assertion to only require a substring, at-least-once match rather than a full-and-only match. ref: ARCHBOM-1939 --- .../djangoapps/safe_sessions/middleware.py | 86 ++++++++++++++++--- .../safe_sessions/tests/test_middleware.py | 40 ++++++++- .../safe_sessions/tests/test_utils.py | 14 +-- 3 files changed, 119 insertions(+), 21 deletions(-) diff --git a/openedx/core/djangoapps/safe_sessions/middleware.py b/openedx/core/djangoapps/safe_sessions/middleware.py index 3708f6af1e..e797f6ea1f 100644 --- a/openedx/core/djangoapps/safe_sessions/middleware.py +++ b/openedx/core/djangoapps/safe_sessions/middleware.py @@ -77,8 +77,9 @@ Custom Attributes: import inspect from base64 import b64encode -from hashlib import sha256 +from hashlib import sha1, sha256 from logging import getLogger +from typing import Union from django.conf import settings from django.contrib.auth import SESSION_KEY @@ -328,6 +329,9 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin): else: request.COOKIES[settings.SESSION_COOKIE_NAME] = safe_cookie_data.session_id # Step 2 + # Save off for debugging and logging in _verify_user + request.cookie_session_field = safe_cookie_data.session_id + process_request_response = super().process_request(request) # Step 3 # lint-amnesty, pylint: disable=assignment-from-no-return, super-with-arguments if process_request_response: # The process_request pipeline has been short circuited so @@ -469,24 +473,52 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin): if request_user_object_mismatch or session_user_mismatch: # Log accumulated information stored on request for each change of user - if hasattr(request, 'debug_user_changes'): - log.warning('An unsafe user transition was found. It either needs to be fixed or exempted.\n {}' - .format('\n'.join(request.debug_user_changes))) + extra_logs = [] + + response_session_id = getattr(getattr(request, 'session', None), 'session_key', None) + + # A safe-session user mismatch could be caused by the + # wrong session being retrieved from cache. This + # additional logging should reveal any such mismatch + # (without revealing the actual session ID in logs). + sessions_raw = [ + ('parsed_cookie', request.cookie_session_field), + ('at_request', request.safe_cookie_verified_session_id), + ('at_response', response_session_id), + ] + # Note that this is an ordered list of pairs, not a + # dict, so that the output order is consistent. + session_hashes = [(k, obscure_token(v)) for (k, v) in sessions_raw] + session_id_changed = len(set(kv[1] for kv in sessions_raw)) > 1 - session_id_changed = hasattr(request.session, 'session_key') and\ - request.safe_cookie_verified_session_id != request.session.session_key # delete old session id for security del request.safe_cookie_verified_session_id + del request.cookie_session_field - log_suffix = 'Session changed.' if session_id_changed else 'Session did not change.' + extra_logs.append('Session changed.' if session_id_changed else 'Session did not change.') + + # Allow comparing session IDs in both logs and metrics + extra_logs.append( + "Hash of session ID from various sources: " + + '; '.join(f'{k}={v}' for (k, v) in session_hashes) + ) + for source_name, id_hash in session_hashes: + set_custom_attribute(f'safe_sessions.session_id_hash.{source_name}', id_hash) + set_custom_attribute('safe_sessions.session_id_changed', session_id_changed) + + if hasattr(request, 'debug_user_changes'): + extra_logs.append( + 'An unsafe user transition was found. It either needs to be fixed or exempted.\n' + + '\n'.join(request.debug_user_changes) + ) if request_user_object_mismatch and not session_user_mismatch: log.warning( ( "SafeCookieData user at initial request '{}' does not match user at response time: '{}' " - "for request path '{}'. {}" + "for request path '{}'.\n{}" ).format( # pylint: disable=logging-format-interpolation - request.safe_cookie_verified_user_id, request.user.id, request.path, log_suffix + request.safe_cookie_verified_user_id, request.user.id, request.path, '\n'.join(extra_logs) ), ) set_custom_attribute("safe_sessions.user_mismatch", "request-response-mismatch") @@ -494,9 +526,9 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin): log.warning( ( "SafeCookieData user at initial request '{}' does not match user in session: '{}' " - "for request path '{}'. {}" + "for request path '{}'.\n{}" ).format( # pylint: disable=logging-format-interpolation - request.safe_cookie_verified_user_id, userid_in_session, request.path, log_suffix + request.safe_cookie_verified_user_id, userid_in_session, request.path, '\n'.join(extra_logs) ), ) set_custom_attribute("safe_sessions.user_mismatch", "request-session-mismatch") @@ -504,10 +536,10 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin): log.warning( ( "SafeCookieData user at initial request '{}' matches neither user in session: '{}' " - "nor user at response time: '{}' for request path '{}'. {}" + "nor user at response time: '{}' for request path '{}'.\n{}" ).format( # pylint: disable=logging-format-interpolation request.safe_cookie_verified_user_id, userid_in_session, request.user.id, request.path, - log_suffix + '\n'.join(extra_logs) ), ) set_custom_attribute("safe_sessions.user_mismatch", "request-response-and-session-mismatch") @@ -552,6 +584,34 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin): cookies[settings.SESSION_COOKIE_NAME] = str(safe_cookie_data) +def obscure_token(value: Union[str, None]) -> Union[str, None]: + """ + Return a short string that can be used to detect other occurrences + of this string without revealing the original. Return None if value + is None. + + Outputs are intended to be *transient* and should not be stored or + compared long-term, as they are dependent on the value of + settings.SECRET_KEY, which can be rotated at any time. + + WARNING: This code must only be used for *high-entropy inputs* + that an attacker cannot enumerate, predict, or guess for other + parties. In particular, it must not be used for sequential IDs or + timestamps, since an attacker possessing the pepper could + precompute the hashes. A non-cryptographic de-identification + technique must be used in such cases, such as a lookup table. + """ + if value is None: + return None + else: + # Use of hashing (and in particular use of SECRET_KEY as a + # pepper) is overkill for safe-sessions, where at worst we + # might end up logging an occasional session ID prefix... but + # there's very little cost in overdoing it here, especially if + # the code ends up getting copied around. + return sha1((settings.SECRET_KEY + value).encode()).hexdigest()[:8] + + def _mark_cookie_for_deletion(request): """ Updates the given request object to designate that the session diff --git a/openedx/core/djangoapps/safe_sessions/tests/test_middleware.py b/openedx/core/djangoapps/safe_sessions/tests/test_middleware.py index a8cff7eb56..be9e5313dd 100644 --- a/openedx/core/djangoapps/safe_sessions/tests/test_middleware.py +++ b/openedx/core/djangoapps/safe_sessions/tests/test_middleware.py @@ -5,7 +5,6 @@ Unit tests for SafeSessionMiddleware from unittest.mock import patch import ddt - from crum import set_current_request from django.conf import settings from django.contrib.auth import SESSION_KEY @@ -17,7 +16,13 @@ from django.test.utils import override_settings from openedx.core.djangolib.testing.utils import get_mock_request from common.djangoapps.student.tests.factories import UserFactory -from ..middleware import SafeCookieData, SafeSessionMiddleware, track_request_user_changes, mark_user_change_as_expected +from ..middleware import ( + SafeCookieData, + SafeSessionMiddleware, + mark_user_change_as_expected, + obscure_token, + track_request_user_changes +) from .test_utils import TestSafeSessionsLogMixin @@ -412,6 +417,37 @@ class TestSafeSessionMiddleware(TestSafeSessionsLogMixin, TestCase): assert 'safe_sessions.user_mismatch' not in [call.args[0] for call in mock_attr.call_args_list] +class TestTokenObscuring(TestCase): + """ + Test the ability to obscure session IDs. + """ + + def test_obscure_none(self): + """Doesn't break on None input.""" + assert obscure_token(None) is None + + def test_obscure_vary(self): + """ + Verifies that input and SECRET_KEY both change output. + + The "expected" values here are computed and arbitrary; if the + algorithm is updated, the expected values should also be + updated. (Changing the algorithm will invalidate any stored + data, but the docstring explicitly warns not to store these + outputs anyhow.) + """ + with override_settings(SECRET_KEY="FIRST SECRET"): + # Same input twice, same output twice + assert obscure_token('abcdef-123456') == '2d4260b0' + assert obscure_token('abcdef-123456') == '2d4260b0' + # Different input, different output + assert obscure_token('zxcvbnm-000111') == '87978f29' + + # Different key, different output + with override_settings(SECRET_KEY="SECOND SECRET"): + assert obscure_token('abcdef-123456') == '7325d529' + + @ddt.ddt class TestTrackRequestUserChanges(TestCase): """ diff --git a/openedx/core/djangoapps/safe_sessions/tests/test_utils.py b/openedx/core/djangoapps/safe_sessions/tests/test_utils.py index c6c1977a50..976ce58a4b 100644 --- a/openedx/core/djangoapps/safe_sessions/tests/test_utils.py +++ b/openedx/core/djangoapps/safe_sessions/tests/test_utils.py @@ -33,14 +33,16 @@ class TestSafeSessionsLogMixin: yield @contextmanager - def assert_logged_with_message(self, log_string, log_level='error'): + def assert_logged_with_message(self, log_substring, log_level='error'): """ Asserts that the logger with the given log_level was called - with a string. + with a substring. """ with patch('openedx.core.djangoapps.safe_sessions.middleware.log.' + log_level) as mock_log: yield - mock_log.assert_any_call(log_string) + log_messages = [call.args[0] for call in mock_log.call_args_list] + assert any(log_substring in msg for msg in log_messages), \ + f"Expected to find log substring in one of: {log_messages}" @contextmanager def assert_not_logged(self): @@ -134,7 +136,7 @@ class TestSafeSessionsLogMixin: with self.assert_logged_with_message( ( "SafeCookieData user at initial request '{}' does not match user at response time: '{}' " - "for request path '{}'. {}" + "for request path '{}'.\n{}" ).format( user_at_request, user_at_response, request_path, session_suffix ), @@ -153,7 +155,7 @@ class TestSafeSessionsLogMixin: with self.assert_logged_with_message( ( "SafeCookieData user at initial request '{}' does not match user in session: '{}' " - "for request path '{}'. {}" + "for request path '{}'.\n{}" ).format( user_at_request, user_in_session, request_path, session_suffix ), @@ -173,7 +175,7 @@ class TestSafeSessionsLogMixin: with self.assert_logged_with_message( ( "SafeCookieData user at initial request '{}' matches neither user in session: '{}' " - "nor user at response time: '{}' for request path '{}'. {}" + "nor user at response time: '{}' for request path '{}'.\n{}" ).format( user_at_request, user_in_session, user_at_response, request_path, session_suffix ),