diff --git a/openedx/core/djangoapps/safe_sessions/middleware.py b/openedx/core/djangoapps/safe_sessions/middleware.py index 3708f6af1e..e797f6ea1f 100644 --- a/openedx/core/djangoapps/safe_sessions/middleware.py +++ b/openedx/core/djangoapps/safe_sessions/middleware.py @@ -77,8 +77,9 @@ Custom Attributes: import inspect from base64 import b64encode -from hashlib import sha256 +from hashlib import sha1, sha256 from logging import getLogger +from typing import Union from django.conf import settings from django.contrib.auth import SESSION_KEY @@ -328,6 +329,9 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin): else: request.COOKIES[settings.SESSION_COOKIE_NAME] = safe_cookie_data.session_id # Step 2 + # Save off for debugging and logging in _verify_user + request.cookie_session_field = safe_cookie_data.session_id + process_request_response = super().process_request(request) # Step 3 # lint-amnesty, pylint: disable=assignment-from-no-return, super-with-arguments if process_request_response: # The process_request pipeline has been short circuited so @@ -469,24 +473,52 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin): if request_user_object_mismatch or session_user_mismatch: # Log accumulated information stored on request for each change of user - if hasattr(request, 'debug_user_changes'): - log.warning('An unsafe user transition was found. It either needs to be fixed or exempted.\n {}' - .format('\n'.join(request.debug_user_changes))) + extra_logs = [] + + response_session_id = getattr(getattr(request, 'session', None), 'session_key', None) + + # A safe-session user mismatch could be caused by the + # wrong session being retrieved from cache. This + # additional logging should reveal any such mismatch + # (without revealing the actual session ID in logs). + sessions_raw = [ + ('parsed_cookie', request.cookie_session_field), + ('at_request', request.safe_cookie_verified_session_id), + ('at_response', response_session_id), + ] + # Note that this is an ordered list of pairs, not a + # dict, so that the output order is consistent. + session_hashes = [(k, obscure_token(v)) for (k, v) in sessions_raw] + session_id_changed = len(set(kv[1] for kv in sessions_raw)) > 1 - session_id_changed = hasattr(request.session, 'session_key') and\ - request.safe_cookie_verified_session_id != request.session.session_key # delete old session id for security del request.safe_cookie_verified_session_id + del request.cookie_session_field - log_suffix = 'Session changed.' if session_id_changed else 'Session did not change.' + extra_logs.append('Session changed.' if session_id_changed else 'Session did not change.') + + # Allow comparing session IDs in both logs and metrics + extra_logs.append( + "Hash of session ID from various sources: " + + '; '.join(f'{k}={v}' for (k, v) in session_hashes) + ) + for source_name, id_hash in session_hashes: + set_custom_attribute(f'safe_sessions.session_id_hash.{source_name}', id_hash) + set_custom_attribute('safe_sessions.session_id_changed', session_id_changed) + + if hasattr(request, 'debug_user_changes'): + extra_logs.append( + 'An unsafe user transition was found. It either needs to be fixed or exempted.\n' + + '\n'.join(request.debug_user_changes) + ) if request_user_object_mismatch and not session_user_mismatch: log.warning( ( "SafeCookieData user at initial request '{}' does not match user at response time: '{}' " - "for request path '{}'. {}" + "for request path '{}'.\n{}" ).format( # pylint: disable=logging-format-interpolation - request.safe_cookie_verified_user_id, request.user.id, request.path, log_suffix + request.safe_cookie_verified_user_id, request.user.id, request.path, '\n'.join(extra_logs) ), ) set_custom_attribute("safe_sessions.user_mismatch", "request-response-mismatch") @@ -494,9 +526,9 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin): log.warning( ( "SafeCookieData user at initial request '{}' does not match user in session: '{}' " - "for request path '{}'. {}" + "for request path '{}'.\n{}" ).format( # pylint: disable=logging-format-interpolation - request.safe_cookie_verified_user_id, userid_in_session, request.path, log_suffix + request.safe_cookie_verified_user_id, userid_in_session, request.path, '\n'.join(extra_logs) ), ) set_custom_attribute("safe_sessions.user_mismatch", "request-session-mismatch") @@ -504,10 +536,10 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin): log.warning( ( "SafeCookieData user at initial request '{}' matches neither user in session: '{}' " - "nor user at response time: '{}' for request path '{}'. {}" + "nor user at response time: '{}' for request path '{}'.\n{}" ).format( # pylint: disable=logging-format-interpolation request.safe_cookie_verified_user_id, userid_in_session, request.user.id, request.path, - log_suffix + '\n'.join(extra_logs) ), ) set_custom_attribute("safe_sessions.user_mismatch", "request-response-and-session-mismatch") @@ -552,6 +584,34 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin): cookies[settings.SESSION_COOKIE_NAME] = str(safe_cookie_data) +def obscure_token(value: Union[str, None]) -> Union[str, None]: + """ + Return a short string that can be used to detect other occurrences + of this string without revealing the original. Return None if value + is None. + + Outputs are intended to be *transient* and should not be stored or + compared long-term, as they are dependent on the value of + settings.SECRET_KEY, which can be rotated at any time. + + WARNING: This code must only be used for *high-entropy inputs* + that an attacker cannot enumerate, predict, or guess for other + parties. In particular, it must not be used for sequential IDs or + timestamps, since an attacker possessing the pepper could + precompute the hashes. A non-cryptographic de-identification + technique must be used in such cases, such as a lookup table. + """ + if value is None: + return None + else: + # Use of hashing (and in particular use of SECRET_KEY as a + # pepper) is overkill for safe-sessions, where at worst we + # might end up logging an occasional session ID prefix... but + # there's very little cost in overdoing it here, especially if + # the code ends up getting copied around. + return sha1((settings.SECRET_KEY + value).encode()).hexdigest()[:8] + + def _mark_cookie_for_deletion(request): """ Updates the given request object to designate that the session diff --git a/openedx/core/djangoapps/safe_sessions/tests/test_middleware.py b/openedx/core/djangoapps/safe_sessions/tests/test_middleware.py index a8cff7eb56..be9e5313dd 100644 --- a/openedx/core/djangoapps/safe_sessions/tests/test_middleware.py +++ b/openedx/core/djangoapps/safe_sessions/tests/test_middleware.py @@ -5,7 +5,6 @@ Unit tests for SafeSessionMiddleware from unittest.mock import patch import ddt - from crum import set_current_request from django.conf import settings from django.contrib.auth import SESSION_KEY @@ -17,7 +16,13 @@ from django.test.utils import override_settings from openedx.core.djangolib.testing.utils import get_mock_request from common.djangoapps.student.tests.factories import UserFactory -from ..middleware import SafeCookieData, SafeSessionMiddleware, track_request_user_changes, mark_user_change_as_expected +from ..middleware import ( + SafeCookieData, + SafeSessionMiddleware, + mark_user_change_as_expected, + obscure_token, + track_request_user_changes +) from .test_utils import TestSafeSessionsLogMixin @@ -412,6 +417,37 @@ class TestSafeSessionMiddleware(TestSafeSessionsLogMixin, TestCase): assert 'safe_sessions.user_mismatch' not in [call.args[0] for call in mock_attr.call_args_list] +class TestTokenObscuring(TestCase): + """ + Test the ability to obscure session IDs. + """ + + def test_obscure_none(self): + """Doesn't break on None input.""" + assert obscure_token(None) is None + + def test_obscure_vary(self): + """ + Verifies that input and SECRET_KEY both change output. + + The "expected" values here are computed and arbitrary; if the + algorithm is updated, the expected values should also be + updated. (Changing the algorithm will invalidate any stored + data, but the docstring explicitly warns not to store these + outputs anyhow.) + """ + with override_settings(SECRET_KEY="FIRST SECRET"): + # Same input twice, same output twice + assert obscure_token('abcdef-123456') == '2d4260b0' + assert obscure_token('abcdef-123456') == '2d4260b0' + # Different input, different output + assert obscure_token('zxcvbnm-000111') == '87978f29' + + # Different key, different output + with override_settings(SECRET_KEY="SECOND SECRET"): + assert obscure_token('abcdef-123456') == '7325d529' + + @ddt.ddt class TestTrackRequestUserChanges(TestCase): """ diff --git a/openedx/core/djangoapps/safe_sessions/tests/test_utils.py b/openedx/core/djangoapps/safe_sessions/tests/test_utils.py index c6c1977a50..976ce58a4b 100644 --- a/openedx/core/djangoapps/safe_sessions/tests/test_utils.py +++ b/openedx/core/djangoapps/safe_sessions/tests/test_utils.py @@ -33,14 +33,16 @@ class TestSafeSessionsLogMixin: yield @contextmanager - def assert_logged_with_message(self, log_string, log_level='error'): + def assert_logged_with_message(self, log_substring, log_level='error'): """ Asserts that the logger with the given log_level was called - with a string. + with a substring. """ with patch('openedx.core.djangoapps.safe_sessions.middleware.log.' + log_level) as mock_log: yield - mock_log.assert_any_call(log_string) + log_messages = [call.args[0] for call in mock_log.call_args_list] + assert any(log_substring in msg for msg in log_messages), \ + f"Expected to find log substring in one of: {log_messages}" @contextmanager def assert_not_logged(self): @@ -134,7 +136,7 @@ class TestSafeSessionsLogMixin: with self.assert_logged_with_message( ( "SafeCookieData user at initial request '{}' does not match user at response time: '{}' " - "for request path '{}'. {}" + "for request path '{}'.\n{}" ).format( user_at_request, user_at_response, request_path, session_suffix ), @@ -153,7 +155,7 @@ class TestSafeSessionsLogMixin: with self.assert_logged_with_message( ( "SafeCookieData user at initial request '{}' does not match user in session: '{}' " - "for request path '{}'. {}" + "for request path '{}'.\n{}" ).format( user_at_request, user_in_session, request_path, session_suffix ), @@ -173,7 +175,7 @@ class TestSafeSessionsLogMixin: with self.assert_logged_with_message( ( "SafeCookieData user at initial request '{}' matches neither user in session: '{}' " - "nor user at response time: '{}' for request path '{}'. {}" + "nor user at response time: '{}' for request path '{}'.\n{}" ).format( user_at_request, user_in_session, user_at_response, request_path, session_suffix ),