feat: Log session ID from various sources after safe-session mismatch (#29185)

Add logging in case a safe-session user mismatch is related to wrong
session being retrieved from cache. This additional logging should
reveal any such mismatch (without revealing the actual session ID in
logs).

Send to metrics as custom attributes as well.

Also:

- Compute "session_id_changed" based on all three session IDs (and
  send as custom attribute)
- Put all _verify_user logs into one (multiline) log line
- Accordingly, change logging assertion to only require a substring,
  at-least-once match rather than a full-and-only match.

ref: ARCHBOM-1939
This commit is contained in:
Tim McCormack
2021-11-04 19:34:44 +00:00
committed by GitHub
parent ccb8c6ce27
commit fdd27a1f71
3 changed files with 119 additions and 21 deletions

View File

@@ -77,8 +77,9 @@ Custom Attributes:
import inspect
from base64 import b64encode
from hashlib import sha256
from hashlib import sha1, sha256
from logging import getLogger
from typing import Union
from django.conf import settings
from django.contrib.auth import SESSION_KEY
@@ -328,6 +329,9 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin):
else:
request.COOKIES[settings.SESSION_COOKIE_NAME] = safe_cookie_data.session_id # Step 2
# Save off for debugging and logging in _verify_user
request.cookie_session_field = safe_cookie_data.session_id
process_request_response = super().process_request(request) # Step 3 # lint-amnesty, pylint: disable=assignment-from-no-return, super-with-arguments
if process_request_response:
# The process_request pipeline has been short circuited so
@@ -469,24 +473,52 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin):
if request_user_object_mismatch or session_user_mismatch:
# Log accumulated information stored on request for each change of user
if hasattr(request, 'debug_user_changes'):
log.warning('An unsafe user transition was found. It either needs to be fixed or exempted.\n {}'
.format('\n'.join(request.debug_user_changes)))
extra_logs = []
response_session_id = getattr(getattr(request, 'session', None), 'session_key', None)
# A safe-session user mismatch could be caused by the
# wrong session being retrieved from cache. This
# additional logging should reveal any such mismatch
# (without revealing the actual session ID in logs).
sessions_raw = [
('parsed_cookie', request.cookie_session_field),
('at_request', request.safe_cookie_verified_session_id),
('at_response', response_session_id),
]
# Note that this is an ordered list of pairs, not a
# dict, so that the output order is consistent.
session_hashes = [(k, obscure_token(v)) for (k, v) in sessions_raw]
session_id_changed = len(set(kv[1] for kv in sessions_raw)) > 1
session_id_changed = hasattr(request.session, 'session_key') and\
request.safe_cookie_verified_session_id != request.session.session_key
# delete old session id for security
del request.safe_cookie_verified_session_id
del request.cookie_session_field
log_suffix = 'Session changed.' if session_id_changed else 'Session did not change.'
extra_logs.append('Session changed.' if session_id_changed else 'Session did not change.')
# Allow comparing session IDs in both logs and metrics
extra_logs.append(
"Hash of session ID from various sources: " +
'; '.join(f'{k}={v}' for (k, v) in session_hashes)
)
for source_name, id_hash in session_hashes:
set_custom_attribute(f'safe_sessions.session_id_hash.{source_name}', id_hash)
set_custom_attribute('safe_sessions.session_id_changed', session_id_changed)
if hasattr(request, 'debug_user_changes'):
extra_logs.append(
'An unsafe user transition was found. It either needs to be fixed or exempted.\n' +
'\n'.join(request.debug_user_changes)
)
if request_user_object_mismatch and not session_user_mismatch:
log.warning(
(
"SafeCookieData user at initial request '{}' does not match user at response time: '{}' "
"for request path '{}'. {}"
"for request path '{}'.\n{}"
).format( # pylint: disable=logging-format-interpolation
request.safe_cookie_verified_user_id, request.user.id, request.path, log_suffix
request.safe_cookie_verified_user_id, request.user.id, request.path, '\n'.join(extra_logs)
),
)
set_custom_attribute("safe_sessions.user_mismatch", "request-response-mismatch")
@@ -494,9 +526,9 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin):
log.warning(
(
"SafeCookieData user at initial request '{}' does not match user in session: '{}' "
"for request path '{}'. {}"
"for request path '{}'.\n{}"
).format( # pylint: disable=logging-format-interpolation
request.safe_cookie_verified_user_id, userid_in_session, request.path, log_suffix
request.safe_cookie_verified_user_id, userid_in_session, request.path, '\n'.join(extra_logs)
),
)
set_custom_attribute("safe_sessions.user_mismatch", "request-session-mismatch")
@@ -504,10 +536,10 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin):
log.warning(
(
"SafeCookieData user at initial request '{}' matches neither user in session: '{}' "
"nor user at response time: '{}' for request path '{}'. {}"
"nor user at response time: '{}' for request path '{}'.\n{}"
).format( # pylint: disable=logging-format-interpolation
request.safe_cookie_verified_user_id, userid_in_session, request.user.id, request.path,
log_suffix
'\n'.join(extra_logs)
),
)
set_custom_attribute("safe_sessions.user_mismatch", "request-response-and-session-mismatch")
@@ -552,6 +584,34 @@ class SafeSessionMiddleware(SessionMiddleware, MiddlewareMixin):
cookies[settings.SESSION_COOKIE_NAME] = str(safe_cookie_data)
def obscure_token(value: Union[str, None]) -> Union[str, None]:
"""
Return a short string that can be used to detect other occurrences
of this string without revealing the original. Return None if value
is None.
Outputs are intended to be *transient* and should not be stored or
compared long-term, as they are dependent on the value of
settings.SECRET_KEY, which can be rotated at any time.
WARNING: This code must only be used for *high-entropy inputs*
that an attacker cannot enumerate, predict, or guess for other
parties. In particular, it must not be used for sequential IDs or
timestamps, since an attacker possessing the pepper could
precompute the hashes. A non-cryptographic de-identification
technique must be used in such cases, such as a lookup table.
"""
if value is None:
return None
else:
# Use of hashing (and in particular use of SECRET_KEY as a
# pepper) is overkill for safe-sessions, where at worst we
# might end up logging an occasional session ID prefix... but
# there's very little cost in overdoing it here, especially if
# the code ends up getting copied around.
return sha1((settings.SECRET_KEY + value).encode()).hexdigest()[:8]
def _mark_cookie_for_deletion(request):
"""
Updates the given request object to designate that the session

View File

@@ -5,7 +5,6 @@ Unit tests for SafeSessionMiddleware
from unittest.mock import patch
import ddt
from crum import set_current_request
from django.conf import settings
from django.contrib.auth import SESSION_KEY
@@ -17,7 +16,13 @@ from django.test.utils import override_settings
from openedx.core.djangolib.testing.utils import get_mock_request
from common.djangoapps.student.tests.factories import UserFactory
from ..middleware import SafeCookieData, SafeSessionMiddleware, track_request_user_changes, mark_user_change_as_expected
from ..middleware import (
SafeCookieData,
SafeSessionMiddleware,
mark_user_change_as_expected,
obscure_token,
track_request_user_changes
)
from .test_utils import TestSafeSessionsLogMixin
@@ -412,6 +417,37 @@ class TestSafeSessionMiddleware(TestSafeSessionsLogMixin, TestCase):
assert 'safe_sessions.user_mismatch' not in [call.args[0] for call in mock_attr.call_args_list]
class TestTokenObscuring(TestCase):
"""
Test the ability to obscure session IDs.
"""
def test_obscure_none(self):
"""Doesn't break on None input."""
assert obscure_token(None) is None
def test_obscure_vary(self):
"""
Verifies that input and SECRET_KEY both change output.
The "expected" values here are computed and arbitrary; if the
algorithm is updated, the expected values should also be
updated. (Changing the algorithm will invalidate any stored
data, but the docstring explicitly warns not to store these
outputs anyhow.)
"""
with override_settings(SECRET_KEY="FIRST SECRET"):
# Same input twice, same output twice
assert obscure_token('abcdef-123456') == '2d4260b0'
assert obscure_token('abcdef-123456') == '2d4260b0'
# Different input, different output
assert obscure_token('zxcvbnm-000111') == '87978f29'
# Different key, different output
with override_settings(SECRET_KEY="SECOND SECRET"):
assert obscure_token('abcdef-123456') == '7325d529'
@ddt.ddt
class TestTrackRequestUserChanges(TestCase):
"""

View File

@@ -33,14 +33,16 @@ class TestSafeSessionsLogMixin:
yield
@contextmanager
def assert_logged_with_message(self, log_string, log_level='error'):
def assert_logged_with_message(self, log_substring, log_level='error'):
"""
Asserts that the logger with the given log_level was called
with a string.
with a substring.
"""
with patch('openedx.core.djangoapps.safe_sessions.middleware.log.' + log_level) as mock_log:
yield
mock_log.assert_any_call(log_string)
log_messages = [call.args[0] for call in mock_log.call_args_list]
assert any(log_substring in msg for msg in log_messages), \
f"Expected to find log substring in one of: {log_messages}"
@contextmanager
def assert_not_logged(self):
@@ -134,7 +136,7 @@ class TestSafeSessionsLogMixin:
with self.assert_logged_with_message(
(
"SafeCookieData user at initial request '{}' does not match user at response time: '{}' "
"for request path '{}'. {}"
"for request path '{}'.\n{}"
).format(
user_at_request, user_at_response, request_path, session_suffix
),
@@ -153,7 +155,7 @@ class TestSafeSessionsLogMixin:
with self.assert_logged_with_message(
(
"SafeCookieData user at initial request '{}' does not match user in session: '{}' "
"for request path '{}'. {}"
"for request path '{}'.\n{}"
).format(
user_at_request, user_in_session, request_path, session_suffix
),
@@ -173,7 +175,7 @@ class TestSafeSessionsLogMixin:
with self.assert_logged_with_message(
(
"SafeCookieData user at initial request '{}' matches neither user in session: '{}' "
"nor user at response time: '{}' for request path '{}'. {}"
"nor user at response time: '{}' for request path '{}'.\n{}"
).format(
user_at_request, user_in_session, user_at_response, request_path, session_suffix
),