[ARCHBOM-1645] Modifying anonymous_id_for_user() to handly SECRET_KEY rotation (#26162)

These changes were initially made to make it easier to do SECRET_KEY rotations.  Along the way, we found it made sense to refractor the code as well.

Changes made:
- changed get_to_create to create because now the code should only get to this block when a write is necessary
- added a lookup for anonymous_user_id. This is to return an existing anonymous_user_id rather than calculating. This will mitigate the results of SECRET_KEY rotation.
- Added monitoring to help us make better decisions: should we not sue SECRET_KEY, performance considerations...
- put old function behind toggle in case something goes wrong in production with new code
- refractoring function structure for better understanding
This commit is contained in:
Manjinder Singh
2021-01-27 07:23:19 -05:00
committed by GitHub
parent 428402e370
commit 2723e0e2bd
10 changed files with 123 additions and 13 deletions

View File

@@ -149,11 +149,91 @@ class AnonymousUserId(models.Model):
def anonymous_id_for_user(user, course_id, save=True):
"""
Return a unique id for a (user, course) pair, suitable for inserting
Inputs:
user: User model
course_id: string or None
save: Whether the id should be saved in an AnonymousUserId object, defaults to True
Return a unique id for a (user, course_id) pair, suitable for inserting
into e.g. personalized survey links.
If user is an `AnonymousUser`, returns `None`
else If this user/course_id pair already has an anonymous id in AnonymousUserId object, return that
else: create new anonymous_id, save it in AnonymousUserId, and return anonymous id
"""
# .. toggle_name: ANONYMOUS_USER_ID_REVERT_TO_STABLE_HASH
# .. toggle_implementation: SettingToggle
# .. toggle_default: False
# .. toggle_description: Used to make sure we can quickly revert back to old behaviour in case our refractoring
# of anonymous_id_for_user function does not work as intended. Our concern is that our refractoring adds a
# database lookup on every call and we are worried this will cause preformance issues.
# .. toggle_use_cases: temporary
# .. toggle_creation_date: 2021-01-26
# .. toggle_target_removal_date: 2021-01-29
# .. toggle_tickets: https://openedx.atlassian.net/browse/ARCHBOM-1645
if getattr(settings, "ANONYMOUS_USER_ID_REVERT_TO_STABLE_HASH", False):
return deprecated_anonymous_id_for_user(user, course_id, save)
# This part is for ability to get xblock instance in xblock_noauth handlers, where user is unauthenticated.
assert user
if user.is_anonymous:
return None
# ARCHBOM-1674: Get a sense of what fraction of anonymous_user_id calls are
# cached, stored in the DB, or retrieved from the DB. This will help inform
# us on decisions about whether we can move to always save IDs,
# pregenerate them, use random instead of deterministic IDs, etc.
monitoring.increment('temp_anonymous_user_id.requested')
cached_id = getattr(user, '_anonymous_id', {}).get(course_id)
if cached_id is not None:
monitoring.increment('temp_anonymous_user_id.returned_from_cache')
return cached_id
# check if an anonymous id already exists for this user and course_id combination
anonymous_user_ids = AnonymousUserId.objects.filter(user=user).filter(course_id=course_id).order_by('-id')
if anonymous_user_ids:
# If there are multiple anonymous_user_ids per user, course_id pair
# select the row which was created most recently
anonymous_user_id = anonymous_user_ids[0].anonymous_user_id
else:
# include the secret key as a salt, and to make the ids unique across different LMS installs.
hasher = hashlib.md5()
hasher.update(settings.SECRET_KEY.encode('utf8'))
hasher.update(text_type(user.id).encode('utf8'))
if course_id:
hasher.update(text_type(course_id).encode('utf-8'))
anonymous_user_id = hasher.hexdigest()
if save is True:
monitoring.increment('temp_anonymous_user_id.computed_stored')
try:
AnonymousUserId.objects.create(
user=user,
course_id=course_id,
anonymous_user_id=anonymous_user_id,
)
except IntegrityError:
# Another thread has already created this entry, so
# continue
monitoring.increment('temp_anonymous_user_id.computed_already_present')
else:
monitoring.increment('temp_anonymous_user_id.computed_unsaved')
# cache the anonymous_id in the user object
if not hasattr(user, '_anonymous_id'):
user._anonymous_id = {} # pylint: disable=protected-access
user._anonymous_id[course_id] = anonymous_user_id # pylint: disable=protected-access
return anonymous_user_id
def deprecated_anonymous_id_for_user(user, course_id, save=True):
"""
Return a unique id for a (user, course) pair, suitable for inserting
into e.g. personalized survey links.
If user is an `AnonymousUser`, returns `None`
Keyword arguments:
save -- Whether the id should be saved in an AnonymousUserId object.
"""

View File

@@ -39,6 +39,7 @@ from common.djangoapps.student.models import (
CourseEnrollment,
LinkedInAddToProfileConfiguration,
UserAttribute,
AnonymousUserId,
anonymous_id_for_user,
unique_id_for_user,
user_by_anonymous_id
@@ -969,10 +970,26 @@ class AnonymousLookupTable(ModuleStoreTestCase):
mode_slug='honor',
mode_display_name='Honor Code',
)
self.user2 = UserFactory.create()
patcher = patch('common.djangoapps.student.models.tracker')
patcher.start()
self.addCleanup(patcher.stop)
def test_same_user_over_multiple_sessions(self):
"""
Anonymous ids are stored in AnonymousUserId model.
This tests to make sure stored value is used rather than a creating a new one
"""
anonymous_id_1 = anonymous_id_for_user(self.user, None)
delattr(self.user, "_anonymous_id") # pylint: disable=literal-used-as-attribute
anonymous_id_2 = anonymous_id_for_user(self.user, None)
self.assertEqual(anonymous_id_1, anonymous_id_2)
def test_diff_anonymous_id_for_diff_users(self):
anonymous_id_1 = anonymous_id_for_user(self.user, None)
anonymous_id_2 = anonymous_id_for_user(self.user2, None)
self.assertNotEqual(anonymous_id_1, anonymous_id_2)
def test_for_unregistered_user(self): # same path as for logged out user
self.assertEqual(None, anonymous_id_for_user(AnonymousUser(), self.course.id))
self.assertIsNone(user_by_anonymous_id(None))
@@ -992,18 +1009,30 @@ class AnonymousLookupTable(ModuleStoreTestCase):
self.assertEqual(self.user, real_user)
self.assertEqual(anonymous_id, anonymous_id_for_user(self.user, course2.id, save=False))
def test_secret_key_changes(self):
"""Test that a new anonymous id is returned when the secret key changes."""
def test_anonymous_id_secret_key_changes_do_not_change_existing_anonymous_ids(self):
"""Test that a same anonymous id is returned when the SECRET_KEY changes."""
CourseEnrollment.enroll(self.user, self.course.id)
anonymous_id = anonymous_id_for_user(self.user, self.course.id)
with override_settings(SECRET_KEY='some_new_and_totally_secret_key'):
# Recreate user object to clear cached anonymous id.
self.user = User.objects.get(pk=self.user.id)
new_anonymous_id = anonymous_id_for_user(self.user, self.course.id)
self.assertNotEqual(anonymous_id, new_anonymous_id)
self.assertEqual(anonymous_id, new_anonymous_id)
self.assertEqual(self.user, user_by_anonymous_id(anonymous_id))
self.assertEqual(self.user, user_by_anonymous_id(new_anonymous_id))
def test_anonymous_id_secret_key_changes_result_in_diff_values_for_same_new_user(self):
"""Test that a different anonymous id is returned when the SECRET_KEY changes."""
CourseEnrollment.enroll(self.user, self.course.id)
anonymous_id = anonymous_id_for_user(self.user, self.course.id)
with override_settings(SECRET_KEY='some_new_and_totally_secret_key'):
# Recreate user object to clear cached anonymous id.
self.user = User.objects.get(pk=self.user.id)
AnonymousUserId.objects.filter(user=self.user).filter(course_id=self.course.id).delete()
new_anonymous_id = anonymous_id_for_user(self.user, self.course.id)
self.assertNotEqual(anonymous_id, new_anonymous_id)
self.assertEqual(self.user, user_by_anonymous_id(new_anonymous_id))
@skip_unless_lms
@patch('openedx.core.djangoapps.programs.utils.get_programs')

View File

@@ -260,7 +260,7 @@ class WeightedSubsectionsGrader(CourseGrader):
a value > 1, the student may end up with a percent > 100%. This allows for sections that
are extra credit.
"""
def __init__(self, subgraders):
def __init__(self, subgraders): # pylint: disable=super-init-not-called
self.subgraders = subgraders
@property

View File

@@ -325,7 +325,7 @@ class CertificatesListRestApiTest(AuthAndScopesTestMixin, SharedModuleStoreTestC
def test_query_counts(self):
# Test student with no certificates
student_no_cert = UserFactory.create(password=self.user_password)
with self.assertNumQueries(20):
with self.assertNumQueries(18):
resp = self.get_response(
AuthType.jwt,
requesting_user=self.global_staff,

View File

@@ -269,8 +269,8 @@ class IndexQueryTestCase(ModuleStoreTestCase):
NUM_PROBLEMS = 20
@ddt.data(
(ModuleStoreEnum.Type.mongo, 10, 175),
(ModuleStoreEnum.Type.split, 4, 171),
(ModuleStoreEnum.Type.mongo, 10, 171),
(ModuleStoreEnum.Type.split, 4, 167),
)
@ddt.unpack
def test_index_query_counts(self, store_type, expected_mongo_query_count, expected_mysql_query_count):
@@ -1439,7 +1439,7 @@ class ProgressPageTests(ProgressPageBaseTests):
@patch.dict(settings.FEATURES, {'ASSUME_ZERO_GRADE_IF_ABSENT_FOR_ALL_TESTS': False})
@ddt.data(
(False, 63, 44),
(False, 61, 44),
(True, 54, 39)
)
@ddt.unpack

View File

@@ -100,7 +100,7 @@ class TestCourseGradeFactory(GradeTestBase):
with self.assertNumQueries(3), mock_get_score(1, 2):
_assert_read(expected_pass=False, expected_percent=0) # start off with grade of 0
num_queries = 44
num_queries = 42
with self.assertNumQueries(num_queries), mock_get_score(1, 2):
grade_factory.update(self.request.user, self.course, force_update_subsections=True)

View File

@@ -664,7 +664,7 @@ class CreditRequirementApiTests(CreditApiTestBase):
self.assertFalse(api.is_user_eligible_for_credit(user.username, self.course_key))
# Satisfy the other requirement
with self.assertNumQueries(22):
with self.assertNumQueries(20):
api.set_credit_requirement_status(
user,
self.course_key,

View File

@@ -218,7 +218,7 @@ class TestCourseNextSectionUpdateResolver(SchedulesResolverTestMixin, ModuleStor
def test_schedule_context(self):
resolver = self.create_resolver()
# using this to make sure the select_related stays intact
with self.assertNumQueries(17):
with self.assertNumQueries(15):
sc = resolver.get_schedules()
schedules = list(sc)

View File

@@ -10,6 +10,7 @@ from django.core.cache import cache
from django.template import TemplateDoesNotExist
from django.utils.functional import cached_property
from fs.memoryfs import MemoryFS
from openedx.core.djangoapps.xblock.apps import get_xblock_app_config
from common.djangoapps.edxmako.shortcuts import render_to_string

View File

@@ -49,7 +49,7 @@ class TestCourseUpdatesPage(BaseCourseUpdatesTestCase):
# Fetch the view and verify that the query counts haven't changed
# TODO: decrease query count as part of REVO-28
with self.assertNumQueries(52, table_blacklist=QUERY_COUNT_TABLE_BLACKLIST):
with self.assertNumQueries(50, table_blacklist=QUERY_COUNT_TABLE_BLACKLIST):
with check_mongo_calls(4):
url = course_updates_url(self.course)
self.client.get(url)