From 2723e0e2bd3050fc292b2e6ffee47a3d01d0708d Mon Sep 17 00:00:00 2001 From: Manjinder Singh <49171515+jinder1s@users.noreply.github.com> Date: Wed, 27 Jan 2021 07:23:19 -0500 Subject: [PATCH] [ARCHBOM-1645] Modifying anonymous_id_for_user() to handly SECRET_KEY rotation (#26162) These changes were initially made to make it easier to do SECRET_KEY rotations. Along the way, we found it made sense to refractor the code as well. Changes made: - changed get_to_create to create because now the code should only get to this block when a write is necessary - added a lookup for anonymous_user_id. This is to return an existing anonymous_user_id rather than calculating. This will mitigate the results of SECRET_KEY rotation. - Added monitoring to help us make better decisions: should we not sue SECRET_KEY, performance considerations... - put old function behind toggle in case something goes wrong in production with new code - refractoring function structure for better understanding --- common/djangoapps/student/models.py | 82 ++++++++++++++++++- common/djangoapps/student/tests/tests.py | 35 +++++++- common/lib/xmodule/xmodule/graders.py | 2 +- .../certificates/apis/v0/tests/test_views.py | 2 +- lms/djangoapps/courseware/tests/test_views.py | 6 +- .../grades/tests/test_course_grade_factory.py | 2 +- .../core/djangoapps/credit/tests/test_api.py | 2 +- .../schedules/tests/test_resolvers.py | 2 +- .../core/djangoapps/xblock/runtime/shims.py | 1 + .../tests/views/test_course_updates.py | 2 +- 10 files changed, 123 insertions(+), 13 deletions(-) diff --git a/common/djangoapps/student/models.py b/common/djangoapps/student/models.py index cbc95c216a..bfe74f3d58 100644 --- a/common/djangoapps/student/models.py +++ b/common/djangoapps/student/models.py @@ -149,11 +149,91 @@ class AnonymousUserId(models.Model): def anonymous_id_for_user(user, course_id, save=True): """ - Return a unique id for a (user, course) pair, suitable for inserting + Inputs: + user: User model + course_id: string or None + save: Whether the id should be saved in an AnonymousUserId object, defaults to True + + Return a unique id for a (user, course_id) pair, suitable for inserting into e.g. personalized survey links. If user is an `AnonymousUser`, returns `None` + else If this user/course_id pair already has an anonymous id in AnonymousUserId object, return that + else: create new anonymous_id, save it in AnonymousUserId, and return anonymous id + """ + # .. toggle_name: ANONYMOUS_USER_ID_REVERT_TO_STABLE_HASH + # .. toggle_implementation: SettingToggle + # .. toggle_default: False + # .. toggle_description: Used to make sure we can quickly revert back to old behaviour in case our refractoring + # of anonymous_id_for_user function does not work as intended. Our concern is that our refractoring adds a + # database lookup on every call and we are worried this will cause preformance issues. + # .. toggle_use_cases: temporary + # .. toggle_creation_date: 2021-01-26 + # .. toggle_target_removal_date: 2021-01-29 + # .. toggle_tickets: https://openedx.atlassian.net/browse/ARCHBOM-1645 + if getattr(settings, "ANONYMOUS_USER_ID_REVERT_TO_STABLE_HASH", False): + return deprecated_anonymous_id_for_user(user, course_id, save) + + # This part is for ability to get xblock instance in xblock_noauth handlers, where user is unauthenticated. + assert user + + if user.is_anonymous: + return None + + # ARCHBOM-1674: Get a sense of what fraction of anonymous_user_id calls are + # cached, stored in the DB, or retrieved from the DB. This will help inform + # us on decisions about whether we can move to always save IDs, + # pregenerate them, use random instead of deterministic IDs, etc. + monitoring.increment('temp_anonymous_user_id.requested') + + cached_id = getattr(user, '_anonymous_id', {}).get(course_id) + if cached_id is not None: + monitoring.increment('temp_anonymous_user_id.returned_from_cache') + return cached_id + # check if an anonymous id already exists for this user and course_id combination + anonymous_user_ids = AnonymousUserId.objects.filter(user=user).filter(course_id=course_id).order_by('-id') + if anonymous_user_ids: + # If there are multiple anonymous_user_ids per user, course_id pair + # select the row which was created most recently + anonymous_user_id = anonymous_user_ids[0].anonymous_user_id + else: + # include the secret key as a salt, and to make the ids unique across different LMS installs. + hasher = hashlib.md5() + hasher.update(settings.SECRET_KEY.encode('utf8')) + hasher.update(text_type(user.id).encode('utf8')) + if course_id: + hasher.update(text_type(course_id).encode('utf-8')) + anonymous_user_id = hasher.hexdigest() + + if save is True: + monitoring.increment('temp_anonymous_user_id.computed_stored') + try: + AnonymousUserId.objects.create( + user=user, + course_id=course_id, + anonymous_user_id=anonymous_user_id, + ) + except IntegrityError: + # Another thread has already created this entry, so + # continue + monitoring.increment('temp_anonymous_user_id.computed_already_present') + else: + monitoring.increment('temp_anonymous_user_id.computed_unsaved') + + # cache the anonymous_id in the user object + if not hasattr(user, '_anonymous_id'): + user._anonymous_id = {} # pylint: disable=protected-access + user._anonymous_id[course_id] = anonymous_user_id # pylint: disable=protected-access + + return anonymous_user_id + + +def deprecated_anonymous_id_for_user(user, course_id, save=True): + """ + Return a unique id for a (user, course) pair, suitable for inserting + into e.g. personalized survey links. + If user is an `AnonymousUser`, returns `None` Keyword arguments: save -- Whether the id should be saved in an AnonymousUserId object. """ diff --git a/common/djangoapps/student/tests/tests.py b/common/djangoapps/student/tests/tests.py index 339ede82e5..9ee15048e9 100644 --- a/common/djangoapps/student/tests/tests.py +++ b/common/djangoapps/student/tests/tests.py @@ -39,6 +39,7 @@ from common.djangoapps.student.models import ( CourseEnrollment, LinkedInAddToProfileConfiguration, UserAttribute, + AnonymousUserId, anonymous_id_for_user, unique_id_for_user, user_by_anonymous_id @@ -969,10 +970,26 @@ class AnonymousLookupTable(ModuleStoreTestCase): mode_slug='honor', mode_display_name='Honor Code', ) + self.user2 = UserFactory.create() patcher = patch('common.djangoapps.student.models.tracker') patcher.start() self.addCleanup(patcher.stop) + def test_same_user_over_multiple_sessions(self): + """ + Anonymous ids are stored in AnonymousUserId model. + This tests to make sure stored value is used rather than a creating a new one + """ + anonymous_id_1 = anonymous_id_for_user(self.user, None) + delattr(self.user, "_anonymous_id") # pylint: disable=literal-used-as-attribute + anonymous_id_2 = anonymous_id_for_user(self.user, None) + self.assertEqual(anonymous_id_1, anonymous_id_2) + + def test_diff_anonymous_id_for_diff_users(self): + anonymous_id_1 = anonymous_id_for_user(self.user, None) + anonymous_id_2 = anonymous_id_for_user(self.user2, None) + self.assertNotEqual(anonymous_id_1, anonymous_id_2) + def test_for_unregistered_user(self): # same path as for logged out user self.assertEqual(None, anonymous_id_for_user(AnonymousUser(), self.course.id)) self.assertIsNone(user_by_anonymous_id(None)) @@ -992,18 +1009,30 @@ class AnonymousLookupTable(ModuleStoreTestCase): self.assertEqual(self.user, real_user) self.assertEqual(anonymous_id, anonymous_id_for_user(self.user, course2.id, save=False)) - def test_secret_key_changes(self): - """Test that a new anonymous id is returned when the secret key changes.""" + def test_anonymous_id_secret_key_changes_do_not_change_existing_anonymous_ids(self): + """Test that a same anonymous id is returned when the SECRET_KEY changes.""" CourseEnrollment.enroll(self.user, self.course.id) anonymous_id = anonymous_id_for_user(self.user, self.course.id) with override_settings(SECRET_KEY='some_new_and_totally_secret_key'): # Recreate user object to clear cached anonymous id. self.user = User.objects.get(pk=self.user.id) new_anonymous_id = anonymous_id_for_user(self.user, self.course.id) - self.assertNotEqual(anonymous_id, new_anonymous_id) + self.assertEqual(anonymous_id, new_anonymous_id) self.assertEqual(self.user, user_by_anonymous_id(anonymous_id)) self.assertEqual(self.user, user_by_anonymous_id(new_anonymous_id)) + def test_anonymous_id_secret_key_changes_result_in_diff_values_for_same_new_user(self): + """Test that a different anonymous id is returned when the SECRET_KEY changes.""" + CourseEnrollment.enroll(self.user, self.course.id) + anonymous_id = anonymous_id_for_user(self.user, self.course.id) + with override_settings(SECRET_KEY='some_new_and_totally_secret_key'): + # Recreate user object to clear cached anonymous id. + self.user = User.objects.get(pk=self.user.id) + AnonymousUserId.objects.filter(user=self.user).filter(course_id=self.course.id).delete() + new_anonymous_id = anonymous_id_for_user(self.user, self.course.id) + self.assertNotEqual(anonymous_id, new_anonymous_id) + self.assertEqual(self.user, user_by_anonymous_id(new_anonymous_id)) + @skip_unless_lms @patch('openedx.core.djangoapps.programs.utils.get_programs') diff --git a/common/lib/xmodule/xmodule/graders.py b/common/lib/xmodule/xmodule/graders.py index 503935c181..3ce3dfa329 100644 --- a/common/lib/xmodule/xmodule/graders.py +++ b/common/lib/xmodule/xmodule/graders.py @@ -260,7 +260,7 @@ class WeightedSubsectionsGrader(CourseGrader): a value > 1, the student may end up with a percent > 100%. This allows for sections that are extra credit. """ - def __init__(self, subgraders): + def __init__(self, subgraders): # pylint: disable=super-init-not-called self.subgraders = subgraders @property diff --git a/lms/djangoapps/certificates/apis/v0/tests/test_views.py b/lms/djangoapps/certificates/apis/v0/tests/test_views.py index dee9b063d4..d73f26628a 100644 --- a/lms/djangoapps/certificates/apis/v0/tests/test_views.py +++ b/lms/djangoapps/certificates/apis/v0/tests/test_views.py @@ -325,7 +325,7 @@ class CertificatesListRestApiTest(AuthAndScopesTestMixin, SharedModuleStoreTestC def test_query_counts(self): # Test student with no certificates student_no_cert = UserFactory.create(password=self.user_password) - with self.assertNumQueries(20): + with self.assertNumQueries(18): resp = self.get_response( AuthType.jwt, requesting_user=self.global_staff, diff --git a/lms/djangoapps/courseware/tests/test_views.py b/lms/djangoapps/courseware/tests/test_views.py index cf520f398c..4c62a494f6 100644 --- a/lms/djangoapps/courseware/tests/test_views.py +++ b/lms/djangoapps/courseware/tests/test_views.py @@ -269,8 +269,8 @@ class IndexQueryTestCase(ModuleStoreTestCase): NUM_PROBLEMS = 20 @ddt.data( - (ModuleStoreEnum.Type.mongo, 10, 175), - (ModuleStoreEnum.Type.split, 4, 171), + (ModuleStoreEnum.Type.mongo, 10, 171), + (ModuleStoreEnum.Type.split, 4, 167), ) @ddt.unpack def test_index_query_counts(self, store_type, expected_mongo_query_count, expected_mysql_query_count): @@ -1439,7 +1439,7 @@ class ProgressPageTests(ProgressPageBaseTests): @patch.dict(settings.FEATURES, {'ASSUME_ZERO_GRADE_IF_ABSENT_FOR_ALL_TESTS': False}) @ddt.data( - (False, 63, 44), + (False, 61, 44), (True, 54, 39) ) @ddt.unpack diff --git a/lms/djangoapps/grades/tests/test_course_grade_factory.py b/lms/djangoapps/grades/tests/test_course_grade_factory.py index c410729a0a..2c085b35cc 100644 --- a/lms/djangoapps/grades/tests/test_course_grade_factory.py +++ b/lms/djangoapps/grades/tests/test_course_grade_factory.py @@ -100,7 +100,7 @@ class TestCourseGradeFactory(GradeTestBase): with self.assertNumQueries(3), mock_get_score(1, 2): _assert_read(expected_pass=False, expected_percent=0) # start off with grade of 0 - num_queries = 44 + num_queries = 42 with self.assertNumQueries(num_queries), mock_get_score(1, 2): grade_factory.update(self.request.user, self.course, force_update_subsections=True) diff --git a/openedx/core/djangoapps/credit/tests/test_api.py b/openedx/core/djangoapps/credit/tests/test_api.py index 2eafd477e3..3f0f9452f9 100644 --- a/openedx/core/djangoapps/credit/tests/test_api.py +++ b/openedx/core/djangoapps/credit/tests/test_api.py @@ -664,7 +664,7 @@ class CreditRequirementApiTests(CreditApiTestBase): self.assertFalse(api.is_user_eligible_for_credit(user.username, self.course_key)) # Satisfy the other requirement - with self.assertNumQueries(22): + with self.assertNumQueries(20): api.set_credit_requirement_status( user, self.course_key, diff --git a/openedx/core/djangoapps/schedules/tests/test_resolvers.py b/openedx/core/djangoapps/schedules/tests/test_resolvers.py index 873896651c..491405f3b8 100644 --- a/openedx/core/djangoapps/schedules/tests/test_resolvers.py +++ b/openedx/core/djangoapps/schedules/tests/test_resolvers.py @@ -218,7 +218,7 @@ class TestCourseNextSectionUpdateResolver(SchedulesResolverTestMixin, ModuleStor def test_schedule_context(self): resolver = self.create_resolver() # using this to make sure the select_related stays intact - with self.assertNumQueries(17): + with self.assertNumQueries(15): sc = resolver.get_schedules() schedules = list(sc) diff --git a/openedx/core/djangoapps/xblock/runtime/shims.py b/openedx/core/djangoapps/xblock/runtime/shims.py index 0cc43b6cff..7b9e695396 100644 --- a/openedx/core/djangoapps/xblock/runtime/shims.py +++ b/openedx/core/djangoapps/xblock/runtime/shims.py @@ -10,6 +10,7 @@ from django.core.cache import cache from django.template import TemplateDoesNotExist from django.utils.functional import cached_property from fs.memoryfs import MemoryFS + from openedx.core.djangoapps.xblock.apps import get_xblock_app_config from common.djangoapps.edxmako.shortcuts import render_to_string diff --git a/openedx/features/course_experience/tests/views/test_course_updates.py b/openedx/features/course_experience/tests/views/test_course_updates.py index 0092588dcd..d0ae2996f3 100644 --- a/openedx/features/course_experience/tests/views/test_course_updates.py +++ b/openedx/features/course_experience/tests/views/test_course_updates.py @@ -49,7 +49,7 @@ class TestCourseUpdatesPage(BaseCourseUpdatesTestCase): # Fetch the view and verify that the query counts haven't changed # TODO: decrease query count as part of REVO-28 - with self.assertNumQueries(52, table_blacklist=QUERY_COUNT_TABLE_BLACKLIST): + with self.assertNumQueries(50, table_blacklist=QUERY_COUNT_TABLE_BLACKLIST): with check_mongo_calls(4): url = course_updates_url(self.course) self.client.get(url)