diff --git a/common/djangoapps/student/models.py b/common/djangoapps/student/models.py index a0245e74e0..8f1a467ddd 100644 --- a/common/djangoapps/student/models.py +++ b/common/djangoapps/student/models.py @@ -21,6 +21,7 @@ from datetime import datetime, timedelta from functools import total_ordering from importlib import import_module from urllib.parse import urlencode +import warnings import six from config_models.models import ConfigurationModel @@ -153,12 +154,12 @@ class AnonymousUserId(models.Model): course_id = LearningContextKeyField(db_index=True, max_length=255, blank=True) -def anonymous_id_for_user(user, course_id, save=True): +def anonymous_id_for_user(user, course_id, save='DEPRECATED'): """ Inputs: user: User model course_id: string or None - save: Whether the id should be saved in an AnonymousUserId object, defaults to True + save: Deprecated and ignored: ID is always saved in an AnonymousUserId object Return a unique id for a (user, course_id) pair, suitable for inserting into e.g. personalized survey links. @@ -171,28 +172,56 @@ def anonymous_id_for_user(user, course_id, save=True): # This part is for ability to get xblock instance in xblock_noauth handlers, where user is unauthenticated. assert user + if save != 'DEPRECATED': + warnings.warn( + "anonymous_id_for_user no longer accepts save param and now " + "always saves the ID in the database", + DeprecationWarning + ) + if user.is_anonymous: return None # ARCHBOM-1674: Get a sense of what fraction of anonymous_user_id calls are # cached, stored in the DB, or retrieved from the DB. This will help inform - # us on decisions about whether we can move to always save IDs, - # pregenerate them, use random instead of deterministic IDs, etc. + # us on decisions about whether we can + # pregenerate IDs, use random instead of deterministic IDs, etc. monitoring.increment('temp_anon_uid_v2.requested') cached_id = getattr(user, '_anonymous_id', {}).get(course_id) if cached_id is not None: monitoring.increment('temp_anon_uid_v2.returned_from_cache') return cached_id - # check if an anonymous id already exists for this user and course_id combination + # Check if an anonymous id already exists for this user and + # course_id combination. Prefer the one with the highest record ID + # (see below.) anonymous_user_ids = AnonymousUserId.objects.filter(user=user).filter(course_id=course_id).order_by('-id') if anonymous_user_ids: # If there are multiple anonymous_user_ids per user, course_id pair - # select the row which was created most recently + # select the row which was created most recently. + # There might be more than one if the Django SECRET_KEY had + # previously been rotated at a time before this function was + # changed to always save the generated IDs to the DB. In that + # case, just pick the one with the highest record ID, which is + # probably the most recently created one. anonymous_user_id = anonymous_user_ids[0].anonymous_user_id monitoring.increment('temp_anon_uid_v2.fetched_existing') else: - # include the secret key as a salt, and to make the ids unique across different LMS installs. + # Uses SECRET_KEY as a cryptographic pepper. This + # deterministic ID generation means that concurrent identical + # calls to this function return the same value -- no need for + # locking. (There may be a low level of integrity errors on + # creation as a result of concurrent duplicate row inserts.) + # + # Consequences for this function of SECRET_KEY exposure: Data + # researchers and other third parties receiving these + # anonymous user IDs would be able to identify users across + # courses, and predict the anonymous user IDs of all users + # (but not necessarily identify their accounts.) + # + # Rotation process of SECRET_KEY with respect to this + # function: Rotate at will, since the hashes are stored and + # will not change. hasher = hashlib.md5() hasher.update(settings.SECRET_KEY.encode('utf8')) hasher.update(text_type(user.id).encode('utf8')) @@ -200,20 +229,17 @@ def anonymous_id_for_user(user, course_id, save=True): hasher.update(text_type(course_id).encode('utf-8')) anonymous_user_id = hasher.hexdigest() - if save is True: - try: - AnonymousUserId.objects.create( - user=user, - course_id=course_id, - anonymous_user_id=anonymous_user_id, - ) - monitoring.increment('temp_anon_uid_v2.stored') - except IntegrityError: - # Another thread has already created this entry, so - # continue - monitoring.increment('temp_anon_uid_v2.store_db_error') - else: - monitoring.increment('temp_anon_uid_v2.computed_unsaved') + try: + AnonymousUserId.objects.create( + user=user, + course_id=course_id, + anonymous_user_id=anonymous_user_id, + ) + monitoring.increment('temp_anon_uid_v2.stored') + except IntegrityError: + # Another thread has already created this entry, so + # continue + monitoring.increment('temp_anon_uid_v2.store_db_error') # cache the anonymous_id in the user object if not hasattr(user, '_anonymous_id'): @@ -829,17 +855,23 @@ class UserSignupSource(models.Model): site = models.CharField(max_length=255, db_index=True) -def unique_id_for_user(user, save=True): +def unique_id_for_user(user, save='DEPRECATED'): """ Return a unique id for a user, suitable for inserting into e.g. personalized survey links. Keyword arguments: - save -- Whether the id should be saved in an AnonymousUserId object. + save -- Deprecated and ignored: ID is always saved in an AnonymousUserId object """ + if save != 'DEPRECATED': + warnings.warn( + "unique_id_for_user no longer accepts save param and now " + "always saves the ID in the database", + DeprecationWarning + ) # Setting course_id to '' makes it not affect the generated hash, # and thus produce the old per-student anonymous id - return anonymous_id_for_user(user, None, save=save) + return anonymous_id_for_user(user, None) # TODO: Should be renamed to generic UserGroup, and possibly diff --git a/common/djangoapps/student/tests/tests.py b/common/djangoapps/student/tests/tests.py index cebb6d5552..53d7f06683 100644 --- a/common/djangoapps/student/tests/tests.py +++ b/common/djangoapps/student/tests/tests.py @@ -999,7 +999,7 @@ class AnonymousLookupTable(ModuleStoreTestCase): anonymous_id = anonymous_id_for_user(self.user, self.course.id) real_user = user_by_anonymous_id(anonymous_id) self.assertEqual(self.user, real_user) - self.assertEqual(anonymous_id, anonymous_id_for_user(self.user, self.course.id, save=False)) + self.assertEqual(anonymous_id, anonymous_id_for_user(self.user, self.course.id)) def test_roundtrip_with_unicode_course_id(self): course2 = CourseFactory.create(display_name=u"Omega Course Ω") @@ -1007,7 +1007,7 @@ class AnonymousLookupTable(ModuleStoreTestCase): anonymous_id = anonymous_id_for_user(self.user, course2.id) real_user = user_by_anonymous_id(anonymous_id) self.assertEqual(self.user, real_user) - self.assertEqual(anonymous_id, anonymous_id_for_user(self.user, course2.id, save=False)) + self.assertEqual(anonymous_id, anonymous_id_for_user(self.user, course2.id)) def test_anonymous_id_secret_key_changes_do_not_change_existing_anonymous_ids(self): """Test that a same anonymous id is returned when the SECRET_KEY changes.""" diff --git a/common/djangoapps/xblock_django/tests/test_user_service.py b/common/djangoapps/xblock_django/tests/test_user_service.py index ff6d148673..871ec11477 100644 --- a/common/djangoapps/xblock_django/tests/test_user_service.py +++ b/common/djangoapps/xblock_django/tests/test_user_service.py @@ -105,8 +105,7 @@ class UserServiceTestCase(TestCase): course_key = CourseKey.from_string('edX/toy/2012_Fall') anon_user_id = anonymous_id_for_user( user=self.user, - course_id=course_key, - save=True + course_id=course_key ) django_user_service = DjangoXBlockUserService(self.user, user_is_staff=True) diff --git a/common/djangoapps/xblock_django/user_service.py b/common/djangoapps/xblock_django/user_service.py index bd341089a5..bf3643ff36 100644 --- a/common/djangoapps/xblock_django/user_service.py +++ b/common/djangoapps/xblock_django/user_service.py @@ -66,7 +66,7 @@ class DjangoXBlockUserService(UserService): return None course_id = CourseKey.from_string(course_id) - return anonymous_id_for_user(user=user, course_id=course_id, save=False) + return anonymous_id_for_user(user=user, course_id=course_id) def _convert_django_user_to_xblock_user(self, django_user): """ diff --git a/lms/djangoapps/instructor/views/api.py b/lms/djangoapps/instructor/views/api.py index 26574c5e80..ffa0ce20c4 100644 --- a/lms/djangoapps/instructor/views/api.py +++ b/lms/djangoapps/instructor/views/api.py @@ -1413,7 +1413,7 @@ def get_anon_ids(request, course_id): courseenrollment__course_id=course_id, ).order_by('id') header = ['User ID', 'Anonymized User ID', 'Course Specific Anonymized User ID'] - rows = [[s.id, unique_id_for_user(s, save=False), anonymous_id_for_user(s, course_id, save=False)] + rows = [[s.id, unique_id_for_user(s), anonymous_id_for_user(s, course_id)] for s in students] return csv_response(text_type(course_id).replace('/', '-') + '-anon-ids.csv', header, rows) diff --git a/lms/djangoapps/teams/api.py b/lms/djangoapps/teams/api.py index 4e91892b9c..f0f0fe8668 100644 --- a/lms/djangoapps/teams/api.py +++ b/lms/djangoapps/teams/api.py @@ -392,7 +392,7 @@ def anonymous_user_ids_for_team(user, team): )) return sorted([ - anonymous_id_for_user(user=team_member, course_id=team.course_id, save=True) + anonymous_id_for_user(user=team_member, course_id=team.course_id) for team_member in team.users.all() ])