From cd6064692681ab99912e3da3721cd857a0b313e9 Mon Sep 17 00:00:00 2001 From: Manjinder Singh <49171515+jinder1s@users.noreply.github.com> Date: Wed, 10 Feb 2021 07:37:27 -0500 Subject: [PATCH] fix: Switch anonymous user ID hash from md5 to shake (#26198) Now that we always return an existing value from the DB rather than trusting that ID generation is deterministic and constant over time, we're free to change the generation algorithm. Our long-term goal is to switch to random IDs, but we need to first investigate the uses of save=False. In the meantime, this is a good opportunity to move away from MD5, which has a number of cryptographic weaknesses. None of the known vulnerabilities are considered exploitable in this location, given the limited ability to control the input to the hash, but we should generally be moving away from it everywhere for consistency. This change should not be breaking even for save=False callers, since those calls are extremely rare (1 in 100,000) and should only occur after a save=True call, at which point they'll use the stored value. Even if this were not true, for a save=False/True pair of calls to result in a mismatch in output, the first of the calls would have to occur around the time of the deploy of this code. Co-authored-by: Tim McCormack Co-authored-by: Tim McCormack --- common/djangoapps/student/models.py | 5 +++-- lms/djangoapps/courseware/tests/test_module_render.py | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/common/djangoapps/student/models.py b/common/djangoapps/student/models.py index 8f1a467ddd..86177ee397 100644 --- a/common/djangoapps/student/models.py +++ b/common/djangoapps/student/models.py @@ -222,12 +222,13 @@ def anonymous_id_for_user(user, course_id, save='DEPRECATED'): # Rotation process of SECRET_KEY with respect to this # function: Rotate at will, since the hashes are stored and # will not change. 
- hasher = hashlib.md5() + # include the secret key as a salt, and to make the ids unique across different LMS installs. + hasher = hashlib.shake_128() hasher.update(settings.SECRET_KEY.encode('utf8')) hasher.update(text_type(user.id).encode('utf8')) if course_id: hasher.update(text_type(course_id).encode('utf-8')) - anonymous_user_id = hasher.hexdigest() + anonymous_user_id = hasher.hexdigest(16) # pylint: disable=too-many-function-args try: AnonymousUserId.objects.create( diff --git a/lms/djangoapps/courseware/tests/test_module_render.py b/lms/djangoapps/courseware/tests/test_module_render.py index 04c6461269..ef01e42fa1 100644 --- a/lms/djangoapps/courseware/tests/test_module_render.py +++ b/lms/djangoapps/courseware/tests/test_module_render.py @@ -1991,7 +1991,7 @@ class TestAnonymousStudentId(SharedModuleStoreTestCase, LoginEnrollmentTestCase) self.assertEqual( # This value is set by observation, so that later changes to the student # id computation don't break old data - '5afe5d9bb03796557ee2614f5c9611fb', + 'de619ab51c7f4e9c7216b4644c24f3b5', self._get_anonymous_id(CourseKey.from_string(course_id), descriptor_class) ) @@ -2000,14 +2000,14 @@ class TestAnonymousStudentId(SharedModuleStoreTestCase, LoginEnrollmentTestCase) self.assertEqual( # This value is set by observation, so that later changes to the student # id computation don't break old data - 'e3b0b940318df9c14be59acb08e78af5', + '0c706d119cad686d28067412b9178454', self._get_anonymous_id(CourseKey.from_string('MITx/6.00x/2012_Fall'), descriptor_class) ) self.assertEqual( # This value is set by observation, so that later changes to the student # id computation don't break old data - 'f82b5416c9f54b5ce33989511bb5ef2e', + 'e9969c28c12c8efa6e987d6dbeedeb0b', self._get_anonymous_id(CourseKey.from_string('MITx/6.00x/2013_Spring'), descriptor_class) )