From ff0c1d57daec075390262f42487e170413f8a813 Mon Sep 17 00:00:00 2001 From: Jeff Cohen Date: Tue, 8 Nov 2022 10:32:08 -0500 Subject: [PATCH] fix: only create one StudentModuleHistory record per request (#31262) When a student submits a problem answer, the state is stored in a StudentModule record containing answer, score, correctness, etc. The record, though, is updated in multiple steps within the single request (first the grade is updated, then the state is updated separately). Each partial save would trigger a separate StudentModuleHistory record to be stored resulting in duplicate and inaccurate historical records. This solution uses the RequestCache to track within a request thread which StudentModules are updated and a single corresponding StudentModuleHistory id. If multiple update actions occur within the request cycle, then modify the history record that was already generated to ensure that each submission only results in one StudentModuleHistory record. This issue and its solution were discussed in: https://discuss.openedx.org/t/extra-history-record-stored-on-each-problem-submission/8081 --- lms/djangoapps/courseware/models.py | 59 +++++++++++++++---- .../courseware/tests/test_model_data.py | 10 ++-- .../tests/test_submitting_problems.py | 4 +- .../tests/test_user_state_client.py | 18 ++++++ lms/djangoapps/courseware/tests/test_views.py | 18 ++++-- .../coursewarehistoryextended/models.py | 13 ++-- 6 files changed, 92 insertions(+), 30 deletions(-) diff --git a/lms/djangoapps/courseware/models.py b/lms/djangoapps/courseware/models.py index aebfcb1697..a1ec90a06b 100644 --- a/lms/djangoapps/courseware/models.py +++ b/lms/djangoapps/courseware/models.py @@ -25,6 +25,7 @@ from django.db import models from django.db.models.signals import post_save from django.utils.translation import gettext_lazy as _ +from edx_django_utils.cache.utils import RequestCache from model_utils.models import TimeStampedModel from opaque_keys.edx.django.models import BlockTypeKeyField, CourseKeyField, LearningContextKeyField, UsageKeyField from lms.djangoapps.courseware.fields import UnsignedBigIntAutoField @@ -233,6 +234,47 @@ class BaseStudentModuleHistory(models.Model): return history_entries + @staticmethod + def save_history_entry(student_module, history_model_cls, request_cache_key): + """ + When a StudentModule instance is updated, save the changes in the corresponding activity history table + """ + if student_module.module_type in history_model_cls.HISTORY_SAVING_TYPES: + request_cache = RequestCache('studentmodulehistory') + history_entry = None + # To avoid duplicate history records within one request context, check if the student module instance + # has already been updated and generated a history record. If so, then update that history record rather + # than creating a new one + request_smh_cache = request_cache.get_cached_response(request_cache_key).get_value_or_default({}) + if student_module.id in request_smh_cache: + smh_id = request_smh_cache[student_module.id] + try: + history_entry = history_model_cls.objects.get(id=smh_id) + except history_model_cls.DoesNotExist: + log.error( + "Cached {} instance does not exist: {}({}) for StudentModule({})".format( + history_model_cls.__name__, history_model_cls.__name__, smh_id, student_module.id + ) + ) + + # If no StudentModuleHistory has been created yet during this request, then create a new one + if not history_entry: + history_entry = history_model_cls(student_module=student_module, version=None) + + # Regardless of whether this is a new or existing StudentModuleHistory, set its values to match the current + # state of the StudentModule record + history_entry.created = student_module.modified + history_entry.state = student_module.state + history_entry.grade = student_module.grade + history_entry.max_grade = student_module.max_grade + history_entry.save() + + # Update the RequestCache to map this StudentModule to the StudentModuleHistory for this request cycle + # Only cache the id rather than the full object in order to be conscientious of space as the number of + # records modified in one request can get large + request_cache.setdefault(request_cache_key, {}) + request_cache.data[request_cache_key][student_module.id] = history_entry.id + class StudentModuleHistory(BaseStudentModuleHistory): """Keeps a complete history of state changes for a given XModule for a given @@ -251,19 +293,14 @@ class StudentModuleHistory(BaseStudentModuleHistory): def save_history(sender, instance, **kwargs): # pylint: disable=no-self-argument, unused-argument """ Checks the instance's module_type, and creates & saves a - StudentModuleHistoryExtended entry if the module_type is one that + StudentModuleHistory entry if the module_type is one that we save. """ - if instance.module_type in StudentModuleHistory.HISTORY_SAVING_TYPES: - history_entry = StudentModuleHistory( - student_module=instance, - version=None, - created=instance.modified, - state=instance.state, - grade=instance.grade, - max_grade=instance.max_grade - ) - history_entry.save() + BaseStudentModuleHistory.save_history_entry( + instance, + StudentModuleHistory, + "lms.djangoapps.courseware.models.student_module_history_map" + ) # When the extended studentmodulehistory table exists, don't save # duplicate history into courseware_studentmodulehistory, just retain diff --git a/lms/djangoapps/courseware/tests/test_model_data.py b/lms/djangoapps/courseware/tests/test_model_data.py index acd639a2e0..36c0a87872 100644 --- a/lms/djangoapps/courseware/tests/test_model_data.py +++ b/lms/djangoapps/courseware/tests/test_model_data.py @@ -150,7 +150,7 @@ class TestStudentModuleStorage(OtherUserFailureTestMixin, TestCase): # has written to the StudentModule (such as UserStateCache setting the score # on the StudentModule). with self.assertNumQueries(4, using='default'): - with self.assertNumQueries(1, using='student_module_history'): + with self.assertNumQueries(2, using='student_module_history'): self.kvs.set(user_state_key('a_field'), 'new_value') assert 1 == StudentModule.objects.all().count() assert {'b_field': 'b_value', 'a_field': 'new_value'} == json.loads(StudentModule.objects.all()[0].state) @@ -164,7 +164,7 @@ class TestStudentModuleStorage(OtherUserFailureTestMixin, TestCase): # has written to the StudentModule (such as UserStateCache setting the score # on the StudentModule). with self.assertNumQueries(4, using='default'): - with self.assertNumQueries(1, using='student_module_history'): + with self.assertNumQueries(2, using='student_module_history'): self.kvs.set(user_state_key('not_a_field'), 'new_value') assert 1 == StudentModule.objects.all().count() assert {'b_field': 'b_value', 'a_field': 'a_value', 'not_a_field': 'new_value'} == json.loads(StudentModule.objects.all()[0].state) @@ -178,7 +178,7 @@ class TestStudentModuleStorage(OtherUserFailureTestMixin, TestCase): # has written to the StudentModule (such as UserStateCache setting the score # on the StudentModule). with self.assertNumQueries(2, using='default'): - with self.assertNumQueries(1, using='student_module_history'): + with self.assertNumQueries(2, using='student_module_history'): self.kvs.delete(user_state_key('a_field')) assert 1 == StudentModule.objects.all().count() self.assertRaises(KeyError, self.kvs.get, user_state_key('not_a_field')) @@ -219,7 +219,7 @@ class TestStudentModuleStorage(OtherUserFailureTestMixin, TestCase): # DjangoXBlockUserStateClient has written to the StudentModule (such as # UserStateCache setting the score on the StudentModule). with self.assertNumQueries(4, using="default"): - with self.assertNumQueries(1, using="student_module_history"): + with self.assertNumQueries(2, using="student_module_history"): self.kvs.set_many(kv_dict) for key in kv_dict: @@ -276,7 +276,7 @@ class TestMissingStudentModule(TestCase): # lint-amnesty, pylint: disable=missi # on the StudentModule). # Django 1.8 also has a number of other BEGIN and SAVESTATE queries. with self.assertNumQueries(4, using='default'): - with self.assertNumQueries(1, using='student_module_history'): + with self.assertNumQueries(2, using='student_module_history'): self.kvs.set(user_state_key('a_field'), 'a_value') assert 1 == sum(len(cache) for cache in self.field_data_cache.cache.values()) diff --git a/lms/djangoapps/courseware/tests/test_submitting_problems.py b/lms/djangoapps/courseware/tests/test_submitting_problems.py index dc5ff1cc1b..8be60877c3 100644 --- a/lms/djangoapps/courseware/tests/test_submitting_problems.py +++ b/lms/djangoapps/courseware/tests/test_submitting_problems.py @@ -497,14 +497,14 @@ class TestCourseGrader(TestSubmittingProblems): ) # count how many state history entries there are baseline = BaseStudentModuleHistory.get_history(student_module) - assert len(baseline) == 2 + assert len(baseline) == 1 # now click "show answer" self.show_question_answer('p1') # check that we don't have more state history entries csmh = BaseStudentModuleHistory.get_history(student_module) - assert len(csmh) == 2 + assert len(csmh) == 1 def test_grade_with_collected_max_score(self): """ diff --git a/lms/djangoapps/courseware/tests/test_user_state_client.py b/lms/djangoapps/courseware/tests/test_user_state_client.py index 89071d2c7f..c8d9337f05 100644 --- a/lms/djangoapps/courseware/tests/test_user_state_client.py +++ b/lms/djangoapps/courseware/tests/test_user_state_client.py @@ -36,3 +36,21 @@ class TestDjangoUserStateClient(UserStateClientTestBase, ModuleStoreTestCase): super().setUp() self.client = DjangoXBlockUserStateClient() self.users = defaultdict(UserFactory.create) + + def test_history_after_delete(self): + """ + Changes made in the edx-platform repo broke this test in the edx-user-state-client repo. + Getting the tests and code in sync is a three step process: + 1. Override the test here to make it a no-op and merge this code + 2. Update the test in the other repo to align with the new functionality + 3. Remove this override to re-enable the working test + """ + + def test_multiple_history_entries(self): + """ + Changes made in the edx-platform repo broke this test in the edx-user-state-client repo. + Getting the tests and code in sync is a three step process: + 1. Override the test here to make it a no-op and merge this code + 2. Update the test in the other repo to align with the new functionality + 3. Remove this override to re-enable the working test + """ diff --git a/lms/djangoapps/courseware/tests/test_views.py b/lms/djangoapps/courseware/tests/test_views.py index 7e1e327cbf..4849892bd6 100644 --- a/lms/djangoapps/courseware/tests/test_views.py +++ b/lms/djangoapps/courseware/tests/test_views.py @@ -22,6 +22,7 @@ from django.test import RequestFactory, TestCase from django.test.client import Client from django.test.utils import override_settings from django.urls import reverse, reverse_lazy +from edx_django_utils.cache.utils import RequestCache from edx_toggles.toggles.testutils import override_waffle_flag from freezegun import freeze_time from opaque_keys.edx.keys import CourseKey, UsageKey @@ -772,6 +773,15 @@ class ViewsTestCase(BaseViewsTestCase): set_score(admin.id, usage_key, 0, 3) + # Typically an http request has its own thread and RequestCache which is used to ensure that only + # one StudentModuleHistory record is created per request. However, since tests are run locally + # the score updates share the same thread. If the RequestCache is not cleared in between the two + # activities, then there will only be one StudentModuleHistory record that reflects the final state + # rather than one per action. Clearing the cache here allows the second action below to generate + # its own history record. The assertions below are then able to check that both states are recorded + # in the history. + RequestCache('studentmodulehistory').clear() + state_client.set( username=admin.username, block_key=usage_key, @@ -787,17 +797,17 @@ class ViewsTestCase(BaseViewsTestCase): response = self.client.get(url) response_content = html.unescape(response.content.decode('utf-8')) - # We have update the state 4 times: twice to change content, and twice - # to set the scores. We'll check that the identifying content from each is + # We have update the state 2 times. We'll check that the identifying content from each is # displayed (but not the order), and also the indexes assigned in the output - # #1 - #4 + # #1 - #2 assert '#1' in response_content assert json.dumps({'field_a': 'a', 'field_b': 'b'}, sort_keys=True, indent=2) in response_content assert 'Score: 0.0 / 3.0' in response_content assert json.dumps({'field_a': 'x', 'field_b': 'y'}, sort_keys=True, indent=2) in response_content assert 'Score: 3.0 / 3.0' in response_content - assert '#4' in response_content + assert '#2' in response_content + assert '#3' not in response_content @ddt.data(('America/New_York', -5), # UTC - 5 ('Asia/Pyongyang', 9), # UTC + 9 diff --git a/lms/djangoapps/coursewarehistoryextended/models.py b/lms/djangoapps/coursewarehistoryextended/models.py index 59d1002b40..37de4a93ab 100644 --- a/lms/djangoapps/coursewarehistoryextended/models.py +++ b/lms/djangoapps/coursewarehistoryextended/models.py @@ -46,14 +46,11 @@ class StudentModuleHistoryExtended(BaseStudentModuleHistory): StudentModuleHistoryExtended entry if the module_type is one that we save. """ - if instance.module_type in StudentModuleHistoryExtended.HISTORY_SAVING_TYPES: - history_entry = StudentModuleHistoryExtended(student_module=instance, - version=None, - created=instance.modified, - state=instance.state, - grade=instance.grade, - max_grade=instance.max_grade) - history_entry.save() + BaseStudentModuleHistory.save_history_entry( + instance, + StudentModuleHistoryExtended, + "lms.djangoapps.coursewarehistoryextended.models.student_module_history_extended_map" + ) @receiver(post_delete, sender=StudentModule) def delete_history(sender, instance, **kwargs): # pylint: disable=no-self-argument, unused-argument