fix: only create one StudentModuleHistory record per request (#31262)

When a student submits a problem answer, the state is stored in a
StudentModule record containing answer, score, correctness, etc. The
record, though, is updated in multiple steps within the single request
(first the grade is updated, then the state is updated separately).
Each partial save would trigger a separate StudentModuleHistory record
to be stored resulting in duplicate and inaccurate historical records.

This solution uses the RequestCache to track within a request thread
which StudentModules are updated and a single corresponding
StudentModuleHistory id. If multiple update actions occur within the
request cycle, then modify the history record that was already
generated to ensure that each submission only results in one
StudentModuleHistory record.

This issue and its solution were discussed in:

  https://discuss.openedx.org/t/extra-history-record-stored-on-each-problem-submission/8081
This commit is contained in:
Jeff Cohen
2022-11-08 10:32:08 -05:00
committed by GitHub
parent f03b141f05
commit ff0c1d57da
6 changed files with 92 additions and 30 deletions

View File

@@ -25,6 +25,7 @@ from django.db import models
from django.db.models.signals import post_save
from django.utils.translation import gettext_lazy as _
from edx_django_utils.cache.utils import RequestCache
from model_utils.models import TimeStampedModel
from opaque_keys.edx.django.models import BlockTypeKeyField, CourseKeyField, LearningContextKeyField, UsageKeyField
from lms.djangoapps.courseware.fields import UnsignedBigIntAutoField
@@ -233,6 +234,47 @@ class BaseStudentModuleHistory(models.Model):
return history_entries
@staticmethod
def save_history_entry(student_module, history_model_cls, request_cache_key):
"""
When a StudentModule instance is updated, save the changes in the corresponding activity history table
"""
if student_module.module_type in history_model_cls.HISTORY_SAVING_TYPES:
request_cache = RequestCache('studentmodulehistory')
history_entry = None
# To avoid duplicate history records within one request context, check if the student module instance
# has already been updated and generated a history record. If so, then update that history record rather
# than creating a new one
request_smh_cache = request_cache.get_cached_response(request_cache_key).get_value_or_default({})
if student_module.id in request_smh_cache:
smh_id = request_smh_cache[student_module.id]
try:
history_entry = history_model_cls.objects.get(id=smh_id)
except history_model_cls.DoesNotExist:
log.error(
"Cached {} instance does not exist: {}({}) for StudentModule({})".format(
history_model_cls.__name__, history_model_cls.__name__, smh_id, student_module.id
)
)
# If no StudentModuleHistory has been created yet during this request, then create a new one
if not history_entry:
history_entry = history_model_cls(student_module=student_module, version=None)
# Regardless of whether this is a new or existing StudentModuleHistory, set its values to match the current
# state of the StudentModule record
history_entry.created = student_module.modified
history_entry.state = student_module.state
history_entry.grade = student_module.grade
history_entry.max_grade = student_module.max_grade
history_entry.save()
# Update the RequestCache to map this StudentModule to the StudentModuleHistory for this request cycle
# Only cache the id rather than the full object in order to be conscientious of space as the number of
# records modified in one request can get large
request_cache.setdefault(request_cache_key, {})
request_cache.data[request_cache_key][student_module.id] = history_entry.id
class StudentModuleHistory(BaseStudentModuleHistory):
"""Keeps a complete history of state changes for a given XModule for a given
@@ -251,19 +293,14 @@ class StudentModuleHistory(BaseStudentModuleHistory):
def save_history(sender, instance, **kwargs): # pylint: disable=no-self-argument, unused-argument
"""
Checks the instance's module_type, and creates & saves a
StudentModuleHistoryExtended entry if the module_type is one that
StudentModuleHistory entry if the module_type is one that
we save.
"""
if instance.module_type in StudentModuleHistory.HISTORY_SAVING_TYPES:
history_entry = StudentModuleHistory(
student_module=instance,
version=None,
created=instance.modified,
state=instance.state,
grade=instance.grade,
max_grade=instance.max_grade
)
history_entry.save()
BaseStudentModuleHistory.save_history_entry(
instance,
StudentModuleHistory,
"lms.djangoapps.courseware.models.student_module_history_map"
)
# When the extended studentmodulehistory table exists, don't save
# duplicate history into courseware_studentmodulehistory, just retain

View File

@@ -150,7 +150,7 @@ class TestStudentModuleStorage(OtherUserFailureTestMixin, TestCase):
# has written to the StudentModule (such as UserStateCache setting the score
# on the StudentModule).
with self.assertNumQueries(4, using='default'):
with self.assertNumQueries(1, using='student_module_history'):
with self.assertNumQueries(2, using='student_module_history'):
self.kvs.set(user_state_key('a_field'), 'new_value')
assert 1 == StudentModule.objects.all().count()
assert {'b_field': 'b_value', 'a_field': 'new_value'} == json.loads(StudentModule.objects.all()[0].state)
@@ -164,7 +164,7 @@ class TestStudentModuleStorage(OtherUserFailureTestMixin, TestCase):
# has written to the StudentModule (such as UserStateCache setting the score
# on the StudentModule).
with self.assertNumQueries(4, using='default'):
with self.assertNumQueries(1, using='student_module_history'):
with self.assertNumQueries(2, using='student_module_history'):
self.kvs.set(user_state_key('not_a_field'), 'new_value')
assert 1 == StudentModule.objects.all().count()
assert {'b_field': 'b_value', 'a_field': 'a_value', 'not_a_field': 'new_value'} == json.loads(StudentModule.objects.all()[0].state)
@@ -178,7 +178,7 @@ class TestStudentModuleStorage(OtherUserFailureTestMixin, TestCase):
# has written to the StudentModule (such as UserStateCache setting the score
# on the StudentModule).
with self.assertNumQueries(2, using='default'):
with self.assertNumQueries(1, using='student_module_history'):
with self.assertNumQueries(2, using='student_module_history'):
self.kvs.delete(user_state_key('a_field'))
assert 1 == StudentModule.objects.all().count()
self.assertRaises(KeyError, self.kvs.get, user_state_key('not_a_field'))
@@ -219,7 +219,7 @@ class TestStudentModuleStorage(OtherUserFailureTestMixin, TestCase):
# DjangoXBlockUserStateClient has written to the StudentModule (such as
# UserStateCache setting the score on the StudentModule).
with self.assertNumQueries(4, using="default"):
with self.assertNumQueries(1, using="student_module_history"):
with self.assertNumQueries(2, using="student_module_history"):
self.kvs.set_many(kv_dict)
for key in kv_dict:
@@ -276,7 +276,7 @@ class TestMissingStudentModule(TestCase): # lint-amnesty, pylint: disable=missi
# on the StudentModule).
# Django 1.8 also has a number of other BEGIN and SAVESTATE queries.
with self.assertNumQueries(4, using='default'):
with self.assertNumQueries(1, using='student_module_history'):
with self.assertNumQueries(2, using='student_module_history'):
self.kvs.set(user_state_key('a_field'), 'a_value')
assert 1 == sum(len(cache) for cache in self.field_data_cache.cache.values())

View File

@@ -497,14 +497,14 @@ class TestCourseGrader(TestSubmittingProblems):
)
# count how many state history entries there are
baseline = BaseStudentModuleHistory.get_history(student_module)
assert len(baseline) == 2
assert len(baseline) == 1
# now click "show answer"
self.show_question_answer('p1')
# check that we don't have more state history entries
csmh = BaseStudentModuleHistory.get_history(student_module)
assert len(csmh) == 2
assert len(csmh) == 1
def test_grade_with_collected_max_score(self):
"""

View File

@@ -36,3 +36,21 @@ class TestDjangoUserStateClient(UserStateClientTestBase, ModuleStoreTestCase):
super().setUp()
self.client = DjangoXBlockUserStateClient()
self.users = defaultdict(UserFactory.create)
def test_history_after_delete(self):
"""
Changes made in the edx-platform repo broke this test in the edx-user-state-client repo.
Getting the tests and code in sync is a three step process:
1. Override the test here to make it a no-op and merge this code
2. Update the test in the other repo to align with the new functionality
3. Remove this override to re-enable the working test
"""
def test_multiple_history_entries(self):
"""
Changes made in the edx-platform repo broke this test in the edx-user-state-client repo.
Getting the tests and code in sync is a three step process:
1. Override the test here to make it a no-op and merge this code
2. Update the test in the other repo to align with the new functionality
3. Remove this override to re-enable the working test
"""

View File

@@ -22,6 +22,7 @@ from django.test import RequestFactory, TestCase
from django.test.client import Client
from django.test.utils import override_settings
from django.urls import reverse, reverse_lazy
from edx_django_utils.cache.utils import RequestCache
from edx_toggles.toggles.testutils import override_waffle_flag
from freezegun import freeze_time
from opaque_keys.edx.keys import CourseKey, UsageKey
@@ -772,6 +773,15 @@ class ViewsTestCase(BaseViewsTestCase):
set_score(admin.id, usage_key, 0, 3)
# Typically an http request has its own thread and RequestCache which is used to ensure that only
# one StudentModuleHistory record is created per request. However, since tests are run locally
# the score updates share the same thread. If the RequestCache is not cleared in between the two
# activities, then there will only be one StudentModuleHistory record that reflects the final state
# rather than one per action. Clearing the cache here allows the second action below to generate
# its own history record. The assertions below are then able to check that both states are recorded
# in the history.
RequestCache('studentmodulehistory').clear()
state_client.set(
username=admin.username,
block_key=usage_key,
@@ -787,17 +797,17 @@ class ViewsTestCase(BaseViewsTestCase):
response = self.client.get(url)
response_content = html.unescape(response.content.decode('utf-8'))
# We have update the state 4 times: twice to change content, and twice
# to set the scores. We'll check that the identifying content from each is
# We have update the state 2 times. We'll check that the identifying content from each is
# displayed (but not the order), and also the indexes assigned in the output
# #1 - #4
# #1 - #2
assert '#1' in response_content
assert json.dumps({'field_a': 'a', 'field_b': 'b'}, sort_keys=True, indent=2) in response_content
assert 'Score: 0.0 / 3.0' in response_content
assert json.dumps({'field_a': 'x', 'field_b': 'y'}, sort_keys=True, indent=2) in response_content
assert 'Score: 3.0 / 3.0' in response_content
assert '#4' in response_content
assert '#2' in response_content
assert '#3' not in response_content
@ddt.data(('America/New_York', -5), # UTC - 5
('Asia/Pyongyang', 9), # UTC + 9

View File

@@ -46,14 +46,11 @@ class StudentModuleHistoryExtended(BaseStudentModuleHistory):
StudentModuleHistoryExtended entry if the module_type is one that
we save.
"""
if instance.module_type in StudentModuleHistoryExtended.HISTORY_SAVING_TYPES:
history_entry = StudentModuleHistoryExtended(student_module=instance,
version=None,
created=instance.modified,
state=instance.state,
grade=instance.grade,
max_grade=instance.max_grade)
history_entry.save()
BaseStudentModuleHistory.save_history_entry(
instance,
StudentModuleHistoryExtended,
"lms.djangoapps.coursewarehistoryextended.models.student_module_history_extended_map"
)
@receiver(post_delete, sender=StudentModule)
def delete_history(sender, instance, **kwargs): # pylint: disable=no-self-argument, unused-argument