LEARNER-5123 | Cache VisibleBlocks per course AND user, only fetch the ones the user needs for subsection scoring.

This commit is contained in:
Alex Dusenbery
2018-05-10 16:54:19 -04:00
committed by Alex Dusenbery
parent 62865599c7
commit ef477ee2a4
6 changed files with 85 additions and 87 deletions

View File

@@ -153,31 +153,39 @@ class VisibleBlocks(models.Model):
return BlockRecordList.from_json(self.blocks_json)
@classmethod
def bulk_read(cls, course_key):
def bulk_read(cls, user_id, course_key):
"""
Reads and returns all visible block records for the given course from
the cache. The cache is initialize with the visible blocks for this
course if no entry currently exists.has no entry for this course,
the cache is updated.
Reads and returns all visible block records for the given user and course from
the cache. The cache is initialized with the visible blocks for this user and
course if no entry currently exists.
Arguments:
course_key: The course identifier for the desired records
"""
prefetched = get_cache(cls._CACHE_NAMESPACE).get(cls._cache_key(course_key), None)
prefetched = get_cache(cls._CACHE_NAMESPACE).get(cls._cache_key(user_id, course_key), None)
if prefetched is None:
prefetched = cls._initialize_cache(course_key)
prefetched = cls._initialize_cache(user_id, course_key)
return prefetched
@classmethod
def cached_get_or_create(cls, blocks):
prefetched = get_cache(cls._CACHE_NAMESPACE).get(cls._cache_key(blocks.course_key))
def cached_get_or_create(cls, user_id, blocks):
"""
Given a ``user_id`` and a ``BlockRecordList`` object, attempts to
fetch the related VisibleBlocks model from the request cache. This
will create and save a new ``VisibleBlocks`` record if no record
exists corresponding to the hash_value of ``blocks``.
"""
prefetched = get_cache(cls._CACHE_NAMESPACE).get(cls._cache_key(user_id, blocks.course_key))
if prefetched is not None:
model = prefetched.get(blocks.hash_value)
if not model:
model = cls.objects.create(
# We still have to do a get_or_create, because
# another user may have had this block hash created,
# even if the user we checked the cache for hasn't yet.
model, _ = cls.objects.get_or_create(
hashed=blocks.hash_value, blocks_json=blocks.json_value, course_id=blocks.course_key,
)
cls._update_cache(blocks.course_key, [model])
cls._update_cache(user_id, blocks.course_key, [model])
else:
model, _ = cls.objects.get_or_create(
hashed=blocks.hash_value,
@@ -186,7 +194,7 @@ class VisibleBlocks(models.Model):
return model
@classmethod
def bulk_create(cls, course_key, block_record_lists):
def bulk_create(cls, user_id, course_key, block_record_lists):
"""
Bulk creates VisibleBlocks for the given iterator of
BlockRecordList objects and updates the VisibleBlocks cache
@@ -201,44 +209,48 @@ class VisibleBlocks(models.Model):
)
for brl in block_record_lists
])
cls._update_cache(course_key, created)
cls._update_cache(user_id, course_key, created)
return created
@classmethod
def bulk_get_or_create(cls, block_record_lists, course_key):
def bulk_get_or_create(cls, user_id, course_key, block_record_lists):
"""
Bulk creates VisibleBlocks for the given iterator of
BlockRecordList objects for the given course_key, but
BlockRecordList objects for the given user and course_key, but
only for those that aren't already created.
"""
existent_records = cls.bulk_read(course_key)
non_existent_brls = {brl for brl in block_record_lists if brl.hash_value not in existent_records}
cls.bulk_create(course_key, non_existent_brls)
cached_records = cls.bulk_read(user_id, course_key)
non_existent_brls = {brl.hash_value for brl in block_record_lists if brl.hash_value not in cached_records}
cls.bulk_create(user_id, course_key, non_existent_brls)
@classmethod
def _initialize_cache(cls, course_key):
def _initialize_cache(cls, user_id, course_key):
"""
Prefetches visible blocks for the given course and stores in the cache.
Prefetches visible blocks for the given user and course and stores in the cache.
Returns a dictionary mapping hashes of these block records to the
block record objects.
"""
prefetched = {record.hashed: record for record in cls.objects.filter(course_id=course_key)}
get_cache(cls._CACHE_NAMESPACE)[cls._cache_key(course_key)] = prefetched
grades_with_blocks = PersistentSubsectionGrade.objects.select_related('visible_blocks').filter(
user_id=user_id,
course_id=course_key,
)
prefetched = {grade.visible_blocks.hashed: grade.visible_blocks for grade in grades_with_blocks}
get_cache(cls._CACHE_NAMESPACE)[cls._cache_key(user_id, course_key)] = prefetched
return prefetched
@classmethod
def _update_cache(cls, course_key, visible_blocks):
def _update_cache(cls, user_id, course_key, visible_blocks):
"""
Adds a specific set of visible blocks to the request cache.
This assumes that prefetch has already been called.
"""
get_cache(cls._CACHE_NAMESPACE)[cls._cache_key(course_key)].update(
get_cache(cls._CACHE_NAMESPACE)[cls._cache_key(user_id, course_key)].update(
{visible_block.hashed: visible_block for visible_block in visible_blocks}
)
@classmethod
def _cache_key(cls, course_key):
return u"visible_blocks_cache.{}".format(course_key)
def _cache_key(cls, user_id, course_key):
return u"visible_blocks_cache.{}.{}".format(course_key, user_id)
class PersistentSubsectionGrade(TimeStampedModel):
@@ -362,10 +374,11 @@ class PersistentSubsectionGrade(TimeStampedModel):
Wrapper for objects.update_or_create.
"""
cls._prepare_params(params)
VisibleBlocks.cached_get_or_create(params['visible_blocks'])
VisibleBlocks.cached_get_or_create(params['user_id'], params['visible_blocks'])
cls._prepare_params_visible_blocks_id(params)
cls._prepare_params_override(params)
# TODO: do we NEED to pop these?
first_attempted = params.pop('first_attempted')
user_id = params.pop('user_id')
usage_key = params.pop('usage_key')
@@ -394,7 +407,9 @@ class PersistentSubsectionGrade(TimeStampedModel):
PersistentSubsectionGradeOverride.prefetch(user_id, course_key)
map(cls._prepare_params, grade_params_iter)
VisibleBlocks.bulk_get_or_create([params['visible_blocks'] for params in grade_params_iter], course_key)
VisibleBlocks.bulk_get_or_create(
user_id, course_key, [params['visible_blocks'] for params in grade_params_iter]
)
map(cls._prepare_params_visible_blocks_id, grade_params_iter)
map(cls._prepare_params_override, grade_params_iter)
@@ -619,4 +634,4 @@ class PersistentSubsectionGradeOverride(models.Model):
def prefetch(user, course_key):
PersistentSubsectionGradeOverride.prefetch(user.id, course_key)
VisibleBlocks.bulk_read(course_key)
VisibleBlocks.bulk_read(user.id, course_key)

View File

@@ -27,7 +27,6 @@ from .config.waffle import DISABLE_REGRADE_ON_POLICY_CHANGE, waffle
from .constants import ScoreDatabaseTableEnum
from .course_grade_factory import CourseGradeFactory
from .exceptions import DatabaseNotReadyError
from .models import VisibleBlocks
from .services import GradesService
from .signals.signals import SUBSECTION_SCORE_CHANGED
from .subsection_grade_factory import SubsectionGradeFactory
@@ -41,7 +40,6 @@ KNOWN_RETRY_ERRORS = ( # Errors we expect occasionally, should be resolved on r
ValidationError,
DatabaseNotReadyError,
)
MAX_VISIBLE_BLOCKS_ALLOWED = 50000
RECALCULATE_GRADE_DELAY_SECONDS = 2 # to prevent excessive _has_db_updated failures. See TNL-6424.
RETRY_DELAY_SECONDS = 30
SUBSECTION_GRADE_TIMEOUT_SECONDS = 300
@@ -139,12 +137,6 @@ def recalculate_course_and_subsection_grades_for_user(self, **kwargs): # pylint
user = User.objects.get(id=user_id)
course_key = CourseKey.from_string(course_key_str)
# Hotfix to address LEARNER-5123, to be removed later
visible_blocks_count = VisibleBlocks.objects.filter(course_id=course_key)
if visible_blocks_count > MAX_VISIBLE_BLOCKS_ALLOWED:
message = '{} has too many VisibleBlocks to recalculate grades for {}'
raise Exception(message.format(course_key_str, user_id))
previous_course_grade = CourseGradeFactory().read(user, course_key=course_key)
if previous_course_grade and previous_course_grade.attempted:
CourseGradeFactory().update(

View File

@@ -97,56 +97,28 @@ class TestCourseGradeFactory(GradeTestBase):
with self.assertNumQueries(2), mock_get_score(1, 2):
_assert_read(expected_pass=False, expected_percent=0) # start off with grade of 0
# TODO: Remove Django 1.11 upgrade shim
# SHIM: Django 1.11 results in a few more SAVEPOINTs due to:
# https://github.com/django/django/commit/d44afd88#diff-5b0dda5eb9a242c15879dc9cd2121379L485
if django.VERSION >= (1, 11):
num_queries = 37
else:
num_queries = 29
num_queries = 40
with self.assertNumQueries(num_queries), mock_get_score(1, 2):
grade_factory.update(self.request.user, self.course, force_update_subsections=True)
with self.assertNumQueries(2):
_assert_read(expected_pass=True, expected_percent=0.5) # updated to grade of .5
# TODO: Remove Django 1.11 upgrade shim
# SHIM: Django 1.11 results in a few more SAVEPOINTs due to:
# https://github.com/django/django/commit/d44afd88#diff-5b0dda5eb9a242c15879dc9cd2121379L485
if django.VERSION >= (1, 11):
num_queries = 6
else:
num_queries = 4
num_queries = 6
with self.assertNumQueries(num_queries), mock_get_score(1, 4):
grade_factory.update(self.request.user, self.course, force_update_subsections=False)
with self.assertNumQueries(2):
_assert_read(expected_pass=True, expected_percent=0.5) # NOT updated to grade of .25
# TODO: Remove Django 1.11 upgrade shim
# SHIM: Django 1.11 results in a few more SAVEPOINTs due to:
# https://github.com/django/django/commit/d44afd88#diff-5b0dda5eb9a242c15879dc9cd2121379L485
if django.VERSION >= (1, 11):
num_queries = 20
else:
num_queries = 12
num_queries = 20
with self.assertNumQueries(num_queries), mock_get_score(2, 2):
grade_factory.update(self.request.user, self.course, force_update_subsections=True)
with self.assertNumQueries(2):
_assert_read(expected_pass=True, expected_percent=1.0) # updated to grade of 1.0
# TODO: Remove Django 1.11 upgrade shim
# SHIM: Django 1.11 results in a few more SAVEPOINTs due to:
# https://github.com/django/django/commit/d44afd88#diff-5b0dda5eb9a242c15879dc9cd2121379L485
if django.VERSION >= (1, 11):
num_queries = 20
else:
num_queries = 12
num_queries = 23
with self.assertNumQueries(num_queries), mock_get_score(0, 0): # the subsection now is worth zero
grade_factory.update(self.request.user, self.course, force_update_subsections=True)

View File

@@ -132,11 +132,16 @@ class VisibleBlocksTest(GradesModelTestCase):
"""
shard = 4
def _create_block_record_list(self, blocks):
def setUp(self):
super(VisibleBlocksTest, self).setUp()
self.user_id = 12345
def _create_block_record_list(self, blocks, user_id=None):
"""
Creates and returns a BlockRecordList for the given blocks.
"""
return VisibleBlocks.cached_get_or_create(BlockRecordList.from_list(blocks, self.course_key))
block_record_list = BlockRecordList.from_list(blocks, self.course_key)
return VisibleBlocks.cached_get_or_create(user_id or self.user_id, block_record_list)
def test_creation(self):
"""

View File

@@ -61,10 +61,7 @@ class TestScoredBlockTypes(TestCase):
}
def test_block_types_possibly_scored(self):
self.assertSetEqual(
self.possibly_scored_block_types,
scores._block_types_possibly_scored()
)
self.assertTrue(self.possibly_scored_block_types.issubset(scores._block_types_possibly_scored()))
def test_possibly_scored(self):
course_key = CourseLocator(u'org', u'course', u'run')

View File

@@ -485,22 +485,39 @@ class RecalculateGradesForUserTest(HasCourseWithProblemsMixin, ModuleStoreTestCa
self.user = UserFactory.create()
self.set_up_course()
CourseEnrollment.enroll(self.user, self.course.id)
self.original_max_visible_blocks_allowed = tasks.MAX_VISIBLE_BLOCKS_ALLOWED
tasks.MAX_VISIBLE_BLOCKS_ALLOWED = 1
def tearDown(self):
super(RecalculateGradesForUserTest, self).tearDown()
tasks.MAX_VISIBLE_BLOCKS_ALLOWED = self.original_max_visible_blocks_allowed
def test_do_not_recalculate_complex_courses(self):
def test_recalculation_happy_path(self):
with patch('lms.djangoapps.grades.tasks.CourseGradeFactory') as mock_factory:
factory = mock_factory.return_value
factory.read.return_value = MagicMock(attempted=True)
kwargs = {
'user_id': self.user.id,
'course_key': six.text_type(self.course.id),
}
with self.assertRaisesRegexp(Exception, 'too many VisibleBlocks'):
task_result = tasks.recalculate_course_and_subsection_grades_for_user.apply_async(kwargs=kwargs)
task_result.get()
update = mock_factory.return_value.update
self.assertFalse(update.called)
task_result = tasks.recalculate_course_and_subsection_grades_for_user.apply_async(kwargs=kwargs)
task_result.get()
factory.read.assert_called_once_with(self.user, course_key=self.course.id)
factory.update.assert_called_once_with(
user=self.user,
course_key=self.course.id,
force_update_subsections=True,
)
def test_recalculation_doesnt_happen_if_not_previously_attempted(self):
with patch('lms.djangoapps.grades.tasks.CourseGradeFactory') as mock_factory:
factory = mock_factory.return_value
factory.read.return_value = MagicMock(attempted=False)
kwargs = {
'user_id': self.user.id,
'course_key': six.text_type(self.course.id),
}
task_result = tasks.recalculate_course_and_subsection_grades_for_user.apply_async(kwargs=kwargs)
task_result.get()
factory.read.assert_called_once_with(self.user, course_key=self.course.id)
self.assertFalse(factory.update.called)