LEARNER-5123 | Cache VisibleBlocks per course AND user, only fetch the ones the user needs for subsection scoring.

2018-05-10 16:54:19 -04:00
parent 62865599c7
commit ef477ee2a4
6 changed files with 85 additions and 87 deletions
--- a/lms/djangoapps/grades/models.py
+++ b/lms/djangoapps/grades/models.py
@@ -153,31 +153,39 @@ class VisibleBlocks(models.Model):
        return BlockRecordList.from_json(self.blocks_json)

    @classmethod
-    def bulk_read(cls, course_key):
+    def bulk_read(cls, user_id, course_key):
        """
-        Reads and returns all visible block records for the given course from
-        the cache.  The cache is initialize with the visible blocks for this
-        course if no entry currently exists.has no entry for this course,
-        the cache is updated.
+        Reads and returns all visible block records for the given user and course from
+        the cache.  The cache is initialized with the visible blocks for this user and
+        course if no entry currently exists.

        Arguments:
            course_key: The course identifier for the desired records
        """
-        prefetched = get_cache(cls._CACHE_NAMESPACE).get(cls._cache_key(course_key), None)
+        prefetched = get_cache(cls._CACHE_NAMESPACE).get(cls._cache_key(user_id, course_key), None)
        if prefetched is None:
-            prefetched = cls._initialize_cache(course_key)
+            prefetched = cls._initialize_cache(user_id, course_key)
        return prefetched

    @classmethod
-    def cached_get_or_create(cls, blocks):
-        prefetched = get_cache(cls._CACHE_NAMESPACE).get(cls._cache_key(blocks.course_key))
+    def cached_get_or_create(cls, user_id, blocks):
+        """
+        Given a ``user_id`` and a ``BlockRecordList`` object, attempts to
+        fetch the related VisibleBlocks model from the request cache.  This
+        will create and save a new ``VisibleBlocks`` record if no record
+        exists corresponding to the hash_value of ``blocks``.
+        """
+        prefetched = get_cache(cls._CACHE_NAMESPACE).get(cls._cache_key(user_id, blocks.course_key))
        if prefetched is not None:
            model = prefetched.get(blocks.hash_value)
            if not model:
-                model = cls.objects.create(
+                # We still have to do a get_or_create, because
+                # another user may have had this block hash created,
+                # even if the user we checked the cache for hasn't yet.
+                model, _ = cls.objects.get_or_create(
                    hashed=blocks.hash_value, blocks_json=blocks.json_value, course_id=blocks.course_key,
                )
-                cls._update_cache(blocks.course_key, [model])
+                cls._update_cache(user_id, blocks.course_key, [model])
        else:
            model, _ = cls.objects.get_or_create(
                hashed=blocks.hash_value,
@@ -186,7 +194,7 @@ class VisibleBlocks(models.Model):
        return model

    @classmethod
-    def bulk_create(cls, course_key, block_record_lists):
+    def bulk_create(cls, user_id, course_key, block_record_lists):
        """
        Bulk creates VisibleBlocks for the given iterator of
        BlockRecordList objects and updates the VisibleBlocks cache
@@ -201,44 +209,48 @@ class VisibleBlocks(models.Model):
            )
            for brl in block_record_lists
        ])
-        cls._update_cache(course_key, created)
+        cls._update_cache(user_id, course_key, created)
        return created

    @classmethod
-    def bulk_get_or_create(cls, block_record_lists, course_key):
+    def bulk_get_or_create(cls, user_id, course_key, block_record_lists):
        """
        Bulk creates VisibleBlocks for the given iterator of
-        BlockRecordList objects for the given course_key, but
+        BlockRecordList objects for the given user and course_key, but
        only for those that aren't already created.
        """
-        existent_records = cls.bulk_read(course_key)
-        non_existent_brls = {brl for brl in block_record_lists if brl.hash_value not in existent_records}
-        cls.bulk_create(course_key, non_existent_brls)
+        cached_records = cls.bulk_read(user_id, course_key)
+        non_existent_brls = {brl.hash_value for brl in block_record_lists if brl.hash_value not in cached_records}
+        cls.bulk_create(user_id, course_key, non_existent_brls)

    @classmethod
-    def _initialize_cache(cls, course_key):
+    def _initialize_cache(cls, user_id, course_key):
        """
-        Prefetches visible blocks for the given course and stores in the cache.
+        Prefetches visible blocks for the given user and course and stores in the cache.
        Returns a dictionary mapping hashes of these block records to the
        block record objects.
        """
-        prefetched = {record.hashed: record for record in cls.objects.filter(course_id=course_key)}
-        get_cache(cls._CACHE_NAMESPACE)[cls._cache_key(course_key)] = prefetched
+        grades_with_blocks = PersistentSubsectionGrade.objects.select_related('visible_blocks').filter(
+            user_id=user_id,
+            course_id=course_key,
+        )
+        prefetched = {grade.visible_blocks.hashed: grade.visible_blocks for grade in grades_with_blocks}
+        get_cache(cls._CACHE_NAMESPACE)[cls._cache_key(user_id, course_key)] = prefetched
        return prefetched

    @classmethod
-    def _update_cache(cls, course_key, visible_blocks):
+    def _update_cache(cls, user_id, course_key, visible_blocks):
        """
        Adds a specific set of visible blocks to the request cache.
        This assumes that prefetch has already been called.
        """
-        get_cache(cls._CACHE_NAMESPACE)[cls._cache_key(course_key)].update(
+        get_cache(cls._CACHE_NAMESPACE)[cls._cache_key(user_id, course_key)].update(
            {visible_block.hashed: visible_block for visible_block in visible_blocks}
        )

    @classmethod
-    def _cache_key(cls, course_key):
-        return u"visible_blocks_cache.{}".format(course_key)
+    def _cache_key(cls, user_id, course_key):
+        return u"visible_blocks_cache.{}.{}".format(course_key, user_id)


 class PersistentSubsectionGrade(TimeStampedModel):
@@ -362,10 +374,11 @@ class PersistentSubsectionGrade(TimeStampedModel):
        Wrapper for objects.update_or_create.
        """
        cls._prepare_params(params)
-        VisibleBlocks.cached_get_or_create(params['visible_blocks'])
+        VisibleBlocks.cached_get_or_create(params['user_id'], params['visible_blocks'])
        cls._prepare_params_visible_blocks_id(params)
        cls._prepare_params_override(params)

+        # TODO: do we NEED to pop these?
        first_attempted = params.pop('first_attempted')
        user_id = params.pop('user_id')
        usage_key = params.pop('usage_key')
@@ -394,7 +407,9 @@ class PersistentSubsectionGrade(TimeStampedModel):
        PersistentSubsectionGradeOverride.prefetch(user_id, course_key)

        map(cls._prepare_params, grade_params_iter)
-        VisibleBlocks.bulk_get_or_create([params['visible_blocks'] for params in grade_params_iter], course_key)
+        VisibleBlocks.bulk_get_or_create(
+            user_id, course_key, [params['visible_blocks'] for params in grade_params_iter]
+        )
        map(cls._prepare_params_visible_blocks_id, grade_params_iter)
        map(cls._prepare_params_override, grade_params_iter)

@@ -619,4 +634,4 @@ class PersistentSubsectionGradeOverride(models.Model):

 def prefetch(user, course_key):
    PersistentSubsectionGradeOverride.prefetch(user.id, course_key)
-    VisibleBlocks.bulk_read(course_key)
+    VisibleBlocks.bulk_read(user.id, course_key)
--- a/lms/djangoapps/grades/tasks.py
+++ b/lms/djangoapps/grades/tasks.py
@@ -27,7 +27,6 @@ from .config.waffle import DISABLE_REGRADE_ON_POLICY_CHANGE, waffle
 from .constants import ScoreDatabaseTableEnum
 from .course_grade_factory import CourseGradeFactory
 from .exceptions import DatabaseNotReadyError
-from .models import VisibleBlocks
 from .services import GradesService
 from .signals.signals import SUBSECTION_SCORE_CHANGED
 from .subsection_grade_factory import SubsectionGradeFactory
@@ -41,7 +40,6 @@ KNOWN_RETRY_ERRORS = (  # Errors we expect occasionally, should be resolved on r
    ValidationError,
    DatabaseNotReadyError,
 )
-MAX_VISIBLE_BLOCKS_ALLOWED = 50000
 RECALCULATE_GRADE_DELAY_SECONDS = 2  # to prevent excessive _has_db_updated failures. See TNL-6424.
 RETRY_DELAY_SECONDS = 30
 SUBSECTION_GRADE_TIMEOUT_SECONDS = 300
@@ -139,12 +137,6 @@ def recalculate_course_and_subsection_grades_for_user(self, **kwargs):  # pylint
    user = User.objects.get(id=user_id)
    course_key = CourseKey.from_string(course_key_str)

-    # Hotfix to address LEARNER-5123, to be removed later
-    visible_blocks_count = VisibleBlocks.objects.filter(course_id=course_key)
-    if visible_blocks_count > MAX_VISIBLE_BLOCKS_ALLOWED:
-        message = '{} has too many VisibleBlocks to recalculate grades for {}'
-        raise Exception(message.format(course_key_str, user_id))
-
    previous_course_grade = CourseGradeFactory().read(user, course_key=course_key)
    if previous_course_grade and previous_course_grade.attempted:
        CourseGradeFactory().update(
--- a/lms/djangoapps/grades/tests/test_course_grade_factory.py
+++ b/lms/djangoapps/grades/tests/test_course_grade_factory.py
@@ -97,56 +97,28 @@ class TestCourseGradeFactory(GradeTestBase):
        with self.assertNumQueries(2), mock_get_score(1, 2):
            _assert_read(expected_pass=False, expected_percent=0)  # start off with grade of 0

-        # TODO: Remove Django 1.11 upgrade shim
-        # SHIM: Django 1.11 results in a few more SAVEPOINTs due to:
-        # https://github.com/django/django/commit/d44afd88#diff-5b0dda5eb9a242c15879dc9cd2121379L485
-        if django.VERSION >= (1, 11):
-            num_queries = 37
-        else:
-            num_queries = 29
-
+        num_queries = 40
        with self.assertNumQueries(num_queries), mock_get_score(1, 2):
            grade_factory.update(self.request.user, self.course, force_update_subsections=True)

        with self.assertNumQueries(2):
            _assert_read(expected_pass=True, expected_percent=0.5)  # updated to grade of .5

-        # TODO: Remove Django 1.11 upgrade shim
-        # SHIM: Django 1.11 results in a few more SAVEPOINTs due to:
-        # https://github.com/django/django/commit/d44afd88#diff-5b0dda5eb9a242c15879dc9cd2121379L485
-        if django.VERSION >= (1, 11):
-            num_queries = 6
-        else:
-            num_queries = 4
-
+        num_queries = 6
        with self.assertNumQueries(num_queries), mock_get_score(1, 4):
            grade_factory.update(self.request.user, self.course, force_update_subsections=False)

        with self.assertNumQueries(2):
            _assert_read(expected_pass=True, expected_percent=0.5)  # NOT updated to grade of .25

-        # TODO: Remove Django 1.11 upgrade shim
-        # SHIM: Django 1.11 results in a few more SAVEPOINTs due to:
-        # https://github.com/django/django/commit/d44afd88#diff-5b0dda5eb9a242c15879dc9cd2121379L485
-        if django.VERSION >= (1, 11):
-            num_queries = 20
-        else:
-            num_queries = 12
-
+        num_queries = 20
        with self.assertNumQueries(num_queries), mock_get_score(2, 2):
            grade_factory.update(self.request.user, self.course, force_update_subsections=True)

        with self.assertNumQueries(2):
            _assert_read(expected_pass=True, expected_percent=1.0)  # updated to grade of 1.0

-        # TODO: Remove Django 1.11 upgrade shim
-        # SHIM: Django 1.11 results in a few more SAVEPOINTs due to:
-        # https://github.com/django/django/commit/d44afd88#diff-5b0dda5eb9a242c15879dc9cd2121379L485
-        if django.VERSION >= (1, 11):
-            num_queries = 20
-        else:
-            num_queries = 12
-
+        num_queries = 23
        with self.assertNumQueries(num_queries), mock_get_score(0, 0):  # the subsection now is worth zero
            grade_factory.update(self.request.user, self.course, force_update_subsections=True)

--- a/lms/djangoapps/grades/tests/test_models.py
+++ b/lms/djangoapps/grades/tests/test_models.py
@@ -132,11 +132,16 @@ class VisibleBlocksTest(GradesModelTestCase):
    """
    shard = 4

-    def _create_block_record_list(self, blocks):
+    def setUp(self):
+        super(VisibleBlocksTest, self).setUp()
+        self.user_id = 12345
+
+    def _create_block_record_list(self, blocks, user_id=None):
        """
        Creates and returns a BlockRecordList for the given blocks.
        """
-        return VisibleBlocks.cached_get_or_create(BlockRecordList.from_list(blocks, self.course_key))
+        block_record_list = BlockRecordList.from_list(blocks, self.course_key)
+        return VisibleBlocks.cached_get_or_create(user_id or self.user_id, block_record_list)

    def test_creation(self):
        """
--- a/lms/djangoapps/grades/tests/test_scores.py
+++ b/lms/djangoapps/grades/tests/test_scores.py
@@ -61,10 +61,7 @@ class TestScoredBlockTypes(TestCase):
    }

    def test_block_types_possibly_scored(self):
-        self.assertSetEqual(
-            self.possibly_scored_block_types,
-            scores._block_types_possibly_scored()
-        )
+        self.assertTrue(self.possibly_scored_block_types.issubset(scores._block_types_possibly_scored()))

    def test_possibly_scored(self):
        course_key = CourseLocator(u'org', u'course', u'run')
--- a/lms/djangoapps/grades/tests/test_tasks.py
+++ b/lms/djangoapps/grades/tests/test_tasks.py
@@ -485,22 +485,39 @@ class RecalculateGradesForUserTest(HasCourseWithProblemsMixin, ModuleStoreTestCa
        self.user = UserFactory.create()
        self.set_up_course()
        CourseEnrollment.enroll(self.user, self.course.id)
-        self.original_max_visible_blocks_allowed = tasks.MAX_VISIBLE_BLOCKS_ALLOWED
-        tasks.MAX_VISIBLE_BLOCKS_ALLOWED = 1

-    def tearDown(self):
-        super(RecalculateGradesForUserTest, self).tearDown()
-        tasks.MAX_VISIBLE_BLOCKS_ALLOWED = self.original_max_visible_blocks_allowed
-
-    def test_do_not_recalculate_complex_courses(self):
+    def test_recalculation_happy_path(self):
        with patch('lms.djangoapps.grades.tasks.CourseGradeFactory') as mock_factory:
+            factory = mock_factory.return_value
+            factory.read.return_value = MagicMock(attempted=True)
+
            kwargs = {
                'user_id': self.user.id,
                'course_key': six.text_type(self.course.id),
            }
-            with self.assertRaisesRegexp(Exception, 'too many VisibleBlocks'):
-                task_result = tasks.recalculate_course_and_subsection_grades_for_user.apply_async(kwargs=kwargs)
-                task_result.get()

-            update = mock_factory.return_value.update
-            self.assertFalse(update.called)
+            task_result = tasks.recalculate_course_and_subsection_grades_for_user.apply_async(kwargs=kwargs)
+            task_result.get()
+
+            factory.read.assert_called_once_with(self.user, course_key=self.course.id)
+            factory.update.assert_called_once_with(
+                user=self.user,
+                course_key=self.course.id,
+                force_update_subsections=True,
+            )
+
+    def test_recalculation_doesnt_happen_if_not_previously_attempted(self):
+        with patch('lms.djangoapps.grades.tasks.CourseGradeFactory') as mock_factory:
+            factory = mock_factory.return_value
+            factory.read.return_value = MagicMock(attempted=False)
+
+            kwargs = {
+                'user_id': self.user.id,
+                'course_key': six.text_type(self.course.id),
+            }
+
+            task_result = tasks.recalculate_course_and_subsection_grades_for_user.apply_async(kwargs=kwargs)
+            task_result.get()
+
+            factory.read.assert_called_once_with(self.user, course_key=self.course.id)
+            self.assertFalse(factory.update.called)