Merge pull request #7288 from edx/ormsbee/grade_query_caching

Grading Performance Work
2015-07-09 12:07:52 -04:00
parent 85857e6729 79de77cf95
commit 472de1a5de
11 changed files with 465 additions and 105 deletions
--- a/common/lib/xmodule/xmodule/course_module.py
+++ b/common/lib/xmodule/xmodule/course_module.py
@@ -20,6 +20,7 @@ from xmodule.tabs import CourseTabList
 from xmodule.mixin import LicenseMixin
 import json

+from xblock.core import XBlock
 from xblock.fields import Scope, List, String, Dict, Boolean, Integer, Float
 from .fields import Date
 from django.utils.timezone import UTC
@@ -1321,11 +1322,15 @@ class CourseDescriptor(CourseFields, SequenceDescriptor, LicenseMixin):
        except UndefinedContext:
            module = self

+        def possibly_scored(usage_key):
+            """Can this XBlock type can have a score or children?"""
+            return usage_key.block_type in self.block_types_affecting_grading
+
        all_descriptors = []
        graded_sections = {}

        def yield_descriptor_descendents(module_descriptor):
-            for child in module_descriptor.get_children():
+            for child in module_descriptor.get_children(usage_key_filter=possibly_scored):
                yield child
                for module_descriptor in yield_descriptor_descendents(child):
                    yield module_descriptor
@@ -1351,6 +1356,15 @@ class CourseDescriptor(CourseFields, SequenceDescriptor, LicenseMixin):
        return {'graded_sections': graded_sections,
                'all_descriptors': all_descriptors, }

+    @lazy
+    def block_types_affecting_grading(self):
+        """Return all block types that could impact grading (i.e. scored, or having children)."""
+        return frozenset(
+            cat for (cat, xblock_class) in XBlock.load_classes() if (
+                getattr(xblock_class, 'has_score', False) or getattr(xblock_class, 'has_children', False)
+            )
+        )
+
    @staticmethod
    def make_id(org, course, url_name):
        return '/'.join([org, course, url_name])
--- a/lms/djangoapps/ccx/tests/test_field_override_performance.py
+++ b/lms/djangoapps/ccx/tests/test_field_override_performance.py
@@ -29,7 +29,11 @@ from xmodule.modulestore.tests.utils import ProceduralCourseTestMixin

@attr('shard_1')
@mock.patch.dict(
-    'django.conf.settings.FEATURES', {'ENABLE_XBLOCK_VIEW_ENDPOINT': True}
+    'django.conf.settings.FEATURES',
+    {
+        'ENABLE_XBLOCK_VIEW_ENDPOINT': True,
+        'ENABLE_MAX_SCORE_CACHE': False,
+    }
 )
@ddt.ddt
 class FieldOverridePerformanceTestCase(ProceduralCourseTestMixin,
@@ -173,18 +177,18 @@ class TestFieldOverrideMongoPerformance(FieldOverridePerformanceTestCase):

    TEST_DATA = {
        # (providers, course_width, enable_ccx): # of sql queries, # of mongo queries, # of xblocks
-        ('no_overrides', 1, True): (27, 7, 14),
-        ('no_overrides', 2, True): (135, 7, 85),
-        ('no_overrides', 3, True): (595, 7, 336),
-        ('ccx', 1, True): (27, 7, 14),
-        ('ccx', 2, True): (135, 7, 85),
-        ('ccx', 3, True): (595, 7, 336),
-        ('no_overrides', 1, False): (27, 7, 14),
-        ('no_overrides', 2, False): (135, 7, 85),
-        ('no_overrides', 3, False): (595, 7, 336),
-        ('ccx', 1, False): (27, 7, 14),
-        ('ccx', 2, False): (135, 7, 85),
-        ('ccx', 3, False): (595, 7, 336),
+        ('no_overrides', 1, True): (23, 7, 14),
+        ('no_overrides', 2, True): (68, 7, 85),
+        ('no_overrides', 3, True): (263, 7, 336),
+        ('ccx', 1, True): (23, 7, 14),
+        ('ccx', 2, True): (68, 7, 85),
+        ('ccx', 3, True): (263, 7, 336),
+        ('no_overrides', 1, False): (23, 7, 14),
+        ('no_overrides', 2, False): (68, 7, 85),
+        ('no_overrides', 3, False): (263, 7, 336),
+        ('ccx', 1, False): (23, 7, 14),
+        ('ccx', 2, False): (68, 7, 85),
+        ('ccx', 3, False): (263, 7, 336),
    }


@@ -196,16 +200,16 @@ class TestFieldOverrideSplitPerformance(FieldOverridePerformanceTestCase):
    __test__ = True

    TEST_DATA = {
-        ('no_overrides', 1, True): (27, 4, 9),
-        ('no_overrides', 2, True): (135, 19, 54),
-        ('no_overrides', 3, True): (595, 84, 215),
-        ('ccx', 1, True): (27, 4, 9),
-        ('ccx', 2, True): (135, 19, 54),
-        ('ccx', 3, True): (595, 84, 215),
-        ('no_overrides', 1, False): (27, 4, 9),
-        ('no_overrides', 2, False): (135, 19, 54),
-        ('no_overrides', 3, False): (595, 84, 215),
-        ('ccx', 1, False): (27, 4, 9),
-        ('ccx', 2, False): (135, 19, 54),
-        ('ccx', 3, False): (595, 84, 215),
+        ('no_overrides', 1, True): (23, 4, 9),
+        ('no_overrides', 2, True): (68, 19, 54),
+        ('no_overrides', 3, True): (263, 84, 215),
+        ('ccx', 1, True): (23, 4, 9),
+        ('ccx', 2, True): (68, 19, 54),
+        ('ccx', 3, True): (263, 84, 215),
+        ('no_overrides', 1, False): (23, 4, 9),
+        ('no_overrides', 2, False): (68, 19, 54),
+        ('no_overrides', 3, False): (263, 84, 215),
+        ('ccx', 1, False): (23, 4, 9),
+        ('ccx', 2, False): (68, 19, 54),
+        ('ccx', 3, False): (263, 84, 215),
    }
--- a/lms/djangoapps/courseware/grades.py
+++ b/lms/djangoapps/courseware/grades.py
@@ -1,6 +1,7 @@
 # Compute grades using real division, with no integer truncation
 from __future__ import division
 from collections import defaultdict
+from functools import partial
 import json
 import random
 import logging
@@ -9,11 +10,12 @@ from contextlib import contextmanager
 from django.conf import settings
 from django.db import transaction
 from django.test.client import RequestFactory
+from django.core.cache import cache

 import dogstats_wrapper as dog_stats_api

 from courseware import courses
-from courseware.model_data import FieldDataCache
+from courseware.model_data import FieldDataCache, ScoresClient
 from student.models import anonymous_id_for_user
 from util.module_utils import yield_dynamic_descriptor_descendants
 from xmodule import graders
@@ -25,12 +27,131 @@ from .module_render import get_module_for_descriptor
 from submissions import api as sub_api  # installed from the edx-submissions repository
 from opaque_keys import InvalidKeyError
 from opaque_keys.edx.keys import CourseKey
-
 from openedx.core.djangoapps.signals.signals import GRADES_UPDATED

+
 log = logging.getLogger("edx.courseware")


+class MaxScoresCache(object):
+    """
+    A cache for unweighted max scores for problems.
+
+    The key assumption here is that any problem that has not yet recorded a
+    score for a user is worth the same number of points. An XBlock is free to
+    score one student at 2/5 and another at 1/3. But a problem that has never
+    issued a score -- say a problem two students have only seen mentioned in
+    their progress pages and never interacted with -- should be worth the same
+    number of points for everyone.
+    """
+    def __init__(self, cache_prefix):
+        self.cache_prefix = cache_prefix
+        self._max_scores_cache = {}
+        self._max_scores_updates = {}
+
+    @classmethod
+    def create_for_course(cls, course):
+        """
+        Given a CourseDescriptor, return a correctly configured `MaxScoresCache`
+
+        This method will base the `MaxScoresCache` cache prefix value on the
+        last time something was published to the live version of the course.
+        This is so that we don't have to worry about stale cached values for
+        max scores -- any time a content change occurs, we change our cache
+        keys.
+        """
+        return cls(u"{}.{}".format(course.id, course.subtree_edited_on.isoformat()))
+
+    def fetch_from_remote(self, locations):
+        """
+        Populate the local cache with values from django's cache
+        """
+        remote_dict = cache.get_many([self._remote_cache_key(loc) for loc in locations])
+        self._max_scores_cache = {
+            self._local_cache_key(remote_key): value
+            for remote_key, value in remote_dict.items()
+            if value is not None
+        }
+
+    def push_to_remote(self):
+        """
+        Update the remote cache
+        """
+        if self._max_scores_updates:
+            cache.set_many(
+                {
+                    self._remote_cache_key(key): value
+                    for key, value in self._max_scores_updates.items()
+                },
+                60 * 60 * 24  # 1 day
+            )
+
+    def _remote_cache_key(self, location):
+        """Convert a location to a remote cache key (add our prefixing)."""
+        return u"grades.MaxScores.{}___{}".format(self.cache_prefix, unicode(location))
+
+    def _local_cache_key(self, remote_key):
+        """Convert a remote cache key to a local cache key (i.e. location str)."""
+        return remote_key.split(u"___", 1)[1]
+
+    def num_cached_from_remote(self):
+        """How many items did we pull down from the remote cache?"""
+        return len(self._max_scores_cache)
+
+    def num_cached_updates(self):
+        """How many local updates are we waiting to push to the remote cache?"""
+        return len(self._max_scores_updates)
+
+    def set(self, location, max_score):
+        """
+        Adds a max score to the max_score_cache
+        """
+        loc_str = unicode(location)
+        if self._max_scores_cache.get(loc_str) != max_score:
+            self._max_scores_updates[loc_str] = max_score
+
+    def get(self, location):
+        """
+        Retrieve a max score from the cache
+        """
+        loc_str = unicode(location)
+        max_score = self._max_scores_updates.get(loc_str)
+        if max_score is None:
+            max_score = self._max_scores_cache.get(loc_str)
+
+        return max_score
+
+
+def descriptor_affects_grading(block_types_affecting_grading, descriptor):
+    """
+    Returns True if the descriptor could have any impact on grading, else False.
+
+    Something might be a scored item if it is capable of storing a score
+    (has_score=True). We also have to include anything that can have children,
+    since those children might have scores. We can avoid things like Videos,
+    which have state but cannot ever impact someone's grade.
+    """
+    return descriptor.location.block_type in block_types_affecting_grading
+
+
+def field_data_cache_for_grading(course, user):
+    """
+    Given a CourseDescriptor and User, create the FieldDataCache for grading.
+
+    This will generate a FieldDataCache that only loads state for those things
+    that might possibly affect the grading process, and will ignore things like
+    Videos.
+    """
+    descriptor_filter = partial(descriptor_affects_grading, course.block_types_affecting_grading)
+    return FieldDataCache.cache_for_descriptor_descendents(
+        course.id,
+        user,
+        course,
+        depth=None,
+        descriptor_filter=descriptor_filter
+    )
+
+
 def answer_distributions(course_key):
    """
    Given a course_key, return answer distributions in the form of a dictionary
@@ -123,14 +244,14 @@ def answer_distributions(course_key):


@transaction.commit_manually
-def grade(student, request, course, keep_raw_scores=False):
+def grade(student, request, course, keep_raw_scores=False, field_data_cache=None, scores_client=None):
    """
    Wraps "_grade" with the manual_transaction context manager just in case
    there are unanticipated errors.
    Send a signal to update the minimum grade requirement status.
    """
    with manual_transaction():
-        grade_summary = _grade(student, request, course, keep_raw_scores)
+        grade_summary = _grade(student, request, course, keep_raw_scores, field_data_cache, scores_client)
        responses = GRADES_UPDATED.send_robust(
            sender=None,
            username=request.user.username,
@@ -145,7 +266,7 @@ def grade(student, request, course, keep_raw_scores=False):
        return grade_summary


-def _grade(student, request, course, keep_raw_scores):
+def _grade(student, request, course, keep_raw_scores, field_data_cache, scores_client):
    """
    Unwrapped version of "grade"

@@ -166,15 +287,25 @@ def _grade(student, request, course, keep_raw_scores):

    More information on the format is in the docstring for CourseGrader.
    """
-    grading_context = course.grading_context
-    raw_scores = []
+    if field_data_cache is None:
+        with manual_transaction():
+            field_data_cache = field_data_cache_for_grading(course, student)
+    if scores_client is None:
+        scores_client = ScoresClient.from_field_data_cache(field_data_cache)

    # Dict of item_ids -> (earned, possible) point tuples. This *only* grabs
    # scores that were registered with the submissions API, which for the moment
    # means only openassessment (edx-ora2)
-    submissions_scores = sub_api.get_scores(
-        course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id)
-    )
+    submissions_scores = sub_api.get_scores(course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id))
+    max_scores_cache = MaxScoresCache.create_for_course(course)
+    # For the moment, we have to get scorable_locations from field_data_cache
+    # and not from scores_client, because scores_client is ignorant of things
+    # in the submissions API. As a further refactoring step, submissions should
+    # be hidden behind the ScoresClient.
+    max_scores_cache.fetch_from_remote(field_data_cache.scorable_locations)
+
+    grading_context = course.grading_context
+    raw_scores = []

    totaled_scores = {}
    # This next complicated loop is just to collect the totaled_scores, which is
@@ -202,13 +333,10 @@ def _grade(student, request, course, keep_raw_scores):
                )

            if not should_grade_section:
-                with manual_transaction():
-                    should_grade_section = StudentModule.objects.filter(
-                        student=student,
-                        module_state_key__in=[
-                            descriptor.location for descriptor in section['xmoduledescriptors']
-                        ]
-                    ).exists()
+                should_grade_section = any(
+                    descriptor.location in scores_client
+                    for descriptor in section['xmoduledescriptors']
+                )

            # If we haven't seen a single problem in the section, we don't have
            # to grade it at all! We can assume 0%
@@ -219,23 +347,24 @@ def _grade(student, request, course, keep_raw_scores):
                    '''creates an XModule instance given a descriptor'''
                    # TODO: We need the request to pass into here. If we could forego that, our arguments
                    # would be simpler
-                    with manual_transaction():
-                        field_data_cache = FieldDataCache([descriptor], course.id, student)
                    return get_module_for_descriptor(
                        student, request, descriptor, field_data_cache, course.id, course=course
                    )

-                for module_descriptor in yield_dynamic_descriptor_descendants(
-                        section_descriptor, student.id, create_module
-                ):
-
+                descendants = yield_dynamic_descriptor_descendants(section_descriptor, student.id, create_module)
+                for module_descriptor in descendants:
                    (correct, total) = get_score(
-                        course.id, student, module_descriptor, create_module, scores_cache=submissions_scores
+                        student,
+                        module_descriptor,
+                        create_module,
+                        scores_client,
+                        submissions_scores,
+                        max_scores_cache,
                    )
                    if correct is None and total is None:
                        continue

-                    if settings.GENERATE_PROFILE_SCORES:  	# for debugging!
+                    if settings.GENERATE_PROFILE_SCORES:    # for debugging!
                        if total > 1:
                            correct = random.randrange(max(total - 2, 1), total + 1)
                        else:
@@ -256,7 +385,7 @@ def _grade(student, request, course, keep_raw_scores):
                        )
                    )

-                _, graded_total = graders.aggregate_scores(scores, section_name)
+                __, graded_total = graders.aggregate_scores(scores, section_name)
                if keep_raw_scores:
                    raw_scores += scores
            else:
@@ -283,11 +412,14 @@ def _grade(student, request, course, keep_raw_scores):

    letter_grade = grade_for_percentage(course.grade_cutoffs, grade_summary['percent'])
    grade_summary['grade'] = letter_grade
-    grade_summary['totaled_scores'] = totaled_scores  	# make this available, eg for instructor download & debugging
+    grade_summary['totaled_scores'] = totaled_scores   # make this available, eg for instructor download & debugging
    if keep_raw_scores:
        # way to get all RAW scores out to instructor
        # so grader can be double-checked
        grade_summary['raw_scores'] = raw_scores
+
+    max_scores_cache.push_to_remote()
+
    return grade_summary


@@ -314,19 +446,19 @@ def grade_for_percentage(grade_cutoffs, percentage):


@transaction.commit_manually
-def progress_summary(student, request, course):
+def progress_summary(student, request, course, field_data_cache=None, scores_client=None):
    """
    Wraps "_progress_summary" with the manual_transaction context manager just
    in case there are unanticipated errors.
    """
    with manual_transaction():
-        return _progress_summary(student, request, course)
+        return _progress_summary(student, request, course, field_data_cache, scores_client)


 # TODO: This method is not very good. It was written in the old course style and
 # then converted over and performance is not good. Once the progress page is redesigned
 # to not have the progress summary this method should be deleted (so it won't be copied).
-def _progress_summary(student, request, course):
+def _progress_summary(student, request, course, field_data_cache=None, scores_client=None):
    """
    Unwrapped version of "progress_summary".

@@ -348,21 +480,26 @@ def _progress_summary(student, request, course):

    """
    with manual_transaction():
-        field_data_cache = FieldDataCache.cache_for_descriptor_descendents(
-            course.id, student, course, depth=None
-        )
-        # TODO: We need the request to pass into here. If we could
-        # forego that, our arguments would be simpler
+        if field_data_cache is None:
+            field_data_cache = field_data_cache_for_grading(course, student)
+        if scores_client is None:
+            scores_client = ScoresClient.from_field_data_cache(field_data_cache)
+
        course_module = get_module_for_descriptor(
            student, request, course, field_data_cache, course.id, course=course
        )
        if not course_module:
-            # This student must not have access to the course.
            return None

        course_module = getattr(course_module, '_x_module', course_module)

    submissions_scores = sub_api.get_scores(course.id.to_deprecated_string(), anonymous_id_for_user(student, course.id))
+    max_scores_cache = MaxScoresCache.create_for_course(course)
+    # For the moment, we have to get scorable_locations from field_data_cache
+    # and not from scores_client, because scores_client is ignorant of things
+    # in the submissions API. As a further refactoring step, submissions should
+    # be hidden behind the ScoresClient.
+    max_scores_cache.fetch_from_remote(field_data_cache.scorable_locations)

    chapters = []
    # Don't include chapters that aren't displayable (e.g. due to error)
@@ -389,7 +526,12 @@ def _progress_summary(student, request, course):
                ):
                    course_id = course.id
                    (correct, total) = get_score(
-                        course_id, student, module_descriptor, module_creator, scores_cache=submissions_scores
+                        student,
+                        module_descriptor,
+                        module_creator,
+                        scores_client,
+                        submissions_scores,
+                        max_scores_cache,
                    )
                    if correct is None and total is None:
                        continue
@@ -426,10 +568,20 @@ def _progress_summary(student, request, course):
            'sections': sections
        })

+    max_scores_cache.push_to_remote()
+
    return chapters


-def get_score(course_id, user, problem_descriptor, module_creator, scores_cache=None):
+def weighted_score(raw_correct, raw_total, weight):
+    """Return a tuple that represents the weighted (correct, total) score."""
+    # If there is no weighting, or weighting can't be applied, return input.
+    if weight is None or raw_total == 0:
+        return (raw_correct, raw_total)
+    return (float(raw_correct) * weight / raw_total, float(weight))
+
+
+def get_score(user, problem_descriptor, module_creator, scores_client, submissions_scores_cache, max_scores_cache):
    """
    Return the score for a user on a problem, as a tuple (correct, total).
    e.g. (5,7) if you got 5 out of 7 points.
@@ -439,19 +591,21 @@ def get_score(course_id, user, problem_descriptor, module_creator, scores_cache=

    user: a Student object
    problem_descriptor: an XModuleDescriptor
+    scores_client: an initialized ScoresClient
    module_creator: a function that takes a descriptor, and returns the corresponding XModule for this user.
           Can return None if user doesn't have access, or if something else went wrong.
-    scores_cache: A dict of location names to (earned, possible) point tuples.
+    submissions_scores_cache: A dict of location names to (earned, possible) point tuples.
           If an entry is found in this cache, it takes precedence.
+    max_scores_cache: a MaxScoresCache
    """
-    scores_cache = scores_cache or {}
+    submissions_scores_cache = submissions_scores_cache or {}

    if not user.is_authenticated():
        return (None, None)

    location_url = problem_descriptor.location.to_deprecated_string()
-    if location_url in scores_cache:
-        return scores_cache[location_url]
+    if location_url in submissions_scores_cache:
+        return submissions_scores_cache[location_url]

    # some problems have state that is updated independently of interaction
    # with the LMS, so they need to always be scored. (E.g. foldit.)
@@ -469,22 +623,27 @@ def get_score(course_id, user, problem_descriptor, module_creator, scores_cache=
        # These are not problems, and do not have a score
        return (None, None)

-    try:
-        student_module = StudentModule.objects.get(
-            student=user,
-            course_id=course_id,
-            module_state_key=problem_descriptor.location
-        )
-    except StudentModule.DoesNotExist:
-        student_module = None
-
-    if student_module is not None and student_module.max_grade is not None:
-        correct = student_module.grade if student_module.grade is not None else 0
-        total = student_module.max_grade
+    # Check the score that comes from the ScoresClient (out of CSM).
+    # If an entry exists and has a total associated with it, we trust that
+    # value. This is important for cases where a student might have seen an
+    # older version of the problem -- they're still graded on what was possible
+    # when they tried the problem, not what it's worth now.
+    score = scores_client.get(problem_descriptor.location)
+    cached_max_score = max_scores_cache.get(problem_descriptor.location)
+    if score and score.total is not None:
+        # We have a valid score, just use it.
+        correct = score.correct if score.correct is not None else 0.0
+        total = score.total
+    elif cached_max_score is not None and settings.FEATURES.get("ENABLE_MAX_SCORE_CACHE"):
+        # We don't have a valid score entry but we know from our cache what the
+        # max possible score is, so they've earned 0.0 / cached_max_score
+        correct = 0.0
+        total = cached_max_score
    else:
-        # If the problem was not in the cache, or hasn't been graded yet,
-        # we need to instantiate the problem.
-        # Otherwise, the max score (cached in student_module) won't be available
+        # This means we don't have a valid score entry and we don't have a
+        # cached_max_score on hand. We know they've earned 0.0 points on this,
+        # but we need to instantiate the module (i.e. load student state) in
+        # order to find out how much it was worth.
        problem = module_creator(problem_descriptor)
        if problem is None:
            return (None, None)
@@ -496,17 +655,11 @@ def get_score(course_id, user, problem_descriptor, module_creator, scores_cache=
        # In which case total might be None
        if total is None:
            return (None, None)
+        else:
+            # add location to the max score cache
+            max_scores_cache.set(problem_descriptor.location, total)

-    # Now we re-weight the problem, if specified
-    weight = problem_descriptor.weight
-    if weight is not None:
-        if total == 0:
-            log.exception("Cannot reweight a problem with zero total points. Problem: " + str(student_module))
-            return (correct, total)
-        correct = correct * weight / total
-        total = weight
-
-    return (correct, total)
+    return weighted_score(correct, total, problem_descriptor.weight)


@contextmanager
--- a/lms/djangoapps/courseware/model_data.py
+++ b/lms/djangoapps/courseware/model_data.py
@@ -23,7 +23,7 @@ DjangoOrmFieldCache: A base-class for single-row-per-field caches.

 import json
 from abc import abstractmethod, ABCMeta
-from collections import defaultdict
+from collections import defaultdict, namedtuple
 from .models import (
    StudentModule,
    XModuleUserStateSummaryField,
@@ -741,6 +741,7 @@ class FieldDataCache(object):
                self.course_id,
            ),
        }
+        self.scorable_locations = set()
        self.add_descriptors_to_cache(descriptors)

    def add_descriptors_to_cache(self, descriptors):
@@ -748,6 +749,7 @@ class FieldDataCache(object):
        Add all `descriptors` to this FieldDataCache.
        """
        if self.user.is_authenticated():
+            self.scorable_locations.update(desc.location for desc in descriptors if desc.has_score)
            for scope, fields in self._fields_to_cache(descriptors).items():
                if scope not in self.cache:
                    continue
@@ -955,3 +957,63 @@ class FieldDataCache(object):

    def __len__(self):
        return sum(len(cache) for cache in self.cache.values())
+
+
+class ScoresClient(object):
+    """
+    Basic client interface for retrieving Score information.
+
+    Eventually, this should read and write scores, but at the moment it only
+    handles the read side of things.
+    """
+    Score = namedtuple('Score', 'correct total')
+
+    def __init__(self, course_key, user_id):
+        """Basic constructor. from_field_data_cache() is more appopriate for most uses."""
+        self.course_key = course_key
+        self.user_id = user_id
+        self._locations_to_scores = {}
+        self._has_fetched = False
+
+    def __contains__(self, location):
+        """Return True if we have a score for this location."""
+        return location in self._locations_to_scores
+
+    def fetch_scores(self, locations):
+        """Grab score information."""
+        scores_qset = StudentModule.objects.filter(
+            student_id=self.user_id,
+            course_id=self.course_key,
+            module_state_key__in=set(locations),
+        )
+        # Locations in StudentModule don't necessarily have course key info
+        # attached to them (since old mongo identifiers don't include runs).
+        # So we have to add that info back in before we put it into our lookup.
+        self._locations_to_scores.update({
+            UsageKey.from_string(location).map_into_course(self.course_key): self.Score(correct, total)
+            for location, correct, total
+            in scores_qset.values_list('module_state_key', 'grade', 'max_grade')
+        })
+        self._has_fetched = True
+
+    def get(self, location):
+        """
+        Get the score for a given location, if it exists.
+
+        If we don't have a score for that location, return `None`. Note that as
+        convention, you should be passing in a location with full course run
+        information.
+        """
+        if not self._has_fetched:
+            raise ValueError(
+                "Tried to fetch location {} from ScoresClient before fetch_scores() has run."
+                .format(location)
+            )
+        return self._locations_to_scores.get(location)
+
+    @classmethod
+    def from_field_data_cache(cls, fd_cache):
+        """Create a ScoresClient from a populated FieldDataCache."""
+        client = cls(fd_cache.course_id, fd_cache.user.id)
+        client.fetch_scores(fd_cache.scorable_locations)
+        return client
--- a/lms/djangoapps/courseware/models.py
+++ b/lms/djangoapps/courseware/models.py
@@ -133,7 +133,10 @@ class StudentModule(models.Model):
        return 'StudentModule<%r>' % ({
            'course_id': self.course_id,
            'module_type': self.module_type,
-            'student': self.student.username,  # pylint: disable=no-member
+            # We use the student_id instead of username to avoid a database hop.
+            # This can actually matter in cases where we're logging many of
+            # these (e.g. on a broken progress page).
+            'student_id': self.student_id,  # pylint: disable=no-member
            'module_state_key': self.module_state_key,
            'state': str(self.state)[:20],
        },)
--- a/lms/djangoapps/courseware/tests/test_grades.py
+++ b/lms/djangoapps/courseware/tests/test_grades.py
@@ -2,13 +2,16 @@
 Test grade calculation.
 """
 from django.http import Http404
+from django.test.client import RequestFactory
+
 from mock import patch
 from nose.plugins.attrib import attr
 from opaque_keys.edx.locations import SlashSeparatedCourseKey

-from courseware.grades import grade, iterate_grades_for
+from courseware.grades import field_data_cache_for_grading, grade, iterate_grades_for, MaxScoresCache
 from student.tests.factories import UserFactory
-from xmodule.modulestore.tests.factories import CourseFactory
+from student.models import CourseEnrollment
+from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory
 from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase


@@ -121,3 +124,73 @@ class TestGradeIteration(ModuleStoreTestCase):
                students_to_errors[student] = err_msg

        return students_to_gradesets, students_to_errors
+
+
+class TestMaxScoresCache(ModuleStoreTestCase):
+    """
+    Tests for the MaxScoresCache
+    """
+    def setUp(self):
+        super(TestMaxScoresCache, self).setUp()
+        self.student = UserFactory.create()
+        self.course = CourseFactory.create()
+        self.problems = []
+        for _ in xrange(3):
+            self.problems.append(
+                ItemFactory.create(category='problem', parent=self.course)
+            )
+
+        CourseEnrollment.enroll(self.student, self.course.id)
+        self.request = RequestFactory().get('/')
+        self.locations = [problem.location for problem in self.problems]
+
+    def test_max_scores_cache(self):
+        """
+        Tests the behavior fo the MaxScoresCache
+        """
+        max_scores_cache = MaxScoresCache("test_max_scores_cache")
+        self.assertEqual(max_scores_cache.num_cached_from_remote(), 0)
+        self.assertEqual(max_scores_cache.num_cached_updates(), 0)
+
+        # add score to cache
+        max_scores_cache.set(self.locations[0], 1)
+        self.assertEqual(max_scores_cache.num_cached_updates(), 1)
+
+        # push to remote cache
+        max_scores_cache.push_to_remote()
+
+        # create a new cache with the same params, fetch from remote cache
+        max_scores_cache = MaxScoresCache("test_max_scores_cache")
+        max_scores_cache.fetch_from_remote(self.locations)
+
+        # see cache is populated
+        self.assertEqual(max_scores_cache.num_cached_from_remote(), 1)
+
+
+class TestFieldDataCacheScorableLocations(ModuleStoreTestCase):
+    """
+    Make sure we can filter the locations we pull back student state for via
+    the FieldDataCache.
+    """
+    def setUp(self):
+        super(TestFieldDataCacheScorableLocations, self).setUp()
+        self.student = UserFactory.create()
+        self.course = CourseFactory.create()
+        chapter = ItemFactory.create(category='chapter', parent=self.course)
+        sequential = ItemFactory.create(category='sequential', parent=chapter)
+        vertical = ItemFactory.create(category='vertical', parent=sequential)
+        ItemFactory.create(category='video', parent=vertical)
+        ItemFactory.create(category='html', parent=vertical)
+        ItemFactory.create(category='discussion', parent=vertical)
+        ItemFactory.create(category='problem', parent=vertical)
+
+        CourseEnrollment.enroll(self.student, self.course.id)
+
+    def test_field_data_cache_scorable_locations(self):
+        """Only scorable locations should be in FieldDataCache.scorable_locations."""
+        fd_cache = field_data_cache_for_grading(self.course, self.student)
+        block_types = set(loc.block_type for loc in fd_cache.scorable_locations)
+        self.assertNotIn('video', block_types)
+        self.assertNotIn('html', block_types)
+        self.assertNotIn('discussion', block_types)
+        self.assertIn('problem', block_types)
--- a/lms/djangoapps/courseware/tests/test_model_data.py
+++ b/lms/djangoapps/courseware/tests/test_model_data.py
@@ -6,8 +6,7 @@ from mock import Mock, patch
 from nose.plugins.attrib import attr
 from functools import partial

-from courseware.model_data import DjangoKeyValueStore
-from courseware.model_data import InvalidScopeError, FieldDataCache
+from courseware.model_data import DjangoKeyValueStore, FieldDataCache, InvalidScopeError
 from courseware.models import StudentModule
 from courseware.models import XModuleStudentInfoField, XModuleStudentPrefsField

--- a/lms/djangoapps/courseware/tests/test_submitting_problems.py
+++ b/lms/djangoapps/courseware/tests/test_submitting_problems.py
@@ -109,6 +109,15 @@ class TestSubmittingProblems(ModuleStoreTestCase, LoginEnrollmentTestCase):

        return resp

+    def look_at_question(self, problem_url_name):
+        """
+        Create state for a problem, but don't answer it
+        """
+        location = self.problem_location(problem_url_name)
+        modx_url = self.modx_url(location, "problem_get")
+        resp = self.client.get(modx_url)
+        return resp
+
    def reset_question_answer(self, problem_url_name):
        """
        Reset specified problem for current user.
@@ -457,6 +466,33 @@ class TestCourseGrader(TestSubmittingProblems):
        current_count = csmh.count()
        self.assertEqual(current_count, 3)

+    def test_grade_with_max_score_cache(self):
+        """
+        Tests that the max score cache is populated after a grading run
+        and that the results of grading runs before and after the cache
+        warms are the same.
+        """
+        self.basic_setup()
+        self.submit_question_answer('p1', {'2_1': 'Correct'})
+        self.look_at_question('p2')
+        self.assertTrue(
+            StudentModule.objects.filter(
+                module_state_key=self.problem_location('p2')
+            ).exists()
+        )
+        location_to_cache = unicode(self.problem_location('p2'))
+        max_scores_cache = grades.MaxScoresCache.create_for_course(self.course)
+
+        # problem isn't in the cache
+        max_scores_cache.fetch_from_remote([location_to_cache])
+        self.assertIsNone(max_scores_cache.get(location_to_cache))
+        self.check_grade_percent(0.33)
+
+        # problem is in the cache
+        max_scores_cache.fetch_from_remote([location_to_cache])
+        self.assertIsNotNone(max_scores_cache.get(location_to_cache))
+        self.check_grade_percent(0.33)
+
    def test_none_grade(self):
        """
        Check grade is 0 to begin with.
@@ -474,6 +510,13 @@ class TestCourseGrader(TestSubmittingProblems):
        self.check_grade_percent(0.33)
        self.assertEqual(self.get_grade_summary()['grade'], 'B')

+    @patch.dict("django.conf.settings.FEATURES", {"ENABLE_MAX_SCORE_CACHE": False})
+    def test_grade_no_max_score_cache(self):
+        """
+        Tests grading when the max score cache is disabled
+        """
+        self.test_b_grade_exact()
+
    def test_b_grade_above(self):
        """
        Check grade between cutoffs.
--- a/lms/djangoapps/courseware/views.py
+++ b/lms/djangoapps/courseware/views.py
@@ -44,7 +44,7 @@ from openedx.core.djangoapps.credit.api import (
    is_user_eligible_for_credit,
    is_credit_course
 )
-from courseware.model_data import FieldDataCache
+from courseware.model_data import FieldDataCache, ScoresClient
 from .module_render import toc_for_course, get_module_for_descriptor, get_module, get_module_by_usage_id
 from .entrance_exams import (
    course_has_entrance_exam,
@@ -1054,10 +1054,15 @@ def _progress(request, course_key, student_id):
    # The pre-fetching of groups is done to make auth checks not require an
    # additional DB lookup (this kills the Progress page in particular).
    student = User.objects.prefetch_related("groups").get(id=student.id)
-
-    courseware_summary = grades.progress_summary(student, request, course)
+    field_data_cache = grades.field_data_cache_for_grading(course, student)
+    scores_client = ScoresClient.from_field_data_cache(field_data_cache)
+    courseware_summary = grades.progress_summary(
+        student, request, course, field_data_cache=field_data_cache, scores_client=scores_client
+    )
+    grade_summary = grades.grade(
+        student, request, course, field_data_cache=field_data_cache, scores_client=scores_client
+    )
    studio_url = get_studio_url(course, 'settings/grading')
-    grade_summary = grades.grade(student, request, course)

    if courseware_summary is None:
        #This means the student didn't have access to the course (which the instructor requested)
--- a/lms/djangoapps/instructor_task/tests/test_tasks_helper.py
+++ b/lms/djangoapps/instructor_task/tests/test_tasks_helper.py
@@ -1336,7 +1336,7 @@ class TestCertificateGeneration(InstructorTaskModuleTestCase):

        current_task = Mock()
        current_task.update_state = Mock()
-        with self.assertNumQueries(109):
+        with self.assertNumQueries(125):
            with patch('instructor_task.tasks_helper._get_current_task') as mock_current_task:
                mock_current_task.return_value = current_task
                with patch('capa.xqueue_interface.XQueueInterface.send_to_queue') as mock_queue:
--- a/lms/envs/common.py
+++ b/lms/envs/common.py
@@ -40,6 +40,7 @@ from django.utils.translation import ugettext_lazy as _
 from .discussionsettings import *
 import dealer.git
 from xmodule.modulestore.modulestore_settings import update_module_store_settings
+from xmodule.modulestore.edit_info import EditInfoMixin
 from xmodule.mixin import LicenseMixin
 from lms.djangoapps.lms_xblock.mixin import LmsBlockMixin

@@ -416,6 +417,9 @@ FEATURES = {

    # The block types to disable need to be specified in "x block disable config" in django admin.
    'ENABLE_DISABLING_XBLOCK_TYPES': True,
+
+    # Enable the max score cache to speed up grading
+    'ENABLE_MAX_SCORE_CACHE': True,
 }

 # Ignore static asset files on import which match this pattern
@@ -703,7 +707,7 @@ from xmodule.x_module import XModuleMixin
 # These are the Mixins that should be added to every XBlock.
 # This should be moved into an XBlock Runtime/Application object
 # once the responsibility of XBlock creation is moved out of modulestore - cpennington
-XBLOCK_MIXINS = (LmsBlockMixin, InheritanceMixin, XModuleMixin)
+XBLOCK_MIXINS = (LmsBlockMixin, InheritanceMixin, XModuleMixin, EditInfoMixin)

 # Allow any XBlock in the LMS
 XBLOCK_SELECT_FUNCTION = prefer_xmodules