diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 518cfd5066..3a83020f6a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,8 @@ These are notable changes in edx-platform. This is a rolling list of changes, in roughly chronological order, most recent first. Add your entries at or near the top. Include a label indicating the component affected. +LMS: Fix answer distribution download for small courses. LMS-922, LMS-811 + Blades: Add template for the zooming image in studio. BLD-206. Blades: Update behavior of start/end time fields. BLD-506. diff --git a/common/lib/capa/capa/tests/response_xml_factory.py b/common/lib/capa/capa/tests/response_xml_factory.py index 4c015d6699..2ac1aa5ac6 100644 --- a/common/lib/capa/capa/tests/response_xml_factory.py +++ b/common/lib/capa/capa/tests/response_xml_factory.py @@ -640,8 +640,8 @@ class OptionResponseXMLFactory(ResponseXMLFactory): # Set the "options" attribute # Format: "('first', 'second', 'third')" - options_attr_string = ",".join(["'%s'" % str(o) for o in options_list]) - options_attr_string = "(%s)" % options_attr_string + options_attr_string = u",".join([u"'{}'".format(o) for o in options_list]) + options_attr_string = u"({})".format(options_attr_string) optioninput_element.set('options', options_attr_string) # Set the "correct" attribute diff --git a/lms/djangoapps/courseware/grades.py b/lms/djangoapps/courseware/grades.py index 49b67ecc40..f36ea50a37 100644 --- a/lms/djangoapps/courseware/grades.py +++ b/lms/djangoapps/courseware/grades.py @@ -1,6 +1,7 @@ # Compute grades using real division, with no integer truncation from __future__ import division from collections import defaultdict +import json import random import logging @@ -17,24 +18,15 @@ from courseware import courses from courseware.model_data import FieldDataCache from xblock.fields import Scope from xmodule import graders -from xmodule.capa_module import CapaModule from xmodule.graders import Score +from xmodule.modulestore.django import modulestore 
+from xmodule.modulestore.exceptions import ItemNotFoundError from .models import StudentModule from .module_render import get_module, get_module_for_descriptor log = logging.getLogger("edx.courseware") -def yield_module_descendents(module): - stack = module.get_display_items() - stack.reverse() - - while len(stack) > 0: - next_module = stack.pop() - stack.extend(next_module.get_display_items()) - yield next_module - - def yield_dynamic_descriptor_descendents(descriptor, module_creator): """ This returns all of the descendants of a descriptor. If the descriptor @@ -58,74 +50,101 @@ def yield_dynamic_descriptor_descendents(descriptor, module_creator): yield next_descriptor -def yield_problems(request, course, student): +def answer_distributions(course_id): """ - Return an iterator over capa_modules that this student has - potentially answered. (all that student has answered will definitely be in - the list, but there may be others as well). + Given a course_id, return answer distributions in the form of a dictionary + mapping: + + (problem url_name, problem display_name, problem_id) -> {dict: answer -> count} + + Answer distributions are found by iterating through all StudentModule + entries for a given course with type="problem" and a grade that is not null. + This means that we only count LoncapaProblems that people have submitted. + Other types of items like ORA or sequences will not be collected. Empty + Loncapa problem state that gets created from running the progress page is + also not counted. + + This method accesses the StudentModule table directly instead of using the + CapaModule abstraction. The main reason for this is so that we can generate + the report without any side-effects -- we don't have to worry about answer + distribution potentially causing re-evaluation of the student answer. This + also allows us to use the read-replica database, which reduces risk of bad + locking behavior. And quite frankly, it makes this a lot less confusing. 
+ + Also, we're pulling all available records from the database for this course + rather than crawling through a student's course-tree -- the latter could + potentially cause us trouble with A/B testing. The distribution report may + not be aware of problems that are not visible to the user being used to + generate the report. + + This method will try to use a read-replica database if one is available. """ - grading_context = course.grading_context + # dict: { module.module_state_key : (url_name, display_name) } + state_keys_to_problem_info = {} # For caching, used by url_and_display_name - descriptor_locations = (descriptor.location.url() for descriptor in grading_context['all_descriptors']) - existing_student_modules = set(StudentModule.objects.filter( - module_state_key__in=descriptor_locations - ).values_list('module_state_key', flat=True)) + def url_and_display_name(module_state_key): + """ + For a given module_state_key, return the problem's url and display_name. + Handle modulestore access and caching. This method ignores permissions. + May throw an ItemNotFoundError if there is no content that corresponds + to this module_state_key. + """ + problem_store = modulestore() + if module_state_key not in state_keys_to_problem_info: + problems = problem_store.get_items(module_state_key, course_id=course_id, depth=1) + if not problems: + # Likely means that the problem was deleted from the course + # after the student had answered. We log this suspicion where + # this exception is caught. 
+ raise ItemNotFoundError( + "Answer Distribution: Module {} not found for course {}" + .format(module_state_key, course_id) + ) + problem = problems[0] + problem_info = (problem.url_name, problem.display_name_with_default) + state_keys_to_problem_info[module_state_key] = problem_info - sections_to_list = [] - for _, sections in grading_context['graded_sections'].iteritems(): - for section in sections: + return state_keys_to_problem_info[module_state_key] - section_descriptor = section['section_descriptor'] - - # If the student hasn't seen a single problem in the section, skip it. - for moduledescriptor in section['xmoduledescriptors']: - if moduledescriptor.location.url() in existing_student_modules: - sections_to_list.append(section_descriptor) - break - - field_data_cache = FieldDataCache(sections_to_list, course.id, student) - for section_descriptor in sections_to_list: - section_module = get_module(student, request, - section_descriptor.location, field_data_cache, - course.id) - if section_module is None: - # student doesn't have access to this module, or something else - # went wrong. 
- # log.debug("couldn't get module for student {0} for section location {1}" - # .format(student.username, section_descriptor.location)) + # Iterate through all problems submitted for this course in no particular + # order, and build up our answer_counts dict that we will eventually return + answer_counts = defaultdict(lambda: defaultdict(int)) + for module in StudentModule.all_submitted_problems_read_only(course_id): + try: + state_dict = json.loads(module.state) if module.state else {} + raw_answers = state_dict.get("student_answers", {}) + except ValueError: + log.error( + "Answer Distribution: Could not parse module state for " + + "StudentModule id={}, course={}".format(module.id, course_id) + ) continue - for problem in yield_module_descendents(section_module): - if isinstance(problem, CapaModule): - yield problem + # Each problem part has an ID that is derived from the + # module.module_state_key (with some suffix appended) + for problem_part_id, raw_answer in raw_answers.items(): + # Convert whatever raw answers we have (numbers, unicode, None, etc.) + # to be unicode values. Note that if we get a string, it's always + # unicode and not str -- state comes from the json decoder, and that + # always returns unicode for strings. + answer = unicode(raw_answer) + try: + url, display_name = url_and_display_name(module.module_state_key) + except ItemNotFoundError: + msg = "Answer Distribution: Item {} referenced in StudentModule {} " + \ + "for user {} in course {} not found; " + \ + "This can happen if a student answered a question that " + \ + "was later deleted from the course. This answer will be " + \ + "omitted from the answer distribution CSV." 
+ log.warning( + msg.format(module.module_state_key, module.id, module.student_id, course_id) + ) + continue -def answer_distributions(request, course): - """ - Given a course_descriptor, compute frequencies of answers for each problem: - - Format is: - - dict: (problem url_name, problem display_name, problem_id) -> (dict : answer -> count) - - TODO (vshnayder): this is currently doing a full linear pass through all - students and all problems. This will be just a little slow. - """ - - counts = defaultdict(lambda: defaultdict(int)) - - enrolled_students = User.objects.filter(courseenrollment__course_id=course.id) - - for student in enrolled_students: - for capa_module in yield_problems(request, course, student): - for problem_id in capa_module.lcp.student_answers: - # Answer can be a list or some other unhashable element. Convert to string. - answer = str(capa_module.lcp.student_answers[problem_id]) - key = (capa_module.url_name, capa_module.display_name_with_default, problem_id) - counts[key][answer] += 1 - - return counts + answer_counts[(url, display_name, problem_part_id)][answer] += 1 + return answer_counts @transaction.commit_manually def grade(student, request, course, keep_raw_scores=False): diff --git a/lms/djangoapps/courseware/models.py b/lms/djangoapps/courseware/models.py index 068c2e95cd..201f617cfa 100644 --- a/lms/djangoapps/courseware/models.py +++ b/lms/djangoapps/courseware/models.py @@ -13,6 +13,7 @@ ASSUMPTIONS: modules have unique IDs, even across different module_types """ from django.contrib.auth.models import User +from django.conf import settings from django.db import models from django.db.models.signals import post_save from django.dispatch import receiver @@ -57,6 +58,23 @@ class StudentModule(models.Model): created = models.DateTimeField(auto_now_add=True, db_index=True) modified = models.DateTimeField(auto_now=True, db_index=True) + @classmethod + def all_submitted_problems_read_only(cls, course_id): + """ + Return all model instances 
that correspond to problems that have been + submitted for a given course. So module_type='problem' and a non-null + grade. Use a read replica if one exists for this environment. + """ + queryset = cls.objects.filter( + course_id=course_id, + module_type='problem', + grade__isnull=False + ) + if "read_replica" in settings.DATABASES: + return queryset.using("read_replica") + else: + return queryset + def __repr__(self): return 'StudentModule<%r>' % ({ 'course_id': self.course_id, diff --git a/lms/djangoapps/courseware/tests/test_submitting_problems.py b/lms/djangoapps/courseware/tests/test_submitting_problems.py index 854750f8d1..6e0bebca8a 100644 --- a/lms/djangoapps/courseware/tests/test_submitting_problems.py +++ b/lms/djangoapps/courseware/tests/test_submitting_problems.py @@ -1,6 +1,8 @@ -"""Integration tests for submitting problem responses and getting grades.""" - -# text processing dependancies +# -*- coding: utf-8 -*- +""" +Integration tests for submitting problem responses and getting grades. 
+""" +# text processing dependencies import json from textwrap import dedent @@ -12,6 +14,7 @@ from django.test.utils import override_settings # Need access to internal func to put users in the right group from courseware import grades from courseware.model_data import FieldDataCache +from courseware.models import StudentModule from xmodule.modulestore.django import modulestore, editable_modulestore @@ -22,6 +25,7 @@ from capa.tests.response_xml_factory import OptionResponseXMLFactory, CustomResp from courseware.tests.helpers import LoginEnrollmentTestCase from courseware.tests.modulestore_config import TEST_DATA_MIXED_MODULESTORE from lms.lib.xblock.runtime import quote_slashes +from student.tests.factories import UserFactory @override_settings(MODULESTORE=TEST_DATA_MIXED_MODULESTORE) @@ -134,7 +138,7 @@ class TestSubmittingProblems(ModuleStoreTestCase, LoginEnrollmentTestCase): question_text='The correct answer is Correct', num_inputs=num_inputs, weight=num_inputs, - options=['Correct', 'Incorrect'], + options=['Correct', 'Incorrect', u'ⓤⓝⓘⓒⓞⓓⓔ'], correct_option='Correct' ) @@ -793,3 +797,156 @@ class TestPythonGradedResponse(TestSubmittingProblems): name = 'computed_answer' self.computed_answer_setup(name) self._check_ireset(name) + + +class TestAnswerDistributions(TestSubmittingProblems): + """Check that we can pull answer distributions for problems.""" + + def setUp(self): + """Set up a simple course with four problems.""" + super(TestAnswerDistributions, self).setUp() + + self.homework = self.add_graded_section_to_course('homework') + self.add_dropdown_to_section(self.homework.location, 'p1', 1) + self.add_dropdown_to_section(self.homework.location, 'p2', 1) + self.add_dropdown_to_section(self.homework.location, 'p3', 1) + self.refresh_course() + + def test_empty(self): + # Just make sure we can process this without errors. 
+ empty_distribution = grades.answer_distributions(self.course.id) + self.assertFalse(empty_distribution) # should be empty + + def test_one_student(self): + # Basic test to make sure we have simple behavior right for a student + + # Throw in a non-ASCII answer + self.submit_question_answer('p1', {'2_1': u'ⓤⓝⓘⓒⓞⓓⓔ'}) + self.submit_question_answer('p2', {'2_1': 'Correct'}) + + distributions = grades.answer_distributions(self.course.id) + self.assertEqual( + distributions, + { + ('p1', 'p1', 'i4x-MITx-100-problem-p1_2_1'): { + u'ⓤⓝⓘⓒⓞⓓⓔ': 1 + }, + ('p2', 'p2', 'i4x-MITx-100-problem-p2_2_1'): { + 'Correct': 1 + } + } + ) + + def test_multiple_students(self): + # Our test class is based around making requests for a particular user, + # so we're going to cheat by creating another user and copying and + # modifying StudentModule entries to make them from other users. It's + # a little hacky, but it seemed the simpler way to do this. + self.submit_question_answer('p1', {'2_1': u'Correct'}) + self.submit_question_answer('p2', {'2_1': u'Incorrect'}) + self.submit_question_answer('p3', {'2_1': u'Correct'}) + + # Make the above submissions owned by user2 + user2 = UserFactory.create() + problems = StudentModule.objects.filter( + course_id=self.course.id, + student_id=self.student_user.id + ) + for problem in problems: + problem.student_id = user2.id + problem.save() + + # Now make more submissions by our original user + self.submit_question_answer('p1', {'2_1': u'Correct'}) + self.submit_question_answer('p2', {'2_1': u'Correct'}) + + self.assertEqual( + grades.answer_distributions(self.course.id), + { + ('p1', 'p1', 'i4x-MITx-100-problem-p1_2_1'): { + 'Correct': 2 + }, + ('p2', 'p2', 'i4x-MITx-100-problem-p2_2_1'): { + 'Correct': 1, + 'Incorrect': 1 + }, + ('p3', 'p3', 'i4x-MITx-100-problem-p3_2_1'): { + 'Correct': 1 + } + } + ) + + def test_other_data_types(self): + # We'll submit one problem, and then muck with the student_answers + # dict inside its state to try different 
data types (str, int, float, + # none) + self.submit_question_answer('p1', {'2_1': u'Correct'}) + + # Now fetch the state entry for that problem. + student_module = StudentModule.objects.get( + course_id=self.course.id, + student_id=self.student_user.id + ) + for val in ('Correct', True, False, 0, 0.0, 1, 1.0, None): + state = json.loads(student_module.state) + state["student_answers"]['i4x-MITx-100-problem-p1_2_1'] = val + student_module.state = json.dumps(state) + student_module.save() + + self.assertEqual( + grades.answer_distributions(self.course.id), + { + ('p1', 'p1', 'i4x-MITx-100-problem-p1_2_1'): { + str(val): 1 + }, + } + ) + + def test_missing_content(self): + # If there's a StudentModule entry for content that no longer exists, + # we just quietly ignore it (because we can't display a meaningful url + # or name for it). + self.submit_question_answer('p1', {'2_1': 'Incorrect'}) + + # Now fetch the state entry for that problem and alter it so it points + # to a non-existent problem. + student_module = StudentModule.objects.get( + course_id=self.course.id, + student_id=self.student_user.id + ) + student_module.module_state_key += "_fake" + student_module.save() + + # It should be empty (ignored) + empty_distribution = grades.answer_distributions(self.course.id) + self.assertFalse(empty_distribution) # should be empty + + def test_broken_state(self): + # Missing or broken state for a problem should be skipped without + # causing the whole answer_distribution call to explode. 
+ + # Submit p1 + self.submit_question_answer('p1', {'2_1': u'Correct'}) + + # Now fetch the StudentModule entry for p1 so we can corrupt its state + prb1 = StudentModule.objects.get( + course_id=self.course.id, + student_id=self.student_user.id + ) + + # Submit p2 + self.submit_question_answer('p2', {'2_1': u'Incorrect'}) + + for new_p1_state in ('{"student_answers": {}}', "invalid json!", None): + prb1.state = new_p1_state + prb1.save() + + # p1 won't show up, but p2 should still work + self.assertEqual( + grades.answer_distributions(self.course.id), + { + ('p2', 'p2', 'i4x-MITx-100-problem-p2_2_1'): { + 'Incorrect': 1 + }, + } + ) diff --git a/lms/djangoapps/instructor/views/legacy.py b/lms/djangoapps/instructor/views/legacy.py index 51c29da728..3b46f4631e 100644 --- a/lms/djangoapps/instructor/views/legacy.py +++ b/lms/djangoapps/instructor/views/legacy.py @@ -140,7 +140,14 @@ def instructor_dashboard(request, course_id): writer.writerow(encoded_row) for datarow in datatable['data']: # 's' here may be an integer, float (eg score) or string (eg student name) - encoded_row = [unicode(s).encode('utf-8') for s in datarow] + encoded_row = [ + # If s is already a UTF-8 string, trying to make a unicode + # object out of it will fail unless we pass in an encoding to + # the constructor. But we can't do that across the board, + # because s is often a numeric type. So just do this. 
+ s if isinstance(s, str) else unicode(s).encode('utf-8') + for s in datarow + ] writer.writerow(encoded_row) return response @@ -1492,14 +1499,16 @@ def get_answers_distribution(request, course_id): """ course = get_course_with_access(request.user, course_id, 'staff') - dist = grades.answer_distributions(request, course) + dist = grades.answer_distributions(course.id) d = {} d['header'] = ['url_name', 'display name', 'answer id', 'answer', 'count'] - d['data'] = [[url_name, display_name, answer_id, a, answers[a]] - for (url_name, display_name, answer_id), answers in dist.items() - for a in answers] + d['data'] = [ + [url_name, display_name, answer_id, a, answers[a]] + for (url_name, display_name, answer_id), answers in sorted(dist.items()) + for a in answers + ] return d