Merge pull request #2014 from edx/ormsbee/grade_distribution_fix
Fix (re-implement) answer distribution report generation.
This commit is contained in:
@@ -5,6 +5,8 @@ These are notable changes in edx-platform. This is a rolling list of changes,
|
||||
in roughly chronological order, most recent first. Add your entries at or near
|
||||
the top. Include a label indicating the component affected.
|
||||
|
||||
LMS: Fix answer distribution download for small courses. LMS-922, LMS-811
|
||||
|
||||
Blades: Add template for the zooming image in studio. BLD-206.
|
||||
|
||||
Blades: Update behavior of start/end time fields. BLD-506.
|
||||
|
||||
@@ -660,8 +660,8 @@ class OptionResponseXMLFactory(ResponseXMLFactory):
|
||||
|
||||
# Set the "options" attribute
|
||||
# Format: "('first', 'second', 'third')"
|
||||
options_attr_string = ",".join(["'%s'" % str(o) for o in options_list])
|
||||
options_attr_string = "(%s)" % options_attr_string
|
||||
options_attr_string = u",".join([u"'{}'".format(o) for o in options_list])
|
||||
options_attr_string = u"({})".format(options_attr_string)
|
||||
optioninput_element.set('options', options_attr_string)
|
||||
|
||||
# Set the "correct" attribute
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Compute grades using real division, with no integer truncation
|
||||
from __future__ import division
|
||||
from collections import defaultdict
|
||||
import json
|
||||
import random
|
||||
import logging
|
||||
|
||||
@@ -17,24 +18,15 @@ from courseware import courses
|
||||
from courseware.model_data import FieldDataCache
|
||||
from xblock.fields import Scope
|
||||
from xmodule import graders
|
||||
from xmodule.capa_module import CapaModule
|
||||
from xmodule.graders import Score
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from xmodule.modulestore.exceptions import ItemNotFoundError
|
||||
from .models import StudentModule
|
||||
from .module_render import get_module, get_module_for_descriptor
|
||||
|
||||
log = logging.getLogger("edx.courseware")
|
||||
|
||||
|
||||
def yield_module_descendents(module):
|
||||
stack = module.get_display_items()
|
||||
stack.reverse()
|
||||
|
||||
while len(stack) > 0:
|
||||
next_module = stack.pop()
|
||||
stack.extend(next_module.get_display_items())
|
||||
yield next_module
|
||||
|
||||
|
||||
def yield_dynamic_descriptor_descendents(descriptor, module_creator):
|
||||
"""
|
||||
This returns all of the descendants of a descriptor. If the descriptor
|
||||
@@ -58,74 +50,101 @@ def yield_dynamic_descriptor_descendents(descriptor, module_creator):
|
||||
yield next_descriptor
|
||||
|
||||
|
||||
def yield_problems(request, course, student):
|
||||
def answer_distributions(course_id):
|
||||
"""
|
||||
Return an iterator over capa_modules that this student has
|
||||
potentially answered. (all that student has answered will definitely be in
|
||||
the list, but there may be others as well).
|
||||
Given a course_id, return answer distributions in the form of a dictionary
|
||||
mapping:
|
||||
|
||||
(problem url_name, problem display_name, problem_id) -> {dict: answer -> count}
|
||||
|
||||
Answer distributions are found by iterating through all StudentModule
|
||||
entries for a given course with type="problem" and a grade that is not null.
|
||||
This means that we only count LoncapaProblems that people have submitted.
|
||||
Other types of items like ORA or sequences will not be collected. Empty
|
||||
Loncapa problem state that gets created from running the progress page is
|
||||
also not counted.
|
||||
|
||||
This method accesses the StudentModule table directly instead of using the
|
||||
CapaModule abstraction. The main reason for this is so that we can generate
|
||||
the report without any side-effects -- we don't have to worry about answer
|
||||
distribution potentially causing re-evaluation of the student answer. This
|
||||
also allows us to use the read-replica database, which reduces risk of bad
|
||||
locking behavior. And quite frankly, it makes this a lot less confusing.
|
||||
|
||||
Also, we're pulling all available records from the database for this course
|
||||
rather than crawling through a student's course-tree -- the latter could
|
||||
potentially cause us trouble with A/B testing. The distribution report may
|
||||
not be aware of problems that are not visible to the user being used to
|
||||
generate the report.
|
||||
|
||||
This method will try to use a read-replica database if one is available.
|
||||
"""
|
||||
grading_context = course.grading_context
|
||||
# dict: { module.module_state_key : (url_name, display_name) }
|
||||
state_keys_to_problem_info = {} # For caching, used by url_and_display_name
|
||||
|
||||
descriptor_locations = (descriptor.location.url() for descriptor in grading_context['all_descriptors'])
|
||||
existing_student_modules = set(StudentModule.objects.filter(
|
||||
module_state_key__in=descriptor_locations
|
||||
).values_list('module_state_key', flat=True))
|
||||
def url_and_display_name(module_state_key):
|
||||
"""
|
||||
For a given module_state_key, return the problem's url and display_name.
|
||||
Handle modulestore access and caching. This method ignores permissions.
|
||||
May throw an ItemNotFoundError if there is no content that corresponds
|
||||
to this module_state_key.
|
||||
"""
|
||||
problem_store = modulestore()
|
||||
if module_state_key not in state_keys_to_problem_info:
|
||||
problems = problem_store.get_items(module_state_key, course_id=course_id, depth=1)
|
||||
if not problems:
|
||||
# Likely means that the problem was deleted from the course
|
||||
# after the student had answered. We log this suspicion where
|
||||
# this exception is caught.
|
||||
raise ItemNotFoundError(
|
||||
"Answer Distribution: Module {} not found for course {}"
|
||||
.format(module_state_key, course_id)
|
||||
)
|
||||
problem = problems[0]
|
||||
problem_info = (problem.url_name, problem.display_name_with_default)
|
||||
state_keys_to_problem_info[module_state_key] = problem_info
|
||||
|
||||
sections_to_list = []
|
||||
for _, sections in grading_context['graded_sections'].iteritems():
|
||||
for section in sections:
|
||||
return state_keys_to_problem_info[module_state_key]
|
||||
|
||||
section_descriptor = section['section_descriptor']
|
||||
|
||||
# If the student hasn't seen a single problem in the section, skip it.
|
||||
for moduledescriptor in section['xmoduledescriptors']:
|
||||
if moduledescriptor.location.url() in existing_student_modules:
|
||||
sections_to_list.append(section_descriptor)
|
||||
break
|
||||
|
||||
field_data_cache = FieldDataCache(sections_to_list, course.id, student)
|
||||
for section_descriptor in sections_to_list:
|
||||
section_module = get_module(student, request,
|
||||
section_descriptor.location, field_data_cache,
|
||||
course.id)
|
||||
if section_module is None:
|
||||
# student doesn't have access to this module, or something else
|
||||
# went wrong.
|
||||
# log.debug("couldn't get module for student {0} for section location {1}"
|
||||
# .format(student.username, section_descriptor.location))
|
||||
# Iterate through all problems submitted for this course in no particular
|
||||
# order, and build up our answer_counts dict that we will eventually return
|
||||
answer_counts = defaultdict(lambda: defaultdict(int))
|
||||
for module in StudentModule.all_submitted_problems_read_only(course_id):
|
||||
try:
|
||||
state_dict = json.loads(module.state) if module.state else {}
|
||||
raw_answers = state_dict.get("student_answers", {})
|
||||
except ValueError:
|
||||
log.error(
|
||||
"Answer Distribution: Could not parse module state for " +
|
||||
"StudentModule id={}, course={}".format(module.id, course_id)
|
||||
)
|
||||
continue
|
||||
|
||||
for problem in yield_module_descendents(section_module):
|
||||
if isinstance(problem, CapaModule):
|
||||
yield problem
|
||||
# Each problem part has an ID that is derived from the
|
||||
# module.module_state_key (with some suffix appended)
|
||||
for problem_part_id, raw_answer in raw_answers.items():
|
||||
# Convert whatever raw answers we have (numbers, unicode, None, etc.)
|
||||
# to be unicode values. Note that if we get a string, it's always
|
||||
# unicode and not str -- state comes from the json decoder, and that
|
||||
# always returns unicode for strings.
|
||||
answer = unicode(raw_answer)
|
||||
|
||||
try:
|
||||
url, display_name = url_and_display_name(module.module_state_key)
|
||||
except ItemNotFoundError:
|
||||
msg = "Answer Distribution: Item {} referenced in StudentModule {} " + \
|
||||
"for user {} in course {} not found; " + \
|
||||
"This can happen if a student answered a question that " + \
|
||||
"was later deleted from the course. This answer will be " + \
|
||||
"omitted from the answer distribution CSV."
|
||||
log.warning(
|
||||
msg.format(module.module_state_key, module.id, module.student_id, course_id)
|
||||
)
|
||||
continue
|
||||
|
||||
def answer_distributions(request, course):
|
||||
"""
|
||||
Given a course_descriptor, compute frequencies of answers for each problem:
|
||||
|
||||
Format is:
|
||||
|
||||
dict: (problem url_name, problem display_name, problem_id) -> (dict : answer -> count)
|
||||
|
||||
TODO (vshnayder): this is currently doing a full linear pass through all
|
||||
students and all problems. This will be just a little slow.
|
||||
"""
|
||||
|
||||
counts = defaultdict(lambda: defaultdict(int))
|
||||
|
||||
enrolled_students = User.objects.filter(courseenrollment__course_id=course.id)
|
||||
|
||||
for student in enrolled_students:
|
||||
for capa_module in yield_problems(request, course, student):
|
||||
for problem_id in capa_module.lcp.student_answers:
|
||||
# Answer can be a list or some other unhashable element. Convert to string.
|
||||
answer = str(capa_module.lcp.student_answers[problem_id])
|
||||
key = (capa_module.url_name, capa_module.display_name_with_default, problem_id)
|
||||
counts[key][answer] += 1
|
||||
|
||||
return counts
|
||||
answer_counts[(url, display_name, problem_part_id)][answer] += 1
|
||||
|
||||
return answer_counts
|
||||
|
||||
@transaction.commit_manually
|
||||
def grade(student, request, course, keep_raw_scores=False):
|
||||
|
||||
@@ -13,6 +13,7 @@ ASSUMPTIONS: modules have unique IDs, even across different module_types
|
||||
|
||||
"""
|
||||
from django.contrib.auth.models import User
|
||||
from django.conf import settings
|
||||
from django.db import models
|
||||
from django.db.models.signals import post_save
|
||||
from django.dispatch import receiver
|
||||
@@ -57,6 +58,23 @@ class StudentModule(models.Model):
|
||||
created = models.DateTimeField(auto_now_add=True, db_index=True)
|
||||
modified = models.DateTimeField(auto_now=True, db_index=True)
|
||||
|
||||
@classmethod
|
||||
def all_submitted_problems_read_only(cls, course_id):
|
||||
"""
|
||||
Return all model instances that correspond to problems that have been
|
||||
submitted for a given course. So module_type='problem' and a non-null
|
||||
grade. Use a read replica if one exists for this environment.
|
||||
"""
|
||||
queryset = cls.objects.filter(
|
||||
course_id=course_id,
|
||||
module_type='problem',
|
||||
grade__isnull=False
|
||||
)
|
||||
if "read_replica" in settings.DATABASES:
|
||||
return queryset.using("read_replica")
|
||||
else:
|
||||
return queryset
|
||||
|
||||
def __repr__(self):
|
||||
return 'StudentModule<%r>' % ({
|
||||
'course_id': self.course_id,
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
"""Integration tests for submitting problem responses and getting grades."""
|
||||
|
||||
# text processing dependancies
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Integration tests for submitting problem responses and getting grades.
|
||||
"""
|
||||
# text processing dependencies
|
||||
import json
|
||||
import os
|
||||
from textwrap import dedent
|
||||
@@ -16,6 +18,7 @@ from django.test.utils import override_settings
|
||||
# Need access to internal func to put users in the right group
|
||||
from courseware import grades
|
||||
from courseware.model_data import FieldDataCache
|
||||
from courseware.models import StudentModule
|
||||
|
||||
from xmodule.modulestore.django import modulestore, editable_modulestore
|
||||
|
||||
@@ -29,6 +32,7 @@ from capa.tests.response_xml_factory import (
|
||||
from courseware.tests.helpers import LoginEnrollmentTestCase
|
||||
from courseware.tests.modulestore_config import TEST_DATA_MIXED_MODULESTORE
|
||||
from lms.lib.xblock.runtime import quote_slashes
|
||||
from student.tests.factories import UserFactory
|
||||
|
||||
|
||||
@override_settings(MODULESTORE=TEST_DATA_MIXED_MODULESTORE)
|
||||
@@ -141,7 +145,7 @@ class TestSubmittingProblems(ModuleStoreTestCase, LoginEnrollmentTestCase):
|
||||
question_text='The correct answer is Correct',
|
||||
num_inputs=num_inputs,
|
||||
weight=num_inputs,
|
||||
options=['Correct', 'Incorrect'],
|
||||
options=['Correct', 'Incorrect', u'ⓤⓝⓘⓒⓞⓓⓔ'],
|
||||
correct_option='Correct'
|
||||
)
|
||||
|
||||
@@ -852,3 +856,156 @@ class TestPythonGradedResponse(TestSubmittingProblems):
|
||||
name = 'computed_answer'
|
||||
self.computed_answer_setup(name)
|
||||
self._check_ireset(name)
|
||||
|
||||
|
||||
class TestAnswerDistributions(TestSubmittingProblems):
|
||||
"""Check that we can pull answer distributions for problems."""
|
||||
|
||||
def setUp(self):
|
||||
"""Set up a simple course with four problems."""
|
||||
super(TestAnswerDistributions, self).setUp()
|
||||
|
||||
self.homework = self.add_graded_section_to_course('homework')
|
||||
self.add_dropdown_to_section(self.homework.location, 'p1', 1)
|
||||
self.add_dropdown_to_section(self.homework.location, 'p2', 1)
|
||||
self.add_dropdown_to_section(self.homework.location, 'p3', 1)
|
||||
self.refresh_course()
|
||||
|
||||
def test_empty(self):
|
||||
# Just make sure we can process this without errors.
|
||||
empty_distribution = grades.answer_distributions(self.course.id)
|
||||
self.assertFalse(empty_distribution) # should be empty
|
||||
|
||||
def test_one_student(self):
|
||||
# Basic test to make sure we have simple behavior right for a student
|
||||
|
||||
# Throw in a non-ASCII answer
|
||||
self.submit_question_answer('p1', {'2_1': u'ⓤⓝⓘⓒⓞⓓⓔ'})
|
||||
self.submit_question_answer('p2', {'2_1': 'Correct'})
|
||||
|
||||
distributions = grades.answer_distributions(self.course.id)
|
||||
self.assertEqual(
|
||||
distributions,
|
||||
{
|
||||
('p1', 'p1', 'i4x-MITx-100-problem-p1_2_1'): {
|
||||
u'ⓤⓝⓘⓒⓞⓓⓔ': 1
|
||||
},
|
||||
('p2', 'p2', 'i4x-MITx-100-problem-p2_2_1'): {
|
||||
'Correct': 1
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
def test_multiple_students(self):
|
||||
# Our test class is based around making requests for a particular user,
|
||||
# so we're going to cheat by creating another user and copying and
|
||||
# modifying StudentModule entries to make them from other users. It's
|
||||
# a little hacky, but it seemed the simplest way to do this.
|
||||
self.submit_question_answer('p1', {'2_1': u'Correct'})
|
||||
self.submit_question_answer('p2', {'2_1': u'Incorrect'})
|
||||
self.submit_question_answer('p3', {'2_1': u'Correct'})
|
||||
|
||||
# Make the above submissions owned by user2
|
||||
user2 = UserFactory.create()
|
||||
problems = StudentModule.objects.filter(
|
||||
course_id=self.course.id,
|
||||
student_id=self.student_user.id
|
||||
)
|
||||
for problem in problems:
|
||||
problem.student_id = user2.id
|
||||
problem.save()
|
||||
|
||||
# Now make more submissions by our original user
|
||||
self.submit_question_answer('p1', {'2_1': u'Correct'})
|
||||
self.submit_question_answer('p2', {'2_1': u'Correct'})
|
||||
|
||||
self.assertEqual(
|
||||
grades.answer_distributions(self.course.id),
|
||||
{
|
||||
('p1', 'p1', 'i4x-MITx-100-problem-p1_2_1'): {
|
||||
'Correct': 2
|
||||
},
|
||||
('p2', 'p2', 'i4x-MITx-100-problem-p2_2_1'): {
|
||||
'Correct': 1,
|
||||
'Incorrect': 1
|
||||
},
|
||||
('p3', 'p3', 'i4x-MITx-100-problem-p3_2_1'): {
|
||||
'Correct': 1
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
def test_other_data_types(self):
|
||||
# We'll submit one problem, and then muck with the student_answers
|
||||
# dict inside its state to try different data types (str, int, float,
|
||||
# none)
|
||||
self.submit_question_answer('p1', {'2_1': u'Correct'})
|
||||
|
||||
# Now fetch the state entry for that problem.
|
||||
student_module = StudentModule.objects.get(
|
||||
course_id=self.course.id,
|
||||
student_id=self.student_user.id
|
||||
)
|
||||
for val in ('Correct', True, False, 0, 0.0, 1, 1.0, None):
|
||||
state = json.loads(student_module.state)
|
||||
state["student_answers"]['i4x-MITx-100-problem-p1_2_1'] = val
|
||||
student_module.state = json.dumps(state)
|
||||
student_module.save()
|
||||
|
||||
self.assertEqual(
|
||||
grades.answer_distributions(self.course.id),
|
||||
{
|
||||
('p1', 'p1', 'i4x-MITx-100-problem-p1_2_1'): {
|
||||
str(val): 1
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
def test_missing_content(self):
|
||||
# If there's a StudentModule entry for content that no longer exists,
|
||||
# we just quietly ignore it (because we can't display a meaningful url
|
||||
# or name for it).
|
||||
self.submit_question_answer('p1', {'2_1': 'Incorrect'})
|
||||
|
||||
# Now fetch the state entry for that problem and alter it so it points
|
||||
# to a non-existent problem.
|
||||
student_module = StudentModule.objects.get(
|
||||
course_id=self.course.id,
|
||||
student_id=self.student_user.id
|
||||
)
|
||||
student_module.module_state_key += "_fake"
|
||||
student_module.save()
|
||||
|
||||
# It should be empty (ignored)
|
||||
empty_distribution = grades.answer_distributions(self.course.id)
|
||||
self.assertFalse(empty_distribution) # should be empty
|
||||
|
||||
def test_broken_state(self):
|
||||
# Missing or broken state for a problem should be skipped without
|
||||
# causing the whole answer_distribution call to explode.
|
||||
|
||||
# Submit p1
|
||||
self.submit_question_answer('p1', {'2_1': u'Correct'})
|
||||
|
||||
# Now fetch the StudentModule entry for p1 so we can corrupt its state
|
||||
prb1 = StudentModule.objects.get(
|
||||
course_id=self.course.id,
|
||||
student_id=self.student_user.id
|
||||
)
|
||||
|
||||
# Submit p2
|
||||
self.submit_question_answer('p2', {'2_1': u'Incorrect'})
|
||||
|
||||
for new_p1_state in ('{"student_answers": {}}', "invalid json!", None):
|
||||
prb1.state = new_p1_state
|
||||
prb1.save()
|
||||
|
||||
# p1 won't show up, but p2 should still work
|
||||
self.assertEqual(
|
||||
grades.answer_distributions(self.course.id),
|
||||
{
|
||||
('p2', 'p2', 'i4x-MITx-100-problem-p2_2_1'): {
|
||||
'Incorrect': 1
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
@@ -140,7 +140,14 @@ def instructor_dashboard(request, course_id):
|
||||
writer.writerow(encoded_row)
|
||||
for datarow in datatable['data']:
|
||||
# 's' here may be an integer, float (eg score) or string (eg student name)
|
||||
encoded_row = [unicode(s).encode('utf-8') for s in datarow]
|
||||
encoded_row = [
|
||||
# If s is already a UTF-8 string, trying to make a unicode
|
||||
# object out of it will fail unless we pass in an encoding to
|
||||
# the constructor. But we can't do that across the board,
|
||||
# because s is often a numeric type. So just do this.
|
||||
s if isinstance(s, str) else unicode(s).encode('utf-8')
|
||||
for s in datarow
|
||||
]
|
||||
writer.writerow(encoded_row)
|
||||
return response
|
||||
|
||||
@@ -1492,14 +1499,16 @@ def get_answers_distribution(request, course_id):
|
||||
"""
|
||||
course = get_course_with_access(request.user, course_id, 'staff')
|
||||
|
||||
dist = grades.answer_distributions(request, course)
|
||||
dist = grades.answer_distributions(course.id)
|
||||
|
||||
d = {}
|
||||
d['header'] = ['url_name', 'display name', 'answer id', 'answer', 'count']
|
||||
|
||||
d['data'] = [[url_name, display_name, answer_id, a, answers[a]]
|
||||
for (url_name, display_name, answer_id), answers in dist.items()
|
||||
for a in answers]
|
||||
d['data'] = [
|
||||
[url_name, display_name, answer_id, a, answers[a]]
|
||||
for (url_name, display_name, answer_id), answers in sorted(dist.items())
|
||||
for a in answers
|
||||
]
|
||||
return d
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user