Merge pull request #17948 from open-craft/clemente/generate-report-data--for-capa-problems

Add generate_report_data to return a readable report of student answers to capa problems
This commit is contained in:
Eric Fischer
2018-05-24 15:52:17 -04:00
committed by GitHub
6 changed files with 388 additions and 6 deletions

View File

@@ -128,7 +128,7 @@ class LoncapaProblem(object):
Main class for capa Problems.
"""
def __init__(self, problem_text, id, capa_system, capa_module, # pylint: disable=redefined-builtin
state=None, seed=None, minimal_init=False):
state=None, seed=None, minimal_init=False, extract_tree=True):
"""
Initializes capa Problem.
@@ -147,6 +147,8 @@ class LoncapaProblem(object):
- `done` (bool) indicates whether or not this problem is considered done
- `input_state` (dict) maps input_id to a dictionary that holds the state for that input
seed (int): random number generator seed.
minimal_init (bool): whether to skip pre-processing student answers
extract_tree (bool): whether to parse the problem XML and store the HTML
"""
@@ -212,7 +214,8 @@ class LoncapaProblem(object):
if hasattr(response, 'late_transforms'):
response.late_transforms(self)
self.extracted_tree = self._extract_html(self.tree)
if extract_tree:
self.extracted_tree = self._extract_html(self.tree)
def make_xml_compatible(self, tree):
"""
@@ -492,6 +495,124 @@ class LoncapaProblem(object):
answer_ids.append(results.keys())
return answer_ids
def find_question_label(self, answer_id):
    """
    Obtain the most relevant question text for a particular answer.

    E.g. in a problem like "How much is 2+2?" "Two"/"Three"/"More than three",
    this function returns the "How much is 2+2?" text.

    It uses, in order:
    - the question prompt, if the question has one
    - the <p> or <label> element which precedes the choices (skipping descriptive elements)
    - a text like "Question 5" if no other name could be found

    Arguments:
        answer_id: a string like "98e6a8e915904d5389821a94e48babcf_13_1"

    Returns:
        a string with the question text
    """
    _ = self.capa_system.i18n.ugettext

    # Prompts are authored with the >>This is a prompt<< markup.
    prompt = self.problem_data[answer_id].get('label')
    if prompt:
        return prompt.striptags()

    # No prompt was authored, so hunt for something resembling a question.
    # The XML is shaped like:
    #
    #   <p />
    #   <optionresponse id="a0effb954cca4759994f1ac9e9434bf4_2">
    #     <optioninput id="a0effb954cca4759994f1ac9e9434bf4_3_1" />
    #   <optionresponse>
    #
    # Starting from the answer node (the optioninput in this example) we walk
    # up to its response element and then backwards through its siblings.
    matches = self.tree.xpath('//*[@id="' + answer_id + '"]')
    assert len(matches) == 1
    response_elem = matches[0].getparent()

    # Walk backwards over siblings looking for a <p> or <label>, skipping
    # <description> elements because they don't contain the question text.
    #
    # E.g. given:
    #   <p /> <description /> <optionresponse /> <optionresponse />
    #
    # starting from the first optionresponse we end at the <p>; starting from
    # the second optionresponse we find another response in the way, stop
    # early, and fall back to a generic "Question 2" name.
    skip_tags = ['description']
    label_tags = ['p', 'label']
    candidate = response_elem.getprevious()
    while candidate is not None and candidate.tag in skip_tags:
        candidate = candidate.getprevious()

    if candidate is not None and candidate.tag in label_tags:
        return candidate.text

    # For instance 'd2e35c1d294b4ba0b3b1048615605d2a_2_1' contains 2,
    # which is used in question number 1 (see the example XML above).
    # There's no question 0 (question IDs start at 1, answer IDs at 2).
    question_nr = int(answer_id.split('_')[-2]) - 1
    return _("Question {0}").format(question_nr)
def find_answer_text(self, answer_id, current_answer):
    """
    Process a raw answer text to make it more meaningful.

    E.g. in a choice problem like "How much is 2+2?" "Two"/"Three"/"More than three",
    this function will transform "choice_1" (which is the internal response given by
    many capa methods) to the human version, e.g. "More than three".

    If the answers are multiple (e.g. because they're from a multiple choice problem),
    this will join them with a comma.

    If passed a normal string which is already the answer, it doesn't change it.

    TODO merge with response_a11y_data?

    Arguments:
        answer_id: a string like "98e6a8e915904d5389821a94e48babcf_13_1"
        current_answer: a data structure as found in `LoncapaProblem.student_answers`
            which represents the best response we have until now

    Returns:
        a string with the human version of the response
    """
    if isinstance(current_answer, list):
        # Multiple answers (e.g. from a multiple choice problem): resolve
        # each one recursively and join them with commas.
        return ", ".join(
            self.find_answer_text(answer_id, answer) for answer in current_answer
        )

    if isinstance(current_answer, basestring):
        if not current_answer.startswith('choice_'):
            # Already a plain string holding the answer text.
            return current_answer
        # Many problems (e.g. checkbox) report answers as "choice_0",
        # "choice_1", etc. Map the internal name back to the human-readable
        # choice text via the input type's extract_choices().
        matches = self.tree.xpath('//*[@id="{answer_id}"]//*[@name="{choice_number}"]'.format(
            answer_id=answer_id,
            choice_number=current_answer
        ))
        assert len(matches) == 1
        choicegroup = matches[0].getparent()
        input_cls = inputtypes.registry.get_class_for_tag(choicegroup.tag)
        choices_map = dict(input_cls.extract_choices(choicegroup, self.capa_system.i18n, text_only=True))
        return choices_map[current_answer]

    # Any other structure (e.g. a dict) has no obvious text rendering.
    raise NotImplementedError()
def do_targeted_feedback(self, tree):
"""
Implements targeted-feedback in-place on <multiplechoiceresponse> --

View File

@@ -518,13 +518,15 @@ class ChoiceGroup(InputTypeBase):
'name_array_suffix': self.suffix}
@staticmethod
def extract_choices(element, i18n):
def extract_choices(element, i18n, text_only=False):
"""
Extracts choices for a few input types, such as ChoiceGroup, RadioGroup and
CheckboxGroup.
returns list of (choice_name, choice_text) tuples
By default it will return any XML tag in the choice (e.g. <choicehint>) unless text_only=True is passed.
TODO: allow order of choices to be randomized, following lon-capa spec. Use
"location" attribute, ie random, top, bottom.
"""
@@ -534,7 +536,11 @@ class ChoiceGroup(InputTypeBase):
for choice in element:
if choice.tag == 'choice':
choices.append((choice.get("name"), stringify_children(choice)))
if not text_only:
text = stringify_children(choice)
else:
text = choice.text
choices.append((choice.get("name"), text))
else:
if choice.tag != 'compoundhint':
msg = Text('[capa.inputtypes.extract_choices] {error_message}').format(

View File

@@ -4,9 +4,11 @@ Test capa problem.
import ddt
import textwrap
from lxml import etree
from mock import patch
import unittest
from capa.tests.helpers import new_loncapa_problem
from openedx.core.djangolib.markup import HTML
@ddt.ddt
@@ -590,3 +592,81 @@ class CAPAMultiInputProblemTest(unittest.TestCase):
description_element = multi_inputs_group.xpath('//p[@id="{}"]'.format(description_id))
self.assertEqual(len(description_element), 1)
self.assertEqual(description_element[0].text, descriptions[index])
@ddt.ddt
class CAPAProblemReportHelpersTest(unittest.TestCase):
    """ TestCase for CAPA methods for finding question labels and answer text """

    @ddt.data(
        ('answerid_2_1', 'label', 'label'),
        ('answerid_2_2', 'label <some>html</some>', 'label html'),
        ('answerid_2_2', '<more html="yes"/>label <some>html</some>', 'label html'),
        ('answerid_2_3', None, 'Question 1'),
        ('answerid_2_3', '', 'Question 1'),
        ('answerid_3_3', '', 'Question 2'),
    )
    @ddt.unpack
    def test_find_question_label(self, answer_id, label, stripped_label):
        """
        find_question_label returns the stripped question prompt when one is
        authored, and falls back to a generic "Question N" name otherwise.
        """
        problem = new_loncapa_problem(
            '<problem><some-problem id="{}"/></problem>'.format(answer_id)
        )
        mock_problem_data = {
            answer_id: {
                'label': HTML(label) if label else ''
            }
        }
        with patch.object(problem, 'problem_data', mock_problem_data):
            self.assertEqual(problem.find_question_label(answer_id), stripped_label)

    @ddt.data(None, dict(), [None])
    def test_find_answer_text_not_implemented(self, current_answer):
        """find_answer_text raises NotImplementedError for unsupported answer structures."""
        problem = new_loncapa_problem('<problem/>')
        self.assertRaises(NotImplementedError, problem.find_answer_text, '', current_answer)

    @ddt.data(
        ('1_2_1', 'choice_0', 'over-suspicious'),
        ('1_2_1', 'choice_1', 'funny'),
        ('1_3_1', 'choice_0', 'The iPad'),
        ('1_3_1', 'choice_2', 'The iPod'),
        ('1_3_1', ['choice_0', 'choice_1'], 'The iPad, Napster'),
        ('1_4_1', 'yellow', 'yellow'),
        ('1_4_1', 'blue', 'blue'),
    )
    @ddt.unpack
    def test_find_answer_text_choices(self, answer_id, choice_id, answer_text):
        """choice_N identifiers are mapped to their human-readable choice text."""
        problem = new_loncapa_problem(
            """
            <problem>
                <choiceresponse>
                    <checkboxgroup label="Select the correct synonym of paranoid?">
                        <choice correct="true">over-suspicious</choice>
                        <choice correct="false">funny</choice>
                    </checkboxgroup>
                </choiceresponse>
                <multiplechoiceresponse>
                    <choicegroup type="MultipleChoice">
                        <choice correct="false">The iPad</choice>
                        <choice correct="false">Napster</choice>
                        <choice correct="true">The iPod</choice>
                    </choicegroup>
                </multiplechoiceresponse>
                <optionresponse>
                    <optioninput options="('yellow','blue','green')" correct="blue" label="Color_1"/>
                </optionresponse>
            </problem>
            """
        )
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(problem.find_answer_text(answer_id, choice_id), answer_text)

    def test_find_answer_text_textinput(self):
        """A plain string answer (e.g. from a textline) is returned unchanged."""
        problem = new_loncapa_problem(
            """
            <problem>
                <stringresponse answer="hide" type="ci">
                    <textline size="40"/>
                </stringresponse>
            </problem>
            """
        )
        self.assertEqual(problem.find_answer_text('1_2_1', 'hide'), 'hide')

View File

@@ -303,6 +303,104 @@ class CapaDescriptor(CapaFields, RawDescriptor):
)
return lcp.get_max_score()
def generate_report_data(self, user_state_iterator, limit_responses=None):
    """
    Return a list of student responses to this block in a readable way.

    Arguments:
        user_state_iterator: iterator over UserStateClient objects.
            E.g. the result of user_state_client.iter_all_for_block(block_key)

        limit_responses (int|None): maximum number of responses to include.
            Set to None (default) to include all.

    Returns:
        each call returns a tuple like:
        ("username", {
                       "Question": "2 + 2 equals how many?",
                       "Answer": "Four",
                       "Answer ID": "98e6a8e915904d5389821a94e48babcf_10_1"
        })
    """
    # Imported lazily to keep this module importable without capa.
    from capa.capa_problem import LoncapaProblem, LoncapaSystem

    if self.category != 'problem':
        raise NotImplementedError()

    if limit_responses == 0:
        # Don't even start collecting answers
        return

    capa_system = LoncapaSystem(
        ajax_url=None,
        # TODO set anonymous_student_id to the anonymous ID of the user which answered each problem
        # Anonymous ID is required for Matlab, CodeResponse, and some custom problems that include
        # '$anonymous_student_id' in their XML.
        # For the purposes of this report, we don't need to support those use cases.
        anonymous_student_id=None,
        cache=None,
        can_execute_unsafe_code=lambda: None,
        get_python_lib_zip=lambda: None,
        DEBUG=None,
        filestore=self.runtime.resources_fs,
        i18n=self.runtime.service(self, "i18n"),
        node_path=None,
        render_template=None,
        seed=1,
        STATIC_URL=None,
        xqueue=None,
        matlab_api_key=None,
    )
    _ = capa_system.i18n.ugettext

    responses_emitted = 0
    for user_state in user_state_iterator:
        if 'student_answers' not in user_state.state:
            # Nothing submitted by this user; skip them entirely.
            continue

        problem = LoncapaProblem(
            problem_text=self.data,
            id=self.location.html_id(),
            capa_system=capa_system,
            # We choose to run without a fully initialized CapaModule
            capa_module=None,
            state={
                'done': user_state.state.get('done'),
                'correct_map': user_state.state.get('correct_map'),
                'student_answers': user_state.state.get('student_answers'),
                'has_saved_answers': user_state.state.get('has_saved_answers'),
                'input_state': user_state.state.get('input_state'),
                'seed': user_state.state.get('seed'),
            },
            seed=user_state.state.get('seed'),
            # extract_tree=False allows us to work without a fully initialized CapaModule
            # We'll still be able to find particular data in the XML when we need it
            extract_tree=False,
        )

        for answer_id, raw_answer in problem.student_answers.items():
            # Some types of problems have data in student_answers that isn't
            # in problem_data. E.g. formulae do this to store the MathML
            # version of the answer. Exclude those rows; the report only
            # needs the text-only answer.
            if answer_id.endswith('_dynamath'):
                continue
            if limit_responses and responses_emitted >= limit_responses:
                # Reached the cap: end the whole generator here.
                return
            responses_emitted += 1
            yield (user_state.username, {
                _("Answer ID"): answer_id,
                _("Question"): problem.find_question_label(answer_id),
                _("Answer"): problem.find_answer_text(answer_id, current_answer=raw_answer),
            })
# Proxy to CapaModule for access to any of its attributes
answer_available = module_attr('answer_available')
submit_button_name = module_attr('submit_button_name')

View File

@@ -15,6 +15,7 @@ import unittest
import ddt
from django.utils.encoding import smart_text
from edx_user_state_client.interface import XBlockUserState
from lxml import etree
from mock import Mock, patch, DEFAULT
import six
@@ -3133,3 +3134,79 @@ class TestProblemCheckTracking(unittest.TestCase):
problem.runtime.replace_jump_to_id_urls = Mock()
problem.get_answer(data)
self.assertTrue(problem.runtime.replace_jump_to_id_urls.called)
class TestCapaDescriptorReportGeneration(unittest.TestCase):
    """
    Ensure that Capa report generation works correctly
    """
    def setUp(self):
        # Call the parent's setUp so TestCase machinery (cleanups, etc.) runs.
        super(TestCapaDescriptorReportGeneration, self).setUp()
        # Stub out the question/answer lookups so these tests exercise only
        # generate_report_data's iteration/limiting logic.
        self.find_question_label_patcher = patch(
            'capa.capa_problem.LoncapaProblem.find_question_label',
            lambda self, answer_id: answer_id
        )
        self.find_answer_text_patcher = patch(
            'capa.capa_problem.LoncapaProblem.find_answer_text',
            lambda self, answer_id, current_answer: current_answer
        )
        self.find_question_label_patcher.start()
        self.find_answer_text_patcher.start()
        self.addCleanup(self.find_question_label_patcher.stop)
        self.addCleanup(self.find_answer_text_patcher.stop)

    def _mock_user_state_generator(self, user_count=1, response_count=10):
        """Yield one fake XBlockUserState per user, each with response_count answers."""
        for uid in range(user_count):
            yield self._user_state(username='user{}'.format(uid), response_count=response_count)

    def _user_state(self, username='testuser', response_count=10, suffix=''):
        """Build a fake XBlockUserState with response_count student answers."""
        return XBlockUserState(
            username=username,
            state={
                'student_answers': {
                    '{}_answerid_{}{}'.format(username, aid, suffix): '{}_answer_{}'.format(username, aid)
                    for aid in range(response_count)
                },
                'seed': 1,
                'correct_map': {},
            },
            block_key=None,
            updated=None,
            scope=None,
        )

    def _get_descriptor(self):
        """Build a minimal CapaDescriptor of block_type 'problem' with an empty problem body."""
        scope_ids = Mock(block_type='problem')
        descriptor = CapaDescriptor(get_test_system(), scope_ids=scope_ids)
        descriptor.runtime = Mock()
        descriptor.data = '<problem/>'
        return descriptor

    def test_generate_report_data_not_implemented(self):
        """Report generation only supports the 'problem' category."""
        scope_ids = Mock(block_type='noproblem')
        descriptor = CapaDescriptor(get_test_system(), scope_ids=scope_ids)
        with self.assertRaises(NotImplementedError):
            next(descriptor.generate_report_data(iter([])))

    def test_generate_report_data_limit_responses(self):
        """limit_responses caps the number of rows produced."""
        descriptor = self._get_descriptor()
        report_data = list(descriptor.generate_report_data(self._mock_user_state_generator(), 2))
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(2, len(report_data))

    def test_generate_report_data_dont_limit_responses(self):
        """With no limit, every answer from every user is reported."""
        descriptor = self._get_descriptor()
        user_count = 5
        response_count = 10
        report_data = list(descriptor.generate_report_data(
            self._mock_user_state_generator(
                user_count=user_count,
                response_count=response_count,
            )
        ))
        self.assertEqual(user_count * response_count, len(report_data))

    def test_generate_report_data_skip_dynamath(self):
        """Rows whose answer_id ends in _dynamath are excluded from the report."""
        descriptor = self._get_descriptor()
        iterator = iter([self._user_state(suffix='_dynamath')])
        report_data = list(descriptor.generate_report_data(iterator))
        self.assertEqual(0, len(report_data))

View File

@@ -459,7 +459,7 @@ class DjangoXBlockUserStateClient(XBlockUserStateClient):
if scope != Scope.user_state:
raise ValueError("Only Scope.user_state is supported")
results = StudentModule.objects.filter(module_state_key=block_key)
results = StudentModule.objects.order_by('id').filter(module_state_key=block_key)
p = Paginator(results, settings.USER_STATE_BATCH_SIZE)
for page_number in p.page_range:
@@ -491,7 +491,7 @@ class DjangoXBlockUserStateClient(XBlockUserStateClient):
if scope != Scope.user_state:
raise ValueError("Only Scope.user_state is supported")
results = StudentModule.objects.filter(course_id=course_key)
results = StudentModule.objects.order_by('id').filter(course_id=course_key)
if block_type:
results = results.filter(module_type=block_type)