Merge pull request #17948 from open-craft/clemente/generate-report-data--for-capa-problems

Add generate_report_data to return a readable report of student answers to capa problems
This commit is contained in:
Eric Fischer
2018-05-24 15:52:17 -04:00
committed by GitHub
6 changed files with 388 additions and 6 deletions

View File

@@ -128,7 +128,7 @@ class LoncapaProblem(object):
Main class for capa Problems.
"""
def __init__(self, problem_text, id, capa_system, capa_module, # pylint: disable=redefined-builtin
state=None, seed=None, minimal_init=False):
state=None, seed=None, minimal_init=False, extract_tree=True):
"""
Initializes capa Problem.
@@ -147,6 +147,8 @@ class LoncapaProblem(object):
- `done` (bool) indicates whether or not this problem is considered done
- `input_state` (dict) maps input_id to a dictionary that holds the state for that input
seed (int): random number generator seed.
minimal_init (bool): whether to skip pre-processing student answers
extract_tree (bool): whether to parse the problem XML and store the HTML
"""
@@ -212,7 +214,8 @@ class LoncapaProblem(object):
if hasattr(response, 'late_transforms'):
response.late_transforms(self)
self.extracted_tree = self._extract_html(self.tree)
if extract_tree:
self.extracted_tree = self._extract_html(self.tree)
def make_xml_compatible(self, tree):
"""
@@ -492,6 +495,124 @@ class LoncapaProblem(object):
answer_ids.append(results.keys())
return answer_ids
def find_question_label(self, answer_id):
    """
    Obtain the most relevant question text for a particular answer.

    E.g. in a problem like "How much is 2+2?" "Two"/"Three"/"More than three",
    this function returns the "How much is 2+2?" text.

    It uses, in order:
    - the question prompt, if the question has one
    - the <p> or <label> element which precedes the choices (skipping descriptive elements)
    - a text like "Question 5" if no other name could be found

    Arguments:
        answer_id: a string like "98e6a8e915904d5389821a94e48babcf_13_1"

    Returns:
        a string with the question text
    """
    _ = self.capa_system.i18n.ugettext

    # Prompts are authored with the >>This is a prompt<< markup.
    prompt = self.problem_data[answer_id].get('label')
    if prompt:
        return prompt.striptags()

    # No prompt was authored, so hunt for something resembling a question.
    # The XML is shaped like:
    #
    #   <p />
    #   <optionresponse id="a0effb954cca4759994f1ac9e9434bf4_2">
    #     <optioninput id="a0effb954cca4759994f1ac9e9434bf4_3_1" />
    #   <optionresponse>
    #
    # Starting from the answer node (the optioninput in this example) we walk
    # up to its response element and then backwards through its siblings.
    matches = self.tree.xpath('//*[@id="' + answer_id + '"]')
    assert len(matches) == 1
    response_elem = matches[0].getparent()

    # Walk backwards over siblings looking for a <p> or <label>, skipping
    # <description> elements because they don't contain the question text.
    #
    # E.g. given:
    #   <p /> <description /> <optionresponse /> <optionresponse />
    #
    # starting from the first optionresponse we end at the <p>; starting from
    # the second optionresponse we find another response in the way, stop
    # early, and fall back to a generic "Question 2" name.
    skip_tags = ['description']
    label_tags = ['p', 'label']
    candidate = response_elem.getprevious()
    while candidate is not None and candidate.tag in skip_tags:
        candidate = candidate.getprevious()

    if candidate is not None and candidate.tag in label_tags:
        return candidate.text

    # For instance 'd2e35c1d294b4ba0b3b1048615605d2a_2_1' contains 2,
    # which is used in question number 1 (see the example XML above).
    # There's no question 0 (question IDs start at 1, answer IDs at 2).
    question_nr = int(answer_id.split('_')[-2]) - 1
    return _("Question {0}").format(question_nr)
def find_answer_text(self, answer_id, current_answer):
    """
    Process a raw answer text to make it more meaningful.

    E.g. in a choice problem like "How much is 2+2?" "Two"/"Three"/"More than three",
    this function will transform "choice_1" (which is the internal response given by
    many capa methods) to the human version, e.g. "More than three".

    If the answers are multiple (e.g. because they're from a multiple choice problem),
    this will join them with a comma.

    If passed a normal string which is already the answer, it doesn't change it.

    TODO merge with response_a11y_data?

    Arguments:
        answer_id: a string like "98e6a8e915904d5389821a94e48babcf_13_1"
        current_answer: a data structure as found in `LoncapaProblem.student_answers`
            which represents the best response we have until now

    Returns:
        a string with the human version of the response
    """
    if isinstance(current_answer, list):
        # Multiple answers (e.g. from a multiple choice problem): resolve
        # each one recursively and join them with commas.
        return ", ".join(
            self.find_answer_text(answer_id, answer) for answer in current_answer
        )

    if isinstance(current_answer, basestring):
        if not current_answer.startswith('choice_'):
            # Already a plain string holding the answer text.
            return current_answer
        # Many problems (e.g. checkbox) report answers as "choice_0",
        # "choice_1", etc. Map the internal name back to the human-readable
        # choice text via the input type's extract_choices().
        matches = self.tree.xpath('//*[@id="{answer_id}"]//*[@name="{choice_number}"]'.format(
            answer_id=answer_id,
            choice_number=current_answer
        ))
        assert len(matches) == 1
        choicegroup = matches[0].getparent()
        input_cls = inputtypes.registry.get_class_for_tag(choicegroup.tag)
        choices_map = dict(input_cls.extract_choices(choicegroup, self.capa_system.i18n, text_only=True))
        return choices_map[current_answer]

    # Any other structure (e.g. a dict) has no obvious text rendering.
    raise NotImplementedError()
def do_targeted_feedback(self, tree):
"""
Implements targeted-feedback in-place on <multiplechoiceresponse> --

View File

@@ -518,13 +518,15 @@ class ChoiceGroup(InputTypeBase):
'name_array_suffix': self.suffix}
@staticmethod
def extract_choices(element, i18n):
def extract_choices(element, i18n, text_only=False):
"""
Extracts choices for a few input types, such as ChoiceGroup, RadioGroup and
CheckboxGroup.
returns list of (choice_name, choice_text) tuples
By default it will return any XML tag in the choice (e.g. <choicehint>) unless text_only=True is passed.
TODO: allow order of choices to be randomized, following lon-capa spec. Use
"location" attribute, ie random, top, bottom.
"""
@@ -534,7 +536,11 @@ class ChoiceGroup(InputTypeBase):
for choice in element:
if choice.tag == 'choice':
choices.append((choice.get("name"), stringify_children(choice)))
if not text_only:
text = stringify_children(choice)
else:
text = choice.text
choices.append((choice.get("name"), text))
else:
if choice.tag != 'compoundhint':
msg = Text('[capa.inputtypes.extract_choices] {error_message}').format(

View File

@@ -4,9 +4,11 @@ Test capa problem.
import ddt
import textwrap
from lxml import etree
from mock import patch
import unittest
from capa.tests.helpers import new_loncapa_problem
from openedx.core.djangolib.markup import HTML
@ddt.ddt
@@ -590,3 +592,81 @@ class CAPAMultiInputProblemTest(unittest.TestCase):
description_element = multi_inputs_group.xpath('//p[@id="{}"]'.format(description_id))
self.assertEqual(len(description_element), 1)
self.assertEqual(description_element[0].text, descriptions[index])
@ddt.ddt
class CAPAProblemReportHelpersTest(unittest.TestCase):
    """ TestCase for CAPA methods for finding question labels and answer text """

    @ddt.data(
        ('answerid_2_1', 'label', 'label'),
        ('answerid_2_2', 'label <some>html</some>', 'label html'),
        ('answerid_2_2', '<more html="yes"/>label <some>html</some>', 'label html'),
        ('answerid_2_3', None, 'Question 1'),
        ('answerid_2_3', '', 'Question 1'),
        ('answerid_3_3', '', 'Question 2'),
    )
    @ddt.unpack
    def test_find_question_label(self, answer_id, label, stripped_label):
        """
        find_question_label returns the stripped question prompt when one is
        authored, and falls back to a generic "Question N" name otherwise.
        """
        problem = new_loncapa_problem(
            '<problem><some-problem id="{}"/></problem>'.format(answer_id)
        )
        mock_problem_data = {
            answer_id: {
                'label': HTML(label) if label else ''
            }
        }
        with patch.object(problem, 'problem_data', mock_problem_data):
            self.assertEqual(problem.find_question_label(answer_id), stripped_label)

    @ddt.data(None, dict(), [None])
    def test_find_answer_text_not_implemented(self, current_answer):
        """find_answer_text raises NotImplementedError for unsupported answer structures."""
        problem = new_loncapa_problem('<problem/>')
        self.assertRaises(NotImplementedError, problem.find_answer_text, '', current_answer)

    @ddt.data(
        ('1_2_1', 'choice_0', 'over-suspicious'),
        ('1_2_1', 'choice_1', 'funny'),
        ('1_3_1', 'choice_0', 'The iPad'),
        ('1_3_1', 'choice_2', 'The iPod'),
        ('1_3_1', ['choice_0', 'choice_1'], 'The iPad, Napster'),
        ('1_4_1', 'yellow', 'yellow'),
        ('1_4_1', 'blue', 'blue'),
    )
    @ddt.unpack
    def test_find_answer_text_choices(self, answer_id, choice_id, answer_text):
        """choice_N identifiers are mapped to their human-readable choice text."""
        problem = new_loncapa_problem(
            """
            <problem>
                <choiceresponse>
                    <checkboxgroup label="Select the correct synonym of paranoid?">
                        <choice correct="true">over-suspicious</choice>
                        <choice correct="false">funny</choice>
                    </checkboxgroup>
                </choiceresponse>
                <multiplechoiceresponse>
                    <choicegroup type="MultipleChoice">
                        <choice correct="false">The iPad</choice>
                        <choice correct="false">Napster</choice>
                        <choice correct="true">The iPod</choice>
                    </choicegroup>
                </multiplechoiceresponse>
                <optionresponse>
                    <optioninput options="('yellow','blue','green')" correct="blue" label="Color_1"/>
                </optionresponse>
            </problem>
            """
        )
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(problem.find_answer_text(answer_id, choice_id), answer_text)

    def test_find_answer_text_textinput(self):
        """A plain string answer (e.g. from a textline) is returned unchanged."""
        problem = new_loncapa_problem(
            """
            <problem>
                <stringresponse answer="hide" type="ci">
                    <textline size="40"/>
                </stringresponse>
            </problem>
            """
        )
        self.assertEqual(problem.find_answer_text('1_2_1', 'hide'), 'hide')

View File

@@ -303,6 +303,104 @@ class CapaDescriptor(CapaFields, RawDescriptor):
)
return lcp.get_max_score()
def generate_report_data(self, user_state_iterator, limit_responses=None):
    """
    Return a list of student responses to this block in a readable way.

    Arguments:
        user_state_iterator: iterator over UserStateClient objects.
            E.g. the result of user_state_client.iter_all_for_block(block_key)

        limit_responses (int|None): maximum number of responses to include.
            Set to None (default) to include all.

    Returns:
        each call returns a tuple like:
        ("username", {
                       "Question": "2 + 2 equals how many?",
                       "Answer": "Four",
                       "Answer ID": "98e6a8e915904d5389821a94e48babcf_10_1"
        })
    """
    # Imported lazily to keep this module importable without capa.
    from capa.capa_problem import LoncapaProblem, LoncapaSystem

    if self.category != 'problem':
        raise NotImplementedError()

    if limit_responses == 0:
        # Don't even start collecting answers
        return

    capa_system = LoncapaSystem(
        ajax_url=None,
        # TODO set anonymous_student_id to the anonymous ID of the user which answered each problem
        # Anonymous ID is required for Matlab, CodeResponse, and some custom problems that include
        # '$anonymous_student_id' in their XML.
        # For the purposes of this report, we don't need to support those use cases.
        anonymous_student_id=None,
        cache=None,
        can_execute_unsafe_code=lambda: None,
        get_python_lib_zip=lambda: None,
        DEBUG=None,
        filestore=self.runtime.resources_fs,
        i18n=self.runtime.service(self, "i18n"),
        node_path=None,
        render_template=None,
        seed=1,
        STATIC_URL=None,
        xqueue=None,
        matlab_api_key=None,
    )
    _ = capa_system.i18n.ugettext

    responses_emitted = 0
    for user_state in user_state_iterator:
        if 'student_answers' not in user_state.state:
            # Nothing submitted by this user; skip them entirely.
            continue

        problem = LoncapaProblem(
            problem_text=self.data,
            id=self.location.html_id(),
            capa_system=capa_system,
            # We choose to run without a fully initialized CapaModule
            capa_module=None,
            state={
                'done': user_state.state.get('done'),
                'correct_map': user_state.state.get('correct_map'),
                'student_answers': user_state.state.get('student_answers'),
                'has_saved_answers': user_state.state.get('has_saved_answers'),
                'input_state': user_state.state.get('input_state'),
                'seed': user_state.state.get('seed'),
            },
            seed=user_state.state.get('seed'),
            # extract_tree=False allows us to work without a fully initialized CapaModule
            # We'll still be able to find particular data in the XML when we need it
            extract_tree=False,
        )

        for answer_id, raw_answer in problem.student_answers.items():
            # Some types of problems have data in student_answers that isn't
            # in problem_data. E.g. formulae do this to store the MathML
            # version of the answer. Exclude those rows; the report only
            # needs the text-only answer.
            if answer_id.endswith('_dynamath'):
                continue
            if limit_responses and responses_emitted >= limit_responses:
                # Reached the cap: end the whole generator here.
                return
            responses_emitted += 1
            yield (user_state.username, {
                _("Answer ID"): answer_id,
                _("Question"): problem.find_question_label(answer_id),
                _("Answer"): problem.find_answer_text(answer_id, current_answer=raw_answer),
            })
# Proxy to CapaModule for access to any of its attributes
answer_available = module_attr('answer_available')
submit_button_name = module_attr('submit_button_name')

View File

@@ -15,6 +15,7 @@ import unittest
import ddt
from django.utils.encoding import smart_text
from edx_user_state_client.interface import XBlockUserState
from lxml import etree
from mock import Mock, patch, DEFAULT
import six
@@ -3133,3 +3134,79 @@ class TestProblemCheckTracking(unittest.TestCase):
problem.runtime.replace_jump_to_id_urls = Mock()
problem.get_answer(data)
self.assertTrue(problem.runtime.replace_jump_to_id_urls.called)
class TestCapaDescriptorReportGeneration(unittest.TestCase):
    """
    Ensure that Capa report generation works correctly
    """
    def setUp(self):
        # Call the parent's setUp so TestCase machinery (cleanups, etc.) runs.
        super(TestCapaDescriptorReportGeneration, self).setUp()
        # Stub out the question/answer lookups so these tests exercise only
        # generate_report_data's iteration/limiting logic.
        self.find_question_label_patcher = patch(
            'capa.capa_problem.LoncapaProblem.find_question_label',
            lambda self, answer_id: answer_id
        )
        self.find_answer_text_patcher = patch(
            'capa.capa_problem.LoncapaProblem.find_answer_text',
            lambda self, answer_id, current_answer: current_answer
        )
        self.find_question_label_patcher.start()
        self.find_answer_text_patcher.start()
        self.addCleanup(self.find_question_label_patcher.stop)
        self.addCleanup(self.find_answer_text_patcher.stop)

    def _mock_user_state_generator(self, user_count=1, response_count=10):
        """Yield one fake XBlockUserState per user, each with response_count answers."""
        for uid in range(user_count):
            yield self._user_state(username='user{}'.format(uid), response_count=response_count)

    def _user_state(self, username='testuser', response_count=10, suffix=''):
        """Build a fake XBlockUserState with response_count student answers."""
        return XBlockUserState(
            username=username,
            state={
                'student_answers': {
                    '{}_answerid_{}{}'.format(username, aid, suffix): '{}_answer_{}'.format(username, aid)
                    for aid in range(response_count)
                },
                'seed': 1,
                'correct_map': {},
            },
            block_key=None,
            updated=None,
            scope=None,
        )

    def _get_descriptor(self):
        """Build a minimal CapaDescriptor of block_type 'problem' with an empty problem body."""
        scope_ids = Mock(block_type='problem')
        descriptor = CapaDescriptor(get_test_system(), scope_ids=scope_ids)
        descriptor.runtime = Mock()
        descriptor.data = '<problem/>'
        return descriptor

    def test_generate_report_data_not_implemented(self):
        """Report generation only supports the 'problem' category."""
        scope_ids = Mock(block_type='noproblem')
        descriptor = CapaDescriptor(get_test_system(), scope_ids=scope_ids)
        with self.assertRaises(NotImplementedError):
            next(descriptor.generate_report_data(iter([])))

    def test_generate_report_data_limit_responses(self):
        """limit_responses caps the number of rows produced."""
        descriptor = self._get_descriptor()
        report_data = list(descriptor.generate_report_data(self._mock_user_state_generator(), 2))
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(2, len(report_data))

    def test_generate_report_data_dont_limit_responses(self):
        """With no limit, every answer from every user is reported."""
        descriptor = self._get_descriptor()
        user_count = 5
        response_count = 10
        report_data = list(descriptor.generate_report_data(
            self._mock_user_state_generator(
                user_count=user_count,
                response_count=response_count,
            )
        ))
        self.assertEqual(user_count * response_count, len(report_data))

    def test_generate_report_data_skip_dynamath(self):
        """Rows whose answer_id ends in _dynamath are excluded from the report."""
        descriptor = self._get_descriptor()
        iterator = iter([self._user_state(suffix='_dynamath')])
        report_data = list(descriptor.generate_report_data(iterator))
        self.assertEqual(0, len(report_data))

View File

@@ -459,7 +459,7 @@ class DjangoXBlockUserStateClient(XBlockUserStateClient):
if scope != Scope.user_state:
raise ValueError("Only Scope.user_state is supported")
results = StudentModule.objects.filter(module_state_key=block_key)
results = StudentModule.objects.order_by('id').filter(module_state_key=block_key)
p = Paginator(results, settings.USER_STATE_BATCH_SIZE)
for page_number in p.page_range:
@@ -491,7 +491,7 @@ class DjangoXBlockUserStateClient(XBlockUserStateClient):
if scope != Scope.user_state:
raise ValueError("Only Scope.user_state is supported")
results = StudentModule.objects.filter(course_id=course_key)
results = StudentModule.objects.order_by('id').filter(course_id=course_key)
if block_type:
results = results.filter(module_type=block_type)