diff --git a/common/lib/capa/capa/responsetypes.py b/common/lib/capa/capa/responsetypes.py index fda70c6a55..3b17daa830 100644 --- a/common/lib/capa/capa/responsetypes.py +++ b/common/lib/capa/capa/responsetypes.py @@ -2,7 +2,8 @@ # File: courseware/capa/responsetypes.py # ''' -Problem response evaluation. Handles checking of student responses, of a variety of types. +Problem response evaluation. Handles checking of student responses, +of a variety of types. Used by capa_problem.py ''' @@ -35,7 +36,7 @@ from datetime import datetime from .util import * from lxml import etree from lxml.html.soupparser import fromstring as fromstring_bs # uses Beautiful Soup!!! FIXME? -import xqueue_interface +import capa.xqueue_interface log = logging.getLogger(__name__) @@ -300,7 +301,7 @@ class LoncapaResponse(object): # response aid = self.answer_ids[-1] new_cmap.set_hint_and_mode(aid, hint_text, hintmode) - log.debug('after hint: new_cmap = %s' % new_cmap) + log.debug('after hint: new_cmap = %s', new_cmap) @abc.abstractmethod def get_score(self, student_answers): @@ -790,6 +791,10 @@ class OptionResponse(LoncapaResponse): class NumericalResponse(LoncapaResponse): + ''' + This response type expects a number or formulaic expression that evaluates + to a number (e.g. `4+5/2^2`), and accepts with a tolerance. 
+ ''' response_tag = 'numericalresponse' hint_tag = 'numericalhint' @@ -806,12 +811,12 @@ class NumericalResponse(LoncapaResponse): '//*[@id=$id]//responseparam[@type="tolerance"]/@default', id=xml.get('id'))[0] self.tolerance = contextualize_text(self.tolerance_xml, context) - except Exception: + except IndexError: # xpath found an empty list, so (...)[0] is the error self.tolerance = '0' try: self.answer_id = xml.xpath('//*[@id=$id]//textline/@id', id=xml.get('id'))[0] - except Exception: + except IndexError: # Same as above self.answer_id = None def get_score(self, student_answers): @@ -836,7 +841,6 @@ class NumericalResponse(LoncapaResponse): except: # Use the traceback-preserving version of re-raising with a # different type - import sys type, value, traceback = sys.exc_info() raise StudentInputError, ("Could not interpret '%s' as a number" % @@ -1869,8 +1873,6 @@ class FormulaResponse(LoncapaResponse): log.debug('formularesponse: error %s in formula' % err) raise StudentInputError("Invalid input: Could not parse '%s' as a formula" % cgi.escape(given)) - if numpy.isnan(student_result) or numpy.isinf(student_result): - return "incorrect" if not compare_with_tolerance(student_result, instructor_result, self.tolerance): return "incorrect" return "correct" diff --git a/common/lib/capa/capa/tests/test_responsetypes.py b/common/lib/capa/capa/tests/test_responsetypes.py index 7a43fff4c9..da3d45ad74 100644 --- a/common/lib/capa/capa/tests/test_responsetypes.py +++ b/common/lib/capa/capa/tests/test_responsetypes.py @@ -438,6 +438,43 @@ class FormulaResponseTest(ResponseTest): self.assert_grade(problem, incorrect, 'incorrect', msg="Failed on function {0}; the given, incorrect answer was {1} but graded 'correct'".format(func, incorrect)) + def test_grade_infinity(self): + # This resolves a bug where a problem with relative tolerance would + # pass with any arbitrarily large student answer. 
+ + sample_dict = {'x': (1, 2)} + + # Test problem + problem = self.build_problem(sample_dict=sample_dict, + num_samples=10, + tolerance="1%", + answer="x") + # Expect such a large answer to be marked incorrect + input_formula = "x*1e999" + self.assert_grade(problem, input_formula, "incorrect") + # Expect such a large negative answer to be marked incorrect + input_formula = "-x*1e999" + self.assert_grade(problem, input_formula, "incorrect") + + def test_grade_nan(self): + # Attempt to produce a value which causes the student's answer to be + # evaluated to nan. See if this is resolved correctly. + + sample_dict = {'x': (1, 2)} + + # Test problem + problem = self.build_problem(sample_dict=sample_dict, + num_samples=10, + tolerance="1%", + answer="x") + # Expect an incorrect answer (+ nan) to be marked incorrect + # Right now this evaluates to 'nan' for a given x (Python implementation-dependent) + input_formula = "10*x + 0*1e999" + self.assert_grade(problem, input_formula, "incorrect") + # Expect a correct answer (+ nan) to be marked incorrect + input_formula = "x + 0*1e999" + self.assert_grade(problem, input_formula, "incorrect") + + class StringResponseTest(ResponseTest): from response_xml_factory import StringResponseXMLFactory @@ -714,6 +751,30 @@ class NumericalResponseTest(ResponseTest): incorrect_responses = ["", "4.5", "3.5", "0"] self.assert_multiple_grade(problem, correct_responses, incorrect_responses) + def test_grade_infinity(self): + # This resolves a bug where a problem with relative tolerance would + # pass with any arbitrarily large student answer. 
+ problem = self.build_problem(question_text="What is 2 + 2 approximately?", + explanation="The answer is 4", + answer=4, + tolerance="10%") + correct_responses = [] + incorrect_responses = ["1e999", "-1e999"] + self.assert_multiple_grade(problem, correct_responses, incorrect_responses) + + def test_grade_nan(self): + # Attempt to produce a value which causes the student's answer to be + # evaluated to nan. See if this is resolved correctly. + problem = self.build_problem(question_text="What is 2 + 2 approximately?", + explanation="The answer is 4", + answer=4, + tolerance="10%") + correct_responses = [] + # Right now these evaluate to `nan` + # `4 + nan` should be incorrect + incorrect_responses = ["0*1e999", "4 + 0*1e999"] + self.assert_multiple_grade(problem, correct_responses, incorrect_responses) + def test_grade_with_script(self): script_text = "computed_response = math.sqrt(4)" problem = self.build_problem(question_text="What is sqrt(4)?", diff --git a/common/lib/capa/capa/util.py b/common/lib/capa/capa/util.py index 9f3e8bd3a0..8b05ea717e 100644 --- a/common/lib/capa/capa/util.py +++ b/common/lib/capa/capa/util.py @@ -1,4 +1,5 @@ from .calc import evaluator, UndefinedVariable +from cmath import isinf #----------------------------------------------------------------------------- # @@ -20,7 +21,14 @@ def compare_with_tolerance(v1, v2, tol): tolerance = tolerance_rel * max(abs(v1), abs(v2)) else: tolerance = evaluator(dict(), dict(), tol) - return abs(v1 - v2) <= tolerance + + if isinf(v1) or isinf(v2): + # If an input is infinite, we can end up with `abs(v1-v2)` and + # `tolerance` both equal to infinity. Then, below we would have + # `inf <= inf` which is a fail. Instead, compare directly. 
+ return v1 == v2 + else: + return abs(v1 - v2) <= tolerance def contextualize_text(text, context): # private @@ -51,7 +59,8 @@ def convert_files_to_filenames(answers): new_answers = dict() for answer_id in answers.keys(): answer = answers[answer_id] - if is_list_of_files(answer): # Files are stored as a list, even if one file + # Files are stored as a list, even if one file + if is_list_of_files(answer): new_answers[answer_id] = [f.name for f in answer] else: new_answers[answer_id] = answers[answer_id]