From d5a273ce2f97aa4f6149861860c7edc5e6003cbe Mon Sep 17 00:00:00 2001 From: Tim McCormack Date: Thu, 8 May 2025 13:43:53 -0400 Subject: [PATCH] feat!: Expand codejail darklaunch normalizers; append by default (#36682) For darklaunch comparisons where the two sides have different Python versions, we'll want a more comprehensive list of normalizers. - Expand the default list to include patterns discovered during a Python 3.8 vs. 3.12 comparison. - Append the setting value by default, rather than replacing (but still allow replacing). - Use default normalizers if custom ones can't be loaded. - Add log message when loading normalizers fails. - Validate the replacement pattern, not just the search pattern. --- xmodule/capa/safe_exec/safe_exec.py | 105 ++++++++++++++---- .../capa/safe_exec/tests/test_safe_exec.py | 61 ++++++++-- 2 files changed, 131 insertions(+), 35 deletions(-) diff --git a/xmodule/capa/safe_exec/safe_exec.py b/xmodule/capa/safe_exec/safe_exec.py index 2fd6be94b8..cd7b553579 100644 --- a/xmodule/capa/safe_exec/safe_exec.py +++ b/xmodule/capa/safe_exec/safe_exec.py @@ -291,6 +291,24 @@ def safe_exec( raise exception +def _compile_normalizers(normalizer_setting): + """ + Compile emsg normalizer search/replace pairs into regex. + + Raises exception on bad settings. + """ + compiled = [] + for pair in normalizer_setting: + search = re.compile(assert_type(pair['search'], str)) + replace = assert_type(pair['replace'], str) + + # Test the replacement string (might contain errors) + re.sub(search, replace, "example") + + compiled.append({'search': search, 'replace': replace}) + return compiled + + @lru_cache(maxsize=1) def emsg_normalizers(): """ @@ -299,38 +317,77 @@ def emsg_normalizers(): The output is like the setting value, except the 'search' patterns have been compiled. """ - default = [ + default_setting = [ { # Character range should be at least as broad as what Python's `tempfile` uses. 'search': r'/tmp/codejail-[0-9a-zA-Z_]+', 'replace': r'/tmp/codejail-', }, - ] - try: - # .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS - # .. setting_default: (see description) - # .. setting_description: A list of patterns to search and replace in codejail error - # messages during comparison in codejail-service darklaunch. Each entry is a dict - # of 'search' (a regular expression string) and 'replace' (the replacement string). - # The default value suppresses differences matching '/tmp/codejail-[0-9a-zA-Z]+', - # the directory structure codejail uses for its random-named sandboxes. Deployers - # may also need to add a search/replace pair for the location of the sandbox - # virtualenv, or any other paths that show up in stack traces. - # .. setting_warning: Note that `replace' is a pattern, allowing for - # backreferences. Any backslashes in the replacement pattern that are not - # intended as backreferences should be escaped as `\\`. - setting = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS', default) - compiled = [] - for pair in setting: - compiled.append({ - 'search': re.compile(assert_type(pair['search'], str)), - 'replace': assert_type(pair['replace'], str), - }) - return compiled + # These are useful for eliding differences in environments due to Python version: + + { + # Python 3.8 doesn't include the dir here, but Python 3.12 + # does. Normalize to the 3.8 version. + 'search': r'File "/tmp/codejail-/jailed_code"', + 'replace': r'File "jailed_code"' + }, + { + # Python version shows up in stack traces in the virtualenv paths + 'search': r'python3\.[0-9]+', + 'replace': r'python3.XX' + }, + { + # Line numbers in stack traces differ between Python versions + 'search': r', line [0-9]+, in ', + 'replace': r', line XXX, in ' + }, + { + # Some time after 3.8, Python started adding '^^^' indicators to stack traces + 'search': r'\\n\s*\^+\s*\\n', + 'replace': r'\\n' + }, + { + # Python3.8 had these stack trace elements but 3.12 does not + 'search': r'\\n File "[^"]+", line [0-9]+, in \\n', + 'replace': r'\\n' + }, + ] + default_normalizers = _compile_normalizers(default_setting) + + # .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS + # .. setting_default: [] + # .. setting_description: A list of patterns to search and replace in codejail error + # messages during comparison in codejail-service darklaunch. Each entry is a dict + # of 'search' (a regular expression string) and 'replace' (the replacement string). + # Deployers may also need to add a search/replace pair for the location of the sandbox + # virtualenv, or any other paths that show up in stack traces. + # .. setting_warning: Note that `replace' is a pattern, allowing for + # backreferences. Any backslashes in the replacement pattern that are not + # intended as backreferences should be escaped as `\\`. + # The default list suppresses differences due to the randomly-named sandboxes + # or to differences due to Python version. See setting + # ``CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE`` for information on how + # this setting interacts with the defaults. + custom_setting = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS', []) + try: + custom_normalizers = _compile_normalizers(custom_setting) except BaseException as e: + log.error("Could not load custom codejail darklaunch emsg normalizers") record_exception() - return [] + return default_normalizers + + # .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE + # .. setting_default: 'append' + # .. setting_description: How to combine ``CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS`` + # with the defaults. If the value is 'replace', the defaults will be replaced + # with the specified patterns. If the value is 'append' (the default), the + # specified replacements will be run after the defaults. + combine = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE', 'append') + if combine == 'replace': + return custom_normalizers + else: # 'append', or unknown + return default_normalizers + custom_normalizers def normalize_error_message(emsg): diff --git a/xmodule/capa/safe_exec/tests/test_safe_exec.py b/xmodule/capa/safe_exec/tests/test_safe_exec.py index c2b37eecb7..d09f8c9d9b 100644 --- a/xmodule/capa/safe_exec/tests/test_safe_exec.py +++ b/xmodule/capa/safe_exec/tests/test_safe_exec.py @@ -370,32 +370,71 @@ class TestCodeJailDarkLaunch(unittest.TestCase): assert isinstance(results['raised'], SafeExecException) assert 'whatever.py' in repr(results['raised']) + def test_default_normalizers(self): + """ + Default normalizers handle false mismatches we've observed. + + This just provides coverage for some of the more complicated patterns. + """ + side_1 = ( + 'Couldn\'t execute jailed code: stdout: b\'\', stderr: b\'Traceback' + ' (most recent call last):\\n File "/tmp/codejail-9g9715g_/jailed_code"' + ', line 19, in \\n exec(code, g_dict)\\n File ""' + ', line 1, in \\n File "", line 89, in test_add\\n' + ' File "", line 1\\n import random random.choice(range(10))' + '\\n ^\\nSyntaxError: invalid syntax\\n\' with status code: 1' + ) + side_2 = ( + 'Couldn\'t execute jailed code: stdout: b\'\', stderr: b\'Traceback' + ' (most recent call last):\\n File "jailed_code"' + ', line 19, in \\n exec(code, g_dict)\\n File ""' + ', line 203, in \\n File "", line 89, in test_add\\n' + ' File "", line 1\\n import random random.choice(range(10))' + '\\n ^^^^^^\\nSyntaxError: invalid syntax\\n\' with status code: 1' + ) + assert normalize_error_message(side_1) == normalize_error_message(side_2) + @override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[ - { - 'search': r'/tmp/codejail-[0-9a-zA-Z]+', - 'replace': r'/tmp/codejail-', - }, { 'search': r'[0-9]+', 'replace': r'', }, ]) def test_configurable_normalizers(self): - """We can override the normalizers, and they run in order.""" + """We can augment the normalizers, and they run in order.""" emsg_in = "Error in /tmp/codejail-1234abcd/whatever.py: something 12 34 other" - expect_out = "Error in /tmp/codejail-/whatever.py: something other" + expect_out = "Error in /tmp/codejail-/whatever.py: something other" + assert expect_out == normalize_error_message(emsg_in) + + @override_settings( + CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[ + { + 'search': r'[0-9]+', + 'replace': r'', + }, + ], + CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE='replace', + ) + def test_can_replace_normalizers(self): + """We can replace the normalizers.""" + emsg_in = "Error in /tmp/codejail-1234abcd/whatever.py: something 12 34 other" + expect_out = "Error in /tmp/codejail-abcd/whatever.py: something other" assert expect_out == normalize_error_message(emsg_in) @override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[ { - 'search': r'broken [', - 'replace': r'replace', + 'search': r'broken', + 'replace': r'replace \g<>', # invalid replacement pattern }, ]) @patch('xmodule.capa.safe_exec.safe_exec.record_exception') - def test_normalizers_validate(self, mock_record_exception): - """Normalizers are validated, and fall back to empty list on error.""" - assert emsg_normalizers() == [] # pylint: disable=use-implicit-booleaness-not-comparison + @patch('xmodule.capa.safe_exec.safe_exec.log.error') + def test_normalizers_validate(self, mock_log_error, mock_record_exception): + """Normalizers are validated, and fall back to default list on error.""" + assert len(emsg_normalizers()) > 0 # pylint: disable=use-implicit-booleaness-not-comparison + mock_log_error.assert_called_once_with( + "Could not load custom codejail darklaunch emsg normalizers" + ) mock_record_exception.assert_called_once()