feat!: Expand codejail darklaunch normalizers; append by default (#36682)
For darklaunch comparisons where the two sides have different Python versions, we'll want a more comprehensive list of normalizers. - Expand the default list to include patterns discovered during a Python 3.8 vs. 3.12 comparison. - Append the setting value by default, rather than replacing (but still allow replacing). - Use default normalizers if custom ones can't be loaded. - Add log message when loading normalizers fails. - Validate the replacement pattern, not just the search pattern.
This commit is contained in:
@@ -291,6 +291,24 @@ def safe_exec(
|
||||
raise exception
|
||||
|
||||
|
||||
def _compile_normalizers(normalizer_setting):
|
||||
"""
|
||||
Compile emsg normalizer search/replace pairs into regex.
|
||||
|
||||
Raises exception on bad settings.
|
||||
"""
|
||||
compiled = []
|
||||
for pair in normalizer_setting:
|
||||
search = re.compile(assert_type(pair['search'], str))
|
||||
replace = assert_type(pair['replace'], str)
|
||||
|
||||
# Test the replacement string (might contain errors)
|
||||
re.sub(search, replace, "example")
|
||||
|
||||
compiled.append({'search': search, 'replace': replace})
|
||||
return compiled
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def emsg_normalizers():
|
||||
"""
|
||||
@@ -299,38 +317,77 @@ def emsg_normalizers():
|
||||
The output is like the setting value, except the 'search' patterns have
|
||||
been compiled.
|
||||
"""
|
||||
default = [
|
||||
default_setting = [
|
||||
{
|
||||
# Character range should be at least as broad as what Python's `tempfile` uses.
|
||||
'search': r'/tmp/codejail-[0-9a-zA-Z_]+',
|
||||
'replace': r'/tmp/codejail-<SANDBOX_DIR_NAME>',
|
||||
},
|
||||
]
|
||||
try:
|
||||
# .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS
|
||||
# .. setting_default: (see description)
|
||||
# .. setting_description: A list of patterns to search and replace in codejail error
|
||||
# messages during comparison in codejail-service darklaunch. Each entry is a dict
|
||||
# of 'search' (a regular expression string) and 'replace' (the replacement string).
|
||||
# The default value suppresses differences matching '/tmp/codejail-[0-9a-zA-Z]+',
|
||||
# the directory structure codejail uses for its random-named sandboxes. Deployers
|
||||
# may also need to add a search/replace pair for the location of the sandbox
|
||||
# virtualenv, or any other paths that show up in stack traces.
|
||||
# .. setting_warning: Note that `replace' is a pattern, allowing for
|
||||
# backreferences. Any backslashes in the replacement pattern that are not
|
||||
# intended as backreferences should be escaped as `\\`.
|
||||
setting = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS', default)
|
||||
|
||||
compiled = []
|
||||
for pair in setting:
|
||||
compiled.append({
|
||||
'search': re.compile(assert_type(pair['search'], str)),
|
||||
'replace': assert_type(pair['replace'], str),
|
||||
})
|
||||
return compiled
|
||||
# These are useful for eliding differences in environments due to Python version:
|
||||
|
||||
{
|
||||
# Python 3.8 doesn't include the dir here, but Python 3.12
|
||||
# does. Normalize to the 3.8 version.
|
||||
'search': r'File "/tmp/codejail-<SANDBOX_DIR_NAME>/jailed_code"',
|
||||
'replace': r'File "jailed_code"'
|
||||
},
|
||||
{
|
||||
# Python version shows up in stack traces in the virtualenv paths
|
||||
'search': r'python3\.[0-9]+',
|
||||
'replace': r'python3.XX'
|
||||
},
|
||||
{
|
||||
# Line numbers in stack traces differ between Python versions
|
||||
'search': r', line [0-9]+, in ',
|
||||
'replace': r', line XXX, in '
|
||||
},
|
||||
{
|
||||
# Some time after 3.8, Python started adding '^^^' indicators to stack traces
|
||||
'search': r'\\n\s*\^+\s*\\n',
|
||||
'replace': r'\\n'
|
||||
},
|
||||
{
|
||||
# Python3.8 had these <listcomp> stack trace elements but 3.12 does not
|
||||
'search': r'\\n File "[^"]+", line [0-9]+, in <listcomp>\\n',
|
||||
'replace': r'\\n'
|
||||
},
|
||||
]
|
||||
default_normalizers = _compile_normalizers(default_setting)
|
||||
|
||||
# .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS
|
||||
# .. setting_default: []
|
||||
# .. setting_description: A list of patterns to search and replace in codejail error
|
||||
# messages during comparison in codejail-service darklaunch. Each entry is a dict
|
||||
# of 'search' (a regular expression string) and 'replace' (the replacement string).
|
||||
# Deployers may also need to add a search/replace pair for the location of the sandbox
|
||||
# virtualenv, or any other paths that show up in stack traces.
|
||||
# .. setting_warning: Note that `replace' is a pattern, allowing for
|
||||
# backreferences. Any backslashes in the replacement pattern that are not
|
||||
# intended as backreferences should be escaped as `\\`.
|
||||
# The default list suppresses differences due to the randomly-named sandboxes
|
||||
# or to differences due to Python version. See setting
|
||||
# ``CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE`` for information on how
|
||||
# this setting interacts with the defaults.
|
||||
custom_setting = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS', [])
|
||||
try:
|
||||
custom_normalizers = _compile_normalizers(custom_setting)
|
||||
except BaseException as e:
|
||||
log.error("Could not load custom codejail darklaunch emsg normalizers")
|
||||
record_exception()
|
||||
return []
|
||||
return default_normalizers
|
||||
|
||||
# .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE
|
||||
# .. setting_default: 'append'
|
||||
# .. setting_description: How to combine ``CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS``
|
||||
# with the defaults. If the value is 'replace', the defaults will be replaced
|
||||
# with the specified patterns. If the value is 'append' (the default), the
|
||||
# specified replacements will be run after the defaults.
|
||||
combine = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE', 'append')
|
||||
if combine == 'replace':
|
||||
return custom_normalizers
|
||||
else: # 'append', or unknown
|
||||
return default_normalizers + custom_normalizers
|
||||
|
||||
|
||||
def normalize_error_message(emsg):
|
||||
|
||||
@@ -370,32 +370,71 @@ class TestCodeJailDarkLaunch(unittest.TestCase):
|
||||
assert isinstance(results['raised'], SafeExecException)
|
||||
assert 'whatever.py' in repr(results['raised'])
|
||||
|
||||
def test_default_normalizers(self):
|
||||
"""
|
||||
Default normalizers handle false mismatches we've observed.
|
||||
|
||||
This just provides coverage for some of the more complicated patterns.
|
||||
"""
|
||||
side_1 = (
|
||||
'Couldn\'t execute jailed code: stdout: b\'\', stderr: b\'Traceback'
|
||||
' (most recent call last):\\n File "/tmp/codejail-9g9715g_/jailed_code"'
|
||||
', line 19, in <module>\\n exec(code, g_dict)\\n File "<string>"'
|
||||
', line 1, in <module>\\n File "<string>", line 89, in test_add\\n'
|
||||
' File "<string>", line 1\\n import random random.choice(range(10))'
|
||||
'\\n ^\\nSyntaxError: invalid syntax\\n\' with status code: 1'
|
||||
)
|
||||
side_2 = (
|
||||
'Couldn\'t execute jailed code: stdout: b\'\', stderr: b\'Traceback'
|
||||
' (most recent call last):\\n File "jailed_code"'
|
||||
', line 19, in <module>\\n exec(code, g_dict)\\n File "<string>"'
|
||||
', line 203, in <module>\\n File "<string>", line 89, in test_add\\n'
|
||||
' File "<string>", line 1\\n import random random.choice(range(10))'
|
||||
'\\n ^^^^^^\\nSyntaxError: invalid syntax\\n\' with status code: 1'
|
||||
)
|
||||
assert normalize_error_message(side_1) == normalize_error_message(side_2)
|
||||
|
||||
@override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
|
||||
{
|
||||
'search': r'/tmp/codejail-[0-9a-zA-Z]+',
|
||||
'replace': r'/tmp/codejail-<RAND>',
|
||||
},
|
||||
{
|
||||
'search': r'[0-9]+',
|
||||
'replace': r'<NUM>',
|
||||
},
|
||||
])
|
||||
def test_configurable_normalizers(self):
|
||||
"""We can override the normalizers, and they run in order."""
|
||||
"""We can augment the normalizers, and they run in order."""
|
||||
emsg_in = "Error in /tmp/codejail-1234abcd/whatever.py: something 12 34 other"
|
||||
expect_out = "Error in /tmp/codejail-<RAND>/whatever.py: something <NUM> <NUM> other"
|
||||
expect_out = "Error in /tmp/codejail-<SANDBOX_DIR_NAME>/whatever.py: something <NUM> <NUM> other"
|
||||
assert expect_out == normalize_error_message(emsg_in)
|
||||
|
||||
@override_settings(
|
||||
CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
|
||||
{
|
||||
'search': r'[0-9]+',
|
||||
'replace': r'<NUM>',
|
||||
},
|
||||
],
|
||||
CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE='replace',
|
||||
)
|
||||
def test_can_replace_normalizers(self):
|
||||
"""We can replace the normalizers."""
|
||||
emsg_in = "Error in /tmp/codejail-1234abcd/whatever.py: something 12 34 other"
|
||||
expect_out = "Error in /tmp/codejail-<NUM>abcd/whatever.py: something <NUM> <NUM> other"
|
||||
assert expect_out == normalize_error_message(emsg_in)
|
||||
|
||||
@override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
|
||||
{
|
||||
'search': r'broken [',
|
||||
'replace': r'replace',
|
||||
'search': r'broken',
|
||||
'replace': r'replace \g<>', # invalid replacement pattern
|
||||
},
|
||||
])
|
||||
@patch('xmodule.capa.safe_exec.safe_exec.record_exception')
|
||||
def test_normalizers_validate(self, mock_record_exception):
|
||||
"""Normalizers are validated, and fall back to empty list on error."""
|
||||
assert emsg_normalizers() == [] # pylint: disable=use-implicit-booleaness-not-comparison
|
||||
@patch('xmodule.capa.safe_exec.safe_exec.log.error')
|
||||
def test_normalizers_validate(self, mock_log_error, mock_record_exception):
|
||||
"""Normalizers are validated, and fall back to default list on error."""
|
||||
assert len(emsg_normalizers()) > 0 # pylint: disable=use-implicit-booleaness-not-comparison
|
||||
mock_log_error.assert_called_once_with(
|
||||
"Could not load custom codejail darklaunch emsg normalizers"
|
||||
)
|
||||
mock_record_exception.assert_called_once()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user