feat!: Expand codejail darklaunch normalizers; append by default (#36682)

For darklaunch comparisons where the two sides have different Python
versions, we'll want a more comprehensive list of normalizers.

- Expand the default list to include patterns discovered during a Python
  3.8 vs. 3.12 comparison.
- Append the setting value by default, rather than replacing (but still
  allow replacing).
- Use default normalizers if custom ones can't be loaded.
- Add log message when loading normalizers fails.
- Validate the replacement pattern, not just the search pattern.
This commit is contained in:
Tim McCormack
2025-05-08 13:43:53 -04:00
committed by GitHub
parent ad156c062e
commit d5a273ce2f
2 changed files with 131 additions and 35 deletions

View File

@@ -291,6 +291,24 @@ def safe_exec(
raise exception
def _compile_normalizers(normalizer_setting):
"""
Compile emsg normalizer search/replace pairs into regex.
Raises exception on bad settings.
"""
compiled = []
for pair in normalizer_setting:
search = re.compile(assert_type(pair['search'], str))
replace = assert_type(pair['replace'], str)
# Test the replacement string (might contain errors)
re.sub(search, replace, "example")
compiled.append({'search': search, 'replace': replace})
return compiled
@lru_cache(maxsize=1)
def emsg_normalizers():
"""
@@ -299,38 +317,77 @@ def emsg_normalizers():
The output is like the setting value, except the 'search' patterns have
been compiled.
"""
default = [
default_setting = [
{
# Character range should be at least as broad as what Python's `tempfile` uses.
'search': r'/tmp/codejail-[0-9a-zA-Z_]+',
'replace': r'/tmp/codejail-<SANDBOX_DIR_NAME>',
},
]
try:
# .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS
# .. setting_default: (see description)
# .. setting_description: A list of patterns to search and replace in codejail error
# messages during comparison in codejail-service darklaunch. Each entry is a dict
# of 'search' (a regular expression string) and 'replace' (the replacement string).
# The default value suppresses differences matching '/tmp/codejail-[0-9a-zA-Z]+',
# the directory structure codejail uses for its random-named sandboxes. Deployers
# may also need to add a search/replace pair for the location of the sandbox
# virtualenv, or any other paths that show up in stack traces.
# .. setting_warning: Note that `replace' is a pattern, allowing for
# backreferences. Any backslashes in the replacement pattern that are not
# intended as backreferences should be escaped as `\\`.
setting = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS', default)
compiled = []
for pair in setting:
compiled.append({
'search': re.compile(assert_type(pair['search'], str)),
'replace': assert_type(pair['replace'], str),
})
return compiled
# These are useful for eliding differences in environments due to Python version:
{
# Python 3.8 doesn't include the dir here, but Python 3.12
# does. Normalize to the 3.8 version.
'search': r'File "/tmp/codejail-<SANDBOX_DIR_NAME>/jailed_code"',
'replace': r'File "jailed_code"'
},
{
# Python version shows up in stack traces in the virtualenv paths
'search': r'python3\.[0-9]+',
'replace': r'python3.XX'
},
{
# Line numbers in stack traces differ between Python versions
'search': r', line [0-9]+, in ',
'replace': r', line XXX, in '
},
{
# Some time after 3.8, Python started adding '^^^' indicators to stack traces
'search': r'\\n\s*\^+\s*\\n',
'replace': r'\\n'
},
{
# Python3.8 had these <listcomp> stack trace elements but 3.12 does not
'search': r'\\n File "[^"]+", line [0-9]+, in <listcomp>\\n',
'replace': r'\\n'
},
]
default_normalizers = _compile_normalizers(default_setting)
# .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS
# .. setting_default: []
# .. setting_description: A list of patterns to search and replace in codejail error
# messages during comparison in codejail-service darklaunch. Each entry is a dict
# of 'search' (a regular expression string) and 'replace' (the replacement string).
# Deployers may also need to add a search/replace pair for the location of the sandbox
# virtualenv, or any other paths that show up in stack traces.
# .. setting_warning: Note that `replace' is a pattern, allowing for
# backreferences. Any backslashes in the replacement pattern that are not
# intended as backreferences should be escaped as `\\`.
# The default list suppresses differences due to the randomly-named sandboxes
# or to differences due to Python version. See setting
# ``CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE`` for information on how
# this setting interacts with the defaults.
custom_setting = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS', [])
try:
custom_normalizers = _compile_normalizers(custom_setting)
except BaseException as e:
log.error("Could not load custom codejail darklaunch emsg normalizers")
record_exception()
return []
return default_normalizers
# .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE
# .. setting_default: 'append'
# .. setting_description: How to combine ``CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS``
# with the defaults. If the value is 'replace', the defaults will be replaced
# with the specified patterns. If the value is 'append' (the default), the
# specified replacements will be run after the defaults.
combine = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE', 'append')
if combine == 'replace':
return custom_normalizers
else: # 'append', or unknown
return default_normalizers + custom_normalizers
def normalize_error_message(emsg):

View File

@@ -370,32 +370,71 @@ class TestCodeJailDarkLaunch(unittest.TestCase):
assert isinstance(results['raised'], SafeExecException)
assert 'whatever.py' in repr(results['raised'])
def test_default_normalizers(self):
"""
Default normalizers handle false mismatches we've observed.
This just provides coverage for some of the more complicated patterns.
"""
side_1 = (
'Couldn\'t execute jailed code: stdout: b\'\', stderr: b\'Traceback'
' (most recent call last):\\n File "/tmp/codejail-9g9715g_/jailed_code"'
', line 19, in <module>\\n exec(code, g_dict)\\n File "<string>"'
', line 1, in <module>\\n File "<string>", line 89, in test_add\\n'
' File "<string>", line 1\\n import random random.choice(range(10))'
'\\n ^\\nSyntaxError: invalid syntax\\n\' with status code: 1'
)
side_2 = (
'Couldn\'t execute jailed code: stdout: b\'\', stderr: b\'Traceback'
' (most recent call last):\\n File "jailed_code"'
', line 19, in <module>\\n exec(code, g_dict)\\n File "<string>"'
', line 203, in <module>\\n File "<string>", line 89, in test_add\\n'
' File "<string>", line 1\\n import random random.choice(range(10))'
'\\n ^^^^^^\\nSyntaxError: invalid syntax\\n\' with status code: 1'
)
assert normalize_error_message(side_1) == normalize_error_message(side_2)
@override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
{
'search': r'/tmp/codejail-[0-9a-zA-Z]+',
'replace': r'/tmp/codejail-<RAND>',
},
{
'search': r'[0-9]+',
'replace': r'<NUM>',
},
])
def test_configurable_normalizers(self):
"""We can override the normalizers, and they run in order."""
"""We can augment the normalizers, and they run in order."""
emsg_in = "Error in /tmp/codejail-1234abcd/whatever.py: something 12 34 other"
expect_out = "Error in /tmp/codejail-<RAND>/whatever.py: something <NUM> <NUM> other"
expect_out = "Error in /tmp/codejail-<SANDBOX_DIR_NAME>/whatever.py: something <NUM> <NUM> other"
assert expect_out == normalize_error_message(emsg_in)
@override_settings(
CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
{
'search': r'[0-9]+',
'replace': r'<NUM>',
},
],
CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE='replace',
)
def test_can_replace_normalizers(self):
"""We can replace the normalizers."""
emsg_in = "Error in /tmp/codejail-1234abcd/whatever.py: something 12 34 other"
expect_out = "Error in /tmp/codejail-<NUM>abcd/whatever.py: something <NUM> <NUM> other"
assert expect_out == normalize_error_message(emsg_in)
@override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
{
'search': r'broken [',
'replace': r'replace',
'search': r'broken',
'replace': r'replace \g<>', # invalid replacement pattern
},
])
@patch('xmodule.capa.safe_exec.safe_exec.record_exception')
def test_normalizers_validate(self, mock_record_exception):
"""Normalizers are validated, and fall back to empty list on error."""
assert emsg_normalizers() == [] # pylint: disable=use-implicit-booleaness-not-comparison
@patch('xmodule.capa.safe_exec.safe_exec.log.error')
def test_normalizers_validate(self, mock_log_error, mock_record_exception):
"""Normalizers are validated, and fall back to default list on error."""
assert len(emsg_normalizers()) > 0 # pylint: disable=use-implicit-booleaness-not-comparison
mock_log_error.assert_called_once_with(
"Could not load custom codejail darklaunch emsg normalizers"
)
mock_record_exception.assert_called_once()