fix: Add much more codejail darklaunch info; fix remote error bug (#36534)
- Fix bug where we were overwriting `remote_emsg` with None, and add test that would have caught it. - Suppress differences due solely to the codejail sandbox directory name differing (in stack traces), and add test for this. Configurable because we'll need to add an additional search/replace pair for the sandbox venv paths. - Add a variety of custom attributes, replacing existing ones. The attrs now have a prefixed naming scheme to simplify searching. - Add slug to log output so we can more readily correlate traces and logs, as well as logs across services. - Fix typo in error message. - Fix existing import sort order lint.
This commit is contained in:
@@ -2,14 +2,20 @@
|
||||
import copy
|
||||
import hashlib
|
||||
import logging
|
||||
import re
|
||||
from functools import lru_cache
|
||||
from typing import assert_type
|
||||
|
||||
from codejail.safe_exec import SafeExecException, json_safe
|
||||
from codejail.safe_exec import not_safe_exec as codejail_not_safe_exec
|
||||
from codejail.safe_exec import safe_exec as codejail_safe_exec
|
||||
from django.conf import settings
|
||||
from django.dispatch import receiver
|
||||
from django.test.signals import setting_changed
|
||||
from edx_django_utils.monitoring import function_trace, record_exception, set_custom_attribute
|
||||
|
||||
from . import lazymod
|
||||
from .remote_exec import is_codejail_rest_service_enabled, is_codejail_in_darklaunch, get_remote_exec
|
||||
from .remote_exec import get_remote_exec, is_codejail_in_darklaunch, is_codejail_rest_service_enabled
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -219,6 +225,28 @@ def safe_exec(
|
||||
# Run the code in both the remote codejail service as well as the local codejail
|
||||
# when in darklaunch mode.
|
||||
if is_codejail_in_darklaunch():
|
||||
# Start adding attributes only once we're in a darklaunch
|
||||
# comparison, even though these particular ones aren't specific to
|
||||
# darklaunch. There can be multiple codejail calls per trace, and
|
||||
# these attrs will overwrite previous values in the same trace. When
|
||||
# that happens, we need to ensure we overwrite *all* of them,
|
||||
# otherwise we could end up with inconsistent combinations of values.
|
||||
|
||||
# .. custom_attribute_name: codejail.slug
|
||||
# .. custom_attribute_description: Value of the slug parameter. This
|
||||
# might be a problem ID, if present.
|
||||
set_custom_attribute('codejail.slug', slug)
|
||||
# .. custom_attribute_name: codejail.limit_overrides_context
|
||||
# .. custom_attribute_description: Value of the limit_overrides_context
|
||||
# parameter to this code execution. Generally this will be the
|
||||
# course name, if present at all.
|
||||
set_custom_attribute('codejail.limit_overrides_context', limit_overrides_context)
|
||||
# .. custom_attribute_name: codejail.extra_files_count
|
||||
# .. custom_attribute_description: Number of extra_files included
|
||||
# in request. This should be 0 or 1, the latter indicating a
|
||||
# python_lib.zip was present.
|
||||
set_custom_attribute('codejail.extra_files_count', len(extra_files) if extra_files else 0)
|
||||
|
||||
try:
|
||||
data = {
|
||||
"code": code_prolog + LAZY_IMPORTS + code,
|
||||
@@ -230,30 +258,26 @@ def safe_exec(
|
||||
"extra_files": extra_files,
|
||||
}
|
||||
with function_trace('safe_exec.remote_exec_darklaunch'):
|
||||
# Ignore the returned exception, because it's just a
|
||||
# SafeExecException wrapped around emsg (if present).
|
||||
remote_emsg, _ = get_remote_exec(data)
|
||||
remote_exception = None
|
||||
except BaseException as e: # pragma: no cover # pylint: disable=broad-except
|
||||
# Swallow all exceptions and log it in monitoring so that dark launch doesn't cause issues during
|
||||
# deploy.
|
||||
remote_emsg = None
|
||||
remote_exception = e
|
||||
else:
|
||||
remote_emsg = None
|
||||
remote_exception = None
|
||||
|
||||
try:
|
||||
log.info(
|
||||
f"Remote execution in darklaunch mode produces globals={darklaunch_globals!r}, "
|
||||
f"emsg={remote_emsg!r}, exception={remote_exception!r}"
|
||||
)
|
||||
local_exc_unexpected = None if isinstance(exception, SafeExecException) else exception
|
||||
log.info(
|
||||
f"Local execution in darklaunch mode produces globals={globals_dict!r}, "
|
||||
f"emsg={emsg!r}, exception={local_exc_unexpected!r}")
|
||||
set_custom_attribute('dark_launch_emsg_match', remote_emsg == emsg)
|
||||
set_custom_attribute('remote_emsg_exists', remote_emsg is not None)
|
||||
set_custom_attribute('local_emsg_exists', emsg is not None)
|
||||
|
||||
report_darklaunch_results(
|
||||
slug=slug,
|
||||
globals_local=globals_dict, emsg_local=emsg, unexpected_exc_local=local_exc_unexpected,
|
||||
globals_remote=darklaunch_globals, emsg_remote=remote_emsg, unexpected_exc_remote=remote_exception,
|
||||
)
|
||||
except BaseException as e: # pragma: no cover # pylint: disable=broad-except
|
||||
log.exception("Error occurred while trying to report codejail darklauch data.")
|
||||
log.exception("Error occurred while trying to report codejail darklaunch data.")
|
||||
record_exception()
|
||||
|
||||
# Put the result back in the cache. This is complicated by the fact that
|
||||
@@ -265,3 +289,125 @@ def safe_exec(
|
||||
# If an exception happened, raise it now.
|
||||
if exception:
|
||||
raise exception
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def emsg_normalizers():
|
||||
"""
|
||||
Load emsg normalization settings.
|
||||
|
||||
The output is like the setting value, except the 'search' patterns have
|
||||
been compiled.
|
||||
"""
|
||||
default = [
|
||||
{
|
||||
'search': r'/tmp/codejail-[0-9a-zA-Z]+',
|
||||
'replace': r'/tmp/codejail-<SANDBOX_DIR_NAME>',
|
||||
},
|
||||
]
|
||||
try:
|
||||
# .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS
|
||||
# .. setting_default: (see description)
|
||||
# .. setting_description: A list of patterns to search and replace in codejail error
|
||||
# messages during comparison in codejail-service darklaunch. Each entry is a dict
|
||||
# of 'search' (a regular expression string) and 'replace' (the replacement string).
|
||||
# The default value suppresses differences matching '/tmp/codejail-[0-9a-zA-Z]+',
|
||||
# the directory structure codejail uses for its random-named sandboxes. Deployers
|
||||
# may also need to add a search/replace pair for the location of the sandbox
|
||||
# virtualenv, or any other paths that show up in stack traces.
|
||||
# .. setting_warning: Note that `replace' is a pattern, allowing for
|
||||
# backreferences. Any backslashes in the replacement pattern that are not
|
||||
# intended as backreferences should be escaped as `\\`.
|
||||
setting = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS', default)
|
||||
|
||||
compiled = []
|
||||
for pair in setting:
|
||||
compiled.append({
|
||||
'search': re.compile(assert_type(pair['search'], str)),
|
||||
'replace': assert_type(pair['replace'], str),
|
||||
})
|
||||
return compiled
|
||||
except BaseException as e:
|
||||
record_exception()
|
||||
return []
|
||||
|
||||
|
||||
def normalize_error_message(emsg):
|
||||
"""
|
||||
Remove any uninteresting sources of discrepancy from an emsg.
|
||||
"""
|
||||
if emsg is None:
|
||||
return None
|
||||
|
||||
for replacer in emsg_normalizers():
|
||||
emsg = re.sub(replacer['search'], replacer['replace'], emsg, count=0)
|
||||
|
||||
return emsg
|
||||
|
||||
|
||||
def report_darklaunch_results(
|
||||
*, slug,
|
||||
globals_local, emsg_local, unexpected_exc_local,
|
||||
globals_remote, emsg_remote, unexpected_exc_remote,
|
||||
):
|
||||
"""Send telemetry for results of darklaunch."""
|
||||
can_compare_output = True
|
||||
|
||||
def report_arm(arm, globals_dict, emsg, unexpected_exception):
|
||||
nonlocal can_compare_output
|
||||
if unexpected_exception:
|
||||
# .. custom_attribute_name: codejail.darklaunch.status.{local,remote}
|
||||
# .. custom_attribute_description: Outcome of this arm of the
|
||||
# darklaunch comparison. Values can be 'ok' (normal execution),
|
||||
# 'safe_error' (submitted code raised an exception), or
|
||||
# 'unexpected_error' (uncaught error in submitting or evaluating code).
|
||||
set_custom_attribute(f'codejail.darklaunch.status.{arm}', 'unexpected_error')
|
||||
# .. custom_attribute_name: codejail.darklaunch.exception.{local,remote}
|
||||
# .. custom_attribute_description: When the status attribute indicates
|
||||
# an unexpected error, this is a string representation of the error,
|
||||
# otherwise None.
|
||||
set_custom_attribute(f'codejail.darklaunch.exception.{arm}', repr(unexpected_exception))
|
||||
can_compare_output = False
|
||||
else:
|
||||
set_custom_attribute(f'codejail.darklaunch.status.{arm}', 'ok' if emsg is None else 'safe_error')
|
||||
set_custom_attribute(f'codejail.darklaunch.exception.{arm}', None)
|
||||
|
||||
# Logs include full globals and emsg
|
||||
log.info(
|
||||
f"Codejail darklaunch {arm} results for slug={slug}: globals={globals_dict!r}, "
|
||||
f"emsg={emsg!r}, exception={unexpected_exception!r}"
|
||||
)
|
||||
|
||||
report_arm('local', globals_local, emsg_local, unexpected_exc_local)
|
||||
report_arm('remote', globals_remote, emsg_remote, unexpected_exc_remote)
|
||||
|
||||
# If the arms can't be compared (unexpected errors), stop early -- the rest
|
||||
# is about output comparison.
|
||||
if not can_compare_output:
|
||||
set_custom_attribute('codejail.darklaunch.globals_match', 'N/A')
|
||||
set_custom_attribute('codejail.darklaunch.emsg_match', 'N/A')
|
||||
return
|
||||
|
||||
globals_match = globals_local == globals_remote
|
||||
emsg_match = normalize_error_message(emsg_local) == normalize_error_message(emsg_remote)
|
||||
|
||||
# .. custom_attribute_name: codejail.darklaunch.globals_match
|
||||
# .. custom_attribute_description: True if local and remote globals_dict
|
||||
# values match, False otherwise. 'N/A' when either arm raised an
|
||||
# uncaught error.
|
||||
set_custom_attribute('codejail.darklaunch.globals_match', globals_match)
|
||||
# .. custom_attribute_name: codejail.darklaunch.emsg_match
|
||||
# .. custom_attribute_description: True if the local and remote emsg values
|
||||
# (errors returned from sandbox) match, False otherwise. Differences due
|
||||
# to known irrelevant factors are suppressed in this comparison, such as
|
||||
# the randomized directory names used for sandboxes. 'N/A' when either
|
||||
# arm raised an uncaught error.
|
||||
set_custom_attribute('codejail.darklaunch.emsg_match', emsg_match)
|
||||
|
||||
|
||||
@receiver(setting_changed)
|
||||
def reset_caches(sender, **kwargs):
|
||||
"""
|
||||
Reset cached settings during unit tests.
|
||||
"""
|
||||
emsg_normalizers.cache_clear()
|
||||
|
||||
@@ -23,6 +23,7 @@ from six.moves import range
|
||||
from openedx.core.djangolib.testing.utils import skip_unless_lms
|
||||
from xmodule.capa.safe_exec import safe_exec, update_hash
|
||||
from xmodule.capa.safe_exec.remote_exec import is_codejail_in_darklaunch, is_codejail_rest_service_enabled
|
||||
from xmodule.capa.safe_exec.safe_exec import emsg_normalizers, normalize_error_message
|
||||
|
||||
|
||||
class TestSafeExec(unittest.TestCase): # lint-amnesty, pylint: disable=missing-class-docstring
|
||||
@@ -212,6 +213,13 @@ class TestCodeJailDarkLaunch(unittest.TestCase):
|
||||
|
||||
return {'raised': safe_exec_e}
|
||||
|
||||
# These don't change between the tests
|
||||
standard_codejail_attr_calls = [
|
||||
call('codejail.slug', None),
|
||||
call('codejail.limit_overrides_context', None),
|
||||
call('codejail.extra_files_count', 0),
|
||||
]
|
||||
|
||||
def test_separate_globals(self):
|
||||
"""Test that local and remote globals are isolated from each other's side effects."""
|
||||
# Both will attempt to read and write the 'overwrite' key.
|
||||
@@ -238,17 +246,21 @@ class TestCodeJailDarkLaunch(unittest.TestCase):
|
||||
results = self.run_dark_launch(
|
||||
globals_dict=globals_dict, local=local_exec, remote=remote_exec,
|
||||
expect_attr_calls=[
|
||||
call('local_emsg_exists', False),
|
||||
call('remote_emsg_exists', False),
|
||||
call('dark_launch_emsg_match', True),
|
||||
*self.standard_codejail_attr_calls,
|
||||
call('codejail.darklaunch.status.local', 'ok'),
|
||||
call('codejail.darklaunch.status.remote', 'ok'),
|
||||
call('codejail.darklaunch.exception.local', None),
|
||||
call('codejail.darklaunch.exception.remote', None),
|
||||
call('codejail.darklaunch.globals_match', False), # mismatch revealed here
|
||||
call('codejail.darklaunch.emsg_match', True),
|
||||
],
|
||||
expect_log_info_calls=[
|
||||
call(
|
||||
"Local execution in darklaunch mode produces globals={'overwrite': 'mock local'}, "
|
||||
"Codejail darklaunch local results for slug=None: globals={'overwrite': 'mock local'}, "
|
||||
"emsg=None, exception=None"
|
||||
),
|
||||
call(
|
||||
"Remote execution in darklaunch mode produces globals={'overwrite': 'mock remote'}, "
|
||||
"Codejail darklaunch remote results for slug=None: globals={'overwrite': 'mock remote'}, "
|
||||
"emsg=None, exception=None"
|
||||
),
|
||||
],
|
||||
@@ -274,18 +286,22 @@ class TestCodeJailDarkLaunch(unittest.TestCase):
|
||||
results = self.run_dark_launch(
|
||||
globals_dict={}, local=local_exec, remote=remote_exec,
|
||||
expect_attr_calls=[
|
||||
call('local_emsg_exists', True),
|
||||
call('remote_emsg_exists', False),
|
||||
call('dark_launch_emsg_match', False),
|
||||
*self.standard_codejail_attr_calls,
|
||||
call('codejail.darklaunch.status.local', 'unexpected_error'),
|
||||
call('codejail.darklaunch.status.remote', 'ok'),
|
||||
call('codejail.darklaunch.exception.local', "BaseException('unexpected')"),
|
||||
call('codejail.darklaunch.exception.remote', None),
|
||||
call('codejail.darklaunch.globals_match', "N/A"),
|
||||
call('codejail.darklaunch.emsg_match', "N/A"),
|
||||
],
|
||||
expect_log_info_calls=[
|
||||
call(
|
||||
"Remote execution in darklaunch mode produces globals={}, "
|
||||
"emsg=None, exception=None"
|
||||
"Codejail darklaunch local results for slug=None: globals={}, "
|
||||
"emsg='unexpected', exception=BaseException('unexpected')"
|
||||
),
|
||||
call(
|
||||
"Local execution in darklaunch mode produces globals={}, "
|
||||
"emsg='unexpected', exception=BaseException('unexpected')"
|
||||
"Codejail darklaunch remote results for slug=None: globals={}, "
|
||||
"emsg=None, exception=None"
|
||||
),
|
||||
],
|
||||
expect_globals_contains={},
|
||||
@@ -295,6 +311,103 @@ class TestCodeJailDarkLaunch(unittest.TestCase):
|
||||
assert isinstance(results['raised'], BaseException)
|
||||
assert 'unexpected' in repr(results['raised'])
|
||||
|
||||
def test_emsg_mismatch(self):
|
||||
"""Test that local and remote error messages are compared."""
|
||||
def local_exec(code, globals_dict, **kwargs):
|
||||
raise SafeExecException("oops")
|
||||
|
||||
def remote_exec(data):
|
||||
return ("OH NO", SafeExecException("OH NO"))
|
||||
|
||||
results = self.run_dark_launch(
|
||||
globals_dict={}, local=local_exec, remote=remote_exec,
|
||||
expect_attr_calls=[
|
||||
*self.standard_codejail_attr_calls,
|
||||
call('codejail.darklaunch.status.local', 'safe_error'),
|
||||
call('codejail.darklaunch.status.remote', 'safe_error'),
|
||||
call('codejail.darklaunch.exception.local', None),
|
||||
call('codejail.darklaunch.exception.remote', None),
|
||||
call('codejail.darklaunch.globals_match', True),
|
||||
call('codejail.darklaunch.emsg_match', False), # mismatch revealed here
|
||||
],
|
||||
expect_log_info_calls=[
|
||||
call(
|
||||
"Codejail darklaunch local results for slug=None: globals={}, "
|
||||
"emsg='oops', exception=None"
|
||||
),
|
||||
call(
|
||||
"Codejail darklaunch remote results for slug=None: globals={}, "
|
||||
"emsg='OH NO', exception=None"
|
||||
),
|
||||
],
|
||||
expect_globals_contains={},
|
||||
)
|
||||
assert isinstance(results['raised'], SafeExecException)
|
||||
assert 'oops' in repr(results['raised'])
|
||||
|
||||
def test_ignore_sandbox_dir_mismatch(self):
|
||||
"""Mismatch due only to differences in sandbox directory should be ignored."""
|
||||
def local_exec(code, globals_dict, **kwargs):
|
||||
raise SafeExecException("stack trace involving /tmp/codejail-1234567/whatever.py")
|
||||
|
||||
def remote_exec(data):
|
||||
emsg = "stack trace involving /tmp/codejail-abcdefgh/whatever.py"
|
||||
return (emsg, SafeExecException(emsg))
|
||||
|
||||
results = self.run_dark_launch(
|
||||
globals_dict={}, local=local_exec, remote=remote_exec,
|
||||
expect_attr_calls=[
|
||||
*self.standard_codejail_attr_calls,
|
||||
call('codejail.darklaunch.status.local', 'safe_error'),
|
||||
call('codejail.darklaunch.status.remote', 'safe_error'),
|
||||
call('codejail.darklaunch.exception.local', None),
|
||||
call('codejail.darklaunch.exception.remote', None),
|
||||
call('codejail.darklaunch.globals_match', True),
|
||||
call('codejail.darklaunch.emsg_match', True), # even though not exact match
|
||||
],
|
||||
expect_log_info_calls=[
|
||||
call(
|
||||
"Codejail darklaunch local results for slug=None: globals={}, "
|
||||
"emsg='stack trace involving /tmp/codejail-1234567/whatever.py', exception=None"
|
||||
),
|
||||
call(
|
||||
"Codejail darklaunch remote results for slug=None: globals={}, "
|
||||
"emsg='stack trace involving /tmp/codejail-abcdefgh/whatever.py', exception=None"
|
||||
),
|
||||
],
|
||||
expect_globals_contains={},
|
||||
)
|
||||
assert isinstance(results['raised'], SafeExecException)
|
||||
assert 'whatever.py' in repr(results['raised'])
|
||||
|
||||
@override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
|
||||
{
|
||||
'search': r'/tmp/codejail-[0-9a-zA-Z]+',
|
||||
'replace': r'/tmp/codejail-<RAND>',
|
||||
},
|
||||
{
|
||||
'search': r'[0-9]+',
|
||||
'replace': r'<NUM>',
|
||||
},
|
||||
])
|
||||
def test_configurable_normalizers(self):
|
||||
"""We can override the normalizers, and they run in order."""
|
||||
emsg_in = "Error in /tmp/codejail-1234abcd/whatever.py: something 12 34 other"
|
||||
expect_out = "Error in /tmp/codejail-<RAND>/whatever.py: something <NUM> <NUM> other"
|
||||
assert expect_out == normalize_error_message(emsg_in)
|
||||
|
||||
@override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
|
||||
{
|
||||
'search': r'broken [',
|
||||
'replace': r'replace',
|
||||
},
|
||||
])
|
||||
@patch('xmodule.capa.safe_exec.safe_exec.record_exception')
|
||||
def test_normalizers_validate(self, mock_record_exception):
|
||||
"""Normalizers are validated, and fall back to empty list on error."""
|
||||
assert emsg_normalizers() == [] # pylint: disable=use-implicit-booleaness-not-comparison
|
||||
mock_record_exception.assert_called_once()
|
||||
|
||||
|
||||
class TestLimitConfiguration(unittest.TestCase):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user