Refactor the heartbeat djangoapp to be a bit more extensible
We may want to add additional heartbeat checks, particularly depending on the context (e.g. different environments). This is meant to be a start at enabling a more plug-in style heartbeat check that provides more detailed diagnostics if desired.
This commit is contained in:
121
openedx/core/djangoapps/heartbeat/default_checks.py
Normal file
121
openedx/core/djangoapps/heartbeat/default_checks.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""
|
||||
A set of built-in default checks for the platform heartbeat endpoint
|
||||
|
||||
Other checks should be included in their respective modules/djangoapps
|
||||
"""
|
||||
from datetime import datetime, timedelta
|
||||
from time import sleep, time
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.cache import cache
|
||||
from django.db import connection
|
||||
from django.db.utils import DatabaseError
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from xmodule.exceptions import HeartbeatFailure
|
||||
|
||||
from .defaults import HEARTBEAT_CELERY_TIMEOUT
|
||||
from .tasks import sample_task
|
||||
|
||||
|
||||
# DEFAULT SYSTEM CHECKS
|
||||
|
||||
# Modulestore
|
||||
|
||||
def check_modulestore():
|
||||
""" Check the modulestore connection
|
||||
|
||||
Returns:
|
||||
(string, Boolean, unicode): A tuple containing the name of the check, whether it succeeded, and a unicode
|
||||
string of either "OK" or the failure message
|
||||
|
||||
"""
|
||||
# This refactoring merely delegates to the default modulestore (which if it's mixed modulestore will
|
||||
# delegate to all configured modulestores) and a quick test of sql. A later refactoring may allow
|
||||
# any service to register itself as participating in the heartbeat. It's important that all implementation
|
||||
# do as little as possible but give a sound determination that they are ready.
|
||||
try:
|
||||
#@TODO Do we want to parse the output for split and mongo detail and return it?
|
||||
modulestore().heartbeat()
|
||||
return 'modulestore', True, u'OK'
|
||||
except HeartbeatFailure as fail:
|
||||
return 'modulestore', False, unicode(fail)
|
||||
|
||||
|
||||
def check_database():
|
||||
""" Check the database connection by attempting a no-op query
|
||||
|
||||
Returns:
|
||||
(string, Boolean, unicode): A tuple containing the name of the check, whether it succeeded, and a unicode
|
||||
string of either "OK" or the failure message
|
||||
|
||||
"""
|
||||
cursor = connection.cursor()
|
||||
try:
|
||||
cursor.execute("SELECT CURRENT_DATE")
|
||||
cursor.fetchone()
|
||||
return 'sql', True, u'OK'
|
||||
except DatabaseError as fail:
|
||||
return 'sql', False, unicode(fail)
|
||||
|
||||
|
||||
# Caching
|
||||
CACHE_KEY = 'heartbeat-test'
|
||||
CACHE_VALUE = 'abc123'
|
||||
|
||||
|
||||
def check_cache_set():
|
||||
""" Check setting a cache value
|
||||
|
||||
Returns:
|
||||
(string, Boolean, unicode): A tuple containing the name of the check, whether it succeeded, and a unicode
|
||||
string of either "OK" or the failure message
|
||||
|
||||
"""
|
||||
try:
|
||||
cache.set(CACHE_KEY, CACHE_VALUE, 30)
|
||||
return 'cache_set', True, u'OK'
|
||||
except Exception as fail:
|
||||
return 'cache_set', False, unicode(fail)
|
||||
|
||||
|
||||
def check_cache_get():
|
||||
""" Check getting a cache value
|
||||
|
||||
Returns:
|
||||
(string, Boolean, unicode): A tuple containing the name of the check, whether it succeeded, and a unicode
|
||||
string of either "OK" or the failure message
|
||||
|
||||
"""
|
||||
try:
|
||||
data = cache.get(CACHE_KEY)
|
||||
if data == CACHE_VALUE:
|
||||
return 'cache_get', True, u'OK'
|
||||
else:
|
||||
return 'cache_get', False, u'value check failed'
|
||||
except Exception as fail:
|
||||
return 'cache_get', False, unicode(fail)
|
||||
|
||||
|
||||
# Celery
|
||||
def check_celery():
|
||||
""" Check running a simple asynchronous celery task
|
||||
|
||||
Returns:
|
||||
(string, Boolean, unicode): A tuple containing the name of the check, whether it succeeded, and a unicode
|
||||
string of either "OK" or the failure message
|
||||
|
||||
"""
|
||||
now = time()
|
||||
datetimenow = datetime.now()
|
||||
expires = datetimenow + timedelta(seconds=getattr(settings, 'HEARTBEAT_CELERY_TIMEOUT', HEARTBEAT_CELERY_TIMEOUT))
|
||||
|
||||
try:
|
||||
task = sample_task.apply_async(expires=expires)
|
||||
while expires > datetime.now():
|
||||
if task.ready() and task.result:
|
||||
finished = str(time() - now)
|
||||
return 'celery', True, unicode({'time': finished})
|
||||
sleep(0.25)
|
||||
return 'celery', False, "expired"
|
||||
except Exception as fail:
|
||||
return 'celery', False, unicode(fail)
|
||||
17
openedx/core/djangoapps/heartbeat/defaults.py
Normal file
17
openedx/core/djangoapps/heartbeat/defaults.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""
|
||||
Configuration defaults for the heartbeat djangoapp
|
||||
|
||||
Configures what checks to run by default in normal and "extended"/heavy mode,
|
||||
as well as providing settings for the default checks themselves
|
||||
"""
|
||||
|
||||
HEARTBEAT_DEFAULT_CHECKS = [
|
||||
'.default_checks.check_modulestore',
|
||||
'.default_checks.check_database',
|
||||
]
|
||||
|
||||
HEARTBEAT_EXTENDED_DEFAULT_CHECKS = (
|
||||
'.default_checks.check_celery',
|
||||
)
|
||||
|
||||
HEARTBEAT_CELERY_TIMEOUT = 5
|
||||
41
openedx/core/djangoapps/heartbeat/runchecks.py
Normal file
41
openedx/core/djangoapps/heartbeat/runchecks.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from importlib import import_module
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import ImproperlyConfigured
|
||||
|
||||
from .defaults import HEARTBEAT_DEFAULT_CHECKS, HEARTBEAT_EXTENDED_DEFAULT_CHECKS
|
||||
|
||||
|
||||
def runchecks(include_extended=False):
|
||||
"""
|
||||
Iterates through a tuple of systems checks, then returns a dictionary containing the check name as the key, and a
|
||||
dict containing a status boolean and string describing the status, including any failure or error messages
|
||||
"""
|
||||
response_dict = {}
|
||||
|
||||
#Taken straight from Django
|
||||
#If there is a better way, I don't know it
|
||||
list_of_checks = getattr(settings, 'HEARTBEAT_CHECKS', HEARTBEAT_DEFAULT_CHECKS)
|
||||
if include_extended:
|
||||
list_of_checks += getattr(settings, 'HEARTBEAT_EXTENDED_CHECKS', HEARTBEAT_EXTENDED_DEFAULT_CHECKS)
|
||||
|
||||
for path in list_of_checks:
|
||||
module, _, attr = path.rpartition('.')
|
||||
try:
|
||||
if module[0] == '.': # Relative path, assume relative to this app
|
||||
mod = import_module(module, __package__)
|
||||
else:
|
||||
mod = import_module(module)
|
||||
func = getattr(mod, attr)
|
||||
|
||||
check_name, is_ok, message = func()
|
||||
response_dict[check_name] = {
|
||||
'status': is_ok,
|
||||
'message': message
|
||||
}
|
||||
except ImportError as e:
|
||||
raise ImproperlyConfigured('Error importing module %s: "%s"' % (module, e))
|
||||
except AttributeError:
|
||||
raise ImproperlyConfigured('Module "%s" does not define a "%s" callable' % (module, attr))
|
||||
|
||||
return response_dict
|
||||
9
openedx/core/djangoapps/heartbeat/tasks.py
Normal file
9
openedx/core/djangoapps/heartbeat/tasks.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
A trivial task for health checks
|
||||
"""
|
||||
from celery.task import task
|
||||
|
||||
|
||||
@task()
|
||||
def sample_task():
|
||||
return True
|
||||
@@ -23,19 +23,21 @@ class HeartbeatTestCase(ModuleStoreTestCase):
|
||||
return super(HeartbeatTestCase, self).setUp()
|
||||
|
||||
def test_success(self):
|
||||
response = self.client.get(self.heartbeat_url)
|
||||
response = self.client.get(self.heartbeat_url + '?extended')
|
||||
print response
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
def test_sql_fail(self):
|
||||
with patch('openedx.core.djangoapps.heartbeat.views.connection') as mock_connection:
|
||||
with patch('openedx.core.djangoapps.heartbeat.default_checks.connection') as mock_connection:
|
||||
mock_connection.cursor.return_value.execute.side_effect = DatabaseError
|
||||
response = self.client.get(self.heartbeat_url)
|
||||
self.assertEqual(response.status_code, 503)
|
||||
response_dict = json.loads(response.content)
|
||||
self.assertIn('SQL', response_dict)
|
||||
self.assertIn('sql', response_dict)
|
||||
|
||||
def test_modulestore_fail(self):
|
||||
with patch('openedx.core.djangoapps.heartbeat.views.modulestore') as mock_modulestore:
|
||||
with patch('openedx.core.djangoapps.heartbeat.default_checks.modulestore') as mock_modulestore:
|
||||
mock_modulestore.return_value.heartbeat.side_effect = HeartbeatFailure('msg', 'service')
|
||||
response = self.client.get(self.heartbeat_url)
|
||||
self.assertEqual(response.status_code, 503)
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
"""
|
||||
Views for verifying the health (heartbeat) of the app.
|
||||
"""
|
||||
from django.db import connection
|
||||
from django.db.utils import DatabaseError
|
||||
from dogapi import dog_stats_api
|
||||
|
||||
from util.json_request import JsonResponse
|
||||
from xmodule.exceptions import HeartbeatFailure
|
||||
from xmodule.modulestore.django import modulestore
|
||||
|
||||
from .runchecks import runchecks
|
||||
|
||||
|
||||
@dog_stats_api.timed('edxapp.heartbeat')
|
||||
@@ -17,21 +14,16 @@ def heartbeat(request): # pylint: disable=unused-argument
|
||||
of service id: status or message. If the status for any service is anything other than True,
|
||||
it returns HTTP code 503 (Service Unavailable); otherwise, it returns 200.
|
||||
"""
|
||||
# This refactoring merely delegates to the default modulestore (which if it's mixed modulestore will
|
||||
# delegate to all configured modulestores) and a quick test of sql. A later refactoring may allow
|
||||
# any service to register itself as participating in the heartbeat. It's important that all implementation
|
||||
# do as little as possible but give a sound determination that they are ready.
|
||||
check_results = {}
|
||||
try:
|
||||
output = modulestore().heartbeat()
|
||||
except HeartbeatFailure as fail:
|
||||
return JsonResponse({fail.service: unicode(fail)}, status=503)
|
||||
check_results = runchecks('extended' in request.GET)
|
||||
|
||||
cursor = connection.cursor()
|
||||
try:
|
||||
cursor.execute("SELECT CURRENT_DATE")
|
||||
cursor.fetchone()
|
||||
output['SQL'] = True
|
||||
except DatabaseError as fail:
|
||||
return JsonResponse({'SQL': unicode(fail)}, status=503)
|
||||
status_code = 200 # Default to OK
|
||||
for check in check_results:
|
||||
if not check_results[check]['status']:
|
||||
status_code = 503 # 503 on any failure
|
||||
except Exception as e:
|
||||
status_code = 503
|
||||
check_results = {'error': unicode(e)}
|
||||
|
||||
return JsonResponse(output)
|
||||
return JsonResponse(check_results, status=status_code)
|
||||
|
||||
Reference in New Issue
Block a user