From 7895875559ec77fdb56cfacf409782cf410f7945 Mon Sep 17 00:00:00 2001 From: Joel Barciauskas Date: Sat, 8 Apr 2017 21:49:18 -0400 Subject: [PATCH] Refactor the heartbeat djangoapp to be a bit more extensible We may want to add additional heartbeat checks, particularly depending on the context (e.g. different environments). This is meant to be a start at enabling a more plug-in style heartbeat check that provides more detailed diagnostics if desired. --- .../djangoapps/heartbeat/default_checks.py | 121 ++++++++++++++++++ openedx/core/djangoapps/heartbeat/defaults.py | 17 +++ .../core/djangoapps/heartbeat/runchecks.py | 41 ++++++ openedx/core/djangoapps/heartbeat/tasks.py | 9 ++ .../heartbeat/tests/test_heartbeat.py | 10 +- openedx/core/djangoapps/heartbeat/views.py | 32 ++--- 6 files changed, 206 insertions(+), 24 deletions(-) create mode 100644 openedx/core/djangoapps/heartbeat/default_checks.py create mode 100644 openedx/core/djangoapps/heartbeat/defaults.py create mode 100644 openedx/core/djangoapps/heartbeat/runchecks.py create mode 100644 openedx/core/djangoapps/heartbeat/tasks.py diff --git a/openedx/core/djangoapps/heartbeat/default_checks.py b/openedx/core/djangoapps/heartbeat/default_checks.py new file mode 100644 index 0000000000..dde0617be6 --- /dev/null +++ b/openedx/core/djangoapps/heartbeat/default_checks.py @@ -0,0 +1,121 @@ +""" +A set of built-in default checks for the platform heartbeat endpoint + +Other checks should be included in their respective modules/djangoapps +""" +from datetime import datetime, timedelta +from time import sleep, time + +from django.conf import settings +from django.core.cache import cache +from django.db import connection +from django.db.utils import DatabaseError +from xmodule.modulestore.django import modulestore +from xmodule.exceptions import HeartbeatFailure + +from .defaults import HEARTBEAT_CELERY_TIMEOUT +from .tasks import sample_task + + +# DEFAULT SYSTEM CHECKS + +# Modulestore + +def check_modulestore(): + """ Check the modulestore connection + + Returns: + (string, Boolean, unicode): A tuple containing the name of the check, whether it succeeded, and a unicode + string of either "OK" or the failure message + + """ + # This refactoring merely delegates to the default modulestore (which if it's mixed modulestore will + # delegate to all configured modulestores) and a quick test of sql. A later refactoring may allow + # any service to register itself as participating in the heartbeat. It's important that all implementation + # do as little as possible but give a sound determination that they are ready. + try: + #@TODO Do we want to parse the output for split and mongo detail and return it? + modulestore().heartbeat() + return 'modulestore', True, u'OK' + except HeartbeatFailure as fail: + return 'modulestore', False, unicode(fail) + + +def check_database(): + """ Check the database connection by attempting a no-op query + + Returns: + (string, Boolean, unicode): A tuple containing the name of the check, whether it succeeded, and a unicode + string of either "OK" or the failure message + + """ + cursor = connection.cursor() + try: + cursor.execute("SELECT CURRENT_DATE") + cursor.fetchone() + return 'sql', True, u'OK' + except DatabaseError as fail: + return 'sql', False, unicode(fail) + + +# Caching +CACHE_KEY = 'heartbeat-test' +CACHE_VALUE = 'abc123' + + +def check_cache_set(): + """ Check setting a cache value + + Returns: + (string, Boolean, unicode): A tuple containing the name of the check, whether it succeeded, and a unicode + string of either "OK" or the failure message + + """ + try: + cache.set(CACHE_KEY, CACHE_VALUE, 30) + return 'cache_set', True, u'OK' + except Exception as fail: + return 'cache_set', False, unicode(fail) + + +def check_cache_get(): + """ Check getting a cache value + + Returns: + (string, Boolean, unicode): A tuple containing the name of the check, whether it succeeded, and a unicode + string of either "OK" or the failure message + + """ + try: + data = cache.get(CACHE_KEY) + if data == CACHE_VALUE: + return 'cache_get', True, u'OK' + else: + return 'cache_get', False, u'value check failed' + except Exception as fail: + return 'cache_get', False, unicode(fail) + + +# Celery +def check_celery(): + """ Check running a simple asynchronous celery task + + Returns: + (string, Boolean, unicode): A tuple containing the name of the check, whether it succeeded, and a unicode + string of either "OK" or the failure message + + """ + now = time() + datetimenow = datetime.now() + expires = datetimenow + timedelta(seconds=getattr(settings, 'HEARTBEAT_CELERY_TIMEOUT', HEARTBEAT_CELERY_TIMEOUT)) + + try: + task = sample_task.apply_async(expires=expires) + while expires > datetime.now(): + if task.ready() and task.result: + finished = str(time() - now) + return 'celery', True, unicode({'time': finished}) + sleep(0.25) + return 'celery', False, "expired" + except Exception as fail: + return 'celery', False, unicode(fail) diff --git a/openedx/core/djangoapps/heartbeat/defaults.py b/openedx/core/djangoapps/heartbeat/defaults.py new file mode 100644 index 0000000000..3f892d1671 --- /dev/null +++ b/openedx/core/djangoapps/heartbeat/defaults.py @@ -0,0 +1,17 @@ +""" +Configuration defaults for the heartbeat djangoapp + +Configures what checks to run by default in normal and "extended"/heavy mode, +as well as providing settings for the default checks themselves +""" + +HEARTBEAT_DEFAULT_CHECKS = [ + '.default_checks.check_modulestore', + '.default_checks.check_database', +] + +HEARTBEAT_EXTENDED_DEFAULT_CHECKS = ( + '.default_checks.check_celery', +) + +HEARTBEAT_CELERY_TIMEOUT = 5 diff --git a/openedx/core/djangoapps/heartbeat/runchecks.py b/openedx/core/djangoapps/heartbeat/runchecks.py new file mode 100644 index 0000000000..bba1876455 --- /dev/null +++ b/openedx/core/djangoapps/heartbeat/runchecks.py @@ -0,0 +1,41 @@ +from importlib import import_module + +from django.conf import settings +from django.core.exceptions import ImproperlyConfigured + +from .defaults import HEARTBEAT_DEFAULT_CHECKS, HEARTBEAT_EXTENDED_DEFAULT_CHECKS + + +def runchecks(include_extended=False): + """ + Iterates through a tuple of systems checks, then returns a dictionary containing the check name as the key, and a + dict containing a status boolean and string describing the status, including any failure or error messages + """ + response_dict = {} + + #Taken straight from Django + #If there is a better way, I don't know it + list_of_checks = getattr(settings, 'HEARTBEAT_CHECKS', HEARTBEAT_DEFAULT_CHECKS) + if include_extended: + list_of_checks += getattr(settings, 'HEARTBEAT_EXTENDED_CHECKS', HEARTBEAT_EXTENDED_DEFAULT_CHECKS) + + for path in list_of_checks: + module, _, attr = path.rpartition('.') + try: + if module[0] == '.': # Relative path, assume relative to this app + mod = import_module(module, __package__) + else: + mod = import_module(module) + func = getattr(mod, attr) + + check_name, is_ok, message = func() + response_dict[check_name] = { + 'status': is_ok, + 'message': message + } + except ImportError as e: + raise ImproperlyConfigured('Error importing module %s: "%s"' % (module, e)) + except AttributeError: + raise ImproperlyConfigured('Module "%s" does not define a "%s" callable' % (module, attr)) + + return response_dict diff --git a/openedx/core/djangoapps/heartbeat/tasks.py b/openedx/core/djangoapps/heartbeat/tasks.py new file mode 100644 index 0000000000..058eea8e02 --- /dev/null +++ b/openedx/core/djangoapps/heartbeat/tasks.py @@ -0,0 +1,9 @@ +""" +A trivial task for health checks +""" +from celery.task import task + + +@task() +def sample_task(): + return True diff --git a/openedx/core/djangoapps/heartbeat/tests/test_heartbeat.py b/openedx/core/djangoapps/heartbeat/tests/test_heartbeat.py index a7bf8341ac..e999cda7b5 100644 --- a/openedx/core/djangoapps/heartbeat/tests/test_heartbeat.py +++ b/openedx/core/djangoapps/heartbeat/tests/test_heartbeat.py @@ -23,19 +23,21 @@ class HeartbeatTestCase(ModuleStoreTestCase): return super(HeartbeatTestCase, self).setUp() def test_success(self): - response = self.client.get(self.heartbeat_url) + response = self.client.get(self.heartbeat_url + '?extended') + print response + self.assertEqual(response.status_code, 200) def test_sql_fail(self): - with patch('openedx.core.djangoapps.heartbeat.views.connection') as mock_connection: + with patch('openedx.core.djangoapps.heartbeat.default_checks.connection') as mock_connection: mock_connection.cursor.return_value.execute.side_effect = DatabaseError response = self.client.get(self.heartbeat_url) self.assertEqual(response.status_code, 503) response_dict = json.loads(response.content) - self.assertIn('SQL', response_dict) + self.assertIn('sql', response_dict) def test_modulestore_fail(self): - with patch('openedx.core.djangoapps.heartbeat.views.modulestore') as mock_modulestore: + with patch('openedx.core.djangoapps.heartbeat.default_checks.modulestore') as mock_modulestore: mock_modulestore.return_value.heartbeat.side_effect = HeartbeatFailure('msg', 'service') response = self.client.get(self.heartbeat_url) self.assertEqual(response.status_code, 503) diff --git a/openedx/core/djangoapps/heartbeat/views.py b/openedx/core/djangoapps/heartbeat/views.py index 5128618e26..3a41d7fc98 100644 --- a/openedx/core/djangoapps/heartbeat/views.py +++ b/openedx/core/djangoapps/heartbeat/views.py @@ -1,13 +1,10 @@ """ Views for verifying the health (heartbeat) of the app. """ -from django.db import connection -from django.db.utils import DatabaseError from dogapi import dog_stats_api - from util.json_request import JsonResponse -from xmodule.exceptions import HeartbeatFailure -from xmodule.modulestore.django import modulestore + +from .runchecks import runchecks @dog_stats_api.timed('edxapp.heartbeat') @@ -17,21 +14,16 @@ def heartbeat(request): # pylint: disable=unused-argument of service id: status or message. If the status for any service is anything other than True, it returns HTTP code 503 (Service Unavailable); otherwise, it returns 200. """ - # This refactoring merely delegates to the default modulestore (which if it's mixed modulestore will - # delegate to all configured modulestores) and a quick test of sql. A later refactoring may allow - # any service to register itself as participating in the heartbeat. It's important that all implementation - # do as little as possible but give a sound determination that they are ready. + check_results = {} try: - output = modulestore().heartbeat() - except HeartbeatFailure as fail: - return JsonResponse({fail.service: unicode(fail)}, status=503) + check_results = runchecks('extended' in request.GET) - cursor = connection.cursor() - try: - cursor.execute("SELECT CURRENT_DATE") - cursor.fetchone() - output['SQL'] = True - except DatabaseError as fail: - return JsonResponse({'SQL': unicode(fail)}, status=503) + status_code = 200 # Default to OK + for check in check_results: + if not check_results[check]['status']: + status_code = 503 # 503 on any failure + except Exception as e: + status_code = 503 + check_results = {'error': unicode(e)} - return JsonResponse(output) + return JsonResponse(check_results, status=status_code)