diff --git a/openedx/core/djangoapps/programs/migrations/0005_programsapiconfig_max_retries.py b/openedx/core/djangoapps/programs/migrations/0005_programsapiconfig_max_retries.py new file mode 100644 index 0000000000..9af915d7b8 --- /dev/null +++ b/openedx/core/djangoapps/programs/migrations/0005_programsapiconfig_max_retries.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('programs', '0004_programsapiconfig_enable_certification'), + ] + + operations = [ + migrations.AddField( + model_name='programsapiconfig', + name='max_retries', + field=models.PositiveIntegerField(default=11, help_text='When making requests to award certificates, make at most this many attempts to retry a failing request.', verbose_name='Maximum Certification Retries'), + ), + ] diff --git a/openedx/core/djangoapps/programs/models.py b/openedx/core/djangoapps/programs/models.py index 069051ef51..fd880af7f3 100644 --- a/openedx/core/djangoapps/programs/models.py +++ b/openedx/core/djangoapps/programs/models.py @@ -65,6 +65,15 @@ class ProgramsApiConfig(ConfigurationModel): default=False ) + max_retries = models.PositiveIntegerField( + verbose_name=_("Maximum Certification Retries"), + default=11, # This gives about 30 minutes wait before the final attempt + help_text=_( + "When making requests to award certificates, make at most this many attempts " + "to retry a failing request." + ) + ) + @property def internal_api_url(self): """ diff --git a/openedx/core/djangoapps/programs/tasks/v1/tasks.py b/openedx/core/djangoapps/programs/tasks/v1/tasks.py index a7781bf79b..e709b0bd50 100644 --- a/openedx/core/djangoapps/programs/tasks/v1/tasks.py +++ b/openedx/core/djangoapps/programs/tasks/v1/tasks.py @@ -140,21 +140,24 @@ def award_program_certificates(self, username): """ LOGGER.info('Running task award_program_certificates for username %s', username) + config = ProgramsApiConfig.current() + countdown = 2 ** self.request.retries + # If either programs or credentials config models are disabled for this # feature, this task should not have been invoked in the first place, and # an error somewhere is likely (though a race condition is also possible). # In either case, the task should not be executed nor should it be retried. - if not ProgramsApiConfig.current().is_certification_enabled: + if not config.is_certification_enabled: LOGGER.warning( 'Task award_program_certificates cannot be executed when program certification is disabled in API config', ) - return + raise self.retry(countdown=countdown, max_retries=config.max_retries) if not CredentialsApiConfig.current().is_learner_issuance_enabled: LOGGER.warning( 'Task award_program_certificates cannot be executed when credentials issuance is disabled in API config', ) - return + raise self.retry(countdown=countdown, max_retries=config.max_retries) try: try: @@ -183,7 +186,7 @@ def award_program_certificates(self, username): # Invoke the Programs API completion check endpoint to identify any # programs that are satisfied by these course completions. - programs_client = get_api_client(ProgramsApiConfig.current(), student) + programs_client = get_api_client(config, student) program_ids = get_completed_programs(programs_client, course_certs) if not program_ids: # Again, no reason to continue beyond this point unless/until this @@ -194,13 +197,15 @@ def award_program_certificates(self, username): # awarded, if any. existing_program_ids = get_awarded_certificate_programs(student) - except Exception, exc: # pylint: disable=broad-except + except Exception as exc: # pylint: disable=broad-except LOGGER.exception('Failed to determine program certificates to be awarded for user %s', username) - raise self.retry(exc=exc) + raise self.retry(exc=exc, countdown=countdown, max_retries=config.max_retries) # For each completed program for which the student doesn't already have a # certificate, award one now. # + # This logic is important, because we will retry the whole task if awarding any particular program cert fails. + # # N.B. the list is sorted to facilitate deterministic ordering, e.g. for tests. new_program_ids = sorted(list(set(program_ids) - set(existing_program_ids))) if new_program_ids: @@ -209,15 +214,22 @@ def award_program_certificates(self, username): CredentialsApiConfig.current(), User.objects.get(username=settings.CREDENTIALS_SERVICE_USERNAME) # pylint: disable=no-member ) - except Exception, exc: # pylint: disable=broad-except + except Exception as exc: # pylint: disable=broad-except LOGGER.exception('Failed to create a credentials API client to award program certificates') # Retry because a misconfiguration could be fixed - raise self.retry(exc=exc) + raise self.retry(exc=exc, countdown=countdown, max_retries=config.max_retries) + retry = False for program_id in new_program_ids: try: award_program_certificate(credentials_client, username, program_id) LOGGER.info('Awarded certificate for program %s to user %s', program_id, username) except Exception: # pylint: disable=broad-except - # keep trying to award other certs. + # keep trying to award other certs, but retry the whole task to fix any missing entries LOGGER.exception('Failed to award certificate for program %s to user %s', program_id, username) + retry = True + + if retry: + # N.B. This logic assumes that this task is idempotent + LOGGER.info('Retrying task to award failed certificates to user %s', username) + raise self.retry(countdown=countdown, max_retries=config.max_retries) diff --git a/openedx/core/djangoapps/programs/tasks/v1/tests/test_tasks.py b/openedx/core/djangoapps/programs/tasks/v1/tests/test_tasks.py index a1ca459791..50f5dedf9b 100644 --- a/openedx/core/djangoapps/programs/tasks/v1/tests/test_tasks.py +++ b/openedx/core/djangoapps/programs/tasks/v1/tests/test_tasks.py @@ -3,13 +3,15 @@ Tests for programs celery tasks. """ import ddt -from django.conf import settings -from django.test import override_settings, TestCase -from edx_rest_api_client.client import EdxRestApiClient import httpretty import json import mock +from celery.exceptions import MaxRetriesExceededError +from django.conf import settings +from django.test import override_settings, TestCase +from edx_rest_api_client.client import EdxRestApiClient + from oauth2_provider.tests.factories import ClientFactory from openedx.core.djangoapps.credentials.tests.mixins import CredentialsApiConfigMixin from openedx.core.djangoapps.programs.tests.mixins import ProgramsApiConfigMixin @@ -258,7 +260,7 @@ class AwardProgramCertificatesTestCase(TestCase, ProgramsApiConfigMixin, Credent ('credentials', 'enable_learner_issuance'), ) @ddt.unpack - def test_abort_if_config_disabled( + def test_retry_if_config_disabled( self, disabled_config_type, disabled_config_attribute, @@ -270,7 +272,8 @@ class AwardProgramCertificatesTestCase(TestCase, ProgramsApiConfigMixin, Credent """ getattr(self, 'create_{}_config'.format(disabled_config_type))(**{disabled_config_attribute: False}) with mock.patch(TASKS_MODULE + '.LOGGER.warning') as mock_warning: - tasks.award_program_certificates.delay(self.student.username).get() + with self.assertRaises(MaxRetriesExceededError): + tasks.award_program_certificates.delay(self.student.username).get() self.assertTrue(mock_warning.called) for mock_helper in mock_helpers: self.assertFalse(mock_helper.called) @@ -337,9 +340,10 @@ class AwardProgramCertificatesTestCase(TestCase, ProgramsApiConfigMixin, Credent """ def side_effect(*_a): # pylint: disable=missing-docstring - exc = side_effects.pop(0) - if exc: - raise exc + if side_effects: + exc = side_effects.pop(0) + if exc: + raise exc return mock.DEFAULT return side_effect @@ -357,16 +361,17 @@ class AwardProgramCertificatesTestCase(TestCase, ProgramsApiConfigMixin, Credent that arise are logged also. """ mock_get_completed_programs.return_value = [1, 2] - mock_get_awarded_certificate_programs.return_value = [] + mock_get_awarded_certificate_programs.side_effect = [[], [2]] mock_award_program_certificate.side_effect = self._make_side_effect([Exception('boom'), None]) with mock.patch(TASKS_MODULE + '.LOGGER.info') as mock_info, \ mock.patch(TASKS_MODULE + '.LOGGER.exception') as mock_exception: tasks.award_program_certificates.delay(self.student.username).get() - self.assertEqual(mock_award_program_certificate.call_count, 2) + self.assertEqual(mock_award_program_certificate.call_count, 3) mock_exception.assert_called_once_with(mock.ANY, 1, self.student.username) - mock_info.assert_called_with(mock.ANY, 2, self.student.username) + mock_info.assert_any_call(mock.ANY, 1, self.student.username) + mock_info.assert_any_call(mock.ANY, 2, self.student.username) def test_retry_on_certificates_api_errors( self,