Merge pull request #26292 from edx/tuchfarber/add_consistent_logging_credentials_tasks

Make credentials celery tasks errors consistent
This commit is contained in:
Matt Tuchfarber
2021-02-08 14:40:35 -05:00
committed by GitHub
3 changed files with 214 additions and 98 deletions

View File

@@ -4,6 +4,7 @@ This file contains celery tasks for credentials-related functionality.
from celery import shared_task
from celery.exceptions import MaxRetriesExceededError
from celery.utils.log import get_task_logger
from django.conf import settings
from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user
@@ -25,7 +26,7 @@ MAX_RETRIES = 11
@set_code_owner_attribute
def send_grade_to_credentials(self, username, course_run_key, verified, letter_grade, percent_grade):
""" Celery task to notify the Credentials IDA of a grade change via POST. """
logger.info(u'Running task send_grade_to_credentials for username %s and course %s', username, course_run_key)
logger.info(f"Running task send_grade_to_credentials for username {username} and course {course_run_key}")
countdown = 2 ** self.request.retries
course_key = CourseKey.from_string(course_run_key)
@@ -44,8 +45,12 @@ def send_grade_to_credentials(self, username, course_run_key, verified, letter_g
'verified': verified,
})
logger.info(u'Sent grade for course %s to user %s', course_run_key, username)
logger.info(f"Sent grade for course {course_run_key} to user {username}")
except Exception as exc:
logger.exception(u'Failed to send grade for course %s to user %s', course_run_key, username)
raise self.retry(exc=exc, countdown=countdown, max_retries=MAX_RETRIES)
error_msg = f"Failed to send grade for course {course_run_key} to user {username}."
logger.exception(error_msg)
exception = MaxRetriesExceededError(
f"Failed to send grade to credentials. Reason: {error_msg}"
)
raise self.retry(exc=exception, countdown=countdown, max_retries=MAX_RETRIES)

View File

@@ -144,7 +144,17 @@ def award_program_certificates(self, username): # lint-amnesty, pylint: disable
None
"""
LOGGER.info(u'Running task award_program_certificates for username %s', username)
def _retry_with_custom_exception(username, reason, countdown):
exception = MaxRetriesExceededError(
f"Failed to award program certificate for user {username}. Reason: {reason}"
)
return self.retry(
exc=exception,
countdown=countdown,
max_retries=MAX_RETRIES
)
LOGGER.info(f"Running task award_program_certificates for username {username}")
programs_without_certificates = configuration_helpers.get_value('programs_without_certificates', [])
if programs_without_certificates:
if str(programs_without_certificates[0]).lower() == "all":
@@ -158,16 +168,17 @@ def award_program_certificates(self, username): # lint-amnesty, pylint: disable
# mark this task for retry instead of failing it altogether.
if not CredentialsApiConfig.current().is_learner_issuance_enabled:
LOGGER.warning(
'Task award_program_certificates cannot be executed when credentials issuance is disabled in API config',
error_msg = (
"Task award_program_certificates cannot be executed when credentials issuance is disabled in API config"
)
raise self.retry(countdown=countdown, max_retries=MAX_RETRIES)
LOGGER.warning(error_msg)
raise _retry_with_custom_exception(username=username, reason=error_msg, countdown=countdown)
try:
try:
student = User.objects.get(username=username)
except User.DoesNotExist:
LOGGER.exception(u'Task award_program_certificates was called with invalid username %s', username)
LOGGER.exception(f"Task award_program_certificates was called with invalid username {username}")
# Don't retry for this case - just conclude the task.
return
completed_programs = {}
@@ -176,7 +187,7 @@ def award_program_certificates(self, username): # lint-amnesty, pylint: disable
if not completed_programs:
# No reason to continue beyond this point unless/until this
# task gets updated to support revocation of program certs.
LOGGER.info(u'Task award_program_certificates was called for user %s with no completed programs', username)
LOGGER.info(f"Task award_program_certificates was called for user {username} with no completed programs")
return
# Determine which program certificates the user has already been awarded, if any.
@@ -187,8 +198,9 @@ def award_program_certificates(self, username): # lint-amnesty, pylint: disable
awarded_and_skipped_program_uuids = list(set(existing_program_uuids + list(programs_without_certificates)))
except Exception as exc:
LOGGER.exception(u'Failed to determine program certificates to be awarded for user %s', username)
raise self.retry(exc=exc, countdown=countdown, max_retries=MAX_RETRIES)
error_msg = f"Failed to determine program certificates to be awarded for user {username}. {exc}"
LOGGER.exception(error_msg)
raise _retry_with_custom_exception(username=username, reason=error_msg, countdown=countdown) from exc
# For each completed program for which the student doesn't already have a
# certificate, award one now.
@@ -203,22 +215,22 @@ def award_program_certificates(self, username): # lint-amnesty, pylint: disable
User.objects.get(username=settings.CREDENTIALS_SERVICE_USERNAME),
)
except Exception as exc:
LOGGER.exception('Failed to create a credentials API client to award program certificates')
error_msg = "Failed to create a credentials API client to award program certificates"
LOGGER.exception(error_msg)
# Retry because a misconfiguration could be fixed
raise self.retry(exc=exc, countdown=countdown, max_retries=MAX_RETRIES)
raise _retry_with_custom_exception(username=username, reason=error_msg, countdown=countdown) from exc
failed_program_certificate_award_attempts = []
for program_uuid in new_program_uuids:
visible_date = completed_programs[program_uuid]
try:
LOGGER.info(u'Visible date for user %s : program %s is %s', username, program_uuid,
visible_date)
LOGGER.info(f"Visible date for user {username} : program {program_uuid} is {visible_date}")
award_program_certificate(credentials_client, username, program_uuid, visible_date)
LOGGER.info(u'Awarded certificate for program %s to user %s', program_uuid, username)
LOGGER.info(f"Awarded certificate for program {program_uuid} to user {username}")
except exceptions.HttpNotFoundError:
LOGGER.exception(
u"""Certificate for program {uuid} could not be found. Unable to award certificate to user
{username}. The program might not be configured.""".format(uuid=program_uuid, username=username)
f"Certificate for program {program_uuid} could not be found. " +
f"Unable to award certificate to user {username}. The program might not be configured."
)
except exceptions.HttpClientError as exc:
# Grab the status code from the client error, because our API
@@ -227,40 +239,42 @@ def award_program_certificates(self, username): # lint-amnesty, pylint: disable
# in edx_rest_api_client.
if exc.response.status_code == 429: # lint-amnesty, pylint: disable=no-else-raise, no-member
rate_limit_countdown = 60
LOGGER.info(
u"""Rate limited. Retrying task to award certificates to user {username} in {countdown}
seconds""".format(username=username, countdown=rate_limit_countdown)
error_msg = (
f"Rate limited. "
f"Retrying task to award certificates to user {username} in {rate_limit_countdown} seconds"
)
LOGGER.info(error_msg)
# Retry after 60 seconds, when we should be in a new throttling window
raise self.retry(exc=exc, countdown=rate_limit_countdown, max_retries=MAX_RETRIES)
raise _retry_with_custom_exception(
username=username,
reason=error_msg,
countdown=rate_limit_countdown
) from exc
else:
LOGGER.exception(
u"""Unable to award certificate to user {username} for program {uuid}. The program might not be
configured.""".format(username=username, uuid=program_uuid)
f"Unable to award certificate to user {username} for program {program_uuid}. "
"The program might not be configured."
)
except Exception: # pylint: disable=broad-except
# keep trying to award other certs, but retry the whole task to fix any missing entries
LOGGER.warning(u'Failed to award certificate for program {uuid} to user {username}.'.format(
uuid=program_uuid, username=username))
LOGGER.warning(f"Failed to award certificate for program {program_uuid} to user {username}.")
failed_program_certificate_award_attempts.append(program_uuid)
if failed_program_certificate_award_attempts:
# N.B. This logic assumes that this task is idempotent
LOGGER.info(u'Retrying task to award failed certificates to user %s', username)
LOGGER.info(f"Retrying task to award failed certificates to user {username}")
# The error message may change on each reattempt but will never be raised until
# the max number of retries have been exceeded. It is unlikely that this list
# will change by the time it reaches its maximimum number of attempts.
exception = MaxRetriesExceededError(
u"Failed to award certificate for user {} for programs {}".format(
username, failed_program_certificate_award_attempts))
raise self.retry(
exc=exception,
countdown=countdown,
max_retries=MAX_RETRIES)
error_msg = (
f"Failed to award certificate for user {username} "
f"for programs {failed_program_certificate_award_attempts}"
)
raise _retry_with_custom_exception(username=username, reason=error_msg, countdown=countdown)
else:
LOGGER.info(u'User %s is not eligible for any new program certificates', username)
LOGGER.info(f"User {username} is not eligible for any new program certificates")
LOGGER.info(u'Successfully completed the task award_program_certificates for username %s', username)
LOGGER.info(f"Successfully completed the task award_program_certificates for username {username}")
def post_course_certificate(client, username, certificate, visible_date):
@@ -291,7 +305,17 @@ def award_course_certificate(self, username, course_run_key):
This task is designed to be called whenever a student GeneratedCertificate is updated.
It can be called independently for a username and a course_run, but is invoked on each GeneratedCertificate.save.
"""
LOGGER.info(u'Running task award_course_certificate for username %s', username)
def _retry_with_custom_exception(username, course_run_key, reason, countdown):
exception = MaxRetriesExceededError(
f"Failed to revoke program certificate for user {username} for course {course_run_key}. Reason: {reason}"
)
return self.retry(
exc=exception,
countdown=countdown,
max_retries=MAX_RETRIES
)
LOGGER.info(f"Running task award_course_certificate for username {username}")
countdown = 2 ** self.request.retries
@@ -301,17 +325,23 @@ def award_course_certificate(self, username, course_run_key):
# mark this task for retry instead of failing it altogether.
if not CredentialsApiConfig.current().is_learner_issuance_enabled:
LOGGER.warning(
'Task award_course_certificate cannot be executed when credentials issuance is disabled in API config',
error_msg = (
"Task award_course_certificate cannot be executed when credentials issuance is disabled in API config"
)
LOGGER.warning(error_msg)
raise _retry_with_custom_exception(
username=username,
course_run_key=course_run_key,
reason=error_msg,
countdown=countdown
)
raise self.retry(countdown=countdown, max_retries=MAX_RETRIES)
try:
course_key = CourseKey.from_string(course_run_key)
try:
user = User.objects.get(username=username)
except User.DoesNotExist:
LOGGER.exception(u'Task award_course_certificate was called with invalid username %s', username)
LOGGER.exception(f"Task award_course_certificate was called with invalid username {username}")
# Don't retry for this case - just conclude the task.
return
# Get the cert for the course key and username if it's both passing and available in professional/verified
@@ -322,9 +352,8 @@ def award_course_certificate(self, username, course_run_key):
)
except GeneratedCertificate.DoesNotExist:
LOGGER.exception(
u'Task award_course_certificate was called without Certificate found for %s to user %s',
course_key,
username
"Task award_course_certificate was called without Certificate found "
f"for {course_key} to user {username}"
)
return
if certificate.mode in CourseMode.CERTIFICATE_RELEVANT_MODES:
@@ -332,8 +361,7 @@ def award_course_certificate(self, username, course_run_key):
course_overview = CourseOverview.get_from_id(course_key)
except (CourseOverview.DoesNotExist, IOError):
LOGGER.exception(
u'Task award_course_certificate was called without course overview data for course %s',
course_key
f"Task award_course_certificate was called without course overview data for course {course_key}"
)
return
credentials_client = get_credentials_api_client(User.objects.get(
@@ -346,10 +374,16 @@ def award_course_certificate(self, username, course_run_key):
visible_date = available_date_for_certificate(course_overview, certificate)
post_course_certificate(credentials_client, username, certificate, visible_date)
LOGGER.info(u'Awarded certificate for course %s to user %s', course_key, username)
LOGGER.info(f"Awarded certificate for course {course_key} to user {username}")
except Exception as exc:
LOGGER.exception(u'Failed to determine course certificates to be awarded for user %s', username)
raise self.retry(exc=exc, countdown=countdown, max_retries=MAX_RETRIES)
error_msg = f"Failed to determine course certificates to be awarded for user {username}."
LOGGER.exception(error_msg)
raise _retry_with_custom_exception(
username=username,
course_run_key=course_run_key,
reason=error_msg,
countdown=countdown
) from exc
def get_revokable_program_uuids(course_specific_programs, student):
@@ -418,6 +452,16 @@ def revoke_program_certificates(self, username, course_key):
None
"""
def _retry_with_custom_exception(username, course_key, reason, countdown):
exception = MaxRetriesExceededError(
f"Failed to revoke program certificate for user {username} for course {course_key}. Reason: {reason}"
)
return self.retry(
exc=exception,
countdown=countdown,
max_retries=MAX_RETRIES
)
countdown = 2 ** self.request.retries
# If the credentials config model is disabled for this
# feature, it may indicate a condition where processing of such tasks
@@ -425,15 +469,21 @@ def revoke_program_certificates(self, username, course_key):
# mark this task for retry instead of failing it altogether.
if not CredentialsApiConfig.current().is_learner_issuance_enabled:
LOGGER.warning(
'Task revoke_program_certificates cannot be executed when credentials issuance is disabled in API config',
error_msg = (
"Task revoke_program_certificates cannot be executed when credentials issuance is disabled in API config"
)
LOGGER.warning(error_msg)
raise _retry_with_custom_exception(
username=username,
course_key=course_key,
reason=error_msg,
countdown=countdown
)
raise self.retry(countdown=countdown, max_retries=MAX_RETRIES)
try:
student = User.objects.get(username=username)
except User.DoesNotExist:
LOGGER.exception(u'Task revoke_program_certificates was called with invalid username %s', username)
LOGGER.exception(f"Task revoke_program_certificates was called with invalid username {username}", username)
# Don't retry for this case - just conclude the task.
return
@@ -443,21 +493,25 @@ def revoke_program_certificates(self, username, course_key):
if not course_specific_programs:
# No reason to continue beyond this point
LOGGER.info(
u'Task revoke_program_certificates was called for user %s and course %s with no engaged programs',
username,
course_key
f"Task revoke_program_certificates was called for user {username} "
f"and course {course_key} with no engaged programs"
)
return
# Determine which program certificates the user has already been awarded, if any.
program_uuids_to_revoke = get_revokable_program_uuids(course_specific_programs, student)
except Exception as exc:
LOGGER.exception(
u'Failed to determine program certificates to be revoked for user %s with course %s',
username,
course_key
error_msg = (
f"Failed to determine program certificates to be revoked for user {username} "
f"with course {course_key}"
)
raise self.retry(exc=exc, countdown=countdown, max_retries=MAX_RETRIES)
LOGGER.exception(error_msg)
raise _retry_with_custom_exception(
username=username,
course_key=course_key,
reason=error_msg,
countdown=countdown
) from exc
if program_uuids_to_revoke:
try:
@@ -465,19 +519,20 @@ def revoke_program_certificates(self, username, course_key):
User.objects.get(username=settings.CREDENTIALS_SERVICE_USERNAME),
)
except Exception as exc:
LOGGER.exception('Failed to create a credentials API client to revoke program certificates')
error_msg = "Failed to create a credentials API client to revoke program certificates"
LOGGER.exception(error_msg)
# Retry because a misconfiguration could be fixed
raise self.retry(exc=exc, countdown=countdown, max_retries=MAX_RETRIES)
raise _retry_with_custom_exception(username, course_key, reason=exc, countdown=countdown) from exc
failed_program_certificate_revoke_attempts = []
for program_uuid in program_uuids_to_revoke:
try:
revoke_program_certificate(credentials_client, username, program_uuid)
LOGGER.info(u'Revoked certificate for program %s for user %s', program_uuid, username)
LOGGER.info(f"Revoked certificate for program {program_uuid} for user {username}")
except exceptions.HttpNotFoundError:
LOGGER.exception(
u"""Certificate for program {uuid} could not be found. Unable to revoke certificate for user
{username}.""".format(uuid=program_uuid, username=username)
f"Certificate for program {program_uuid} could not be found. "
f"Unable to revoke certificate for user {username}"
)
except exceptions.HttpClientError as exc:
# Grab the status code from the client error, because our API
@@ -486,41 +541,45 @@ def revoke_program_certificates(self, username, course_key):
# in edx_rest_api_client.
if exc.response.status_code == 429: # pylint: disable=no-member, no-else-raise
rate_limit_countdown = 60
LOGGER.info(
u"""Rate limited. Retrying task to revoke certificates for user {username} in {countdown}
seconds""".format(username=username, countdown=rate_limit_countdown)
error_msg = (
"Rate limited. "
f"Retrying task to revoke certificates for user {username} in {rate_limit_countdown} seconds"
)
LOGGER.info(error_msg)
# Retry after 60 seconds, when we should be in a new throttling window
raise self.retry(exc=exc, countdown=rate_limit_countdown, max_retries=MAX_RETRIES)
raise _retry_with_custom_exception(
username,
course_key,
reason=error_msg,
countdown=rate_limit_countdown
) from exc
else:
LOGGER.exception(
u"Unable to revoke certificate for user {username} for program {uuid}.".format(
username=username, uuid=program_uuid
)
)
LOGGER.exception(f"Unable to revoke certificate for user {username} for program {program_uuid}.")
except Exception: # pylint: disable=broad-except
# keep trying to revoke other certs, but retry the whole task to fix any missing entries
LOGGER.warning(u'Failed to revoke certificate for program {uuid} of user {username}.'.format(
uuid=program_uuid, username=username))
LOGGER.warning(f"Failed to revoke certificate for program {program_uuid} of user {username}.")
failed_program_certificate_revoke_attempts.append(program_uuid)
if failed_program_certificate_revoke_attempts:
# N.B. This logic assumes that this task is idempotent
LOGGER.info(u'Retrying task to revoke failed certificates to user %s', username)
LOGGER.info(f"Retrying task to revoke failed certificates to user {username}")
# The error message may change on each reattempt but will never be raised until
# the max number of retries have been exceeded. It is unlikely that this list
# will change by the time it reaches its maximimum number of attempts.
exception = MaxRetriesExceededError(
u"Failed to revoke certificate for user {} for programs {}".format(
username, failed_program_certificate_revoke_attempts))
raise self.retry(
exc=exception,
countdown=countdown,
max_retries=MAX_RETRIES)
else:
LOGGER.info(u'There is no program certificates for user %s to revoke', username)
error_msg = (
f"Failed to revoke certificate for user {username} "
f"for programs {failed_program_certificate_revoke_attempts}"
)
raise _retry_with_custom_exception(
username,
course_key,
reason=error_msg,
countdown=countdown
)
LOGGER.info(u'Successfully completed the task revoke_program_certificates for username %s', username)
else:
LOGGER.info(f"There is no program certificates for user {username} to revoke")
LOGGER.info(f"Successfully completed the task revoke_program_certificates for username {username}")
@shared_task(bind=True, ignore_result=True)
@@ -549,14 +608,19 @@ def update_certificate_visible_date_on_course_update(self, course_key):
# mark this task for retry instead of failing it altogether.
if not CredentialsApiConfig.current().is_learner_issuance_enabled:
LOGGER.info(
'Task update_certificate_visible_date_on_course_update cannot be executed when credentials issuance is '
'disabled in API config',
error_msg = (
"Task update_certificate_visible_date_on_course_update cannot be executed when credentials issuance is "
"disabled in API config"
)
raise self.retry(countdown=countdown, max_retries=MAX_RETRIES)
LOGGER.info(error_msg)
exception = MaxRetriesExceededError(
f"Failed to update certificate availability date for course {course_key}. Reason: {error_msg}"
)
raise self.retry(exc=exception, countdown=countdown, max_retries=MAX_RETRIES)
users_with_certificates_in_course = GeneratedCertificate.eligible_available_certificates.filter(
course_id=course_key).values_list('user__username', flat=True)
course_id=course_key
).values_list('user__username', flat=True)
for user in users_with_certificates_in_course:
award_course_certificate.delay(user, str(course_key))

View File

@@ -280,6 +280,29 @@ class AwardProgramCertificatesTestCase(CatalogIntegrationMixin, CredentialsApiCo
self.assertFalse(mock_get_certified_programs.called)
self.assertFalse(mock_award_program_certificate.called)
@mock.patch(TASKS_MODULE + '.get_credentials_api_client')
def test_failure_to_create_api_client_retries(
self,
mock_get_api_client,
mock_get_completed_programs,
mock_get_certified_programs,
mock_award_program_certificate
):
"""
Checks that we log an exception and retry if the API client isn't creating.
"""
mock_get_api_client.side_effect = Exception('boom')
mock_get_completed_programs.return_value = {1: 1, 2: 2}
mock_get_certified_programs.return_value = [2]
with mock.patch(TASKS_MODULE + '.LOGGER.exception') as mock_exception:
with self.assertRaises(MaxRetriesExceededError):
tasks.award_program_certificates.delay(self.student.username).get()
self.assertTrue(mock_exception.called)
self.assertEqual(mock_get_api_client.call_count, tasks.MAX_RETRIES + 1)
self.assertFalse(mock_award_program_certificate.called)
def _make_side_effect(self, side_effects):
"""
DRY helper. Returns a side effect function for use with mocks that
@@ -327,8 +350,8 @@ class AwardProgramCertificatesTestCase(CatalogIntegrationMixin, CredentialsApiCo
uuid=1,
username=self.student.username)
)
mock_info.assert_any_call(mock.ANY, 1, self.student.username)
mock_info.assert_any_call(mock.ANY, 2, self.student.username)
mock_info.assert_any_call(f"Awarded certificate for program {1} to user {self.student.username}")
mock_info.assert_any_call(f"Awarded certificate for program {2} to user {self.student.username}")
def test_retry_on_programs_api_errors(
self,
@@ -781,8 +804,8 @@ class RevokeProgramCertificatesTestCase(CatalogIntegrationMixin, CredentialsApiC
uuid=1,
username=self.student.username)
)
mock_info.assert_any_call(mock.ANY, 1, self.student.username)
mock_info.assert_any_call(mock.ANY, 2, self.student.username)
mock_info.assert_any_call(f"Revoked certificate for program {1} for user {self.student.username}")
mock_info.assert_any_call(f"Revoked certificate for program {2} for user {self.student.username}")
def test_retry_on_credentials_api_errors(
self,
@@ -865,3 +888,27 @@ class RevokeProgramCertificatesTestCase(CatalogIntegrationMixin, CredentialsApiC
tasks.revoke_program_certificates.delay(self.student.username, self.course_key).get()
self.assertEqual(mock_revoke_program_certificate.call_count, 2)
def test_get_api_client_failure_retries(
self,
mock_get_inverted_programs,
mock_get_certified_programs,
mock_revoke_program_certificate,
):
"""
Verify that a 404 error causes the task to fail but there is no retry.
"""
mock_get_inverted_programs.return_value = self.inverted_programs
mock_get_certified_programs.return_value = [1, 2]
with mock.patch(
TASKS_MODULE + ".get_credentials_api_client"
) as mock_get_api_client, mock.patch(
TASKS_MODULE + '.LOGGER.exception'
) as mock_exception:
mock_get_api_client.side_effect = Exception("boom")
with self.assertRaises(MaxRetriesExceededError):
tasks.revoke_program_certificates.delay(self.student.username, self.course_key).get()
self.assertTrue(mock_exception.called)
self.assertEqual(mock_get_api_client.call_count, tasks.MAX_RETRIES + 1)
self.assertFalse(mock_revoke_program_certificate.called)