Incorporate changes in max_retry logic, adding subtask_status as bulk_email arg.
This commit is contained in:
@@ -36,8 +36,8 @@ from courseware.courses import get_course_by_id, course_image_url
|
||||
from instructor_task.models import InstructorTask
|
||||
from instructor_task.subtasks import (
|
||||
update_subtask_status,
|
||||
create_subtask_result,
|
||||
increment_subtask_result,
|
||||
create_subtask_status,
|
||||
increment_subtask_status,
|
||||
update_instructor_task_for_subtasks,
|
||||
)
|
||||
|
||||
@@ -54,7 +54,7 @@ QUOTA_EXCEEDED_ERRORS = (SESDailyQuotaExceededError, )
|
||||
# Errors that mail is being sent too quickly. When caught by a task, it
|
||||
# triggers an exponential backoff and retry. Retries happen continuously until
|
||||
# the email is sent.
|
||||
SENDING_RATE_ERRORS = (SESMaxSendingRateExceededError, )
|
||||
INFINITE_RETRY_ERRORS = (SESMaxSendingRateExceededError, )
|
||||
|
||||
|
||||
def _get_recipient_queryset(user_id, to_option, course_id, course_location):
|
||||
@@ -120,6 +120,7 @@ def perform_delegate_email_batches(entry_id, course_id, task_input, action_name)
|
||||
# get inputs to use in this task from the entry:
|
||||
#task_id = entry.task_id
|
||||
user_id = entry.requester.id
|
||||
task_id = entry.task_id
|
||||
|
||||
# TODO: check this against argument passed in?
|
||||
# course_id = entry.course_id
|
||||
@@ -132,7 +133,7 @@ def perform_delegate_email_batches(entry_id, course_id, task_input, action_name)
|
||||
# is submitted and reaches this point. It is possible to add retry behavior here,
|
||||
# to keep trying until the object is actually committed by the view function's return,
|
||||
# but it's cleaner to just expect to be done.
|
||||
log.warning("Failed to get CourseEmail with id %s", email_id)
|
||||
log.warning("Task %s: Failed to get CourseEmail with id %s", task_id, email_id)
|
||||
raise
|
||||
|
||||
to_option = email_obj.to_option
|
||||
@@ -144,26 +145,25 @@ def perform_delegate_email_batches(entry_id, course_id, task_input, action_name)
|
||||
try:
|
||||
course = get_course_by_id(course_id, depth=1)
|
||||
except Http404 as exc:
|
||||
log.exception("get_course_by_id failed: %s", exc.args[0])
|
||||
log.exception("Task %s: get_course_by_id failed: %s", task_id, exc.args[0])
|
||||
raise Exception("get_course_by_id failed: " + exc.args[0])
|
||||
|
||||
global_email_context = _get_course_email_context(course)
|
||||
recipient_qset = _get_recipient_queryset(user_id, to_option, course_id, course.location)
|
||||
total_num_emails = recipient_qset.count()
|
||||
|
||||
log.info("Preparing to queue emails to %d recipient(s) for course %s, email %s, to_option %s",
|
||||
total_num_emails, course_id, email_id, to_option)
|
||||
log.info("Task %s: Preparing to queue emails to %d recipient(s) for course %s, email %s, to_option %s",
|
||||
task_id, total_num_emails, course_id, email_id, to_option)
|
||||
|
||||
num_queries = int(math.ceil(float(total_num_emails) / float(settings.EMAILS_PER_QUERY)))
|
||||
last_pk = recipient_qset[0].pk - 1
|
||||
num_workers = 0
|
||||
num_emails_queued = 0
|
||||
task_list = []
|
||||
subtask_id_list = []
|
||||
for _ in range(num_queries):
|
||||
# Note that if we were doing this for regrading we probably only need 'pk', and not
|
||||
# either profile__name or email. That's because we'll have to do
|
||||
# a lot more work in the individual regrade for each user, but using user_id as a key.
|
||||
# TODO: figure out how to pass these values as an argument, when refactoring this code.
|
||||
recipient_sublist = list(recipient_qset.order_by('pk').filter(pk__gt=last_pk)
|
||||
.values('profile__name', 'email', 'pk')[:settings.EMAILS_PER_QUERY])
|
||||
last_pk = recipient_sublist[-1]['pk']
|
||||
@@ -179,22 +179,32 @@ def perform_delegate_email_batches(entry_id, course_id, task_input, action_name)
|
||||
to_list = recipient_sublist[i * chunk:i * chunk + chunk]
|
||||
subtask_id = str(uuid4())
|
||||
subtask_id_list.append(subtask_id)
|
||||
retry_progress = create_subtask_status()
|
||||
task_list.append(send_course_email.subtask((
|
||||
entry_id,
|
||||
email_id,
|
||||
to_list,
|
||||
global_email_context,
|
||||
retry_progress,
|
||||
),
|
||||
task_id=subtask_id,
|
||||
routing_key=settings.HIGH_PRIORITY_QUEUE,
|
||||
))
|
||||
num_workers += num_tasks_this_query
|
||||
num_emails_queued += num_emails_this_query
|
||||
|
||||
# Sanity check: we expect the chunking to be properly summing to the original count:
|
||||
if num_emails_queued != total_num_emails:
|
||||
error_msg = "Task {}: number of emails generated by chunking {} not equal to original total {}".format(
|
||||
task_id, num_emails_queued, total_num_emails
|
||||
)
|
||||
log.error(error_msg)
|
||||
raise Exception(error_msg)
|
||||
|
||||
# Update the InstructorTask with information about the subtasks we've defined.
|
||||
progress = update_instructor_task_for_subtasks(entry, action_name, total_num_emails, subtask_id_list)
|
||||
num_subtasks = len(subtask_id_list)
|
||||
log.info("Preparing to queue %d email tasks for course %s, email %s, to %s",
|
||||
num_subtasks, course_id, email_id, to_option)
|
||||
log.info("Preparing to queue %d email tasks (%d emails) for course %s, email %s, to %s",
|
||||
num_subtasks, total_num_emails, course_id, email_id, to_option)
|
||||
|
||||
# now group the subtasks, and start them running:
|
||||
task_group = group(task_list)
|
||||
@@ -202,9 +212,9 @@ def perform_delegate_email_batches(entry_id, course_id, task_input, action_name)
|
||||
|
||||
# We want to return progress here, as this is what will be stored in the
|
||||
# AsyncResult for the parent task as its return value.
|
||||
# The AsyncResult will then be marked as SUCCEEDED, and have this return value as it's "result".
|
||||
# The AsyncResult will then be marked as SUCCEEDED, and have this return value as its "result".
|
||||
# That's okay, for the InstructorTask will have the "real" status, and monitoring code
|
||||
# will use that instead.
|
||||
# should be using that instead.
|
||||
return progress
|
||||
|
||||
|
||||
@@ -215,7 +225,7 @@ def _get_current_task():
|
||||
|
||||
|
||||
@task(default_retry_delay=15, max_retries=5) # pylint: disable=E1102
|
||||
def send_course_email(entry_id, email_id, to_list, global_email_context):
|
||||
def send_course_email(entry_id, email_id, to_list, global_email_context, subtask_status):
|
||||
"""
|
||||
Sends an email to a list of recipients.
|
||||
|
||||
@@ -242,19 +252,20 @@ def send_course_email(entry_id, email_id, to_list, global_email_context):
|
||||
# Get information from current task's request:
|
||||
current_task_id = _get_current_task().request.id
|
||||
num_to_send = len(to_list)
|
||||
log.info("Preparing to send %s emails as subtask %s for instructor task %d: request = %s",
|
||||
num_to_send, current_task_id, entry_id, _get_current_task().request)
|
||||
log.info("Preparing to send email %s to %d recipients as subtask %s for instructor task %d: context = %s, status=%s",
|
||||
email_id, num_to_send, current_task_id, entry_id, global_email_context, subtask_status)
|
||||
|
||||
send_exception = None
|
||||
course_email_result_value = None
|
||||
new_subtask_status = None
|
||||
try:
|
||||
course_title = global_email_context['course_title']
|
||||
with dog_stats_api.timer('course_email.single_task.time.overall', tags=[_statsd_tag(course_title)]):
|
||||
course_email_result_value, send_exception = _send_course_email(
|
||||
new_subtask_status, send_exception = _send_course_email(
|
||||
entry_id,
|
||||
email_id,
|
||||
to_list,
|
||||
global_email_context,
|
||||
subtask_status,
|
||||
)
|
||||
except Exception:
|
||||
# Unexpected exception. Try to write out the failure to the entry before failing
|
||||
@@ -264,28 +275,30 @@ def send_course_email(entry_id, email_id, to_list, global_email_context):
|
||||
# We got here for really unexpected reasons. Since we don't know how far
|
||||
# the task got in emailing, we count all recipients as having failed.
|
||||
# It at least keeps the counts consistent.
|
||||
course_email_result_value = create_subtask_result(0, num_to_send, 0)
|
||||
new_subtask_status = increment_subtask_status(subtask_status, failed=num_to_send, state=FAILURE)
|
||||
update_subtask_status(entry_id, current_task_id, new_subtask_status)
|
||||
raise send_exception
|
||||
|
||||
if send_exception is None:
|
||||
# Update the InstructorTask object that is storing its progress.
|
||||
log.info("background task (%s) succeeded", current_task_id)
|
||||
update_subtask_status(entry_id, current_task_id, SUCCESS, course_email_result_value)
|
||||
update_subtask_status(entry_id, current_task_id, new_subtask_status)
|
||||
elif isinstance(send_exception, RetryTaskError):
|
||||
# If retrying, record the progress made before the retry condition
|
||||
# was encountered. Once the retry is running, it will be only processing
|
||||
# what wasn't already accomplished.
|
||||
log.warning("background task (%s) being retried", current_task_id)
|
||||
update_subtask_status(entry_id, current_task_id, RETRY, course_email_result_value)
|
||||
update_subtask_status(entry_id, current_task_id, new_subtask_status)
|
||||
raise send_exception
|
||||
else:
|
||||
log.error("background task (%s) failed: %s", current_task_id, send_exception)
|
||||
update_subtask_status(entry_id, current_task_id, FAILURE, course_email_result_value)
|
||||
update_subtask_status(entry_id, current_task_id, new_subtask_status)
|
||||
raise send_exception
|
||||
|
||||
return course_email_result_value
|
||||
return new_subtask_status
|
||||
|
||||
|
||||
def _send_course_email(entry_id, email_id, to_list, global_email_context):
|
||||
def _send_course_email(entry_id, email_id, to_list, global_email_context, subtask_status):
|
||||
"""
|
||||
Performs the email sending task.
|
||||
|
||||
@@ -312,6 +325,8 @@ def _send_course_email(entry_id, email_id, to_list, global_email_context):
|
||||
'skipped': number of emails skipped (due to optout)
|
||||
'failed': number of emails not sent because of some failure
|
||||
|
||||
The dict may also contain information about retries.
|
||||
|
||||
* Second value is an exception returned by the innards of the method, indicating a fatal error.
|
||||
In this case, the number of recipients that were not sent have already been added to the
|
||||
'failed' count above.
|
||||
@@ -319,6 +334,8 @@ def _send_course_email(entry_id, email_id, to_list, global_email_context):
|
||||
# Get information from current task's request:
|
||||
task_id = _get_current_task().request.id
|
||||
retry_index = _get_current_task().request.retries
|
||||
|
||||
# If this is a second attempt, then throttle the speed at which mail is sent:
|
||||
throttle = retry_index > 0
|
||||
|
||||
num_optout = 0
|
||||
@@ -400,6 +417,7 @@ def _send_course_email(entry_id, email_id, to_list, global_email_context):
|
||||
|
||||
log.info('Email with id %s sent to %s', email_id, email)
|
||||
num_sent += 1
|
||||
|
||||
except SMTPDataError as exc:
|
||||
# According to SMTP spec, we'll retry error codes in the 4xx range. 5xx range indicates hard failure.
|
||||
if exc.smtp_code >= 400 and exc.smtp_code < 500:
|
||||
@@ -414,8 +432,15 @@ def _send_course_email(entry_id, email_id, to_list, global_email_context):
|
||||
# Pop the user that was emailed off the end of the list:
|
||||
to_list.pop()
|
||||
|
||||
except SENDING_RATE_ERRORS as exc:
|
||||
subtask_progress = create_subtask_result(num_sent, num_error, num_optout)
|
||||
except INFINITE_RETRY_ERRORS as exc:
|
||||
subtask_progress = increment_subtask_status(
|
||||
subtask_status,
|
||||
succeeded=num_sent,
|
||||
failed=num_error,
|
||||
skipped=num_optout,
|
||||
retriedA=1,
|
||||
state=RETRY
|
||||
)
|
||||
return _submit_for_retry(
|
||||
entry_id, email_id, to_list, global_email_context, exc, subtask_progress, True
|
||||
)
|
||||
@@ -424,7 +449,14 @@ def _send_course_email(entry_id, email_id, to_list, global_email_context):
|
||||
# Errors caught here cause the email to be retried. The entire task is actually retried
|
||||
# without popping the current recipient off of the existing list.
|
||||
# Errors caught are those that indicate a temporary condition that might succeed on retry.
|
||||
subtask_progress = create_subtask_result(num_sent, num_error, num_optout)
|
||||
subtask_progress = increment_subtask_status(
|
||||
subtask_status,
|
||||
succeeded=num_sent,
|
||||
failed=num_error,
|
||||
skipped=num_optout,
|
||||
retriedB=1,
|
||||
state=RETRY
|
||||
)
|
||||
return _submit_for_retry(
|
||||
entry_id, email_id, to_list, global_email_context, exc, subtask_progress, False
|
||||
)
|
||||
@@ -444,10 +476,24 @@ def _send_course_email(entry_id, email_id, to_list, global_email_context):
|
||||
log.exception('Task %s: email with id %d caused send_course_email task to fail with uncaught exception. To list: %s',
|
||||
task_id, email_id, [i['email'] for i in to_list])
|
||||
num_error += len(to_list)
|
||||
return create_subtask_result(num_sent, num_error, num_optout), exc
|
||||
subtask_progress = increment_subtask_status(
|
||||
subtask_status,
|
||||
succeeded=num_sent,
|
||||
failed=num_error,
|
||||
skipped=num_optout,
|
||||
state=FAILURE
|
||||
)
|
||||
return subtask_progress, exc
|
||||
else:
|
||||
# Successful completion is marked by an exception value of None:
|
||||
return create_subtask_result(num_sent, num_error, num_optout), None
|
||||
subtask_progress = increment_subtask_status(
|
||||
subtask_status,
|
||||
succeeded=num_sent,
|
||||
failed=num_error,
|
||||
skipped=num_optout,
|
||||
state=SUCCESS
|
||||
)
|
||||
return subtask_progress, None
|
||||
finally:
|
||||
# clean up at the end
|
||||
connection.close()
|
||||
@@ -476,16 +522,24 @@ def _submit_for_retry(entry_id, email_id, to_list, global_email_context, current
|
||||
task_id = _get_current_task().request.id
|
||||
retry_index = _get_current_task().request.retries
|
||||
|
||||
log.warning('Task %s: email with id %d not delivered due to temporary error %s, retrying send to %d recipients',
|
||||
task_id, email_id, current_exception, len(to_list))
|
||||
|
||||
# Don't resend emails that have already succeeded.
|
||||
# Retry the email at increasing exponential backoff.
|
||||
log.info("Task %s: Successfully sent to %s users; failed to send to %s users (and skipped %s users)",
|
||||
current_task.request.id, subtask_progress['succeeded'], subtask_progress['failed'], subtask_progress['skipped'])
|
||||
|
||||
# Calculate time until we retry this task (in seconds):
|
||||
if is_sending_rate_error:
|
||||
countdown = ((2 ** retry_index) * 15) * random.uniform(.5, 1.5)
|
||||
exp = min(retry_index, 5)
|
||||
countdown = ((2 ** exp) * 15) * random.uniform(.5, 1.25)
|
||||
exception_type = 'sending-rate'
|
||||
else:
|
||||
countdown = ((2 ** retry_index) * 15) * random.uniform(.75, 1.5)
|
||||
exception_type = 'transient'
|
||||
|
||||
# max_retries is increased by the number of times an "infinite-retry" exception
|
||||
# has been retried. We want the regular retries to trigger a retry, but not these
|
||||
# special retries. So we count them separately.
|
||||
max_retries = _get_current_task().max_retries + subtask_progress['retriedA']
|
||||
log.warning('Task %s: email with id %d not delivered due to %s error %s, retrying send to %d recipients (with max_retry=%s)',
|
||||
task_id, email_id, exception_type, current_exception, len(to_list), max_retries)
|
||||
|
||||
try:
|
||||
send_course_email.retry(
|
||||
@@ -494,9 +548,11 @@ def _submit_for_retry(entry_id, email_id, to_list, global_email_context, current
|
||||
email_id,
|
||||
to_list,
|
||||
global_email_context,
|
||||
subtask_progress,
|
||||
],
|
||||
exc=current_exception,
|
||||
countdown=countdown,
|
||||
max_retries=max_retries,
|
||||
throw=True,
|
||||
)
|
||||
except RetryTaskError as retry_error:
|
||||
@@ -513,7 +569,7 @@ def _submit_for_retry(entry_id, email_id, to_list, global_email_context, current
|
||||
log.exception('Task %s: email with id %d caused send_course_email task to fail to retry. To list: %s',
|
||||
task_id, email_id, [i['email'] for i in to_list])
|
||||
num_failed = len(to_list)
|
||||
new_subtask_progress = increment_subtask_result(subtask_progress, 0, num_failed, 0)
|
||||
new_subtask_progress = increment_subtask_status(subtask_progress, failed=num_failed, state=FAILURE)
|
||||
return new_subtask_progress, retry_exc
|
||||
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ from student.tests.factories import UserFactory, GroupFactory, CourseEnrollmentF
|
||||
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
|
||||
from xmodule.modulestore.tests.factories import CourseFactory
|
||||
from bulk_email.models import Optout
|
||||
from instructor_task.subtasks import create_subtask_result
|
||||
from instructor_task.subtasks import increment_subtask_status
|
||||
|
||||
STAFF_COUNT = 3
|
||||
STUDENT_COUNT = 10
|
||||
@@ -29,13 +29,13 @@ class MockCourseEmailResult(object):
|
||||
"""
|
||||
emails_sent = 0
|
||||
|
||||
def get_mock_create_subtask_result(self):
|
||||
def get_mock_increment_subtask_status(self):
|
||||
"""Wrapper for mock email function."""
|
||||
def mock_create_subtask_result(sent, failed, output, **kwargs): # pylint: disable=W0613
|
||||
def mock_increment_subtask_status(original_status, **kwargs): # pylint: disable=W0613
|
||||
"""Increments count of number of emails sent."""
|
||||
self.emails_sent += sent
|
||||
return create_subtask_result(sent, failed, output)
|
||||
return mock_create_subtask_result
|
||||
self.emails_sent += kwargs['succeeded']
|
||||
return increment_subtask_status(original_status, **kwargs)
|
||||
return mock_increment_subtask_status
|
||||
|
||||
|
||||
@override_settings(MODULESTORE=TEST_DATA_MONGO_MODULESTORE)
|
||||
@@ -244,13 +244,13 @@ class TestEmailSendFromDashboard(ModuleStoreTestCase):
|
||||
)
|
||||
|
||||
@override_settings(EMAILS_PER_TASK=3, EMAILS_PER_QUERY=7)
|
||||
@patch('bulk_email.tasks.create_subtask_result')
|
||||
@patch('bulk_email.tasks.increment_subtask_status')
|
||||
def test_chunked_queries_send_numerous_emails(self, email_mock):
|
||||
"""
|
||||
Test sending a large number of emails, to test the chunked querying
|
||||
"""
|
||||
mock_factory = MockCourseEmailResult()
|
||||
email_mock.side_effect = mock_factory.get_mock_create_subtask_result()
|
||||
email_mock.side_effect = mock_factory.get_mock_increment_subtask_status()
|
||||
added_users = []
|
||||
for _ in xrange(LARGE_NUM_EMAILS):
|
||||
user = UserFactory()
|
||||
|
||||
@@ -67,7 +67,7 @@ class TestEmailErrors(ModuleStoreTestCase):
|
||||
self.assertTrue(type(exc) == SMTPDataError)
|
||||
|
||||
@patch('bulk_email.tasks.get_connection', autospec=True)
|
||||
@patch('bulk_email.tasks.create_subtask_result')
|
||||
@patch('bulk_email.tasks.increment_subtask_status')
|
||||
@patch('bulk_email.tasks.send_course_email.retry')
|
||||
def test_data_err_fail(self, retry, result, get_conn):
|
||||
"""
|
||||
@@ -91,11 +91,11 @@ class TestEmailErrors(ModuleStoreTestCase):
|
||||
# We shouldn't retry when hitting a 5xx error
|
||||
self.assertFalse(retry.called)
|
||||
# Test that after the rejected email, the rest still successfully send
|
||||
((sent, fail, optouts), _) = result.call_args
|
||||
self.assertEquals(optouts, 0)
|
||||
((_initial_results), kwargs) = result.call_args
|
||||
self.assertEquals(kwargs['skipped'], 0)
|
||||
expectedNumFails = int((settings.EMAILS_PER_TASK + 3) / 4.0)
|
||||
self.assertEquals(fail, expectedNumFails)
|
||||
self.assertEquals(sent, settings.EMAILS_PER_TASK - expectedNumFails)
|
||||
self.assertEquals(kwargs['failed'], expectedNumFails)
|
||||
self.assertEquals(kwargs['succeeded'], settings.EMAILS_PER_TASK - expectedNumFails)
|
||||
|
||||
@patch('bulk_email.tasks.get_connection', autospec=True)
|
||||
@patch('bulk_email.tasks.send_course_email.retry')
|
||||
@@ -138,7 +138,7 @@ class TestEmailErrors(ModuleStoreTestCase):
|
||||
exc = kwargs['exc']
|
||||
self.assertTrue(type(exc) == SMTPConnectError)
|
||||
|
||||
@patch('bulk_email.tasks.create_subtask_result')
|
||||
@patch('bulk_email.tasks.increment_subtask_status')
|
||||
@patch('bulk_email.tasks.send_course_email.retry')
|
||||
@patch('bulk_email.tasks.log')
|
||||
@patch('bulk_email.tasks.get_connection', Mock(return_value=EmailTestException))
|
||||
@@ -163,12 +163,13 @@ class TestEmailErrors(ModuleStoreTestCase):
|
||||
self.assertFalse(retry.called)
|
||||
# check the results being returned
|
||||
self.assertTrue(result.called)
|
||||
((sent, fail, optouts), _) = result.call_args
|
||||
self.assertEquals(optouts, 0)
|
||||
self.assertEquals(fail, 1) # just myself
|
||||
self.assertEquals(sent, 0)
|
||||
((initial_results, ), kwargs) = result.call_args
|
||||
self.assertEquals(initial_results['skipped'], 0)
|
||||
self.assertEquals(initial_results['failed'], 0)
|
||||
self.assertEquals(initial_results['succeeded'], 0)
|
||||
self.assertEquals(kwargs['failed'], 1)
|
||||
|
||||
@patch('bulk_email.tasks.create_subtask_result')
|
||||
@patch('bulk_email.tasks.increment_subtask_status')
|
||||
@patch('bulk_email.tasks.log')
|
||||
def test_nonexist_email(self, mock_log, result):
|
||||
"""
|
||||
@@ -180,7 +181,7 @@ class TestEmailErrors(ModuleStoreTestCase):
|
||||
task_input = {"email_id": -1}
|
||||
with self.assertRaises(CourseEmail.DoesNotExist):
|
||||
perform_delegate_email_batches(entry.id, course_id, task_input, "action_name")
|
||||
((log_str, email_id), _) = mock_log.warning.call_args
|
||||
((log_str, _, email_id), _) = mock_log.warning.call_args
|
||||
self.assertTrue(mock_log.warning.called)
|
||||
self.assertIn('Failed to get CourseEmail with id', log_str)
|
||||
self.assertEqual(email_id, -1)
|
||||
@@ -198,7 +199,7 @@ class TestEmailErrors(ModuleStoreTestCase):
|
||||
task_input = {"email_id": email.id}
|
||||
with self.assertRaises(Exception):
|
||||
perform_delegate_email_batches(entry.id, course_id, task_input, "action_name")
|
||||
((log_str, _), _) = mock_log.exception.call_args
|
||||
((log_str, _, _), _) = mock_log.exception.call_args
|
||||
self.assertTrue(mock_log.exception.called)
|
||||
self.assertIn('get_course_by_id failed:', log_str)
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ from time import time
|
||||
import json
|
||||
|
||||
from celery.utils.log import get_task_logger
|
||||
from celery.states import SUCCESS, RETRY
|
||||
from celery.states import SUCCESS, RETRY, READY_STATES
|
||||
|
||||
from django.db import transaction
|
||||
|
||||
@@ -14,29 +14,51 @@ from instructor_task.models import InstructorTask, PROGRESS, QUEUING
|
||||
TASK_LOG = get_task_logger(__name__)
|
||||
|
||||
|
||||
def create_subtask_result(num_sent, num_error, num_optout):
|
||||
def create_subtask_status(succeeded=0, failed=0, pending=0, skipped=0, retriedA=0, retriedB=0, state=None):
|
||||
"""
|
||||
Create a result of a subtask.
|
||||
Create a dict for tracking the status of a subtask.
|
||||
|
||||
Keys are: 'attempted', 'succeeded', 'skipped', 'failed'.
|
||||
Keys are: 'attempted', 'succeeded', 'skipped', 'failed', 'retried'.
|
||||
TODO: update
|
||||
Object must be JSON-serializable, so that it can be passed as an argument
|
||||
to tasks.
|
||||
|
||||
Object must be JSON-serializable.
|
||||
TODO: decide if in future we want to include specific error information
|
||||
indicating the reason for failure.
|
||||
Also, we should count up "not attempted" separately from
|
||||
attempted/failed.
|
||||
"""
|
||||
attempted = num_sent + num_error
|
||||
current_result = {'attempted': attempted, 'succeeded': num_sent, 'skipped': num_optout, 'failed': num_error}
|
||||
attempted = succeeded + failed
|
||||
current_result = {
|
||||
'attempted': attempted,
|
||||
'succeeded': succeeded,
|
||||
'pending': pending,
|
||||
'skipped': skipped,
|
||||
'failed': failed,
|
||||
'retriedA': retriedA,
|
||||
'retriedB': retriedB,
|
||||
'state': state if state is not None else QUEUING,
|
||||
}
|
||||
return current_result
|
||||
|
||||
|
||||
def increment_subtask_result(subtask_result, new_num_sent, new_num_error, new_num_optout):
|
||||
def increment_subtask_status(subtask_result, succeeded=0, failed=0, pending=0, skipped=0, retriedA=0, retriedB=0, state=None):
|
||||
"""
|
||||
Update the result of a subtask with additional results.
|
||||
|
||||
Keys are: 'attempted', 'succeeded', 'skipped', 'failed'.
|
||||
Keys are: 'attempted', 'succeeded', 'skipped', 'failed', 'retried'.
|
||||
"""
|
||||
new_result = create_subtask_result(new_num_sent, new_num_error, new_num_optout)
|
||||
# TODO: rewrite this if we have additional fields added to original subtask_result,
|
||||
# that are not part of the increment. Tradeoff on duplicating the 'attempts' logic.
|
||||
new_result = create_subtask_status(succeeded, failed, pending, skipped, retriedA, retriedB, state)
|
||||
for keyname in new_result:
|
||||
if keyname in subtask_result:
|
||||
if keyname == 'state':
|
||||
# does not get incremented. If no new value, copy old value:
|
||||
if state is None:
|
||||
new_result[keyname] = subtask_result[keyname]
|
||||
elif keyname in subtask_result:
|
||||
new_result[keyname] += subtask_result[keyname]
|
||||
|
||||
return new_result
|
||||
|
||||
|
||||
@@ -49,7 +71,7 @@ def update_instructor_task_for_subtasks(entry, action_name, total_num, subtask_i
|
||||
as is the 'duration_ms' value. A 'start_time' is stored for later duration calculations,
|
||||
and the total number of "things to do" is set, so the user can be told how much needs to be
|
||||
done overall. The `action_name` is also stored, to also help with constructing more readable
|
||||
progress messages.
|
||||
task_progress messages.
|
||||
|
||||
The InstructorTask's "subtasks" field is also initialized. This is also a JSON-serialized dict.
|
||||
Keys include 'total', 'succeeded', 'retried', 'failed', which are counters for the number of
|
||||
@@ -70,7 +92,8 @@ def update_instructor_task_for_subtasks(entry, action_name, total_num, subtask_i
|
||||
rely on the status stored in the InstructorTask object, rather than status stored in the
|
||||
corresponding AsyncResult.
|
||||
"""
|
||||
progress = {
|
||||
# TODO: also add 'pending' count here? (Even though it's total-attempted-skipped
|
||||
task_progress = {
|
||||
'action_name': action_name,
|
||||
'attempted': 0,
|
||||
'failed': 0,
|
||||
@@ -80,22 +103,33 @@ def update_instructor_task_for_subtasks(entry, action_name, total_num, subtask_i
|
||||
'duration_ms': int(0),
|
||||
'start_time': time()
|
||||
}
|
||||
entry.task_output = InstructorTask.create_output_for_success(progress)
|
||||
entry.task_output = InstructorTask.create_output_for_success(task_progress)
|
||||
entry.task_state = PROGRESS
|
||||
|
||||
# Write out the subtasks information.
|
||||
num_subtasks = len(subtask_id_list)
|
||||
subtask_status = dict.fromkeys(subtask_id_list, QUEUING)
|
||||
subtask_dict = {'total': num_subtasks, 'succeeded': 0, 'failed': 0, 'retried': 0, 'status': subtask_status}
|
||||
# using fromkeys to initialize uses a single value. we need the value
|
||||
# to be distinct, since it's now a dict:
|
||||
# subtask_status = dict.fromkeys(subtask_id_list, QUEUING)
|
||||
# TODO: may not be necessary to store initial value with all those zeroes!
|
||||
# Instead, use a placemarker....
|
||||
subtask_status = {subtask_id: create_subtask_status() for subtask_id in subtask_id_list}
|
||||
subtask_dict = {
|
||||
'total': num_subtasks,
|
||||
'succeeded': 0,
|
||||
'failed': 0,
|
||||
'retried': 0,
|
||||
'status': subtask_status
|
||||
}
|
||||
entry.subtasks = json.dumps(subtask_dict)
|
||||
|
||||
# and save the entry immediately, before any subtasks actually start work:
|
||||
entry.save_now()
|
||||
return progress
|
||||
return task_progress
|
||||
|
||||
|
||||
@transaction.commit_manually
|
||||
def update_subtask_status(entry_id, current_task_id, status, subtask_result):
|
||||
def update_subtask_status(entry_id, current_task_id, subtask_status):
|
||||
"""
|
||||
Update the status of the subtask in the parent InstructorTask object tracking its progress.
|
||||
|
||||
@@ -104,7 +138,7 @@ def update_subtask_status(entry_id, current_task_id, status, subtask_result):
|
||||
committed on completion, or rolled back on error.
|
||||
|
||||
The InstructorTask's "task_output" field is updated. This is a JSON-serialized dict.
|
||||
Accumulates values for 'attempted', 'succeeded', 'failed', 'skipped' from `subtask_result`
|
||||
Accumulates values for 'attempted', 'succeeded', 'failed', 'skipped' from `subtask_progress`
|
||||
into the corresponding values in the InstructorTask's task_output. Also updates the 'duration_ms'
|
||||
value with the current interval since the original InstructorTask started.
|
||||
|
||||
@@ -121,7 +155,7 @@ def update_subtask_status(entry_id, current_task_id, status, subtask_result):
|
||||
messages, progress made, etc.
|
||||
"""
|
||||
TASK_LOG.info("Preparing to update status for email subtask %s for instructor task %d with status %s",
|
||||
current_task_id, entry_id, subtask_result)
|
||||
current_task_id, entry_id, subtask_status)
|
||||
|
||||
try:
|
||||
entry = InstructorTask.objects.select_for_update().get(pk=entry_id)
|
||||
@@ -140,28 +174,38 @@ def update_subtask_status(entry_id, current_task_id, status, subtask_result):
|
||||
# ultimate status to be clobbered by the "earlier" updates. This
|
||||
# should not be a problem in normal (non-eager) processing.
|
||||
old_status = subtask_status[current_task_id]
|
||||
if status != RETRY or old_status == QUEUING:
|
||||
subtask_status[current_task_id] = status
|
||||
# TODO: check this logic...
|
||||
state = subtask_status['state']
|
||||
# if state != RETRY or old_status['status'] == QUEUING:
|
||||
# instead replace the status only if it's 'newer'
|
||||
# i.e. has fewer pending
|
||||
if subtask_status['pending'] <= old_status['pending']:
|
||||
subtask_status[current_task_id] = subtask_status
|
||||
|
||||
# Update the parent task progress
|
||||
task_progress = json.loads(entry.task_output)
|
||||
start_time = task_progress['start_time']
|
||||
task_progress['duration_ms'] = int((time() - start_time) * 1000)
|
||||
if subtask_result is not None:
|
||||
# change behavior so we don't update on progress now:
|
||||
# TODO: figure out if we can make this more responsive later,
|
||||
# by figuring out how to handle retries better.
|
||||
if subtask_status is not None and state in READY_STATES:
|
||||
for statname in ['attempted', 'succeeded', 'failed', 'skipped']:
|
||||
task_progress[statname] += subtask_result[statname]
|
||||
task_progress[statname] += subtask_status[statname]
|
||||
|
||||
# Figure out if we're actually done (i.e. this is the last task to complete).
|
||||
# This is easier if we just maintain a counter, rather than scanning the
|
||||
# entire subtask_status dict.
|
||||
if status == SUCCESS:
|
||||
if state == SUCCESS:
|
||||
subtask_dict['succeeded'] += 1
|
||||
elif status == RETRY:
|
||||
elif state == RETRY:
|
||||
subtask_dict['retried'] += 1
|
||||
else:
|
||||
subtask_dict['failed'] += 1
|
||||
num_remaining = subtask_dict['total'] - subtask_dict['succeeded'] - subtask_dict['failed']
|
||||
# If we're done with the last task, update the parent status to indicate that:
|
||||
# TODO: see if there was a catastrophic failure that occurred, and figure out
|
||||
# how to report that here.
|
||||
if num_remaining <= 0:
|
||||
entry.task_state = SUCCESS
|
||||
entry.subtasks = json.dumps(subtask_dict)
|
||||
|
||||
Reference in New Issue
Block a user