diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0b424adbfc..3e0a9f4926 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,8 @@ LMS: Disable data download buttons on the instructor dashboard for large courses LMS: Ported bulk emailing to the beta instructor dashboard. +LMS: Add monitoring of bulk email subtasks to display progress on instructor dash. + LMS: Refactor and clean student dashboard templates. LMS: Fix issue with CourseMode expiration dates diff --git a/lms/djangoapps/bulk_email/models.py b/lms/djangoapps/bulk_email/models.py index 9d32dbd70c..8bbca579a5 100644 --- a/lms/djangoapps/bulk_email/models.py +++ b/lms/djangoapps/bulk_email/models.py @@ -12,8 +12,9 @@ file and check it in at the same time as your model changes. To do that, """ import logging -from django.db import models +from django.db import models, transaction from django.contrib.auth.models import User +from html_to_text import html_to_text log = logging.getLogger(__name__) @@ -33,12 +34,14 @@ class Email(models.Model): class Meta: # pylint: disable=C0111 abstract = True + SEND_TO_MYSELF = 'myself' SEND_TO_STAFF = 'staff' SEND_TO_ALL = 'all' +TO_OPTIONS = [SEND_TO_MYSELF, SEND_TO_STAFF, SEND_TO_ALL] -class CourseEmail(Email, models.Model): +class CourseEmail(Email): """ Stores information for an email to a course. 
""" @@ -51,17 +54,66 @@ class CourseEmail(Email, models.Model): # * All: This sends an email to anyone enrolled in the course, with any role # (student, staff, or instructor) # - TO_OPTIONS = ( + TO_OPTION_CHOICES = ( (SEND_TO_MYSELF, 'Myself'), (SEND_TO_STAFF, 'Staff and instructors'), (SEND_TO_ALL, 'All') ) course_id = models.CharField(max_length=255, db_index=True) - to_option = models.CharField(max_length=64, choices=TO_OPTIONS, default=SEND_TO_MYSELF) + to_option = models.CharField(max_length=64, choices=TO_OPTION_CHOICES, default=SEND_TO_MYSELF) def __unicode__(self): return self.subject + @classmethod + def create(cls, course_id, sender, to_option, subject, html_message, text_message=None): + """ + Create an instance of CourseEmail. + + The CourseEmail.save_now method makes sure the CourseEmail entry is committed. + When called from any view that is wrapped by TransactionMiddleware, + and thus in a "commit-on-success" transaction, an autocommit buried within here + will cause any pending transaction to be committed by a successful + save here. Any future database operations will take place in a + separate transaction. + """ + # automatically generate the stripped version of the text from the HTML markup: + if text_message is None: + text_message = html_to_text(html_message) + + # perform some validation here: + if to_option not in TO_OPTIONS: + fmt = 'Course email being sent to unrecognized to_option: "{to_option}" for "{course}", subject "{subject}"' + msg = fmt.format(to_option=to_option, course=course_id, subject=subject) + raise ValueError(msg) + + # create the task, then save it immediately: + course_email = cls( + course_id=course_id, + sender=sender, + to_option=to_option, + subject=subject, + html_message=html_message, + text_message=text_message, + ) + course_email.save_now() + + return course_email + + @transaction.autocommit + def save_now(self): + """ + Writes CourseEmail immediately, ensuring the transaction is committed. 
+ + Autocommit annotation makes sure the database entry is committed. + When called from any view that is wrapped by TransactionMiddleware, + and thus in a "commit-on-success" transaction, this autocommit here + will cause any pending transaction to be committed by a successful + save here. Any future database operations will take place in a + separate transaction. + """ + self.save() + class Optout(models.Model): """ @@ -101,7 +153,11 @@ class CourseEmailTemplate(models.Model): If one isn't stored, an exception is thrown. """ - return CourseEmailTemplate.objects.get() + try: + return CourseEmailTemplate.objects.get() + except CourseEmailTemplate.DoesNotExist: + log.exception("Attempting to fetch a non-existent course email template") + raise @staticmethod def _render(format_string, message_body, context): diff --git a/lms/djangoapps/bulk_email/tasks.py b/lms/djangoapps/bulk_email/tasks.py index 75d4a2ada0..110fe4e626 100644 --- a/lms/djangoapps/bulk_email/tasks.py +++ b/lms/djangoapps/bulk_email/tasks.py @@ -4,183 +4,486 @@ to a course. 
""" import math import re -import time +import random +from uuid import uuid4 +from time import sleep + +from sys import exc_info +from traceback import format_exc from dogapi import dog_stats_api -from smtplib import SMTPServerDisconnected, SMTPDataError, SMTPConnectError +from smtplib import SMTPServerDisconnected, SMTPDataError, SMTPConnectError, SMTPException +from boto.ses.exceptions import ( + SESDailyQuotaExceededError, + SESMaxSendingRateExceededError, + SESAddressBlacklistedError, + SESIllegalAddressError, + SESLocalAddressCharacterError, +) +from boto.exception import AWSConnectionError + +from celery import task, current_task, group +from celery.utils.log import get_task_logger +from celery.states import SUCCESS, FAILURE, RETRY +from celery.exceptions import RetryTaskError from django.conf import settings from django.contrib.auth.models import User, Group from django.core.mail import EmailMultiAlternatives, get_connection from django.http import Http404 -from celery import task, current_task -from celery.utils.log import get_task_logger from django.core.urlresolvers import reverse from bulk_email.models import ( CourseEmail, Optout, CourseEmailTemplate, - SEND_TO_MYSELF, SEND_TO_STAFF, SEND_TO_ALL, + SEND_TO_MYSELF, SEND_TO_ALL, TO_OPTIONS, ) from courseware.access import _course_staff_group_name, _course_instructor_group_name -from courseware.courses import get_course_by_id, course_image_url +from courseware.courses import get_course, course_image_url +from instructor_task.models import InstructorTask +from instructor_task.subtasks import ( + update_subtask_status, + create_subtask_status, + increment_subtask_status, + initialize_subtask_info, +) log = get_task_logger(__name__) -@task(default_retry_delay=10, max_retries=5) # pylint: disable=E1102 -def delegate_email_batches(email_id, user_id): +# Errors that an individual email is failing to be sent, and should just +# be treated as a fail. 
+SINGLE_EMAIL_FAILURE_ERRORS = (SESAddressBlacklistedError, SESIllegalAddressError, SESLocalAddressCharacterError) + +# Exceptions that, if caught, should cause the task to be re-tried. +# These errors will be caught a limited number of times before the task fails. +LIMITED_RETRY_ERRORS = (SMTPConnectError, SMTPServerDisconnected, AWSConnectionError) + +# Errors that indicate that a mailing task should be retried without limit. +# An example is if email is being sent too quickly, but may succeed if sent +# more slowly. When caught by a task, it triggers an exponential backoff and retry. +# Retries happen continuously until the email is sent. +# Note that the SMTPDataErrors here are only those within the 4xx range. +# Those not in this range (i.e. in the 5xx range) are treated as hard failures +# and thus like SINGLE_EMAIL_FAILURE_ERRORS. +INFINITE_RETRY_ERRORS = (SESMaxSendingRateExceededError, SMTPDataError) + +# Errors that are known to indicate an inability to send any more emails, +# and should therefore not be retried. For example, exceeding a quota for emails. +# Also, any SMTP errors that are not explicitly enumerated above. +BULK_EMAIL_FAILURE_ERRORS = (SESDailyQuotaExceededError, SMTPException) + + +def _get_recipient_queryset(user_id, to_option, course_id, course_location): """ - Delegates emails by querying for the list of recipients who should - get the mail, chopping up into batches of settings.EMAILS_PER_TASK size, - and queueing up worker jobs. + Returns a query set of email recipients corresponding to the requested to_option category. - Returns the number of batches (workers) kicked off. + `to_option` is either SEND_TO_MYSELF, SEND_TO_STAFF, or SEND_TO_ALL. + + Recipients who are in more than one category (e.g. enrolled in the course and are staff or self) + will be properly deduped. 
""" - try: - email_obj = CourseEmail.objects.get(id=email_id) - except CourseEmail.DoesNotExist as exc: - # The retry behavior here is necessary because of a race condition between the commit of the transaction - # that creates this CourseEmail row and the celery pipeline that starts this task. - # We might possibly want to move the blocking into the view function rather than have it in this task. - log.warning("Failed to get CourseEmail with id %s, retry %d", email_id, current_task.request.retries) - raise delegate_email_batches.retry(arg=[email_id, user_id], exc=exc) + if to_option not in TO_OPTIONS: + log.error("Unexpected bulk email TO_OPTION found: %s", to_option) + raise Exception("Unexpected bulk email TO_OPTION found: {0}".format(to_option)) - to_option = email_obj.to_option - course_id = email_obj.course_id + if to_option == SEND_TO_MYSELF: + recipient_qset = User.objects.filter(id=user_id) + else: + staff_grpname = _course_staff_group_name(course_location) + staff_group, _ = Group.objects.get_or_create(name=staff_grpname) + staff_qset = staff_group.user_set.all() + instructor_grpname = _course_instructor_group_name(course_location) + instructor_group, _ = Group.objects.get_or_create(name=instructor_grpname) + instructor_qset = instructor_group.user_set.all() + recipient_qset = staff_qset | instructor_qset + if to_option == SEND_TO_ALL: + # We also require students to have activated their accounts to + # provide verification that the provided email address is valid. 
+ enrollment_qset = User.objects.filter( + is_active=True, + courseenrollment__course_id=course_id, + courseenrollment__is_active=True + ) + recipient_qset = recipient_qset | enrollment_qset + recipient_qset = recipient_qset.distinct() - try: - course = get_course_by_id(course_id, depth=1) - except Http404 as exc: - log.exception("get_course_by_id failed: %s", exc.args[0]) - raise Exception("get_course_by_id failed: " + exc.args[0]) + recipient_qset = recipient_qset.order_by('pk') + return recipient_qset + +def _get_course_email_context(course): + """ + Returns context arguments to apply to all emails, independent of recipient. + """ + course_id = course.id + course_title = course.display_name course_url = 'https://{}{}'.format( settings.SITE_NAME, reverse('course_root', kwargs={'course_id': course_id}) ) image_url = 'https://{}{}'.format(settings.SITE_NAME, course_image_url(course)) + email_context = { + 'course_title': course_title, + 'course_url': course_url, + 'course_image_url': image_url, + 'account_settings_url': 'https://{}{}'.format(settings.SITE_NAME, reverse('dashboard')), + 'platform_name': settings.PLATFORM_NAME, + } + return email_context - if to_option == SEND_TO_MYSELF: - recipient_qset = User.objects.filter(id=user_id) - elif to_option == SEND_TO_ALL or to_option == SEND_TO_STAFF: - staff_grpname = _course_staff_group_name(course.location) - staff_group, _ = Group.objects.get_or_create(name=staff_grpname) - staff_qset = staff_group.user_set.all() - instructor_grpname = _course_instructor_group_name(course.location) - instructor_group, _ = Group.objects.get_or_create(name=instructor_grpname) - instructor_qset = instructor_group.user_set.all() - recipient_qset = staff_qset | instructor_qset - if to_option == SEND_TO_ALL: - enrollment_qset = User.objects.filter(courseenrollment__course_id=course_id, - courseenrollment__is_active=True) - recipient_qset = recipient_qset | enrollment_qset - recipient_qset = recipient_qset.distinct() - else: - 
log.error("Unexpected bulk email TO_OPTION found: %s", to_option) - raise Exception("Unexpected bulk email TO_OPTION found: {0}".format(to_option)) +def _generate_subtasks(create_subtask_fcn, recipient_qset): + """ + Generates a list of subtasks to send email to a given set of recipients. - recipient_qset = recipient_qset.order_by('pk') + Arguments: + `create_subtask_fcn` : a function whose inputs are a list of recipients and a subtask_id + to assign to the new subtask. Returns the subtask that will send email to that + list of recipients. + `recipient_qset` : a query set that defines the recipients who should receive emails. + + Returns: a tuple, containing: + + * A list of subtasks that will send emails to all recipients. + * A list of subtask_ids corresponding to those subtasks. + * A count of the total number of emails being sent. + + """ total_num_emails = recipient_qset.count() - num_queries = int(math.ceil(float(total_num_emails) / float(settings.EMAILS_PER_QUERY))) + num_queries = int(math.ceil(float(total_num_emails) / float(settings.BULK_EMAIL_EMAILS_PER_QUERY))) last_pk = recipient_qset[0].pk - 1 - num_workers = 0 + num_emails_queued = 0 + task_list = [] + subtask_id_list = [] for _ in range(num_queries): - recipient_sublist = list(recipient_qset.order_by('pk').filter(pk__gt=last_pk) - .values('profile__name', 'email', 'pk')[:settings.EMAILS_PER_QUERY]) + recipient_sublist = list(recipient_qset.order_by('pk').filter(pk__gt=last_pk).values('profile__name', 'email', 'pk')[:settings.BULK_EMAIL_EMAILS_PER_QUERY]) last_pk = recipient_sublist[-1]['pk'] num_emails_this_query = len(recipient_sublist) - num_tasks_this_query = int(math.ceil(float(num_emails_this_query) / float(settings.EMAILS_PER_TASK))) + num_tasks_this_query = int(math.ceil(float(num_emails_this_query) / float(settings.BULK_EMAIL_EMAILS_PER_TASK))) chunk = int(math.ceil(float(num_emails_this_query) / float(num_tasks_this_query))) for i in range(num_tasks_this_query): to_list = 
recipient_sublist[i * chunk:i * chunk + chunk] - course_email.delay( - email_id, - to_list, - course.display_name, - course_url, - image_url, - False - ) - num_workers += num_tasks_this_query - return num_workers + subtask_id = str(uuid4()) + subtask_id_list.append(subtask_id) + new_subtask = create_subtask_fcn(to_list, subtask_id) + task_list.append(new_subtask) + + num_emails_queued += num_emails_this_query + + # Sanity check: we expect the chunking to be properly summing to the original count: + if num_emails_queued != total_num_emails: + error_msg = "Number of emails generated by chunking {} not equal to original total {}".format(num_emails_queued, total_num_emails) + log.error(error_msg) + raise ValueError(error_msg) + + return task_list, subtask_id_list, total_num_emails -@task(default_retry_delay=15, max_retries=5) # pylint: disable=E1102 -def course_email(email_id, to_list, course_title, course_url, image_url, throttle=False): +def perform_delegate_email_batches(entry_id, course_id, task_input, action_name): """ - Takes a primary id for a CourseEmail object and a 'to_list' of recipient objects--keys are - 'profile__name', 'email' (address), and 'pk' (in the user table). - course_title, course_url, and image_url are to memoize course properties and save lookups. + Delegates emails by querying for the list of recipients who should + get the mail, chopping up into batches of settings.BULK_EMAIL_EMAILS_PER_TASK size, + and queueing up worker jobs. - Sends to all addresses contained in to_list. Emails are sent multi-part, in both plain - text and html. + Returns the 'progress' dict, which is also stored as the parent task's result. """ - with dog_stats_api.timer('course_email.single_task.time.overall', tags=[_statsd_tag(course_title)]): - _send_course_email(email_id, to_list, course_title, course_url, image_url, throttle) + entry = InstructorTask.objects.get(pk=entry_id) + # Get inputs to use in this task from the entry. 
+ user_id = entry.requester.id + task_id = entry.task_id -def _send_course_email(email_id, to_list, course_title, course_url, image_url, throttle): - """ - Performs the email sending task. - """ + # Perfunctory check, since expansion is made for convenience of other task + # code that doesn't need the entry_id. + if course_id != entry.course_id: + format_msg = "Course id conflict: explicit value {} does not match task value {}" + raise ValueError(format_msg.format(course_id, entry.course_id)) + + # Fetch the CourseEmail. + email_id = task_input['email_id'] try: - msg = CourseEmail.objects.get(id=email_id) + email_obj = CourseEmail.objects.get(id=email_id) except CourseEmail.DoesNotExist: - log.exception("Could not find email id:{} to send.".format(email_id)) + # The CourseEmail object should be committed in the view function before the task + # is submitted and reaches this point. + log.warning("Task %s: Failed to get CourseEmail with id %s", task_id, email_id) raise - # exclude optouts - optouts = (Optout.objects.filter(course_id=msg.course_id, - user__in=[i['pk'] for i in to_list]) - .values_list('user__email', flat=True)) + # Sanity check that course for email_obj matches that of the task referencing it. + if course_id != email_obj.course_id: + format_msg = "Course id conflict: explicit value {} does not match email value {}" + raise ValueError(format_msg.format(course_id, email_obj.course_id)) + # Fetch the course object. 
+ try: + course = get_course(course_id) + except ValueError: + log.exception("Task %s: course not found: %s", task_id, course_id) + raise + + to_option = email_obj.to_option + recipient_qset = _get_recipient_queryset(user_id, to_option, course_id, course.location) + global_email_context = _get_course_email_context(course) + + def _create_send_email_subtask(to_list, subtask_id): + """Creates a subtask to send email to a given recipient list.""" + subtask_status = create_subtask_status(subtask_id) + new_subtask = send_course_email.subtask( + ( + entry_id, + email_id, + to_list, + global_email_context, + subtask_status, + ), + task_id=subtask_id, + routing_key=settings.BULK_EMAIL_ROUTING_KEY, + ) + return new_subtask + + log.info("Task %s: Preparing to generate subtasks for course %s, email %s, to_option %s", + task_id, course_id, email_id, to_option) + task_list, subtask_id_list, total_num_emails = _generate_subtasks(_create_send_email_subtask, recipient_qset) + + # Update the InstructorTask with information about the subtasks we've defined. + log.info("Task %s: Preparing to update task for sending %d emails for course %s, email %s, to_option %s", + task_id, total_num_emails, course_id, email_id, to_option) + progress = initialize_subtask_info(entry, action_name, total_num_emails, subtask_id_list) + num_subtasks = len(subtask_id_list) + + # Now group the subtasks, and start them running. This allows all the subtasks + # in the list to be submitted at the same time. + log.info("Task %s: Preparing to queue %d email tasks (%d emails) for course %s, email %s, to %s", + task_id, num_subtasks, total_num_emails, course_id, email_id, to_option) + task_group = group(task_list) + task_group.apply_async(routing_key=settings.BULK_EMAIL_ROUTING_KEY) + + # We want to return progress here, as this is what will be stored in the + # AsyncResult for the parent task as its return value. + # The AsyncResult will then be marked as SUCCEEDED, and have this return value as its "result". 
+ # That's okay, for the InstructorTask will have the "real" status, and monitoring code + # should be using that instead. + return progress + + +@task(default_retry_delay=settings.BULK_EMAIL_DEFAULT_RETRY_DELAY, max_retries=settings.BULK_EMAIL_MAX_RETRIES) # pylint: disable=E1102 +def send_course_email(entry_id, email_id, to_list, global_email_context, subtask_status): + """ + Sends an email to a list of recipients. + + Inputs are: + * `entry_id`: id of the InstructorTask object to which progress should be recorded. + * `email_id`: id of the CourseEmail model that is to be emailed. + * `to_list`: list of recipients. Each is represented as a dict with the following keys: + - 'profile__name': full name of User. + - 'email': email address of User. + - 'pk': primary key of User model. + * `global_email_context`: dict containing values that are unique for this email but the same + for all recipients of this email. This dict is to be used to fill in slots in email + template. It does not include 'name' and 'email', which will be provided by the to_list. + * `subtask_status` : dict containing values representing current status. Keys are: + + 'task_id' : id of subtask. This is used to pass task information across retries. + 'attempted' : number of attempts -- should equal succeeded plus failed + 'succeeded' : number that succeeded in processing + 'skipped' : number that were not processed. + 'failed' : number that failed during processing + 'retried_nomax' : number of times the subtask has been retried for conditions that + should not have a maximum count applied + 'retried_withmax' : number of times the subtask has been retried for conditions that + should have a maximum count applied + 'state' : celery state of the subtask (e.g. QUEUING, PROGRESS, RETRY, FAILURE, SUCCESS) + + Most values will be zero on initial call, but may be different when the task is + invoked as part of a retry. + + Sends to all addresses contained in to_list that are not also in the Optout table. 
+ Emails are sent multi-part, in both plain text and html. Updates InstructorTask object + with status information (sends, failures, skips) and updates number of subtasks completed. + """ + # Get entry here, as a sanity check that it actually exists. We won't actually do anything + # with it right away, but we also don't expect it to fail. + InstructorTask.objects.get(pk=entry_id) + + current_task_id = subtask_status['task_id'] + num_to_send = len(to_list) + log.info("Preparing to send email %s to %d recipients as subtask %s for instructor task %d: context = %s, status=%s", + email_id, num_to_send, current_task_id, entry_id, global_email_context, subtask_status) + + send_exception = None + new_subtask_status = None + try: + course_title = global_email_context['course_title'] + with dog_stats_api.timer('course_email.single_task.time.overall', tags=[_statsd_tag(course_title)]): + new_subtask_status, send_exception = _send_course_email( + entry_id, + email_id, + to_list, + global_email_context, + subtask_status, + ) + except Exception: + # Unexpected exception. Try to write out the failure to the entry before failing. + log.exception("Send-email task %s: failed unexpectedly!", current_task_id) + # We got here for really unexpected reasons. Since we don't know how far + # the task got in emailing, we count all recipients as having failed. + # It at least keeps the counts consistent. + new_subtask_status = increment_subtask_status(subtask_status, failed=num_to_send, state=FAILURE) + update_subtask_status(entry_id, current_task_id, new_subtask_status) + raise + + if send_exception is None: + # Update the InstructorTask object that is storing its progress. + log.info("Send-email task %s: succeeded", current_task_id) + update_subtask_status(entry_id, current_task_id, new_subtask_status) + elif isinstance(send_exception, RetryTaskError): + # If retrying, a RetryTaskError needs to be returned to Celery. 
+ # We assume that the progress made before the retry condition + was encountered has already been updated before the retry call was made, + so we only log here. + log.warning("Send-email task %s: being retried", current_task_id) + raise send_exception # pylint: disable=E0702 + else: + log.error("Send-email task %s: failed: %s", current_task_id, send_exception) + update_subtask_status(entry_id, current_task_id, new_subtask_status) + raise send_exception # pylint: disable=E0702 + + log.info("Send-email task %s: returning status %s", current_task_id, new_subtask_status) + return new_subtask_status + + +def _filter_optouts_from_recipients(to_list, course_id): + """ + Filters a recipient list based on student opt-outs for a given course. + + Returns the filtered recipient list, as well as the number of optouts + removed from the list. + """ + optouts = Optout.objects.filter( + course_id=course_id, + user__in=[i['pk'] for i in to_list] + ).values_list('user__email', flat=True) optouts = set(optouts) + # Only count the num_optout for the first time the optouts are calculated. + # We assume that the number will not change on retries, and so we don't need + # to calculate it each time. num_optout = len(optouts) - to_list = [recipient for recipient in to_list if recipient['email'] not in optouts] + return to_list, num_optout - subject = "[" + course_title + "] " + msg.subject +def _get_source_address(course_id, course_title): + """ + Calculates an email address to be used as the 'from-address' for sent emails. + + Makes a unique from name and address for each course, e.g. + + "COURSE_TITLE" Course Staff + + """ course_title_no_quotes = re.sub(r'"', '', course_title) - course_num = msg.course_id.split('/')[1] # course_id = 'org/course_num/run' - # Substitute a '_' anywhere a non-(ascii, period, or dash) character appears. + + # The course_id is assumed to be in the form 'org/course_num/run', + # so pull out the course_num. 
Then make sure that it can be used + # in an email address, by substituting a '_' anywhere a non-(ascii, period, or dash) + # character appears. + course_num = course_id.split('/')[1] INVALID_CHARS = re.compile(r"[^\w.-]") course_num = INVALID_CHARS.sub('_', course_num) - # Make a unique from name and address for each course, eg - # "COURSE_TITLE" Course Staff - from_addr = '"{0}" Course Staff <{1}-{2}>'.format( - course_title_no_quotes, course_num, settings.DEFAULT_BULK_FROM_EMAIL - ) + from_addr = '"{0}" Course Staff <{1}-{2}>'.format(course_title_no_quotes, course_num, settings.BULK_EMAIL_DEFAULT_FROM_EMAIL) + return from_addr + + +def _send_course_email(entry_id, email_id, to_list, global_email_context, subtask_status): + """ + Performs the email sending task. + + Sends an email to a list of recipients. + + Inputs are: + * `entry_id`: id of the InstructorTask object to which progress should be recorded. + * `email_id`: id of the CourseEmail model that is to be emailed. + * `to_list`: list of recipients. Each is represented as a dict with the following keys: + - 'profile__name': full name of User. + - 'email': email address of User. + - 'pk': primary key of User model. + * `global_email_context`: dict containing values that are unique for this email but the same + for all recipients of this email. This dict is to be used to fill in slots in email + template. It does not include 'name' and 'email', which will be provided by the to_list. + * `subtask_status` : dict containing values representing current status. Keys are: + + 'task_id' : id of subtask. This is used to pass task information across retries. + 'attempted' : number of attempts -- should equal succeeded plus failed + 'succeeded' : number that succeeded in processing + 'skipped' : number that were not processed. 
+ 'failed' : number that failed during processing + 'retried_nomax' : number of times the subtask has been retried for conditions that + should not have a maximum count applied + 'retried_withmax' : number of times the subtask has been retried for conditions that + should have a maximum count applied + 'state' : celery state of the subtask (e.g. QUEUING, PROGRESS, RETRY, FAILURE, SUCCESS) + + Sends to all addresses contained in to_list that are not also in the Optout table. + Emails are sent multi-part, in both plain text and html. + + Returns a tuple of two values: + * First value is a dict which represents current progress at the end of this call. Keys are + the same as for the input subtask_status. + + * Second value is an exception returned by the innards of the method, indicating a fatal error. + In this case, the number of recipients that were not sent have already been added to the + 'failed' count above. + """ + # Get information from current task's request: + task_id = subtask_status['task_id'] + + # collect stats on progress: + num_optout = 0 + num_sent = 0 + num_error = 0 + + try: + course_email = CourseEmail.objects.get(id=email_id) + except CourseEmail.DoesNotExist as exc: + log.exception("Task %s: could not find email id:%s to send.", task_id, email_id) + raise + + # Exclude optouts (if not a retry): + # Note that we don't have to do the optout logic at all if this is a retry, + # because we have presumably already performed the optout logic on the first + # attempt. Anyone on the to_list on a retry has already passed the filter + # that existed at that time, and we don't need to keep checking for changes + # in the Optout list. 
+ if (subtask_status['retried_nomax'] + subtask_status['retried_withmax']) == 0: + to_list, num_optout = _filter_optouts_from_recipients(to_list, course_email.course_id) + + course_title = global_email_context['course_title'] + subject = "[" + course_title + "] " + course_email.subject + from_addr = _get_source_address(course_email.course_id, course_title) course_email_template = CourseEmailTemplate.get_template() - try: connection = get_connection() connection.open() - num_sent = 0 - num_error = 0 # Define context values to use in all course emails: - email_context = { - 'name': '', - 'email': '', - 'course_title': course_title, - 'course_url': course_url, - 'course_image_url': image_url, - 'account_settings_url': 'https://{}{}'.format(settings.SITE_NAME, reverse('dashboard')), - 'platform_name': settings.PLATFORM_NAME, - } + email_context = {'name': '', 'email': ''} + email_context.update(global_email_context) while to_list: - # Update context with user-specific values: - email = to_list[-1]['email'] + # Update context with user-specific values from the user at the end of the list. + # At the end of processing this user, they will be popped off of the to_list. + # That way, the to_list will always contain the recipients remaining to be emailed. + # This is convenient for retries, which will need to send to those who haven't + # yet been emailed, but not send to those who have already been sent to. 
+ current_recipient = to_list[-1] + email = current_recipient['email'] email_context['email'] = email - email_context['name'] = to_list[-1]['profile__name'] + email_context['name'] = current_recipient['profile__name'] # Construct message content using templates and context: - plaintext_msg = course_email_template.render_plaintext(msg.text_message, email_context) - html_msg = course_email_template.render_htmltext(msg.html_message, email_context) + plaintext_msg = course_email_template.render_plaintext(course_email.text_message, email_context) + html_msg = course_email_template.render_htmltext(course_email.html_message, email_context) # Create email: email_msg = EmailMultiAlternatives( @@ -192,66 +495,247 @@ def _send_course_email(email_id, to_list, course_title, course_url, image_url, t ) email_msg.attach_alternative(html_msg, 'text/html') - # Throttle if we tried a few times and got the rate limiter - if throttle or current_task.request.retries > 0: - time.sleep(0.2) + # Throttle if we have gotten the rate limiter. This is not very high-tech, + # but if a task has been retried for rate-limiting reasons, then we sleep + # for a period of time between all emails within this task. Choice of + # the value depends on the number of workers that might be sending email in + # parallel, and what the SES throttle rate is. + if subtask_status['retried_nomax'] > 0: + sleep(settings.BULK_EMAIL_RETRY_DELAY_BETWEEN_SENDS) try: + log.debug('Email with id %s to be sent to %s', email_id, email) + with dog_stats_api.timer('course_email.single_send.time.overall', tags=[_statsd_tag(course_title)]): connection.send_messages([email_msg]) - dog_stats_api.increment('course_email.sent', tags=[_statsd_tag(course_title)]) - - log.info('Email with id %s sent to %s', email_id, email) - num_sent += 1 except SMTPDataError as exc: - # According to SMTP spec, we'll retry error codes in the 4xx range. 
5xx range indicates hard failure + # According to SMTP spec, we'll retry error codes in the 4xx range. 5xx range indicates hard failure. if exc.smtp_code >= 400 and exc.smtp_code < 500: - # This will cause the outer handler to catch the exception and retry the entire task + # This will cause the outer handler to catch the exception and retry the entire task. raise exc else: - # This will fall through and not retry the message, since it will be popped - log.warning('Email with id %s not delivered to %s due to error %s', email_id, email, exc.smtp_error) - + # This will fall through and not retry the message. + log.warning('Task %s: email with id %s not delivered to %s due to error %s', task_id, email_id, email, exc.smtp_error) dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)]) - num_error += 1 + except SINGLE_EMAIL_FAILURE_ERRORS as exc: + # This will fall through and not retry the message. + log.warning('Task %s: email with id %s not delivered to %s due to error %s', task_id, email_id, email, exc) + dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)]) + num_error += 1 + + else: + dog_stats_api.increment('course_email.sent', tags=[_statsd_tag(course_title)]) + if settings.BULK_EMAIL_LOG_SENT_EMAILS: + log.info('Email with id %s sent to %s', email_id, email) + else: + log.debug('Email with id %s sent to %s', email_id, email) + num_sent += 1 + + # Pop the user that was emailed off the end of the list only once they have + # successfully been processed. (That way, if there were a failure that + # needed to be retried, the user is still on the list.) 
to_list.pop() - connection.close() - return course_email_result(num_sent, num_error, num_optout) + except INFINITE_RETRY_ERRORS as exc: + dog_stats_api.increment('course_email.infinite_retry', tags=[_statsd_tag(course_title)]) + # Increment the "retried_nomax" counter, update other counters with progress to date, + # and set the state to RETRY: + subtask_progress = increment_subtask_status( + subtask_status, + succeeded=num_sent, + failed=num_error, + skipped=num_optout, + retried_nomax=1, + state=RETRY + ) + return _submit_for_retry( + entry_id, email_id, to_list, global_email_context, exc, subtask_progress, skip_retry_max=True + ) - except (SMTPDataError, SMTPConnectError, SMTPServerDisconnected) as exc: - # Error caught here cause the email to be retried. The entire task is actually retried without popping the list - # Reasoning is that all of these errors may be temporary condition. - log.warning('Email with id %d not delivered due to temporary error %s, retrying send to %d recipients', - email_id, exc, len(to_list)) - raise course_email.retry( - arg=[ + except LIMITED_RETRY_ERRORS as exc: + # Errors caught here cause the email to be retried. The entire task is actually retried + # without popping the current recipient off of the existing list. + # Errors caught are those that indicate a temporary condition that might succeed on retry. 
+ dog_stats_api.increment('course_email.limited_retry', tags=[_statsd_tag(course_title)]) + # Increment the "retried_withmax" counter, update other counters with progress to date, + # and set the state to RETRY: + subtask_progress = increment_subtask_status( + subtask_status, + succeeded=num_sent, + failed=num_error, + skipped=num_optout, + retried_withmax=1, + state=RETRY + ) + return _submit_for_retry( + entry_id, email_id, to_list, global_email_context, exc, subtask_progress, skip_retry_max=False + ) + + except BULK_EMAIL_FAILURE_ERRORS as exc: + dog_stats_api.increment('course_email.error', tags=[_statsd_tag(course_title)]) + num_pending = len(to_list) + log.exception('Task %s: email with id %d caused send_course_email task to fail with "fatal" exception. %d emails unsent.', + task_id, email_id, num_pending) + # Update counters with progress to date, counting unsent emails as failures, + # and set the state to FAILURE: + subtask_progress = increment_subtask_status( + subtask_status, + succeeded=num_sent, + failed=(num_error + num_pending), + skipped=num_optout, + state=FAILURE + ) + return subtask_progress, exc + + except Exception as exc: + # Errors caught here cause the email to be retried. The entire task is actually retried + # without popping the current recipient off of the existing list. + # These are unexpected errors. Since they might be due to a temporary condition that might + # succeed on retry, we give them a retry. + dog_stats_api.increment('course_email.limited_retry', tags=[_statsd_tag(course_title)]) + log.exception('Task %s: email with id %d caused send_course_email task to fail with unexpected exception. 
Generating retry.', + task_id, email_id) + # Increment the "retried_withmax" counter, update other counters with progress to date, + # and set the state to RETRY: + subtask_progress = increment_subtask_status( + subtask_status, + succeeded=num_sent, + failed=num_error, + skipped=num_optout, + retried_withmax=1, + state=RETRY + ) + return _submit_for_retry( + entry_id, email_id, to_list, global_email_context, exc, subtask_progress, skip_retry_max=False + ) + + else: + # All went well. Update counters with progress to date, + # and set the state to SUCCESS: + subtask_progress = increment_subtask_status( + subtask_status, + succeeded=num_sent, + failed=num_error, + skipped=num_optout, + state=SUCCESS + ) + # Successful completion is marked by an exception value of None. + return subtask_progress, None + finally: + # Clean up at the end. + connection.close() + + +def _get_current_task(): + """ + Stub to make it easier to test without actually running Celery. + + This is a wrapper around celery.current_task, which provides access + to the top of the stack of Celery's tasks. When running tests, however, + it doesn't seem to work to mock current_task directly, so this wrapper + is used to provide a hook to mock in tests, while providing the real + `current_task` in production. + """ + return current_task + + +def _submit_for_retry(entry_id, email_id, to_list, global_email_context, current_exception, subtask_status, skip_retry_max=False): + """ + Helper function to requeue a task for retry, using the new version of arguments provided. + + Inputs are the same as for running a task, plus two extra indicating the state at the time of retry. + These include the `current_exception` that the task encountered that is causing the retry attempt, + and the `subtask_status` that is to be returned. A third extra argument `skip_retry_max` + indicates whether the current retry should be subject to a maximum test. 
+ + Returns a tuple of two values: + * First value is a dict which represents current progress. Keys are: + + 'task_id' : id of subtask. This is used to pass task information across retries. + 'attempted' : number of attempts -- should equal succeeded plus failed + 'succeeded' : number that succeeded in processing + 'skipped' : number that were not processed. + 'failed' : number that failed during processing + 'retried_nomax' : number of times the subtask has been retried for conditions that + should not have a maximum count applied + 'retried_withmax' : number of times the subtask has been retried for conditions that + should have a maximum count applied + 'state' : celery state of the subtask (e.g. QUEUING, PROGRESS, RETRY, FAILURE, SUCCESS) + + * Second value is an exception returned by the innards of the method. If the retry was + successfully submitted, this value will be the RetryTaskError that retry() returns. + Otherwise, it (ought to be) the current_exception passed in. + """ + task_id = subtask_status['task_id'] + log.info("Task %s: Successfully sent to %s users; failed to send to %s users (and skipped %s users)", + task_id, subtask_status['succeeded'], subtask_status['failed'], subtask_status['skipped']) + + # Calculate time until we retry this task (in seconds): + # The value for max_retries is increased by the number of times an "infinite-retry" exception + # has been retried. We want the regular retries to trigger max-retry checking, but not these + # special retries. So we count them separately. + max_retries = _get_current_task().max_retries + subtask_status['retried_nomax'] + base_delay = _get_current_task().default_retry_delay + if skip_retry_max: + # once we reach five retries, don't increase the countdown further. 
+ retry_index = min(subtask_status['retried_nomax'], 5) + exception_type = 'sending-rate' + # if we have a cap, after all, apply it now: + if hasattr(settings, 'BULK_EMAIL_INFINITE_RETRY_CAP'): + retry_cap = settings.BULK_EMAIL_INFINITE_RETRY_CAP + subtask_status['retried_withmax'] + max_retries = min(max_retries, retry_cap) + else: + retry_index = subtask_status['retried_withmax'] + exception_type = 'transient' + + # Skew the new countdown value by a random factor, so that not all + # retries are deferred by the same amount. + countdown = ((2 ** retry_index) * base_delay) * random.uniform(.75, 1.25) + + log.warning('Task %s: email with id %d not delivered due to %s error %s, retrying send to %d recipients in %s seconds (with max_retry=%s)', + task_id, email_id, exception_type, current_exception, len(to_list), countdown, max_retries) + + # we make sure that we update the InstructorTask with the current subtask status + # *before* actually calling retry(), to be sure that there is no race + # condition between this update and the update made by the retried task. + update_subtask_status(entry_id, task_id, subtask_status) + + # Now attempt the retry. If it succeeds, it returns a RetryTaskError that + # needs to be returned back to Celery. If it fails, we return the existing + # exception. + try: + send_course_email.retry( + args=[ + entry_id, email_id, to_list, - course_title, - course_url, - image_url, - current_task.request.retries > 0 + global_email_context, + subtask_status, ], - exc=exc, - countdown=(2 ** current_task.request.retries) * 15 + exc=current_exception, + countdown=countdown, + max_retries=max_retries, + throw=True, ) - except: - log.exception('Email with id %d caused course_email task to fail with uncaught exception. 
To list: %s', - email_id, - [i['email'] for i in to_list]) - # Close the connection before we exit - connection.close() - raise - - -# This string format code is wrapped in this function to allow mocking for a unit test -def course_email_result(num_sent, num_error, num_optout): - """Return the formatted result of course_email sending.""" - return "Sent {0}, Fail {1}, Optout {2}".format(num_sent, num_error, num_optout) + except RetryTaskError as retry_error: + # If the retry call is successful, update with the current progress: + log.exception('Task %s: email with id %d caused send_course_email task to retry.', + task_id, email_id) + return subtask_status, retry_error + except Exception as retry_exc: + # If there are no more retries, because the maximum has been reached, + # we expect the original exception to be raised. We catch it here + # (and put it in retry_exc just in case it's different, but it shouldn't be), + # and update status as if it were any other failure. That means that + # the recipients still in the to_list are counted as failures. + log.exception('Task %s: email with id %d caused send_course_email task to fail to retry. 
To list: %s', + task_id, email_id, [i['email'] for i in to_list]) + num_failed = len(to_list) + new_subtask_progress = increment_subtask_status(subtask_status, failed=num_failed, state=FAILURE) + return new_subtask_progress, retry_exc def _statsd_tag(course_title): diff --git a/lms/djangoapps/bulk_email/tests/test_email.py b/lms/djangoapps/bulk_email/tests/test_email.py index dab7812763..80fc692a4a 100644 --- a/lms/djangoapps/bulk_email/tests/test_email.py +++ b/lms/djangoapps/bulk_email/tests/test_email.py @@ -2,6 +2,8 @@ """ Unit tests for sending course email """ +from mock import patch + from django.conf import settings from django.core import mail from django.core.urlresolvers import reverse @@ -12,11 +14,8 @@ from courseware.tests.tests import TEST_DATA_MONGO_MODULESTORE from student.tests.factories import UserFactory, GroupFactory, CourseEnrollmentFactory from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase from xmodule.modulestore.tests.factories import CourseFactory - -from bulk_email.tasks import delegate_email_batches, course_email -from bulk_email.models import CourseEmail, Optout - -from mock import patch +from bulk_email.models import Optout +from instructor_task.subtasks import increment_subtask_status STAFF_COUNT = 3 STUDENT_COUNT = 10 @@ -30,13 +29,13 @@ class MockCourseEmailResult(object): """ emails_sent = 0 - def get_mock_course_email_result(self): + def get_mock_increment_subtask_status(self): """Wrapper for mock email function.""" - def mock_course_email_result(sent, failed, output, **kwargs): # pylint: disable=W0613 + def mock_increment_subtask_status(original_status, **kwargs): # pylint: disable=W0613 """Increments count of number of emails sent.""" - self.emails_sent += sent - return True - return mock_course_email_result + self.emails_sent += kwargs.get('succeeded', 0) + return increment_subtask_status(original_status, **kwargs) + return mock_increment_subtask_status 
@override_settings(MODULESTORE=TEST_DATA_MONGO_MODULESTORE) @@ -244,14 +243,14 @@ class TestEmailSendFromDashboard(ModuleStoreTestCase): [self.instructor.email] + [s.email for s in self.staff] + [s.email for s in self.students] ) - @override_settings(EMAILS_PER_TASK=3, EMAILS_PER_QUERY=7) - @patch('bulk_email.tasks.course_email_result') + @override_settings(BULK_EMAIL_EMAILS_PER_TASK=3, BULK_EMAIL_EMAILS_PER_QUERY=7) + @patch('bulk_email.tasks.increment_subtask_status') def test_chunked_queries_send_numerous_emails(self, email_mock): """ Test sending a large number of emails, to test the chunked querying """ mock_factory = MockCourseEmailResult() - email_mock.side_effect = mock_factory.get_mock_course_email_result() + email_mock.side_effect = mock_factory.get_mock_increment_subtask_status() added_users = [] for _ in xrange(LARGE_NUM_EMAILS): user = UserFactory() @@ -281,14 +280,3 @@ class TestEmailSendFromDashboard(ModuleStoreTestCase): [s.email for s in self.students] + [s.email for s in added_users if s not in optouts]) self.assertItemsEqual(outbox_contents, should_send_contents) - - -@override_settings(MODULESTORE=TEST_DATA_MONGO_MODULESTORE) -class TestEmailSendExceptions(ModuleStoreTestCase): - """ - Test that exceptions are handled correctly. - """ - def test_no_course_email_obj(self): - # Make sure course_email handles CourseEmail.DoesNotExist exception. 
- with self.assertRaises(CourseEmail.DoesNotExist): - course_email(101, [], "_", "_", "_", False) diff --git a/lms/djangoapps/bulk_email/tests/test_err_handling.py b/lms/djangoapps/bulk_email/tests/test_err_handling.py index 99d91eab3f..9d03c020e6 100644 --- a/lms/djangoapps/bulk_email/tests/test_err_handling.py +++ b/lms/djangoapps/bulk_email/tests/test_err_handling.py @@ -2,21 +2,24 @@ Unit tests for handling email sending errors """ from itertools import cycle +from mock import patch +from smtplib import SMTPDataError, SMTPServerDisconnected, SMTPConnectError + from django.test.utils import override_settings from django.conf import settings from django.core.management import call_command from django.core.urlresolvers import reverse + from courseware.tests.tests import TEST_DATA_MONGO_MODULESTORE from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase from xmodule.modulestore.tests.factories import CourseFactory from student.tests.factories import UserFactory, AdminFactory, CourseEnrollmentFactory -from bulk_email.models import CourseEmail -from bulk_email.tasks import delegate_email_batches - -from mock import patch, Mock -from smtplib import SMTPDataError, SMTPServerDisconnected, SMTPConnectError +from bulk_email.models import CourseEmail, SEND_TO_ALL +from bulk_email.tasks import perform_delegate_email_batches, send_course_email +from instructor_task.models import InstructorTask +from instructor_task.subtasks import create_subtask_status class EmailTestException(Exception): @@ -43,7 +46,7 @@ class TestEmailErrors(ModuleStoreTestCase): patch.stopall() @patch('bulk_email.tasks.get_connection', autospec=True) - @patch('bulk_email.tasks.course_email.retry') + @patch('bulk_email.tasks.send_course_email.retry') def test_data_err_retry(self, retry, get_conn): """ Test that celery handles transient SMTPDataErrors by retrying. 
@@ -64,15 +67,16 @@ class TestEmailErrors(ModuleStoreTestCase): self.assertIsInstance(exc, SMTPDataError) @patch('bulk_email.tasks.get_connection', autospec=True) - @patch('bulk_email.tasks.course_email_result') - @patch('bulk_email.tasks.course_email.retry') + @patch('bulk_email.tasks.increment_subtask_status') + @patch('bulk_email.tasks.send_course_email.retry') def test_data_err_fail(self, retry, result, get_conn): """ Test that celery handles permanent SMTPDataErrors by failing and not retrying. """ + # have every fourth email fail due to blacklisting: get_conn.return_value.send_messages.side_effect = cycle([SMTPDataError(554, "Email address is blacklisted"), - None]) - students = [UserFactory() for _ in xrange(settings.EMAILS_PER_TASK)] + None, None, None]) + students = [UserFactory() for _ in xrange(settings.BULK_EMAIL_EMAILS_PER_TASK)] for student in students: CourseEnrollmentFactory.create(user=student, course_id=self.course.id) @@ -87,13 +91,14 @@ class TestEmailErrors(ModuleStoreTestCase): # We shouldn't retry when hitting a 5xx error self.assertFalse(retry.called) # Test that after the rejected email, the rest still successfully send - ((sent, fail, optouts), _) = result.call_args - self.assertEquals(optouts, 0) - self.assertEquals(fail, settings.EMAILS_PER_TASK / 2) - self.assertEquals(sent, settings.EMAILS_PER_TASK / 2) + ((_initial_results), kwargs) = result.call_args + self.assertEquals(kwargs['skipped'], 0) + expected_fails = int((settings.BULK_EMAIL_EMAILS_PER_TASK + 3) / 4.0) + self.assertEquals(kwargs['failed'], expected_fails) + self.assertEquals(kwargs['succeeded'], settings.BULK_EMAIL_EMAILS_PER_TASK - expected_fails) @patch('bulk_email.tasks.get_connection', autospec=True) - @patch('bulk_email.tasks.course_email.retry') + @patch('bulk_email.tasks.send_course_email.retry') def test_disconn_err_retry(self, retry, get_conn): """ Test that celery handles SMTPServerDisconnected by retrying. 
@@ -113,7 +118,7 @@ class TestEmailErrors(ModuleStoreTestCase): self.assertIsInstance(exc, SMTPServerDisconnected) @patch('bulk_email.tasks.get_connection', autospec=True) - @patch('bulk_email.tasks.course_email.retry') + @patch('bulk_email.tasks.send_course_email.retry') def test_conn_err_retry(self, retry, get_conn): """ Test that celery handles SMTPConnectError by retrying. @@ -133,67 +138,80 @@ class TestEmailErrors(ModuleStoreTestCase): exc = kwargs['exc'] self.assertIsInstance(exc, SMTPConnectError) - @patch('bulk_email.tasks.course_email_result') - @patch('bulk_email.tasks.course_email.retry') + @patch('bulk_email.tasks.increment_subtask_status') @patch('bulk_email.tasks.log') - @patch('bulk_email.tasks.get_connection', Mock(return_value=EmailTestException)) - def test_general_exception(self, mock_log, retry, result): - """ - Tests the if the error is not SMTP-related, we log and reraise - """ - test_email = { - 'action': 'Send email', - 'to_option': 'myself', - 'subject': 'test subject for myself', - 'message': 'test message for myself' - } - # For some reason (probably the weirdness of testing with celery tasks) assertRaises doesn't work here - # so we assert on the arguments of log.exception - self.client.post(self.url, test_email) - ((log_str, email_id, to_list), _) = mock_log.exception.call_args - self.assertTrue(mock_log.exception.called) - self.assertIn('caused course_email task to fail with uncaught exception.', log_str) - self.assertEqual(email_id, 1) - self.assertEqual(to_list, [self.instructor.email]) - self.assertFalse(retry.called) - self.assertFalse(result.called) - - @patch('bulk_email.tasks.course_email_result') - @patch('bulk_email.tasks.delegate_email_batches.retry') - @patch('bulk_email.tasks.log') - def test_nonexist_email(self, mock_log, retry, result): + def test_nonexistent_email(self, mock_log, result): """ Tests retries when the email doesn't exist """ - delegate_email_batches.delay(-1, self.instructor.id) - ((log_str, email_id, 
_num_retries), _) = mock_log.warning.call_args + # create an InstructorTask object to pass through + course_id = self.course.id + entry = InstructorTask.create(course_id, "task_type", "task_key", "task_input", self.instructor) + task_input = {"email_id": -1} + with self.assertRaises(CourseEmail.DoesNotExist): + perform_delegate_email_batches(entry.id, course_id, task_input, "action_name") # pylint: disable=E1101 + ((log_str, _, email_id), _) = mock_log.warning.call_args self.assertTrue(mock_log.warning.called) self.assertIn('Failed to get CourseEmail with id', log_str) self.assertEqual(email_id, -1) - self.assertTrue(retry.called) self.assertFalse(result.called) - @patch('bulk_email.tasks.log') - def test_nonexist_course(self, mock_log): + def test_nonexistent_course(self): """ Tests exception when the course in the email doesn't exist """ - email = CourseEmail(course_id="I/DONT/EXIST") + course_id = "I/DONT/EXIST" + email = CourseEmail(course_id=course_id) email.save() - delegate_email_batches.delay(email.id, self.instructor.id) - ((log_str, _), _) = mock_log.exception.call_args - self.assertTrue(mock_log.exception.called) - self.assertIn('get_course_by_id failed:', log_str) + entry = InstructorTask.create(course_id, "task_type", "task_key", "task_input", self.instructor) + task_input = {"email_id": email.id} # pylint: disable=E1101 + with self.assertRaisesRegexp(ValueError, "Course not found"): + perform_delegate_email_batches(entry.id, course_id, task_input, "action_name") # pylint: disable=E1101 - @patch('bulk_email.tasks.log') - def test_nonexist_to_option(self, mock_log): + def test_nonexistent_to_option(self): """ Tests exception when the to_option in the email doesn't exist """ email = CourseEmail(course_id=self.course.id, to_option="IDONTEXIST") email.save() - delegate_email_batches.delay(email.id, self.instructor.id) - ((log_str, opt_str), _) = mock_log.error.call_args - self.assertTrue(mock_log.error.called) - self.assertIn('Unexpected bulk email 
TO_OPTION found', log_str) - self.assertEqual("IDONTEXIST", opt_str) + entry = InstructorTask.create(self.course.id, "task_type", "task_key", "task_input", self.instructor) + task_input = {"email_id": email.id} # pylint: disable=E1101 + with self.assertRaisesRegexp(Exception, 'Unexpected bulk email TO_OPTION found: IDONTEXIST'): + perform_delegate_email_batches(entry.id, self.course.id, task_input, "action_name") # pylint: disable=E1101 + + def test_wrong_course_id_in_task(self): + """ + Tests exception when the course_id in task is not the same as one explicitly passed in. + """ + email = CourseEmail(course_id=self.course.id, to_option=SEND_TO_ALL) + email.save() + entry = InstructorTask.create("bogus_task_id", "task_type", "task_key", "task_input", self.instructor) + task_input = {"email_id": email.id} # pylint: disable=E1101 + with self.assertRaisesRegexp(ValueError, 'does not match task value'): + perform_delegate_email_batches(entry.id, self.course.id, task_input, "action_name") # pylint: disable=E1101 + + def test_wrong_course_id_in_email(self): + """ + Tests exception when the course_id in CourseEmail is not the same as one explicitly passed in. + """ + email = CourseEmail(course_id="bogus_course_id", to_option=SEND_TO_ALL) + email.save() + entry = InstructorTask.create(self.course.id, "task_type", "task_key", "task_input", self.instructor) + task_input = {"email_id": email.id} # pylint: disable=E1101 + with self.assertRaisesRegexp(ValueError, 'does not match email value'): + perform_delegate_email_batches(entry.id, self.course.id, task_input, "action_name") # pylint: disable=E1101 + + def test_send_email_undefined_email(self): + # test at a lower level, to ensure that the course gets checked down below too. 
+ entry = InstructorTask.create(self.course.id, "task_type", "task_key", "task_input", self.instructor) + entry_id = entry.id # pylint: disable=E1101 + to_list = ['test@test.com'] + global_email_context = {'course_title': 'dummy course'} + subtask_id = "subtask-id-value" + subtask_status = create_subtask_status(subtask_id) + bogus_email_id = 1001 + with self.assertRaises(CourseEmail.DoesNotExist): + # we skip the call that updates subtask status, since we've not set up the InstructorTask + # for the subtask, and it's not important to the test. + with patch('bulk_email.tasks.update_subtask_status'): + send_course_email(entry_id, bogus_email_id, to_list, global_email_context, subtask_status) diff --git a/lms/djangoapps/bulk_email/tests/test_models.py b/lms/djangoapps/bulk_email/tests/test_models.py new file mode 100644 index 0000000000..737bc36845 --- /dev/null +++ b/lms/djangoapps/bulk_email/tests/test_models.py @@ -0,0 +1,101 @@ +""" +Unit tests for bulk-email-related models. +""" +from django.test import TestCase +from django.core.management import call_command + +from student.tests.factories import UserFactory + +from bulk_email.models import CourseEmail, SEND_TO_STAFF, CourseEmailTemplate + + +class CourseEmailTest(TestCase): + """Test the CourseEmail model.""" + + def test_creation(self): + course_id = 'abc/123/doremi' + sender = UserFactory.create() + to_option = SEND_TO_STAFF + subject = "dummy subject" + html_message = "dummy message" + email = CourseEmail.create(course_id, sender, to_option, subject, html_message) + self.assertEquals(email.course_id, course_id) + self.assertEquals(email.to_option, SEND_TO_STAFF) + self.assertEquals(email.subject, subject) + self.assertEquals(email.html_message, html_message) + self.assertEquals(email.sender, sender) + + def test_bad_to_option(self): + course_id = 'abc/123/doremi' + sender = UserFactory.create() + to_option = "fake" + subject = "dummy subject" + html_message = "dummy message" + with 
self.assertRaises(ValueError): + CourseEmail.create(course_id, sender, to_option, subject, html_message) + + +class NoCourseEmailTemplateTest(TestCase): + """Test the CourseEmailTemplate model without loading the template data.""" + + def test_get_missing_template(self): + with self.assertRaises(CourseEmailTemplate.DoesNotExist): + CourseEmailTemplate.get_template() + + +class CourseEmailTemplateTest(TestCase): + """Test the CourseEmailTemplate model.""" + + def setUp(self): + # load initial content (since we don't run migrations as part of tests): + call_command("loaddata", "course_email_template.json") + + def _get_sample_plain_context(self): + """Provide sample context sufficient for rendering plaintext template""" + context = { + 'course_title': "Bogus Course Title", + 'course_url': "/location/of/course/url", + 'account_settings_url': "/location/of/account/settings/url", + 'platform_name': 'edX', + 'email': 'your-email@test.com', + } + return context + + def _get_sample_html_context(self): + """Provide sample context sufficient for rendering HTML template""" + context = self._get_sample_plain_context() + context['course_image_url'] = "/location/of/course/image/url" + return context + + def test_get_template(self): + template = CourseEmailTemplate.get_template() + self.assertIsNotNone(template.html_template) + self.assertIsNotNone(template.plain_template) + + def test_render_html_without_context(self): + template = CourseEmailTemplate.get_template() + base_context = self._get_sample_html_context() + for keyname in base_context: + context = dict(base_context) + del context[keyname] + with self.assertRaises(KeyError): + template.render_htmltext("My new html text.", context) + + def test_render_plaintext_without_context(self): + template = CourseEmailTemplate.get_template() + base_context = self._get_sample_plain_context() + for keyname in base_context: + context = dict(base_context) + del context[keyname] + with self.assertRaises(KeyError): + 
template.render_plaintext("My new plain text.", context) + + def test_render_html(self): + template = CourseEmailTemplate.get_template() + context = self._get_sample_html_context() + template.render_htmltext("My new html text.", context) + + def test_render_plain(self): + template = CourseEmailTemplate.get_template() + context = self._get_sample_plain_context() + template.render_plaintext("My new plain text.", context) diff --git a/lms/djangoapps/bulk_email/tests/test_tasks.py b/lms/djangoapps/bulk_email/tests/test_tasks.py new file mode 100644 index 0000000000..fadb4122b5 --- /dev/null +++ b/lms/djangoapps/bulk_email/tests/test_tasks.py @@ -0,0 +1,380 @@ +""" +Unit tests for LMS instructor-initiated background tasks. + +Runs tasks on answers to course problems to validate that code +paths actually work. + +""" +import json +from uuid import uuid4 +from itertools import cycle, chain, repeat +from mock import patch, Mock +from smtplib import SMTPServerDisconnected, SMTPDataError, SMTPConnectError, SMTPAuthenticationError +from boto.ses.exceptions import ( + SESDailyQuotaExceededError, + SESMaxSendingRateExceededError, + SESAddressBlacklistedError, + SESIllegalAddressError, + SESLocalAddressCharacterError, +) +from boto.exception import AWSConnectionError + +from celery.states import SUCCESS, FAILURE + +from django.conf import settings +from django.core.management import call_command + +from bulk_email.models import CourseEmail, Optout, SEND_TO_ALL + +from instructor_task.tasks import send_bulk_course_email +from instructor_task.subtasks import update_subtask_status +from instructor_task.models import InstructorTask +from instructor_task.tests.test_base import InstructorTaskCourseTestCase +from instructor_task.tests.factories import InstructorTaskFactory + + +class TestTaskFailure(Exception): + """Dummy exception used for unit tests.""" + pass + + +def my_update_subtask_status(entry_id, current_task_id, new_subtask_status): + """ + Check whether a subtask has been 
updated before really updating. + + Check whether a subtask which has been retried + has had the retry already write its results here before the code + that was invoking the retry had a chance to update this status. + + This is the norm in "eager" mode (used by tests) where the retry is called + and run to completion before control is returned to the code that + invoked the retry. If the retries eventually end in failure (e.g. due to + a maximum number of retries being attempted), the "eager" code will return + the error for each retry as it is popped off the stack. We want to just ignore + the later updates that are called as the result of the earlier retries. + + This should not be an issue in production, where status is updated before + a task is retried, and is then updated afterwards if the retry fails. + """ + entry = InstructorTask.objects.get(pk=entry_id) + subtask_dict = json.loads(entry.subtasks) + subtask_status_info = subtask_dict['status'] + current_subtask_status = subtask_status_info[current_task_id] + + def _get_retry_count(subtask_result): + """Return the number of retries counted for the given subtask.""" + retry_count = subtask_result.get('retried_nomax', 0) + retry_count += subtask_result.get('retried_withmax', 0) + return retry_count + + current_retry_count = _get_retry_count(current_subtask_status) + new_retry_count = _get_retry_count(new_subtask_status) + if current_retry_count <= new_retry_count: + update_subtask_status(entry_id, current_task_id, new_subtask_status) + + +class TestBulkEmailInstructorTask(InstructorTaskCourseTestCase): + """Tests instructor task that send bulk email.""" + + def setUp(self): + super(TestBulkEmailInstructorTask, self).setUp() + self.initialize_course() + self.instructor = self.create_instructor('instructor') + + # load initial content (since we don't run migrations as part of tests): + call_command("loaddata", "course_email_template.json") + + def _create_input_entry(self, course_id=None): + """ + Creates a 
InstructorTask entry for testing. + + Overrides the base class version in that this creates CourseEmail. + """ + to_option = SEND_TO_ALL + course_id = course_id or self.course.id + course_email = CourseEmail.create(course_id, self.instructor, to_option, "Test Subject", "
<p>This is a test message</p>
") + task_input = {'email_id': course_email.id} # pylint: disable=E1101 + task_id = str(uuid4()) + instructor_task = InstructorTaskFactory.create( + course_id=course_id, + requester=self.instructor, + task_input=json.dumps(task_input), + task_key='dummy value', + task_id=task_id, + ) + return instructor_task + + def _run_task_with_mock_celery(self, task_class, entry_id, task_id): + """Submit a task and mock how celery provides a current_task.""" + mock_current_task = Mock() + mock_current_task.max_retries = settings.BULK_EMAIL_MAX_RETRIES + mock_current_task.default_retry_delay = settings.BULK_EMAIL_DEFAULT_RETRY_DELAY + task_args = [entry_id, {}] + + with patch('bulk_email.tasks._get_current_task') as mock_get_task: + mock_get_task.return_value = mock_current_task + return task_class.apply(task_args, task_id=task_id).get() + + def test_email_missing_current_task(self): + task_entry = self._create_input_entry() + with self.assertRaises(ValueError): + send_bulk_course_email(task_entry.id, {}) + + def test_email_undefined_course(self): + # Check that we fail when passing in a course that doesn't exist. 
+ task_entry = self._create_input_entry(course_id="bogus/course/id") + with self.assertRaises(ValueError): + self._run_task_with_mock_celery(send_bulk_course_email, task_entry.id, task_entry.task_id) + + def test_bad_task_id_on_update(self): + task_entry = self._create_input_entry() + + def dummy_update_subtask_status(entry_id, _current_task_id, new_subtask_status): + """Passes a bad value for task_id to test update_subtask_status""" + bogus_task_id = "this-is-bogus" + update_subtask_status(entry_id, bogus_task_id, new_subtask_status) + + with self.assertRaises(ValueError): + with patch('bulk_email.tasks.update_subtask_status', dummy_update_subtask_status): + send_bulk_course_email(task_entry.id, {}) # pylint: disable=E1101 + + def _create_students(self, num_students): + """Create students for testing""" + return [self.create_student('robot%d' % i) for i in xrange(num_students)] + + def _assert_single_subtask_status(self, entry, succeeded, failed=0, skipped=0, retried_nomax=0, retried_withmax=0): + """Compare counts with 'subtasks' entry in InstructorTask table.""" + subtask_info = json.loads(entry.subtasks) + # verify subtask-level counts: + self.assertEquals(subtask_info.get('total'), 1) + self.assertEquals(subtask_info.get('succeeded'), 1 if succeeded > 0 else 0) + self.assertEquals(subtask_info.get('failed'), 0 if succeeded > 0 else 1) + # verify individual subtask status: + subtask_status_info = subtask_info.get('status') + task_id_list = subtask_status_info.keys() + self.assertEquals(len(task_id_list), 1) + task_id = task_id_list[0] + subtask_status = subtask_status_info.get(task_id) + print("Testing subtask status: {}".format(subtask_status)) + self.assertEquals(subtask_status.get('task_id'), task_id) + self.assertEquals(subtask_status.get('attempted'), succeeded + failed) + self.assertEquals(subtask_status.get('succeeded'), succeeded) + self.assertEquals(subtask_status.get('skipped'), skipped) + self.assertEquals(subtask_status.get('failed'), failed) + 
self.assertEquals(subtask_status.get('retried_nomax'), retried_nomax) + self.assertEquals(subtask_status.get('retried_withmax'), retried_withmax) + self.assertEquals(subtask_status.get('state'), SUCCESS if succeeded > 0 else FAILURE) + + def _test_run_with_task(self, task_class, action_name, total, succeeded, failed=0, skipped=0, retried_nomax=0, retried_withmax=0): + """Run a task and check the number of emails processed.""" + task_entry = self._create_input_entry() + parent_status = self._run_task_with_mock_celery(task_class, task_entry.id, task_entry.task_id) + + # check return value + self.assertEquals(parent_status.get('total'), total) + self.assertEquals(parent_status.get('action_name'), action_name) + + # compare with task_output entry in InstructorTask table: + entry = InstructorTask.objects.get(id=task_entry.id) + status = json.loads(entry.task_output) + self.assertEquals(status.get('attempted'), succeeded + failed) + self.assertEquals(status.get('succeeded'), succeeded) + self.assertEquals(status.get('skipped'), skipped) + self.assertEquals(status.get('failed'), failed) + self.assertEquals(status.get('total'), total) + self.assertEquals(status.get('action_name'), action_name) + self.assertGreater(status.get('duration_ms'), 0) + self.assertEquals(entry.task_state, SUCCESS) + self._assert_single_subtask_status(entry, succeeded, failed, skipped, retried_nomax, retried_withmax) + + def test_successful(self): + # Select number of emails to fit into a single subtask. + num_emails = settings.BULK_EMAIL_EMAILS_PER_TASK + # We also send email to the instructor: + self._create_students(num_emails - 1) + with patch('bulk_email.tasks.get_connection', autospec=True) as get_conn: + get_conn.return_value.send_messages.side_effect = cycle([None]) + self._test_run_with_task(send_bulk_course_email, 'emailed', num_emails, num_emails) + + def test_unactivated_user(self): + # Select number of emails to fit into a single subtask. 
+ num_emails = settings.BULK_EMAIL_EMAILS_PER_TASK + # We also send email to the instructor: + students = self._create_students(num_emails - 1) + # mark a student as not yet having activated their email: + student = students[0] + student.is_active = False + student.save() + with patch('bulk_email.tasks.get_connection', autospec=True) as get_conn: + get_conn.return_value.send_messages.side_effect = cycle([None]) + self._test_run_with_task(send_bulk_course_email, 'emailed', num_emails - 1, num_emails - 1) + + def test_skipped(self): + # Select number of emails to fit into a single subtask. + num_emails = settings.BULK_EMAIL_EMAILS_PER_TASK + # We also send email to the instructor: + students = self._create_students(num_emails - 1) + # have every fourth student optout: + expected_skipped = int((num_emails + 3) / 4.0) + expected_succeeds = num_emails - expected_skipped + for index in range(0, num_emails, 4): + Optout.objects.create(user=students[index], course_id=self.course.id) + # mark some students as opting out + with patch('bulk_email.tasks.get_connection', autospec=True) as get_conn: + get_conn.return_value.send_messages.side_effect = cycle([None]) + self._test_run_with_task(send_bulk_course_email, 'emailed', num_emails, expected_succeeds, skipped=expected_skipped) + + def _test_email_address_failures(self, exception): + """Test that celery handles bad address errors by failing and not retrying.""" + # Select number of emails to fit into a single subtask. 
+ num_emails = settings.BULK_EMAIL_EMAILS_PER_TASK + # We also send email to the instructor: + self._create_students(num_emails - 1) + expected_fails = int((num_emails + 3) / 4.0) + expected_succeeds = num_emails - expected_fails + with patch('bulk_email.tasks.get_connection', autospec=True) as get_conn: + # have every fourth email fail due to some address failure: + get_conn.return_value.send_messages.side_effect = cycle([exception, None, None, None]) + self._test_run_with_task(send_bulk_course_email, 'emailed', num_emails, expected_succeeds, failed=expected_fails) + + def test_smtp_blacklisted_user(self): + # Test that celery handles permanent SMTPDataErrors by failing and not retrying. + self._test_email_address_failures(SMTPDataError(554, "Email address is blacklisted")) + + def test_ses_blacklisted_user(self): + # Test that celery handles permanent SMTPDataErrors by failing and not retrying. + self._test_email_address_failures(SESAddressBlacklistedError(554, "Email address is blacklisted")) + + def test_ses_illegal_address(self): + # Test that celery handles permanent SMTPDataErrors by failing and not retrying. + self._test_email_address_failures(SESIllegalAddressError(554, "Email address is illegal")) + + def test_ses_local_address_character_error(self): + # Test that celery handles permanent SMTPDataErrors by failing and not retrying. + self._test_email_address_failures(SESLocalAddressCharacterError(554, "Email address contains a bad character")) + + def _test_retry_after_limited_retry_error(self, exception): + """Test that celery handles connection failures by retrying.""" + # If we want the batch to succeed, we need to send fewer emails + # than the max retries, so that the max is not triggered. 
+ num_emails = settings.BULK_EMAIL_MAX_RETRIES + # We also send email to the instructor: + self._create_students(num_emails - 1) + expected_fails = 0 + expected_succeeds = num_emails + with patch('bulk_email.tasks.get_connection', autospec=True) as get_conn: + # Have every other mail attempt fail due to disconnection. + get_conn.return_value.send_messages.side_effect = cycle([exception, None]) + self._test_run_with_task( + send_bulk_course_email, + 'emailed', + num_emails, + expected_succeeds, + failed=expected_fails, + retried_withmax=num_emails + ) + + def _test_max_retry_limit_causes_failure(self, exception): + """Test that celery can hit a maximum number of retries.""" + # Doesn't really matter how many recipients, since we expect + # to fail on the first. + num_emails = 10 + # We also send email to the instructor: + self._create_students(num_emails - 1) + expected_fails = num_emails + expected_succeeds = 0 + with patch('bulk_email.tasks.get_connection', autospec=True) as get_conn: + # always fail to connect, triggering repeated retries until limit is hit: + get_conn.return_value.send_messages.side_effect = cycle([exception]) + with patch('bulk_email.tasks.update_subtask_status', my_update_subtask_status): + self._test_run_with_task( + send_bulk_course_email, + 'emailed', + num_emails, + expected_succeeds, + failed=expected_fails, + retried_withmax=(settings.BULK_EMAIL_MAX_RETRIES + 1) + ) + + def test_retry_after_smtp_disconnect(self): + self._test_retry_after_limited_retry_error(SMTPServerDisconnected(425, "Disconnecting")) + + def test_max_retry_after_smtp_disconnect(self): + self._test_max_retry_limit_causes_failure(SMTPServerDisconnected(425, "Disconnecting")) + + def test_retry_after_smtp_connect_error(self): + self._test_retry_after_limited_retry_error(SMTPConnectError(424, "Bad Connection")) + + def test_max_retry_after_smtp_connect_error(self): + self._test_max_retry_limit_causes_failure(SMTPConnectError(424, "Bad Connection")) + + def 
test_retry_after_aws_connect_error(self): + self._test_retry_after_limited_retry_error(AWSConnectionError("Unable to provide secure connection through proxy")) + + def test_max_retry_after_aws_connect_error(self): + self._test_max_retry_limit_causes_failure(AWSConnectionError("Unable to provide secure connection through proxy")) + + def test_retry_after_general_error(self): + self._test_retry_after_limited_retry_error(Exception("This is some random exception.")) + + def test_max_retry_after_general_error(self): + self._test_max_retry_limit_causes_failure(Exception("This is some random exception.")) + + def _test_retry_after_unlimited_retry_error(self, exception): + """Test that celery handles throttling failures by retrying.""" + num_emails = 8 + # We also send email to the instructor: + self._create_students(num_emails - 1) + expected_fails = 0 + expected_succeeds = num_emails + # Note that because celery in eager mode will call retries synchronously, + # each retry will increase the stack depth. It turns out that there is a + # maximum depth at which a RuntimeError is raised ("maximum recursion depth + # exceeded"). The maximum recursion depth is 90, so + # num_emails * expected_retries < 90. + expected_retries = 10 + with patch('bulk_email.tasks.get_connection', autospec=True) as get_conn: + # Cycle through N throttling errors followed by a success. 
+ get_conn.return_value.send_messages.side_effect = cycle( + chain(repeat(exception, expected_retries), [None]) + ) + self._test_run_with_task( + send_bulk_course_email, + 'emailed', + num_emails, + expected_succeeds, + failed=expected_fails, + retried_nomax=(expected_retries * num_emails) + ) + + def test_retry_after_smtp_throttling_error(self): + self._test_retry_after_unlimited_retry_error(SMTPDataError(455, "Throttling: Sending rate exceeded")) + + def test_retry_after_ses_throttling_error(self): + self._test_retry_after_unlimited_retry_error(SESMaxSendingRateExceededError(455, "Throttling: Sending rate exceeded")) + + def _test_immediate_failure(self, exception): + """Test that celery can hit a maximum number of retries.""" + # Doesn't really matter how many recipients, since we expect + # to fail on the first. + num_emails = 10 + # We also send email to the instructor: + self._create_students(num_emails - 1) + expected_fails = num_emails + expected_succeeds = 0 + with patch('bulk_email.tasks.get_connection', autospec=True) as get_conn: + # always fail to connect, triggering repeated retries until limit is hit: + get_conn.return_value.send_messages.side_effect = cycle([exception]) + self._test_run_with_task( + send_bulk_course_email, + 'emailed', + num_emails, + expected_succeeds, + failed=expected_fails, + ) + + def test_failure_on_unhandled_smtp(self): + self._test_immediate_failure(SMTPAuthenticationError(403, "That password doesn't work!")) + + def test_failure_on_ses_quota_exceeded(self): + self._test_immediate_failure(SESDailyQuotaExceededError(403, "You're done for the day!")) diff --git a/lms/djangoapps/courseware/courses.py b/lms/djangoapps/courseware/courses.py index 625453aad0..aa6e7214d4 100644 --- a/lms/djangoapps/courseware/courses.py +++ b/lms/djangoapps/courseware/courses.py @@ -36,11 +36,31 @@ def get_request_for_thread(): del frame +def get_course(course_id, depth=0): + """ + Given a course id, return the corresponding course descriptor. 
+ + If course_id is not valid, raises a ValueError. This is appropriate + for internal use. + + depth: The number of levels of children for the modulestore to cache. + None means infinite depth. Default is to fetch no children. + """ + try: + course_loc = CourseDescriptor.id_to_location(course_id) + return modulestore().get_instance(course_id, course_loc, depth=depth) + except (KeyError, ItemNotFoundError): + raise ValueError("Course not found: {}".format(course_id)) + except InvalidLocationError: + raise ValueError("Invalid location: {}".format(course_id)) + + def get_course_by_id(course_id, depth=0): """ Given a course id, return the corresponding course descriptor. If course_id is not valid, raises a 404. + depth: The number of levels of children for the modulestore to cache. None means infinite depth """ try: @@ -51,6 +71,7 @@ def get_course_by_id(course_id, depth=0): except InvalidLocationError: raise Http404("Invalid location") + def get_course_with_access(user, course_id, action, depth=0): """ Given a course_id, look up the corresponding course descriptor, @@ -182,7 +203,6 @@ def get_course_about_section(course, section_key): raise KeyError("Invalid about key " + str(section_key)) - def get_course_info_section(request, course, section_key): """ This returns the snippet of html to be rendered on the course info page, @@ -194,8 +214,6 @@ def get_course_info_section(request, course, section_key): - updates - guest_updates """ - - loc = Location(course.location.tag, course.location.org, course.location.course, 'course_info', section_key) # Use an empty cache diff --git a/lms/djangoapps/courseware/tests/test_courses.py b/lms/djangoapps/courseware/tests/test_courses.py index ee05a483a5..207752a85f 100644 --- a/lms/djangoapps/courseware/tests/test_courses.py +++ b/lms/djangoapps/courseware/tests/test_courses.py @@ -4,16 +4,19 @@ import mock from django.test import TestCase from django.http import Http404 from django.test.utils import override_settings -from 
courseware.courses import get_course_by_id, get_cms_course_link_by_id +from courseware.courses import get_course_by_id, get_course, get_cms_course_link_by_id from xmodule.modulestore.django import get_default_store_name_for_current_request CMS_BASE_TEST = 'testcms' + class CoursesTest(TestCase): + """Test methods related to fetching courses.""" + def test_get_course_by_id_invalid_chars(self): """ Test that `get_course_by_id` throws a 404, rather than - an exception, when faced with unexpected characters + an exception, when faced with unexpected characters (such as unicode characters, and symbols such as = and ' ') """ with self.assertRaises(Http404): @@ -21,6 +24,17 @@ class CoursesTest(TestCase): get_course_by_id('MITx/foobar/business and management') get_course_by_id('MITx/foobar/NiñøJoséMaríáßç') + def test_get_course_invalid_chars(self): + """ + Test that `get_course` throws a ValueError, rather than + a 404, when faced with unexpected characters + (such as unicode characters, and symbols such as = and ' ') + """ + with self.assertRaises(ValueError): + get_course('MITx/foobar/statistics=introduction') + get_course('MITx/foobar/business and management') + get_course('MITx/foobar/NiñøJoséMaríáßç') + @override_settings(CMS_BASE=CMS_BASE_TEST) def test_get_cms_course_link_by_id(self): """ diff --git a/lms/djangoapps/instructor/views/api.py b/lms/djangoapps/instructor/views/api.py index 35dbde477d..facf648580 100644 --- a/lms/djangoapps/instructor/views/api.py +++ b/lms/djangoapps/instructor/views/api.py @@ -40,8 +40,6 @@ import analytics.csvs import csv from bulk_email.models import CourseEmail -from html_to_text import html_to_text -from bulk_email import tasks log = logging.getLogger(__name__) @@ -755,7 +753,7 @@ def send_email(request, course_id): Send an email to self, staff, or everyone involved in a course. 
Query Parameters: - 'send_to' specifies what group the email should be sent to - Options are defined by the Email model in + Options are defined by the CourseEmail model in lms/djangoapps/bulk_email/models.py - 'subject' specifies email's subject - 'message' specifies email's content @@ -763,17 +761,15 @@ def send_email(request, course_id): send_to = request.POST.get("send_to") subject = request.POST.get("subject") message = request.POST.get("message") - text_message = html_to_text(message) - email = CourseEmail( - course_id=course_id, - sender=request.user, - to_option=send_to, - subject=subject, - html_message=message, - text_message=text_message, - ) - email.save() - tasks.delegate_email_batches.delay(email.id, request.user.id) # pylint: disable=E1101 + + # Create the CourseEmail object. This is saved immediately, so that + # any transaction that has been pending up to this point will also be + # committed. + email = CourseEmail.create(course_id, request.user, send_to, subject, message) + + # Submit the task, so that the correct InstructorTask object gets created (for monitoring purposes) + instructor_task.api.submit_bulk_course_email(request, course_id, email.id) # pylint: disable=E1101 + response_payload = {'course_id': course_id} return JsonResponse(response_payload) diff --git a/lms/djangoapps/instructor/views/legacy.py b/lms/djangoapps/instructor/views/legacy.py index 0978d020bf..808b9f5fb6 100644 --- a/lms/djangoapps/instructor/views/legacy.py +++ b/lms/djangoapps/instructor/views/legacy.py @@ -30,6 +30,7 @@ from xmodule.modulestore.django import modulestore from xmodule.modulestore.exceptions import ItemNotFoundError from xmodule.html_module import HtmlDescriptor +from bulk_email.models import CourseEmail from courseware import grades from courseware.access import (has_access, get_access_group_name, course_beta_test_group_name) @@ -46,7 +47,8 @@ from instructor_task.api import (get_running_instructor_tasks, get_instructor_task_history, 
submit_rescore_problem_for_all_students, submit_rescore_problem_for_student, - submit_reset_problem_attempts_for_all_students) + submit_reset_problem_attempts_for_all_students, + submit_bulk_course_email) from instructor_task.views import get_task_completion_info from mitxmako.shortcuts import render_to_response from psychometrics import psychoanalyze @@ -58,10 +60,6 @@ from xblock.field_data import DictFieldData from xblock.fields import ScopeIds from django.utils.translation import ugettext as _u -from bulk_email.models import CourseEmail -from html_to_text import html_to_text -from bulk_email import tasks - log = logging.getLogger(__name__) # internal commands for managing forum roles: @@ -720,29 +718,24 @@ def instructor_dashboard(request, course_id): email_to_option = request.POST.get("to_option") email_subject = request.POST.get("subject") html_message = request.POST.get("message") - text_message = html_to_text(html_message) - email = CourseEmail( - course_id=course_id, - sender=request.user, - to_option=email_to_option, - subject=email_subject, - html_message=html_message, - text_message=text_message - ) + # Create the CourseEmail object. This is saved immediately, so that + # any transaction that has been pending up to this point will also be + # committed. + email = CourseEmail.create(course_id, request.user, email_to_option, email_subject, html_message) - email.save() - - tasks.delegate_email_batches.delay( - email.id, - request.user.id - ) + # Submit the task, so that the correct InstructorTask object gets created (for monitoring purposes) + submit_bulk_course_email(request, course_id, email.id) # pylint: disable=E1101 if email_to_option == "all": email_msg = '

Your email was successfully queued for sending. Please note that for large public classes (~10k), it may take 1-2 hours to send all emails.

' else: email_msg = '

Your email was successfully queued for sending.

' + elif "Show Background Email Task History" in action: + message, datatable = get_background_task_table(course_id, task_type='bulk_course_email') + msg += message + #---------------------------------------- # psychometrics @@ -877,6 +870,7 @@ def instructor_dashboard(request, course_id): return render_to_response('courseware/instructor_dashboard.html', context) + def _do_remote_gradebook(user, course, action, args=None, files=None): ''' Perform remote gradebook action. Returns msg, datatable. @@ -1527,7 +1521,7 @@ def dump_grading_context(course): return msg -def get_background_task_table(course_id, problem_url, student=None): +def get_background_task_table(course_id, problem_url=None, student=None, task_type=None): """ Construct the "datatable" structure to represent background task history. @@ -1538,14 +1532,16 @@ def get_background_task_table(course_id, problem_url, student=None): Returns a tuple of (msg, datatable), where the msg is a possible error message, and the datatable is the datatable to be used for display. """ - history_entries = get_instructor_task_history(course_id, problem_url, student) + history_entries = get_instructor_task_history(course_id, problem_url, student, task_type) datatable = {} msg = "" # first check to see if there is any history at all # (note that we don't have to check that the arguments are valid; it # just won't find any entries.) if (history_entries.count()) == 0: - if student is not None: + if problem_url is None: + msg += 'Failed to find any background tasks for course "{course}".'.format(course=course_id) + elif student is not None: template = 'Failed to find any background tasks for course "{course}", module "{problem}" and student "{student}".' 
msg += template.format(course=course_id, problem=problem_url, student=student.username) else: @@ -1582,7 +1578,9 @@ def get_background_task_table(course_id, problem_url, student=None): task_message] datatable['data'].append(row) - if student is not None: + if problem_url is None: + datatable['title'] = "{course_id}".format(course_id=course_id) + elif student is not None: datatable['title'] = "{course_id} > {location} > {student}".format(course_id=course_id, location=problem_url, student=student.username) diff --git a/lms/djangoapps/instructor_task/api.py b/lms/djangoapps/instructor_task/api.py index bd3c5e033a..7521a8eb3a 100644 --- a/lms/djangoapps/instructor_task/api.py +++ b/lms/djangoapps/instructor_task/api.py @@ -6,6 +6,7 @@ already been submitted, filtered either by running state or input arguments. """ +import hashlib from celery.states import READY_STATES @@ -14,11 +15,13 @@ from xmodule.modulestore.django import modulestore from instructor_task.models import InstructorTask from instructor_task.tasks import (rescore_problem, reset_problem_attempts, - delete_problem_state) + delete_problem_state, + send_bulk_course_email) from instructor_task.api_helper import (check_arguments_for_rescoring, encode_problem_and_student_input, submit_task) +from bulk_email.models import CourseEmail def get_running_instructor_tasks(course_id): @@ -34,14 +37,18 @@ def get_running_instructor_tasks(course_id): return instructor_tasks.order_by('-id') -def get_instructor_task_history(course_id, problem_url, student=None): +def get_instructor_task_history(course_id, problem_url=None, student=None, task_type=None): """ Returns a query of InstructorTask objects of historical tasks for a given course, - that match a particular problem and optionally a student. + that optionally match a particular problem, a student, and/or a task type. 
""" - _, task_key = encode_problem_and_student_input(problem_url, student) + instructor_tasks = InstructorTask.objects.filter(course_id=course_id) + if problem_url is not None or student is not None: + _, task_key = encode_problem_and_student_input(problem_url, student) + instructor_tasks = instructor_tasks.filter(task_key=task_key) + if task_type is not None: + instructor_tasks = instructor_tasks.filter(task_type=task_type) - instructor_tasks = InstructorTask.objects.filter(course_id=course_id, task_key=task_key) return instructor_tasks.order_by('-id') @@ -162,3 +169,40 @@ def submit_delete_problem_state_for_all_students(request, course_id, problem_url task_class = delete_problem_state task_input, task_key = encode_problem_and_student_input(problem_url) return submit_task(request, task_type, task_class, course_id, task_input, task_key) + + +def submit_bulk_course_email(request, course_id, email_id): + """ + Request to have bulk email sent as a background task. + + The specified CourseEmail object will be sent be updated for all students who have enrolled + in a course. Parameters are the `course_id` and the `email_id`, the id of the CourseEmail object. + + AlreadyRunningError is raised if the same recipients are already being emailed with the same + CourseEmail object. + + This method makes sure the InstructorTask entry is committed. + When called from any view that is wrapped by TransactionMiddleware, + and thus in a "commit-on-success" transaction, an autocommit buried within here + will cause any pending transaction to be committed by a successful + save here. Any future database operations will take place in a + separate transaction. + """ + # Assume that the course is defined, and that the user has already been verified to have + # appropriate access to the course. But make sure that the email exists. + # We also pull out the To argument here, so that is displayed in + # the InstructorTask status. 
+ email_obj = CourseEmail.objects.get(id=email_id) + to_option = email_obj.to_option + + task_type = 'bulk_course_email' + task_class = send_bulk_course_email + # Pass in the to_option as a separate argument, even though it's (currently) + # in the CourseEmail. That way it's visible in the progress status. + # (At some point in the future, we might take the recipient out of the CourseEmail, + # so that the same saved email can be sent to different recipients, as it is tested.) + task_input = {'email_id': email_id, 'to_option': to_option} + task_key_stub = "{email_id}_{to_option}".format(email_id=email_id, to_option=to_option) + # create the key value by using MD5 hash: + task_key = hashlib.md5(task_key_stub).hexdigest() + return submit_task(request, task_type, task_class, course_id, task_input, task_key) diff --git a/lms/djangoapps/instructor_task/api_helper.py b/lms/djangoapps/instructor_task/api_helper.py index 2795fd08c1..37a9852caa 100644 --- a/lms/djangoapps/instructor_task/api_helper.py +++ b/lms/djangoapps/instructor_task/api_helper.py @@ -58,13 +58,14 @@ def _reserve_task(course_id, task_type, task_key, task_input, requester): return InstructorTask.create(course_id, task_type, task_key, task_input, requester) -def _get_xmodule_instance_args(request): +def _get_xmodule_instance_args(request, task_id): """ Calculate parameters needed for instantiating xmodule instances. The `request_info` will be passed to a tracking log function, to provide information about the source of the task request. The `xqueue_callback_url_prefix` is used to permit old-style xqueue callbacks directly to the appropriate module in the LMS. + The `task_id` is also passed to the tracking log function. 
""" request_info = {'username': request.user.username, 'ip': request.META['REMOTE_ADDR'], @@ -74,6 +75,7 @@ def _get_xmodule_instance_args(request): xmodule_instance_args = {'xqueue_callback_url_prefix': get_xqueue_callback_url_prefix(request), 'request_info': request_info, + 'task_id': task_id, } return xmodule_instance_args @@ -88,10 +90,16 @@ def _update_instructor_task(instructor_task, task_result): is usually not saved. In general, tasks that have finished (either with success or failure) should have their entries updated by the task itself, so are not updated here. Tasks that are still running are not updated - while they run. So the one exception to the no-save rule are tasks that + and saved while they run. The one exception to the no-save rule are tasks that are in a "revoked" state. This may mean that the task never had the opportunity to update the InstructorTask entry. + Tasks that are in progress and have subtasks doing the processing do not look + to the task's AsyncResult object. When subtasks are running, the + InstructorTask object itself is updated with the subtasks' progress, + not any AsyncResult object. In this case, the InstructorTask is + not updated at all. + Calculates json to store in "task_output" field of the `instructor_task`, as well as updating the task_state. @@ -108,11 +116,21 @@ def _update_instructor_task(instructor_task, task_result): returned_result = task_result.result result_traceback = task_result.traceback - # Assume we don't always update the InstructorTask entry if we don't have to: + # Assume we don't always save the InstructorTask entry if we don't have to, + # but that in most cases we will update the InstructorTask in-place with its + # current progress. 
+ entry_needs_updating = True entry_needs_saving = False task_output = None - if result_state in [PROGRESS, SUCCESS]: + if instructor_task.task_state == PROGRESS and len(instructor_task.subtasks) > 0: + # This happens when running subtasks: the result object is marked with SUCCESS, + # meaning that the subtasks have successfully been defined. However, the InstructorTask + # will be marked as in PROGRESS, until the last subtask completes and marks it as SUCCESS. + # We want to ignore the parent SUCCESS if subtasks are still running, and just trust the + # contents of the InstructorTask. + entry_needs_updating = False + elif result_state in [PROGRESS, SUCCESS]: # construct a status message directly from the task result's result: # it needs to go back with the entry passed in. log.info("background task (%s), state %s: result: %s", task_id, result_state, returned_result) @@ -134,12 +152,13 @@ def _update_instructor_task(instructor_task, task_result): # save progress and state into the entry, even if it's not being saved: # when celery is run in "ALWAYS_EAGER" mode, progress needs to go back # with the entry passed in. - instructor_task.task_state = result_state - if task_output is not None: - instructor_task.task_output = task_output + if entry_needs_updating: + instructor_task.task_state = result_state + if task_output is not None: + instructor_task.task_output = task_output - if entry_needs_saving: - instructor_task.save() + if entry_needs_saving: + instructor_task.save() def get_updated_instructor_task(task_id): @@ -175,7 +194,7 @@ def get_status_from_instructor_task(instructor_task): 'in_progress': boolean indicating if task is still running. 'task_progress': dict containing progress information. This includes: 'attempted': number of attempts made - 'updated': number of attempts that "succeeded" + 'succeeded': number of attempts that "succeeded" 'total': number of possible subtasks to attempt 'action_name': user-visible verb to use in status messages. 
Should be past-tense. 'duration_ms': how long the task has (or had) been running. @@ -214,7 +233,7 @@ def check_arguments_for_rescoring(course_id, problem_url): def encode_problem_and_student_input(problem_url, student=None): """ - Encode problem_url and optional student into task_key and task_input values. + Encode optional problem_url and optional student into task_key and task_input values. `problem_url` is full URL of the problem. `student` is the user object of the student @@ -257,7 +276,7 @@ def submit_task(request, task_type, task_class, course_id, task_input, task_key) # submit task: task_id = instructor_task.task_id - task_args = [instructor_task.id, _get_xmodule_instance_args(request)] + task_args = [instructor_task.id, _get_xmodule_instance_args(request, task_id)] # pylint: disable=E1101 task_class.apply_async(task_args, task_id=task_id) return instructor_task diff --git a/lms/djangoapps/instructor_task/migrations/0002_add_subtask_field.py b/lms/djangoapps/instructor_task/migrations/0002_add_subtask_field.py new file mode 100644 index 0000000000..845dffd856 --- /dev/null +++ b/lms/djangoapps/instructor_task/migrations/0002_add_subtask_field.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Adding field 'InstructorTask.subtasks' + db.add_column('instructor_task_instructortask', 'subtasks', + self.gf('django.db.models.fields.TextField')(default='', blank=True), + keep_default=False) + + + def backwards(self, orm): + # Deleting field 'InstructorTask.subtasks' + db.delete_column('instructor_task_instructortask', 'subtasks') + + + models = { + 'auth.group': { + 'Meta': {'object_name': 'Group'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}), + 'permissions': 
('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}) + }, + 'auth.permission': { + 'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'}, + 'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}) + }, + 'auth.user': { + 'Meta': {'object_name': 'User'}, + 'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), + 'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}), + 'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), + 'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), + 'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), + 'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}), + 'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 
'max_length': '30'}) + }, + 'contenttypes.contenttype': { + 'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"}, + 'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}) + }, + 'instructor_task.instructortask': { + 'Meta': {'object_name': 'InstructorTask'}, + 'course_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}), + 'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'null': 'True', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'requester': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}), + 'subtasks': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'task_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}), + 'task_input': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'task_key': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}), + 'task_output': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True'}), + 'task_state': ('django.db.models.fields.CharField', [], {'max_length': '50', 'null': 'True', 'db_index': 'True'}), + 'task_type': ('django.db.models.fields.CharField', [], {'max_length': '50', 'db_index': 'True'}), + 'updated': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}) + } + } + + complete_apps = ['instructor_task'] \ No newline at end of file diff --git a/lms/djangoapps/instructor_task/models.py b/lms/djangoapps/instructor_task/models.py index b28a9a3d83..8d6376fae3 100644 --- 
a/lms/djangoapps/instructor_task/models.py +++ b/lms/djangoapps/instructor_task/models.py @@ -56,6 +56,7 @@ class InstructorTask(models.Model): requester = models.ForeignKey(User, db_index=True) created = models.DateTimeField(auto_now_add=True, null=True) updated = models.DateTimeField(auto_now=True) + subtasks = models.TextField(blank=True) # JSON dictionary def __repr__(self): return 'InstructorTask<%r>' % ({ diff --git a/lms/djangoapps/instructor_task/subtasks.py b/lms/djangoapps/instructor_task/subtasks.py new file mode 100644 index 0000000000..17da1b9ed6 --- /dev/null +++ b/lms/djangoapps/instructor_task/subtasks.py @@ -0,0 +1,242 @@ +""" +This module contains celery task functions for handling the management of subtasks. +""" +from time import time +import json + +from celery.utils.log import get_task_logger +from celery.states import SUCCESS, READY_STATES + +from django.db import transaction + +from instructor_task.models import InstructorTask, PROGRESS, QUEUING + +TASK_LOG = get_task_logger(__name__) + + +def create_subtask_status(task_id, succeeded=0, failed=0, skipped=0, retried_nomax=0, retried_withmax=0, state=None): + """ + Create and return a dict for tracking the status of a subtask. + + Subtask status keys are: + + 'task_id' : id of subtask. This is used to pass task information across retries. + 'attempted' : number of attempts -- should equal succeeded plus failed + 'succeeded' : number that succeeded in processing + 'skipped' : number that were not processed. + 'failed' : number that failed during processing + 'retried_nomax' : number of times the subtask has been retried for conditions that + should not have a maximum count applied + 'retried_withmax' : number of times the subtask has been retried for conditions that + should have a maximum count applied + 'state' : celery state of the subtask (e.g. QUEUING, PROGRESS, RETRY, FAILURE, SUCCESS) + + Object must be JSON-serializable, so that it can be passed as an argument + to tasks. 
+ + In future, we may want to include specific error information + indicating the reason for failure. + Also, we should count up "not attempted" separately from attempted/failed. + """ + attempted = succeeded + failed + current_result = { + 'task_id': task_id, + 'attempted': attempted, + 'succeeded': succeeded, + 'skipped': skipped, + 'failed': failed, + 'retried_nomax': retried_nomax, + 'retried_withmax': retried_withmax, + 'state': state if state is not None else QUEUING, + } + return current_result + + +def increment_subtask_status(subtask_result, succeeded=0, failed=0, skipped=0, retried_nomax=0, retried_withmax=0, state=None): + """ + Update the result of a subtask with additional results. + + Create and return a dict for tracking the status of a subtask. + + Keys for input `subtask_result` and returned subtask_status are: + + 'task_id' : id of subtask. This is used to pass task information across retries. + 'attempted' : number of attempts -- should equal succeeded plus failed + 'succeeded' : number that succeeded in processing + 'skipped' : number that were not processed. + 'failed' : number that failed during processing + 'retried_nomax' : number of times the subtask has been retried for conditions that + should not have a maximum count applied + 'retried_withmax' : number of times the subtask has been retried for conditions that + should have a maximum count applied + 'state' : celery state of the subtask (e.g. QUEUING, PROGRESS, RETRY, FAILURE, SUCCESS) + + Kwarg arguments are incremented to the corresponding key in `subtask_result`. + The exception is for `state`, which if specified is used to override the existing value. 
+ """ + new_result = dict(subtask_result) + new_result['attempted'] += (succeeded + failed) + new_result['succeeded'] += succeeded + new_result['failed'] += failed + new_result['skipped'] += skipped + new_result['retried_nomax'] += retried_nomax + new_result['retried_withmax'] += retried_withmax + if state is not None: + new_result['state'] = state + + return new_result + + +def initialize_subtask_info(entry, action_name, total_num, subtask_id_list): + """ + Store initial subtask information to InstructorTask object. + + The InstructorTask's "task_output" field is initialized. This is a JSON-serialized dict. + Counters for 'attempted', 'succeeded', 'failed', 'skipped' keys are initialized to zero, + as is the 'duration_ms' value. A 'start_time' is stored for later duration calculations, + and the total number of "things to do" is set, so the user can be told how much needs to be + done overall. The `action_name` is also stored, to help with constructing more readable + task_progress messages. + + The InstructorTask's "subtasks" field is also initialized. This is also a JSON-serialized dict. + Keys include 'total', 'succeeded', 'retried', 'failed', which are counters for the number of + subtasks. 'Total' is set here to the total number, while the other three are initialized to zero. + Once the counters for 'succeeded' and 'failed' match the 'total', the subtasks are done and + the InstructorTask's "status" will be changed to SUCCESS. + + The "subtasks" field also contains a 'status' key, that contains a dict that stores status + information for each subtask. The value for each subtask (keyed by its task_id) + is its subtask status, as defined by create_subtask_status(). + + This information needs to be set up in the InstructorTask before any of the subtasks start + running. If not, there is a chance that the subtasks could complete before the parent task + is done creating subtasks. Doing so also simplifies the save() here, as it avoids the need + for locking. 
+ + Monitoring code should assume that if an InstructorTask has subtask information, that it should + rely on the status stored in the InstructorTask object, rather than status stored in the + corresponding AsyncResult. + """ + task_progress = { + 'action_name': action_name, + 'attempted': 0, + 'failed': 0, + 'skipped': 0, + 'succeeded': 0, + 'total': total_num, + 'duration_ms': int(0), + 'start_time': time() + } + entry.task_output = InstructorTask.create_output_for_success(task_progress) + entry.task_state = PROGRESS + + # Write out the subtasks information. + num_subtasks = len(subtask_id_list) + # Note that may not be necessary to store initial value with all those zeroes! + subtask_status = {subtask_id: create_subtask_status(subtask_id) for subtask_id in subtask_id_list} + subtask_dict = { + 'total': num_subtasks, + 'succeeded': 0, + 'failed': 0, + 'status': subtask_status + } + entry.subtasks = json.dumps(subtask_dict) + + # and save the entry immediately, before any subtasks actually start work: + entry.save_now() + return task_progress + + +@transaction.commit_manually +def update_subtask_status(entry_id, current_task_id, new_subtask_status): + """ + Update the status of the subtask in the parent InstructorTask object tracking its progress. + + Uses select_for_update to lock the InstructorTask object while it is being updated. + The operation is surrounded by a try/except/else that permit the manual transaction to be + committed on completion, or rolled back on error. + + The InstructorTask's "task_output" field is updated. This is a JSON-serialized dict. + Accumulates values for 'attempted', 'succeeded', 'failed', 'skipped' from `new_subtask_status` + into the corresponding values in the InstructorTask's task_output. Also updates the 'duration_ms' + value with the current interval since the original InstructorTask started. 
Note that this + value is only approximate, since the subtask may be running on a different server than the + original task, so is subject to clock skew. + + The InstructorTask's "subtasks" field is also updated.  This is also a JSON-serialized dict. + Keys include 'total', 'succeeded' and 'failed', which are counters for the number of + subtasks.  'Total' is expected to have been set at the time the subtasks were created. + The other two counters are incremented depending on the state in `new_subtask_status`.  Once the counters + for 'succeeded' and 'failed' match the 'total', the subtasks are done and the InstructorTask's + "status" is changed to SUCCESS. + + The "subtasks" field also contains a 'status' key, that contains a dict that stores status + information for each subtask.  At the moment, the value for each subtask (keyed by its task_id) + is the value of `new_subtask_status`, but could be expanded in future to store information about failure + messages, progress made, etc. + """ + TASK_LOG.info("Preparing to update status for email subtask %s for instructor task %d with status %s", + current_task_id, entry_id, new_subtask_status) + + try: + entry = InstructorTask.objects.select_for_update().get(pk=entry_id) + subtask_dict = json.loads(entry.subtasks) + subtask_status_info = subtask_dict['status'] + if current_task_id not in subtask_status_info: + # unexpected error -- raise an exception + format_str = "Unexpected task_id '{}': unable to update status for email subtask of instructor task '{}'" + msg = format_str.format(current_task_id, entry_id) + TASK_LOG.warning(msg) + raise ValueError(msg) + + # Update status: + subtask_status_info[current_task_id] = new_subtask_status + + # Update the parent task progress. + # Set the estimate of duration, but only if it + # increases.  Clock skew between time() returned by different machines + # may result in non-monotonic values for duration.
+ task_progress = json.loads(entry.task_output) + start_time = task_progress['start_time'] + prev_duration = task_progress['duration_ms'] + new_duration = int((time() - start_time) * 1000) + task_progress['duration_ms'] = max(prev_duration, new_duration) + + # Update counts only when subtask is done. + # In future, we can make this more responsive by updating status + # between retries, by comparing counts that change from previous + # retry. + new_state = new_subtask_status['state'] + if new_subtask_status is not None and new_state in READY_STATES: + for statname in ['attempted', 'succeeded', 'failed', 'skipped']: + task_progress[statname] += new_subtask_status[statname] + + # Figure out if we're actually done (i.e. this is the last task to complete). + # This is easier if we just maintain a counter, rather than scanning the + # entire new_subtask_status dict. + if new_state == SUCCESS: + subtask_dict['succeeded'] += 1 + elif new_state in READY_STATES: + subtask_dict['failed'] += 1 + num_remaining = subtask_dict['total'] - subtask_dict['succeeded'] - subtask_dict['failed'] + + # If we're done with the last task, update the parent status to indicate that. + # At present, we mark the task as having succeeded. In future, we should see + # if there was a catastrophic failure that occurred, and figure out how to + # report that here. 
+ if num_remaining <= 0: + entry.task_state = SUCCESS + entry.subtasks = json.dumps(subtask_dict) + entry.task_output = InstructorTask.create_output_for_success(task_progress) + + TASK_LOG.info("Task output updated to %s for email subtask %s of instructor task %d", + entry.task_output, current_task_id, entry_id) + TASK_LOG.debug("about to save....") + entry.save() + except Exception: + TASK_LOG.exception("Unexpected error while updating InstructorTask.") + transaction.rollback() + raise + else: + TASK_LOG.debug("about to commit....") + transaction.commit() diff --git a/lms/djangoapps/instructor_task/tasks.py b/lms/djangoapps/instructor_task/tasks.py index b045de470a..f30ffe3af2 100644 --- a/lms/djangoapps/instructor_task/tasks.py +++ b/lms/djangoapps/instructor_task/tasks.py @@ -19,14 +19,21 @@ a problem URL and optionally a student. These are used to set up the initial va of the query for traversing StudentModule objects. """ +from django.utils.translation import ugettext_noop from celery import task -from instructor_task.tasks_helper import (update_problem_module_state, - rescore_problem_module_state, - reset_attempts_module_state, - delete_problem_module_state) +from functools import partial +from instructor_task.tasks_helper import ( + run_main_task, + BaseInstructorTask, + perform_module_state_update, + rescore_problem_module_state, + reset_attempts_module_state, + delete_problem_module_state, +) +from bulk_email.tasks import perform_delegate_email_batches -@task +@task(base=BaseInstructorTask) # pylint: disable=E1102 def rescore_problem(entry_id, xmodule_instance_args): """Rescores a problem in a course, for all students or one specific student. @@ -45,15 +52,19 @@ def rescore_problem(entry_id, xmodule_instance_args): `xmodule_instance_args` provides information needed by _get_module_instance_for_task() to instantiate an xmodule instance. 
""" - action_name = 'rescored' - update_fcn = rescore_problem_module_state - filter_fcn = lambda(modules_to_update): modules_to_update.filter(state__contains='"done": true') - return update_problem_module_state(entry_id, - update_fcn, action_name, filter_fcn=filter_fcn, - xmodule_instance_args=xmodule_instance_args) + # Translators: This is a past-tense verb that is inserted into task progress messages as {action}. + action_name = ugettext_noop('rescored') + update_fcn = partial(rescore_problem_module_state, xmodule_instance_args) + + def filter_fcn(modules_to_update): + """Filter that matches problems which are marked as being done""" + return modules_to_update.filter(state__contains='"done": true') + + visit_fcn = partial(perform_module_state_update, update_fcn, filter_fcn) + return run_main_task(entry_id, visit_fcn, action_name) -@task +@task(base=BaseInstructorTask) # pylint: disable=E1102 def reset_problem_attempts(entry_id, xmodule_instance_args): """Resets problem attempts to zero for a particular problem for all students in a course. @@ -68,14 +79,14 @@ def reset_problem_attempts(entry_id, xmodule_instance_args): `xmodule_instance_args` provides information needed by _get_module_instance_for_task() to instantiate an xmodule instance. """ - action_name = 'reset' - update_fcn = reset_attempts_module_state - return update_problem_module_state(entry_id, - update_fcn, action_name, filter_fcn=None, - xmodule_instance_args=xmodule_instance_args) + # Translators: This is a past-tense verb that is inserted into task progress messages as {action}. 
+ action_name = ugettext_noop('reset') + update_fcn = partial(reset_attempts_module_state, xmodule_instance_args) + visit_fcn = partial(perform_module_state_update, update_fcn, None) + return run_main_task(entry_id, visit_fcn, action_name) -@task +@task(base=BaseInstructorTask)  # pylint: disable=E1102 def delete_problem_state(entry_id, xmodule_instance_args): """Deletes problem state entirely for all students on a particular problem in a course. @@ -90,8 +101,29 @@ def delete_problem_state(entry_id, xmodule_instance_args): `xmodule_instance_args` provides information needed by _get_module_instance_for_task() to instantiate an xmodule instance. """ - action_name = 'deleted' - update_fcn = delete_problem_module_state - return update_problem_module_state(entry_id, - update_fcn, action_name, filter_fcn=None, - xmodule_instance_args=xmodule_instance_args) + # Translators: This is a past-tense verb that is inserted into task progress messages as {action}. + action_name = ugettext_noop('deleted') + update_fcn = partial(delete_problem_module_state, xmodule_instance_args) + visit_fcn = partial(perform_module_state_update, update_fcn, None) + return run_main_task(entry_id, visit_fcn, action_name) + + +@task(base=BaseInstructorTask)  # pylint: disable=E1102 +def send_bulk_course_email(entry_id, _xmodule_instance_args): +    """Sends emails to recipients enrolled in a course. + +    `entry_id` is the id value of the InstructorTask entry that corresponds to this task. +    The entry contains the `course_id` that identifies the course, as well as the +    `task_input`, which contains task-specific input. + +    The task_input should be a dict with the following entries: + +      'email_id': the id of the CourseEmail model instance containing the email to be sent. (required) + +    `_xmodule_instance_args` provides information needed by _get_module_instance_for_task() +    to instantiate an xmodule instance.  This is unused here. +    """ +    # Translators: This is a past-tense verb that is inserted into task progress messages as {action}.
+ action_name = ugettext_noop('emailed') + visit_fcn = perform_delegate_email_batches + return run_main_task(entry_id, visit_fcn, action_name) diff --git a/lms/djangoapps/instructor_task/tasks_helper.py b/lms/djangoapps/instructor_task/tasks_helper.py index de5ac1e53b..cf828edb5b 100644 --- a/lms/djangoapps/instructor_task/tasks_helper.py +++ b/lms/djangoapps/instructor_task/tasks_helper.py @@ -3,19 +3,15 @@ This file contains tasks that are designed to perform background operations on t running state of a course. """ - import json from time import time -from sys import exc_info -from traceback import format_exc -from celery import current_task +from celery import Task, current_task from celery.utils.log import get_task_logger -from celery.signals import worker_process_init from celery.states import SUCCESS, FAILURE from django.contrib.auth.models import User -from django.db import transaction +from django.db import transaction, reset_queries from dogapi import dog_stats_api from xmodule.modulestore.django import modulestore @@ -33,6 +29,96 @@ TASK_LOG = get_task_logger(__name__) # define value to use when no task_id is provided: UNKNOWN_TASK_ID = 'unknown-task_id' +# define values for update functions to use to return status to perform_module_state_update +UPDATE_STATUS_SUCCEEDED = 'succeeded' +UPDATE_STATUS_FAILED = 'failed' +UPDATE_STATUS_SKIPPED = 'skipped' + + +class BaseInstructorTask(Task): + """ + Base task class for use with InstructorTask models. + + Permits updating information about task in corresponding InstructorTask for monitoring purposes. + + Assumes that the entry_id of the InstructorTask model is the first argument to the task. + + The `entry_id` is the primary key for the InstructorTask entry representing the task. This class + updates the entry on success and failure of the task it wraps. 
It is setting the entry's value + for task_state based on what Celery would set it to once the task returns to Celery: + FAILURE if an exception is encountered, and SUCCESS if it returns normally. + Other arguments are pass-throughs to perform_module_state_update, and documented there. + """ + abstract = True + + def on_success(self, task_progress, task_id, args, kwargs): + """ + Update InstructorTask object corresponding to this task with info about success. + + Updates task_output and task_state. But it shouldn't actually do anything + if the task is only creating subtasks to actually do the work. + + Assumes `task_progress` is a dict containing the task's result, with the following keys: + + 'attempted': number of attempts made + 'succeeded': number of attempts that "succeeded" + 'skipped': number of attempts that "skipped" + 'failed': number of attempts that "failed" + 'total': number of possible subtasks to attempt + 'action_name': user-visible verb to use in status messages. Should be past-tense. + Pass-through of input `action_name`. + 'duration_ms': how long the task has (or had) been running. + + This is JSON-serialized and stored in the task_output column of the InstructorTask entry. + + """ + TASK_LOG.debug('Task %s: success returned with progress: %s', task_id, task_progress) + # We should be able to find the InstructorTask object to update + # based on the task_id here, without having to dig into the + # original args to the task. On the other hand, the entry_id + # is the first value passed to all such args, so we'll use that. + # And we assume that it exists, else we would already have had a failure. + entry_id = args[0] + entry = InstructorTask.objects.get(pk=entry_id) + # Check to see if any subtasks had been defined as part of this task. + # If not, then we know that we're done. (If so, let the subtasks + # handle updating task_state themselves.) 
+ if len(entry.subtasks) == 0: + entry.task_output = InstructorTask.create_output_for_success(task_progress) + entry.task_state = SUCCESS + entry.save_now() + + def on_failure(self, exc, task_id, args, kwargs, einfo): + """ + Update InstructorTask object corresponding to this task with info about failure. + + Fetches and updates exception and traceback information on failure. + + If an exception is raised internal to the task, it is caught by celery and provided here. + The information is recorded in the InstructorTask object as a JSON-serialized dict + stored in the task_output column. It contains the following keys: + + 'exception': type of exception object + 'message': error message from exception object + 'traceback': traceback information (truncated if necessary) + + Note that there is no way to record progress made within the task (e.g. attempted, + succeeded, etc.) when such failures occur. + """ + TASK_LOG.debug('Task %s: failure returned', task_id) + entry_id = args[0] + try: + entry = InstructorTask.objects.get(pk=entry_id) + except InstructorTask.DoesNotExist: + # if the InstructorTask object does not exist, then there's no point + # trying to update it. + TASK_LOG.error("Task (%s) has no InstructorTask object for id %s", task_id, entry_id) + else: + TASK_LOG.warning("Task (%s) failed: %s %s", task_id, einfo.exception, einfo.traceback) + entry.task_output = InstructorTask.create_output_for_failure(einfo.exception, einfo.traceback) + entry.task_state = FAILURE + entry.save_now() + class UpdateProblemModuleStateError(Exception): """ @@ -45,12 +131,80 @@ class UpdateProblemModuleStateError(Exception): def _get_current_task(): - """Stub to make it easier to test without actually running Celery""" + """ + Stub to make it easier to test without actually running Celery. + + This is a wrapper around celery.current_task, which provides access + to the top of the stack of Celery's tasks. 
When running tests, however, + it doesn't seem to work to mock current_task directly, so this wrapper + is used to provide a hook to mock in tests, while providing the real + `current_task` in production. + """ return current_task -def _perform_module_state_update(course_id, module_state_key, student_identifier, update_fcn, action_name, filter_fcn, - xmodule_instance_args): +def run_main_task(entry_id, task_fcn, action_name): + """ + Applies the `task_fcn` to the arguments defined in `entry_id` InstructorTask. + + Arguments passed to `task_fcn` are: + + `entry_id` : the primary key for the InstructorTask entry representing the task. + `course_id` : the id for the course. + `task_input` : dict containing task-specific arguments, JSON-decoded from InstructorTask's task_input. + `action_name` : past-tense verb to use for constructing status messages. + + If no exceptions are raised, the `task_fcn` should return a dict containing + the task's result with the following keys: + + 'attempted': number of attempts made + 'succeeded': number of attempts that "succeeded" + 'skipped': number of attempts that "skipped" + 'failed': number of attempts that "failed" + 'total': number of possible subtasks to attempt + 'action_name': user-visible verb to use in status messages. + Should be past-tense. Pass-through of input `action_name`. + 'duration_ms': how long the task has (or had) been running. + + """ + + # get the InstructorTask to be updated. If this fails, then let the exception return to Celery. + # There's no point in catching it here. 
+ entry = InstructorTask.objects.get(pk=entry_id) + + # get inputs to use in this task from the entry: + task_id = entry.task_id + course_id = entry.course_id + task_input = json.loads(entry.task_input) + + # construct log message: + fmt = 'task "{task_id}": course "{course_id}" input "{task_input}"' + task_info_string = fmt.format(task_id=task_id, course_id=course_id, task_input=task_input) + + TASK_LOG.info('Starting update (nothing %s yet): %s', action_name, task_info_string) + + # Check that the task_id submitted in the InstructorTask matches the current task + # that is running. + request_task_id = _get_current_task().request.id + if task_id != request_task_id: + fmt = 'Requested task did not match actual task "{actual_id}": {task_info}' + message = fmt.format(actual_id=request_task_id, task_info=task_info_string) + TASK_LOG.error(message) + raise ValueError(message) + + # Now do the work: + with dog_stats_api.timer('instructor_tasks.time.overall', tags=['action:{name}'.format(name=action_name)]): + task_progress = task_fcn(entry_id, course_id, task_input, action_name) + + # Release any queries that the connection has been hanging onto: + reset_queries() + + # log and exit, returning task_progress info as task result: + TASK_LOG.info('Finishing %s: final: %s', task_info_string, task_progress) + return task_progress + + +def perform_module_state_update(update_fcn, filter_fcn, _entry_id, course_id, task_input, action_name): """ Performs generic update by visiting StudentModule instances with the update_fcn provided. 
@@ -71,8 +225,10 @@ def _perform_module_state_update(course_id, module_state_key, student_identifier The return value is a dict containing the task's results, with the following keys: 'attempted': number of attempts made - 'updated': number of attempts that "succeeded" - 'total': number of possible subtasks to attempt + 'succeeded': number of attempts that "succeeded" + 'skipped': number of attempts that "skipped" + 'failed': number of attempts that "failed" + 'total': number of possible updates to attempt 'action_name': user-visible verb to use in status messages. Should be past-tense. Pass-through of input `action_name`. 'duration_ms': how long the task has (or had) been running. @@ -85,6 +241,9 @@ def _perform_module_state_update(course_id, module_state_key, student_identifier # get start time for task: start_time = time() + module_state_key = task_input.get('problem_url') + student_identifier = task_input.get('student') + # find the problem descriptor: module_descriptor = modulestore().get_instance(course_id, module_state_key) @@ -92,8 +251,8 @@ def _perform_module_state_update(course_id, module_state_key, student_identifier modules_to_update = StudentModule.objects.filter(course_id=course_id, module_state_key=module_state_key) - # give the option of rescoring an individual student. If not specified, - # then rescores all students who have responded to a problem so far + # give the option of updating an individual student. 
If not specified, + # then updates all students who have responded to a problem so far student = None if student_identifier is not None: # if an identifier is supplied, then look for the student, @@ -110,8 +269,10 @@ def _perform_module_state_update(course_id, module_state_key, student_identifier modules_to_update = filter_fcn(modules_to_update) # perform the main loop - num_updated = 0 num_attempted = 0 + num_succeeded = 0 + num_skipped = 0 + num_failed = 0 num_total = modules_to_update.count() def get_task_progress(): @@ -119,7 +280,9 @@ def _perform_module_state_update(course_id, module_state_key, student_identifier current_time = time() progress = {'action_name': action_name, 'attempted': num_attempted, - 'updated': num_updated, + 'succeeded': num_succeeded, + 'skipped': num_skipped, + 'failed': num_failed, 'total': num_total, 'duration_ms': int((current_time - start_time) * 1000), } @@ -132,10 +295,17 @@ def _perform_module_state_update(course_id, module_state_key, student_identifier # There is no try here: if there's an error, we let it throw, and the task will # be marked as FAILED, with a stack trace. with dog_stats_api.timer('instructor_tasks.module.time.step', tags=['action:{name}'.format(name=action_name)]): - if update_fcn(module_descriptor, module_to_update, xmodule_instance_args): + update_status = update_fcn(module_descriptor, module_to_update) + if update_status == UPDATE_STATUS_SUCCEEDED: # If the update_fcn returns true, then it performed some kind of work. # Logging of failures is left to the update_fcn itself. 
- num_updated += 1 + num_succeeded += 1 + elif update_status == UPDATE_STATUS_FAILED: + num_failed += 1 + elif update_status == UPDATE_STATUS_SKIPPED: + num_skipped += 1 + else: + raise UpdateProblemModuleStateError("Unexpected update_status returned: {}".format(update_status)) # update task status: task_progress = get_task_progress() @@ -144,103 +314,32 @@ def _perform_module_state_update(course_id, module_state_key, student_identifier return task_progress -def update_problem_module_state(entry_id, update_fcn, action_name, filter_fcn, - xmodule_instance_args): - """ - Performs generic update by visiting StudentModule instances with the update_fcn provided. - - The `entry_id` is the primary key for the InstructorTask entry representing the task. This function - updates the entry on success and failure of the _perform_module_state_update function it - wraps. It is setting the entry's value for task_state based on what Celery would set it to once - the task returns to Celery: FAILURE if an exception is encountered, and SUCCESS if it returns normally. - Other arguments are pass-throughs to _perform_module_state_update, and documented there. - - If no exceptions are raised, a dict containing the task's result is returned, with the following keys: - - 'attempted': number of attempts made - 'updated': number of attempts that "succeeded" - 'total': number of possible subtasks to attempt - 'action_name': user-visible verb to use in status messages. Should be past-tense. - Pass-through of input `action_name`. - 'duration_ms': how long the task has (or had) been running. - - Before returning, this is also JSON-serialized and stored in the task_output column of the InstructorTask entry. - - If an exception is raised internally, it is caught and recorded in the InstructorTask entry. 
- This is also a JSON-serialized dict, stored in the task_output column, containing the following keys: - - 'exception': type of exception object - 'message': error message from exception object - 'traceback': traceback information (truncated if necessary) - - Once the exception is caught, it is raised again and allowed to pass up to the - task-running level, so that it can also set the failure modes and capture the error trace in the - result object that Celery creates. - - """ - - # get the InstructorTask to be updated. If this fails, then let the exception return to Celery. - # There's no point in catching it here. - entry = InstructorTask.objects.get(pk=entry_id) - - # get inputs to use in this task from the entry: - task_id = entry.task_id - course_id = entry.course_id - task_input = json.loads(entry.task_input) - module_state_key = task_input.get('problem_url') - student_ident = task_input['student'] if 'student' in task_input else None - - fmt = 'Starting to update problem modules as task "{task_id}": course "{course_id}" problem "{state_key}": nothing {action} yet' - TASK_LOG.info(fmt.format(task_id=task_id, course_id=course_id, state_key=module_state_key, action=action_name)) - - # add task_id to xmodule_instance_args, so that it can be output with tracking info: - if xmodule_instance_args is not None: - xmodule_instance_args['task_id'] = task_id - - # Now that we have an entry we can try to catch failures: - task_progress = None - try: - # Check that the task_id submitted in the InstructorTask matches the current task - # that is running. 
- request_task_id = _get_current_task().request.id - if task_id != request_task_id: - fmt = 'Requested task "{task_id}" did not match actual task "{actual_id}"' - message = fmt.format(task_id=task_id, course_id=course_id, state_key=module_state_key, actual_id=request_task_id) - TASK_LOG.error(message) - raise UpdateProblemModuleStateError(message) - - # Now do the work: - with dog_stats_api.timer('instructor_tasks.module.time.overall', tags=['action:{name}'.format(name=action_name)]): - task_progress = _perform_module_state_update(course_id, module_state_key, student_ident, update_fcn, - action_name, filter_fcn, xmodule_instance_args) - # If we get here, we assume we've succeeded, so update the InstructorTask entry in anticipation. - # But we do this within the try, in case creating the task_output causes an exception to be - # raised. - entry.task_output = InstructorTask.create_output_for_success(task_progress) - entry.task_state = SUCCESS - entry.save_now() - - except Exception: - # try to write out the failure to the entry before failing - _, exception, traceback = exc_info() - traceback_string = format_exc(traceback) if traceback is not None else '' - TASK_LOG.warning("background task (%s) failed: %s %s", task_id, exception, traceback_string) - entry.task_output = InstructorTask.create_output_for_failure(exception, traceback_string) - entry.task_state = FAILURE - entry.save_now() - raise - - # log and exit, returning task_progress info as task result: - fmt = 'Finishing task "{task_id}": course "{course_id}" problem "{state_key}": final: {progress}' - TASK_LOG.info(fmt.format(task_id=task_id, course_id=course_id, state_key=module_state_key, progress=task_progress)) - return task_progress - - def _get_task_id_from_xmodule_args(xmodule_instance_args): """Gets task_id from `xmodule_instance_args` dict, or returns default value if missing.""" return xmodule_instance_args.get('task_id', UNKNOWN_TASK_ID) if xmodule_instance_args is not None else UNKNOWN_TASK_ID +def 
_get_xqueue_callback_url_prefix(xmodule_instance_args): + """Gets prefix to use when constructing xqueue_callback_url.""" + return xmodule_instance_args.get('xqueue_callback_url_prefix', '') if xmodule_instance_args is not None else '' + + +def _get_track_function_for_task(student, xmodule_instance_args=None, source_page='x_module_task'): + """ + Make a tracking function that logs what happened. + + For insertion into ModuleSystem, and used by CapaModule, which will + provide the event_type (as string) and event (as dict) as arguments. + The request_info and task_info (and page) are provided here. + """ + # get request-related tracking information from args passthrough, and supplement with task-specific + # information: + request_info = xmodule_instance_args.get('request_info', {}) if xmodule_instance_args is not None else {} + task_info = {'student': student.username, 'task_id': _get_task_id_from_xmodule_args(xmodule_instance_args)} + + return lambda event_type, event: task_track(request_info, task_info, event_type, event, page=source_page) + + def _get_module_instance_for_task(course_id, student, module_descriptor, xmodule_instance_args=None, grade_bucket_type=None): """ @@ -277,7 +376,7 @@ def _get_module_instance_for_task(course_id, student, module_descriptor, xmodule @transaction.autocommit -def rescore_problem_module_state(module_descriptor, student_module, xmodule_instance_args=None): +def rescore_problem_module_state(xmodule_instance_args, module_descriptor, student_module): ''' Takes an XModule descriptor and a corresponding StudentModule object, and performs rescoring on the student's problem submission. 
@@ -315,24 +414,26 @@ def rescore_problem_module_state(module_descriptor, student_module, xmodule_inst # don't consider these fatal, but false means that the individual call didn't complete: TASK_LOG.warning(u"error processing rescore call for course {course}, problem {loc} and student {student}: " "unexpected response {msg}".format(msg=result, course=course_id, loc=module_state_key, student=student)) - return False + return UPDATE_STATUS_FAILED elif result['success'] not in ['correct', 'incorrect']: TASK_LOG.warning(u"error processing rescore call for course {course}, problem {loc} and student {student}: " "{msg}".format(msg=result['success'], course=course_id, loc=module_state_key, student=student)) - return False + return UPDATE_STATUS_FAILED else: TASK_LOG.debug(u"successfully processed rescore call for course {course}, problem {loc} and student {student}: " "{msg}".format(msg=result['success'], course=course_id, loc=module_state_key, student=student)) - return True + return UPDATE_STATUS_SUCCEEDED @transaction.autocommit -def reset_attempts_module_state(_module_descriptor, student_module, xmodule_instance_args=None): +def reset_attempts_module_state(xmodule_instance_args, _module_descriptor, student_module): """ Resets problem attempts to zero for specified `student_module`. - Always returns true, indicating success, if it doesn't raise an exception due to database error. + Returns a status of UPDATE_STATUS_SUCCEEDED if a problem has non-zero attempts + that are being reset, and UPDATE_STATUS_SKIPPED otherwise. 
""" + update_status = UPDATE_STATUS_SKIPPED problem_state = json.loads(student_module.state) if student_module.state else {} if 'attempts' in problem_state: old_number_of_attempts = problem_state["attempts"] @@ -343,26 +444,24 @@ def reset_attempts_module_state(_module_descriptor, student_module, xmodule_inst student_module.save() # get request-related tracking information from args passthrough, # and supplement with task-specific information: - request_info = xmodule_instance_args.get('request_info', {}) if xmodule_instance_args is not None else {} - task_info = {"student": student_module.student.username, "task_id": _get_task_id_from_xmodule_args(xmodule_instance_args)} + track_function = _get_track_function_for_task(student_module.student, xmodule_instance_args) event_info = {"old_attempts": old_number_of_attempts, "new_attempts": 0} - task_track(request_info, task_info, 'problem_reset_attempts', event_info, page='x_module_task') + track_function('problem_reset_attempts', event_info) + update_status = UPDATE_STATUS_SUCCEEDED - # consider the reset to be successful, even if no update was performed. (It's just "optimized".) - return True + return update_status @transaction.autocommit -def delete_problem_module_state(_module_descriptor, student_module, xmodule_instance_args=None): +def delete_problem_module_state(xmodule_instance_args, _module_descriptor, student_module): """ Delete the StudentModule entry. - Always returns true, indicating success, if it doesn't raise an exception due to database error. + Always returns UPDATE_STATUS_SUCCEEDED, indicating success, if it doesn't raise an exception due to database error. 
""" student_module.delete() # get request-related tracking information from args passthrough, # and supplement with task-specific information: - request_info = xmodule_instance_args.get('request_info', {}) if xmodule_instance_args is not None else {} - task_info = {"student": student_module.student.username, "task_id": _get_task_id_from_xmodule_args(xmodule_instance_args)} - task_track(request_info, task_info, 'problem_delete_state', {}, page='x_module_task') - return True + track_function = _get_track_function_for_task(student_module.student, xmodule_instance_args) + track_function('problem_delete_state', {}) + return UPDATE_STATUS_SUCCEEDED diff --git a/lms/djangoapps/instructor_task/tests/test_api.py b/lms/djangoapps/instructor_task/tests/test_api.py index 1e40c51c4b..aa34e51872 100644 --- a/lms/djangoapps/instructor_task/tests/test_api.py +++ b/lms/djangoapps/instructor_task/tests/test_api.py @@ -6,16 +6,21 @@ from xmodule.modulestore.exceptions import ItemNotFoundError from courseware.tests.factories import UserFactory -from instructor_task.api import (get_running_instructor_tasks, - get_instructor_task_history, - submit_rescore_problem_for_all_students, - submit_rescore_problem_for_student, - submit_reset_problem_attempts_for_all_students, - submit_delete_problem_state_for_all_students) +from bulk_email.models import CourseEmail, SEND_TO_ALL +from instructor_task.api import ( + get_running_instructor_tasks, + get_instructor_task_history, + submit_rescore_problem_for_all_students, + submit_rescore_problem_for_student, + submit_reset_problem_attempts_for_all_students, + submit_delete_problem_state_for_all_students, + submit_bulk_course_email, +) from instructor_task.api_helper import AlreadyRunningError from instructor_task.models import InstructorTask, PROGRESS from instructor_task.tests.test_base import (InstructorTaskTestCase, + InstructorTaskCourseTestCase, InstructorTaskModuleTestCase, TEST_COURSE_ID) @@ -42,12 +47,28 @@ class 
InstructorTaskReportTest(InstructorTaskTestCase): expected_ids.append(self._create_success_entry().task_id) expected_ids.append(self._create_progress_entry().task_id) task_ids = [instructor_task.task_id for instructor_task - in get_instructor_task_history(TEST_COURSE_ID, self.problem_url)] + in get_instructor_task_history(TEST_COURSE_ID, problem_url=self.problem_url)] self.assertEquals(set(task_ids), set(expected_ids)) + # make the same call using explicit task_type: + task_ids = [instructor_task.task_id for instructor_task + in get_instructor_task_history( + TEST_COURSE_ID, + problem_url=self.problem_url, + task_type='rescore_problem' + )] + self.assertEquals(set(task_ids), set(expected_ids)) + # make the same call using a non-existent task_type: + task_ids = [instructor_task.task_id for instructor_task + in get_instructor_task_history( + TEST_COURSE_ID, + problem_url=self.problem_url, + task_type='dummy_type' + )] + self.assertEquals(set(task_ids), set()) -class InstructorTaskSubmitTest(InstructorTaskModuleTestCase): - """Tests API methods that involve the submission of background tasks.""" +class InstructorTaskModuleSubmitTest(InstructorTaskModuleTestCase): + """Tests API methods that involve the submission of module-based background tasks.""" def setUp(self): self.initialize_course() @@ -136,3 +157,29 @@ class InstructorTaskSubmitTest(InstructorTaskModuleTestCase): def test_submit_delete_all(self): self._test_submit_task(submit_delete_problem_state_for_all_students) + + +class InstructorTaskCourseSubmitTest(InstructorTaskCourseTestCase): + """Tests API methods that involve the submission of course-based background tasks.""" + + def setUp(self): + self.initialize_course() + self.student = UserFactory.create(username="student", email="student@edx.org") + self.instructor = UserFactory.create(username="instructor", email="instructor@edx.org") + + def _define_course_email(self): + """Create CourseEmail object for testing.""" + course_email = 
CourseEmail.create(self.course.id, self.instructor, SEND_TO_ALL, "Test Subject", "<p>This is a test message</p>
") + return course_email.id # pylint: disable=E1101 + + def test_submit_bulk_email_all(self): + email_id = self._define_course_email() + instructor_task = submit_bulk_course_email(self.create_task_request(self.instructor), self.course.id, email_id) + + # test resubmitting, by updating the existing record: + instructor_task = InstructorTask.objects.get(id=instructor_task.id) # pylint: disable=E1101 + instructor_task.task_state = PROGRESS + instructor_task.save() + + with self.assertRaises(AlreadyRunningError): + instructor_task = submit_bulk_course_email(self.create_task_request(self.instructor), self.course.id, email_id) diff --git a/lms/djangoapps/instructor_task/tests/test_base.py b/lms/djangoapps/instructor_task/tests/test_base.py index 2c1fe02bd8..e0abfdf51f 100644 --- a/lms/djangoapps/instructor_task/tests/test_base.py +++ b/lms/djangoapps/instructor_task/tests/test_base.py @@ -88,7 +88,7 @@ class InstructorTaskTestCase(TestCase): def _create_progress_entry(self, student=None, task_state=PROGRESS): """Creates a InstructorTask entry representing a task in progress.""" progress = {'attempted': 3, - 'updated': 2, + 'succeeded': 2, 'total': 5, 'action_name': 'rescored', } @@ -96,10 +96,10 @@ class InstructorTaskTestCase(TestCase): @override_settings(MODULESTORE=TEST_DATA_MIXED_MODULESTORE) -class InstructorTaskModuleTestCase(LoginEnrollmentTestCase, ModuleStoreTestCase): +class InstructorTaskCourseTestCase(LoginEnrollmentTestCase, ModuleStoreTestCase): """ Base test class for InstructorTask-related tests that require - the setup of a course and problem in order to access StudentModule state. + the setup of a course. 
""" course = None current_user = None @@ -120,6 +120,7 @@ class InstructorTaskModuleTestCase(LoginEnrollmentTestCase, ModuleStoreTestCase) # add a sequence to the course to which the problems can be added self.problem_section = ItemFactory.create(parent_location=chapter.location, category='sequential', + metadata={'graded': True, 'format': 'Homework'}, display_name=TEST_SECTION_NAME) @staticmethod @@ -130,12 +131,12 @@ class InstructorTaskModuleTestCase(LoginEnrollmentTestCase, ModuleStoreTestCase) def login_username(self, username): """Login the user, given the `username`.""" if self.current_user != username: - self.login(InstructorTaskModuleTestCase.get_user_email(username), "test") + self.login(InstructorTaskCourseTestCase.get_user_email(username), "test") self.current_user = username def _create_user(self, username, is_staff=False): """Creates a user and enrolls them in the test course.""" - email = InstructorTaskModuleTestCase.get_user_email(username) + email = InstructorTaskCourseTestCase.get_user_email(username) thisuser = UserFactory.create(username=username, email=email, is_staff=is_staff) CourseEnrollmentFactory.create(user=thisuser, course_id=self.course.id) return thisuser @@ -148,6 +149,31 @@ class InstructorTaskModuleTestCase(LoginEnrollmentTestCase, ModuleStoreTestCase) """Creates a student for the test course.""" return self._create_user(username, is_staff=False) + @staticmethod + def get_task_status(task_id): + """Use api method to fetch task status, using mock request.""" + mock_request = Mock() + mock_request.REQUEST = {'task_id': task_id} + response = instructor_task_status(mock_request) + status = json.loads(response.content) + return status + + def create_task_request(self, requester_username): + """Generate request that can be used for submitting tasks""" + request = Mock() + request.user = User.objects.get(username=requester_username) + request.get_host = Mock(return_value="testhost") + request.META = {'REMOTE_ADDR': '0:0:0:0', 
'SERVER_NAME': 'testhost'} + request.is_secure = Mock(return_value=False) + return request + + +@override_settings(MODULESTORE=TEST_DATA_MIXED_MODULESTORE) +class InstructorTaskModuleTestCase(InstructorTaskCourseTestCase): + """ + Base test class for InstructorTask-related tests that require + the setup of a course and problem in order to access StudentModule state. + """ @staticmethod def problem_location(problem_url_name): """ @@ -191,21 +217,3 @@ class InstructorTaskModuleTestCase(LoginEnrollmentTestCase, ModuleStoreTestCase) module_type=descriptor.location.category, module_state_key=descriptor.location.url(), ) - - @staticmethod - def get_task_status(task_id): - """Use api method to fetch task status, using mock request.""" - mock_request = Mock() - mock_request.REQUEST = {'task_id': task_id} - response = instructor_task_status(mock_request) - status = json.loads(response.content) - return status - - def create_task_request(self, requester_username): - """Generate request that can be used for submitting tasks""" - request = Mock() - request.user = User.objects.get(username=requester_username) - request.get_host = Mock(return_value="testhost") - request.META = {'REMOTE_ADDR': '0:0:0:0', 'SERVER_NAME': 'testhost'} - request.is_secure = Mock(return_value=False) - return request diff --git a/lms/djangoapps/instructor_task/tests/test_integration.py b/lms/djangoapps/instructor_task/tests/test_integration.py index fb8cd44169..b36467c16f 100644 --- a/lms/djangoapps/instructor_task/tests/test_integration.py +++ b/lms/djangoapps/instructor_task/tests/test_integration.py @@ -227,7 +227,7 @@ class TestRescoringTask(TestIntegrationTask): self.assertEqual(task_input['problem_url'], InstructorTaskModuleTestCase.problem_location(problem_url_name)) status = json.loads(instructor_task.task_output) self.assertEqual(status['attempted'], 1) - self.assertEqual(status['updated'], 0) + self.assertEqual(status['succeeded'], 0) self.assertEqual(status['total'], 1) def 
define_code_response_problem(self, problem_url_name): diff --git a/lms/djangoapps/instructor_task/tests/test_tasks.py b/lms/djangoapps/instructor_task/tests/test_tasks.py index 090c114720..5aa5dbcb80 100644 --- a/lms/djangoapps/instructor_task/tests/test_tasks.py +++ b/lms/djangoapps/instructor_task/tests/test_tasks.py @@ -7,24 +7,22 @@ paths actually work. """ import json from uuid import uuid4 -from unittest import skip -from mock import Mock, patch +from mock import Mock, MagicMock, patch from celery.states import SUCCESS, FAILURE from xmodule.modulestore.exceptions import ItemNotFoundError -from courseware.model_data import StudentModule +from courseware.models import StudentModule from courseware.tests.factories import StudentModuleFactory -from student.tests.factories import UserFactory +from student.tests.factories import UserFactory, CourseEnrollmentFactory from instructor_task.models import InstructorTask from instructor_task.tests.test_base import InstructorTaskModuleTestCase from instructor_task.tests.factories import InstructorTaskFactory from instructor_task.tasks import rescore_problem, reset_problem_attempts, delete_problem_state -from instructor_task.tasks_helper import UpdateProblemModuleStateError, update_problem_module_state - +from instructor_task.tasks_helper import UpdateProblemModuleStateError PROBLEM_URL_NAME = "test_urlname" @@ -34,20 +32,24 @@ class TestTaskFailure(Exception): class TestInstructorTasks(InstructorTaskModuleTestCase): + def setUp(self): super(InstructorTaskModuleTestCase, self).setUp() self.initialize_course() self.instructor = self.create_instructor('instructor') self.problem_url = InstructorTaskModuleTestCase.problem_location(PROBLEM_URL_NAME) - def _create_input_entry(self, student_ident=None): + def _create_input_entry(self, student_ident=None, use_problem_url=True, course_id=None): """Creates a InstructorTask entry for testing.""" task_id = str(uuid4()) - task_input = {'problem_url': self.problem_url} + task_input = {} 
+ if use_problem_url: + task_input['problem_url'] = self.problem_url if student_ident is not None: task_input['student'] = student_ident - instructor_task = InstructorTaskFactory.create(course_id=self.course.id, + course_id = course_id or self.course.id + instructor_task = InstructorTaskFactory.create(course_id=course_id, requester=self.instructor, task_input=json.dumps(task_input), task_key='dummy value', @@ -62,7 +64,7 @@ class TestInstructorTasks(InstructorTaskModuleTestCase): 'request_info': {}, } - def _run_task_with_mock_celery(self, task_function, entry_id, task_id, expected_failure_message=None): + def _run_task_with_mock_celery(self, task_class, entry_id, task_id, expected_failure_message=None): """Submit a task and mock how celery provides a current_task.""" self.current_task = Mock() self.current_task.request = Mock() @@ -70,70 +72,52 @@ class TestInstructorTasks(InstructorTaskModuleTestCase): self.current_task.update_state = Mock() if expected_failure_message is not None: self.current_task.update_state.side_effect = TestTaskFailure(expected_failure_message) + task_args = [entry_id, self._get_xmodule_instance_args()] + with patch('instructor_task.tasks_helper._get_current_task') as mock_get_task: mock_get_task.return_value = self.current_task - return task_function(entry_id, self._get_xmodule_instance_args()) + return task_class.apply(task_args, task_id=task_id).get() - def _test_missing_current_task(self, task_function): - """Check that a task_function fails when celery doesn't provide a current_task.""" + def _test_missing_current_task(self, task_class): + """Check that a task_class fails when celery doesn't provide a current_task.""" task_entry = self._create_input_entry() - with self.assertRaises(UpdateProblemModuleStateError): - task_function(task_entry.id, self._get_xmodule_instance_args()) + with self.assertRaises(ValueError): + task_class(task_entry.id, self._get_xmodule_instance_args()) - def test_rescore_missing_current_task(self): - 
self._test_missing_current_task(rescore_problem) + def _test_undefined_course(self, task_class): + """Run with celery, but with no course defined.""" + task_entry = self._create_input_entry(course_id="bogus/course/id") + with self.assertRaises(ItemNotFoundError): + self._run_task_with_mock_celery(task_class, task_entry.id, task_entry.task_id) - def test_reset_missing_current_task(self): - self._test_missing_current_task(reset_problem_attempts) - - def test_delete_missing_current_task(self): - self._test_missing_current_task(delete_problem_state) - - def _test_undefined_problem(self, task_function): + def _test_undefined_problem(self, task_class): """Run with celery, but no problem defined.""" task_entry = self._create_input_entry() with self.assertRaises(ItemNotFoundError): - self._run_task_with_mock_celery(task_function, task_entry.id, task_entry.task_id) + self._run_task_with_mock_celery(task_class, task_entry.id, task_entry.task_id) - def test_rescore_undefined_problem(self): - self._test_undefined_problem(rescore_problem) - - def test_reset_undefined_problem(self): - self._test_undefined_problem(reset_problem_attempts) - - def test_delete_undefined_problem(self): - self._test_undefined_problem(delete_problem_state) - - def _test_run_with_task(self, task_function, action_name, expected_num_updated): + def _test_run_with_task(self, task_class, action_name, expected_num_succeeded, expected_num_skipped=0): """Run a task and check the number of StudentModules processed.""" task_entry = self._create_input_entry() - status = self._run_task_with_mock_celery(task_function, task_entry.id, task_entry.task_id) + status = self._run_task_with_mock_celery(task_class, task_entry.id, task_entry.task_id) # check return value - self.assertEquals(status.get('attempted'), expected_num_updated) - self.assertEquals(status.get('updated'), expected_num_updated) - self.assertEquals(status.get('total'), expected_num_updated) + self.assertEquals(status.get('attempted'), 
expected_num_succeeded + expected_num_skipped) + self.assertEquals(status.get('succeeded'), expected_num_succeeded) + self.assertEquals(status.get('skipped'), expected_num_skipped) + self.assertEquals(status.get('total'), expected_num_succeeded + expected_num_skipped) self.assertEquals(status.get('action_name'), action_name) - self.assertGreater('duration_ms', 0) + self.assertGreater(status.get('duration_ms'), 0) # compare with entry in table: entry = InstructorTask.objects.get(id=task_entry.id) self.assertEquals(json.loads(entry.task_output), status) self.assertEquals(entry.task_state, SUCCESS) - def _test_run_with_no_state(self, task_function, action_name): + def _test_run_with_no_state(self, task_class, action_name): """Run with no StudentModules defined for the current problem.""" self.define_option_problem(PROBLEM_URL_NAME) - self._test_run_with_task(task_function, action_name, 0) + self._test_run_with_task(task_class, action_name, 0) - def test_rescore_with_no_state(self): - self._test_run_with_no_state(rescore_problem, 'rescored') - - def test_reset_with_no_state(self): - self._test_run_with_no_state(reset_problem_attempts, 'reset') - - def test_delete_with_no_state(self): - self._test_run_with_no_state(delete_problem_state, 'deleted') - - def _create_students_with_state(self, num_students, state=None): + def _create_students_with_state(self, num_students, state=None, grade=0, max_grade=1): """Create students, a problem, and StudentModule objects for testing""" self.define_option_problem(PROBLEM_URL_NAME) students = [ @@ -141,9 +125,12 @@ class TestInstructorTasks(InstructorTaskModuleTestCase): for i in xrange(num_students) ] for student in students: + CourseEnrollmentFactory.create(course_id=self.course.id, user=student) StudentModuleFactory.create(course_id=self.course.id, module_state_key=self.problem_url, student=student, + grade=grade, + max_grade=max_grade, state=state) return students @@ -156,6 +143,185 @@ class 
TestInstructorTasks(InstructorTaskModuleTestCase): state = json.loads(module.state) self.assertEquals(state['attempts'], num_attempts) + def _test_run_with_failure(self, task_class, expected_message): + """Run a task and trigger an artificial failure with the given message.""" + task_entry = self._create_input_entry() + self.define_option_problem(PROBLEM_URL_NAME) + with self.assertRaises(TestTaskFailure): + self._run_task_with_mock_celery(task_class, task_entry.id, task_entry.task_id, expected_message) + # compare with entry in table: + entry = InstructorTask.objects.get(id=task_entry.id) + self.assertEquals(entry.task_state, FAILURE) + output = json.loads(entry.task_output) + self.assertEquals(output['exception'], 'TestTaskFailure') + self.assertEquals(output['message'], expected_message) + + def _test_run_with_long_error_msg(self, task_class): + """ + Run with an error message that is so long it will require + truncation (as well as the jettisoning of the traceback). + """ + task_entry = self._create_input_entry() + self.define_option_problem(PROBLEM_URL_NAME) + expected_message = "x" * 1500 + with self.assertRaises(TestTaskFailure): + self._run_task_with_mock_celery(task_class, task_entry.id, task_entry.task_id, expected_message) + # compare with entry in table: + entry = InstructorTask.objects.get(id=task_entry.id) + self.assertEquals(entry.task_state, FAILURE) + self.assertGreater(1023, len(entry.task_output)) + output = json.loads(entry.task_output) + self.assertEquals(output['exception'], 'TestTaskFailure') + self.assertEquals(output['message'], expected_message[:len(output['message']) - 3] + "...") + self.assertTrue('traceback' not in output) + + def _test_run_with_short_error_msg(self, task_class): + """ + Run with an error message that is short enough to fit + in the output, but long enough that the traceback won't. + Confirm that the traceback is truncated. 
+ """ + task_entry = self._create_input_entry() + self.define_option_problem(PROBLEM_URL_NAME) + expected_message = "x" * 900 + with self.assertRaises(TestTaskFailure): + self._run_task_with_mock_celery(task_class, task_entry.id, task_entry.task_id, expected_message) + # compare with entry in table: + entry = InstructorTask.objects.get(id=task_entry.id) + self.assertEquals(entry.task_state, FAILURE) + self.assertGreater(1023, len(entry.task_output)) + output = json.loads(entry.task_output) + self.assertEquals(output['exception'], 'TestTaskFailure') + self.assertEquals(output['message'], expected_message) + self.assertEquals(output['traceback'][-3:], "...") + + +class TestRescoreInstructorTask(TestInstructorTasks): + """Tests problem-rescoring instructor task.""" + + def test_rescore_missing_current_task(self): + self._test_missing_current_task(rescore_problem) + + def test_rescore_undefined_course(self): + self._test_undefined_course(rescore_problem) + + def test_rescore_undefined_problem(self): + self._test_undefined_problem(rescore_problem) + + def test_rescore_with_no_state(self): + self._test_run_with_no_state(rescore_problem, 'rescored') + + def test_rescore_with_failure(self): + self._test_run_with_failure(rescore_problem, 'We expected this to fail') + + def test_rescore_with_long_error_msg(self): + self._test_run_with_long_error_msg(rescore_problem) + + def test_rescore_with_short_error_msg(self): + self._test_run_with_short_error_msg(rescore_problem) + + def test_rescoring_unrescorable(self): + input_state = json.dumps({'done': True}) + num_students = 1 + self._create_students_with_state(num_students, input_state) + task_entry = self._create_input_entry() + mock_instance = MagicMock() + del mock_instance.rescore_problem + with patch('instructor_task.tasks_helper.get_module_for_descriptor_internal') as mock_get_module: + mock_get_module.return_value = mock_instance + with self.assertRaises(UpdateProblemModuleStateError): + 
self._run_task_with_mock_celery(rescore_problem, task_entry.id, task_entry.task_id) + # check values stored in table: + entry = InstructorTask.objects.get(id=task_entry.id) + output = json.loads(entry.task_output) + self.assertEquals(output['exception'], "UpdateProblemModuleStateError") + self.assertEquals(output['message'], "Specified problem does not support rescoring.") + self.assertGreater(len(output['traceback']), 0) + + def test_rescoring_success(self): + input_state = json.dumps({'done': True}) + num_students = 10 + self._create_students_with_state(num_students, input_state) + task_entry = self._create_input_entry() + mock_instance = Mock() + mock_instance.rescore_problem = Mock(return_value={'success': 'correct'}) + with patch('instructor_task.tasks_helper.get_module_for_descriptor_internal') as mock_get_module: + mock_get_module.return_value = mock_instance + self._run_task_with_mock_celery(rescore_problem, task_entry.id, task_entry.task_id) + # check return value + entry = InstructorTask.objects.get(id=task_entry.id) + output = json.loads(entry.task_output) + self.assertEquals(output.get('attempted'), num_students) + self.assertEquals(output.get('succeeded'), num_students) + self.assertEquals(output.get('total'), num_students) + self.assertEquals(output.get('action_name'), 'rescored') + self.assertGreater(output.get('duration_ms'), 0) + + def test_rescoring_bad_result(self): + # Confirm that rescoring does not succeed if "success" key is not an expected value. 
+ input_state = json.dumps({'done': True}) + num_students = 10 + self._create_students_with_state(num_students, input_state) + task_entry = self._create_input_entry() + mock_instance = Mock() + mock_instance.rescore_problem = Mock(return_value={'success': 'bogus'}) + with patch('instructor_task.tasks_helper.get_module_for_descriptor_internal') as mock_get_module: + mock_get_module.return_value = mock_instance + self._run_task_with_mock_celery(rescore_problem, task_entry.id, task_entry.task_id) + # check return value + entry = InstructorTask.objects.get(id=task_entry.id) + output = json.loads(entry.task_output) + self.assertEquals(output.get('attempted'), num_students) + self.assertEquals(output.get('succeeded'), 0) + self.assertEquals(output.get('total'), num_students) + self.assertEquals(output.get('action_name'), 'rescored') + self.assertGreater(output.get('duration_ms'), 0) + + def test_rescoring_missing_result(self): + # Confirm that rescoring does not succeed if "success" key is not returned. 
+ input_state = json.dumps({'done': True}) + num_students = 10 + self._create_students_with_state(num_students, input_state) + task_entry = self._create_input_entry() + mock_instance = Mock() + mock_instance.rescore_problem = Mock(return_value={'bogus': 'value'}) + with patch('instructor_task.tasks_helper.get_module_for_descriptor_internal') as mock_get_module: + mock_get_module.return_value = mock_instance + self._run_task_with_mock_celery(rescore_problem, task_entry.id, task_entry.task_id) + # check return value + entry = InstructorTask.objects.get(id=task_entry.id) + output = json.loads(entry.task_output) + self.assertEquals(output.get('attempted'), num_students) + self.assertEquals(output.get('succeeded'), 0) + self.assertEquals(output.get('total'), num_students) + self.assertEquals(output.get('action_name'), 'rescored') + self.assertGreater(output.get('duration_ms'), 0) + + +class TestResetAttemptsInstructorTask(TestInstructorTasks): + """Tests instructor task that resets problem attempts.""" + + def test_reset_missing_current_task(self): + self._test_missing_current_task(reset_problem_attempts) + + def test_reset_undefined_course(self): + self._test_undefined_course(reset_problem_attempts) + + def test_reset_undefined_problem(self): + self._test_undefined_problem(reset_problem_attempts) + + def test_reset_with_no_state(self): + self._test_run_with_no_state(reset_problem_attempts, 'reset') + + def test_reset_with_failure(self): + self._test_run_with_failure(reset_problem_attempts, 'We expected this to fail') + + def test_reset_with_long_error_msg(self): + self._test_run_with_long_error_msg(reset_problem_attempts) + + def test_reset_with_short_error_msg(self): + self._test_run_with_short_error_msg(reset_problem_attempts) + def test_reset_with_some_state(self): initial_attempts = 3 input_state = json.dumps({'attempts': initial_attempts}) @@ -168,23 +334,17 @@ class TestInstructorTasks(InstructorTaskModuleTestCase): # check that entries were reset 
self._assert_num_attempts(students, 0) - def test_delete_with_some_state(self): - # This will create StudentModule entries -- we don't have to worry about - # the state inside them. + def test_reset_with_zero_attempts(self): + initial_attempts = 0 + input_state = json.dumps({'attempts': initial_attempts}) num_students = 10 - students = self._create_students_with_state(num_students) - # check that entries were created correctly - for student in students: - StudentModule.objects.get(course_id=self.course.id, - student=student, - module_state_key=self.problem_url) - self._test_run_with_task(delete_problem_state, 'deleted', num_students) - # confirm that no state can be found anymore: - for student in students: - with self.assertRaises(StudentModule.DoesNotExist): - StudentModule.objects.get(course_id=self.course.id, - student=student, - module_state_key=self.problem_url) + students = self._create_students_with_state(num_students, input_state) + # check that entries were set correctly + self._assert_num_attempts(students, initial_attempts) + # run the task + self._test_run_with_task(reset_problem_attempts, 'reset', 0, expected_num_skipped=num_students) + # check that entries were reset + self._assert_num_attempts(students, 0) def _test_reset_with_student(self, use_email): """Run a reset task for one student, with several StudentModules for the problem defined.""" @@ -209,10 +369,11 @@ class TestInstructorTasks(InstructorTaskModuleTestCase): status = self._run_task_with_mock_celery(reset_problem_attempts, task_entry.id, task_entry.task_id) # check return value self.assertEquals(status.get('attempted'), 1) - self.assertEquals(status.get('updated'), 1) + self.assertEquals(status.get('succeeded'), 1) self.assertEquals(status.get('total'), 1) self.assertEquals(status.get('action_name'), 'reset') - self.assertGreater('duration_ms', 0) + self.assertGreater(status.get('duration_ms'), 0) + # compare with entry in table: entry = InstructorTask.objects.get(id=task_entry.id) 
self.assertEquals(json.loads(entry.task_output), status) @@ -234,144 +395,45 @@ class TestInstructorTasks(InstructorTaskModuleTestCase): def test_reset_with_student_email(self): self._test_reset_with_student(True) - def _test_run_with_failure(self, task_function, expected_message): - """Run a task and trigger an artificial failure with give message.""" - task_entry = self._create_input_entry() - self.define_option_problem(PROBLEM_URL_NAME) - with self.assertRaises(TestTaskFailure): - self._run_task_with_mock_celery(task_function, task_entry.id, task_entry.task_id, expected_message) - # compare with entry in table: - entry = InstructorTask.objects.get(id=task_entry.id) - self.assertEquals(entry.task_state, FAILURE) - output = json.loads(entry.task_output) - self.assertEquals(output['exception'], 'TestTaskFailure') - self.assertEquals(output['message'], expected_message) - def test_rescore_with_failure(self): - self._test_run_with_failure(rescore_problem, 'We expected this to fail') +class TestDeleteStateInstructorTask(TestInstructorTasks): + """Tests instructor task that deletes problem state.""" - def test_reset_with_failure(self): - self._test_run_with_failure(reset_problem_attempts, 'We expected this to fail') + def test_delete_missing_current_task(self): + self._test_missing_current_task(delete_problem_state) + + def test_delete_undefined_course(self): + self._test_undefined_course(delete_problem_state) + + def test_delete_undefined_problem(self): + self._test_undefined_problem(delete_problem_state) + + def test_delete_with_no_state(self): + self._test_run_with_no_state(delete_problem_state, 'deleted') def test_delete_with_failure(self): self._test_run_with_failure(delete_problem_state, 'We expected this to fail') - def _test_run_with_long_error_msg(self, task_function): - """ - Run with an error message that is so long it will require - truncation (as well as the jettisoning of the traceback). 
- """ - task_entry = self._create_input_entry() - self.define_option_problem(PROBLEM_URL_NAME) - expected_message = "x" * 1500 - with self.assertRaises(TestTaskFailure): - self._run_task_with_mock_celery(task_function, task_entry.id, task_entry.task_id, expected_message) - # compare with entry in table: - entry = InstructorTask.objects.get(id=task_entry.id) - self.assertEquals(entry.task_state, FAILURE) - self.assertGreater(1023, len(entry.task_output)) - output = json.loads(entry.task_output) - self.assertEquals(output['exception'], 'TestTaskFailure') - self.assertEquals(output['message'], expected_message[:len(output['message']) - 3] + "...") - self.assertTrue('traceback' not in output) - - def test_rescore_with_long_error_msg(self): - self._test_run_with_long_error_msg(rescore_problem) - - def test_reset_with_long_error_msg(self): - self._test_run_with_long_error_msg(reset_problem_attempts) - def test_delete_with_long_error_msg(self): self._test_run_with_long_error_msg(delete_problem_state) - def _test_run_with_short_error_msg(self, task_function): - """ - Run with an error message that is short enough to fit - in the output, but long enough that the traceback won't. - Confirm that the traceback is truncated. 
- """ - task_entry = self._create_input_entry() - self.define_option_problem(PROBLEM_URL_NAME) - expected_message = "x" * 900 - with self.assertRaises(TestTaskFailure): - self._run_task_with_mock_celery(task_function, task_entry.id, task_entry.task_id, expected_message) - # compare with entry in table: - entry = InstructorTask.objects.get(id=task_entry.id) - self.assertEquals(entry.task_state, FAILURE) - self.assertGreater(1023, len(entry.task_output)) - output = json.loads(entry.task_output) - self.assertEquals(output['exception'], 'TestTaskFailure') - self.assertEquals(output['message'], expected_message) - self.assertEquals(output['traceback'][-3:], "...") - - def test_rescore_with_short_error_msg(self): - self._test_run_with_short_error_msg(rescore_problem) - - def test_reset_with_short_error_msg(self): - self._test_run_with_short_error_msg(reset_problem_attempts) - def test_delete_with_short_error_msg(self): self._test_run_with_short_error_msg(delete_problem_state) - def test_successful_result_too_long(self): - # while we don't expect the existing tasks to generate output that is too - # long, we can test the framework will handle such an occurrence. 
- task_entry = self._create_input_entry() - self.define_option_problem(PROBLEM_URL_NAME) - action_name = 'x' * 1000 - update_fcn = lambda(_module_descriptor, _student_module, _xmodule_instance_args): True - task_function = (lambda entry_id, xmodule_instance_args: - update_problem_module_state(entry_id, - update_fcn, action_name, filter_fcn=None, - xmodule_instance_args=None)) - - with self.assertRaises(ValueError): - self._run_task_with_mock_celery(task_function, task_entry.id, task_entry.task_id) - # compare with entry in table: - entry = InstructorTask.objects.get(id=task_entry.id) - self.assertEquals(entry.task_state, FAILURE) - self.assertGreater(1023, len(entry.task_output)) - output = json.loads(entry.task_output) - self.assertEquals(output['exception'], 'ValueError') - self.assertTrue("Length of task output is too long" in output['message']) - self.assertTrue('traceback' not in output) - - @skip - def test_rescoring_unrescorable(self): - # TODO: this test needs to have Mako templates initialized - # to make sure that the creation of an XModule works. - input_state = json.dumps({'done': True}) - num_students = 1 - self._create_students_with_state(num_students, input_state) - task_entry = self._create_input_entry() - with self.assertRaises(UpdateProblemModuleStateError): - self._run_task_with_mock_celery(rescore_problem, task_entry.id, task_entry.task_id) - # check values stored in table: - entry = InstructorTask.objects.get(id=task_entry.id) - output = json.loads(entry.task_output) - self.assertEquals(output['exception'], "UpdateProblemModuleStateError") - self.assertEquals(output['message'], "Specified problem does not support rescoring.") - self.assertGreater(len(output['traceback']), 0) - - @skip - def test_rescoring_success(self): - # TODO: this test needs to have Mako templates initialized - # to make sure that the creation of an XModule works. 
- input_state = json.dumps({'done': True}) + def test_delete_with_some_state(self): + # This will create StudentModule entries -- we don't have to worry about + # the state inside them. num_students = 10 - self._create_students_with_state(num_students, input_state) - task_entry = self._create_input_entry() - mock_instance = Mock() - mock_instance.rescore_problem = Mock({'success': 'correct'}) - # TODO: figure out why this mock is not working.... - with patch('courseware.module_render.get_module_for_descriptor_internal') as mock_get_module: - mock_get_module.return_value = mock_instance - self._run_task_with_mock_celery(rescore_problem, task_entry.id, task_entry.task_id) - # check return value - entry = InstructorTask.objects.get(id=task_entry.id) - output = json.loads(entry.task_output) - self.assertEquals(output.get('attempted'), num_students) - self.assertEquals(output.get('updated'), num_students) - self.assertEquals(output.get('total'), num_students) - self.assertEquals(output.get('action_name'), 'rescored') - self.assertGreater('duration_ms', 0) + students = self._create_students_with_state(num_students) + # check that entries were created correctly + for student in students: + StudentModule.objects.get(course_id=self.course.id, + student=student, + module_state_key=self.problem_url) + self._test_run_with_task(delete_problem_state, 'deleted', num_students) + # confirm that no state can be found anymore: + for student in students: + with self.assertRaises(StudentModule.DoesNotExist): + StudentModule.objects.get(course_id=self.course.id, + student=student, + module_state_key=self.problem_url) diff --git a/lms/djangoapps/instructor_task/tests/test_views.py b/lms/djangoapps/instructor_task/tests/test_views.py index 41de314abd..5dd1e4fd14 100644 --- a/lms/djangoapps/instructor_task/tests/test_views.py +++ b/lms/djangoapps/instructor_task/tests/test_views.py @@ -68,8 +68,10 @@ class InstructorTaskReportTest(InstructorTaskTestCase): 
self.assertEquals(output['task_id'], task_id) self.assertEquals(output['task_state'], FAILURE) self.assertFalse(output['in_progress']) - expected_progress = {'exception': TEST_FAILURE_EXCEPTION, - 'message': TEST_FAILURE_MESSAGE} + expected_progress = { + 'exception': TEST_FAILURE_EXCEPTION, + 'message': TEST_FAILURE_MESSAGE, + } self.assertEquals(output['task_progress'], expected_progress) def test_get_status_from_success(self): @@ -83,13 +85,70 @@ class InstructorTaskReportTest(InstructorTaskTestCase): self.assertEquals(output['task_id'], task_id) self.assertEquals(output['task_state'], SUCCESS) self.assertFalse(output['in_progress']) - expected_progress = {'attempted': 3, - 'updated': 2, - 'total': 5, - 'action_name': 'rescored'} + expected_progress = { + 'attempted': 3, + 'succeeded': 2, + 'total': 5, + 'action_name': 'rescored', + } self.assertEquals(output['task_progress'], expected_progress) - def _test_get_status_from_result(self, task_id, mock_result): + def test_get_status_from_legacy_success(self): + # get status for a task that had already succeeded, back at a time + # when 'updated' was used instead of the preferred 'succeeded'. 
+ legacy_progress = { + 'attempted': 3, + 'updated': 2, + 'total': 5, + 'action_name': 'rescored', + } + instructor_task = self._create_entry(task_state=SUCCESS, task_output=legacy_progress) + task_id = instructor_task.task_id + response = self._get_instructor_task_status(task_id) + output = json.loads(response.content) + self.assertEquals(output['message'], "Problem rescored for 2 of 3 students (out of 5)") + self.assertEquals(output['succeeded'], False) + self.assertEquals(output['task_id'], task_id) + self.assertEquals(output['task_state'], SUCCESS) + self.assertFalse(output['in_progress']) + self.assertEquals(output['task_progress'], legacy_progress) + + def _create_email_subtask_entry(self, total=5, attempted=3, succeeded=2, skipped=0, task_state=PROGRESS): + """Create an InstructorTask with subtask defined and email argument.""" + progress = {'attempted': attempted, + 'succeeded': succeeded, + 'skipped': skipped, + 'total': total, + 'action_name': 'emailed', + } + instructor_task = self._create_entry(task_state=task_state, task_output=progress) + instructor_task.subtasks = {} + instructor_task.task_input = json.dumps({'email_id': 134}) + instructor_task.save() + return instructor_task + + def test_get_status_from_subtasks(self): + # get status for a task that is in progress, with updates + # from subtasks. 
+ instructor_task = self._create_email_subtask_entry(skipped=1) + task_id = instructor_task.task_id + response = self._get_instructor_task_status(task_id) + output = json.loads(response.content) + self.assertEquals(output['message'], "Progress: emailed 2 of 3 so far (skipping 1) (out of 5)") + self.assertEquals(output['succeeded'], False) + self.assertEquals(output['task_id'], task_id) + self.assertEquals(output['task_state'], PROGRESS) + self.assertTrue(output['in_progress']) + expected_progress = { + 'attempted': 3, + 'succeeded': 2, + 'skipped': 1, + 'total': 5, + 'action_name': 'emailed', + } + self.assertEquals(output['task_progress'], expected_progress) + + def _test_get_status_from_result(self, task_id, mock_result=None): """ Provides mock result to caller of instructor_task_status, and returns resulting output. """ @@ -120,10 +179,12 @@ class InstructorTaskReportTest(InstructorTaskTestCase): mock_result = Mock() mock_result.task_id = task_id mock_result.state = PROGRESS - mock_result.result = {'attempted': 5, - 'updated': 4, - 'total': 10, - 'action_name': 'rescored'} + mock_result.result = { + 'attempted': 5, + 'succeeded': 4, + 'total': 10, + 'action_name': 'rescored', + } output = self._test_get_status_from_result(task_id, mock_result) self.assertEquals(output['message'], "Progress: rescored 4 of 5 so far (out of 10)") self.assertEquals(output['succeeded'], False) @@ -145,9 +206,11 @@ class InstructorTaskReportTest(InstructorTaskTestCase): self.assertEquals(output['succeeded'], False) self.assertEquals(output['task_state'], FAILURE) self.assertFalse(output['in_progress']) - expected_progress = {'exception': 'NotImplementedError', - 'message': "This task later failed.", - 'traceback': "random traceback"} + expected_progress = { + 'exception': 'NotImplementedError', + 'message': "This task later failed.", + 'traceback': "random traceback", + } self.assertEquals(output['task_progress'], expected_progress) def test_update_progress_to_revoked(self): @@ -165,7 
+228,7 @@ class InstructorTaskReportTest(InstructorTaskTestCase): expected_progress = {'message': "Task revoked before running"} self.assertEquals(output['task_progress'], expected_progress) - def _get_output_for_task_success(self, attempted, updated, total, student=None): + def _get_output_for_task_success(self, attempted, succeeded, total, student=None): """returns the task_id and the result returned by instructor_task_status().""" # view task entry for task in progress instructor_task = self._create_progress_entry(student) @@ -173,23 +236,38 @@ class InstructorTaskReportTest(InstructorTaskTestCase): mock_result = Mock() mock_result.task_id = task_id mock_result.state = SUCCESS - mock_result.result = {'attempted': attempted, - 'updated': updated, - 'total': total, - 'action_name': 'rescored'} + mock_result.result = { + 'attempted': attempted, + 'succeeded': succeeded, + 'total': total, + 'action_name': 'rescored', + } output = self._test_get_status_from_result(task_id, mock_result) return output + def _get_email_output_for_task_success(self, attempted, succeeded, total, skipped=0): + """returns the result returned by instructor_task_status().""" + instructor_task = self._create_email_subtask_entry( + total=total, + attempted=attempted, + succeeded=succeeded, + skipped=skipped, + task_state=SUCCESS, + ) + return self._test_get_status_from_result(instructor_task.task_id) + def test_update_progress_to_success(self): output = self._get_output_for_task_success(10, 8, 10) self.assertEquals(output['message'], "Problem rescored for 8 of 10 students") self.assertEquals(output['succeeded'], False) self.assertEquals(output['task_state'], SUCCESS) self.assertFalse(output['in_progress']) - expected_progress = {'attempted': 10, - 'updated': 8, - 'total': 10, - 'action_name': 'rescored'} + expected_progress = { + 'attempted': 10, + 'succeeded': 8, + 'total': 10, + 'action_name': 'rescored', + } self.assertEquals(output['task_progress'], expected_progress) def 
test_success_messages(self): @@ -225,6 +303,47 @@ class InstructorTaskReportTest(InstructorTaskTestCase): self.assertTrue("Problem successfully rescored for student" in output['message']) self.assertTrue(output['succeeded']) + def test_email_success_messages(self): + output = self._get_email_output_for_task_success(0, 0, 10) + self.assertEqual(output['message'], "Unable to find any recipients to be emailed (out of 10)") + self.assertFalse(output['succeeded']) + + output = self._get_email_output_for_task_success(10, 0, 10) + self.assertEqual(output['message'], "Message failed to be emailed for any of 10 recipients ") + self.assertFalse(output['succeeded']) + + output = self._get_email_output_for_task_success(10, 8, 10) + self.assertEqual(output['message'], "Message emailed for 8 of 10 recipients") + self.assertFalse(output['succeeded']) + + output = self._get_email_output_for_task_success(9, 8, 10) + self.assertEqual(output['message'], "Message emailed for 8 of 9 recipients (out of 10)") + self.assertFalse(output['succeeded']) + + output = self._get_email_output_for_task_success(10, 10, 10) + self.assertEqual(output['message'], "Message successfully emailed for 10 recipients") + self.assertTrue(output['succeeded']) + + output = self._get_email_output_for_task_success(0, 0, 10, skipped=3) + self.assertEqual(output['message'], "Unable to find any recipients to be emailed (skipping 3) (out of 10)") + self.assertFalse(output['succeeded']) + + output = self._get_email_output_for_task_success(10, 0, 10, skipped=3) + self.assertEqual(output['message'], "Message failed to be emailed for any of 10 recipients (skipping 3)") + self.assertFalse(output['succeeded']) + + output = self._get_email_output_for_task_success(10, 8, 10, skipped=3) + self.assertEqual(output['message'], "Message emailed for 8 of 10 recipients (skipping 3)") + self.assertFalse(output['succeeded']) + + output = self._get_email_output_for_task_success(9, 8, 10, skipped=3) + 
self.assertEqual(output['message'], "Message emailed for 8 of 9 recipients (skipping 3) (out of 10)") + self.assertFalse(output['succeeded']) + + output = self._get_email_output_for_task_success(10, 10, 10, skipped=3) + self.assertEqual(output['message'], "Message successfully emailed for 10 recipients (skipping 3)") + self.assertTrue(output['succeeded']) + def test_get_info_for_queuing_task(self): # get status for a task that is still running: instructor_task = self._create_entry() @@ -262,4 +381,4 @@ class InstructorTaskReportTest(InstructorTaskTestCase): instructor_task.task_input = "{ bad" succeeded, message = get_task_completion_info(instructor_task) self.assertFalse(succeeded) - self.assertEquals(message, "Problem rescored for 2 of 3 students (out of 5)") + self.assertEquals(message, "Status: rescored 2 of 3 (out of 5)") diff --git a/lms/djangoapps/instructor_task/views.py b/lms/djangoapps/instructor_task/views.py index 40f128d08e..9a23841425 100644 --- a/lms/djangoapps/instructor_task/views.py +++ b/lms/djangoapps/instructor_task/views.py @@ -3,6 +3,7 @@ import json import logging from django.http import HttpResponse +from django.utils.translation import ugettext as _ from celery.states import FAILURE, REVOKED, READY_STATES @@ -40,7 +41,7 @@ def instructor_task_status(request): Status is returned as a JSON-serialized dict, wrapped as the content of a HTTPResponse. - The task_id can be specified to this view in one of three ways: + The task_id can be specified to this view in one of two ways: * by making a request containing 'task_id' as a parameter with a single value Returns a dict containing status information for the specified task_id @@ -65,7 +66,7 @@ def instructor_task_status(request): 'in_progress': boolean indicating if task is still running. 'task_progress': dict containing progress information. 
This includes: 'attempted': number of attempts made - 'updated': number of attempts that "succeeded" + 'succeeded': number of attempts that "succeeded" 'total': number of possible subtasks to attempt 'action_name': user-visible verb to use in status messages. Should be past-tense. 'duration_ms': how long the task has (or had) been running. @@ -105,68 +106,118 @@ def get_task_completion_info(instructor_task): succeeded = False if instructor_task.task_state not in STATES_WITH_STATUS: - return (succeeded, "No status information available") + return (succeeded, _("No status information available")) # we're more surprised if there is no output for a completed task, but just warn: if instructor_task.task_output is None: - log.warning("No task_output information found for instructor_task {0}".format(instructor_task.task_id)) - return (succeeded, "No status information available") + log.warning(_("No task_output information found for instructor_task {0}").format(instructor_task.task_id)) + return (succeeded, _("No status information available")) try: task_output = json.loads(instructor_task.task_output) except ValueError: - fmt = "No parsable task_output information found for instructor_task {0}: {1}" + fmt = _("No parsable task_output information found for instructor_task {0}: {1}") log.warning(fmt.format(instructor_task.task_id, instructor_task.task_output)) - return (succeeded, "No parsable status information available") + return (succeeded, _("No parsable status information available")) if instructor_task.task_state in [FAILURE, REVOKED]: - return (succeeded, task_output.get('message', 'No message provided')) + return (succeeded, task_output.get('message', _('No message provided'))) - if any([key not in task_output for key in ['action_name', 'attempted', 'updated', 'total']]): - fmt = "Invalid task_output information found for instructor_task {0}: {1}" + if any([key not in task_output for key in ['action_name', 'attempted', 'total']]): + fmt = _("Invalid task_output 
information found for instructor_task {0}: {1}") log.warning(fmt.format(instructor_task.task_id, instructor_task.task_output)) - return (succeeded, "No progress status information available") + return (succeeded, _("No progress status information available")) - action_name = task_output['action_name'] + action_name = _(task_output['action_name']) num_attempted = task_output['attempted'] - num_updated = task_output['updated'] num_total = task_output['total'] + # In earlier versions of this code, the key 'updated' was used instead of + # (the more general) 'succeeded'. In order to support history that may contain + # output with the old key, we check for values with both the old and the current + # key, and simply sum them. + num_succeeded = task_output.get('updated', 0) + task_output.get('succeeded', 0) + num_skipped = task_output.get('skipped', 0) + student = None + problem_url = None + email_id = None try: task_input = json.loads(instructor_task.task_input) except ValueError: - fmt = "No parsable task_input information found for instructor_task {0}: {1}" + fmt = _("No parsable task_input information found for instructor_task {0}: {1}") log.warning(fmt.format(instructor_task.task_id, instructor_task.task_input)) else: student = task_input.get('student') + problem_url = task_input.get('problem_url') + email_id = task_input.get('email_id') if instructor_task.task_state == PROGRESS: # special message for providing progress updates: - msg_format = "Progress: {action} {updated} of {attempted} so far" - elif student is not None: + # Translators: {action} is a past-tense verb that is localized separately. {attempted} and {succeeded} are counts. 
+ msg_format = _("Progress: {action} {succeeded} of {attempted} so far") + elif student is not None and problem_url is not None: + # this reports on actions on problems for a particular student: if num_attempted == 0: - msg_format = "Unable to find submission to be {action} for student '{student}'" - elif num_updated == 0: - msg_format = "Problem failed to be {action} for student '{student}'" + # Translators: {action} is a past-tense verb that is localized separately. {student} is a student identifier. + msg_format = _("Unable to find submission to be {action} for student '{student}'") + elif num_succeeded == 0: + # Translators: {action} is a past-tense verb that is localized separately. {student} is a student identifier. + msg_format = _("Problem failed to be {action} for student '{student}'") else: succeeded = True - msg_format = "Problem successfully {action} for student '{student}'" - elif num_attempted == 0: - msg_format = "Unable to find any students with submissions to be {action}" - elif num_updated == 0: - msg_format = "Problem failed to be {action} for any of {attempted} students" - elif num_updated == num_attempted: - succeeded = True - msg_format = "Problem successfully {action} for {attempted} students" - else: # num_updated < num_attempted - msg_format = "Problem {action} for {updated} of {attempted} students" + # Translators: {action} is a past-tense verb that is localized separately. {student} is a student identifier. + msg_format = _("Problem successfully {action} for student '{student}'") + elif student is None and problem_url is not None: + # this reports on actions on problems for all students: + if num_attempted == 0: + # Translators: {action} is a past-tense verb that is localized separately. + msg_format = _("Unable to find any students with submissions to be {action}") + elif num_succeeded == 0: + # Translators: {action} is a past-tense verb that is localized separately. {attempted} is a count. 
+ msg_format = _("Problem failed to be {action} for any of {attempted} students") + elif num_succeeded == num_attempted: + succeeded = True + # Translators: {action} is a past-tense verb that is localized separately. {attempted} is a count. + msg_format = _("Problem successfully {action} for {attempted} students") + else: # num_succeeded < num_attempted + # Translators: {action} is a past-tense verb that is localized separately. {succeeded} and {attempted} are counts. + msg_format = _("Problem {action} for {succeeded} of {attempted} students") + elif email_id is not None: + # this reports on actions on bulk emails + if num_attempted == 0: + # Translators: {action} is a past-tense verb that is localized separately. + msg_format = _("Unable to find any recipients to be {action}") + elif num_succeeded == 0: + # Translators: {action} is a past-tense verb that is localized separately. {attempted} is a count. + msg_format = _("Message failed to be {action} for any of {attempted} recipients ") + elif num_succeeded == num_attempted: + succeeded = True + # Translators: {action} is a past-tense verb that is localized separately. {attempted} is a count. + msg_format = _("Message successfully {action} for {attempted} recipients") + else: # num_succeeded < num_attempted + # Translators: {action} is a past-tense verb that is localized separately. {succeeded} and {attempted} are counts. + msg_format = _("Message {action} for {succeeded} of {attempted} recipients") + else: + # provide a default: + # Translators: {action} is a past-tense verb that is localized separately. {succeeded} and {attempted} are counts. + msg_format = _("Status: {action} {succeeded} of {attempted}") + + if num_skipped > 0: + # Translators: {skipped} is a count. This message is appended to task progress status messages. + msg_format += _(" (skipping {skipped})") if student is None and num_attempted != num_total: - msg_format += " (out of {total})" + # Translators: {total} is a count. 
This message is appended to task progress status messages. + msg_format += _(" (out of {total})") # Update status in task result object itself: - message = msg_format.format(action=action_name, updated=num_updated, - attempted=num_attempted, total=num_total, - student=student) + message = msg_format.format( + action=action_name, + succeeded=num_succeeded, + attempted=num_attempted, + total=num_total, + skipped=num_skipped, + student=student + ) return (succeeded, message) diff --git a/lms/envs/aws.py b/lms/envs/aws.py index 99c68c97ed..ee56c6490a 100644 --- a/lms/envs/aws.py +++ b/lms/envs/aws.py @@ -109,8 +109,6 @@ EMAIL_FILE_PATH = ENV_TOKENS.get('EMAIL_FILE_PATH', None) EMAIL_HOST = ENV_TOKENS.get('EMAIL_HOST', 'localhost') # django default is localhost EMAIL_PORT = ENV_TOKENS.get('EMAIL_PORT', 25) # django default is 25 EMAIL_USE_TLS = ENV_TOKENS.get('EMAIL_USE_TLS', False) # django default is False -EMAILS_PER_TASK = ENV_TOKENS.get('EMAILS_PER_TASK', 100) -EMAILS_PER_QUERY = ENV_TOKENS.get('EMAILS_PER_QUERY', 1000) SITE_NAME = ENV_TOKENS['SITE_NAME'] SESSION_ENGINE = ENV_TOKENS.get('SESSION_ENGINE', SESSION_ENGINE) SESSION_COOKIE_DOMAIN = ENV_TOKENS.get('SESSION_COOKIE_DOMAIN') @@ -130,10 +128,9 @@ LOG_DIR = ENV_TOKENS['LOG_DIR'] CACHES = ENV_TOKENS['CACHES'] -#Email overrides +# Email overrides DEFAULT_FROM_EMAIL = ENV_TOKENS.get('DEFAULT_FROM_EMAIL', DEFAULT_FROM_EMAIL) DEFAULT_FEEDBACK_EMAIL = ENV_TOKENS.get('DEFAULT_FEEDBACK_EMAIL', DEFAULT_FEEDBACK_EMAIL) -DEFAULT_BULK_FROM_EMAIL = ENV_TOKENS.get('DEFAULT_BULK_FROM_EMAIL', DEFAULT_BULK_FROM_EMAIL) ADMINS = ENV_TOKENS.get('ADMINS', ADMINS) SERVER_EMAIL = ENV_TOKENS.get('SERVER_EMAIL', SERVER_EMAIL) TECH_SUPPORT_EMAIL = ENV_TOKENS.get('TECH_SUPPORT_EMAIL', TECH_SUPPORT_EMAIL) @@ -143,7 +140,21 @@ PAYMENT_SUPPORT_EMAIL = ENV_TOKENS.get('PAYMENT_SUPPORT_EMAIL', PAYMENT_SUPPORT_ PAID_COURSE_REGISTRATION_CURRENCY = ENV_TOKENS.get('PAID_COURSE_REGISTRATION_CURRENCY', PAID_COURSE_REGISTRATION_CURRENCY) -#Theme 
overrides +# Bulk Email overrides +BULK_EMAIL_DEFAULT_FROM_EMAIL = ENV_TOKENS.get('BULK_EMAIL_DEFAULT_FROM_EMAIL', BULK_EMAIL_DEFAULT_FROM_EMAIL) +BULK_EMAIL_EMAILS_PER_TASK = ENV_TOKENS.get('BULK_EMAIL_EMAILS_PER_TASK', BULK_EMAIL_EMAILS_PER_TASK) +BULK_EMAIL_EMAILS_PER_QUERY = ENV_TOKENS.get('BULK_EMAIL_EMAILS_PER_QUERY', BULK_EMAIL_EMAILS_PER_QUERY) +BULK_EMAIL_DEFAULT_RETRY_DELAY = ENV_TOKENS.get('BULK_EMAIL_DEFAULT_RETRY_DELAY', BULK_EMAIL_DEFAULT_RETRY_DELAY) +BULK_EMAIL_MAX_RETRIES = ENV_TOKENS.get('BULK_EMAIL_MAX_RETRIES', BULK_EMAIL_MAX_RETRIES) +BULK_EMAIL_INFINITE_RETRY_CAP = ENV_TOKENS.get('BULK_EMAIL_INFINITE_RETRY_CAP', BULK_EMAIL_INFINITE_RETRY_CAP) +BULK_EMAIL_LOG_SENT_EMAILS = ENV_TOKENS.get('BULK_EMAIL_LOG_SENT_EMAILS', BULK_EMAIL_LOG_SENT_EMAILS) +BULK_EMAIL_RETRY_DELAY_BETWEEN_SENDS = ENV_TOKENS.get('BULK_EMAIL_RETRY_DELAY_BETWEEN_SENDS', BULK_EMAIL_RETRY_DELAY_BETWEEN_SENDS) +# We want Bulk Email running on the high-priority queue, so we define the +# routing key that points to it. At the moment, the name is the same. +# We have to reset the value here, since we have changed the value of the queue name. 
+BULK_EMAIL_ROUTING_KEY = HIGH_PRIORITY_QUEUE + +# Theme overrides THEME_NAME = ENV_TOKENS.get('THEME_NAME', None) if not THEME_NAME is None: enable_theme(THEME_NAME) @@ -152,10 +163,10 @@ if not THEME_NAME is None: # Marketing link overrides MKTG_URL_LINK_MAP.update(ENV_TOKENS.get('MKTG_URL_LINK_MAP', {})) -#Timezone overrides +# Timezone overrides TIME_ZONE = ENV_TOKENS.get('TIME_ZONE', TIME_ZONE) -#Additional installed apps +# Additional installed apps for app in ENV_TOKENS.get('ADDL_INSTALLED_APPS', []): INSTALLED_APPS += (app,) diff --git a/lms/envs/common.py b/lms/envs/common.py index 4e9c47ebf6..1f750544bd 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -114,6 +114,7 @@ MITX_FEATURES = { # analytics experiments 'ENABLE_INSTRUCTOR_ANALYTICS': False, + # bulk email available to instructors: 'ENABLE_INSTRUCTOR_EMAIL': False, # enable analytics server. @@ -340,7 +341,7 @@ TRACKING_BACKENDS = { } } -# Backawrds compatibility with ENABLE_SQL_TRACKING_LOGS feature flag. +# Backwards compatibility with ENABLE_SQL_TRACKING_LOGS feature flag. # In the future, adding the backend to TRACKING_BACKENDS enough. if MITX_FEATURES.get('ENABLE_SQL_TRACKING_LOGS'): TRACKING_BACKENDS.update({ @@ -425,12 +426,9 @@ HTTPS = 'on' ROOT_URLCONF = 'lms.urls' IGNORABLE_404_ENDS = ('favicon.ico') -# Email +# Platform Email EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' DEFAULT_FROM_EMAIL = 'registration@edx.org' -DEFAULT_BULK_FROM_EMAIL = 'no-reply@courseupdates.edx.org' -EMAILS_PER_TASK = 100 -EMAILS_PER_QUERY = 1000 DEFAULT_FEEDBACK_EMAIL = 'feedback@edx.org' SERVER_EMAIL = 'devops@edx.org' TECH_SUPPORT_EMAIL = 'technical@edx.org' @@ -812,6 +810,45 @@ CELERY_QUEUES = { DEFAULT_PRIORITY_QUEUE: {} } +# let logging work as configured: +CELERYD_HIJACK_ROOT_LOGGER = False + +################################ Bulk Email ################################### + +# Suffix used to construct 'from' email address for bulk emails. 
+# A course-specific identifier is prepended. +BULK_EMAIL_DEFAULT_FROM_EMAIL = 'no-reply@courseupdates.edx.org' + +# Parameters for breaking down course enrollment into subtasks. +BULK_EMAIL_EMAILS_PER_TASK = 100 +BULK_EMAIL_EMAILS_PER_QUERY = 1000 + +# Initial delay used for retrying tasks. Additional retries use +# longer delays. Value is in seconds. +BULK_EMAIL_DEFAULT_RETRY_DELAY = 30 + +# Maximum number of retries per task for errors that are not related +# to throttling. +BULK_EMAIL_MAX_RETRIES = 5 + +# Maximum number of retries per task for errors that are related to +# throttling. If this is not set, then there is no cap on such retries. +BULK_EMAIL_INFINITE_RETRY_CAP = 1000 + +# We want Bulk Email running on the high-priority queue, so we define the +# routing key that points to it. At the moment, the name is the same. +BULK_EMAIL_ROUTING_KEY = HIGH_PRIORITY_QUEUE + +# Flag to indicate if individual email addresses should be logged as they are sent +# a bulk email message. +BULK_EMAIL_LOG_SENT_EMAILS = False + +# Delay in seconds to sleep between individual mail messages being sent, +# when a bulk email task is retried for rate-related reasons. Choose this +# value depending on the number of workers that might be sending email in +# parallel, and what the SES rate is. +BULK_EMAIL_RETRY_DELAY_BETWEEN_SENDS = 0.02 + ################################### APPS ###################################### INSTALLED_APPS = ( # Standard ones that are always installed... diff --git a/lms/templates/courseware/instructor_dashboard.html b/lms/templates/courseware/instructor_dashboard.html index 9fdea3dae8..2d75824cf9 100644 --- a/lms/templates/courseware/instructor_dashboard.html +++ b/lms/templates/courseware/instructor_dashboard.html @@ -550,6 +550,13 @@ function goto( mode) return true; } + +

These email actions run in the background, and status for active email tasks will appear in a table below. + To see status for all bulk email tasks submitted for this course, click on this button: +

+

+ +

%endif diff --git a/requirements/edx/github.txt b/requirements/edx/github.txt index 363b3682b8..6491fc9c82 100644 --- a/requirements/edx/github.txt +++ b/requirements/edx/github.txt @@ -17,6 +17,6 @@ # Our libraries: -e git+https://github.com/edx/XBlock.git@cee38a15f#egg=XBlock -e git+https://github.com/edx/codejail.git@0a1b468#egg=codejail --e git+https://github.com/edx/diff-cover.git@v0.2.5#egg=diff_cover +-e git+https://github.com/edx/diff-cover.git@v0.2.6#egg=diff_cover -e git+https://github.com/edx/js-test-tool.git@v0.1.1#egg=js_test_tool -e git+https://github.com/edx/django-waffle.git@823a102e48#egg=django-waffle