468 lines
22 KiB
Python
468 lines
22 KiB
Python
"""
|
|
This file contains tasks that are designed to perform background operations on the
|
|
running state of a course.
|
|
|
|
"""
|
|
import json
|
|
from time import time
|
|
|
|
from celery import Task, current_task
|
|
from celery.utils.log import get_task_logger
|
|
from celery.states import SUCCESS, FAILURE
|
|
|
|
from django.contrib.auth.models import User
|
|
from django.db import transaction, reset_queries
|
|
from dogapi import dog_stats_api
|
|
|
|
from xmodule.modulestore.django import modulestore
|
|
|
|
from track.views import task_track
|
|
|
|
from courseware.models import StudentModule
|
|
from courseware.model_data import FieldDataCache
|
|
from courseware.module_render import get_module_for_descriptor_internal
|
|
from instructor_task.models import InstructorTask, PROGRESS
|
|
|
|
# define different loggers for use within tasks and on client side
|
|
TASK_LOG = get_task_logger(__name__)
|
|
|
|
# define value to use when no task_id is provided:
|
|
UNKNOWN_TASK_ID = 'unknown-task_id'
|
|
|
|
# define values for update functions to use to return status to perform_module_state_update
|
|
UPDATE_STATUS_SUCCEEDED = 'succeeded'
|
|
UPDATE_STATUS_FAILED = 'failed'
|
|
UPDATE_STATUS_SKIPPED = 'skipped'
|
|
|
|
|
|
class BaseInstructorTask(Task):
|
|
"""
|
|
Base task class for use with InstructorTask models.
|
|
|
|
Permits updating information about task in corresponding InstructorTask for monitoring purposes.
|
|
|
|
Assumes that the entry_id of the InstructorTask model is the first argument to the task.
|
|
|
|
The `entry_id` is the primary key for the InstructorTask entry representing the task. This class
|
|
updates the entry on success and failure of the task it wraps. It is setting the entry's value
|
|
for task_state based on what Celery would set it to once the task returns to Celery:
|
|
FAILURE if an exception is encountered, and SUCCESS if it returns normally.
|
|
Other arguments are pass-throughs to perform_module_state_update, and documented there.
|
|
"""
|
|
abstract = True
|
|
|
|
def on_success(self, task_progress, task_id, args, kwargs):
|
|
"""
|
|
Update InstructorTask object corresponding to this task with info about success.
|
|
|
|
Updates task_output and task_state. But it shouldn't actually do anything
|
|
if the task is only creating subtasks to actually do the work.
|
|
|
|
Assumes `task_progress` is a dict containing the task's result, with the following keys:
|
|
|
|
'attempted': number of attempts made
|
|
'succeeded': number of attempts that "succeeded"
|
|
'skipped': number of attempts that "skipped"
|
|
'failed': number of attempts that "failed"
|
|
'total': number of possible subtasks to attempt
|
|
'action_name': user-visible verb to use in status messages. Should be past-tense.
|
|
Pass-through of input `action_name`.
|
|
'duration_ms': how long the task has (or had) been running.
|
|
|
|
This is JSON-serialized and stored in the task_output column of the InstructorTask entry.
|
|
|
|
"""
|
|
TASK_LOG.debug('Task %s: success returned with progress: %s', task_id, task_progress)
|
|
# We should be able to find the InstructorTask object to update
|
|
# based on the task_id here, without having to dig into the
|
|
# original args to the task. On the other hand, the entry_id
|
|
# is the first value passed to all such args, so we'll use that.
|
|
# And we assume that it exists, else we would already have had a failure.
|
|
entry_id = args[0]
|
|
entry = InstructorTask.objects.get(pk=entry_id)
|
|
# Check to see if any subtasks had been defined as part of this task.
|
|
# If not, then we know that we're done. (If so, let the subtasks
|
|
# handle updating task_state themselves.)
|
|
if len(entry.subtasks) == 0:
|
|
entry.task_output = InstructorTask.create_output_for_success(task_progress)
|
|
entry.task_state = SUCCESS
|
|
entry.save_now()
|
|
|
|
def on_failure(self, exc, task_id, args, kwargs, einfo):
|
|
"""
|
|
Update InstructorTask object corresponding to this task with info about failure.
|
|
|
|
Fetches and updates exception and traceback information on failure.
|
|
|
|
If an exception is raised internal to the task, it is caught by celery and provided here.
|
|
The information is recorded in the InstructorTask object as a JSON-serialized dict
|
|
stored in the task_output column. It contains the following keys:
|
|
|
|
'exception': type of exception object
|
|
'message': error message from exception object
|
|
'traceback': traceback information (truncated if necessary)
|
|
|
|
Note that there is no way to record progress made within the task (e.g. attempted,
|
|
succeeded, etc.) when such failures occur.
|
|
"""
|
|
TASK_LOG.debug('Task %s: failure returned', task_id)
|
|
entry_id = args[0]
|
|
try:
|
|
entry = InstructorTask.objects.get(pk=entry_id)
|
|
except InstructorTask.DoesNotExist:
|
|
# if the InstructorTask object does not exist, then there's no point
|
|
# trying to update it.
|
|
TASK_LOG.error("Task (%s) has no InstructorTask object for id %s", task_id, entry_id)
|
|
else:
|
|
TASK_LOG.warning("Task (%s) failed: %s %s", task_id, einfo.exception, einfo.traceback)
|
|
entry.task_output = InstructorTask.create_output_for_failure(einfo.exception, einfo.traceback)
|
|
entry.task_state = FAILURE
|
|
entry.save_now()
|
|
|
|
|
|
class UpdateProblemModuleStateError(Exception):
|
|
"""
|
|
Error signaling a fatal condition while updating problem modules.
|
|
|
|
Used when the current module cannot be processed and no more
|
|
modules should be attempted.
|
|
"""
|
|
pass
|
|
|
|
|
|
def _get_current_task():
|
|
"""
|
|
Stub to make it easier to test without actually running Celery.
|
|
|
|
This is a wrapper around celery.current_task, which provides access
|
|
to the top of the stack of Celery's tasks. When running tests, however,
|
|
it doesn't seem to work to mock current_task directly, so this wrapper
|
|
is used to provide a hook to mock in tests, while providing the real
|
|
`current_task` in production.
|
|
"""
|
|
return current_task
|
|
|
|
|
|
def run_main_task(entry_id, task_fcn, action_name):
|
|
"""
|
|
Applies the `task_fcn` to the arguments defined in `entry_id` InstructorTask.
|
|
|
|
Arguments passed to `task_fcn` are:
|
|
|
|
`entry_id` : the primary key for the InstructorTask entry representing the task.
|
|
`course_id` : the id for the course.
|
|
`task_input` : dict containing task-specific arguments, JSON-decoded from InstructorTask's task_input.
|
|
`action_name` : past-tense verb to use for constructing status messages.
|
|
|
|
If no exceptions are raised, the `task_fcn` should return a dict containing
|
|
the task's result with the following keys:
|
|
|
|
'attempted': number of attempts made
|
|
'succeeded': number of attempts that "succeeded"
|
|
'skipped': number of attempts that "skipped"
|
|
'failed': number of attempts that "failed"
|
|
'total': number of possible subtasks to attempt
|
|
'action_name': user-visible verb to use in status messages.
|
|
Should be past-tense. Pass-through of input `action_name`.
|
|
'duration_ms': how long the task has (or had) been running.
|
|
|
|
"""
|
|
|
|
# get the InstructorTask to be updated. If this fails, then let the exception return to Celery.
|
|
# There's no point in catching it here.
|
|
entry = InstructorTask.objects.get(pk=entry_id)
|
|
|
|
# get inputs to use in this task from the entry:
|
|
task_id = entry.task_id
|
|
course_id = entry.course_id
|
|
task_input = json.loads(entry.task_input)
|
|
|
|
# construct log message:
|
|
fmt = 'task "{task_id}": course "{course_id}" input "{task_input}"'
|
|
task_info_string = fmt.format(task_id=task_id, course_id=course_id, task_input=task_input)
|
|
|
|
TASK_LOG.info('Starting update (nothing %s yet): %s', action_name, task_info_string)
|
|
|
|
# Check that the task_id submitted in the InstructorTask matches the current task
|
|
# that is running.
|
|
request_task_id = _get_current_task().request.id
|
|
if task_id != request_task_id:
|
|
fmt = 'Requested task did not match actual task "{actual_id}": {task_info}'
|
|
message = fmt.format(actual_id=request_task_id, task_info=task_info_string)
|
|
TASK_LOG.error(message)
|
|
raise ValueError(message)
|
|
|
|
# Now do the work:
|
|
with dog_stats_api.timer('instructor_tasks.time.overall', tags=['action:{name}'.format(name=action_name)]):
|
|
task_progress = task_fcn(entry_id, course_id, task_input, action_name)
|
|
|
|
# Release any queries that the connection has been hanging onto:
|
|
reset_queries()
|
|
|
|
# log and exit, returning task_progress info as task result:
|
|
TASK_LOG.info('Finishing %s: final: %s', task_info_string, task_progress)
|
|
return task_progress
|
|
|
|
|
|
def perform_module_state_update(update_fcn, filter_fcn, _entry_id, course_id, task_input, action_name):
|
|
"""
|
|
Performs generic update by visiting StudentModule instances with the update_fcn provided.
|
|
|
|
StudentModule instances are those that match the specified `course_id` and `module_state_key`.
|
|
If `student_identifier` is not None, it is used as an additional filter to limit the modules to those belonging
|
|
to that student. If `student_identifier` is None, performs update on modules for all students on the specified problem.
|
|
|
|
If a `filter_fcn` is not None, it is applied to the query that has been constructed. It takes one
|
|
argument, which is the query being filtered, and returns the filtered version of the query.
|
|
|
|
The `update_fcn` is called on each StudentModule that passes the resulting filtering.
|
|
It is passed three arguments: the module_descriptor for the module pointed to by the
|
|
module_state_key, the particular StudentModule to update, and the xmodule_instance_args being
|
|
passed through. If the value returned by the update function evaluates to a boolean True,
|
|
the update is successful; False indicates the update on the particular student module failed.
|
|
A raised exception indicates a fatal condition -- that no other student modules should be considered.
|
|
|
|
The return value is a dict containing the task's results, with the following keys:
|
|
|
|
'attempted': number of attempts made
|
|
'succeeded': number of attempts that "succeeded"
|
|
'skipped': number of attempts that "skipped"
|
|
'failed': number of attempts that "failed"
|
|
'total': number of possible updates to attempt
|
|
'action_name': user-visible verb to use in status messages. Should be past-tense.
|
|
Pass-through of input `action_name`.
|
|
'duration_ms': how long the task has (or had) been running.
|
|
|
|
Because this is run internal to a task, it does not catch exceptions. These are allowed to pass up to the
|
|
next level, so that it can set the failure modes and capture the error trace in the InstructorTask and the
|
|
result object.
|
|
|
|
"""
|
|
# get start time for task:
|
|
start_time = time()
|
|
|
|
module_state_key = task_input.get('problem_url')
|
|
student_identifier = task_input.get('student')
|
|
|
|
# find the problem descriptor:
|
|
module_descriptor = modulestore().get_instance(course_id, module_state_key)
|
|
|
|
# find the module in question
|
|
modules_to_update = StudentModule.objects.filter(course_id=course_id,
|
|
module_state_key=module_state_key)
|
|
|
|
# give the option of updating an individual student. If not specified,
|
|
# then updates all students who have responded to a problem so far
|
|
student = None
|
|
if student_identifier is not None:
|
|
# if an identifier is supplied, then look for the student,
|
|
# and let it throw an exception if none is found.
|
|
if "@" in student_identifier:
|
|
student = User.objects.get(email=student_identifier)
|
|
elif student_identifier is not None:
|
|
student = User.objects.get(username=student_identifier)
|
|
|
|
if student is not None:
|
|
modules_to_update = modules_to_update.filter(student_id=student.id)
|
|
|
|
if filter_fcn is not None:
|
|
modules_to_update = filter_fcn(modules_to_update)
|
|
|
|
# perform the main loop
|
|
num_attempted = 0
|
|
num_succeeded = 0
|
|
num_skipped = 0
|
|
num_failed = 0
|
|
num_total = modules_to_update.count()
|
|
|
|
def get_task_progress():
|
|
"""Return a dict containing info about current task"""
|
|
current_time = time()
|
|
progress = {'action_name': action_name,
|
|
'attempted': num_attempted,
|
|
'succeeded': num_succeeded,
|
|
'skipped': num_skipped,
|
|
'failed': num_failed,
|
|
'total': num_total,
|
|
'duration_ms': int((current_time - start_time) * 1000),
|
|
}
|
|
return progress
|
|
|
|
task_progress = get_task_progress()
|
|
_get_current_task().update_state(state=PROGRESS, meta=task_progress)
|
|
for module_to_update in modules_to_update:
|
|
num_attempted += 1
|
|
# There is no try here: if there's an error, we let it throw, and the task will
|
|
# be marked as FAILED, with a stack trace.
|
|
with dog_stats_api.timer('instructor_tasks.module.time.step', tags=['action:{name}'.format(name=action_name)]):
|
|
update_status = update_fcn(module_descriptor, module_to_update)
|
|
if update_status == UPDATE_STATUS_SUCCEEDED:
|
|
# If the update_fcn returns true, then it performed some kind of work.
|
|
# Logging of failures is left to the update_fcn itself.
|
|
num_succeeded += 1
|
|
elif update_status == UPDATE_STATUS_FAILED:
|
|
num_failed += 1
|
|
elif update_status == UPDATE_STATUS_SKIPPED:
|
|
num_skipped += 1
|
|
else:
|
|
raise UpdateProblemModuleStateError("Unexpected update_status returned: {}".format(update_status))
|
|
|
|
# update task status:
|
|
task_progress = get_task_progress()
|
|
_get_current_task().update_state(state=PROGRESS, meta=task_progress)
|
|
|
|
return task_progress
|
|
|
|
|
|
def _get_task_id_from_xmodule_args(xmodule_instance_args):
|
|
"""Gets task_id from `xmodule_instance_args` dict, or returns default value if missing."""
|
|
return xmodule_instance_args.get('task_id', UNKNOWN_TASK_ID) if xmodule_instance_args is not None else UNKNOWN_TASK_ID
|
|
|
|
|
|
def _get_xqueue_callback_url_prefix(xmodule_instance_args):
|
|
"""Gets prefix to use when constructing xqueue_callback_url."""
|
|
return xmodule_instance_args.get('xqueue_callback_url_prefix', '') if xmodule_instance_args is not None else ''
|
|
|
|
|
|
def _get_track_function_for_task(student, xmodule_instance_args=None, source_page='x_module_task'):
|
|
"""
|
|
Make a tracking function that logs what happened.
|
|
|
|
For insertion into ModuleSystem, and used by CapaModule, which will
|
|
provide the event_type (as string) and event (as dict) as arguments.
|
|
The request_info and task_info (and page) are provided here.
|
|
"""
|
|
# get request-related tracking information from args passthrough, and supplement with task-specific
|
|
# information:
|
|
request_info = xmodule_instance_args.get('request_info', {}) if xmodule_instance_args is not None else {}
|
|
task_info = {'student': student.username, 'task_id': _get_task_id_from_xmodule_args(xmodule_instance_args)}
|
|
|
|
return lambda event_type, event: task_track(request_info, task_info, event_type, event, page=source_page)
|
|
|
|
|
|
def _get_module_instance_for_task(course_id, student, module_descriptor, xmodule_instance_args=None,
|
|
grade_bucket_type=None):
|
|
"""
|
|
Fetches a StudentModule instance for a given `course_id`, `student` object, and `module_descriptor`.
|
|
|
|
`xmodule_instance_args` is used to provide information for creating a track function and an XQueue callback.
|
|
These are passed, along with `grade_bucket_type`, to get_module_for_descriptor_internal, which sidesteps
|
|
the need for a Request object when instantiating an xmodule instance.
|
|
"""
|
|
# reconstitute the problem's corresponding XModule:
|
|
field_data_cache = FieldDataCache.cache_for_descriptor_descendents(course_id, student, module_descriptor)
|
|
|
|
# get request-related tracking information from args passthrough, and supplement with task-specific
|
|
# information:
|
|
request_info = xmodule_instance_args.get('request_info', {}) if xmodule_instance_args is not None else {}
|
|
task_info = {"student": student.username, "task_id": _get_task_id_from_xmodule_args(xmodule_instance_args)}
|
|
|
|
def make_track_function():
|
|
'''
|
|
Make a tracking function that logs what happened.
|
|
|
|
For insertion into ModuleSystem, and used by CapaModule, which will
|
|
provide the event_type (as string) and event (as dict) as arguments.
|
|
The request_info and task_info (and page) are provided here.
|
|
'''
|
|
return lambda event_type, event: task_track(request_info, task_info, event_type, event, page='x_module_task')
|
|
|
|
xqueue_callback_url_prefix = xmodule_instance_args.get('xqueue_callback_url_prefix', '') \
|
|
if xmodule_instance_args is not None else ''
|
|
|
|
return get_module_for_descriptor_internal(student, module_descriptor, field_data_cache, course_id,
|
|
make_track_function(), xqueue_callback_url_prefix,
|
|
grade_bucket_type=grade_bucket_type)
|
|
|
|
|
|
@transaction.autocommit
|
|
def rescore_problem_module_state(xmodule_instance_args, module_descriptor, student_module):
|
|
'''
|
|
Takes an XModule descriptor and a corresponding StudentModule object, and
|
|
performs rescoring on the student's problem submission.
|
|
|
|
Throws exceptions if the rescoring is fatal and should be aborted if in a loop.
|
|
In particular, raises UpdateProblemModuleStateError if module fails to instantiate,
|
|
or if the module doesn't support rescoring.
|
|
|
|
Returns True if problem was successfully rescored for the given student, and False
|
|
if problem encountered some kind of error in rescoring.
|
|
'''
|
|
# unpack the StudentModule:
|
|
course_id = student_module.course_id
|
|
student = student_module.student
|
|
module_state_key = student_module.module_state_key
|
|
instance = _get_module_instance_for_task(course_id, student, module_descriptor, xmodule_instance_args, grade_bucket_type='rescore')
|
|
|
|
if instance is None:
|
|
# Either permissions just changed, or someone is trying to be clever
|
|
# and load something they shouldn't have access to.
|
|
msg = "No module {loc} for student {student}--access denied?".format(loc=module_state_key,
|
|
student=student)
|
|
TASK_LOG.debug(msg)
|
|
raise UpdateProblemModuleStateError(msg)
|
|
|
|
if not hasattr(instance, 'rescore_problem'):
|
|
# This should also not happen, since it should be already checked in the caller,
|
|
# but check here to be sure.
|
|
msg = "Specified problem does not support rescoring."
|
|
raise UpdateProblemModuleStateError(msg)
|
|
|
|
result = instance.rescore_problem()
|
|
instance.save()
|
|
if 'success' not in result:
|
|
# don't consider these fatal, but false means that the individual call didn't complete:
|
|
TASK_LOG.warning(u"error processing rescore call for course {course}, problem {loc} and student {student}: "
|
|
"unexpected response {msg}".format(msg=result, course=course_id, loc=module_state_key, student=student))
|
|
return UPDATE_STATUS_FAILED
|
|
elif result['success'] not in ['correct', 'incorrect']:
|
|
TASK_LOG.warning(u"error processing rescore call for course {course}, problem {loc} and student {student}: "
|
|
"{msg}".format(msg=result['success'], course=course_id, loc=module_state_key, student=student))
|
|
return UPDATE_STATUS_FAILED
|
|
else:
|
|
TASK_LOG.debug(u"successfully processed rescore call for course {course}, problem {loc} and student {student}: "
|
|
"{msg}".format(msg=result['success'], course=course_id, loc=module_state_key, student=student))
|
|
return UPDATE_STATUS_SUCCEEDED
|
|
|
|
|
|
@transaction.autocommit
|
|
def reset_attempts_module_state(xmodule_instance_args, _module_descriptor, student_module):
|
|
"""
|
|
Resets problem attempts to zero for specified `student_module`.
|
|
|
|
Returns a status of UPDATE_STATUS_SUCCEEDED if a problem has non-zero attempts
|
|
that are being reset, and UPDATE_STATUS_SKIPPED otherwise.
|
|
"""
|
|
update_status = UPDATE_STATUS_SKIPPED
|
|
problem_state = json.loads(student_module.state) if student_module.state else {}
|
|
if 'attempts' in problem_state:
|
|
old_number_of_attempts = problem_state["attempts"]
|
|
if old_number_of_attempts > 0:
|
|
problem_state["attempts"] = 0
|
|
# convert back to json and save
|
|
student_module.state = json.dumps(problem_state)
|
|
student_module.save()
|
|
# get request-related tracking information from args passthrough,
|
|
# and supplement with task-specific information:
|
|
track_function = _get_track_function_for_task(student_module.student, xmodule_instance_args)
|
|
event_info = {"old_attempts": old_number_of_attempts, "new_attempts": 0}
|
|
track_function('problem_reset_attempts', event_info)
|
|
update_status = UPDATE_STATUS_SUCCEEDED
|
|
|
|
return update_status
|
|
|
|
|
|
@transaction.autocommit
|
|
def delete_problem_module_state(xmodule_instance_args, _module_descriptor, student_module):
|
|
"""
|
|
Delete the StudentModule entry.
|
|
|
|
Always returns UPDATE_STATUS_SUCCEEDED, indicating success, if it doesn't raise an exception due to database error.
|
|
"""
|
|
student_module.delete()
|
|
# get request-related tracking information from args passthrough,
|
|
# and supplement with task-specific information:
|
|
track_function = _get_track_function_for_task(student_module.student, xmodule_instance_args)
|
|
track_function('problem_delete_state', {})
|
|
return UPDATE_STATUS_SUCCEEDED
|