From 29731ec40b1fe843502640c3472d5d5f342aad16 Mon Sep 17 00:00:00 2001 From: Brian Wilson Date: Fri, 29 Mar 2013 15:20:34 -0400 Subject: [PATCH 1/3] management command to remove excess input_state entries --- .../management/commands/remove_input_state.py | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 lms/djangoapps/courseware/management/commands/remove_input_state.py diff --git a/lms/djangoapps/courseware/management/commands/remove_input_state.py b/lms/djangoapps/courseware/management/commands/remove_input_state.py new file mode 100644 index 0000000000..722fa90fdd --- /dev/null +++ b/lms/djangoapps/courseware/management/commands/remove_input_state.py @@ -0,0 +1,129 @@ +''' +This is a one-off command aimed at fixing a temporary problem encountered where input_state was added to +the same dict object in capa problems, so was accumulating. The fix is simply to remove input_state entry +from state for all problems in the affected date range. +''' + +import json +import logging +from optparse import make_option + +from django.core.management.base import BaseCommand +from django.db import transaction + +from courseware.models import StudentModule, StudentModuleHistory + +LOG = logging.getLogger(__name__) + + +class Command(BaseCommand): + ''' + The fix here is to remove the "input_state" entry in the StudentModule objects of any problems that + contain them. No problem is yet making use of this, and the code should do the right thing if it's + missing (by recreating an empty dict for its value). + + To narrow down the set of problems that might need fixing, the StudentModule + objects to be checked is filtered down to those: + + created < '2013-03-29 16:30:00' (the problem must have been answered before the buggy code was reverted, + on Prod and Edge) + modified > '2013-03-28 22:00:00' (the problem must have been visited after the bug was introduced + on Prod and Edge) + state like '%input_state%' (the problem must have "input_state" set). + ''' + + num_visited = 0 + num_changed = 0 + num_hist_visited = 0 + num_hist_changed = 0 + + option_list = BaseCommand.option_list + ( + make_option('--save', + action='store_true', + dest='save_changes', + default=False, + help='Persist the changes that were encountered. If not set, no changes are saved.'), ) + + def fix_studentmodules(self, save_changes): + '''Identify the list of StudentModule objects that might need fixing, and then fix each one''' + modules = StudentModule.objects.filter(modified__gt='2013-03-28 22:00:00', + created__lt='2013-03-29 16:30:00', + state__contains='input_state') + + for module in modules: + self.remove_studentmodule_input_state(module, save_changes) + + LOG.info("Finished student modules: updating {0} of {1} modules".format(self.num_changed, self.num_visited)) + + hist_modules = StudentModuleHistory.objects.filter(created__gt='2013-03-28 22:00:00', + created__lt='2013-03-29 16:30:00', + state__contains='input_state') + + for hist_module in hist_modules: + self.remove_studentmodulehistory_input_state(hist_module, save_changes) + + LOG.info("Finished student history modules: updating {0} of {1} modules".format(self.num_hist_changed, self.num_hist_visited)) + + @transaction.autocommit + def remove_studentmodule_input_state(self, module, save_changes): + ''' Fix the grade assigned to a StudentModule''' + module_state = module.state + if module_state is None: + # not likely, since we filter on it. But in general... + LOG.info("No state found for {type} module {id} for student {student} in course {course_id}" + .format(type=module.module_type, id=module.module_state_key, + student=module.student.username, course_id=module.course_id)) + return + + state_dict = json.loads(module_state) + self.num_visited += 1 + + if 'input_state' not in state_dict: + pass + elif save_changes: + # make the change and persist + del state_dict['input_state'] + module.state = json.dumps(state_dict) + module.save() + self.num_changed += 1 + else: + # don't make the change, but increment the count indicating the change would be made + self.num_changed += 1 + + @transaction.autocommit + def remove_studentmodulehistory_input_state(self, module, save_changes): + ''' Fix the grade assigned to a StudentModule''' + module_state = module.state + if module_state is None: + # not likely, since we filter on it. But in general... + LOG.info("No state found for {type} module {id} for student {student} in course {course_id}" + .format(type=module.module_type, id=module.module_state_key, + student=module.student.username, course_id=module.course_id)) + return + + state_dict = json.loads(module_state) + self.num_hist_visited += 1 + + if 'input_state' not in state_dict: + pass + elif save_changes: + # make the change and persist + del state_dict['input_state'] + module.state = json.dumps(state_dict) + module.save() + self.num_hist_changed += 1 + else: + # don't make the change, but increment the count indicating the change would be made + self.num_hist_changed += 1 + + def handle(self, **options): + '''Handle management command request''' + + save_changes = options['save_changes'] + + LOG.info("Starting run: save_changes = {0}".format(save_changes)) + + self.fix_studentmodules(save_changes) + + LOG.info("Finished run: updating {0} of {1} student modules".format(self.num_changed, self.num_visited)) + LOG.info("Finished run: updating {0} of {1} student history modules".format(self.num_hist_changed, self.num_hist_visited)) From b6f95c71ce30f74f3b62ecbf39c26f14cc825143 Mon Sep 17 00:00:00 2001 From: Brian Wilson Date: Tue, 2 Apr 2013 16:20:46 -0400 Subject: [PATCH 2/3] change command to handle file input instead --- .../management/commands/remove_input_state.py | 84 ++++++++++++++----- 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/lms/djangoapps/courseware/management/commands/remove_input_state.py b/lms/djangoapps/courseware/management/commands/remove_input_state.py index 722fa90fdd..772758a33b 100644 --- a/lms/djangoapps/courseware/management/commands/remove_input_state.py +++ b/lms/djangoapps/courseware/management/commands/remove_input_state.py @@ -8,7 +8,7 @@ import json import logging from optparse import make_option -from django.core.management.base import BaseCommand +from django.core.management.base import BaseCommand, CommandError from django.db import transaction from courseware.models import StudentModule, StudentModuleHistory @@ -30,6 +30,17 @@ class Command(BaseCommand): modified > '2013-03-28 22:00:00' (the problem must have been visited after the bug was introduced on Prod and Edge) state like '%input_state%' (the problem must have "input_state" set). + + This filtering is done on the production database replica, so that the larger select queries don't lock + the real production database. The list of id values for Student Modules is written to a file, and the + file is passed into this command. The sql file passed to mysql contains: + + select sm.id from courseware_studentmodule sm + where sm.modified > "2013-03-28 22:00:00" + and sm.created < "2013-03-29 16:30:00" + and sm.state like "%input_state%" + and sm.module_type = 'problem'; + ''' num_visited = 0 @@ -42,26 +53,54 @@ class Command(BaseCommand): action='store_true', dest='save_changes', default=False, - help='Persist the changes that were encountered. If not set, no changes are saved.'), ) + help='Persist the changes that were encountered. If not set, no changes are saved.'), + make_option('--idlist', + action='store', + dest='idlist_path', + help='Path containing id values of StudentModule objects to clean (one per line).'), + ) - def fix_studentmodules(self, save_changes): - '''Identify the list of StudentModule objects that might need fixing, and then fix each one''' - modules = StudentModule.objects.filter(modified__gt='2013-03-28 22:00:00', - created__lt='2013-03-29 16:30:00', - state__contains='input_state') +# DON'T DO THIS: +# def fix_studentmodules(self, save_changes): +# '''Identify the list of StudentModule objects that might need fixing, and then fix each one''' +# modules = StudentModule.objects.filter(modified__gt='2013-03-28 22:00:00', +# created__lt='2013-03-29 16:30:00', +# state__contains='input_state') +# +# for module in modules: +# self.remove_studentmodule_input_state(module, save_changes) +# +# LOG.info("Finished student modules: updating {0} of {1} modules".format(self.num_changed, self.num_visited)) +# +# hist_modules = StudentModuleHistory.objects.filter(created__gt='2013-03-28 22:00:00', +# created__lt='2013-03-29 16:30:00', +# state__contains='input_state') +# +# for hist_module in hist_modules: +# self.remove_studentmodulehistory_input_state(hist_module, save_changes) +# +# LOG.info("Finished student history modules: updating {0} of {1} modules".format(self.num_hist_changed, self.num_hist_visited)) - for module in modules: + def fix_studentmodules_in_list(self, save_changes, idlist_path): + '''Read in the list of StudentModule objects that might need fixing, and then fix each one''' + + # open file and read id values from it: + for line in open(idlist_path, 'r'): + student_module_id = line.strip() + if student_module_id == 'id': + continue + try: + module = StudentModule.objects.get(id=student_module_id) + except: + LOG.error("Unable to find student module with id = {0}: skipping... ".format(student_module_id)) + continue self.remove_studentmodule_input_state(module, save_changes) + hist_modules = StudentModuleHistory.objects.filter(student_module_id = student_module_id) + for hist_module in hist_modules: + self.remove_studentmodulehistory_input_state(hist_module, save_changes) + LOG.info("Finished student modules: updating {0} of {1} modules".format(self.num_changed, self.num_visited)) - - hist_modules = StudentModuleHistory.objects.filter(created__gt='2013-03-28 22:00:00', - created__lt='2013-03-29 16:30:00', - state__contains='input_state') - - for hist_module in hist_modules: - self.remove_studentmodulehistory_input_state(hist_module, save_changes) - LOG.info("Finished student history modules: updating {0} of {1} modules".format(self.num_hist_changed, self.num_hist_visited)) @transaction.autocommit @@ -116,14 +155,15 @@ class Command(BaseCommand): # don't make the change, but increment the count indicating the change would be made self.num_hist_changed += 1 - def handle(self, **options): + def handle(self, *args, **options): '''Handle management command request''' - + if len(args) != 1: + raise CommandError("missing idlist file") + idlist_path = args[0] save_changes = options['save_changes'] + LOG.info("Starting run: reading from idlist file {0}; save_changes = {1}".format(idlist_path, save_changes)) - LOG.info("Starting run: save_changes = {0}".format(save_changes)) - - self.fix_studentmodules(save_changes) - + self.fix_studentmodules_in_list(save_changes, idlist_path) + LOG.info("Finished run: updating {0} of {1} student modules".format(self.num_changed, self.num_visited)) LOG.info("Finished run: updating {0} of {1} student history modules".format(self.num_hist_changed, self.num_hist_visited)) From b11f0e17f74a94411c00f169b89f70a2a8029aa1 Mon Sep 17 00:00:00 2001 From: Brian Wilson Date: Wed, 3 Apr 2013 10:22:43 -0400 Subject: [PATCH 3/3] respond to feedback --- .../management/commands/remove_input_state.py | 58 +++++-------------- 1 file changed, 16 insertions(+), 42 deletions(-) diff --git a/lms/djangoapps/courseware/management/commands/remove_input_state.py b/lms/djangoapps/courseware/management/commands/remove_input_state.py index 772758a33b..9adabeafc9 100644 --- a/lms/djangoapps/courseware/management/commands/remove_input_state.py +++ b/lms/djangoapps/courseware/management/commands/remove_input_state.py @@ -21,7 +21,7 @@ class Command(BaseCommand): The fix here is to remove the "input_state" entry in the StudentModule objects of any problems that contain them. No problem is yet making use of this, and the code should do the right thing if it's missing (by recreating an empty dict for its value). - + To narrow down the set of problems that might need fixing, the StudentModule objects to be checked is filtered down to those: @@ -30,15 +30,15 @@ class Command(BaseCommand): modified > '2013-03-28 22:00:00' (the problem must have been visited after the bug was introduced on Prod and Edge) state like '%input_state%' (the problem must have "input_state" set). - - This filtering is done on the production database replica, so that the larger select queries don't lock + + This filtering is done on the production database replica, so that the larger select queries don't lock the real production database. The list of id values for Student Modules is written to a file, and the file is passed into this command. The sql file passed to mysql contains: - - select sm.id from courseware_studentmodule sm - where sm.modified > "2013-03-28 22:00:00" - and sm.created < "2013-03-29 16:30:00" - and sm.state like "%input_state%" + + select sm.id from courseware_studentmodule sm + where sm.modified > "2013-03-28 22:00:00" + and sm.created < "2013-03-29 16:30:00" + and sm.state like "%input_state%" and sm.module_type = 'problem'; ''' @@ -53,56 +53,29 @@ class Command(BaseCommand): action='store_true', dest='save_changes', default=False, - help='Persist the changes that were encountered. If not set, no changes are saved.'), - make_option('--idlist', - action='store', - dest='idlist_path', - help='Path containing id values of StudentModule objects to clean (one per line).'), + help='Persist the changes that were encountered. If not set, no changes are saved.'), ) -# DON'T DO THIS: -# def fix_studentmodules(self, save_changes): -# '''Identify the list of StudentModule objects that might need fixing, and then fix each one''' -# modules = StudentModule.objects.filter(modified__gt='2013-03-28 22:00:00', -# created__lt='2013-03-29 16:30:00', -# state__contains='input_state') -# -# for module in modules: -# self.remove_studentmodule_input_state(module, save_changes) -# -# LOG.info("Finished student modules: updating {0} of {1} modules".format(self.num_changed, self.num_visited)) -# -# hist_modules = StudentModuleHistory.objects.filter(created__gt='2013-03-28 22:00:00', -# created__lt='2013-03-29 16:30:00', -# state__contains='input_state') -# -# for hist_module in hist_modules: -# self.remove_studentmodulehistory_input_state(hist_module, save_changes) -# -# LOG.info("Finished student history modules: updating {0} of {1} modules".format(self.num_hist_changed, self.num_hist_visited)) - def fix_studentmodules_in_list(self, save_changes, idlist_path): '''Read in the list of StudentModule objects that might need fixing, and then fix each one''' - + # open file and read id values from it: for line in open(idlist_path, 'r'): student_module_id = line.strip() + # skip the header, if present: if student_module_id == 'id': continue try: module = StudentModule.objects.get(id=student_module_id) - except: + except StudentModule.DoesNotExist: LOG.error("Unable to find student module with id = {0}: skipping... ".format(student_module_id)) continue self.remove_studentmodule_input_state(module, save_changes) - hist_modules = StudentModuleHistory.objects.filter(student_module_id = student_module_id) + hist_modules = StudentModuleHistory.objects.filter(student_module_id=student_module_id) for hist_module in hist_modules: self.remove_studentmodulehistory_input_state(hist_module, save_changes) - LOG.info("Finished student modules: updating {0} of {1} modules".format(self.num_changed, self.num_visited)) - LOG.info("Finished student history modules: updating {0} of {1} modules".format(self.num_hist_changed, self.num_hist_visited)) - @transaction.autocommit def remove_studentmodule_input_state(self, module, save_changes): ''' Fix the grade assigned to a StudentModule''' @@ -164,6 +137,7 @@ class Command(BaseCommand): LOG.info("Starting run: reading from idlist file {0}; save_changes = {1}".format(idlist_path, save_changes)) self.fix_studentmodules_in_list(save_changes, idlist_path) - + LOG.info("Finished run: updating {0} of {1} student modules".format(self.num_changed, self.num_visited)) - LOG.info("Finished run: updating {0} of {1} student history modules".format(self.num_hist_changed, self.num_hist_visited)) + LOG.info("Finished run: updating {0} of {1} student history modules".format(self.num_hist_changed, + self.num_hist_visited))