class Command(BaseDeletionCommand):
    """
    Purges every row from the two historical microsite tables.

    Example usage: ./manage.py lms --settings=devstack delete_historical_microsite_data
    """
    help = 'Deletes all historical MicrositeOrganizationMapping and MicrositeTemplate rows (in chunks).'

    def handle(self, *args, **options):
        """
        Empties both historical microsite tables, one chunk-sized transaction at a time.

        The chunk size and the pause between chunks are whatever the parent
        command's ``handle`` returns for the given options.
        """
        rows_per_txn, pause_seconds = super(Command, self).handle(*args, **options)
        purge_targets = (
            (
                MicrositeOrganizationMapping.objects,
                'microsite_configuration_historicalmicrositeorganizationmapping',
            ),
            (
                MicrositeTemplate.objects,
                'microsite_configuration_historicalmicrositetemplate',
            ),
        )
        # Tables are processed strictly in the order listed above.
        for manager, historical_table in purge_targets:
            delete_rows(manager, historical_table, 'history_id', rows_per_txn, pause_seconds)
class Command(BaseDeletionCommand):
    """
    Purges every row from the student_historicalcourseenrollment table.

    Example usage: ./manage.py lms --settings=devstack delete_historical_enrollment_data
    """
    help = 'Deletes all historical CourseEnrollment rows (in chunks).'

    def handle(self, *args, **options):
        """
        Empties the historical enrollment table, one chunk-sized transaction at a time.

        The chunk size and the pause between chunks are whatever the parent
        command's ``handle`` returns for the given options.
        """
        historical_table = 'student_historicalcourseenrollment'
        id_column = 'history_id'
        rows_per_txn, pause_seconds = super(Command, self).handle(*args, **options)
        delete_rows(CourseEnrollment.objects, historical_table, id_column, rows_per_txn, pause_seconds)
class Command(BaseDeletionCommand):
    """
    Purges every row from the verify_student_historicalverificationdeadline table.

    Example usage: ./manage.py lms --settings=devstack delete_historical_verify_student_data
    """
    help = 'Deletes all historical VerificationDeadline rows (in chunks).'

    def handle(self, *args, **options):
        """
        Empties the historical verification-deadline table in chunked transactions.

        The chunk size and the pause between chunks are whatever the parent
        command's ``handle`` returns for the given options.
        """
        chunking = super(Command, self).handle(*args, **options)
        # Unpack explicitly so a malformed return value fails here, before any delete is issued.
        rows_per_txn, pause_seconds = chunking
        manager = VerificationDeadline.objects
        delete_rows(
            manager,
            'verify_student_historicalverificationdeadline',
            'history_id',
            rows_per_txn,
            pause_seconds
        )
class Command(BaseDeletionCommand):
    """
    Purges every row from the api_admin_historicalapiaccessrequest table.

    Example usage: ./manage.py lms --settings=devstack delete_historical_api_admin_data
    """
    help = 'Deletes all historical ApiAccessRequest rows (in chunks).'

    def handle(self, *args, **options):
        """
        Empties the historical API-access-request table in chunked transactions.

        The chunk size and the pause between chunks are whatever the parent
        command's ``handle`` returns for the given options.
        """
        rows_per_txn, pause_seconds = super(Command, self).handle(*args, **options)
        target = (ApiAccessRequest.objects, 'api_admin_historicalapiaccessrequest', 'history_id')
        manager, historical_table, id_column = target
        delete_rows(manager, historical_table, id_column, rows_per_txn, pause_seconds)
class Command(BaseDeletionCommand):
    """
    Purges every row from the credit_historicalcreditrequest and
    credit_historicalcreditrequirementstatus tables.

    Example usage: ./manage.py lms --settings=devstack delete_historical_credit_data
    """
    help = 'Deletes all historical CreditRequest and CreditRequirementStatus rows (in chunks).'

    def handle(self, *args, **options):
        """
        Empties both historical credit tables, one chunk-sized transaction at a time.

        The chunk size and the pause between chunks are whatever the parent
        command's ``handle`` returns for the given options.
        """
        rows_per_txn, pause_seconds = super(Command, self).handle(*args, **options)
        purge_targets = (
            (CreditRequest.objects, 'credit_historicalcreditrequest'),
            (CreditRequirementStatus.objects, 'credit_historicalcreditrequirementstatus'),
        )
        # Requests are purged first, then requirement statuses.
        for manager, historical_table in purge_targets:
            delete_rows(manager, historical_table, 'history_id', rows_per_txn, pause_seconds)
def delete_rows(model_mgr,
                table_name,
                primary_id_name,
                chunk_size,
                sleep_between):
    """
    Deletes *ALL* rows from table, chunking the deletes to avoid long table/row locks.

    The table's id range [MIN(id), MAX(id)] is read once up front and then walked in
    windows of ``chunk_size`` ids. Each window is removed by a single DELETE inside its
    own transaction, followed by a sleep of ``sleep_between`` seconds. Because the walk
    is over id values rather than row counts, a window may delete fewer than
    ``chunk_size`` rows when ids are sparse. Rows whose id is above the maximum that was
    read at the start are not touched.

    Args:
        model_mgr (django.db.models.manager.Manager): Django ORM mgr for the table's model.
            It is used only to select the database alias (``model_mgr.db``).
        table_name (str): Name of table from which to delete all rows.
        primary_id_name (str): Name of primary ID autoincrement column from table.
        chunk_size (int): Number of rows to delete in each transaction.
        sleep_between (float): Number of seconds to sleep between transactions.

    Raises:
        CommandError: If ``chunk_size`` is not positive or ``sleep_between`` is negative.
    """
    # Function-scope import keeps this block self-contained with respect to the module's import lines.
    from django.db import connections

    if chunk_size <= 0:
        raise CommandError('Only positive chunk size is allowed ({}).'.format(chunk_size))
    if sleep_between < 0:
        raise CommandError('Only non-negative sleep between seconds is allowed ({}).'.format(sleep_between))

    # Use the same database alias that model_mgr.raw() would have been routed to.
    db_alias = model_mgr.db
    connection = connections[db_alias]

    # Table and column names cannot be bound as SQL parameters, so they are interpolated
    # into the statements below. They must come from trusted code, never from user input.
    with connection.cursor() as cursor:
        cursor.execute(
            'SELECT MIN({}), MAX({}) FROM {}'.format(primary_id_name, primary_id_name, table_name)
        )
        min_id, max_id = cursor.fetchone()

    # MIN()/MAX() are NULL (None) only when the table is empty. Compare against None instead
    # of relying on truthiness so that a legitimate id of 0 is not mistaken for "no data".
    if min_id is None or max_id is None:
        log.info("No data exists in table %s - skipping.", table_name)
        return
    log.info(
        "STARTED: Deleting around %s rows with chunk size of %s and %s seconds between chunks.",
        max_id - min_id + 1, chunk_size, sleep_between
    )

    lower_id = min_id
    while lower_id <= max_id:
        # The final window is clipped so that upper_id never exceeds max_id + 1.
        deletions_now = min(chunk_size, max_id - lower_id + 1)
        upper_id = lower_id + deletions_now
        log.info("Deleting around %s rows between ids %s and %s...", deletions_now, lower_id, upper_id)
        # One transaction per window, opened on the same database the DELETE is sent to.
        with transaction.atomic(using=db_alias):
            # xss-lint: disable=python-wrap-html
            delete_sql = 'DELETE FROM {} WHERE {} >= {} AND {} < {}'.format(
                table_name, primary_id_name, lower_id, primary_id_name, upper_id
            )
            log.info(delete_sql)
            # Execute through a cursor: a DELETE returns no result set, so pushing it through
            # Manager.raw() only works by swallowing the TypeError raised during iteration,
            # which would also hide any genuine TypeError.
            with connection.cursor() as cursor:
                cursor.execute(delete_sql)
        lower_id += deletions_now
        log.info("Sleeping %s seconds...", sleep_between)
        time.sleep(sleep_between)
    log.info("FINISHED: Deleted at most %s rows total.", max_id - min_id + 1)
class BaseDeletionCommand(BaseCommand):
    """
    Shared base for commands that delete all rows from a table in chunks.

    It registers the ``--chunk_size`` and ``--sleep_between`` options and its
    ``handle`` hands their values back to the caller; it deletes nothing itself.
    """
    # Default maximum number of rows to delete in a single transaction.
    DEFAULT_CHUNK_SIZE = 10000

    # Default seconds to sleep between chunked deletes of rows.
    DEFAULT_SLEEP_BETWEEN_DELETES = 0

    def add_arguments(self, parser):
        """
        Registers the chunk-size and sleep options on the command's argument parser.
        """
        chunk_size_option = dict(
            default=self.DEFAULT_CHUNK_SIZE,
            type=int,
            help='Maximum number of rows to delete in each DB transaction. '
                 'Choose this value carefully to avoid DB outages!',
        )
        sleep_between_option = dict(
            default=self.DEFAULT_SLEEP_BETWEEN_DELETES,
            type=float,
            help='Seconds to sleep between chunked delete of rows.',
        )
        parser.add_argument('--chunk_size', **chunk_size_option)
        parser.add_argument('--sleep_between', **sleep_between_option)

    def handle(self, *args, **options):
        """
        Returns the ``(chunk_size, sleep_between)`` pair taken from ``options``.

        Either value falls back to the corresponding class default when its key
        is absent from ``options``.
        """
        chunk_size = options.get('chunk_size', self.DEFAULT_CHUNK_SIZE)
        sleep_between = options.get('sleep_between', self.DEFAULT_SLEEP_BETWEEN_DELETES)
        return chunk_size, sleep_between