Files
edx-platform/cms/djangoapps/contentstore/management/commands/migrate_transcripts.py
2019-12-30 12:25:38 -05:00

156 lines
6.3 KiB
Python

"""
Command to migrate transcripts to django storage.
"""
import logging
from django.core.management import BaseCommand, CommandError
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey
from opaque_keys.edx.locator import CourseLocator
from six.moves import map
from cms.djangoapps.contentstore.tasks import (
DEFAULT_ALL_COURSES,
DEFAULT_COMMIT,
DEFAULT_FORCE_UPDATE,
enqueue_async_migrate_transcripts_tasks
)
from openedx.core.djangoapps.video_config.models import MigrationEnqueuedCourse, TranscriptMigrationSetting
from openedx.core.lib.command_utils import get_mutually_exclusive_required_option, parse_course_keys
from xmodule.modulestore.django import modulestore
# Module-level logger, named after this module so log records can be traced back to the command.
log = logging.getLogger(__name__)
class Command(BaseCommand):
    """
    Enqueue asynchronous tasks that migrate video transcripts to django storage.

    Example usage:
        $ ./manage.py cms migrate_transcripts --all-courses --force-update --commit
        $ ./manage.py cms migrate_transcripts --course-id 'Course1' --course-id 'Course2' --commit
        $ ./manage.py cms migrate_transcripts --from-settings
    """
    help = 'Migrates transcripts to S3 for one or more courses.'

    def add_arguments(self, parser):
        """
        Add arguments to the command parser.

        Course selection is expressed through `--course-id` (repeatable),
        `--all-courses` or `--from-settings`; `--force-update` and `--commit`
        tune how the migration is carried out.
        """
        parser.add_argument(
            '--course-id', '--course_id',
            dest='course_ids',
            action='append',
            help=u'Migrates transcripts for the list of courses.'
        )
        parser.add_argument(
            '--all-courses', '--all', '--all_courses',
            dest='all_courses',
            action='store_true',
            default=DEFAULT_ALL_COURSES,
            help=u'Migrates transcripts to the configured django storage for all courses.'
        )
        parser.add_argument(
            '--from-settings', '--from_settings',
            dest='from_settings',
            help='Migrate Transcripts with settings set via django admin',
            action='store_true',
            default=False,
        )
        parser.add_argument(
            '--force-update', '--force_update',
            dest='force_update',
            action='store_true',
            default=DEFAULT_FORCE_UPDATE,
            help=u'Force migrate transcripts for the requested courses, overwrite if already present.'
        )
        parser.add_argument(
            '--commit',
            dest='commit',
            action='store_true',
            default=DEFAULT_COMMIT,
            help=u'Commits the discovered video transcripts to django storage. '
                 u'Without this flag, the command will return the transcripts discovered for migration.'
        )

    def _parse_course_key(self, raw_value):
        """
        Parse a course key from its string form.

        Arguments:
            raw_value: string representation of the course key.

        Returns:
            The parsed key, guaranteed to be a `CourseLocator`.

        Raises:
            CommandError: if `raw_value` cannot be parsed as a course key, or
                parses to something that is not a `CourseLocator`.
        """
        try:
            result = CourseKey.from_string(raw_value)
        except InvalidKeyError:
            raise CommandError(u"Invalid course_key: '%s'." % raw_value)
        if not isinstance(result, CourseLocator):
            raise CommandError(u"Argument {0} is not a course key".format(raw_value))
        return result

    def _get_migration_options(self, options):
        """
        Resolve which courses to migrate and with which flags.

        The course selection mode is picked by
        `get_mutually_exclusive_required_option` from `course_ids`,
        `all_courses` and `from_settings`. In the first two modes
        `force_update` and `commit` come from the command line; in
        `from_settings` mode they, and the course selection, come from the
        latest `TranscriptMigrationSetting`.

        Arguments:
            options: dict of parsed command options.

        Returns:
            A `(course_keys, force_update, commit)` tuple.
        """
        force_update = options['force_update']
        commit = options['commit']
        courses_mode = get_mutually_exclusive_required_option(options, 'course_ids', 'all_courses', 'from_settings')
        if courses_mode == 'all_courses':
            course_keys = [course.id for course in modulestore().get_course_summaries()]
        elif courses_mode == 'course_ids':
            course_keys = list(map(self._parse_course_key, options['course_ids']))
        else:
            migration_settings = self._latest_settings()
            if migration_settings.all_courses:
                all_courses = [course.id for course in modulestore().get_course_summaries()]
                # Avoid re-running migrations for courses that were already enqueued.
                # Although the migration job is idempotent, we track which courses it was
                # initiated for in order to alleviate load on the workers and to let the
                # job identify the previously enqueued courses.
                migrated_courses = MigrationEnqueuedCourse.objects.all().values_list('course_id', flat=True)
                # Materialize the ids into a set once: membership tests against the
                # queryset itself are linear scans, which made this filter quadratic.
                migrated_course_ids = set(migrated_courses)
                non_migrated_courses = [
                    course_key
                    for course_key in all_courses
                    if course_key not in migrated_course_ids
                ]
                # Course batch to be migrated.
                course_keys = non_migrated_courses[:migration_settings.batch_size]
                log.info(
                    (u'[Transcript Migration] Courses(total): %s, '
                     u'Courses(migrated): %s, Courses(non-migrated): %s, '
                     u'Courses(migration-in-process): %s'),
                    len(all_courses),
                    len(migrated_courses),
                    len(non_migrated_courses),
                    len(course_keys),
                )
            else:
                course_keys = parse_course_keys(migration_settings.course_ids.split())

            # In settings mode the admin-configured flags replace the command line ones.
            force_update = migration_settings.force_update
            commit = migration_settings.commit

        return course_keys, force_update, commit

    def _latest_settings(self):
        """
        Return the latest version of the TranscriptMigrationSetting
        """
        return TranscriptMigrationSetting.current()

    def handle(self, *args, **options):
        """
        Invokes the migrate transcripts enqueue function.

        When committing, the run counter on the migration settings is
        incremented and passed to the enqueued tasks; otherwise -1 is passed as
        the run number. For committed `--from-settings` runs configured for
        all courses, every enqueued course is recorded in
        `MigrationEnqueuedCourse`.
        """
        migration_settings = self._latest_settings()
        course_keys, force_update, commit = self._get_migration_options(options)
        command_run = migration_settings.increment_run() if commit else -1
        enqueue_async_migrate_transcripts_tasks(
            course_keys=course_keys, commit=commit, command_run=command_run, force_update=force_update
        )
        if commit and options.get('from_settings') and migration_settings.all_courses:
            for course_key in course_keys:
                # `defaults` applies only when the row is created, so existing
                # records keep their original command_run and new ones are
                # written in a single INSERT instead of INSERT + UPDATE.
                MigrationEnqueuedCourse.objects.get_or_create(
                    course_id=course_key,
                    defaults={'command_run': command_run},
                )