Add batching support
This commit is contained in:
@@ -14,7 +14,7 @@ from cms.djangoapps.contentstore.tasks import (
|
||||
enqueue_async_migrate_transcripts_tasks
|
||||
)
|
||||
from openedx.core.lib.command_utils import get_mutually_exclusive_required_option, parse_course_keys
|
||||
from openedx.core.djangoapps.video_config.models import TranscriptMigrationSetting
|
||||
from openedx.core.djangoapps.video_config.models import TranscriptMigrationSetting, MigrationEnqueuedCourse
|
||||
from xmodule.modulestore.django import modulestore
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -93,12 +93,36 @@ class Command(BaseCommand):
|
||||
elif courses_mode == 'course_ids':
|
||||
course_keys = map(self._parse_course_key, options['course_ids'])
|
||||
else:
|
||||
if self._latest_settings().all_courses:
|
||||
course_keys = [course.id for course in modulestore().get_course_summaries()]
|
||||
migration_settings = self._latest_settings()
|
||||
if migration_settings.all_courses:
|
||||
all_courses = [course.id for course in modulestore().get_course_summaries()]
|
||||
# Following is to avoid re-rerunning migrations for the already enqueued courses.
|
||||
# Although the migrations job is idempotent, but we need to track if the transcript migration
|
||||
# job was initiated for specific course(s) in order to elevate load from the workers and for
|
||||
# the job to be able identify the past enqueued courses.
|
||||
migrated_courses = MigrationEnqueuedCourse.objects.all().values_list('course_id', flat=True)
|
||||
non_migrated_courses = [
|
||||
course_key
|
||||
for course_key in all_courses
|
||||
if course_key not in migrated_courses
|
||||
]
|
||||
# Course batch to be migrated.
|
||||
course_keys = non_migrated_courses[:migration_settings.batch_size]
|
||||
|
||||
log.info(
|
||||
('[Transcript Migration] Courses(total): %s, '
|
||||
'Courses(migrated): %s, Courses(non-migrated): %s, '
|
||||
'Courses(migration-in-process): %s'),
|
||||
len(all_courses),
|
||||
len(migrated_courses),
|
||||
len(non_migrated_courses),
|
||||
len(course_keys),
|
||||
)
|
||||
else:
|
||||
course_keys = parse_course_keys(self._latest_settings().course_ids.split())
|
||||
force_update = self._latest_settings().force_update
|
||||
commit = self._latest_settings().commit
|
||||
course_keys = parse_course_keys(migration_settings.course_ids.split())
|
||||
|
||||
force_update = migration_settings.force_update
|
||||
commit = migration_settings.commit
|
||||
|
||||
return course_keys, force_update, commit
|
||||
|
||||
@@ -112,8 +136,15 @@ class Command(BaseCommand):
|
||||
"""
|
||||
Invokes the migrate transcripts enqueue function.
|
||||
"""
|
||||
migration_settings = self._latest_settings()
|
||||
course_keys, force_update, commit = self._get_migration_options(options)
|
||||
command_run = self._latest_settings().increment_run() if commit else -1
|
||||
command_run = migration_settings.increment_run() if commit else -1
|
||||
enqueue_async_migrate_transcripts_tasks(
|
||||
course_keys=course_keys, commit=commit, command_run=command_run, force_update=force_update
|
||||
)
|
||||
|
||||
if commit and options.get('from_settings') and migration_settings.all_courses:
|
||||
MigrationEnqueuedCourse.objects.bulk_create([
|
||||
MigrationEnqueuedCourse(course_id=course_key, command_run=command_run)
|
||||
for course_key in course_keys
|
||||
])
|
||||
|
||||
@@ -2,11 +2,19 @@
|
||||
"""
|
||||
Tests for course transcript migration management command.
|
||||
"""
|
||||
import itertools
|
||||
import logging
|
||||
from datetime import datetime
|
||||
import pytz
|
||||
|
||||
import ddt
|
||||
from django.test import TestCase
|
||||
from django.core.management import call_command, CommandError
|
||||
from mock import patch
|
||||
|
||||
from openedx.core.djangoapps.video_config.models import (
|
||||
TranscriptMigrationSetting, MigrationEnqueuedCourse
|
||||
)
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
|
||||
from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory
|
||||
@@ -68,6 +76,7 @@ class TestArgParsing(TestCase):
|
||||
call_command('migrate_transcripts', '--course-id', 'invalid-course')
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
class TestMigrateTranscripts(ModuleStoreTestCase):
|
||||
"""
|
||||
Tests migrating video transcripts in courses from contentstore to django storage
|
||||
@@ -286,3 +295,36 @@ class TestMigrateTranscripts(ModuleStoreTestCase):
|
||||
logger.check(
|
||||
*expected_log
|
||||
)
|
||||
|
||||
@ddt.data(*itertools.product([1, 2], [True, False], [True, False]))
|
||||
@ddt.unpack
|
||||
@patch('contentstore.management.commands.migrate_transcripts.log')
|
||||
def test_migrate_transcripts_batch_size(self, batch_size, commit, all_courses, mock_logger):
|
||||
"""
|
||||
Test that migrations across course batches, is working as expected.
|
||||
"""
|
||||
migration_settings = TranscriptMigrationSetting.objects.create(
|
||||
batch_size=batch_size, commit=commit, all_courses=all_courses
|
||||
)
|
||||
|
||||
# Assert the number of job runs and migration enqueued courses.
|
||||
self.assertEqual(migration_settings.command_run, 0)
|
||||
self.assertEqual(MigrationEnqueuedCourse.objects.count(), 0)
|
||||
|
||||
call_command('migrate_transcripts', '--from-settings')
|
||||
|
||||
migration_settings = TranscriptMigrationSetting.current()
|
||||
# Command run is only incremented if commit=True.
|
||||
expected_command_run = 1 if commit else 0
|
||||
self.assertEqual(migration_settings.command_run, expected_command_run)
|
||||
|
||||
if all_courses:
|
||||
mock_logger.info.assert_called_with(
|
||||
('[Transcript Migration] Courses(total): %s, Courses(migrated): %s, '
|
||||
'Courses(non-migrated): %s, Courses(migration-in-process): %s'),
|
||||
2, 0, 2, batch_size
|
||||
)
|
||||
|
||||
# enqueued courses are only persisted if commit=True and job is running for all courses.
|
||||
enqueued_courses = batch_size if commit and all_courses else 0
|
||||
self.assertEqual(MigrationEnqueuedCourse.objects.count(), enqueued_courses)
|
||||
|
||||
@@ -12,7 +12,7 @@ from openedx.core.djangoapps.video_config.forms import (
|
||||
from openedx.core.djangoapps.video_config.models import (
|
||||
CourseHLSPlaybackEnabledFlag, HLSPlaybackEnabledFlag,
|
||||
CourseVideoTranscriptEnabledFlag, VideoTranscriptEnabledFlag,
|
||||
TranscriptMigrationSetting,
|
||||
TranscriptMigrationSetting, MigrationEnqueuedCourse,
|
||||
)
|
||||
|
||||
|
||||
@@ -49,9 +49,23 @@ class CourseVideoTranscriptEnabledFlagAdmin(CourseSpecificEnabledFlagBaseAdmin):
|
||||
"""
|
||||
form = CourseVideoTranscriptFlagAdminForm
|
||||
|
||||
|
||||
class MigrationEnqueuedCourseAdmin(admin.ModelAdmin):
|
||||
"""
|
||||
Simple, read-only list/search view of the Courses whose transcripts have been migrated to S3.
|
||||
"""
|
||||
list_display = [
|
||||
'course_id',
|
||||
'command_run',
|
||||
]
|
||||
|
||||
search_fields = ['course_id', 'command_run']
|
||||
|
||||
|
||||
admin.site.register(HLSPlaybackEnabledFlag, ConfigurationModelAdmin)
|
||||
admin.site.register(CourseHLSPlaybackEnabledFlag, CourseHLSPlaybackEnabledFlagAdmin)
|
||||
|
||||
admin.site.register(VideoTranscriptEnabledFlag, ConfigurationModelAdmin)
|
||||
admin.site.register(CourseVideoTranscriptEnabledFlag, CourseVideoTranscriptEnabledFlagAdmin)
|
||||
admin.site.register(TranscriptMigrationSetting, ConfigurationModelAdmin)
|
||||
admin.site.register(MigrationEnqueuedCourse, MigrationEnqueuedCourseAdmin)
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Generated by Django 1.11.14 on 2018-07-19 07:52
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.utils.timezone
|
||||
import model_utils.fields
|
||||
import opaque_keys.edx.django.models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('video_config', '0004_transcriptmigrationsetting_command_run'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='MigrationEnqueuedCourse',
|
||||
fields=[
|
||||
('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')),
|
||||
('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')),
|
||||
('course_id', opaque_keys.edx.django.models.CourseKeyField(db_index=True, max_length=255, primary_key=True, serialize=False)),
|
||||
('command_run', models.PositiveIntegerField(default=0)),
|
||||
],
|
||||
options={
|
||||
'abstract': False,
|
||||
},
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='transcriptmigrationsetting',
|
||||
name='batch_size',
|
||||
field=models.PositiveIntegerField(default=0),
|
||||
),
|
||||
]
|
||||
@@ -1,8 +1,10 @@
|
||||
"""
|
||||
Configuration models for Video XModule
|
||||
"""
|
||||
from config_models.models import ConfigurationModel
|
||||
from django.db.models import BooleanField, TextField, PositiveIntegerField
|
||||
|
||||
from config_models.models import ConfigurationModel
|
||||
from model_utils.models import TimeStampedModel
|
||||
from opaque_keys.edx.django.models import CourseKeyField
|
||||
|
||||
|
||||
@@ -150,6 +152,7 @@ class TranscriptMigrationSetting(ConfigurationModel):
|
||||
help_text="Flag to force migrate transcripts for the requested courses, overwrite if already present."
|
||||
)
|
||||
command_run = PositiveIntegerField(default=0)
|
||||
batch_size = PositiveIntegerField(default=0)
|
||||
commit = BooleanField(
|
||||
default=False,
|
||||
help_text="Dry-run or commit."
|
||||
@@ -170,3 +173,16 @@ class TranscriptMigrationSetting(ConfigurationModel):
|
||||
self.command_run += 1
|
||||
self.save()
|
||||
return self.command_run
|
||||
|
||||
|
||||
class MigrationEnqueuedCourse(TimeStampedModel):
|
||||
"""
|
||||
Temporary model to persist the course IDs who has been enqueued for transcripts migration to S3.
|
||||
"""
|
||||
course_id = CourseKeyField(db_index=True, primary_key=True, max_length=255)
|
||||
command_run = PositiveIntegerField(default=0)
|
||||
|
||||
def __unicode__(self):
|
||||
return u'MigrationEnqueuedCourse: ID={course_id}, Run={command_run}'.format(
|
||||
course_id=self.course_id, command_run=self.command_run
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user