diff --git a/cms/djangoapps/contentstore/management/commands/migrate_transcripts.py b/cms/djangoapps/contentstore/management/commands/migrate_transcripts.py index b8f89e212f..52f885e677 100644 --- a/cms/djangoapps/contentstore/management/commands/migrate_transcripts.py +++ b/cms/djangoapps/contentstore/management/commands/migrate_transcripts.py @@ -14,7 +14,7 @@ from cms.djangoapps.contentstore.tasks import ( enqueue_async_migrate_transcripts_tasks ) from openedx.core.lib.command_utils import get_mutually_exclusive_required_option, parse_course_keys -from openedx.core.djangoapps.video_config.models import TranscriptMigrationSetting +from openedx.core.djangoapps.video_config.models import TranscriptMigrationSetting, MigrationEnqueuedCourse from xmodule.modulestore.django import modulestore log = logging.getLogger(__name__) @@ -93,12 +93,36 @@ class Command(BaseCommand): elif courses_mode == 'course_ids': course_keys = map(self._parse_course_key, options['course_ids']) else: - if self._latest_settings().all_courses: - course_keys = [course.id for course in modulestore().get_course_summaries()] + migration_settings = self._latest_settings() + if migration_settings.all_courses: + all_courses = [course.id for course in modulestore().get_course_summaries()] + # Following is to avoid re-rerunning migrations for the already enqueued courses. + # Although the migrations job is idempotent, but we need to track if the transcript migration + # job was initiated for specific course(s) in order to elevate load from the workers and for + # the job to be able identify the past enqueued courses. + migrated_courses = MigrationEnqueuedCourse.objects.all().values_list('course_id', flat=True) + non_migrated_courses = [ + course_key + for course_key in all_courses + if course_key not in migrated_courses + ] + # Course batch to be migrated. + course_keys = non_migrated_courses[:migration_settings.batch_size] + + log.info( + ('[Transcript Migration] Courses(total): %s, ' + 'Courses(migrated): %s, Courses(non-migrated): %s, ' + 'Courses(migration-in-process): %s'), + len(all_courses), + len(migrated_courses), + len(non_migrated_courses), + len(course_keys), + ) else: - course_keys = parse_course_keys(self._latest_settings().course_ids.split()) - force_update = self._latest_settings().force_update - commit = self._latest_settings().commit + course_keys = parse_course_keys(migration_settings.course_ids.split()) + + force_update = migration_settings.force_update + commit = migration_settings.commit return course_keys, force_update, commit @@ -112,8 +136,15 @@ class Command(BaseCommand): """ Invokes the migrate transcripts enqueue function. """ + migration_settings = self._latest_settings() course_keys, force_update, commit = self._get_migration_options(options) - command_run = self._latest_settings().increment_run() if commit else -1 + command_run = migration_settings.increment_run() if commit else -1 enqueue_async_migrate_transcripts_tasks( course_keys=course_keys, commit=commit, command_run=command_run, force_update=force_update ) + + if commit and options.get('from_settings') and migration_settings.all_courses: + MigrationEnqueuedCourse.objects.bulk_create([ + MigrationEnqueuedCourse(course_id=course_key, command_run=command_run) + for course_key in course_keys + ]) diff --git a/cms/djangoapps/contentstore/management/commands/tests/test_migrate_transcripts.py b/cms/djangoapps/contentstore/management/commands/tests/test_migrate_transcripts.py index 5494d2cdb9..baae387dc1 100644 --- a/cms/djangoapps/contentstore/management/commands/tests/test_migrate_transcripts.py +++ b/cms/djangoapps/contentstore/management/commands/tests/test_migrate_transcripts.py @@ -2,11 +2,19 @@ """ Tests for course transcript migration management command. """ +import itertools import logging from datetime import datetime import pytz + +import ddt from django.test import TestCase from django.core.management import call_command, CommandError +from mock import patch + +from openedx.core.djangoapps.video_config.models import ( + TranscriptMigrationSetting, MigrationEnqueuedCourse +) from xmodule.modulestore.django import modulestore from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory @@ -68,6 +76,7 @@ class TestArgParsing(TestCase): call_command('migrate_transcripts', '--course-id', 'invalid-course') +@ddt.ddt class TestMigrateTranscripts(ModuleStoreTestCase): """ Tests migrating video transcripts in courses from contentstore to django storage @@ -286,3 +295,36 @@ class TestMigrateTranscripts(ModuleStoreTestCase): logger.check( *expected_log ) + + @ddt.data(*itertools.product([1, 2], [True, False], [True, False])) + @ddt.unpack + @patch('contentstore.management.commands.migrate_transcripts.log') + def test_migrate_transcripts_batch_size(self, batch_size, commit, all_courses, mock_logger): + """ + Test that migrations across course batches, is working as expected. + """ + migration_settings = TranscriptMigrationSetting.objects.create( + batch_size=batch_size, commit=commit, all_courses=all_courses + ) + + # Assert the number of job runs and migration enqueued courses. + self.assertEqual(migration_settings.command_run, 0) + self.assertEqual(MigrationEnqueuedCourse.objects.count(), 0) + + call_command('migrate_transcripts', '--from-settings') + + migration_settings = TranscriptMigrationSetting.current() + # Command run is only incremented if commit=True. + expected_command_run = 1 if commit else 0 + self.assertEqual(migration_settings.command_run, expected_command_run) + + if all_courses: + mock_logger.info.assert_called_with( + ('[Transcript Migration] Courses(total): %s, Courses(migrated): %s, ' + 'Courses(non-migrated): %s, Courses(migration-in-process): %s'), + 2, 0, 2, batch_size + ) + + # enqueued courses are only persisted if commit=True and job is running for all courses. + enqueued_courses = batch_size if commit and all_courses else 0 + self.assertEqual(MigrationEnqueuedCourse.objects.count(), enqueued_courses) diff --git a/openedx/core/djangoapps/video_config/admin.py b/openedx/core/djangoapps/video_config/admin.py index 7228dd7a8c..b598a06ad4 100644 --- a/openedx/core/djangoapps/video_config/admin.py +++ b/openedx/core/djangoapps/video_config/admin.py @@ -12,7 +12,7 @@ from openedx.core.djangoapps.video_config.forms import ( from openedx.core.djangoapps.video_config.models import ( CourseHLSPlaybackEnabledFlag, HLSPlaybackEnabledFlag, CourseVideoTranscriptEnabledFlag, VideoTranscriptEnabledFlag, - TranscriptMigrationSetting, + TranscriptMigrationSetting, MigrationEnqueuedCourse, ) @@ -49,9 +49,23 @@ class CourseVideoTranscriptEnabledFlagAdmin(CourseSpecificEnabledFlagBaseAdmin): """ form = CourseVideoTranscriptFlagAdminForm + +class MigrationEnqueuedCourseAdmin(admin.ModelAdmin): + """ + Simple, read-only list/search view of the Courses whose transcripts have been migrated to S3. + """ + list_display = [ + 'course_id', + 'command_run', + ] + + search_fields = ['course_id', 'command_run'] + + admin.site.register(HLSPlaybackEnabledFlag, ConfigurationModelAdmin) admin.site.register(CourseHLSPlaybackEnabledFlag, CourseHLSPlaybackEnabledFlagAdmin) admin.site.register(VideoTranscriptEnabledFlag, ConfigurationModelAdmin) admin.site.register(CourseVideoTranscriptEnabledFlag, CourseVideoTranscriptEnabledFlagAdmin) admin.site.register(TranscriptMigrationSetting, ConfigurationModelAdmin) +admin.site.register(MigrationEnqueuedCourse, MigrationEnqueuedCourseAdmin) diff --git a/openedx/core/djangoapps/video_config/migrations/0005_auto_20180719_0752.py b/openedx/core/djangoapps/video_config/migrations/0005_auto_20180719_0752.py new file mode 100644 index 0000000000..2ac1a957cd --- /dev/null +++ b/openedx/core/djangoapps/video_config/migrations/0005_auto_20180719_0752.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.14 on 2018-07-19 07:52 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.utils.timezone +import model_utils.fields +import opaque_keys.edx.django.models + + +class Migration(migrations.Migration): + + dependencies = [ + ('video_config', '0004_transcriptmigrationsetting_command_run'), + ] + + operations = [ + migrations.CreateModel( + name='MigrationEnqueuedCourse', + fields=[ + ('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')), + ('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')), + ('course_id', opaque_keys.edx.django.models.CourseKeyField(db_index=True, max_length=255, primary_key=True, serialize=False)), + ('command_run', models.PositiveIntegerField(default=0)), + ], + options={ + 'abstract': False, + }, + ), + migrations.AddField( + model_name='transcriptmigrationsetting', + name='batch_size', + field=models.PositiveIntegerField(default=0), + ), + ] diff --git a/openedx/core/djangoapps/video_config/models.py b/openedx/core/djangoapps/video_config/models.py index 5743182584..aead1c9c39 100644 --- a/openedx/core/djangoapps/video_config/models.py +++ b/openedx/core/djangoapps/video_config/models.py @@ -1,8 +1,10 @@ """ Configuration models for Video XModule """ -from config_models.models import ConfigurationModel from django.db.models import BooleanField, TextField, PositiveIntegerField + +from config_models.models import ConfigurationModel +from model_utils.models import TimeStampedModel from opaque_keys.edx.django.models import CourseKeyField @@ -150,6 +152,7 @@ class TranscriptMigrationSetting(ConfigurationModel): help_text="Flag to force migrate transcripts for the requested courses, overwrite if already present." ) command_run = PositiveIntegerField(default=0) + batch_size = PositiveIntegerField(default=0) commit = BooleanField( default=False, help_text="Dry-run or commit." @@ -170,3 +173,16 @@ class TranscriptMigrationSetting(ConfigurationModel): self.command_run += 1 self.save() return self.command_run + + +class MigrationEnqueuedCourse(TimeStampedModel): + """ + Temporary model to persist the course IDs who has been enqueued for transcripts migration to S3. + """ + course_id = CourseKeyField(db_index=True, primary_key=True, max_length=255) + command_run = PositiveIntegerField(default=0) + + def __unicode__(self): + return u'MigrationEnqueuedCourse: ID={course_id}, Run={command_run}'.format( + course_id=self.course_id, command_run=self.command_run + )