Add batching support

This commit is contained in:
Qubad786
2018-07-19 17:14:09 +05:00
committed by M. Rehan
parent 80e650d971
commit 2d24482cbf
5 changed files with 147 additions and 9 deletions

View File

@@ -14,7 +14,7 @@ from cms.djangoapps.contentstore.tasks import (
enqueue_async_migrate_transcripts_tasks
)
from openedx.core.lib.command_utils import get_mutually_exclusive_required_option, parse_course_keys
from openedx.core.djangoapps.video_config.models import TranscriptMigrationSetting
from openedx.core.djangoapps.video_config.models import TranscriptMigrationSetting, MigrationEnqueuedCourse
from xmodule.modulestore.django import modulestore
log = logging.getLogger(__name__)
@@ -93,12 +93,36 @@ class Command(BaseCommand):
elif courses_mode == 'course_ids':
course_keys = map(self._parse_course_key, options['course_ids'])
else:
if self._latest_settings().all_courses:
course_keys = [course.id for course in modulestore().get_course_summaries()]
migration_settings = self._latest_settings()
if migration_settings.all_courses:
all_courses = [course.id for course in modulestore().get_course_summaries()]
# Following is to avoid re-rerunning migrations for the already enqueued courses.
# Although the migrations job is idempotent, but we need to track if the transcript migration
# job was initiated for specific course(s) in order to elevate load from the workers and for
# the job to be able identify the past enqueued courses.
migrated_courses = MigrationEnqueuedCourse.objects.all().values_list('course_id', flat=True)
non_migrated_courses = [
course_key
for course_key in all_courses
if course_key not in migrated_courses
]
# Course batch to be migrated.
course_keys = non_migrated_courses[:migration_settings.batch_size]
log.info(
('[Transcript Migration] Courses(total): %s, '
'Courses(migrated): %s, Courses(non-migrated): %s, '
'Courses(migration-in-process): %s'),
len(all_courses),
len(migrated_courses),
len(non_migrated_courses),
len(course_keys),
)
else:
course_keys = parse_course_keys(self._latest_settings().course_ids.split())
force_update = self._latest_settings().force_update
commit = self._latest_settings().commit
course_keys = parse_course_keys(migration_settings.course_ids.split())
force_update = migration_settings.force_update
commit = migration_settings.commit
return course_keys, force_update, commit
@@ -112,8 +136,15 @@ class Command(BaseCommand):
"""
Invokes the migrate transcripts enqueue function.
"""
migration_settings = self._latest_settings()
course_keys, force_update, commit = self._get_migration_options(options)
command_run = self._latest_settings().increment_run() if commit else -1
command_run = migration_settings.increment_run() if commit else -1
enqueue_async_migrate_transcripts_tasks(
course_keys=course_keys, commit=commit, command_run=command_run, force_update=force_update
)
if commit and options.get('from_settings') and migration_settings.all_courses:
MigrationEnqueuedCourse.objects.bulk_create([
MigrationEnqueuedCourse(course_id=course_key, command_run=command_run)
for course_key in course_keys
])

View File

@@ -2,11 +2,19 @@
"""
Tests for course transcript migration management command.
"""
import itertools
import logging
from datetime import datetime
import pytz
import ddt
from django.test import TestCase
from django.core.management import call_command, CommandError
from mock import patch
from openedx.core.djangoapps.video_config.models import (
TranscriptMigrationSetting, MigrationEnqueuedCourse
)
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory
@@ -68,6 +76,7 @@ class TestArgParsing(TestCase):
call_command('migrate_transcripts', '--course-id', 'invalid-course')
@ddt.ddt
class TestMigrateTranscripts(ModuleStoreTestCase):
"""
Tests migrating video transcripts in courses from contentstore to django storage
@@ -286,3 +295,36 @@ class TestMigrateTranscripts(ModuleStoreTestCase):
logger.check(
*expected_log
)
@ddt.data(*itertools.product([1, 2], [True, False], [True, False]))
@ddt.unpack
@patch('contentstore.management.commands.migrate_transcripts.log')
def test_migrate_transcripts_batch_size(self, batch_size, commit, all_courses, mock_logger):
"""
Test that migrations across course batches, is working as expected.
"""
migration_settings = TranscriptMigrationSetting.objects.create(
batch_size=batch_size, commit=commit, all_courses=all_courses
)
# Assert the number of job runs and migration enqueued courses.
self.assertEqual(migration_settings.command_run, 0)
self.assertEqual(MigrationEnqueuedCourse.objects.count(), 0)
call_command('migrate_transcripts', '--from-settings')
migration_settings = TranscriptMigrationSetting.current()
# Command run is only incremented if commit=True.
expected_command_run = 1 if commit else 0
self.assertEqual(migration_settings.command_run, expected_command_run)
if all_courses:
mock_logger.info.assert_called_with(
('[Transcript Migration] Courses(total): %s, Courses(migrated): %s, '
'Courses(non-migrated): %s, Courses(migration-in-process): %s'),
2, 0, 2, batch_size
)
# enqueued courses are only persisted if commit=True and job is running for all courses.
enqueued_courses = batch_size if commit and all_courses else 0
self.assertEqual(MigrationEnqueuedCourse.objects.count(), enqueued_courses)

View File

@@ -12,7 +12,7 @@ from openedx.core.djangoapps.video_config.forms import (
from openedx.core.djangoapps.video_config.models import (
CourseHLSPlaybackEnabledFlag, HLSPlaybackEnabledFlag,
CourseVideoTranscriptEnabledFlag, VideoTranscriptEnabledFlag,
TranscriptMigrationSetting,
TranscriptMigrationSetting, MigrationEnqueuedCourse,
)
@@ -49,9 +49,23 @@ class CourseVideoTranscriptEnabledFlagAdmin(CourseSpecificEnabledFlagBaseAdmin):
"""
form = CourseVideoTranscriptFlagAdminForm
class MigrationEnqueuedCourseAdmin(admin.ModelAdmin):
"""
Simple, read-only list/search view of the Courses whose transcripts have been migrated to S3.
"""
list_display = [
'course_id',
'command_run',
]
search_fields = ['course_id', 'command_run']
admin.site.register(HLSPlaybackEnabledFlag, ConfigurationModelAdmin)
admin.site.register(CourseHLSPlaybackEnabledFlag, CourseHLSPlaybackEnabledFlagAdmin)
admin.site.register(VideoTranscriptEnabledFlag, ConfigurationModelAdmin)
admin.site.register(CourseVideoTranscriptEnabledFlag, CourseVideoTranscriptEnabledFlagAdmin)
admin.site.register(TranscriptMigrationSetting, ConfigurationModelAdmin)
admin.site.register(MigrationEnqueuedCourse, MigrationEnqueuedCourseAdmin)

View File

@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.14 on 2018-07-19 07:52
from __future__ import unicode_literals
from django.db import migrations, models
import django.utils.timezone
import model_utils.fields
import opaque_keys.edx.django.models
class Migration(migrations.Migration):
dependencies = [
('video_config', '0004_transcriptmigrationsetting_command_run'),
]
operations = [
migrations.CreateModel(
name='MigrationEnqueuedCourse',
fields=[
('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')),
('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')),
('course_id', opaque_keys.edx.django.models.CourseKeyField(db_index=True, max_length=255, primary_key=True, serialize=False)),
('command_run', models.PositiveIntegerField(default=0)),
],
options={
'abstract': False,
},
),
migrations.AddField(
model_name='transcriptmigrationsetting',
name='batch_size',
field=models.PositiveIntegerField(default=0),
),
]

View File

@@ -1,8 +1,10 @@
"""
Configuration models for Video XModule
"""
from config_models.models import ConfigurationModel
from django.db.models import BooleanField, TextField, PositiveIntegerField
from config_models.models import ConfigurationModel
from model_utils.models import TimeStampedModel
from opaque_keys.edx.django.models import CourseKeyField
@@ -150,6 +152,7 @@ class TranscriptMigrationSetting(ConfigurationModel):
help_text="Flag to force migrate transcripts for the requested courses, overwrite if already present."
)
command_run = PositiveIntegerField(default=0)
batch_size = PositiveIntegerField(default=0)
commit = BooleanField(
default=False,
help_text="Dry-run or commit."
@@ -170,3 +173,16 @@ class TranscriptMigrationSetting(ConfigurationModel):
self.command_run += 1
self.save()
return self.command_run
class MigrationEnqueuedCourse(TimeStampedModel):
"""
Temporary model to persist the course IDs who has been enqueued for transcripts migration to S3.
"""
course_id = CourseKeyField(db_index=True, primary_key=True, max_length=255)
command_run = PositiveIntegerField(default=0)
def __unicode__(self):
return u'MigrationEnqueuedCourse: ID={course_id}, Run={command_run}'.format(
course_id=self.course_id, command_run=self.command_run
)