feat: Backfill and Django Admin for Learning Sequence Outline

* Adds the backfill_course_outlines management command to contentstore
* Adds a read-only Django admin interface to learning_sequences for the
  support team and debugging.
* Adds two new functions to the learning_sequences public API:
  key_supports_outlines and get_course_keys_with_outlines

The learning_sequences app isn't supposed to know about contentstore or
modulestore, as it's intended to be extracted out of edx-platform in the
long term. Therefore, the backfill_course_outlines command is in
contentstore, and not learning_sequences.

This work was tracked in TNL-7983, but it also fixes a bug where we were
trying to generate course outlines for libraries (TNL-7981).

All Open edX instances upgrading to Lilac should run the
backfill_course_outlines command as part of their upgrade process.
This commit is contained in:
David Ormsbee
2021-03-04 11:58:00 -05:00
parent 6a00878f59
commit f5b74fcf31
12 changed files with 374 additions and 12 deletions

View File

@@ -0,0 +1,66 @@
"""
Management command to create the course outline for all courses that are missing
an outline. Outlines are built automatically on course publish and manually
using the `update_course_outline` command, but they can be backfilled using this
command. People updating to Lilac release should run this command as part of the
upgrade process.
This should be invoked from the Studio process.
"""
import logging
from django.core.management.base import BaseCommand
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.content.learning_sequences.api import (
get_course_keys_with_outlines,
key_supports_outlines,
)
from ...tasks import update_outline_from_modulestore_task
log = logging.getLogger('backfill_course_outlines')
class Command(BaseCommand):
"""
Invoke with:
python manage.py cms backfill_course_outlines
"""
help = (
"Backfill missing course outlines. This will queue a celery task for "
"each course with a missing outline, meaning that the outlines may be "
"generated minutes or hours after this script has finished running."
)
def add_arguments(self, parser):
parser.add_argument(
'--dry',
action='store_true',
help="Show course outlines that will be backfilled, but do not make any changes."
)
def handle(self, *args, **options):
dry_run = options.get('dry', False)
log.info("Starting backfill_course_outlines{}".format(" (dry run)" if dry_run else ""))
all_course_keys_qs = CourseOverview.objects.values_list('id', flat=True)
# .difference() is not supported in MySQL, but this at least does the
# SELECT NOT IN... subquery in the database rather than Python.
missing_outlines_qs = all_course_keys_qs.exclude(
id__in=get_course_keys_with_outlines()
)
num_courses_needing_outlines = len(missing_outlines_qs)
log.info(
"Found %d courses without outlines. Queuing tasks...",
num_courses_needing_outlines
)
for course_key in missing_outlines_qs:
if key_supports_outlines(course_key):
log.info("Queuing outline creation for %s", course_key)
if not dry_run:
update_outline_from_modulestore_task.delay(str(course_key))
else:
log.info("Outlines not supported for %s - skipping", course_key)

View File

@@ -0,0 +1,117 @@
"""
Tests for `backfill_course_outlines` Studio (cms) management command.
"""
from django.core.management import call_command
from opaque_keys.edx.keys import CourseKey
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.content.learning_sequences.api import get_course_keys_with_outlines
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase
from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory
from ....outlines import update_outline_from_modulestore
class BackfillCourseOutlinesTest(SharedModuleStoreTestCase):
"""
Test `backfill_orgs_and_org_courses`.
"""
def setUp(self):
"""
Create the CourseOverviews we need for this test case.
There's no publish signal, so we manually create the CourseOverviews.
Without that, backfill_orgs_and_org_courses has no way to figure out
which courses exist, which it needs in order to figure out which ones
need backfilling.
We can't turn on the course_published signal because if we did so, then
the outlines would get generated automatically, and there'd be nothing
to backfill.
"""
super().setUp()
CourseOverview.update_select_courses(self.course_keys, force_update=True)
@classmethod
def setUpClass(cls):
"""
We set up some content here, without publish signals enabled.
"""
super().setUpClass()
course_run_ids = [
"OpenEdX/OutlineCourse/OldMongoRun1",
"course-v1:OpenEdX+OutlineCourse+Run2",
"course-v1:OpenEdX+OutlineCourse+Run3",
]
cls.course_keys = [
CourseKey.from_string(course_run_id) for course_run_id in course_run_ids
]
for course_key in cls.course_keys:
if course_key.deprecated:
store_type = ModuleStoreEnum.Type.mongo
else:
store_type = ModuleStoreEnum.Type.split
with cls.store.default_store(store_type):
course = CourseFactory.create(
org=course_key.org,
number=course_key.course,
run=course_key.run,
display_name=f"Outline Backfill Test Course {course_key.run}"
)
with cls.store.bulk_operations(course_key):
section = ItemFactory.create(
parent_location=course.location,
category="chapter",
display_name="A Section"
)
sequence = ItemFactory.create(
parent_location=section.location,
category="sequential",
display_name="A Sequence"
)
unit = ItemFactory.create(
parent_location=sequence.location,
category="vertical",
display_name="A Unit"
)
ItemFactory.create(
parent_location=unit.location,
category="html",
display_name="An HTML Module"
)
def test_end_to_end(self):
"""Normal invocation, it should skip only the Old Mongo course."""
# In the beginning, we have no outlines...
assert not get_course_keys_with_outlines().exists()
# Run command and outlines appear for Split Mongo courses...
call_command("backfill_course_outlines")
course_keys_with_outlines = set(get_course_keys_with_outlines())
assert course_keys_with_outlines == {
CourseKey.from_string("course-v1:OpenEdX+OutlineCourse+Run2"),
CourseKey.from_string("course-v1:OpenEdX+OutlineCourse+Run3"),
}
def test_partial(self):
"""Also works when we've manually created one in advance."""
course_keys_with_outlines = set(get_course_keys_with_outlines())
assert not get_course_keys_with_outlines().exists()
# Manually create one
update_outline_from_modulestore(
CourseKey.from_string("course-v1:OpenEdX+OutlineCourse+Run2")
)
assert set(get_course_keys_with_outlines()) == {
CourseKey.from_string("course-v1:OpenEdX+OutlineCourse+Run2")
}
# backfill command should fill in the other
call_command("backfill_course_outlines")
course_keys_with_outlines = set(get_course_keys_with_outlines())
assert course_keys_with_outlines == {
CourseKey.from_string("course-v1:OpenEdX+OutlineCourse+Run2"),
CourseKey.from_string("course-v1:OpenEdX+OutlineCourse+Run3"),
}

View File

@@ -19,6 +19,7 @@ from common.djangoapps.track.event_transaction_utils import get_event_transactio
from common.djangoapps.util.module_utils import yield_dynamic_descriptor_descendants
from lms.djangoapps.grades.api import task_compute_all_grades_for_course
from openedx.core.djangoapps.credit.signals import on_course_publish
from openedx.core.djangoapps.content.learning_sequences.api import key_supports_outlines
from openedx.core.lib.gating import api as gating_api
from xmodule.modulestore.django import SignalHandler, modulestore
@@ -65,7 +66,8 @@ def listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable=
# import here, because signal is registered at startup, but items in tasks are not yet able to be loaded
from cms.djangoapps.contentstore.tasks import update_outline_from_modulestore_task, update_search_index
update_outline_from_modulestore_task.delay(str(course_key))
if key_supports_outlines(course_key):
update_outline_from_modulestore_task.delay(str(course_key))
# Finally call into the course search subsystem
# to kick off an indexing action

View File

@@ -42,6 +42,7 @@ from cms.djangoapps.contentstore.utils import initialize_permissions, reverse_us
from cms.djangoapps.models.settings.course_metadata import CourseMetadata
from common.djangoapps.course_action_state.models import CourseRerunState
from common.djangoapps.student.auth import has_course_author_access
from openedx.core.djangoapps.content.learning_sequences.api import key_supports_outlines
from openedx.core.djangoapps.embargo.models import CountryAccessRule, RestrictedCourse
from openedx.core.lib.extract_tar import safetar_extractall
from xmodule.contentstore.django import contentstore
@@ -571,6 +572,16 @@ def update_outline_from_modulestore_task(course_key_str):
"""
try:
course_key = CourseKey.from_string(course_key_str)
if not key_supports_outlines(course_key):
LOGGER.warning(
(
"update_outline_from_modulestore_task called for course key"
" %s, which does not support learning_sequence outlines."
),
course_key_str
)
return
update_outline_from_modulestore(course_key)
except Exception: # pylint disable=broad-except
LOGGER.exception("Could not create course outline for course %s", course_key_str)

View File

@@ -0,0 +1,96 @@
"""
Read-only Django Admin for viewing Learning Sequences and Outline data.
"""
from datetime import datetime
from enum import Enum
import json
from django.contrib import admin
from django.utils.html import format_html
from opaque_keys import OpaqueKey
import attr
from .api import get_course_outline
from .models import LearningContext
class LearningContextAdmin(admin.ModelAdmin):
"""
This is a read-only model admin that is meant to be useful for querying.
Writes are disabled, because:
1. These values are auto-built/updated based on course publishes.
2. These are read either the Studio or LMS process, but it's only supposed
to be written to from the Studio process.
"""
list_display = (
'context_key',
'title',
'published_at',
'published_version',
'modified'
)
readonly_fields = (
'context_key',
'title',
'published_at',
'published_version',
'created',
'modified',
'outline',
)
search_fields = ['context_key', 'title']
actions = None
def outline(self, obj):
"""
Computed attribute that shows the outline JSON in the detail view.
"""
def json_serializer(_obj, _field, value):
if isinstance(value, OpaqueKey):
return str(value)
elif isinstance(value, Enum):
return value.value
elif isinstance(value, datetime):
return value.isoformat()
return value
outline_data = get_course_outline(obj.context_key)
outline_data_dict = attr.asdict(
outline_data,
recurse=True,
value_serializer=json_serializer,
)
outline_data_json = json.dumps(outline_data_dict, indent=2, sort_keys=True)
return format_html("<pre>\n{}\n</pre>", outline_data_json)
def has_add_permission(self, request):
"""
Disallow additions. See docstring for has_change_permission()
"""
return False
def has_change_permission(self, request, obj=None):
"""
Disallow edits.
This app rebuilds automatically based off of course publishes. Any
manual edits will be wiped out the next time someone touches the course,
so it's better to disallow this in the admin rather than to pretend this
works and have it suddenly change back when someone edits the course.
"""
return False
def has_delete_permission(self, request, obj=None):
"""
Disallow deletes.
Deleting these models can have far reaching consequences and delete a
lot of related data in other parts of the application/project. We should
only do update through the API, which allows us to rebuild the outlines.
"""
return False
admin.site.register(LearningContext, LearningContextAdmin)

View File

@@ -1,7 +1,9 @@
# lint-amnesty, pylint: disable=missing-module-docstring
from .outlines import (
get_course_keys_with_outlines,
get_course_outline,
get_user_course_outline,
get_user_course_outline_details,
key_supports_outlines,
replace_course_outline,
)

View File

@@ -10,10 +10,13 @@ from typing import Optional # lint-amnesty, pylint: disable=unused-import
import attr # lint-amnesty, pylint: disable=unused-import
from django.contrib.auth import get_user_model
from django.db.models.query import QuerySet
from django.db import transaction
from edx_django_utils.cache import TieredCache, get_cache_key # lint-amnesty, pylint: disable=unused-import
from edx_django_utils.monitoring import function_trace
from opaque_keys.edx.keys import CourseKey, UsageKey # lint-amnesty, pylint: disable=unused-import
from opaque_keys import OpaqueKey
from opaque_keys.edx.locator import LibraryLocator
from opaque_keys.edx.keys import CourseKey # lint-amnesty, pylint: disable=unused-import
from ..data import (
CourseLearningSequenceData,
@@ -46,13 +49,42 @@ log = logging.getLogger(__name__)
# Public API...
__all__ = [
'get_course_keys_with_outlines',
'get_course_outline',
'get_user_course_outline',
'get_user_course_outline_details',
'key_supports_outlines',
'replace_course_outline',
]
def key_supports_outlines(opaque_key: OpaqueKey) -> bool:
"""
Does this key-type support outlines?
Allow all non-deprecated CourseKeys except for v1 Libraries (which subclass
CourseKey but shouldn't). So our normal SplitMongo courses (CourseLocator)
will work, as will CCX courses. But libraries, pathways, and Old Mongo
courses will not.
"""
# Get LibraryLocators out of the way first because they subclass CourseKey.
if isinstance(opaque_key, LibraryLocator):
return False
# All other CourseKey types are acceptable if they're not deprecated. There
# are only two at the moment though, course-v1: and ccx-v1:. The old slash-
# separated course IDs (Org/Course/Run) are not supported.
if isinstance(opaque_key, CourseKey):
return not opaque_key.deprecated
return False
def get_course_keys_with_outlines() -> QuerySet:
"""Queryset of ContextKeys, iterable as a flat list."""
return LearningContext.objects.values_list('context_key', flat=True)
def get_course_outline(course_key: CourseKey) -> CourseOutlineData:
"""
Get the outline of a course run.

View File

@@ -3,22 +3,23 @@ Top level API tests. Tests API public contracts only. Do not import/create/mock
models for this app.
"""
from datetime import datetime, timezone
from mock import patch
from unittest import TestCase
import pytest
import attr
from django.conf import settings
from django.contrib.auth.models import AnonymousUser, User # lint-amnesty, pylint: disable=imported-auth-user
from edx_proctoring.exceptions import ProctoredExamNotFoundException
from edx_when.api import set_dates_for_course
from opaque_keys.edx.keys import CourseKey, UsageKey # lint-amnesty, pylint: disable=unused-import
from opaque_keys.edx.locator import BlockUsageLocator # lint-amnesty, pylint: disable=unused-import
from mock import patch
from opaque_keys.edx.keys import CourseKey
from opaque_keys.edx.locator import LibraryLocator
from edx_toggles.toggles.testutils import override_waffle_flag
from lms.djangoapps.courseware.tests.factories import BetaTesterFactory
from openedx.core.djangolib.testing.utils import CacheIsolationTestCase
from openedx.features.course_experience import COURSE_ENABLE_UNENROLLED_ACCESS_FLAG
from common.djangoapps.student.auth import user_has_role
from common.djangoapps.student.models import CourseEnrollment # lint-amnesty, pylint: disable=unused-import
from common.djangoapps.student.roles import CourseBetaTesterRole
from ...data import (
@@ -33,11 +34,25 @@ from ..outlines import (
get_course_outline,
get_user_course_outline,
get_user_course_outline_details,
replace_course_outline
key_supports_outlines,
replace_course_outline,
)
from .test_data import generate_sections
class OutlineSupportTestCase(TestCase):
"""
Make sure we know what kinds of course-like keys we support for outlines.
"""
def test_supported_types(self):
assert key_supports_outlines(CourseKey.from_string("course-v1:edX+100+2021"))
assert key_supports_outlines(CourseKey.from_string("ccx-v1:edX+100+2021+ccx@1"))
def test_unsupported_types(self):
assert not key_supports_outlines(CourseKey.from_string("edX/100/2021"))
assert not key_supports_outlines(LibraryLocator(org="edX", library="100"))
class CourseOutlineTestCase(CacheIsolationTestCase):
"""
Simple tests around reading and writing CourseOutlineData. No user info.

View File

@@ -7,7 +7,7 @@ from edx_proctoring.runtime import set_runtime_service
class LearningSequencesConfig(AppConfig): # lint-amnesty, pylint: disable=missing-class-docstring
name = 'openedx.core.djangoapps.content.learning_sequences'
verbose_name = _('Learning Sequences')
verbose_name = _('Learning Sequences and Outlines')
def ready(self):
# Register celery workers

View File

@@ -0,0 +1,18 @@
# Generated by Django 2.2.19 on 2021-03-04 16:53
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('learning_sequences', '0007_coursesequenceexam'),
]
operations = [
migrations.AlterField(
model_name='learningcontext',
name='title',
field=models.CharField(db_index=True, max_length=255),
),
]

View File

@@ -58,15 +58,18 @@ class LearningContext(TimeStampedModel):
context_key = LearningContextKeyField(
max_length=255, db_index=True, unique=True, null=False
)
title = models.CharField(max_length=255)
title = models.CharField(max_length=255, db_index=True)
published_at = models.DateTimeField(null=False)
published_version = models.CharField(max_length=255)
class Meta:
indexes = [
models.Index(fields=['-published_at'])
models.Index(fields=['-published_at']),
]
def __str__(self):
return f"LearningContext for {self.context_key}"
class CourseContext(TimeStampedModel):
"""

View File

@@ -169,8 +169,8 @@ class CourseOutlineView(APIView):
# Grab the user's outline and send our response...
try:
user_course_outline_details = get_user_course_outline_details(course_key, user, at_time)
except CourseOutlineData.DoesNotExist:
raise NotFound()
except CourseOutlineData.DoesNotExist as does_not_exist_err:
raise NotFound() from does_not_exist_err
serializer = self.UserCourseOutlineDataSerializer(user_course_outline_details)
return Response(serializer.data)