diff --git a/cms/djangoapps/contentstore/management/commands/backfill_course_outlines.py b/cms/djangoapps/contentstore/management/commands/backfill_course_outlines.py index 17737cafb6..99718ce0dd 100644 --- a/cms/djangoapps/contentstore/management/commands/backfill_course_outlines.py +++ b/cms/djangoapps/contentstore/management/commands/backfill_course_outlines.py @@ -40,24 +40,30 @@ class Command(BaseCommand): action='store_true', help="Show course outlines that will be backfilled, but do not make any changes." ) + parser.add_argument( + '--force', + action='store_true', + help="Force Outline re-generation for all Courses, not just missing ones." + ) def handle(self, *args, **options): dry_run = options.get('dry', False) - log.info("Starting backfill_course_outlines{}".format(" (dry run)" if dry_run else "")) + force_all = options.get('force', False) + log.info("Starting backfill_course_outlines: dry=%s, force=%s", dry_run, force_all) all_course_keys_qs = CourseOverview.objects.values_list('id', flat=True) - # .difference() is not supported in MySQL, but this at least does the - # SELECT NOT IN... subquery in the database rather than Python. - missing_outlines_qs = all_course_keys_qs.exclude( - id__in=get_course_keys_with_outlines() - ) - num_courses_needing_outlines = len(missing_outlines_qs) - log.info( - "Found %d courses without outlines. Queuing tasks...", - num_courses_needing_outlines - ) + if force_all: + target_courses_qs = all_course_keys_qs + log.info("Forcing re-generation for all %d course runs.", len(target_courses_qs)) + else: + # .difference() is not supported in MySQL, but this at least does the + # SELECT NOT IN... subquery in the database rather than Python. + target_courses_qs = all_course_keys_qs.exclude( + id__in=get_course_keys_with_outlines() + ) + log.info("Found %d courses without outlines.", len(target_courses_qs)) - for course_key in missing_outlines_qs: + for course_key in target_courses_qs: if key_supports_outlines(course_key): log.info("Queuing outline creation for %s", course_key) if not dry_run: diff --git a/cms/djangoapps/contentstore/outlines.py b/cms/djangoapps/contentstore/outlines.py index 7bb963ec6e..af35372727 100644 --- a/cms/djangoapps/contentstore/outlines.py +++ b/cms/djangoapps/contentstore/outlines.py @@ -4,11 +4,13 @@ is responsible for holding course outline data. Studio _pushes_ that data into learning_sequences at publish time. """ from datetime import timezone +from typing import List, Tuple from edx_django_utils.monitoring import function_trace, set_custom_attribute from openedx.core.djangoapps.content.learning_sequences.api import replace_course_outline from openedx.core.djangoapps.content.learning_sequences.data import ( + ContentErrorData, CourseLearningSequenceData, CourseOutlineData, CourseSectionData, @@ -20,70 +22,6 @@ from xmodule.modulestore import ModuleStoreEnum from xmodule.modulestore.django import modulestore -class CourseStructureError(Exception): - """ - Raise this if we can't create an outline because of the course structure. - - Courses built in Studio conform to a hierarchy that looks like: - Course -> Section (a.k.a. Chapter) -> Subsection (a.k.a. Sequence) - - OLX imports are much more freeform and can generate unusual structures that - we won't know how to handle. - """ - - -def _check_sequence_fields(sequence): - """ - Raise CourseStructureError if `sequence` is missing a required field. - - Do this instead of checking against specific block types to better future - proof ourselves against new sequence-types, aliases, changes in the way - dynamic mixing of XBlock types happens, as well as deprecation/removal of - the specific fields we care about. If it quacks like a duck... - """ - expected_fields = [ - 'display_name', - 'hide_after_due', - 'hide_from_toc', - 'is_practice_exam', - 'is_proctored_enabled', - 'is_time_limited', - 'visible_to_staff_only', - ] - for field in expected_fields: - if not hasattr(sequence, field): - msg = ( - f"Cannot create CourseOutline: Expected a Sequence at " - f"{sequence.location} (child of {sequence.parent}), " - f"but this object does not have sequence field {field}." - ) - raise CourseStructureError(msg) - - -def _check_section_fields(section): - """ - Raise CourseStructureError if `section` is missing a required field. - - Do this instead of checking against specific block types to better future - proof ourselves against new sequence-types, aliases, changes in the way - dynamic mixing of XBlock types happens, as well as deprecation/removal of - the specific fields we care about. If it quacks like a duck... - """ - expected_fields = [ - 'children', - 'hide_from_toc', - 'visible_to_staff_only', - ] - for field in expected_fields: - if not hasattr(section, field): - msg = ( - f"Cannot create CourseOutline: Expected a Section at " - f"{section.location} (child of {section.parent}), " - f"but this object does not have Section field {field}." - ) - raise CourseStructureError(msg) - - def _remove_version_info(usage_key): """ When we ask modulestore for the published branch in the Studio process @@ -105,21 +43,78 @@ def _remove_version_info(usage_key): return usage_key.map_into_course(unversioned_course_key) +def _error_for_not_section(not_section): + """ + ContentErrorData when we run into a child of that's not a Section. + + Has to be phrased in a way that makes sense to course teams. + """ + return ContentErrorData( + message=( + f' contains a <{not_section.location.block_type}> tag with ' + f'url_name="{not_section.location.block_id}" and ' + f'display_name="{getattr(not_section, "display_name", "")}". ' + f'Expected tag instead.' + ), + usage_key=_remove_version_info(not_section.location), + ) + + +def _error_for_not_sequence(section, not_sequence): + """ + ContentErrorData when we run into a child of Section that's not a Sequence. + + Has to be phrased in a way that makes sense to course teams. + """ + return ContentErrorData( + message=( + f' with url_name="{section.location.block_id}" and ' + f'display_name="{section.display_name}" contains a ' + f'<{not_sequence.location.block_type}> tag with ' + f'url_name="{not_sequence.location.block_id}" and ' + f'display_name="{getattr(not_sequence, "display_name", "")}". ' + f'Expected a tag instead.' + ), + usage_key=_remove_version_info(not_sequence.location), + ) + + def _make_section_data(section): """ - Generate a CourseSectionData from a SectionDescriptor. + Return a (CourseSectionData, List[ContentDataError]) from a SectionBlock. + + Can return None for CourseSectionData if it's not really a SectionBlock that + was passed in. This method does a lot of the work to convert modulestore fields to an input - that the learning_sequences app expects. It doesn't check for specific - classes (i.e. you could create your own Sequence-like XBlock), but it will - raise a CourseStructureError if anything you pass in is missing fields that - we expect in a SectionDescriptor or its SequenceDescriptor children. - """ - _check_section_fields(section) + that the learning_sequences app expects. OLX import permits structures that + are much less constrained than Studio's UI allows for, so whenever we run + into something that does not meet our Course -> Section -> Subsection + hierarchy expectations, we add a support-team-readable error message to our + list of ContentDataErrors to pass back. + At this point in the code, everything has already been deserialized into + SectionBlocks and SequenceBlocks, but we're going to phrase our messages in + ways that would make sense to someone looking at the import OLX, since that + is the layer that the course teams and support teams are working with. + """ + section_errors = [] + + # First check if it's not a section at all, and short circuit if it isn't. + if section.location.block_type != 'chapter': + section_errors.append(_error_for_not_section(section)) + return (None, section_errors) + + # We haven't officially killed off problemset and videosequence yet, so + # treat them as equivalent to sequential for now. + valid_sequence_tags = ['sequential', 'problemset', 'videosequence'] sequences_data = [] + for sequence in section.get_children(): - _check_sequence_fields(sequence) + if sequence.location.block_type not in valid_sequence_tags: + section_errors.append(_error_for_not_sequence(section, sequence)) + continue + sequences_data.append( CourseLearningSequenceData( usage_key=_remove_version_info(sequence.location), @@ -146,22 +141,30 @@ def _make_section_data(section): visible_to_staff_only=section.visible_to_staff_only, ), ) - return section_data + return section_data, section_errors @function_trace('get_outline_from_modulestore') -def get_outline_from_modulestore(course_key): +def get_outline_from_modulestore(course_key) -> Tuple[CourseOutlineData, List[ContentErrorData]]: """ - Get a learning_sequence.data.CourseOutlineData for a param:course_key + Return a CourseOutlineData and list of ContentErrorData for param:course_key + + This function does not write any data as a side-effect. It generates a + CourseOutlineData by inspecting the contents in the modulestore, but does + not push that data anywhere. This function only operates on the published + branch, and will not work on Old Mongo courses. """ store = modulestore() + content_errors = [] with store.branch_setting(ModuleStoreEnum.Branch.published_only, course_key): course = store.get_course(course_key, depth=2) sections_data = [] for section in course.get_children(): - section_data = _make_section_data(section) - sections_data.append(section_data) + section_data, section_errors = _make_section_data(section) + if section_data: + sections_data.append(section_data) + content_errors.extend(section_errors) course_outline_data = CourseOutlineData( course_key=course_key, @@ -183,7 +186,8 @@ def get_outline_from_modulestore(course_key): self_paced=course.self_paced, course_visibility=CourseVisibility(course.course_visibility), ) - return course_outline_data + + return (course_outline_data, content_errors) def update_outline_from_modulestore(course_key): @@ -196,6 +200,8 @@ def update_outline_from_modulestore(course_key): # New Relic for easier trace debugging. set_custom_attribute('course_id', str(course_key)) - course_outline_data = get_outline_from_modulestore(course_key) + course_outline_data, content_errors = get_outline_from_modulestore(course_key) set_custom_attribute('num_sequences', len(course_outline_data.sequences)) - replace_course_outline(course_outline_data) + set_custom_attribute('num_content_errors', len(content_errors)) + + replace_course_outline(course_outline_data, content_errors=content_errors) diff --git a/cms/djangoapps/contentstore/tests/test_outlines.py b/cms/djangoapps/contentstore/tests/test_outlines.py index 4454182ea0..5196967993 100644 --- a/cms/djangoapps/contentstore/tests/test_outlines.py +++ b/cms/djangoapps/contentstore/tests/test_outlines.py @@ -12,7 +12,7 @@ from xmodule.modulestore import ModuleStoreEnum from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory -from ..outlines import CourseStructureError, get_outline_from_modulestore +from ..outlines import get_outline_from_modulestore class OutlineFromModuleStoreTestCase(ModuleStoreTestCase): @@ -58,7 +58,7 @@ class OutlineFromModuleStoreTestCase(ModuleStoreTestCase): # published branch to make sure we have the right data. with self.store.branch_setting(ModuleStoreEnum.Branch.published_only, self.course_key): published_course = self.store.get_course(self.course_key, depth=2) - outline = get_outline_from_modulestore(self.course_key) + outline, _errs = get_outline_from_modulestore(self.course_key) # Check basic metdata... assert outline.title == "OutlineFromModuleStoreTestCase Course" @@ -152,7 +152,7 @@ class OutlineFromModuleStoreTestCase(ModuleStoreTestCase): display_name=f"Seq_1_{i}", ) - outline = get_outline_from_modulestore(self.course_key) + outline, _errs = get_outline_from_modulestore(self.course_key) assert len(outline.sections) == 2 assert len(outline.sections[0].sequences) == 3 @@ -171,19 +171,58 @@ class OutlineFromModuleStoreTestCase(ModuleStoreTestCase): """ # Course -> Section -> Unit (No Sequence) with self.store.bulk_operations(self.course_key): - section = ItemFactory.create( + section_1 = ItemFactory.create( parent_location=self.draft_course.location, category='chapter', display_name="Section", ) + # This Unit should be skipped ItemFactory.create( - parent_location=section.location, + parent_location=section_1.location, category='vertical', - display_name="Unit" + display_name="u1" + ) + ItemFactory.create( + parent_location=section_1.location, + category='sequential', + display_name="standard_seq" + ) + ItemFactory.create( + parent_location=section_1.location, + category='problemset', + display_name="pset_seq" + ) + ItemFactory.create( + parent_location=section_1.location, + category='videosequence', + display_name="video_seq" ) - with self.assertRaises(CourseStructureError): - get_outline_from_modulestore(self.course_key) + # This should work fine + section_2 = ItemFactory.create( + parent_location=self.draft_course.location, + category='chapter', + display_name="Section 2", + ) + + # Second error message here + ItemFactory.create( + parent_location=section_2.location, + category='vertical', + display_name="u2" + ) + + outline, errs = get_outline_from_modulestore(self.course_key) + assert len(outline.sections) == 2 + assert len(outline.sections[0].sequences) == 3 + assert len(outline.sections[1].sequences) == 0 + assert len(outline.sequences) == 3 + + # Version-less usage keys + unit_1_loc = self.course_key.make_usage_key('vertical', 'u1') + unit_2_loc = self.course_key.make_usage_key('vertical', 'u2') + assert errs[0].usage_key == unit_1_loc + assert errs[1].usage_key == unit_2_loc def test_sequence_without_section(self): """ @@ -206,8 +245,13 @@ class OutlineFromModuleStoreTestCase(ModuleStoreTestCase): display_name="Unit", ) - with self.assertRaises(CourseStructureError): - get_outline_from_modulestore(self.course_key) + outline, errs = get_outline_from_modulestore(self.course_key) + assert len(errs) == 1 + + # Strip version information from seq.location before comparison. + assert errs[0].usage_key == seq.location.map_into_course(self.course_key) + assert outline.sections == [] + assert outline.sequences == {} def test_missing_display_names(self): """ @@ -225,7 +269,7 @@ class OutlineFromModuleStoreTestCase(ModuleStoreTestCase): display_name=None, ) - outline = get_outline_from_modulestore(self.course_key) + outline, _errs = get_outline_from_modulestore(self.course_key) assert outline.sections[0].title == section.url_name assert outline.sections[0].sequences[0].title == sequence.url_name @@ -238,7 +282,7 @@ class OutlineFromModuleStoreTestCase(ModuleStoreTestCase): published set of blocks will have version information when they're published, but learning_sequences ignores all of that). """ - outline = get_outline_from_modulestore(self.course_key) + outline, _errs = get_outline_from_modulestore(self.course_key) # Recently modified content can have full version information on their # CourseKeys. We need to strip that out and have versionless-CourseKeys diff --git a/openedx/core/djangoapps/content/learning_sequences/admin.py b/openedx/core/djangoapps/content/learning_sequences/admin.py index 2b3d7d2f68..ee1d38383c 100644 --- a/openedx/core/djangoapps/content/learning_sequences/admin.py +++ b/openedx/core/djangoapps/content/learning_sequences/admin.py @@ -6,15 +6,112 @@ from enum import Enum import json from django.contrib import admin -from django.utils.html import format_html +from django.utils.html import format_html, format_html_join +from django.utils.translation import gettext_lazy as _ from opaque_keys import OpaqueKey import attr -from .api import get_course_outline -from .models import LearningContext +from .api import get_content_errors, get_course_outline +from .models import CourseContext, CourseSectionSequence -class LearningContextAdmin(admin.ModelAdmin): +class HasErrorsListFilter(admin.SimpleListFilter): + """ + Filter to find Courses with content errors. + + The default Django filter on an integer field will give a choice of values, + which isn't something we really want. We just want a filter for > 0 errors. + """ + title = _("Error Status") + parameter_name = 'status' + + def lookups(self, request, model_admin): + return ( + ('has_errors', _('Courses with Errors')), + ) + + def queryset(self, request, queryset): + if self.value() == 'has_errors': + return queryset.filter( + learning_context__publish_report__num_errors__gt=0, + ) + + +class CourseSectionSequenceInline(admin.TabularInline): + """ + Inline for showing the sequences of a course. + + The queries look a bit weird because a CourseSectionSequence holds both + course-specific Sequence metadata, while much of the data we care about is + in LearningSequence (like the title) and CourseSequenceExam. + """ + model = CourseSectionSequence + verbose_name = "Sequence" + verbose_name_plural = "Sequences" + + fields = ( + 'title', + 'is_time_limited', + 'is_proctored_exam', + 'is_practice_exam', + 'accessible_after_due', + 'visible_to_staff_only', + 'hide_from_toc', + ) + readonly_fields = ( + 'title', + 'is_time_limited', + 'is_proctored_exam', + 'is_practice_exam', + 'accessible_after_due', + 'visible_to_staff_only', + 'hide_from_toc', + ) + ordering = ['ordering'] + + def get_queryset(self, request): + """ + Optimization to cut an extra two requests per sequence. + + We still have an N+1 issue, but given the number of sequences in a + course, this is tolerable even for large courses. It is possible to get + this down to one query if we do a lower level rendering for the + sequences later. + """ + qs = super().get_queryset(request) + qs = qs.select_related('section', 'sequence') + return qs + + def title(self, cs_seq): + return cs_seq.sequence.title + title.short_description = "Title" + + def accessible_after_due(self, cs_seq): + return not cs_seq.inaccessible_after_due + accessible_after_due.boolean = True + + def visible_to_staff_only(self, cs_seq): + return not cs_seq.visible_to_staff_only + visible_to_staff_only.boolean = True + visible_to_staff_only.short_description = "Staff Only" + + def is_time_limited(self, cs_seq): + return cs_seq.exam.is_time_limited + is_time_limited.boolean = True + is_time_limited.short_description = "Timed Exam" + + def is_proctored_exam(self, cs_seq): + return cs_seq.exam.is_proctored_exam + is_proctored_exam.boolean = True + is_proctored_exam.short_description = "Proctored Exam" + + def is_practice_exam(self, cs_seq): + return cs_seq.exam.is_practice_exam + is_practice_exam.boolean = True + is_practice_exam.short_description = "Practice Exam" + + +class CourseContextAdmin(admin.ModelAdmin): """ This is a read-only model admin that is meant to be useful for querying. @@ -25,25 +122,124 @@ class LearningContextAdmin(admin.ModelAdmin): to be written to from the Studio process. """ list_display = ( - 'context_key', + 'course_key', 'title', 'published_at', - 'published_version', - 'modified' + 'num_errors', + 'num_sections', + 'num_sequences', + ) + list_select_related = ( + 'learning_context', + 'learning_context__publish_report', + ) + list_filter = ( + HasErrorsListFilter, + 'learning_context__published_at', ) readonly_fields = ( - 'context_key', + 'course_key', 'title', 'published_at', 'published_version', 'created', 'modified', - 'outline', - ) - search_fields = ['context_key', 'title'] - actions = None + 'course_visibility', + 'self_paced', + 'days_early_for_beta', + 'entrance_exam_id', - def outline(self, obj): + 'error_details', + 'raw_outline', + ) + raw_id_fields = ( + 'learning_context', + ) + fieldsets = ( + ( + None, + { + 'fields': ( + 'course_key', + 'title', + 'published_at', + 'course_visibility', + 'self_paced', + 'days_early_for_beta', + 'entrance_exam_id', + ), + } + ), + ( + 'Outline Data', + { + 'fields': ( + 'num_sections', 'num_sequences', 'num_errors', 'error_details', + ), + } + ), + ( + 'Debug Details', + { + 'fields': ( + 'published_version', 'created', 'modified', 'raw_outline' + ), + 'classes': ('collapse',), + } + ), + ) + inlines = [ + CourseSectionSequenceInline, + ] + search_fields = ['learning_context__context_key', 'learning_context__title'] + ordering = ['-learning_context__published_at'] + + def get_queryset(self, request): + qs = super().get_queryset(request) + qs = qs.prefetch_related('section_sequences') + return qs + + def created(self, course_context): + return course_context.learning_context.created + created.short_description = "Record Created" + + def modified(self, course_context): + return course_context.learning_context.modified + modified.short_description = "Record Modified" + + def course_key(self, course_context): + return course_context.learning_context.context_key + + def title(self, course_context): + return course_context.learning_context.title + + def published_at(self, course_context): + published_at_dt = course_context.learning_context.published_at + return published_at_dt.strftime("%B %-d, %Y, %-I:%M %p") + published_at.short_description = "Published at (UTC)" + + def published_version(self, course_context): + return course_context.learning_context.published_version + + def _publish_report_attr(self, course_context, attr_name): + learning_context = course_context.learning_context + if not hasattr(learning_context, 'publish_report'): + return None + return getattr(learning_context.publish_report, attr_name) + + def num_errors(self, course_context): + return self._publish_report_attr(course_context, 'num_errors') + num_errors.short_description = "Errors" + + def num_sections(self, course_context): + return self._publish_report_attr(course_context, 'num_sections') + num_sections.short_description = "Sections" + + def num_sequences(self, course_context): + return self._publish_report_attr(course_context, 'num_sequences') + num_sequences.short_description = "Sequences" + + def raw_outline(self, obj): """ Computed attribute that shows the outline JSON in the detail view. """ @@ -56,7 +252,7 @@ class LearningContextAdmin(admin.ModelAdmin): return value.isoformat() return value - outline_data = get_course_outline(obj.context_key) + outline_data = get_course_outline(obj.learning_context.context_key) outline_data_dict = attr.asdict( outline_data, recurse=True, @@ -65,6 +261,46 @@ class LearningContextAdmin(admin.ModelAdmin): outline_data_json = json.dumps(outline_data_dict, indent=2, sort_keys=True) return format_html("
\n{}\n
", outline_data_json) + def error_details(self, course_context): + """ + Generates the HTML for Error Details. + """ + learning_context = course_context.learning_context + if not hasattr(learning_context, 'publish_report'): + return "" + + content_errors = get_content_errors(learning_context.context_key) + if not content_errors: + return format_html("

No errors were found.

") + + list_items = format_html_join( + "\n", + "
  • {}
    Usage Key: {}
  • ", + ( + (err_data.message, err_data.usage_key) + for err_data in content_errors + ) + ) + return format_html( + """ +

    + Parts of the course content were skipped when generating the Outline + because they did not follow the standard Course → Section → + Subsection hierarchy. Course structures like this cannot be created + in Studio, but can be created by OLX import. In OLX, this hierarchy + is represented by the tags {}{} → + {}. +

    +
      + {} +
    + """, + "", + "", + "", + list_items, + ) + def has_add_permission(self, request): """ Disallow additions. See docstring for has_change_permission() @@ -93,4 +329,4 @@ class LearningContextAdmin(admin.ModelAdmin): return False -admin.site.register(LearningContext, LearningContextAdmin) +admin.site.register(CourseContext, CourseContextAdmin) diff --git a/openedx/core/djangoapps/content/learning_sequences/api/__init__.py b/openedx/core/djangoapps/content/learning_sequences/api/__init__.py index e07639b24f..2ab8283a6e 100644 --- a/openedx/core/djangoapps/content/learning_sequences/api/__init__.py +++ b/openedx/core/djangoapps/content/learning_sequences/api/__init__.py @@ -1,5 +1,6 @@ # lint-amnesty, pylint: disable=missing-module-docstring from .outlines import ( + get_content_errors, get_course_keys_with_outlines, get_course_outline, get_user_course_outline, diff --git a/openedx/core/djangoapps/content/learning_sequences/api/outlines.py b/openedx/core/djangoapps/content/learning_sequences/api/outlines.py index d968b07427..b7df990a3e 100644 --- a/openedx/core/djangoapps/content/learning_sequences/api/outlines.py +++ b/openedx/core/djangoapps/content/learning_sequences/api/outlines.py @@ -6,7 +6,7 @@ __init__.py imports from here, and is a more stable place to import from. import logging from collections import defaultdict from datetime import datetime -from typing import Optional # lint-amnesty, pylint: disable=unused-import +from typing import Optional, List # lint-amnesty, pylint: disable=unused-import import attr # lint-amnesty, pylint: disable=unused-import from django.contrib.auth import get_user_model @@ -19,6 +19,7 @@ from opaque_keys.edx.locator import LibraryLocator from opaque_keys.edx.keys import CourseKey # lint-amnesty, pylint: disable=unused-import from ..data import ( + ContentErrorData, CourseLearningSequenceData, CourseOutlineData, CourseSectionData, @@ -29,12 +30,14 @@ from ..data import ( VisibilityData, ) from ..models import ( + ContentError, CourseSection, CourseSectionSequence, CourseContext, CourseSequenceExam, LearningContext, - LearningSequence + LearningSequence, + PublishReport, ) from .permissions import can_see_all_content from .processors.content_gating import ContentGatingOutlineProcessor @@ -49,6 +52,7 @@ log = logging.getLogger(__name__) # Public API... __all__ = [ + 'get_content_errors', 'get_course_keys_with_outlines', 'get_course_outline', 'get_user_course_outline', @@ -193,7 +197,10 @@ def _get_course_context_for_outline(course_key: CourseKey) -> CourseContext: ) try: course_context = ( - LearningContext.objects.select_related('course_context').get(context_key=course_key).course_context + LearningContext.objects + .select_related('course_context') + .get(context_key=course_key) + .course_context ) except LearningContext.DoesNotExist: # Could happen if it hasn't been published. @@ -203,6 +210,25 @@ def _get_course_context_for_outline(course_key: CourseKey) -> CourseContext: return course_context +def get_content_errors(course_key: CourseKey) -> List[ContentErrorData]: + """ + Get ContentErrors created in the most recent publish of this Course run. + """ + try: + learning_context = ( + LearningContext.objects.select_related('publish_report') + .get(context_key=course_key) + ) + publish_report = learning_context.publish_report + except (LearningContext.DoesNotExist, PublishReport.DoesNotExist): + return [] + + return [ + ContentErrorData(usage_key=error.usage_key, message=error.message) + for error in publish_report.content_errors.all().order_by('id') + ] + + @function_trace('learning_sequences.api.get_user_course_outline') def get_user_course_outline(course_key: CourseKey, user: User, @@ -246,6 +272,13 @@ def get_user_course_outline_details(course_key: CourseKey, def _get_user_course_outline_and_processors(course_key: CourseKey, # lint-amnesty, pylint: disable=missing-function-docstring user: User, at_time: datetime): + """ + Helper function that runs the outline processors. + + This function returns a UserCourseOutlineData and a dict of outline + processors that have executed their data loading and returned which + sequences to remove and which to mark as inaccessible. + """ # Record the user separately from the standard user_id that views record, # because it's possible to ask for views as other users if you're global # staff. Performance is going to vary based on the user we're asking the @@ -316,10 +349,11 @@ def _get_user_course_outline_and_processors(course_key: CourseKey, # lint-amnes @function_trace('learning_sequences.api.replace_course_outline') -def replace_course_outline(course_outline: CourseOutlineData): +def replace_course_outline(course_outline: CourseOutlineData, + content_errors: Optional[List[ContentErrorData]] = None): """ Replace the model data stored for the Course Outline with the contents of - course_outline (a CourseOutlineData). + course_outline (a CourseOutlineData). Record any content errors. This isn't particularly optimized at the moment. """ @@ -329,17 +363,20 @@ def replace_course_outline(course_outline: CourseOutlineData): ) set_custom_attribute('learning_sequences.api.course_id', str(course_outline.course_key)) + if content_errors is None: + content_errors = [] + with transaction.atomic(): # Update or create the basic CourseContext... course_context = _update_course_context(course_outline) - # Wipe out the CourseSectionSequences join+ordering table so we can - # delete CourseSection and LearningSequence objects more easily. + # Wipe out the CourseSectionSequences join+ordering table course_context.section_sequences.all().delete() _update_sections(course_outline, course_context) _update_sequences(course_outline, course_context) _update_course_section_sequences(course_outline, course_context) + _update_publish_report(course_outline, content_errors, course_context) def _update_course_context(course_outline: CourseOutlineData): @@ -457,3 +494,38 @@ def _update_course_section_sequences(course_outline: CourseOutlineData, course_c else: # Otherwise, delete any exams associated with it CourseSequenceExam.objects.filter(course_section_sequence=course_section_sequence).delete() + + +def _update_publish_report(course_outline: CourseOutlineData, + content_errors: List[ContentErrorData], + course_context: CourseContext): + """ + Record ContentErrors for this course publish. Deletes previous errors. + """ + set_custom_attribute('learning_sequences.api.num_content_errors', len(content_errors)) + learning_context = course_context.learning_context + try: + # Normal path if we're updating a PublishReport + publish_report = learning_context.publish_report + publish_report.num_errors = len(content_errors) + publish_report.num_sections = len(course_outline.sections) + publish_report.num_sequences = len(course_outline.sequences) + publish_report.content_errors.all().delete() + except PublishReport.DoesNotExist: + # Case where we're creating it for the first time. + publish_report = PublishReport( + learning_context=learning_context, + num_errors=len(content_errors), + num_sections=len(course_outline.sections), + num_sequences=len(course_outline.sequences), + ) + + publish_report.save() + publish_report.content_errors.bulk_create([ + ContentError( + publish_report=publish_report, + usage_key=error_data.usage_key, + message=error_data.message, + ) + for error_data in content_errors + ]) diff --git a/openedx/core/djangoapps/content/learning_sequences/api/tests/test_outlines.py b/openedx/core/djangoapps/content/learning_sequences/api/tests/test_outlines.py index dc57e7d855..5067d3d6a7 100644 --- a/openedx/core/djangoapps/content/learning_sequences/api/tests/test_outlines.py +++ b/openedx/core/djangoapps/content/learning_sequences/api/tests/test_outlines.py @@ -23,6 +23,7 @@ from common.djangoapps.student.auth import user_has_role from common.djangoapps.student.roles import CourseBetaTesterRole from ...data import ( + ContentErrorData, CourseLearningSequenceData, CourseOutlineData, CourseSectionData, @@ -31,6 +32,7 @@ from ...data import ( VisibilityData, ) from ..outlines import ( + get_content_errors, get_course_outline, get_user_course_outline, get_user_course_outline_details, @@ -1274,3 +1276,64 @@ class SequentialVisibilityTestCase(CacheIsolationTestCase): assert len(user_course_outline.sequences) == 6 assert all(is_sequence_accessible),\ 'Sequences should be accessible to enrolled, staff users for a public_outline course' + + +class ContentErrorTestCase(CacheIsolationTestCase): + """Test error collection and reporting.""" + + def test_errors(self): + """ + Basic tests for writing and retriving errors. + """ + course_key = CourseKey.from_string("course-v1:OpenEdX+Outlines+Errors") + outline = CourseOutlineData( + course_key=course_key, + title="Outline Errors Test Course!", + published_at=datetime(2021, 3, 21, tzinfo=timezone.utc), + published_version="8ebece4b69dd593d82fe2020", + sections=[], + self_paced=False, + days_early_for_beta=None, + entrance_exam_id=None, + course_visibility=CourseVisibility.PRIVATE, + ) + usage_key_1 = course_key.make_usage_key('sequential', 'seq1') + usage_key_2 = course_key.make_usage_key('sequential', 'seq2') + replace_course_outline( + outline, + content_errors=[ + # Explicitly set to no usage key. + ContentErrorData(message="Content is Hard", usage_key=None), + + # Implicitly set usage key + ContentErrorData("Simple Content Error Description"), + + # Multiple copies of the same usage key + ContentErrorData(message="Seq1 is wrong", usage_key=usage_key_1), + ContentErrorData(message="Seq1 is still wrong", usage_key=usage_key_1), + + # Another key + ContentErrorData(message="Seq2 is also wrong", usage_key=usage_key_2) + ] + ) + assert outline == get_course_outline(course_key) + + # Ordering is preserved. + assert get_content_errors(course_key) == [ + ContentErrorData(message="Content is Hard", usage_key=None), + ContentErrorData(message="Simple Content Error Description", usage_key=None), + ContentErrorData(message="Seq1 is wrong", usage_key=usage_key_1), + ContentErrorData(message="Seq1 is still wrong", usage_key=usage_key_1), + ContentErrorData(message="Seq2 is also wrong", usage_key=usage_key_2), + ] + + # Now do it again and make sure updates work as well as inserts + replace_course_outline( + outline, + content_errors=[ + ContentErrorData(message="Content is Hard", usage_key=None), + ] + ) + assert get_content_errors(course_key) == [ + ContentErrorData(message="Content is Hard", usage_key=None), + ] diff --git a/openedx/core/djangoapps/content/learning_sequences/data.py b/openedx/core/djangoapps/content/learning_sequences/data.py index d82e7707a7..fc56d955be 100644 --- a/openedx/core/djangoapps/content/learning_sequences/data.py +++ b/openedx/core/djangoapps/content/learning_sequences/data.py @@ -50,6 +50,22 @@ class ObjectDoesNotExist(Exception): pass # lint-amnesty, pylint: disable=unnecessary-pass +@attr.s(frozen=True) +class ContentErrorData: + """ + A human-readable description of something wrong with the content, to ease + the debugging of content issues–especially ones where content had to be + skipped because it was somehow malformed. The messages should be + comprehensible to course teams and support staff. + + Errors can refer to things that are not anywhere in the outline, such as + when things don't show up where we expect then to be and we omit them from + the outline (unknown tag types, sequences where we expect sections, etc.) + """ + message = attr.ib(type=str) + usage_key = attr.ib(type=Optional[UsageKey], default=None) + + @attr.s(frozen=True) class VisibilityData: """ diff --git a/openedx/core/djangoapps/content/learning_sequences/migrations/0009_contenterror_publishreport.py b/openedx/core/djangoapps/content/learning_sequences/migrations/0009_contenterror_publishreport.py new file mode 100644 index 0000000000..1c7135a847 --- /dev/null +++ b/openedx/core/djangoapps/content/learning_sequences/migrations/0009_contenterror_publishreport.py @@ -0,0 +1,34 @@ +# Generated by Django 2.2.19 on 2021-03-12 16:31 + +from django.db import migrations, models +import django.db.models.deletion +import opaque_keys.edx.django.models + + +class Migration(migrations.Migration): + + dependencies = [ + ('learning_sequences', '0008_add_learning_context_title_index'), + ] + + operations = [ + migrations.CreateModel( + name='PublishReport', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('num_errors', models.PositiveIntegerField()), + ('num_sections', models.PositiveIntegerField()), + ('num_sequences', models.PositiveIntegerField()), + ('learning_context', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='publish_report', to='learning_sequences.LearningContext')), + ], + ), + migrations.CreateModel( + name='ContentError', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('usage_key', opaque_keys.edx.django.models.UsageKeyField(max_length=255, null=True)), + ('message', models.TextField(max_length=10000)), + ('publish_report', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='content_errors', to='learning_sequences.PublishReport')), + ], + ), + ] diff --git a/openedx/core/djangoapps/content/learning_sequences/migrations/0010_add_publishreport_indexes.py b/openedx/core/djangoapps/content/learning_sequences/migrations/0010_add_publishreport_indexes.py new file mode 100644 index 0000000000..f9501522ac --- /dev/null +++ b/openedx/core/djangoapps/content/learning_sequences/migrations/0010_add_publishreport_indexes.py @@ -0,0 +1,28 @@ +# Generated by Django 2.2.19 on 2021-03-13 18:25 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('learning_sequences', '0009_contenterror_publishreport'), + ] + + operations = [ + migrations.AlterField( + model_name='publishreport', + name='num_errors', + field=models.PositiveIntegerField(db_index=True), + ), + migrations.AlterField( + model_name='publishreport', + name='num_sections', + field=models.PositiveIntegerField(db_index=True), + ), + migrations.AlterField( + model_name='publishreport', + name='num_sequences', + field=models.PositiveIntegerField(db_index=True), + ), + ] diff --git a/openedx/core/djangoapps/content/learning_sequences/migrations/0011_course_meta_names.py b/openedx/core/djangoapps/content/learning_sequences/migrations/0011_course_meta_names.py new file mode 100644 index 0000000000..3e71ae7120 --- /dev/null +++ b/openedx/core/djangoapps/content/learning_sequences/migrations/0011_course_meta_names.py @@ -0,0 +1,21 @@ +# Generated by Django 2.2.19 on 2021-03-14 05:48 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('learning_sequences', '0010_add_publishreport_indexes'), + ] + + operations = [ + migrations.AlterModelOptions( + name='coursecontext', + options={'verbose_name': 'Course', 'verbose_name_plural': 'Courses'}, + ), + migrations.AlterModelOptions( + name='coursesectionsequence', + options={'verbose_name': 'Course Sequence', 'verbose_name_plural': 'Course Sequences'}, + ), + ] diff --git a/openedx/core/djangoapps/content/learning_sequences/models.py b/openedx/core/djangoapps/content/learning_sequences/models.py index 23f8770d5e..ecf0632e9e 100644 --- a/openedx/core/djangoapps/content/learning_sequences/models.py +++ b/openedx/core/djangoapps/content/learning_sequences/models.py @@ -87,6 +87,13 @@ class CourseContext(TimeStampedModel): self_paced = models.BooleanField(default=False) entrance_exam_id = models.CharField(max_length=255, null=True) + class Meta: + verbose_name = 'Course' + verbose_name_plural = 'Courses' + + def __str__(self): + return f"{self.learning_context.context_key} ({self.learning_context.title})" + class LearningSequence(TimeStampedModel): """ @@ -200,6 +207,11 @@ class CourseSectionSequence(CourseContentVisibilityMixin, TimeStampedModel): unique_together = [ ['course_context', 'ordering'], ] + verbose_name = 'Course Sequence' + verbose_name_plural = 'Course Sequences' + + def __str__(self): + return f"{self.section.title} > {self.sequence.title}" class CourseSequenceExam(TimeStampedModel): @@ -212,3 +224,51 @@ class CourseSequenceExam(TimeStampedModel): is_practice_exam = models.BooleanField(default=False) is_proctored_enabled = models.BooleanField(default=False) is_time_limited = models.BooleanField(default=False) + + +class PublishReport(models.Model): + """ + A report about the content that generated this LearningContext publish. + + All these fields could be derived with aggregate SQL functions, but it would + be slower and make the admin code more complex. Since we only write at + publish time, keeping things in sync is less of a concern. + """ + learning_context = models.OneToOneField( + LearningContext, on_delete=models.CASCADE, related_name='publish_report' + ) + num_errors = models.PositiveIntegerField(null=False, db_index=True) + num_sections = models.PositiveIntegerField(null=False, db_index=True) + num_sequences = models.PositiveIntegerField(null=False, db_index=True) + + +class ContentError(models.Model): + """ + Human readable content errors. + + If something got here, it means that we were able to make _something_ (or + there would be no LearningContext at all), but something about the published + state of the content is wrong and should be flagged to course and support + teams. In many cases, this will be some malformed course structure that gets + uploaded via OLX import–a process that is more forgiving than Studio's UI. + + It's a little weird to store errors in such a freeform manner like this. It + would be more efficient and flexible in terms of i18n if we were to store + error codes, and leave the message generation to the time of display. The + problem with that is that we don't know up front what the parameterization + for such errors would be. The current error messages being created are + fairly complicated and include references to multiple attributes of multiple + pieces of content with supporting breadcrumbs. Other future errors might + just be about display_name string length. + + So instead of trying to model all that internally, I'm just allowing for + freeform messages. It is quite possible that at some point we will come up + with a more comprehensive taxonomy of error messages, at which point we + could do a backfill to regenerate this data in a more normalized way. + """ + id = models.BigAutoField(primary_key=True) + publish_report = models.ForeignKey( + PublishReport, on_delete=models.CASCADE, related_name='content_errors' + ) + usage_key = UsageKeyField(max_length=255, null=True) + message = models.TextField(max_length=10000, blank=False, null=False)