feat: Outline error handling and admin improvements.

* Introduces the idea of content errors into the learning_sequences public API, accessible using get_content_errors(). * Makes course outline generation much more resilient to unusual structures (e.g. Section -> Unit with no Sequence in between), with the understanding that anything that doesn't conform to the standard structure will simply be skipped. * Improves the Django Admin for learning_sequences to display content errors and improve sequence data browsing within a course. * Switches the main table viewed in the Django admin from LearningContext to CourseContext, which is appropriate since only course runs generate outlines. This was done as part of TNL-8057, with the end goal of making course outline generation resilient enough to switch over apps to using the learning_sequences outline API. The types of course structure errors that this PR addresses cause display issues even in the current Outline Page experience, but would break the outline generation for learning_sequences altogether. The approach for error messages here is very generic, to keep modulestore concepts from seeping into learning_sequences (which is not aware of the modulestore/contentstore). We may need to address this later, with a more normalized content error data model. While the Django admin page is backwards compatible with the old versions of the models, we should run the backfill_course_outlines management command after deploying this change, to get the full benefits.
2021-03-12 00:33:09 -05:00
parent 36ebb3e124
commit 3a552c4fdf
11 changed files with 695 additions and 114 deletions
--- a/cms/djangoapps/contentstore/outlines.py
+++ b/cms/djangoapps/contentstore/outlines.py
@@ -4,11 +4,13 @@ is responsible for holding course outline data. Studio _pushes_ that data into
 learning_sequences at publish time.
 """
 from datetime import timezone
+from typing import List, Tuple

 from edx_django_utils.monitoring import function_trace, set_custom_attribute

 from openedx.core.djangoapps.content.learning_sequences.api import replace_course_outline
 from openedx.core.djangoapps.content.learning_sequences.data import (
+    ContentErrorData,
    CourseLearningSequenceData,
    CourseOutlineData,
    CourseSectionData,
@@ -20,70 +22,6 @@ from xmodule.modulestore import ModuleStoreEnum
 from xmodule.modulestore.django import modulestore


-class CourseStructureError(Exception):
-    """
-    Raise this if we can't create an outline because of the course structure.
-
-    Courses built in Studio conform to a hierarchy that looks like:
-        Course -> Section (a.k.a. Chapter) -> Subsection (a.k.a. Sequence)
-
-    OLX imports are much more freeform and can generate unusual structures that
-    we won't know how to handle.
-    """
-
-
-def _check_sequence_fields(sequence):
-    """
-    Raise CourseStructureError if `sequence` is missing a required field.
-
-    Do this instead of checking against specific block types to better future
-    proof ourselves against new sequence-types, aliases, changes in the way
-    dynamic mixing of XBlock types happens, as well as deprecation/removal of
-    the specific fields we care about. If it quacks like a duck...
-    """
-    expected_fields = [
-        'display_name',
-        'hide_after_due',
-        'hide_from_toc',
-        'is_practice_exam',
-        'is_proctored_enabled',
-        'is_time_limited',
-        'visible_to_staff_only',
-    ]
-    for field in expected_fields:
-        if not hasattr(sequence, field):
-            msg = (
-                f"Cannot create CourseOutline: Expected a Sequence at "
-                f"{sequence.location} (child of {sequence.parent}), "
-                f"but this object does not have sequence field {field}."
-            )
-            raise CourseStructureError(msg)
-
-
-def _check_section_fields(section):
-    """
-    Raise CourseStructureError if `section` is missing a required field.
-
-    Do this instead of checking against specific block types to better future
-    proof ourselves against new sequence-types, aliases, changes in the way
-    dynamic mixing of XBlock types happens, as well as deprecation/removal of
-    the specific fields we care about. If it quacks like a duck...
-    """
-    expected_fields = [
-        'children',
-        'hide_from_toc',
-        'visible_to_staff_only',
-    ]
-    for field in expected_fields:
-        if not hasattr(section, field):
-            msg = (
-                f"Cannot create CourseOutline: Expected a Section at "
-                f"{section.location} (child of {section.parent}), "
-                f"but this object does not have Section field {field}."
-            )
-            raise CourseStructureError(msg)
-
-
 def _remove_version_info(usage_key):
    """
    When we ask modulestore for the published branch in the Studio process
@@ -105,21 +43,78 @@ def _remove_version_info(usage_key):
    return usage_key.map_into_course(unversioned_course_key)


+def _error_for_not_section(not_section):
+    """
+    ContentErrorData when we run into a child of <course> that's not a Section.
+
+    Has to be phrased in a way that makes sense to course teams.
+    """
+    return ContentErrorData(
+        message=(
+            f'<course> contains a <{not_section.location.block_type}> tag with '
+            f'url_name="{not_section.location.block_id}" and '
+            f'display_name="{getattr(not_section, "display_name", "")}". '
+            f'Expected <chapter> tag instead.'
+        ),
+        usage_key=_remove_version_info(not_section.location),
+    )
+
+
+def _error_for_not_sequence(section, not_sequence):
+    """
+    ContentErrorData when we run into a child of Section that's not a Sequence.
+
+    Has to be phrased in a way that makes sense to course teams.
+    """
+    return ContentErrorData(
+        message=(
+            f'<chapter> with url_name="{section.location.block_id}" and '
+            f'display_name="{section.display_name}" contains a '
+            f'<{not_sequence.location.block_type}> tag with '
+            f'url_name="{not_sequence.location.block_id}" and '
+            f'display_name="{getattr(not_sequence, "display_name", "")}". '
+            f'Expected a <sequential> tag instead.'
+        ),
+        usage_key=_remove_version_info(not_sequence.location),
+    )
+
+
 def _make_section_data(section):
    """
-    Generate a CourseSectionData from a SectionDescriptor.
+    Return a (CourseSectionData, List[ContentDataError]) from a SectionBlock.
+
+    Can return None for CourseSectionData if it's not really a SectionBlock that
+    was passed in.

    This method does a lot of the work to convert modulestore fields to an input
-    that the learning_sequences app expects. It doesn't check for specific
-    classes (i.e. you could create your own Sequence-like XBlock), but it will
-    raise a CourseStructureError if anything you pass in is missing fields that
-    we expect in a SectionDescriptor or its SequenceDescriptor children.
-    """
-    _check_section_fields(section)
+    that the learning_sequences app expects. OLX import permits structures that
+    are much less constrained than Studio's UI allows for, so whenever we run
+    into something that does not meet our Course -> Section -> Subsection
+    hierarchy expectations, we add a support-team-readable error message to our
+    list of ContentDataErrors to pass back.

+    At this point in the code, everything has already been deserialized into
+    SectionBlocks and SequenceBlocks, but we're going to phrase our messages in
+    ways that would make sense to someone looking at the import OLX, since that
+    is the layer that the course teams and support teams are working with.
+    """
+    section_errors = []
+
+    # First check if it's not a section at all, and short circuit if it isn't.
+    if section.location.block_type != 'chapter':
+        section_errors.append(_error_for_not_section(section))
+        return (None, section_errors)
+
+    # We haven't officially killed off problemset and videosequence yet, so
+    # treat them as equivalent to sequential for now.
+    valid_sequence_tags = ['sequential', 'problemset', 'videosequence']
    sequences_data = []
+
    for sequence in section.get_children():
-        _check_sequence_fields(sequence)
+        if sequence.location.block_type not in valid_sequence_tags:
+            section_errors.append(_error_for_not_sequence(section, sequence))
+            continue
+
        sequences_data.append(
            CourseLearningSequenceData(
                usage_key=_remove_version_info(sequence.location),
@@ -146,22 +141,30 @@ def _make_section_data(section):
            visible_to_staff_only=section.visible_to_staff_only,
        ),
    )
-    return section_data
+    return section_data, section_errors


@function_trace('get_outline_from_modulestore')
-def get_outline_from_modulestore(course_key):
+def get_outline_from_modulestore(course_key) -> Tuple[CourseOutlineData, List[ContentErrorData]]:
    """
-    Get a learning_sequence.data.CourseOutlineData for a param:course_key
+    Return a CourseOutlineData and list of ContentErrorData for param:course_key
+
+    This function does not write any data as a side-effect. It generates a
+    CourseOutlineData by inspecting the contents in the modulestore, but does
+    not push that data anywhere. This function only operates on the published
+    branch, and will not work on Old Mongo courses.
    """
    store = modulestore()
+    content_errors = []

    with store.branch_setting(ModuleStoreEnum.Branch.published_only, course_key):
        course = store.get_course(course_key, depth=2)
        sections_data = []
        for section in course.get_children():
-            section_data = _make_section_data(section)
-            sections_data.append(section_data)
+            section_data, section_errors = _make_section_data(section)
+            if section_data:
+                sections_data.append(section_data)
+            content_errors.extend(section_errors)

        course_outline_data = CourseOutlineData(
            course_key=course_key,
@@ -183,7 +186,8 @@ def get_outline_from_modulestore(course_key):
            self_paced=course.self_paced,
            course_visibility=CourseVisibility(course.course_visibility),
        )
-    return course_outline_data
+
+    return (course_outline_data, content_errors)


 def update_outline_from_modulestore(course_key):
@@ -196,6 +200,8 @@ def update_outline_from_modulestore(course_key):
    # New Relic for easier trace debugging.
    set_custom_attribute('course_id', str(course_key))

-    course_outline_data = get_outline_from_modulestore(course_key)
+    course_outline_data, content_errors = get_outline_from_modulestore(course_key)
    set_custom_attribute('num_sequences', len(course_outline_data.sequences))
-    replace_course_outline(course_outline_data)
+    set_custom_attribute('num_content_errors', len(content_errors))
+
+    replace_course_outline(course_outline_data, content_errors=content_errors)
--- a/cms/djangoapps/contentstore/tests/test_outlines.py
+++ b/cms/djangoapps/contentstore/tests/test_outlines.py
@@ -12,7 +12,7 @@ from xmodule.modulestore import ModuleStoreEnum
 from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
 from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory

-from ..outlines import CourseStructureError, get_outline_from_modulestore
+from ..outlines import get_outline_from_modulestore


 class OutlineFromModuleStoreTestCase(ModuleStoreTestCase):
@@ -58,7 +58,7 @@ class OutlineFromModuleStoreTestCase(ModuleStoreTestCase):
        # published branch to make sure we have the right data.
        with self.store.branch_setting(ModuleStoreEnum.Branch.published_only, self.course_key):
            published_course = self.store.get_course(self.course_key, depth=2)
-        outline = get_outline_from_modulestore(self.course_key)
+        outline, _errs = get_outline_from_modulestore(self.course_key)

        # Check basic metdata...
        assert outline.title == "OutlineFromModuleStoreTestCase Course"
@@ -152,7 +152,7 @@ class OutlineFromModuleStoreTestCase(ModuleStoreTestCase):
                    display_name=f"Seq_1_{i}",
                )

-        outline = get_outline_from_modulestore(self.course_key)
+        outline, _errs = get_outline_from_modulestore(self.course_key)

        assert len(outline.sections) == 2
        assert len(outline.sections[0].sequences) == 3
@@ -171,19 +171,58 @@ class OutlineFromModuleStoreTestCase(ModuleStoreTestCase):
        """
        # Course -> Section -> Unit (No Sequence)
        with self.store.bulk_operations(self.course_key):
-            section = ItemFactory.create(
+            section_1 = ItemFactory.create(
                parent_location=self.draft_course.location,
                category='chapter',
                display_name="Section",
            )
+            # This Unit should be skipped
            ItemFactory.create(
-                parent_location=section.location,
+                parent_location=section_1.location,
                category='vertical',
-                display_name="Unit"
+                display_name="u1"
+            )
+            ItemFactory.create(
+                parent_location=section_1.location,
+                category='sequential',
+                display_name="standard_seq"
+            )
+            ItemFactory.create(
+                parent_location=section_1.location,
+                category='problemset',
+                display_name="pset_seq"
+            )
+            ItemFactory.create(
+                parent_location=section_1.location,
+                category='videosequence',
+                display_name="video_seq"
            )

-        with self.assertRaises(CourseStructureError):
-            get_outline_from_modulestore(self.course_key)
+            # This should work fine
+            section_2 = ItemFactory.create(
+                parent_location=self.draft_course.location,
+                category='chapter',
+                display_name="Section 2",
+            )
+
+            # Second error message here
+            ItemFactory.create(
+                parent_location=section_2.location,
+                category='vertical',
+                display_name="u2"
+            )
+
+        outline, errs = get_outline_from_modulestore(self.course_key)
+        assert len(outline.sections) == 2
+        assert len(outline.sections[0].sequences) == 3
+        assert len(outline.sections[1].sequences) == 0
+        assert len(outline.sequences) == 3
+
+        # Version-less usage keys
+        unit_1_loc = self.course_key.make_usage_key('vertical', 'u1')
+        unit_2_loc = self.course_key.make_usage_key('vertical', 'u2')
+        assert errs[0].usage_key == unit_1_loc
+        assert errs[1].usage_key == unit_2_loc

    def test_sequence_without_section(self):
        """
@@ -206,8 +245,13 @@ class OutlineFromModuleStoreTestCase(ModuleStoreTestCase):
                display_name="Unit",
            )

-        with self.assertRaises(CourseStructureError):
-            get_outline_from_modulestore(self.course_key)
+        outline, errs = get_outline_from_modulestore(self.course_key)
+        assert len(errs) == 1
+
+        # Strip version information from seq.location before comparison.
+        assert errs[0].usage_key == seq.location.map_into_course(self.course_key)
+        assert outline.sections == []
+        assert outline.sequences == {}

    def test_missing_display_names(self):
        """
@@ -225,7 +269,7 @@ class OutlineFromModuleStoreTestCase(ModuleStoreTestCase):
                display_name=None,
            )

-        outline = get_outline_from_modulestore(self.course_key)
+        outline, _errs = get_outline_from_modulestore(self.course_key)
        assert outline.sections[0].title == section.url_name
        assert outline.sections[0].sequences[0].title == sequence.url_name

@@ -238,7 +282,7 @@ class OutlineFromModuleStoreTestCase(ModuleStoreTestCase):
        published set of blocks will have version information when they're
        published, but learning_sequences ignores all of that).
        """
-        outline = get_outline_from_modulestore(self.course_key)
+        outline, _errs = get_outline_from_modulestore(self.course_key)

        # Recently modified content can have full version information on their
        # CourseKeys. We need to strip that out and have versionless-CourseKeys
--- a/openedx/core/djangoapps/content/learning_sequences/admin.py
+++ b/openedx/core/djangoapps/content/learning_sequences/admin.py
@@ -6,15 +6,112 @@ from enum import Enum
 import json

 from django.contrib import admin
-from django.utils.html import format_html
+from django.utils.html import format_html, format_html_join
+from django.utils.translation import gettext_lazy as _
 from opaque_keys import OpaqueKey
 import attr

-from .api import get_course_outline
-from .models import LearningContext
+from .api import get_content_errors, get_course_outline
+from .models import CourseContext, CourseSectionSequence


-class LearningContextAdmin(admin.ModelAdmin):
+class HasErrorsListFilter(admin.SimpleListFilter):
+    """
+    Filter to find Courses with content errors.
+
+    The default Django filter on an integer field will give a choice of values,
+    which isn't something we really want. We just want a filter for > 0 errors.
+    """
+    title = _("Error Status")
+    parameter_name = 'status'
+
+    def lookups(self, request, model_admin):
+        return (
+            ('has_errors', _('Courses with Errors')),
+        )
+
+    def queryset(self, request, queryset):
+        if self.value() == 'has_errors':
+            return queryset.filter(
+                learning_context__publish_report__num_errors__gt=0,
+            )
+
+
+class CourseSectionSequenceInline(admin.TabularInline):
+    """
+    Inline for showing the sequences of a course.
+
+    The queries look a bit weird because a CourseSectionSequence holds both
+    course-specific Sequence metadata, while much of the data we care about is
+    in LearningSequence (like the title) and CourseSequenceExam.
+    """
+    model = CourseSectionSequence
+    verbose_name = "Sequence"
+    verbose_name_plural = "Sequences"
+
+    fields = (
+        'title',
+        'is_time_limited',
+        'is_proctored_exam',
+        'is_practice_exam',
+        'accessible_after_due',
+        'visible_to_staff_only',
+        'hide_from_toc',
+    )
+    readonly_fields = (
+        'title',
+        'is_time_limited',
+        'is_proctored_exam',
+        'is_practice_exam',
+        'accessible_after_due',
+        'visible_to_staff_only',
+        'hide_from_toc',
+    )
+    ordering = ['ordering']
+
+    def get_queryset(self, request):
+        """
+        Optimization to cut an extra two requests per sequence.
+
+        We still have an N+1 issue, but given the number of sequences in a
+        course, this is tolerable even for large courses. It is possible to get
+        this down to one query if we do a lower level rendering for the
+        sequences later.
+        """
+        qs = super().get_queryset(request)
+        qs = qs.select_related('section', 'sequence')
+        return qs
+
+    def title(self, cs_seq):
+        return cs_seq.sequence.title
+    title.short_description = "Title"
+
+    def accessible_after_due(self, cs_seq):
+        return not cs_seq.inaccessible_after_due
+    accessible_after_due.boolean = True
+
+    def visible_to_staff_only(self, cs_seq):
+        return not cs_seq.visible_to_staff_only
+    visible_to_staff_only.boolean = True
+    visible_to_staff_only.short_description = "Staff Only"
+
+    def is_time_limited(self, cs_seq):
+        return cs_seq.exam.is_time_limited
+    is_time_limited.boolean = True
+    is_time_limited.short_description = "Timed Exam"
+
+    def is_proctored_exam(self, cs_seq):
+        return cs_seq.exam.is_proctored_exam
+    is_proctored_exam.boolean = True
+    is_proctored_exam.short_description = "Proctored Exam"
+
+    def is_practice_exam(self, cs_seq):
+        return cs_seq.exam.is_practice_exam
+    is_practice_exam.boolean = True
+    is_practice_exam.short_description = "Practice Exam"
+
+
+class CourseContextAdmin(admin.ModelAdmin):
    """
    This is a read-only model admin that is meant to be useful for querying.

@@ -25,25 +122,124 @@ class LearningContextAdmin(admin.ModelAdmin):
       to be written to from the Studio process.
    """
    list_display = (
-        'context_key',
+        'course_key',
        'title',
        'published_at',
-        'published_version',
-        'modified'
+        'num_errors',
+        'num_sections',
+        'num_sequences',
+    )
+    list_select_related = (
+        'learning_context',
+        'learning_context__publish_report',
+    )
+    list_filter = (
+        HasErrorsListFilter,
+        'learning_context__published_at',
    )
    readonly_fields = (
-        'context_key',
+        'course_key',
        'title',
        'published_at',
        'published_version',
        'created',
        'modified',
-        'outline',
-    )
-    search_fields = ['context_key', 'title']
-    actions = None
+        'course_visibility',
+        'self_paced',
+        'days_early_for_beta',
+        'entrance_exam_id',

-    def outline(self, obj):
+        'error_details',
+        'raw_outline',
+    )
+    raw_id_fields = (
+        'learning_context',
+    )
+    fieldsets = (
+        (
+            None,
+            {
+                'fields': (
+                    'course_key',
+                    'title',
+                    'published_at',
+                    'course_visibility',
+                    'self_paced',
+                    'days_early_for_beta',
+                    'entrance_exam_id',
+                ),
+            }
+        ),
+        (
+            'Outline Data',
+            {
+                'fields': (
+                    'num_sections', 'num_sequences', 'num_errors', 'error_details',
+                ),
+            }
+        ),
+        (
+            'Debug Details',
+            {
+                'fields': (
+                    'published_version', 'created', 'modified', 'raw_outline'
+                ),
+                'classes': ('collapse',),
+            }
+        ),
+    )
+    inlines = [
+        CourseSectionSequenceInline,
+    ]
+    search_fields = ['learning_context__context_key', 'learning_context__title']
+    ordering = ['-learning_context__published_at']
+
+    def get_queryset(self, request):
+        qs = super().get_queryset(request)
+        qs = qs.prefetch_related('section_sequences')
+        return qs
+
+    def created(self, course_context):
+        return course_context.learning_context.created
+    created.short_description = "Record Created"
+
+    def modified(self, course_context):
+        return course_context.learning_context.modified
+    modified.short_description = "Record Modified"
+
+    def course_key(self, course_context):
+        return course_context.learning_context.context_key
+
+    def title(self, course_context):
+        return course_context.learning_context.title
+
+    def published_at(self, course_context):
+        published_at_dt = course_context.learning_context.published_at
+        return published_at_dt.strftime("%B %-d, %Y, %-I:%M %p")
+    published_at.short_description = "Published at (UTC)"
+
+    def published_version(self, course_context):
+        return course_context.learning_context.published_version
+
+    def _publish_report_attr(self, course_context, attr_name):
+        learning_context = course_context.learning_context
+        if not hasattr(learning_context, 'publish_report'):
+            return None
+        return getattr(learning_context.publish_report, attr_name)
+
+    def num_errors(self, course_context):
+        return self._publish_report_attr(course_context, 'num_errors')
+    num_errors.short_description = "Errors"
+
+    def num_sections(self, course_context):
+        return self._publish_report_attr(course_context, 'num_sections')
+    num_sections.short_description = "Sections"
+
+    def num_sequences(self, course_context):
+        return self._publish_report_attr(course_context, 'num_sequences')
+    num_sequences.short_description = "Sequences"
+
+    def raw_outline(self, obj):
        """
        Computed attribute that shows the outline JSON in the detail view.
        """
@@ -56,7 +252,7 @@ class LearningContextAdmin(admin.ModelAdmin):
                return value.isoformat()
            return value

-        outline_data = get_course_outline(obj.context_key)
+        outline_data = get_course_outline(obj.learning_context.context_key)
        outline_data_dict = attr.asdict(
            outline_data,
            recurse=True,
@@ -65,6 +261,46 @@ class LearningContextAdmin(admin.ModelAdmin):
        outline_data_json = json.dumps(outline_data_dict, indent=2, sort_keys=True)
        return format_html("<pre>\n{}\n</pre>", outline_data_json)

+    def error_details(self, course_context):
+        """
+        Generates the HTML for Error Details.
+        """
+        learning_context = course_context.learning_context
+        if not hasattr(learning_context, 'publish_report'):
+            return ""
+
+        content_errors = get_content_errors(learning_context.context_key)
+        if not content_errors:
+            return format_html("<p>No errors were found.</p>")
+
+        list_items = format_html_join(
+            "\n",
+            "<li>{} <br><small>Usage Key: {}</small></li>",
+            (
+                (err_data.message, err_data.usage_key)
+                for err_data in content_errors
+            )
+        )
+        return format_html(
+            """
+            <p>
+            Parts of the course content were skipped when generating the Outline
+            because they did not follow the standard Course → Section →
+            Subsection hierarchy. Course structures like this cannot be created
+            in Studio, but can be created by OLX import. In OLX, this hierarchy
+            is represented by the tags <code>{}</code> → <code>{}</code> →
+            <code>{}</code>.
+            </p>
+            <ol>
+                {}
+            </ol>
+            """,
+            "<course>",
+            "<chapter>",
+            "<sequential>",
+            list_items,
+        )
+
    def has_add_permission(self, request):
        """
        Disallow additions. See docstring for has_change_permission()
@@ -93,4 +329,4 @@ class LearningContextAdmin(admin.ModelAdmin):
        return False


-admin.site.register(LearningContext, LearningContextAdmin)
+admin.site.register(CourseContext, CourseContextAdmin)
--- a/openedx/core/djangoapps/content/learning_sequences/api/init.py
+++ b/openedx/core/djangoapps/content/learning_sequences/api/init.py
@@ -1,5 +1,6 @@
 # lint-amnesty, pylint: disable=missing-module-docstring
 from .outlines import (
+    get_content_errors,
    get_course_keys_with_outlines,
    get_course_outline,
    get_user_course_outline,
--- a/openedx/core/djangoapps/content/learning_sequences/api/outlines.py
+++ b/openedx/core/djangoapps/content/learning_sequences/api/outlines.py
@@ -6,7 +6,7 @@ __init__.py imports from here, and is a more stable place to import from.
 import logging
 from collections import defaultdict
 from datetime import datetime
-from typing import Optional  # lint-amnesty, pylint: disable=unused-import
+from typing import Optional, List  # lint-amnesty, pylint: disable=unused-import

 import attr  # lint-amnesty, pylint: disable=unused-import
 from django.contrib.auth import get_user_model
@@ -19,6 +19,7 @@ from opaque_keys.edx.locator import LibraryLocator
 from opaque_keys.edx.keys import CourseKey  # lint-amnesty, pylint: disable=unused-import

 from ..data import (
+    ContentErrorData,
    CourseLearningSequenceData,
    CourseOutlineData,
    CourseSectionData,
@@ -29,12 +30,14 @@ from ..data import (
    VisibilityData,
 )
 from ..models import (
+    ContentError,
    CourseSection,
    CourseSectionSequence,
    CourseContext,
    CourseSequenceExam,
    LearningContext,
-    LearningSequence
+    LearningSequence,
+    PublishReport,
 )
 from .permissions import can_see_all_content
 from .processors.content_gating import ContentGatingOutlineProcessor
@@ -49,6 +52,7 @@ log = logging.getLogger(__name__)

 # Public API...
 __all__ = [
+    'get_content_errors',
    'get_course_keys_with_outlines',
    'get_course_outline',
    'get_user_course_outline',
@@ -193,7 +197,10 @@ def _get_course_context_for_outline(course_key: CourseKey) -> CourseContext:
        )
    try:
        course_context = (
-            LearningContext.objects.select_related('course_context').get(context_key=course_key).course_context
+            LearningContext.objects
+                           .select_related('course_context')
+                           .get(context_key=course_key)
+                           .course_context
        )
    except LearningContext.DoesNotExist:
        # Could happen if it hasn't been published.
@@ -203,6 +210,25 @@ def _get_course_context_for_outline(course_key: CourseKey) -> CourseContext:
    return course_context


+def get_content_errors(course_key: CourseKey) -> List[ContentErrorData]:
+    """
+    Get ContentErrors created in the most recent publish of this Course run.
+    """
+    try:
+        learning_context = (
+            LearningContext.objects.select_related('publish_report')
+                                   .get(context_key=course_key)
+        )
+        publish_report = learning_context.publish_report
+    except (LearningContext.DoesNotExist, PublishReport.DoesNotExist):
+        return []
+
+    return [
+        ContentErrorData(usage_key=error.usage_key, message=error.message)
+        for error in publish_report.content_errors.all().order_by('id')
+    ]
+
+
@function_trace('learning_sequences.api.get_user_course_outline')
 def get_user_course_outline(course_key: CourseKey,
                            user: User,
@@ -246,6 +272,13 @@ def get_user_course_outline_details(course_key: CourseKey,
 def _get_user_course_outline_and_processors(course_key: CourseKey,  # lint-amnesty, pylint: disable=missing-function-docstring
                                            user: User,
                                            at_time: datetime):
+    """
+    Helper function that runs the outline processors.
+
+    This function returns a UserCourseOutlineData and a dict of outline
+    processors that have executed their data loading and returned which
+    sequences to remove and which to mark as inaccessible.
+    """
    # Record the user separately from the standard user_id that views record,
    # because it's possible to ask for views as other users if you're global
    # staff. Performance is going to vary based on the user we're asking the
@@ -316,10 +349,11 @@ def _get_user_course_outline_and_processors(course_key: CourseKey,  # lint-amnes


@function_trace('learning_sequences.api.replace_course_outline')
-def replace_course_outline(course_outline: CourseOutlineData):
+def replace_course_outline(course_outline: CourseOutlineData,
+                           content_errors: Optional[List[ContentErrorData]] = None):
    """
    Replace the model data stored for the Course Outline with the contents of
-    course_outline (a CourseOutlineData).
+    course_outline (a CourseOutlineData). Record any content errors.

    This isn't particularly optimized at the moment.
    """
@@ -329,17 +363,20 @@ def replace_course_outline(course_outline: CourseOutlineData):
    )
    set_custom_attribute('learning_sequences.api.course_id', str(course_outline.course_key))

+    if content_errors is None:
+        content_errors = []
+
    with transaction.atomic():
        # Update or create the basic CourseContext...
        course_context = _update_course_context(course_outline)

-        # Wipe out the CourseSectionSequences join+ordering table so we can
-        # delete CourseSection and LearningSequence objects more easily.
+        # Wipe out the CourseSectionSequences join+ordering table
        course_context.section_sequences.all().delete()

        _update_sections(course_outline, course_context)
        _update_sequences(course_outline, course_context)
        _update_course_section_sequences(course_outline, course_context)
+        _update_publish_report(course_outline, content_errors, course_context)


 def _update_course_context(course_outline: CourseOutlineData):
@@ -457,3 +494,38 @@ def _update_course_section_sequences(course_outline: CourseOutlineData, course_c
            else:
                # Otherwise, delete any exams associated with it
                CourseSequenceExam.objects.filter(course_section_sequence=course_section_sequence).delete()
+
+
+def _update_publish_report(course_outline: CourseOutlineData,
+                           content_errors: List[ContentErrorData],
+                           course_context: CourseContext):
+    """
+    Record ContentErrors for this course publish. Deletes previous errors.
+    """
+    set_custom_attribute('learning_sequences.api.num_content_errors', len(content_errors))
+    learning_context = course_context.learning_context
+    try:
+        # Normal path if we're updating a PublishReport
+        publish_report = learning_context.publish_report
+        publish_report.num_errors = len(content_errors)
+        publish_report.num_sections = len(course_outline.sections)
+        publish_report.num_sequences = len(course_outline.sequences)
+        publish_report.content_errors.all().delete()
+    except PublishReport.DoesNotExist:
+        # Case where we're creating it for the first time.
+        publish_report = PublishReport(
+            learning_context=learning_context,
+            num_errors=len(content_errors),
+            num_sections=len(course_outline.sections),
+            num_sequences=len(course_outline.sequences),
+        )
+
+    publish_report.save()
+    publish_report.content_errors.bulk_create([
+        ContentError(
+            publish_report=publish_report,
+            usage_key=error_data.usage_key,
+            message=error_data.message,
+        )
+        for error_data in content_errors
+    ])
--- a/openedx/core/djangoapps/content/learning_sequences/api/tests/test_outlines.py
+++ b/openedx/core/djangoapps/content/learning_sequences/api/tests/test_outlines.py
@@ -23,6 +23,7 @@ from common.djangoapps.student.auth import user_has_role
 from common.djangoapps.student.roles import CourseBetaTesterRole

 from ...data import (
+    ContentErrorData,
    CourseLearningSequenceData,
    CourseOutlineData,
    CourseSectionData,
@@ -31,6 +32,7 @@ from ...data import (
    VisibilityData,
 )
 from ..outlines import (
+    get_content_errors,
    get_course_outline,
    get_user_course_outline,
    get_user_course_outline_details,
@@ -1274,3 +1276,64 @@ class SequentialVisibilityTestCase(CacheIsolationTestCase):
                    assert len(user_course_outline.sequences) == 6
                    assert all(is_sequence_accessible),\
                        'Sequences should be accessible to enrolled, staff users for a public_outline course'
+
+
+class ContentErrorTestCase(CacheIsolationTestCase):
+    """Test error collection and reporting."""
+
+    def test_errors(self):
+        """
+        Basic tests for writing and retriving errors.
+        """
+        course_key = CourseKey.from_string("course-v1:OpenEdX+Outlines+Errors")
+        outline = CourseOutlineData(
+            course_key=course_key,
+            title="Outline Errors Test Course!",
+            published_at=datetime(2021, 3, 21, tzinfo=timezone.utc),
+            published_version="8ebece4b69dd593d82fe2020",
+            sections=[],
+            self_paced=False,
+            days_early_for_beta=None,
+            entrance_exam_id=None,
+            course_visibility=CourseVisibility.PRIVATE,
+        )
+        usage_key_1 = course_key.make_usage_key('sequential', 'seq1')
+        usage_key_2 = course_key.make_usage_key('sequential', 'seq2')
+        replace_course_outline(
+            outline,
+            content_errors=[
+                # Explicitly set to no usage key.
+                ContentErrorData(message="Content is Hard", usage_key=None),
+
+                # Implicitly set usage key
+                ContentErrorData("Simple Content Error Description"),
+
+                # Multiple copies of the same usage key
+                ContentErrorData(message="Seq1 is wrong", usage_key=usage_key_1),
+                ContentErrorData(message="Seq1 is still wrong", usage_key=usage_key_1),
+
+                # Another key
+                ContentErrorData(message="Seq2 is also wrong", usage_key=usage_key_2)
+            ]
+        )
+        assert outline == get_course_outline(course_key)
+
+        # Ordering is preserved.
+        assert get_content_errors(course_key) == [
+            ContentErrorData(message="Content is Hard", usage_key=None),
+            ContentErrorData(message="Simple Content Error Description", usage_key=None),
+            ContentErrorData(message="Seq1 is wrong", usage_key=usage_key_1),
+            ContentErrorData(message="Seq1 is still wrong", usage_key=usage_key_1),
+            ContentErrorData(message="Seq2 is also wrong", usage_key=usage_key_2),
+        ]
+
+        # Now do it again and make sure updates work as well as inserts
+        replace_course_outline(
+            outline,
+            content_errors=[
+                ContentErrorData(message="Content is Hard", usage_key=None),
+            ]
+        )
+        assert get_content_errors(course_key) == [
+            ContentErrorData(message="Content is Hard", usage_key=None),
+        ]
--- a/openedx/core/djangoapps/content/learning_sequences/data.py
+++ b/openedx/core/djangoapps/content/learning_sequences/data.py
@@ -50,6 +50,22 @@ class ObjectDoesNotExist(Exception):
    pass  # lint-amnesty, pylint: disable=unnecessary-pass


+@attr.s(frozen=True)
+class ContentErrorData:
+    """
+    A human-readable description of something wrong with the content, to ease
+    the debugging of content issues–especially ones where content had to be
+    skipped because it was somehow malformed. The messages should be
+    comprehensible to course teams and support staff.
+
+    Errors can refer to things that are not anywhere in the outline, such as
+    when things don't show up where we expect then to be and we omit them from
+    the outline (unknown tag types, sequences where we expect sections, etc.)
+    """
+    message = attr.ib(type=str)
+    usage_key = attr.ib(type=Optional[UsageKey], default=None)
+
+
@attr.s(frozen=True)
 class VisibilityData:
    """
--- a/openedx/core/djangoapps/content/learning_sequences/migrations/0009_contenterror_publishreport.py
+++ b/openedx/core/djangoapps/content/learning_sequences/migrations/0009_contenterror_publishreport.py
@@ -0,0 +1,34 @@
+# Generated by Django 2.2.19 on 2021-03-12 16:31
+
+from django.db import migrations, models
+import django.db.models.deletion
+import opaque_keys.edx.django.models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('learning_sequences', '0008_add_learning_context_title_index'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='PublishReport',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('num_errors', models.PositiveIntegerField()),
+                ('num_sections', models.PositiveIntegerField()),
+                ('num_sequences', models.PositiveIntegerField()),
+                ('learning_context', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='publish_report', to='learning_sequences.LearningContext')),
+            ],
+        ),
+        migrations.CreateModel(
+            name='ContentError',
+            fields=[
+                ('id', models.BigAutoField(primary_key=True, serialize=False)),
+                ('usage_key', opaque_keys.edx.django.models.UsageKeyField(max_length=255, null=True)),
+                ('message', models.TextField(max_length=10000)),
+                ('publish_report', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='content_errors', to='learning_sequences.PublishReport')),
+            ],
+        ),
+    ]
--- a/openedx/core/djangoapps/content/learning_sequences/migrations/0010_add_publishreport_indexes.py
+++ b/openedx/core/djangoapps/content/learning_sequences/migrations/0010_add_publishreport_indexes.py
@@ -0,0 +1,28 @@
+# Generated by Django 2.2.19 on 2021-03-13 18:25
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('learning_sequences', '0009_contenterror_publishreport'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='publishreport',
+            name='num_errors',
+            field=models.PositiveIntegerField(db_index=True),
+        ),
+        migrations.AlterField(
+            model_name='publishreport',
+            name='num_sections',
+            field=models.PositiveIntegerField(db_index=True),
+        ),
+        migrations.AlterField(
+            model_name='publishreport',
+            name='num_sequences',
+            field=models.PositiveIntegerField(db_index=True),
+        ),
+    ]
--- a/openedx/core/djangoapps/content/learning_sequences/migrations/0011_course_meta_names.py
+++ b/openedx/core/djangoapps/content/learning_sequences/migrations/0011_course_meta_names.py
@@ -0,0 +1,21 @@
+# Generated by Django 2.2.19 on 2021-03-14 05:48
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('learning_sequences', '0010_add_publishreport_indexes'),
+    ]
+
+    operations = [
+        migrations.AlterModelOptions(
+            name='coursecontext',
+            options={'verbose_name': 'Course', 'verbose_name_plural': 'Courses'},
+        ),
+        migrations.AlterModelOptions(
+            name='coursesectionsequence',
+            options={'verbose_name': 'Course Sequence', 'verbose_name_plural': 'Course Sequences'},
+        ),
+    ]
--- a/openedx/core/djangoapps/content/learning_sequences/models.py
+++ b/openedx/core/djangoapps/content/learning_sequences/models.py
@@ -87,6 +87,13 @@ class CourseContext(TimeStampedModel):
    self_paced = models.BooleanField(default=False)
    entrance_exam_id = models.CharField(max_length=255, null=True)

+    class Meta:
+        verbose_name = 'Course'
+        verbose_name_plural = 'Courses'
+
+    def __str__(self):
+        return f"{self.learning_context.context_key} ({self.learning_context.title})"
+

 class LearningSequence(TimeStampedModel):
    """
@@ -200,6 +207,11 @@ class CourseSectionSequence(CourseContentVisibilityMixin, TimeStampedModel):
        unique_together = [
            ['course_context', 'ordering'],
        ]
+        verbose_name = 'Course Sequence'
+        verbose_name_plural = 'Course Sequences'
+
+    def __str__(self):
+        return f"{self.section.title} > {self.sequence.title}"


 class CourseSequenceExam(TimeStampedModel):
@@ -212,3 +224,51 @@ class CourseSequenceExam(TimeStampedModel):
    is_practice_exam = models.BooleanField(default=False)
    is_proctored_enabled = models.BooleanField(default=False)
    is_time_limited = models.BooleanField(default=False)
+
+
+class PublishReport(models.Model):
+    """
+    A report about the content that generated this LearningContext publish.
+
+    All these fields could be derived with aggregate SQL functions, but it would
+    be slower and make the admin code more complex. Since we only write at
+    publish time, keeping things in sync is less of a concern.
+    """
+    learning_context = models.OneToOneField(
+        LearningContext, on_delete=models.CASCADE, related_name='publish_report'
+    )
+    num_errors = models.PositiveIntegerField(null=False, db_index=True)
+    num_sections = models.PositiveIntegerField(null=False, db_index=True)
+    num_sequences = models.PositiveIntegerField(null=False, db_index=True)
+
+
+class ContentError(models.Model):
+    """
+    Human readable content errors.
+
+    If something got here, it means that we were able to make _something_ (or
+    there would be no LearningContext at all), but something about the published
+    state of the content is wrong and should be flagged to course and support
+    teams. In many cases, this will be some malformed course structure that gets
+    uploaded via OLX import–a process that is more forgiving than Studio's UI.
+
+    It's a little weird to store errors in such a freeform manner like this. It
+    would be more efficient and flexible in terms of i18n if we were to store
+    error codes, and leave the message generation to the time of display. The
+    problem with that is that we don't know up front what the parameterization
+    for such errors would be. The current error messages being created are
+    fairly complicated and include references to multiple attributes of multiple
+    pieces of content with supporting breadcrumbs. Other future errors might
+    just be about display_name string length.
+
+    So instead of trying to model all that internally, I'm just allowing for
+    freeform messages. It is quite possible that at some point we will come up
+    with a more comprehensive taxonomy of error messages, at which point we
+    could do a backfill to regenerate this data in a more normalized way.
+    """
+    id = models.BigAutoField(primary_key=True)
+    publish_report = models.ForeignKey(
+        PublishReport, on_delete=models.CASCADE, related_name='content_errors'
+    )
+    usage_key = UsageKeyField(max_length=255, null=True)
+    message = models.TextField(max_length=10000, blank=False, null=False)