From 168c5deee8c60aa1ce8fce9c332c9b8bee86182e Mon Sep 17 00:00:00 2001
From: David Ormsbee
Date: Mon, 28 Sep 2020 12:25:28 -0400
Subject: [PATCH] Exclude CCX Courses from search indexing.

Search indexing is prohibitively slow for large CCX courses, even taking
hours in the case of some particularly large ones with thousands of
blocks. Temporarily disabling this functionality until it can be made
more performant (PSRE-288), so that we're not blocking the workers from
doing more latency-sensitive work. There is a separate effort to put
search indexing in its own set of workers.
---
 cms/djangoapps/contentstore/tasks.py            | 12 ++++++++++++
 .../contentstore/tests/test_courseware_index.py | 15 +++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/cms/djangoapps/contentstore/tasks.py b/cms/djangoapps/contentstore/tasks.py
index 227a207b41..44e9200fb8 100644
--- a/cms/djangoapps/contentstore/tasks.py
+++ b/cms/djangoapps/contentstore/tasks.py
@@ -16,6 +16,7 @@ from celery import group
 from celery.task import task
 from celery.utils.log import get_task_logger
 from celery_utils.persist_on_failure import LoggedPersistOnFailureTask
+from ccx_keys.locator import CCXLocator
 from django.conf import settings
 from django.contrib.auth import get_user_model
 from django.contrib.auth.models import User
@@ -180,6 +181,17 @@ def update_search_index(course_id, triggered_time_isoformat):
     """ Updates course search index. """
     try:
         course_key = CourseKey.from_string(course_id)
+
+        # We skip search indexing for CCX courses because there is currently
+        # some issue around Modulestore caching that makes it prohibitively
+        # expensive (sometimes hours-long for really complex courses).
+        if isinstance(course_key, CCXLocator):
+            LOGGER.warning(
+                u'Search indexing skipped for CCX Course %s (this is currently too slow to run in production)',
+                course_id
+            )
+            return
+
         CoursewareSearchIndexer.index(modulestore(), course_key, triggered_at=(_parse_time(triggered_time_isoformat)))
 
     except SearchIndexingError as exc:
diff --git a/cms/djangoapps/contentstore/tests/test_courseware_index.py b/cms/djangoapps/contentstore/tests/test_courseware_index.py
index 039548d4af..430c8e6adc 100644
--- a/cms/djangoapps/contentstore/tests/test_courseware_index.py
+++ b/cms/djangoapps/contentstore/tests/test_courseware_index.py
@@ -27,6 +27,7 @@ from contentstore.courseware_index import (
     SearchIndexingError
 )
 from contentstore.signals.handlers import listen_for_course_publish, listen_for_library_update
+from contentstore.tasks import update_search_index
 from contentstore.tests.utils import CourseTestCase
 from contentstore.utils import reverse_course_url, reverse_usage_url
 from course_modes.models import CourseMode
@@ -755,6 +756,20 @@ class TestTaskExecution(SharedModuleStoreTestCase):
         response = searcher.search(field_dictionary={"library": library_search_key})
         self.assertEqual(response["total"], 2)
 
+    def test_ignore_ccx(self):
+        """Test that we ignore CCX courses (it's too slow now)."""
+        # We're relying on our CCX short circuit to just stop execution as soon
+        # as it encounters a CCX key. If that isn't working properly, it will
+        # fall through to the normal indexing and raise an exception because
+        # there is no data or backing course behind the course key.
+        with patch('contentstore.courseware_index.CoursewareSearchIndexer.index') as mock_index:
+            self.assertIsNone(
+                update_search_index(
+                    "ccx-v1:OpenEdX+FAKECOURSE+FAKERUN+ccx@1", "2020-09-28T16:41:57.150796"
+                )
+            )
+            self.assertFalse(mock_index.called)
+
 
 @ddt.ddt
 class TestLibrarySearchIndexer(MixedWithOptionsTestCase):