Files
edx-platform/cms/djangoapps/contentstore/management/commands/reindex_course.py
Alison Langston e50490da88 feat: add optional inclusion date arg to course reindex command (#35830)
* feat: add optional inclusion date arg to course reindex command

* fix: pylint
2024-11-12 15:34:04 -05:00

185 lines
8.3 KiB
Python

""" Management command to update courses' search index """
import logging
from textwrap import dedent
from time import time
from datetime import date, datetime
from django.core.management import BaseCommand, CommandError
from django.conf import settings
from elasticsearch import exceptions
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey
from opaque_keys.edx.locator import CourseLocator
from search.search_engine_base import SearchEngine
from cms.djangoapps.contentstore.courseware_index import CourseAboutSearchIndexer, CoursewareSearchIndexer
from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disable=wrong-import-order
from .prompt import query_yes_no
class Command(BaseCommand):
    """
    Command to re-index courses

    Examples:

        ./manage.py reindex_course <course_id_1> <course_id_2> ... - reindexes courses with provided keys
        ./manage.py reindex_course --all --warning - reindexes all available courses with quieter logging
        ./manage.py reindex_course --active - reindexes courses that have started and have not ended
        ./manage.py reindex_course --from_inclusion_date - reindexes courses starting on or after
            COURSEWARE_SEARCH_INCLUSION_DATE
        ./manage.py reindex_course --setup - reindexes all courses for devstack setup
    """
    # The class docstring doubles as the command's help text.
    help = dedent(__doc__)
    CONFIRMATION_PROMPT = "Re-indexing all courses might be a time consuming operation. Do you want to continue?"

    def add_arguments(self, parser):
        """ Registers the positional course ids and the mutually exclusive selection flags. """
        parser.add_argument(
            'course_ids',
            nargs='*',
            metavar='course_id',
        )
        parser.add_argument(
            '--all',
            action='store_true',
            help='Reindex all courses',
        )
        parser.add_argument(
            '--active',
            action='store_true',
            help='Reindex active courses only',
        )
        parser.add_argument(
            '--from_inclusion_date',
            action='store_true',
            help='Reindex courses with a start date greater than COURSEWARE_SEARCH_INCLUSION_DATE',
        )
        parser.add_argument(
            '--setup',
            action='store_true',
            help='Reindex all courses on developers stack setup',
        )
        parser.add_argument(
            '--warning',
            action='store_true',
            help='Reduce logging to a WARNING level of output for progress tracking',
        )

    def _parse_course_key(self, raw_value):
        """
        Parses course key from string.

        Raises CommandError when the string is not a valid key, or when it parses
        to a key that is not a CourseLocator.
        """
        try:
            result = CourseKey.from_string(raw_value)
        except InvalidKeyError as exc:
            # Chain the cause so the underlying parsing error stays visible in tracebacks.
            raise CommandError(f"Invalid course_key: '{raw_value}'.") from exc

        if not isinstance(result, CourseLocator):
            raise CommandError(f"Argument {raw_value} is not a course key")

        return result

    def _should_skip_setup_reindex(self):
        """
        Tells whether a --setup run should stop before reindexing anything.

        Returns True when a search engine cannot be obtained (the error is logged),
        or when a legacy Elasticsearch index already exists with a non-empty mapping.
        Returns False otherwise.
        """
        index_names = (CoursewareSearchIndexer.INDEX_NAME, CourseAboutSearchIndexer.INDEX_NAME)
        for index_name in index_names:
            try:
                searcher = SearchEngine.get_search_engine(index_name)
            except exceptions.ElasticsearchException as exc:
                logging.exception('Search Engine error - %s', exc)
                return True

            # Legacy Elasticsearch engine
            if hasattr(searcher, '_es'):  # pylint: disable=protected-access
                indices = searcher._es.indices  # pylint: disable=protected-access
                # The mapping is only fetched when the index exists.
                # NOTE(review): the first populated index ends the check, so the remaining
                # index names are not inspected - confirm this is intended.
                if indices.exists(index=index_name) and indices.get_mapping(index=index_name):
                    return True

        return False

    def _active_course_keys(self, store):
        """
        Returns the keys of the courses that have a start date and either have no
        end date or an end date that is not in the past.
        """
        all_courses = store.get_courses()
        today = date.today()
        course_keys = [
            course.id
            for course in all_courses
            if course.start and (not course.end or course.end.date() >= today)
        ]
        logging.warning(f'Selected {len(course_keys)} active courses over a total of {len(all_courses)}.')
        return course_keys

    def _inclusion_date_course_keys(self, store):
        """
        Returns the keys of the courses whose start date is on or after the
        COURSEWARE_SEARCH_INCLUSION_DATE feature setting ('2020-01-01' when unset).
        """
        all_courses = store.get_courses()
        inclusion_date = datetime.strptime(
            settings.FEATURES.get('COURSEWARE_SEARCH_INCLUSION_DATE', '2020-01-01'),
            '%Y-%m-%d'
        ).date()

        # strptime yields a naive datetime, and comparing it with a timezone-aware start
        # raises TypeError. Comparing calendar dates (as --active does) gives the same
        # answer for naive values, since the inclusion date is midnight, and works for both.
        return [
            course.id
            for course in all_courses
            if course.start and course.start.date() >= inclusion_date
        ]

    def _reindex_courses(self, store, course_keys):
        """
        Reindexes every given course, logging progress every 10 courses and a final summary.

        A failure on one course is logged and recorded; the remaining courses are still processed.
        """
        total = len(course_keys)
        logging.warning(f'Reindexing {total} courses...')
        started_at = time()
        success = 0
        errors = []

        for count, course_key in enumerate(course_keys, start=1):
            try:
                CoursewareSearchIndexer.do_course_reindex(store, course_key)
            except Exception as exc:  # lint-amnesty, pylint: disable=broad-except
                errors.append(course_key)
                logging.exception('Error indexing course %s due to the error: %s.', course_key, exc)
            else:
                success += 1

            # Reported outside the try block so that a failing course does not hide the progress line.
            if count % 10 == 0 or count == total:
                t = time() - started_at
                remaining = total - count
                logging.warning(f'{success} courses reindexed in {t:.1f} seconds. {remaining} remaining...')

        t = time() - started_at
        logging.warning(f'{success} of {total} courses reindexed succesfully. Total running time: {t:.1f} seconds.')
        if errors:
            logging.warning('Reindex failed for %s courses:', len(errors))
            for course_key in errors:
                logging.warning(course_key)

    def handle(self, *args, **options):
        """
        Selects the courses to reindex from the command options and reindexes them.

        Raises CommandError unless exactly one of the following is given: one or more
        course ids, or at least one of --all, --active, --setup, --from_inclusion_date.
        """
        course_ids = options['course_ids']
        setup_option = options['setup']

        index_all_courses_option = options['all'] or setup_option
        course_option_flag_option = (
            index_all_courses_option or options['active'] or options['from_inclusion_date']
        )

        # Course ids and selection flags are mutually exclusive, and one of them is required.
        if bool(course_ids) == bool(course_option_flag_option):
            raise CommandError((
                "reindex_course requires one or more <course_id>s"
                " OR the --all, --active, --setup, or --from_inclusion_date flags."
            ))

        store = modulestore()

        if options['warning']:
            logging.disable(level=logging.INFO)
            logging.warning('Reducing logging to WARNING level for easier progress tracking')

        if index_all_courses_option:
            if setup_option:
                # if reindexing is done during devstack setup step, don't prompt the user
                if self._should_skip_setup_reindex():
                    return
            elif not query_yes_no(self.CONFIRMATION_PROMPT, default="no"):
                return

            # in case of --setup or --all, get the list of course keys from all courses
            # that are stored in the modulestore
            course_keys = [course.id for course in store.get_courses()]
        elif options['active']:
            course_keys = self._active_course_keys(store)
        elif options['from_inclusion_date']:
            course_keys = self._inclusion_date_course_keys(store)
        else:
            # in case course keys are provided as arguments
            course_keys = [self._parse_course_key(course_id) for course_id in course_ids]

        self._reindex_courses(store, course_keys)