Add an index from program type to program UUID in the catalog cache.

This commit is contained in:
Alex Dusenbery
2019-08-06 07:38:32 -04:00
committed by Alex Dusenbery
parent e5f3da9afa
commit f307e49f94
6 changed files with 173 additions and 28 deletions

View File

@@ -12,3 +12,9 @@ SITE_PATHWAY_IDS_CACHE_KEY_TPL = 'pathway-ids-{domain}'
# Template used to create cache keys for individual courses to program uuids.
COURSE_PROGRAMS_CACHE_KEY_TPL = 'course-programs-{course_run_id}'
# Site-aware cache key template used to locate an item containing
# a list of all program UUIDs with a certain program type (the Site is required
# because program_type values are likely to be shared between different sites
# that live in the same environment).
PROGRAMS_BY_TYPE_CACHE_KEY_TPL = 'programs-by-type-{site_id}-{program_type}'

View File

@@ -8,16 +8,22 @@ from django.contrib.auth import get_user_model
from django.contrib.sites.models import Site
from django.core.cache import cache
from django.core.management import BaseCommand
from six import text_type
from openedx.core.djangoapps.catalog.cache import (
COURSE_PROGRAMS_CACHE_KEY_TPL,
PATHWAY_CACHE_KEY_TPL,
PROGRAM_CACHE_KEY_TPL,
PROGRAMS_BY_TYPE_CACHE_KEY_TPL,
SITE_PATHWAY_IDS_CACHE_KEY_TPL,
SITE_PROGRAM_UUIDS_CACHE_KEY_TPL,
)
from openedx.core.djangoapps.catalog.models import CatalogIntegration
from openedx.core.djangoapps.catalog.utils import create_catalog_api_client, course_run_keys_for_program
from openedx.core.djangoapps.catalog.utils import (
create_catalog_api_client,
course_run_keys_for_program,
normalize_program_type,
)
logger = logging.getLogger(__name__)
User = get_user_model() # pylint: disable=invalid-name
@@ -33,6 +39,7 @@ class Command(BaseCommand):
"""
help = "Rebuild the LMS' cache of program data."
# pylint: disable=unicode-format-string
def handle(self, *args, **options):
failure = False
logger.info('populate-multitenant-programs switch is ON')
@@ -51,6 +58,7 @@ class Command(BaseCommand):
programs = {}
pathways = {}
courses = {}
programs_by_type = {}
for site in Site.objects.all():
site_config = getattr(site, 'configuration', None)
if site_config is None or not site_config.get_value('COURSE_CATALOG_API_URL'):
@@ -63,21 +71,21 @@ class Command(BaseCommand):
uuids, program_uuids_failed = self.get_site_program_uuids(client, site)
new_programs, program_details_failed = self.fetch_program_details(client, uuids)
new_pathways, pathways_failed = self.get_pathways(client, site)
new_pathways, new_programs, pathway_processing_failed = self.process_pathways(site, new_pathways,
new_programs)
new_courses, courses_failed = self.get_courses(new_programs)
new_pathways, new_programs, pathway_processing_failed = self.process_pathways(
site, new_pathways, new_programs
)
failure = any([
program_uuids_failed,
program_details_failed,
pathways_failed,
pathway_processing_failed,
courses_failed,
])
programs.update(new_programs)
pathways.update(new_pathways)
courses.update(new_courses)
courses.update(self.get_courses(new_programs))
programs_by_type.update(self.get_programs_by_type(site, new_programs))
logger.info(u'Caching UUIDs for {total} programs for site {site_name}.'.format(
total=len(uuids),
@@ -92,24 +100,19 @@ class Command(BaseCommand):
))
cache.set(SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain), pathway_ids, None)
successful_programs = len(programs)
logger.info(u'Caching details for {successful_programs} programs.'.format(
successful_programs=successful_programs))
logger.info(u'Caching details for {} programs.'.format(len(programs)))
cache.set_many(programs, None)
successful_pathways = len(pathways)
logger.info(u'Caching details for {successful_pathways} pathways.'.format(
successful_pathways=successful_pathways))
logger.info(u'Caching details for {} pathways.'.format(len(pathways)))
cache.set_many(pathways, None)
successful_courses = len(courses)
logger.info(u'Caching programs uuids for {successful_courses} courses.'.format(
successful_courses=successful_courses))
logger.info(u'Caching programs uuids for {} courses.'.format(len(courses)))
cache.set_many(courses, None)
logger.info(text_type('Caching program UUIDs by {} program types.'.format(len(programs_by_type))))
cache.set_many(programs_by_type, None)
if failure:
# This will fail a Jenkins job running this command, letting site
# operators know that there was a problem.
sys.exit(1)
def get_site_program_uuids(self, client, site):
@@ -215,10 +218,21 @@ class Command(BaseCommand):
course not in a program.
"""
course_runs = defaultdict(list)
failure = False
for program in programs.values():
for course_run_key in course_run_keys_for_program(program):
course_run_cache_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course_run_key)
course_runs[course_run_cache_key].append(program['uuid'])
return course_runs, failure
return course_runs
def get_programs_by_type(self, site, programs):
"""
Returns a dictionary mapping site-aware cache keys corresponding to program types
to lists of program uuids with that type.
"""
programs_by_type = defaultdict(list)
for program in programs.values():
program_type = normalize_program_type(program.get('type'))
cache_key = PROGRAMS_BY_TYPE_CACHE_KEY_TPL.format(site_id=site.id, program_type=program_type)
programs_by_type[cache_key].append(program['uuid'])
return programs_by_type

View File

@@ -12,9 +12,11 @@ from openedx.core.djangoapps.catalog.cache import (
COURSE_PROGRAMS_CACHE_KEY_TPL,
PATHWAY_CACHE_KEY_TPL,
PROGRAM_CACHE_KEY_TPL,
PROGRAMS_BY_TYPE_CACHE_KEY_TPL,
SITE_PATHWAY_IDS_CACHE_KEY_TPL,
SITE_PROGRAM_UUIDS_CACHE_KEY_TPL
)
from openedx.core.djangoapps.catalog.utils import normalize_program_type
from openedx.core.djangoapps.catalog.tests.factories import PathwayFactory, ProgramFactory
from openedx.core.djangoapps.catalog.tests.mixins import CatalogIntegrationMixin
from openedx.core.djangoapps.site_configuration.tests.mixins import SiteMixin
@@ -37,7 +39,7 @@ class TestCachePrograms(CatalogIntegrationMixin, CacheIsolationTestCase, SiteMix
self.catalog_integration = self.create_catalog_integration()
self.site_domain = 'testsite.com'
self.set_up_site(
self.site = self.set_up_site(
self.site_domain,
{
'COURSE_CATALOG_API_URL': self.catalog_integration.get_internal_api_url().rstrip('/')
@@ -190,6 +192,15 @@ class TestCachePrograms(CatalogIntegrationMixin, CacheIsolationTestCase, SiteMix
self.assertIn(self.programs[0]['uuid'], cache.get(course_run_cache_key))
self.assertIn(self.child_program['uuid'], cache.get(course_run_cache_key))
# for each program, assert that the program's UUID is in a cached list of
# program UUIDS by program type
for program in self.programs:
program_type = normalize_program_type(program.get('type', 'None'))
program_type_cache_key = PROGRAMS_BY_TYPE_CACHE_KEY_TPL.format(
site_id=self.site.id, program_type=program_type
)
self.assertIn(program['uuid'], cache.get(program_type_cache_key))
def test_handle_pathways(self):
"""
Verify that the command requests and caches credit pathways

View File

@@ -3,6 +3,7 @@
from __future__ import absolute_import
from collections import defaultdict
from datetime import timedelta
import mock
@@ -22,6 +23,7 @@ from openedx.core.djangoapps.catalog.cache import (
COURSE_PROGRAMS_CACHE_KEY_TPL,
PATHWAY_CACHE_KEY_TPL,
PROGRAM_CACHE_KEY_TPL,
PROGRAMS_BY_TYPE_CACHE_KEY_TPL,
SITE_PATHWAY_IDS_CACHE_KEY_TPL,
SITE_PROGRAM_UUIDS_CACHE_KEY_TPL
)
@@ -46,7 +48,9 @@ from openedx.core.djangoapps.catalog.utils import (
get_pathways,
get_program_types,
get_programs,
get_visible_sessions_for_entitlement
get_programs_by_type,
get_visible_sessions_for_entitlement,
normalize_program_type,
)
from openedx.core.djangoapps.content.course_overviews.tests.factories import CourseOverviewFactory
from openedx.core.djangoapps.site_configuration.tests.factories import SiteFactory
@@ -785,3 +789,85 @@ class TestProgramCourseRunCrawling(TestCase):
'course-run-4',
}
self.assertEqual(expected_course_runs, course_run_keys_for_program(self.complex_program))
@skip_unless_lms
class TestGetProgramsByType(CacheIsolationTestCase):
""" Test for the ``get_programs_by_type()`` function. """
ENABLED_CACHES = ['default']
@classmethod
def setUpClass(cls):
""" Sets up program data. """
super(TestGetProgramsByType, cls).setUpClass()
cls.site = SiteFactory()
cls.other_site = SiteFactory()
cls.masters_program_1 = ProgramFactory.create(type='Masters')
cls.masters_program_2 = ProgramFactory.create(type='Masters')
cls.masters_program_other_site = ProgramFactory.create(type='Masters')
cls.bachelors_program = ProgramFactory.create(type='Bachelors')
cls.no_type_program = ProgramFactory.create(type=None)
def setUp(self):
""" Loads program data into the cache before each test function. """
super(TestGetProgramsByType, self).setUp()
self.init_cache()
def init_cache(self):
""" This function plays the role of the ``cache_programs`` management command. """
all_programs = [
self.masters_program_1,
self.masters_program_2,
self.bachelors_program,
self.no_type_program,
self.masters_program_other_site
]
cached_programs = {
PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid']): program for program in all_programs
}
cache.set_many(cached_programs, None)
programs_by_type = defaultdict(list)
for program in all_programs:
program_type = normalize_program_type(program.get('type'))
site_id = self.site.id
if program == self.masters_program_other_site:
site_id = self.other_site.id
cache_key = PROGRAMS_BY_TYPE_CACHE_KEY_TPL.format(site_id=site_id, program_type=program_type)
programs_by_type[cache_key].append(program['uuid'])
cache.set_many(programs_by_type, None)
def test_get_masters_programs(self):
expected_programs = [self.masters_program_1, self.masters_program_2]
self.assertItemsEqual(expected_programs, get_programs_by_type(self.site, 'masters'))
def test_get_bachelors_programs(self):
expected_programs = [self.bachelors_program]
self.assertEqual(expected_programs, get_programs_by_type(self.site, 'bachelors'))
def test_get_no_such_type_programs(self):
expected_programs = []
self.assertEqual(expected_programs, get_programs_by_type(self.site, 'doctorate'))
def test_get_masters_programs_other_site(self):
expected_programs = [self.masters_program_other_site]
self.assertEqual(expected_programs, get_programs_by_type(self.other_site, 'masters'))
def test_get_programs_null_type(self):
expected_programs = [self.no_type_program]
self.assertEqual(expected_programs, get_programs_by_type(self.site, None))
def test_get_programs_false_type(self):
expected_programs = []
self.assertEqual(expected_programs, get_programs_by_type(self.site, False))
def test_normalize_program_type(self):
self.assertEqual('none', normalize_program_type(None))
self.assertEqual('false', normalize_program_type(False))
self.assertEqual('true', normalize_program_type(True))
self.assertEqual('', normalize_program_type(''))
self.assertEqual('masters', normalize_program_type('Masters'))
self.assertEqual('masters', normalize_program_type('masters'))

View File

@@ -1,5 +1,5 @@
"""Helper functions for working with the catalog service."""
from __future__ import absolute_import
from __future__ import absolute_import, unicode_literals
import copy
import datetime
@@ -7,12 +7,12 @@ import logging
import uuid
import pycountry
import six
from django.core.cache import cache
from django.core.exceptions import ObjectDoesNotExist
from edx_rest_api_client.client import EdxRestApiClient
from opaque_keys.edx.keys import CourseKey
from pytz import UTC
from six import text_type
from entitlements.utils import is_course_run_entitlement_fulfillable
from openedx.core.constants import COURSE_PUBLISHED
@@ -20,6 +20,7 @@ from openedx.core.djangoapps.catalog.cache import (
COURSE_PROGRAMS_CACHE_KEY_TPL,
PATHWAY_CACHE_KEY_TPL,
PROGRAM_CACHE_KEY_TPL,
PROGRAMS_BY_TYPE_CACHE_KEY_TPL,
SITE_PATHWAY_IDS_CACHE_KEY_TPL,
SITE_PROGRAM_UUIDS_CACHE_KEY_TPL
)
@@ -84,13 +85,14 @@ def check_catalog_integration_and_get_user(error_message_field):
return None, catalog_integration
def get_programs(site=None, uuid=None, uuids=None, course=None): # pylint: disable=redefined-outer-name
# pylint: disable=redefined-outer-name
def get_programs(site=None, uuid=None, uuids=None, course=None):
"""Read programs from the cache.
The cache is populated by a management command, cache_programs.
Keyword Arguments:
site (Site): django.contrib.sites.models object
site (Site): django.contrib.sites.models object to fetch programs of.
uuid (string): UUID identifying a specific program to read from the cache.
uuids (list of string): UUIDs identifying a specific programs to read from the cache.
course (string): course id identifying a specific course run to read from the cache.
@@ -122,12 +124,32 @@ def get_programs(site=None, uuid=None, uuids=None, course=None): # pylint: disa
return get_programs_by_uuids(uuids)
def get_programs_by_type(site, program_type):
"""
Keyword Arguments:
site (Site): The corresponding Site object to fetch programs for.
program_type (string): The program_type that matching programs must have.
Returns:
A list of programs for the given site with the given program_type.
"""
program_type_cache_key = PROGRAMS_BY_TYPE_CACHE_KEY_TPL.format(
site_id=site.id, program_type=normalize_program_type(program_type)
)
uuids = cache.get(program_type_cache_key, [])
if not uuids:
logger.warning(text_type(
'Failed to get program UUIDs from cache for site {} and type {}'.format(site.id, program_type)
))
return get_programs_by_uuids(uuids)
def get_programs_by_uuids(uuids):
"""
Gets a list of programs for the provided uuids
"""
# a list of UUID objects would be a perfectly reasonable parameter to provide
uuid_strings = [six.text_type(handle) for handle in uuids]
uuid_strings = [text_type(handle) for handle in uuids]
programs = cache.get_many([PROGRAM_CACHE_KEY_TPL.format(uuid=handle) for handle in uuid_strings])
programs = list(programs.values())
@@ -160,7 +182,7 @@ def get_program_types(name=None):
"""Retrieve program types from the catalog service.
Keyword Arguments:
name (string): Name identifying a specific program.
name (string): Name identifying a specific program type.
Returns:
list of dict, representing program types.
@@ -426,7 +448,7 @@ def get_course_uuid_for_course(course_run_key):
course_run_data = get_edx_api_data(
catalog_integration,
'course_runs',
resource_id=six.text_type(course_run_key),
resource_id=text_type(course_run_key),
api=api,
cache_key=run_cache_key if catalog_integration.is_cache_enabled else None,
long_term_cache=True,
@@ -596,3 +618,8 @@ def _course_runs_from_container(container):
for course in container.get('courses', [])
for course_run in course.get('course_runs', [])
]
def normalize_program_type(program_type):
""" Function that normalizes a program type string for use in a cache key. """
return str(program_type).lower()

View File

@@ -59,6 +59,7 @@ class SiteMixin(object):
values=site_configuration_values
)
self.use_site(site)
return site
def use_site(self, site):
"""