Add an index from program type to program UUID in the catalog cache.
This commit is contained in:
committed by
Alex Dusenbery
parent
e5f3da9afa
commit
f307e49f94
@@ -12,3 +12,9 @@ SITE_PATHWAY_IDS_CACHE_KEY_TPL = 'pathway-ids-{domain}'
|
||||
|
||||
# Template used to create cache keys for individual courses to program uuids.
|
||||
COURSE_PROGRAMS_CACHE_KEY_TPL = 'course-programs-{course_run_id}'
|
||||
|
||||
# Site-aware cache key template used to locate an item containing
|
||||
# a list of all program UUIDs with a certain program type (the Site is required
|
||||
# because program_type values are likely to be shared between different sites
|
||||
# that live in the same environment).
|
||||
PROGRAMS_BY_TYPE_CACHE_KEY_TPL = 'programs-by-type-{site_id}-{program_type}'
|
||||
|
||||
@@ -8,16 +8,22 @@ from django.contrib.auth import get_user_model
|
||||
from django.contrib.sites.models import Site
|
||||
from django.core.cache import cache
|
||||
from django.core.management import BaseCommand
|
||||
from six import text_type
|
||||
|
||||
from openedx.core.djangoapps.catalog.cache import (
|
||||
COURSE_PROGRAMS_CACHE_KEY_TPL,
|
||||
PATHWAY_CACHE_KEY_TPL,
|
||||
PROGRAM_CACHE_KEY_TPL,
|
||||
PROGRAMS_BY_TYPE_CACHE_KEY_TPL,
|
||||
SITE_PATHWAY_IDS_CACHE_KEY_TPL,
|
||||
SITE_PROGRAM_UUIDS_CACHE_KEY_TPL,
|
||||
)
|
||||
from openedx.core.djangoapps.catalog.models import CatalogIntegration
|
||||
from openedx.core.djangoapps.catalog.utils import create_catalog_api_client, course_run_keys_for_program
|
||||
from openedx.core.djangoapps.catalog.utils import (
|
||||
create_catalog_api_client,
|
||||
course_run_keys_for_program,
|
||||
normalize_program_type,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
User = get_user_model() # pylint: disable=invalid-name
|
||||
@@ -33,6 +39,7 @@ class Command(BaseCommand):
|
||||
"""
|
||||
help = "Rebuild the LMS' cache of program data."
|
||||
|
||||
# pylint: disable=unicode-format-string
|
||||
def handle(self, *args, **options):
|
||||
failure = False
|
||||
logger.info('populate-multitenant-programs switch is ON')
|
||||
@@ -51,6 +58,7 @@ class Command(BaseCommand):
|
||||
programs = {}
|
||||
pathways = {}
|
||||
courses = {}
|
||||
programs_by_type = {}
|
||||
for site in Site.objects.all():
|
||||
site_config = getattr(site, 'configuration', None)
|
||||
if site_config is None or not site_config.get_value('COURSE_CATALOG_API_URL'):
|
||||
@@ -63,21 +71,21 @@ class Command(BaseCommand):
|
||||
uuids, program_uuids_failed = self.get_site_program_uuids(client, site)
|
||||
new_programs, program_details_failed = self.fetch_program_details(client, uuids)
|
||||
new_pathways, pathways_failed = self.get_pathways(client, site)
|
||||
new_pathways, new_programs, pathway_processing_failed = self.process_pathways(site, new_pathways,
|
||||
new_programs)
|
||||
new_courses, courses_failed = self.get_courses(new_programs)
|
||||
new_pathways, new_programs, pathway_processing_failed = self.process_pathways(
|
||||
site, new_pathways, new_programs
|
||||
)
|
||||
|
||||
failure = any([
|
||||
program_uuids_failed,
|
||||
program_details_failed,
|
||||
pathways_failed,
|
||||
pathway_processing_failed,
|
||||
courses_failed,
|
||||
])
|
||||
|
||||
programs.update(new_programs)
|
||||
pathways.update(new_pathways)
|
||||
courses.update(new_courses)
|
||||
courses.update(self.get_courses(new_programs))
|
||||
programs_by_type.update(self.get_programs_by_type(site, new_programs))
|
||||
|
||||
logger.info(u'Caching UUIDs for {total} programs for site {site_name}.'.format(
|
||||
total=len(uuids),
|
||||
@@ -92,24 +100,19 @@ class Command(BaseCommand):
|
||||
))
|
||||
cache.set(SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain), pathway_ids, None)
|
||||
|
||||
successful_programs = len(programs)
|
||||
logger.info(u'Caching details for {successful_programs} programs.'.format(
|
||||
successful_programs=successful_programs))
|
||||
logger.info(u'Caching details for {} programs.'.format(len(programs)))
|
||||
cache.set_many(programs, None)
|
||||
|
||||
successful_pathways = len(pathways)
|
||||
logger.info(u'Caching details for {successful_pathways} pathways.'.format(
|
||||
successful_pathways=successful_pathways))
|
||||
logger.info(u'Caching details for {} pathways.'.format(len(pathways)))
|
||||
cache.set_many(pathways, None)
|
||||
|
||||
successful_courses = len(courses)
|
||||
logger.info(u'Caching programs uuids for {successful_courses} courses.'.format(
|
||||
successful_courses=successful_courses))
|
||||
logger.info(u'Caching programs uuids for {} courses.'.format(len(courses)))
|
||||
cache.set_many(courses, None)
|
||||
|
||||
logger.info(text_type('Caching program UUIDs by {} program types.'.format(len(programs_by_type))))
|
||||
cache.set_many(programs_by_type, None)
|
||||
|
||||
if failure:
|
||||
# This will fail a Jenkins job running this command, letting site
|
||||
# operators know that there was a problem.
|
||||
sys.exit(1)
|
||||
|
||||
def get_site_program_uuids(self, client, site):
|
||||
@@ -215,10 +218,21 @@ class Command(BaseCommand):
|
||||
course not in a program.
|
||||
"""
|
||||
course_runs = defaultdict(list)
|
||||
failure = False
|
||||
|
||||
for program in programs.values():
|
||||
for course_run_key in course_run_keys_for_program(program):
|
||||
course_run_cache_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course_run_key)
|
||||
course_runs[course_run_cache_key].append(program['uuid'])
|
||||
return course_runs, failure
|
||||
return course_runs
|
||||
|
||||
def get_programs_by_type(self, site, programs):
|
||||
"""
|
||||
Returns a dictionary mapping site-aware cache keys corresponding to program types
|
||||
to lists of program uuids with that type.
|
||||
"""
|
||||
programs_by_type = defaultdict(list)
|
||||
for program in programs.values():
|
||||
program_type = normalize_program_type(program.get('type'))
|
||||
cache_key = PROGRAMS_BY_TYPE_CACHE_KEY_TPL.format(site_id=site.id, program_type=program_type)
|
||||
programs_by_type[cache_key].append(program['uuid'])
|
||||
return programs_by_type
|
||||
|
||||
@@ -12,9 +12,11 @@ from openedx.core.djangoapps.catalog.cache import (
|
||||
COURSE_PROGRAMS_CACHE_KEY_TPL,
|
||||
PATHWAY_CACHE_KEY_TPL,
|
||||
PROGRAM_CACHE_KEY_TPL,
|
||||
PROGRAMS_BY_TYPE_CACHE_KEY_TPL,
|
||||
SITE_PATHWAY_IDS_CACHE_KEY_TPL,
|
||||
SITE_PROGRAM_UUIDS_CACHE_KEY_TPL
|
||||
)
|
||||
from openedx.core.djangoapps.catalog.utils import normalize_program_type
|
||||
from openedx.core.djangoapps.catalog.tests.factories import PathwayFactory, ProgramFactory
|
||||
from openedx.core.djangoapps.catalog.tests.mixins import CatalogIntegrationMixin
|
||||
from openedx.core.djangoapps.site_configuration.tests.mixins import SiteMixin
|
||||
@@ -37,7 +39,7 @@ class TestCachePrograms(CatalogIntegrationMixin, CacheIsolationTestCase, SiteMix
|
||||
|
||||
self.catalog_integration = self.create_catalog_integration()
|
||||
self.site_domain = 'testsite.com'
|
||||
self.set_up_site(
|
||||
self.site = self.set_up_site(
|
||||
self.site_domain,
|
||||
{
|
||||
'COURSE_CATALOG_API_URL': self.catalog_integration.get_internal_api_url().rstrip('/')
|
||||
@@ -190,6 +192,15 @@ class TestCachePrograms(CatalogIntegrationMixin, CacheIsolationTestCase, SiteMix
|
||||
self.assertIn(self.programs[0]['uuid'], cache.get(course_run_cache_key))
|
||||
self.assertIn(self.child_program['uuid'], cache.get(course_run_cache_key))
|
||||
|
||||
# for each program, assert that the program's UUID is in a cached list of
|
||||
# program UUIDS by program type
|
||||
for program in self.programs:
|
||||
program_type = normalize_program_type(program.get('type', 'None'))
|
||||
program_type_cache_key = PROGRAMS_BY_TYPE_CACHE_KEY_TPL.format(
|
||||
site_id=self.site.id, program_type=program_type
|
||||
)
|
||||
self.assertIn(program['uuid'], cache.get(program_type_cache_key))
|
||||
|
||||
def test_handle_pathways(self):
|
||||
"""
|
||||
Verify that the command requests and caches credit pathways
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from collections import defaultdict
|
||||
from datetime import timedelta
|
||||
|
||||
import mock
|
||||
@@ -22,6 +23,7 @@ from openedx.core.djangoapps.catalog.cache import (
|
||||
COURSE_PROGRAMS_CACHE_KEY_TPL,
|
||||
PATHWAY_CACHE_KEY_TPL,
|
||||
PROGRAM_CACHE_KEY_TPL,
|
||||
PROGRAMS_BY_TYPE_CACHE_KEY_TPL,
|
||||
SITE_PATHWAY_IDS_CACHE_KEY_TPL,
|
||||
SITE_PROGRAM_UUIDS_CACHE_KEY_TPL
|
||||
)
|
||||
@@ -46,7 +48,9 @@ from openedx.core.djangoapps.catalog.utils import (
|
||||
get_pathways,
|
||||
get_program_types,
|
||||
get_programs,
|
||||
get_visible_sessions_for_entitlement
|
||||
get_programs_by_type,
|
||||
get_visible_sessions_for_entitlement,
|
||||
normalize_program_type,
|
||||
)
|
||||
from openedx.core.djangoapps.content.course_overviews.tests.factories import CourseOverviewFactory
|
||||
from openedx.core.djangoapps.site_configuration.tests.factories import SiteFactory
|
||||
@@ -785,3 +789,85 @@ class TestProgramCourseRunCrawling(TestCase):
|
||||
'course-run-4',
|
||||
}
|
||||
self.assertEqual(expected_course_runs, course_run_keys_for_program(self.complex_program))
|
||||
|
||||
|
||||
@skip_unless_lms
|
||||
class TestGetProgramsByType(CacheIsolationTestCase):
|
||||
""" Test for the ``get_programs_by_type()`` function. """
|
||||
ENABLED_CACHES = ['default']
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
""" Sets up program data. """
|
||||
super(TestGetProgramsByType, cls).setUpClass()
|
||||
cls.site = SiteFactory()
|
||||
cls.other_site = SiteFactory()
|
||||
cls.masters_program_1 = ProgramFactory.create(type='Masters')
|
||||
cls.masters_program_2 = ProgramFactory.create(type='Masters')
|
||||
cls.masters_program_other_site = ProgramFactory.create(type='Masters')
|
||||
cls.bachelors_program = ProgramFactory.create(type='Bachelors')
|
||||
cls.no_type_program = ProgramFactory.create(type=None)
|
||||
|
||||
def setUp(self):
|
||||
""" Loads program data into the cache before each test function. """
|
||||
super(TestGetProgramsByType, self).setUp()
|
||||
self.init_cache()
|
||||
|
||||
def init_cache(self):
|
||||
""" This function plays the role of the ``cache_programs`` management command. """
|
||||
all_programs = [
|
||||
self.masters_program_1,
|
||||
self.masters_program_2,
|
||||
self.bachelors_program,
|
||||
self.no_type_program,
|
||||
self.masters_program_other_site
|
||||
]
|
||||
cached_programs = {
|
||||
PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid']): program for program in all_programs
|
||||
}
|
||||
cache.set_many(cached_programs, None)
|
||||
|
||||
programs_by_type = defaultdict(list)
|
||||
for program in all_programs:
|
||||
program_type = normalize_program_type(program.get('type'))
|
||||
site_id = self.site.id
|
||||
|
||||
if program == self.masters_program_other_site:
|
||||
site_id = self.other_site.id
|
||||
|
||||
cache_key = PROGRAMS_BY_TYPE_CACHE_KEY_TPL.format(site_id=site_id, program_type=program_type)
|
||||
programs_by_type[cache_key].append(program['uuid'])
|
||||
|
||||
cache.set_many(programs_by_type, None)
|
||||
|
||||
def test_get_masters_programs(self):
|
||||
expected_programs = [self.masters_program_1, self.masters_program_2]
|
||||
self.assertItemsEqual(expected_programs, get_programs_by_type(self.site, 'masters'))
|
||||
|
||||
def test_get_bachelors_programs(self):
|
||||
expected_programs = [self.bachelors_program]
|
||||
self.assertEqual(expected_programs, get_programs_by_type(self.site, 'bachelors'))
|
||||
|
||||
def test_get_no_such_type_programs(self):
|
||||
expected_programs = []
|
||||
self.assertEqual(expected_programs, get_programs_by_type(self.site, 'doctorate'))
|
||||
|
||||
def test_get_masters_programs_other_site(self):
|
||||
expected_programs = [self.masters_program_other_site]
|
||||
self.assertEqual(expected_programs, get_programs_by_type(self.other_site, 'masters'))
|
||||
|
||||
def test_get_programs_null_type(self):
|
||||
expected_programs = [self.no_type_program]
|
||||
self.assertEqual(expected_programs, get_programs_by_type(self.site, None))
|
||||
|
||||
def test_get_programs_false_type(self):
|
||||
expected_programs = []
|
||||
self.assertEqual(expected_programs, get_programs_by_type(self.site, False))
|
||||
|
||||
def test_normalize_program_type(self):
|
||||
self.assertEqual('none', normalize_program_type(None))
|
||||
self.assertEqual('false', normalize_program_type(False))
|
||||
self.assertEqual('true', normalize_program_type(True))
|
||||
self.assertEqual('', normalize_program_type(''))
|
||||
self.assertEqual('masters', normalize_program_type('Masters'))
|
||||
self.assertEqual('masters', normalize_program_type('masters'))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""Helper functions for working with the catalog service."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import copy
|
||||
import datetime
|
||||
@@ -7,12 +7,12 @@ import logging
|
||||
import uuid
|
||||
|
||||
import pycountry
|
||||
import six
|
||||
from django.core.cache import cache
|
||||
from django.core.exceptions import ObjectDoesNotExist
|
||||
from edx_rest_api_client.client import EdxRestApiClient
|
||||
from opaque_keys.edx.keys import CourseKey
|
||||
from pytz import UTC
|
||||
from six import text_type
|
||||
|
||||
from entitlements.utils import is_course_run_entitlement_fulfillable
|
||||
from openedx.core.constants import COURSE_PUBLISHED
|
||||
@@ -20,6 +20,7 @@ from openedx.core.djangoapps.catalog.cache import (
|
||||
COURSE_PROGRAMS_CACHE_KEY_TPL,
|
||||
PATHWAY_CACHE_KEY_TPL,
|
||||
PROGRAM_CACHE_KEY_TPL,
|
||||
PROGRAMS_BY_TYPE_CACHE_KEY_TPL,
|
||||
SITE_PATHWAY_IDS_CACHE_KEY_TPL,
|
||||
SITE_PROGRAM_UUIDS_CACHE_KEY_TPL
|
||||
)
|
||||
@@ -84,13 +85,14 @@ def check_catalog_integration_and_get_user(error_message_field):
|
||||
return None, catalog_integration
|
||||
|
||||
|
||||
def get_programs(site=None, uuid=None, uuids=None, course=None): # pylint: disable=redefined-outer-name
|
||||
# pylint: disable=redefined-outer-name
|
||||
def get_programs(site=None, uuid=None, uuids=None, course=None):
|
||||
"""Read programs from the cache.
|
||||
|
||||
The cache is populated by a management command, cache_programs.
|
||||
|
||||
Keyword Arguments:
|
||||
site (Site): django.contrib.sites.models object
|
||||
site (Site): django.contrib.sites.models object to fetch programs of.
|
||||
uuid (string): UUID identifying a specific program to read from the cache.
|
||||
uuids (list of string): UUIDs identifying a specific programs to read from the cache.
|
||||
course (string): course id identifying a specific course run to read from the cache.
|
||||
@@ -122,12 +124,32 @@ def get_programs(site=None, uuid=None, uuids=None, course=None): # pylint: disa
|
||||
return get_programs_by_uuids(uuids)
|
||||
|
||||
|
||||
def get_programs_by_type(site, program_type):
|
||||
"""
|
||||
Keyword Arguments:
|
||||
site (Site): The corresponding Site object to fetch programs for.
|
||||
program_type (string): The program_type that matching programs must have.
|
||||
|
||||
Returns:
|
||||
A list of programs for the given site with the given program_type.
|
||||
"""
|
||||
program_type_cache_key = PROGRAMS_BY_TYPE_CACHE_KEY_TPL.format(
|
||||
site_id=site.id, program_type=normalize_program_type(program_type)
|
||||
)
|
||||
uuids = cache.get(program_type_cache_key, [])
|
||||
if not uuids:
|
||||
logger.warning(text_type(
|
||||
'Failed to get program UUIDs from cache for site {} and type {}'.format(site.id, program_type)
|
||||
))
|
||||
return get_programs_by_uuids(uuids)
|
||||
|
||||
|
||||
def get_programs_by_uuids(uuids):
|
||||
"""
|
||||
Gets a list of programs for the provided uuids
|
||||
"""
|
||||
# a list of UUID objects would be a perfectly reasonable parameter to provide
|
||||
uuid_strings = [six.text_type(handle) for handle in uuids]
|
||||
uuid_strings = [text_type(handle) for handle in uuids]
|
||||
|
||||
programs = cache.get_many([PROGRAM_CACHE_KEY_TPL.format(uuid=handle) for handle in uuid_strings])
|
||||
programs = list(programs.values())
|
||||
@@ -160,7 +182,7 @@ def get_program_types(name=None):
|
||||
"""Retrieve program types from the catalog service.
|
||||
|
||||
Keyword Arguments:
|
||||
name (string): Name identifying a specific program.
|
||||
name (string): Name identifying a specific program type.
|
||||
|
||||
Returns:
|
||||
list of dict, representing program types.
|
||||
@@ -426,7 +448,7 @@ def get_course_uuid_for_course(course_run_key):
|
||||
course_run_data = get_edx_api_data(
|
||||
catalog_integration,
|
||||
'course_runs',
|
||||
resource_id=six.text_type(course_run_key),
|
||||
resource_id=text_type(course_run_key),
|
||||
api=api,
|
||||
cache_key=run_cache_key if catalog_integration.is_cache_enabled else None,
|
||||
long_term_cache=True,
|
||||
@@ -596,3 +618,8 @@ def _course_runs_from_container(container):
|
||||
for course in container.get('courses', [])
|
||||
for course_run in course.get('course_runs', [])
|
||||
]
|
||||
|
||||
|
||||
def normalize_program_type(program_type):
|
||||
""" Function that normalizes a program type string for use in a cache key. """
|
||||
return str(program_type).lower()
|
||||
|
||||
@@ -59,6 +59,7 @@ class SiteMixin(object):
|
||||
values=site_configuration_values
|
||||
)
|
||||
self.use_site(site)
|
||||
return site
|
||||
|
||||
def use_site(self, site):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user