From 6cf2503f0bb1ba3f2eccfeaca2f58c0a8952b857 Mon Sep 17 00:00:00 2001
From: Renzo Lucioni
Date: Tue, 2 May 2017 11:44:06 -0400
Subject: [PATCH] Add management command for caching program data

This command requests every available program from the discovery
service, writing each to its own cache entry with an indefinite
expiration. It is meant to be run on a scheduled basis and should be
the only code updating these cache entries.

LEARNER-382
---
 openedx/core/djangoapps/catalog/cache.py      |   5 +
 .../management/commands/cache_programs.py     |  92 +++++++++
 .../commands/tests/test_cache_programs.py     | 164 ++++++++++++++++++
 3 files changed, 261 insertions(+)
 create mode 100644 openedx/core/djangoapps/catalog/cache.py
 create mode 100644 openedx/core/djangoapps/catalog/management/commands/cache_programs.py
 create mode 100644 openedx/core/djangoapps/catalog/management/commands/tests/test_cache_programs.py

diff --git a/openedx/core/djangoapps/catalog/cache.py b/openedx/core/djangoapps/catalog/cache.py
new file mode 100644
index 0000000000..8e07bf9242
--- /dev/null
+++ b/openedx/core/djangoapps/catalog/cache.py
@@ -0,0 +1,5 @@
+# Template used to create cache keys for individual programs.
+PROGRAM_CACHE_KEY_TPL = 'program-{uuid}'
+
+# Cache key used to locate an item containing a list of all program UUIDs.
+PROGRAM_UUIDS_CACHE_KEY = 'program-uuids'
diff --git a/openedx/core/djangoapps/catalog/management/commands/cache_programs.py b/openedx/core/djangoapps/catalog/management/commands/cache_programs.py
new file mode 100644
index 0000000000..eeb34d42c3
--- /dev/null
+++ b/openedx/core/djangoapps/catalog/management/commands/cache_programs.py
@@ -0,0 +1,92 @@
+import logging
+import sys
+
+from django.contrib.auth import get_user_model
+from django.core.cache import cache
+from django.core.management import BaseCommand
+
+from openedx.core.djangoapps.catalog.cache import PROGRAM_CACHE_KEY_TPL, PROGRAM_UUIDS_CACHE_KEY
+from openedx.core.djangoapps.catalog.models import CatalogIntegration
+from openedx.core.djangoapps.catalog.utils import create_catalog_api_client
+
+
+logger = logging.getLogger(__name__)
+User = get_user_model()  # pylint: disable=invalid-name
+
+
+class Command(BaseCommand):
+    """Management command used to cache program data.
+
+    This command requests every available program from the discovery
+    service, writing each to its own cache entry with an indefinite expiration.
+    It is meant to be run on a scheduled basis and should be the only code
+    updating these cache entries.
+    """
+    help = "Rebuild the LMS' cache of program data."
+
+    def handle(self, *args, **options):
+        """Cache the list of program UUIDs, then the details of each program.
+
+        Raises User.DoesNotExist when the configured service user is missing and
+        re-raises any error from the UUID list request. Calls sys.exit(1) after
+        caching if any individual program's details could not be retrieved.
+        """
+        catalog_integration = CatalogIntegration.current()
+        username = catalog_integration.service_username
+
+        try:
+            user = User.objects.get(username=username)
+            client = create_catalog_api_client(user, catalog_integration)
+        except User.DoesNotExist:
+            logger.error(
+                'Failed to create API client. Service user {username} does not exist.'.format(username=username)
+            )
+            raise
+
+        try:
+            querystring = {
+                'exclude_utm': 1,
+                'status': ('active', 'retired'),
+                'uuids_only': 1,
+            }
+
+            logger.info('Requesting program UUIDs.')
+            uuids = client.programs.get(**querystring)
+        except:  # pylint: disable=bare-except
+            logger.error('Failed to retrieve program UUIDs.')
+            raise
+
+        total = len(uuids)
+        logger.info('Caching UUIDs for {total} programs.'.format(total=total))
+        cache.set(PROGRAM_UUIDS_CACHE_KEY, uuids, None)
+
+        programs = {}
+        failure = False
+        for uuid in uuids:
+            try:
+                logger.info('Requesting details for program {uuid}.'.format(uuid=uuid))
+                program = client.programs(uuid).get()
+
+                cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)
+                programs[cache_key] = program
+            except Exception:  # pylint: disable=broad-except
+                logger.exception('Failed to retrieve details for program {uuid}.'.format(uuid=uuid))
+                failure = True
+
+                continue
+
+        successful = len(programs)
+        logger.info('Caching details for {successful} programs.'.format(successful=successful))
+        cache.set_many(programs, None)
+
+        logger.info(
+            'Program caching complete. Successfully cached {successful} of {total} programs.'.format(
+                successful=successful,
+                total=total
+            )
+        )
+
+        if failure:
+            # This will fail a Jenkins job running this command, letting site
+            # operators know that there was a problem.
+            sys.exit(1)
diff --git a/openedx/core/djangoapps/catalog/management/commands/tests/test_cache_programs.py b/openedx/core/djangoapps/catalog/management/commands/tests/test_cache_programs.py
new file mode 100644
index 0000000000..64a115ed35
--- /dev/null
+++ b/openedx/core/djangoapps/catalog/management/commands/tests/test_cache_programs.py
@@ -0,0 +1,164 @@
+import json
+
+import httpretty
+from django.core.cache import cache
+from django.core.management import call_command
+
+from openedx.core.djangoapps.catalog.cache import PROGRAM_CACHE_KEY_TPL, PROGRAM_UUIDS_CACHE_KEY
+from openedx.core.djangoapps.catalog.tests.factories import ProgramFactory
+from openedx.core.djangoapps.catalog.tests.mixins import CatalogIntegrationMixin
+from openedx.core.djangolib.testing.utils import CacheIsolationTestCase, skip_unless_lms
+from student.tests.factories import UserFactory
+
+
+@skip_unless_lms
+@httpretty.activate
+class TestCachePrograms(CatalogIntegrationMixin, CacheIsolationTestCase):
+    ENABLED_CACHES = ['default']
+
+    def setUp(self):
+        super(TestCachePrograms, self).setUp()
+
+        self.catalog_integration = self.create_catalog_integration()
+
+        self.list_url = self.catalog_integration.internal_api_url.rstrip('/') + '/programs/'
+        self.detail_tpl = self.list_url.rstrip('/') + '/{uuid}/'
+
+        self.programs = ProgramFactory.create_batch(3)
+        self.uuids = [program['uuid'] for program in self.programs]
+
+    def mock_list(self):
+        def list_callback(request, uri, headers):
+            expected = {
+                'exclude_utm': ['1'],
+                'status': ['active', 'retired'],
+                'uuids_only': ['1']
+            }
+            self.assertEqual(request.querystring, expected)
+
+            return (200, headers, json.dumps(self.uuids))
+
+        httpretty.register_uri(
+            httpretty.GET,
+            self.list_url,
+            body=list_callback,
+            content_type='application/json'
+        )
+
+    def mock_detail(self, uuid, program):
+        httpretty.register_uri(
+            httpretty.GET,
+            self.detail_tpl.format(uuid=uuid),
+            body=json.dumps(program),
+            content_type='application/json'
+        )
+
+    def test_handle(self):
+        """
+        Verify that the command requests and caches program UUIDs and details.
+        """
+        # Ideally, this user would be created in the test setup and deleted in
+        # the one test case which covers the case where the user is missing. However,
+        # that deletion causes "OperationalError: no such table: wiki_attachmentrevision"
+        # when run on Jenkins.
+        UserFactory(username=self.catalog_integration.service_username)
+
+        programs = {
+            PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid']): program for program in self.programs
+        }
+
+        self.mock_list()
+
+        for uuid in self.uuids:
+            program = programs[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)]
+            self.mock_detail(uuid, program)
+
+        call_command('cache_programs')
+
+        cached_uuids = cache.get(PROGRAM_UUIDS_CACHE_KEY)
+        self.assertEqual(
+            set(cached_uuids),
+            set(self.uuids)
+        )
+
+        program_keys = list(programs.keys())
+        cached_programs = cache.get_many(program_keys)
+        # Verify that the keys were all cache hits.
+        self.assertEqual(
+            set(cached_programs),
+            set(programs)
+        )
+
+        # We can't use a set comparison here because these values are dictionaries
+        # and aren't hashable. We've already verified that all programs came out
+        # of the cache above, so all we need to do here is verify the accuracy of
+        # the data itself.
+        for key, program in cached_programs.items():
+            self.assertEqual(program, programs[key])
+
+    def test_handle_missing_service_user(self):
+        """
+        Verify that the command raises an exception when run without a service
+        user, and that program UUIDs are not cached.
+        """
+        with self.assertRaises(Exception):
+            call_command('cache_programs')
+
+        cached_uuids = cache.get(PROGRAM_UUIDS_CACHE_KEY)
+        self.assertEqual(cached_uuids, None)
+
+    def test_handle_missing_uuids(self):
+        """
+        Verify that the command raises an exception when it fails to retrieve
+        program UUIDs.
+        """
+        UserFactory(username=self.catalog_integration.service_username)
+
+        with self.assertRaises(Exception):
+            call_command('cache_programs')
+
+        cached_uuids = cache.get(PROGRAM_UUIDS_CACHE_KEY)
+        self.assertEqual(cached_uuids, None)
+
+    def test_handle_missing_programs(self):
+        """
+        Verify that a problem retrieving a program doesn't prevent the command
+        from retrieving and caching other programs, but does cause it to exit
+        with a non-zero exit code.
+        """
+        UserFactory(username=self.catalog_integration.service_username)
+
+        all_programs = {
+            PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid']): program for program in self.programs
+        }
+        partial_programs = {
+            PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid']): program for program in self.programs[:2]
+        }
+
+        self.mock_list()
+
+        for uuid in self.uuids[:2]:
+            program = partial_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)]
+            self.mock_detail(uuid, program)
+
+        with self.assertRaises(SystemExit) as context:
+            call_command('cache_programs')
+
+        self.assertEqual(context.exception.code, 1)
+
+        cached_uuids = cache.get(PROGRAM_UUIDS_CACHE_KEY)
+        self.assertEqual(
+            set(cached_uuids),
+            set(self.uuids)
+        )
+
+        program_keys = list(all_programs.keys())
+        cached_programs = cache.get_many(program_keys)
+        # One of the cache keys should result in a cache miss.
+        self.assertEqual(
+            set(cached_programs),
+            set(partial_programs)
+        )
+
+        for key, program in cached_programs.items():
+            self.assertEqual(program, partial_programs[key])