From 4bb5cb52a64682cd52fee61d30c62491de854af1 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Fri, 28 Feb 2020 10:41:28 -0800 Subject: [PATCH] Improve caching of blockstore data This includes an optimization to the get_bundle_version_files_cached method, which is used very often when loading blockstore data; it was previously being cached only in a process-local cache (lru_cache). My hunch is that in production, with many appservers and LMS workers and frequent deployments and a large number of bundles, the process-local cache is not being hit very often. I also increased the MAX_BLOCKSTORE_CACHE_DELAY from 60s to 300s; this reduces the frequency with which we check if either (A) an external system modified the blockstore bundle and/or (B) we have a cache invalidation bug somewhere. I am increasing it because that check is more expensive than I thought (calling blockstore API to ascertain latest version of a particular bundle), and I haven't seen any cache invalidation errors that this would help to work around. (Plus, increasing this will make such bugs more obvious.) --- openedx/core/djangolib/blockstore_cache.py | 46 ++++++++++++---------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/openedx/core/djangolib/blockstore_cache.py b/openedx/core/djangolib/blockstore_cache.py index ee85f3bd43..9beeaf6071 100644 --- a/openedx/core/djangolib/blockstore_cache.py +++ b/openedx/core/djangolib/blockstore_cache.py @@ -13,7 +13,6 @@ from datetime import datetime from uuid import UUID from django.core.cache import caches, InvalidCacheBackendError -from django.utils.lru_cache import lru_cache from pytz import UTC import requests @@ -33,7 +32,7 @@ except InvalidCacheBackendError: # (Note that we do usually explicitly invalidate this cache during write # operations though, so this setting mostly affects actions by external systems # on Blockstore or bugs where we left out the cache invalidation step.) -MAX_BLOCKSTORE_CACHE_DELAY = 60 +MAX_BLOCKSTORE_CACHE_DELAY = 60 * 5 class BundleCache(object): @@ -144,18 +143,23 @@ def get_bundle_version_number(bundle_uuid, draft_name=None): return version -@lru_cache(maxsize=200) def get_bundle_version_files_cached(bundle_uuid, bundle_version): """ Get the files in the specified BundleVersion. Since BundleVersions are - immutable, this should be cached as aggressively as possible (ideally - it would be infinitely but we don't want to use up all available memory). - So this method uses lru_cache() to cache results in process-local memory. - - (Note: This can't use BundleCache because BundleCache only associates data - with the most recent bundleversion, not a specified bundleversion) + immutable, this should be cached as aggressively as possible. """ - return blockstore_api.get_bundle_version_files(bundle_uuid, bundle_version) + # Use the blockstore django cache directly; this can't use BundleCache because BundleCache only associates data + # with the most recent bundleversion, not a specified bundleversion + # This key is '_v2' to avoid reading invalid values cached by a past version of this code with no timeout. + cache_key = 'bundle_version_files_v2:{}:{}'.format(bundle_uuid, bundle_version) + result = cache.get(cache_key) + if result is None: + result = blockstore_api.get_bundle_version_files(bundle_uuid, bundle_version) + # Cache this result. We should be able to cache this forever, since bundle versions are immutable, but currently + # this result may contain signed S3 URLs which become invalid after 3600 seconds. If Blockstore is improved to + # return URLs that redirect to the signed S3 URLs, then this can be changed to cache forever. + cache.set(cache_key, result, timeout=1800) + return result def get_bundle_draft_files_cached(bundle_uuid, draft_name): @@ -216,20 +220,22 @@ def get_bundle_file_data_with_cache(bundle_uuid, path, bundle_version=None, draf return response.content -@lru_cache(maxsize=200) def get_bundle_version_direct_links_cached(bundle_uuid, bundle_version): """ Get the direct links in the specified BundleVersion. Since BundleVersions - are immutable, this should be cached as aggressively as possible (ideally - it would be infinitely but we don't want to use up all available memory). - So this method uses lru_cache() to cache results in process-local memory. - - (Note: This can't use BundleCache because BundleCache only associates data - with the most recent bundleversion, not a specified bundleversion) + are immutable, this should be cached as aggressively as possible. """ - return { - link.name: link.direct for link in blockstore_api.get_bundle_version_links(bundle_uuid, bundle_version).values() - } + # Use the blockstore django cache directly; this can't use BundleCache because BundleCache only associates data + # with the most recent bundleversion, not a specified bundleversion + cache_key = 'bundle_version_direct_links:{}:{}'.format(bundle_uuid, bundle_version) + result = cache.get(cache_key) + if result is None: + result = { + link.name: link.direct + for link in blockstore_api.get_bundle_version_links(bundle_uuid, bundle_version).values() + } + cache.set(cache_key, result, timeout=None) # Cache forever since bundle versions are immutable + return result def get_bundle_draft_direct_links_cached(bundle_uuid, draft_name):