Improve caching of blockstore data
This includes an optimization to the get_bundle_version_files_cached method, which is used very often when loading blockstore data; it was previously being cached only in a process-local cache (lru_cache). My hunch is that in production, with many appservers and LMS workers and frequent deployments and a large number of bundles, the process-local cache is not being hit very often. I also increased the MAX_BLOCKSTORE_CACHE_DELAY from 60s to 300s; this reduces the frequency with which we check if either (A) an external system modified the blockstore bundle and/or (B) we have a cache invalidation bug somewhere. I am increasing it because that check is more expensive than I thought (calling blockstore API to ascertain latest version of a particular bundle), and I haven't seen any cache invalidation errors that this would help to work around. (Plus, increasing this will make such bugs more obvious.)
This commit is contained in:
@@ -13,7 +13,6 @@ from datetime import datetime
|
||||
from uuid import UUID
|
||||
|
||||
from django.core.cache import caches, InvalidCacheBackendError
|
||||
from django.utils.lru_cache import lru_cache
|
||||
from pytz import UTC
|
||||
import requests
|
||||
|
||||
@@ -33,7 +32,7 @@ except InvalidCacheBackendError:
|
||||
# (Note that we do usually explicitly invalidate this cache during write
|
||||
# operations though, so this setting mostly affects actions by external systems
|
||||
# on Blockstore or bugs where we left out the cache invalidation step.)
|
||||
MAX_BLOCKSTORE_CACHE_DELAY = 60
|
||||
MAX_BLOCKSTORE_CACHE_DELAY = 60 * 5
|
||||
|
||||
|
||||
class BundleCache(object):
|
||||
@@ -144,18 +143,23 @@ def get_bundle_version_number(bundle_uuid, draft_name=None):
|
||||
return version
|
||||
|
||||
|
||||
@lru_cache(maxsize=200)
|
||||
def get_bundle_version_files_cached(bundle_uuid, bundle_version):
|
||||
"""
|
||||
Get the files in the specified BundleVersion. Since BundleVersions are
|
||||
immutable, this should be cached as aggressively as possible (ideally
|
||||
it would be infinitely but we don't want to use up all available memory).
|
||||
So this method uses lru_cache() to cache results in process-local memory.
|
||||
|
||||
(Note: This can't use BundleCache because BundleCache only associates data
|
||||
with the most recent bundleversion, not a specified bundleversion)
|
||||
immutable, this should be cached as aggressively as possible.
|
||||
"""
|
||||
return blockstore_api.get_bundle_version_files(bundle_uuid, bundle_version)
|
||||
# Use the blockstore django cache directly; this can't use BundleCache because BundleCache only associates data
|
||||
# with the most recent bundleversion, not a specified bundleversion
|
||||
# This key is '_v2' to avoid reading invalid values cached by a past version of this code with no timeout.
|
||||
cache_key = 'bundle_version_files_v2:{}:{}'.format(bundle_uuid, bundle_version)
|
||||
result = cache.get(cache_key)
|
||||
if result is None:
|
||||
result = blockstore_api.get_bundle_version_files(bundle_uuid, bundle_version)
|
||||
# Cache this result. We should be able to cache this forever, since bundle versions are immutable, but currently
|
||||
# this result may contain signed S3 URLs which become invalid after 3600 seconds. If Blockstore is improved to
|
||||
# return URLs that redirect to the signed S3 URLs, then this can be changed to cache forever.
|
||||
cache.set(cache_key, result, timeout=1800)
|
||||
return result
|
||||
|
||||
|
||||
def get_bundle_draft_files_cached(bundle_uuid, draft_name):
|
||||
@@ -216,20 +220,22 @@ def get_bundle_file_data_with_cache(bundle_uuid, path, bundle_version=None, draf
|
||||
return response.content
|
||||
|
||||
|
||||
@lru_cache(maxsize=200)
|
||||
def get_bundle_version_direct_links_cached(bundle_uuid, bundle_version):
|
||||
"""
|
||||
Get the direct links in the specified BundleVersion. Since BundleVersions
|
||||
are immutable, this should be cached as aggressively as possible (ideally
|
||||
it would be infinitely but we don't want to use up all available memory).
|
||||
So this method uses lru_cache() to cache results in process-local memory.
|
||||
|
||||
(Note: This can't use BundleCache because BundleCache only associates data
|
||||
with the most recent bundleversion, not a specified bundleversion)
|
||||
are immutable, this should be cached as aggressively as possible.
|
||||
"""
|
||||
return {
|
||||
link.name: link.direct for link in blockstore_api.get_bundle_version_links(bundle_uuid, bundle_version).values()
|
||||
}
|
||||
# Use the blockstore django cache directly; this can't use BundleCache because BundleCache only associates data
|
||||
# with the most recent bundleversion, not a specified bundleversion
|
||||
cache_key = 'bundle_version_direct_links:{}:{}'.format(bundle_uuid, bundle_version)
|
||||
result = cache.get(cache_key)
|
||||
if result is None:
|
||||
result = {
|
||||
link.name: link.direct
|
||||
for link in blockstore_api.get_bundle_version_links(bundle_uuid, bundle_version).values()
|
||||
}
|
||||
cache.set(cache_key, result, timeout=None) # Cache forever since bundle versions are immutable
|
||||
return result
|
||||
|
||||
|
||||
def get_bundle_draft_direct_links_cached(bundle_uuid, draft_name):
|
||||
|
||||
Reference in New Issue
Block a user