Improve caching of blockstore data

This includes an optimization to the get_bundle_version_files_cached method, which is used very often when loading blockstore data; it was previously being cached only in a process-local cache (lru_cache). My hunch is that in production, with many appservers and LMS workers and frequent deployments and a large number of bundles, the process-local cache is not being hit very often.

I also increased the MAX_BLOCKSTORE_CACHE_DELAY from 60s to 300s; this reduces the frequency with which we check if either (A) an external system modified the blockstore bundle and/or (B) we have a cache invalidation bug somewhere. I am increasing it because that check is more expensive than I thought (calling blockstore API to ascertain latest version of a particular bundle), and I haven't seen any cache invalidation errors that this would help to work around. (Plus, increasing this will make such bugs more obvious.)
This commit is contained in:
Braden MacDonald
2020-02-28 10:41:28 -08:00
parent a8d5749b6b
commit 4bb5cb52a6

View File

@@ -13,7 +13,6 @@ from datetime import datetime
from uuid import UUID
from django.core.cache import caches, InvalidCacheBackendError
from django.utils.lru_cache import lru_cache
from pytz import UTC
import requests
@@ -33,7 +32,7 @@ except InvalidCacheBackendError:
# (Note that we do usually explicitly invalidate this cache during write
# operations though, so this setting mostly affects actions by external systems
# on Blockstore or bugs where we left out the cache invalidation step.)
MAX_BLOCKSTORE_CACHE_DELAY = 60
MAX_BLOCKSTORE_CACHE_DELAY = 60 * 5
class BundleCache(object):
@@ -144,18 +143,23 @@ def get_bundle_version_number(bundle_uuid, draft_name=None):
return version
@lru_cache(maxsize=200)
def get_bundle_version_files_cached(bundle_uuid, bundle_version):
"""
Get the files in the specified BundleVersion. Since BundleVersions are
immutable, this should be cached as aggressively as possible (ideally
it would be infinitely but we don't want to use up all available memory).
So this method uses lru_cache() to cache results in process-local memory.
(Note: This can't use BundleCache because BundleCache only associates data
with the most recent bundleversion, not a specified bundleversion)
immutable, this should be cached as aggressively as possible.
"""
return blockstore_api.get_bundle_version_files(bundle_uuid, bundle_version)
# Use the blockstore django cache directly; this can't use BundleCache because BundleCache only associates data
# with the most recent bundleversion, not a specified bundleversion
# This key is '_v2' to avoid reading invalid values cached by a past version of this code with no timeout.
cache_key = 'bundle_version_files_v2:{}:{}'.format(bundle_uuid, bundle_version)
result = cache.get(cache_key)
if result is None:
result = blockstore_api.get_bundle_version_files(bundle_uuid, bundle_version)
# Cache this result. We should be able to cache this forever, since bundle versions are immutable, but currently
# this result may contain signed S3 URLs which become invalid after 3600 seconds. If Blockstore is improved to
# return URLs that redirect to the signed S3 URLs, then this can be changed to cache forever.
cache.set(cache_key, result, timeout=1800)
return result
def get_bundle_draft_files_cached(bundle_uuid, draft_name):
@@ -216,20 +220,22 @@ def get_bundle_file_data_with_cache(bundle_uuid, path, bundle_version=None, draf
return response.content
@lru_cache(maxsize=200)
def get_bundle_version_direct_links_cached(bundle_uuid, bundle_version):
"""
Get the direct links in the specified BundleVersion. Since BundleVersions
are immutable, this should be cached as aggressively as possible (ideally
it would be infinitely but we don't want to use up all available memory).
So this method uses lru_cache() to cache results in process-local memory.
(Note: This can't use BundleCache because BundleCache only associates data
with the most recent bundleversion, not a specified bundleversion)
are immutable, this should be cached as aggressively as possible.
"""
return {
link.name: link.direct for link in blockstore_api.get_bundle_version_links(bundle_uuid, bundle_version).values()
}
# Use the blockstore django cache directly; this can't use BundleCache because BundleCache only associates data
# with the most recent bundleversion, not a specified bundleversion
cache_key = 'bundle_version_direct_links:{}:{}'.format(bundle_uuid, bundle_version)
result = cache.get(cache_key)
if result is None:
result = {
link.name: link.direct
for link in blockstore_api.get_bundle_version_links(bundle_uuid, bundle_version).values()
}
cache.set(cache_key, result, timeout=None) # Cache forever since bundle versions are immutable
return result
def get_bundle_draft_direct_links_cached(bundle_uuid, draft_name):