From 4ddb02eff835689e5ff563edfe779b7d7257f12d Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Tue, 25 Feb 2020 13:27:35 -0800 Subject: [PATCH 1/2] Fix: transient S3 errors were not being reported properly Also improve usefulness of some blockstore runtime logs for debugging Context: Sometimes when trying to load an XBlock's XML file from Amazon S3, AWS will return a 4xx or 5xx response along with error XML like: NoSuchKeyThe specified key does not exist.foo/bar... A bug in the get_bundle_file_data_with_cache method would cause this XML to be returned to the runtime anyways, as if it were the expected OLX. This would then (obviously) lead to strange parsing bugs, e.g. when trying to interpret as an . This fixes the bug and improves the logging, both to make this sort of issue easier to debug in the future and to return whatever detailed error code S3 provides (or Blockstore, if S3 is not being used). --- .../xblock/runtime/blockstore_field_data.py | 4 +++- .../djangoapps/xblock/runtime/blockstore_runtime.py | 9 +++++++-- .../core/djangoapps/xblock/runtime/olx_parsing.py | 3 +++ openedx/core/djangolib/blockstore_cache.py | 12 ++++++++++-- openedx/core/lib/blockstore_api/__init__.py | 1 + openedx/core/lib/blockstore_api/exceptions.py | 4 ++++ 6 files changed, 28 insertions(+), 5 deletions(-) diff --git a/openedx/core/djangoapps/xblock/runtime/blockstore_field_data.py b/openedx/core/djangoapps/xblock/runtime/blockstore_field_data.py index d849bfcfb9..517e93b3e2 100644 --- a/openedx/core/djangoapps/xblock/runtime/blockstore_field_data.py +++ b/openedx/core/djangoapps/xblock/runtime/blockstore_field_data.py @@ -180,7 +180,9 @@ class BlockstoreFieldData(FieldData): # Otherwise, this is an anomalous get() before the XML was fully loaded: # This could happen if an XBlock's parse_xml() method tried to read a field before setting it, # if an XBlock read field data in its constructor (forbidden), or if an XBlock was loaded via - # some means other than runtime.get_block() + # some means other than runtime.get_block(). One way this can happen is if you log/print an XBlock during + # XML parsing, because ScopedStorageMixin.__repr__ will try to print all field values, and any fields which + # aren't mentioned in the XML (which are left at their default) will be "not loaded yet." log.exception( "XBlock %s tried to read from field data (%s) that wasn't loaded from Blockstore!", block.scope_ids.usage_id, name, diff --git a/openedx/core/djangoapps/xblock/runtime/blockstore_runtime.py b/openedx/core/djangoapps/xblock/runtime/blockstore_runtime.py index 8b3558bced..2f474ebdd2 100644 --- a/openedx/core/djangoapps/xblock/runtime/blockstore_runtime.py +++ b/openedx/core/djangoapps/xblock/runtime/blockstore_runtime.py @@ -13,7 +13,7 @@ from xblock.fields import ScopeIds from openedx.core.djangoapps.xblock.learning_context.manager import get_learning_context_impl from openedx.core.djangoapps.xblock.runtime.runtime import XBlockRuntime -from openedx.core.djangoapps.xblock.runtime.olx_parsing import parse_xblock_include +from openedx.core.djangoapps.xblock.runtime.olx_parsing import parse_xblock_include, BundleFormatException from openedx.core.djangoapps.xblock.runtime.serializer import serialize_xblock from openedx.core.djangolib.blockstore_cache import ( BundleCache, @@ -85,7 +85,12 @@ class BlockstoreXBlockRuntime(XBlockRuntime): This runtime API should normally be used via runtime.get_block() -> block.parse_xml() -> runtime.add_node_as_child """ - parsed_include = parse_xblock_include(node) + try: + parsed_include = parse_xblock_include(node) + except BundleFormatException: + # We need to log the XBlock ID or this will be hard to debug + log.error("BundleFormatException when parsing XBlock %s", block.scope_ids.usage_id) + raise # Also log details and stack trace self.add_child_include(block, parsed_include) def add_child_include(self, block, parsed_include): diff --git a/openedx/core/djangoapps/xblock/runtime/olx_parsing.py b/openedx/core/djangoapps/xblock/runtime/olx_parsing.py index e4a27e996b..9318cf415a 100644 --- a/openedx/core/djangoapps/xblock/runtime/olx_parsing.py +++ b/openedx/core/djangoapps/xblock/runtime/olx_parsing.py @@ -27,6 +27,9 @@ def parse_xblock_include(include_node): # An XBlock include looks like: # # Where "source" and "usage" are optional. + if include_node.tag != 'xblock-include': + # xss-lint: disable=python-wrap-html + raise BundleFormatException("Expected an XML node, but got <{}>".format(include_node.tag)) try: definition_path = include_node.attrib['definition'] except KeyError: diff --git a/openedx/core/djangolib/blockstore_cache.py b/openedx/core/djangolib/blockstore_cache.py index c1d12d99c3..040bc6a127 100644 --- a/openedx/core/djangolib/blockstore_cache.py +++ b/openedx/core/djangolib/blockstore_cache.py @@ -202,8 +202,16 @@ def get_bundle_file_data_with_cache(bundle_uuid, path, bundle_version=None, draf cached list of files in each bundle if available. """ file_info = get_bundle_file_metadata_with_cache(bundle_uuid, path, bundle_version, draft_name) - with requests.get(file_info.url, stream=True) as r: - return r.content + response = requests.get(file_info.url) + if response.status_code != 200: + try: + error_response = response.content.decode('utf-8')[:500] + except UnicodeDecodeError: + error_response = '(error details unavailable - response was not a [unicode] string)' + raise blockstore_api.BundleStorageError( + "Unexpected error trying to read {} from bundle {}: \n{}".format(path, bundle_uuid, error_response) + ) + return response.content @lru_cache(maxsize=200) diff --git a/openedx/core/lib/blockstore_api/__init__.py b/openedx/core/lib/blockstore_api/__init__.py index 288aeada6c..8b6de205c6 100644 --- a/openedx/core/lib/blockstore_api/__init__.py +++ b/openedx/core/lib/blockstore_api/__init__.py @@ -51,4 +51,5 @@ from .exceptions import ( BundleNotFound, DraftNotFound, BundleFileNotFound, + BundleStorageError, ) diff --git a/openedx/core/lib/blockstore_api/exceptions.py b/openedx/core/lib/blockstore_api/exceptions.py index 06e6de93c6..b58251d31f 100644 --- a/openedx/core/lib/blockstore_api/exceptions.py +++ b/openedx/core/lib/blockstore_api/exceptions.py @@ -25,3 +25,7 @@ class DraftNotFound(NotFound): class BundleFileNotFound(NotFound): pass + + +class BundleStorageError(BlockstoreException): + pass From 8b9594e7dd426f16805ce897729381fe3d5e78f5 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Fri, 28 Feb 2020 10:41:28 -0800 Subject: [PATCH 2/2] Improve caching of blockstore data This includes an optimization to the get_bundle_version_files_cached method, which is used very often when loading blockstore data; it was previously being cached only in a process-local cache (lru_cache). My hunch is that in production, with many appservers and LMS workers and frequent deployments and a large number of bundles, the process-local cache is not being hit very often. I also increased the MAX_BLOCKSTORE_CACHE_DELAY from 60s to 300s; this reduces the frequency with which we check if either (A) an external system modified the blockstore bundle and/or (B) we have a cache invalidation bug somewhere. I am increasing it because that check is more expensive than I thought (calling blockstore API to ascertain latest version of a particular bundle), and I haven't seen any cache invalidation errors that this would help to work around. (Plus, increasing this will make such bugs more obvious.) --- openedx/core/djangolib/blockstore_cache.py | 42 +++++++++++----------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/openedx/core/djangolib/blockstore_cache.py b/openedx/core/djangolib/blockstore_cache.py index 040bc6a127..c7b94f2061 100644 --- a/openedx/core/djangolib/blockstore_cache.py +++ b/openedx/core/djangolib/blockstore_cache.py @@ -13,7 +13,6 @@ from datetime import datetime from uuid import UUID from django.core.cache import caches, InvalidCacheBackendError -from django.utils.lru_cache import lru_cache from pytz import UTC import requests @@ -33,7 +32,7 @@ except InvalidCacheBackendError: # (Note that we do usually explicitly invalidate this cache during write # operations though, so this setting mostly affects actions by external systems # on Blockstore or bugs where we left out the cache invalidation step.) -MAX_BLOCKSTORE_CACHE_DELAY = 60 +MAX_BLOCKSTORE_CACHE_DELAY = 60 * 5 class BundleCache(object): @@ -144,18 +143,19 @@ def get_bundle_version_number(bundle_uuid, draft_name=None): return version -@lru_cache(maxsize=200) def get_bundle_version_files_cached(bundle_uuid, bundle_version): """ Get the files in the specified BundleVersion. Since BundleVersions are - immutable, this should be cached as aggressively as possible (ideally - it would be infinitely but we don't want to use up all available memory). - So this method uses lru_cache() to cache results in process-local memory. - - (Note: This can't use BundleCache because BundleCache only associates data - with the most recent bundleversion, not a specified bundleversion) + immutable, this should be cached as aggressively as possible. """ - return blockstore_api.get_bundle_version_files(bundle_uuid, bundle_version) + # Use the blockstore django cache directly; this can't use BundleCache because BundleCache only associates data + # with the most recent bundleversion, not a specified bundleversion + cache_key = 'bundle_version_files:{}:{}'.format(bundle_uuid, bundle_version) + result = cache.get(cache_key) + if result is None: + result = blockstore_api.get_bundle_version_files(bundle_uuid, bundle_version) + cache.set(cache_key, result, timeout=None) # Cache forever since bundle versions are immutable + return result def get_bundle_draft_files_cached(bundle_uuid, draft_name): @@ -214,20 +214,22 @@ def get_bundle_file_data_with_cache(bundle_uuid, path, bundle_version=None, draf return response.content -@lru_cache(maxsize=200) def get_bundle_version_direct_links_cached(bundle_uuid, bundle_version): """ Get the direct links in the specified BundleVersion. Since BundleVersions - are immutable, this should be cached as aggressively as possible (ideally - it would be infinitely but we don't want to use up all available memory). - So this method uses lru_cache() to cache results in process-local memory. - - (Note: This can't use BundleCache because BundleCache only associates data - with the most recent bundleversion, not a specified bundleversion) + are immutable, this should be cached as aggressively as possible. """ - return { - link.name: link.direct for link in blockstore_api.get_bundle_version_links(bundle_uuid, bundle_version).values() - } + # Use the blockstore django cache directly; this can't use BundleCache because BundleCache only associates data + # with the most recent bundleversion, not a specified bundleversion + cache_key = 'bundle_version_direct_links:{}:{}'.format(bundle_uuid, bundle_version) + result = cache.get(cache_key) + if result is None: + result = { + link.name: link.direct + for link in blockstore_api.get_bundle_version_links(bundle_uuid, bundle_version).values() + } + cache.set(cache_key, result, timeout=None) # Cache forever since bundle versions are immutable + return result def get_bundle_draft_direct_links_cached(bundle_uuid, draft_name):