From 684f254a7764f5bdc8de122232483a75c81ab7ac Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Thu, 5 Dec 2019 14:37:18 -0500 Subject: [PATCH 1/4] Add error handling in course structure cache. When going between python 2 and python 3.5 we can get pickeld course structires that are incompatible no matter what we do do to the bug linked in the comment. In this case, handle the error and delete the corrupt data from the cache. Making this fairly generic because if we have any bad data in cache we don't want it to blow up the whole process. Just delete the bad data and try again. --- .../split_mongo/mongo_connection.py | 36 +++++++++++-------- .../tests/test_split_modulestore.py | 7 ++++ 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/split_mongo/mongo_connection.py b/common/lib/xmodule/xmodule/modulestore/split_mongo/mongo_connection.py index 928566e179..cdfd854d09 100644 --- a/common/lib/xmodule/xmodule/modulestore/split_mongo/mongo_connection.py +++ b/common/lib/xmodule/xmodule/modulestore/split_mongo/mongo_connection.py @@ -226,24 +226,30 @@ class CourseStructureCache(object): return None with TIMER.timer("CourseStructureCache.get", course_context) as tagger: - compressed_pickled_data = self.cache.get(key) - tagger.tag(from_cache=str(compressed_pickled_data is not None).lower()) + try: + compressed_pickled_data = self.cache.get(key) + tagger.tag(from_cache=str(compressed_pickled_data is not None).lower()) - if compressed_pickled_data is None: - # Always log cache misses, because they are unexpected - tagger.sample_rate = 1 + if compressed_pickled_data is None: + # Always log cache misses, because they are unexpected + tagger.sample_rate = 1 + return None + + tagger.measure('compressed_size', len(compressed_pickled_data)) + + pickled_data = zlib.decompress(compressed_pickled_data) + tagger.measure('uncompressed_size', len(pickled_data)) + + if six.PY2: + return pickle.loads(pickled_data) + else: + return pickle.loads(pickled_data, encoding='latin-1') + except Exception: + # The cached data is corrupt in some way, get rid of it. + log.warning("CourseStructureCache: Bad data in cache for %s", course_context) + self.cache.delete(key) return None - tagger.measure('compressed_size', len(compressed_pickled_data)) - - pickled_data = zlib.decompress(compressed_pickled_data) - tagger.measure('uncompressed_size', len(pickled_data)) - - if six.PY2: - return pickle.loads(pickled_data) - else: - return pickle.loads(pickled_data, encoding='latin-1') - def set(self, key, structure, course_context=None): """Given a structure, will pickle, compress, and write to cache.""" if self.cache is None: diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore.py b/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore.py index 6b72e27e81..d8de53452d 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore.py @@ -977,6 +977,13 @@ class TestCourseStructureCache(SplitModuleTest): # now make sure that you get the same structure self.assertEqual(cached_structure, not_cached_structure) + # If data is corrupted, get it from mongo again. + cache_key = self.new_course.id.version_guid + self.cache.set(cache_key, b"bad_data") + with check_mongo_calls(1): + not_cached_structure = self._get_structure(self.new_course) + + @patch('xmodule.modulestore.split_mongo.mongo_connection.get_cache') def test_course_structure_cache_no_cache_configured(self, mock_get_cache): mock_get_cache.side_effect = InvalidCacheBackendError From 48e9724808fa7683518ccfd5d8cf89522e6bf5da Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Thu, 5 Dec 2019 17:03:26 -0500 Subject: [PATCH 2/4] Handle other places where we might be unpickling things. We use pickling in conjunction with caches in these cases so we should be able to fallback to the uncached behavior if the pickled data is corrupt in some way. eg. it's a python 2 pickle we don't know how to read. --- .../djangoapps/content/block_structure/store.py | 10 +++++++++- openedx/core/lib/edx_api_utils.py | 13 +++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/openedx/core/djangoapps/content/block_structure/store.py b/openedx/core/djangoapps/content/block_structure/store.py index c6568f031f..aa4d90d481 100644 --- a/openedx/core/djangoapps/content/block_structure/store.py +++ b/openedx/core/djangoapps/content/block_structure/store.py @@ -207,7 +207,15 @@ class BlockStructureStore(object): """ Deserializes the given data and returns the parsed block_structure. """ - block_relations, transformer_data, block_data_map = zunpickle(serialized_data) + + try: + block_relations, transformer_data, block_data_map = zunpickle(serialized_data) + except Exception: + # Somehow failed to de-serialized the data, assume it's corrupt. + bs_model = self._get_model(root_block_usage_key) + logger.warning("BlockStructure: Failed to load data from cache for %s", bs_model) + raise BlockStructureNotFound(bs_model.data_usage_key) + return BlockStructureFactory.create_new( root_block_usage_key, block_relations, diff --git a/openedx/core/lib/edx_api_utils.py b/openedx/core/lib/edx_api_utils.py index 0f1fab26a1..e927dbaf5d 100644 --- a/openedx/core/lib/edx_api_utils.py +++ b/openedx/core/lib/edx_api_utils.py @@ -56,11 +56,16 @@ def get_edx_api_data(api_config, resource, api, resource_id=None, querystring=No cached = cache.get(cache_key) if cached: - cached_response = zunpickle(cached) - if fields: - cached_response = get_fields(fields, cached_response) + try: + cached_response = zunpickle(cached) + if fields: + cached_response = get_fields(fields, cached_response) - return cached_response + return cached_response + except Exception: + # Data is corrupt in some way. + log.warning("Data for cache is corrupt for cache key %s", cache_key) + cache.delete(cache_key) try: endpoint = getattr(api, resource) From bf482f498977171842a5b08b99af0c51644dac56 Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Fri, 6 Dec 2019 13:38:58 -0500 Subject: [PATCH 3/4] Respond to code review. --- .../xmodule/modulestore/tests/test_split_modulestore.py | 4 +++- openedx/core/djangoapps/content/block_structure/store.py | 2 +- openedx/core/lib/edx_api_utils.py | 9 +++++---- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore.py b/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore.py index d8de53452d..c12915a620 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore.py @@ -981,8 +981,10 @@ class TestCourseStructureCache(SplitModuleTest): cache_key = self.new_course.id.version_guid self.cache.set(cache_key, b"bad_data") with check_mongo_calls(1): - not_cached_structure = self._get_structure(self.new_course) + not_corrupt_structure = self._get_structure(self.new_course) + # now make sure that you get the same structure + self.assertEqual(not_corrupt_structure, not_cached_structure) @patch('xmodule.modulestore.split_mongo.mongo_connection.get_cache') def test_course_structure_cache_no_cache_configured(self, mock_get_cache): diff --git a/openedx/core/djangoapps/content/block_structure/store.py b/openedx/core/djangoapps/content/block_structure/store.py index aa4d90d481..43de5cc478 100644 --- a/openedx/core/djangoapps/content/block_structure/store.py +++ b/openedx/core/djangoapps/content/block_structure/store.py @@ -213,7 +213,7 @@ class BlockStructureStore(object): except Exception: # Somehow failed to de-serialized the data, assume it's corrupt. bs_model = self._get_model(root_block_usage_key) - logger.warning("BlockStructure: Failed to load data from cache for %s", bs_model) + logger.exception("BlockStructure: Failed to load data from cache for %s", bs_model) raise BlockStructureNotFound(bs_model.data_usage_key) return BlockStructureFactory.create_new( diff --git a/openedx/core/lib/edx_api_utils.py b/openedx/core/lib/edx_api_utils.py index e927dbaf5d..3c9ca66f42 100644 --- a/openedx/core/lib/edx_api_utils.py +++ b/openedx/core/lib/edx_api_utils.py @@ -58,14 +58,15 @@ def get_edx_api_data(api_config, resource, api, resource_id=None, querystring=No if cached: try: cached_response = zunpickle(cached) - if fields: - cached_response = get_fields(fields, cached_response) - - return cached_response except Exception: # Data is corrupt in some way. log.warning("Data for cache is corrupt for cache key %s", cache_key) cache.delete(cache_key) + else: + if fields: + cached_response = get_fields(fields, cached_response) + + return cached_response try: endpoint = getattr(api, resource) From f485ff450d9f7629ddd7ae2cb4d1dafd5e11772f Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Mon, 9 Dec 2019 11:03:22 -0500 Subject: [PATCH 4/4] Deal with pylint. --- openedx/core/djangoapps/content/block_structure/store.py | 2 +- openedx/core/lib/edx_api_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openedx/core/djangoapps/content/block_structure/store.py b/openedx/core/djangoapps/content/block_structure/store.py index 43de5cc478..b35b92f61b 100644 --- a/openedx/core/djangoapps/content/block_structure/store.py +++ b/openedx/core/djangoapps/content/block_structure/store.py @@ -213,7 +213,7 @@ class BlockStructureStore(object): except Exception: # Somehow failed to de-serialized the data, assume it's corrupt. bs_model = self._get_model(root_block_usage_key) - logger.exception("BlockStructure: Failed to load data from cache for %s", bs_model) + logger.exception(u"BlockStructure: Failed to load data from cache for %s", bs_model) raise BlockStructureNotFound(bs_model.data_usage_key) return BlockStructureFactory.create_new( diff --git a/openedx/core/lib/edx_api_utils.py b/openedx/core/lib/edx_api_utils.py index 3c9ca66f42..237c7b5ac5 100644 --- a/openedx/core/lib/edx_api_utils.py +++ b/openedx/core/lib/edx_api_utils.py @@ -58,7 +58,7 @@ def get_edx_api_data(api_config, resource, api, resource_id=None, querystring=No if cached: try: cached_response = zunpickle(cached) - except Exception: + except Exception: # pylint: disable=broad-except # Data is corrupt in some way. log.warning("Data for cache is corrupt for cache key %s", cache_key) cache.delete(cache_key)