From 39ab0f31dc475881d01c49ca783a597215316660 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Wed, 27 May 2015 18:35:03 -0400 Subject: [PATCH 1/2] Cache SplitMongo course structures in memcached. This is primarily to reduce load on MongoDB, where we've lately had performance problems that we suspect are caused by very large course structures being evicted from MongoDB's cache. This may potentially give us a path to better performance as well, but that's not the goal of this commit. Surprisingly, LZ4 seemed to actually run more slowly than zlib for this. Possibly because of some overhead in the Python bindings? GZip was also surprisingly slow given that it uses zlib underneath (something like 5x slower). Use separate cache backend for caching structures. Abstract out course structure cache. add datadog metrics for compressed course structure sizes Since we're using a different cache backend, we don't need to have a cache prefix Use dummy cache backend for tests. Fallback to default cache if course_structure_cache doesn't exist. 
--- cms/envs/dev.py | 5 +- cms/envs/test.py | 4 +- .../split_mongo/mongo_connection.py | 60 +++++++++++++++++-- .../tests/test_mixed_modulestore.py | 2 +- lms/envs/dev.py | 4 ++ lms/envs/test.py | 4 +- 6 files changed, 69 insertions(+), 10 deletions(-) diff --git a/cms/envs/dev.py b/cms/envs/dev.py index 216817d66c..7ba5d46121 100644 --- a/cms/envs/dev.py +++ b/cms/envs/dev.py @@ -115,7 +115,10 @@ CACHES = { 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', 'LOCATION': 'edx_location_mem_cache', }, - + 'course_structure_cache': { + 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', + 'LOCATION': 'edx_course_structure_mem_cache', + }, } # Make the keyedcache startup warnings go away diff --git a/cms/envs/test.py b/cms/envs/test.py index b637368c9f..082960a804 100644 --- a/cms/envs/test.py +++ b/cms/envs/test.py @@ -166,7 +166,9 @@ CACHES = { 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', 'LOCATION': 'edx_location_mem_cache', }, - + 'course_structure_cache': { + 'BACKEND': 'django.core.cache.backends.dummy.DummyCache', + }, } # Add external_auth to Installed apps for testing diff --git a/common/lib/xmodule/xmodule/modulestore/split_mongo/mongo_connection.py b/common/lib/xmodule/xmodule/modulestore/split_mongo/mongo_connection.py index ab7f922df5..bcee23b7f0 100644 --- a/common/lib/xmodule/xmodule/modulestore/split_mongo/mongo_connection.py +++ b/common/lib/xmodule/xmodule/modulestore/split_mongo/mongo_connection.py @@ -2,7 +2,11 @@ Segregation of pymongo functions from the data modeling mechanisms for split modulestore. 
""" import datetime +import cPickle as pickle import math +import re +import zlib +from mongodb_proxy import autoretry_read, MongoProxy import pymongo import pytz import re @@ -11,6 +15,8 @@ from time import time # Import this just to export it from pymongo.errors import DuplicateKeyError # pylint: disable=unused-import +from django.core.cache import get_cache, InvalidCacheBackendError +import dogstats_wrapper as dog_stats_api from contracts import check, new_contract from mongodb_proxy import autoretry_read, MongoProxy @@ -203,6 +209,40 @@ def structure_to_mongo(structure, course_context=None): return new_structure +class CourseStructureCache(object): + """ + Wrapper around django cache object to cache course structure objects. + The course structures are pickled and compressed when cached. + """ + def __init__(self): + try: + self.cache = get_cache('course_structure_cache') + except InvalidCacheBackendError: + self.cache = get_cache('default') + + def get(self, key): + """Pull the compressed, pickled struct data from cache and deserialize.""" + compressed_pickled_data = self.cache.get(key) + if compressed_pickled_data is None: + return None + return pickle.loads(zlib.decompress(compressed_pickled_data)) + + def set(self, key, structure): + """Given a structure, will pickle, compress, and write to cache.""" + pickled_data = pickle.dumps(structure, pickle.HIGHEST_PROTOCOL) + # 1 = Fastest (slightly larger results) + compressed_pickled_data = zlib.compress(pickled_data, 1) + + # record compressed course structure sizes + dog_stats_api.histogram( + 'compressed_course_structure.size', + len(compressed_pickled_data), + tags=[key] + ) + # Stuctures are immutable, so we set a timeout of "never" + self.cache.set(key, compressed_pickled_data, None) + + class MongoConnection(object): """ Segregation of pymongo functions from the data modeling mechanisms for split modulestore. 
@@ -256,15 +296,23 @@ class MongoConnection(object): def get_structure(self, key, course_context=None): """ - Get the structure from the persistence mechanism whose id is the given key + Get the structure from the persistence mechanism whose id is the given key. + + This method will use a cached version of the structure if it is available. """ with TIMER.timer("get_structure", course_context) as tagger_get_structure: - with TIMER.timer("get_structure.find_one", course_context) as tagger_find_one: - doc = self.structures.find_one({'_id': key}) - tagger_find_one.measure("blocks", len(doc['blocks'])) - tagger_get_structure.measure("blocks", len(doc['blocks'])) + cache = CourseStructureCache() - return structure_from_mongo(doc, course_context) + structure = cache.get(key) + tagger_get_structure.tag(from_cache='true' if structure else 'false') + if not structure: + with TIMER.timer("get_structure.find_one", course_context) as tagger_find_one: + doc = self.structures.find_one({'_id': key}) + tagger_find_one.measure("blocks", len(doc['blocks'])) + structure = structure_from_mongo(doc, course_context) + cache.set(key, structure) + + return structure @autoretry_read() def find_structures_by_id(self, ids, course_context=None): diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_mixed_modulestore.py b/common/lib/xmodule/xmodule/modulestore/tests/test_mixed_modulestore.py index b41bf268d6..e83c9be516 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_mixed_modulestore.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_mixed_modulestore.py @@ -794,7 +794,7 @@ class TestMixedModuleStore(CommonMixedModuleStoreSetup): # find: find parent (definition.children) 2x, find draft item, get inheritance items # send: one delete query for specific item # Split: - # find: active_version & structure + # find: active_version & structure (cached) # send: update structure and active_versions @ddt.data(('draft', 4, 1), ('split', 2, 2)) @ddt.unpack diff --git 
a/lms/envs/dev.py b/lms/envs/dev.py index b8060f230d..a0e3dee83f 100644 --- a/lms/envs/dev.py +++ b/lms/envs/dev.py @@ -94,6 +94,10 @@ CACHES = { 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', 'LOCATION': 'edx_location_mem_cache', }, + 'course_structure_cache': { + 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', + 'LOCATION': 'edx_course_structure_mem_cache', + }, } diff --git a/lms/envs/test.py b/lms/envs/test.py index ffc2013c47..9803879579 100644 --- a/lms/envs/test.py +++ b/lms/envs/test.py @@ -207,7 +207,9 @@ CACHES = { 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', 'LOCATION': 'edx_location_mem_cache', }, - + 'course_structure_cache': { + 'BACKEND': 'django.core.cache.backends.dummy.DummyCache', + }, } # Dummy secret key for dev From b1346fc7476268eeb29d9d139d5dc1cccba542e7 Mon Sep 17 00:00:00 2001 From: Adam Palay Date: Mon, 22 Jun 2015 23:14:07 -0400 Subject: [PATCH 2/2] add tests for structure cache use default cache for tests add test for when cache isn't configured add test for dummy cache noop if no cache found --- .../xmodule/xmodule/modulestore/__init__.py | 26 +++++++ .../split_mongo/mongo_connection.py | 14 +++- .../tests/test_mixed_modulestore.py | 2 +- .../tests/test_split_modulestore.py | 75 +++++++++++++++++++ 4 files changed, 113 insertions(+), 4 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/__init__.py b/common/lib/xmodule/xmodule/modulestore/__init__.py index c167c25422..f47c901b65 100644 --- a/common/lib/xmodule/xmodule/modulestore/__init__.py +++ b/common/lib/xmodule/xmodule/modulestore/__init__.py @@ -369,6 +369,19 @@ class EditInfo(object): source_version="UNSET" if self.source_version is None else self.source_version, ) # pylint: disable=bad-continuation + def __eq__(self, edit_info): + """ + Two EditInfo instances are equal iff their storable representations + are equal. 
+ """ + return self.to_storable() == edit_info.to_storable() + + def __neq__(self, edit_info): + """ + Two EditInfo instances are not equal if they're not equal. + """ + return not self == edit_info + class BlockData(object): """ @@ -426,6 +439,19 @@ class BlockData(object): classname=self.__class__.__name__, ) # pylint: disable=bad-continuation + def __eq__(self, block_data): + """ + Two BlockData objects are equal iff all their attributes are equal. + """ + attrs = ['fields', 'block_type', 'definition', 'defaults', 'edit_info'] + return all(getattr(self, attr) == getattr(block_data, attr) for attr in attrs) + + def __neq__(self, block_data): + """ + Just define this as not self.__eq__(block_data) + """ + return not self == block_data + new_contract('BlockData', BlockData) diff --git a/common/lib/xmodule/xmodule/modulestore/split_mongo/mongo_connection.py b/common/lib/xmodule/xmodule/modulestore/split_mongo/mongo_connection.py index bcee23b7f0..fc179c81ba 100644 --- a/common/lib/xmodule/xmodule/modulestore/split_mongo/mongo_connection.py +++ b/common/lib/xmodule/xmodule/modulestore/split_mongo/mongo_connection.py @@ -4,9 +4,7 @@ Segregation of pymongo functions from the data modeling mechanisms for split mod import datetime import cPickle as pickle import math -import re import zlib -from mongodb_proxy import autoretry_read, MongoProxy import pymongo import pytz import re @@ -213,15 +211,22 @@ class CourseStructureCache(object): """ Wrapper around django cache object to cache course structure objects. The course structures are pickled and compressed when cached. + + If the 'course_structure_cache' doesn't exist, then don't do anything for + for set and get. 
""" def __init__(self): + self.no_cache_found = False try: self.cache = get_cache('course_structure_cache') except InvalidCacheBackendError: - self.cache = get_cache('default') + self.no_cache_found = True def get(self, key): """Pull the compressed, pickled struct data from cache and deserialize.""" + if self.no_cache_found: + return None + compressed_pickled_data = self.cache.get(key) if compressed_pickled_data is None: return None @@ -229,6 +234,9 @@ class CourseStructureCache(object): def set(self, key, structure): """Given a structure, will pickle, compress, and write to cache.""" + if self.no_cache_found: + return None + pickled_data = pickle.dumps(structure, pickle.HIGHEST_PROTOCOL) # 1 = Fastest (slightly larger results) compressed_pickled_data = zlib.compress(pickled_data, 1) diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_mixed_modulestore.py b/common/lib/xmodule/xmodule/modulestore/tests/test_mixed_modulestore.py index e83c9be516..b41bf268d6 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_mixed_modulestore.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_mixed_modulestore.py @@ -794,7 +794,7 @@ class TestMixedModuleStore(CommonMixedModuleStoreSetup): # find: find parent (definition.children) 2x, find draft item, get inheritance items # send: one delete query for specific item # Split: - # find: active_version & structure (cached) + # find: active_version & structure # send: update structure and active_versions @ddt.data(('draft', 4, 1), ('split', 2, 2)) @ddt.unpack diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore.py b/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore.py index a47655f1a7..06b5067223 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore.py @@ -12,6 +12,7 @@ import uuid from contracts import contract from nose.plugins.attrib import attr +from 
django.core.cache import get_cache, InvalidCacheBackendError from openedx.core.lib import tempdir from xblock.fields import Reference, ReferenceList, ReferenceValueDict @@ -29,6 +30,7 @@ from xmodule.fields import Date, Timedelta from xmodule.modulestore.split_mongo.split import SplitMongoModuleStore from xmodule.modulestore.tests.test_modulestore import check_has_course_method from xmodule.modulestore.split_mongo import BlockKey +from xmodule.modulestore.tests.factories import check_mongo_calls from xmodule.modulestore.tests.mongo_connection import MONGO_PORT_NUM, MONGO_HOST from xmodule.modulestore.tests.utils import mock_tab_from_json from xmodule.modulestore.edit_info import EditInfoMixin @@ -771,6 +773,79 @@ class SplitModuleCourseTests(SplitModuleTest): self.assertEqual(result.children[0].children[0].locator.version_guid, versions[0]) +class TestCourseStructureCache(SplitModuleTest): + """Tests for the CourseStructureCache""" + + def setUp(self): + # use the default cache, since the `course_structure_cache` + # is a dummy cache during testing + self.cache = get_cache('default') + + # make sure we clear the cache before every test... + self.cache.clear() + # ... 
and after + self.addCleanup(self.cache.clear) + + # make a new course: + self.user = random.getrandbits(32) + self.new_course = modulestore().create_course( + 'org', 'course', 'test_run', self.user, BRANCH_NAME_DRAFT, + ) + + super(TestCourseStructureCache, self).setUp() + + @patch('xmodule.modulestore.split_mongo.mongo_connection.get_cache') + def test_course_structure_cache(self, mock_get_cache): + # force get_cache to return the default cache so we can test + # its caching behavior + mock_get_cache.return_value = self.cache + + with check_mongo_calls(1): + not_cached_structure = self._get_structure(self.new_course) + + # when cache is warmed, we should have one fewer mongo call + with check_mongo_calls(0): + cached_structure = self._get_structure(self.new_course) + + # now make sure that you get the same structure + self.assertEqual(cached_structure, not_cached_structure) + + @patch('xmodule.modulestore.split_mongo.mongo_connection.get_cache') + def test_course_structure_cache_no_cache_configured(self, mock_get_cache): + mock_get_cache.side_effect = InvalidCacheBackendError + + with check_mongo_calls(1): + not_cached_structure = self._get_structure(self.new_course) + + # if the cache isn't configured, we expect to have to make + # another mongo call here if we want the same course structure + with check_mongo_calls(1): + cached_structure = self._get_structure(self.new_course) + + # now make sure that you get the same structure + self.assertEqual(cached_structure, not_cached_structure) + + def test_dummy_cache(self): + with check_mongo_calls(1): + not_cached_structure = self._get_structure(self.new_course) + + # Since the test is using the dummy cache, it's not actually caching + # anything + with check_mongo_calls(1): + cached_structure = self._get_structure(self.new_course) + + # now make sure that you get the same structure + self.assertEqual(cached_structure, not_cached_structure) + + def _get_structure(self, course): + """ + Helper function to get a 
structure from a course. + """ + return modulestore().db_connection.get_structure( + course.location.as_object_id(course.location.version_guid) + ) + + class SplitModuleItemTests(SplitModuleTest): ''' Item read tests including inheritance