From c5b0115be0051238fdbca584fbc93091dd272a3a Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 23 Feb 2012 13:28:00 -0500 Subject: [PATCH] add User object caching --- cache_toolbox/COPYING | 24 ++++ cache_toolbox/README.rst | 4 + cache_toolbox/__init__.py | 29 +++++ cache_toolbox/app_settings.py | 8 ++ cache_toolbox/core.py | 101 +++++++++++++++++ cache_toolbox/middleware.py | 97 ++++++++++++++++ cache_toolbox/model.py | 78 +++++++++++++ cache_toolbox/relation.py | 118 ++++++++++++++++++++ cache_toolbox/templatetags/__init__.py | 0 cache_toolbox/templatetags/cache_toolbox.py | 69 ++++++++++++ settings.py | 7 +- 11 files changed, 533 insertions(+), 2 deletions(-) create mode 100644 cache_toolbox/COPYING create mode 100644 cache_toolbox/README.rst create mode 100644 cache_toolbox/__init__.py create mode 100644 cache_toolbox/app_settings.py create mode 100644 cache_toolbox/core.py create mode 100644 cache_toolbox/middleware.py create mode 100644 cache_toolbox/model.py create mode 100644 cache_toolbox/relation.py create mode 100644 cache_toolbox/templatetags/__init__.py create mode 100644 cache_toolbox/templatetags/cache_toolbox.py diff --git a/cache_toolbox/COPYING b/cache_toolbox/COPYING new file mode 100644 index 0000000000..ae625588aa --- /dev/null +++ b/cache_toolbox/COPYING @@ -0,0 +1,24 @@ +Copyright © 2010, 2011 UUMC Ltd. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the <organization> nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/cache_toolbox/README.rst b/cache_toolbox/README.rst new file mode 100644 index 0000000000..cf8b23e638 --- /dev/null +++ b/cache_toolbox/README.rst @@ -0,0 +1,4 @@ +django-cache-toolbox +============================ + +Documentation: http://code.playfire.com/django-cache-toolbox/ diff --git a/cache_toolbox/__init__.py b/cache_toolbox/__init__.py new file mode 100644 index 0000000000..e6609567f9 --- /dev/null +++ b/cache_toolbox/__init__.py @@ -0,0 +1,29 @@ +""" +:mod:`cache_toolbox` --- Non-magical object caching tools for Django +==================================================================== + +Introduction +------------ + +``cache_toolbox`` is intended to be a lightweight series of independent tools +to leverage caching within Django projects. + +The tools are deliberately `non-magical`. That is to say, instances are never +retrieved from caches behind your back and regular Django ``.filter()`` / +``.get()`` queries continue to work exactly as before. 
+ +Because of this, you can introduce ``cache_toolbox`` into your project slowly +when needed rather than "switching" to it with invasive changes. + +Links +----- + +View/download code + https://github.com/playfire/django-cache-toolbox + +File a bug + https://github.com/playfire/django-cache-toolbox/issues +""" + +from .model import cache_model +from .relation import cache_relation diff --git a/cache_toolbox/app_settings.py b/cache_toolbox/app_settings.py new file mode 100644 index 0000000000..16e8c36bfa --- /dev/null +++ b/cache_toolbox/app_settings.py @@ -0,0 +1,8 @@ +from django.conf import settings + +# Default cache timeout +CACHE_TOOLBOX_DEFAULT_TIMEOUT = getattr( + settings, + 'CACHE_TOOLBOX_DEFAULT_TIMEOUT', + 60 * 60 * 24 * 3, +) diff --git a/cache_toolbox/core.py b/cache_toolbox/core.py new file mode 100644 index 0000000000..8abc16c7be --- /dev/null +++ b/cache_toolbox/core.py @@ -0,0 +1,101 @@ +""" +Core methods +------------ + +.. autofunction:: cache_toolbox.core.get_instance +.. autofunction:: cache_toolbox.core.delete_instance +.. autofunction:: cache_toolbox.core.instance_key + +""" + +from django.core.cache import cache +from django.db import DEFAULT_DB_ALIAS + +from . import app_settings + +def get_instance(model, instance_or_pk, timeout=None, using=None): + """ + Returns the ``model`` instance with a primary key of ``instance_or_pk``. + + If the data is cached it will be returned from there, otherwise the regular + Django ORM is queried for this instance and the data stored in the cache. + + If omitted, the timeout value defaults to + ``settings.CACHE_TOOLBOX_DEFAULT_TIMEOUT`` instead of 0 (zero). 
+ + Example:: + + >>> get_instance(User, 1) # Cache miss + + >>> get_instance(User, 1) # Cache hit + + >>> User.objects.get(pk=1) == get_instance(User, 1) + True + """ + + pk = getattr(instance_or_pk, 'pk', instance_or_pk) + key = instance_key(model, instance_or_pk) + data = cache.get(key) + + if data is not None: + try: + # Try and construct instance from dictionary + instance = model(pk=pk, **data) + + # Ensure instance knows that it already exists in the database, + # otherwise we will fail any uniqueness checks when saving the + # instance. + instance._state.adding = False + + # Specify database so that instance is setup correctly. We don't + # namespace cached objects by their origin database, however. + instance._state.db = using or DEFAULT_DB_ALIAS + + return instance + except Exception: + # Error when deserialising - remove from the cache; we will + # fallback and return the underlying instance + cache.delete(key) + + # Use the default manager so we are never filtered by a .get_query_set() + instance = model._default_manager.using(using).get(pk=pk) + + data = {} + for field in instance._meta.fields: + # Harmless to save, but saves space in the dictionary - we already know + # the primary key when we lookup + if field.primary_key: + continue + + if field.get_internal_type() == 'FileField': + # Avoid problems with serializing FileFields + # by only serializing the file name + file = getattr(instance, field.attname) + data[field.attname] = file.name + else: + data[field.attname] = getattr(instance, field.attname) + + if timeout is None: + timeout = app_settings.CACHE_TOOLBOX_DEFAULT_TIMEOUT + + cache.set(key, data, timeout) + + return instance + +def delete_instance(model, *instance_or_pk): + """ + Purges the cache keys for the instances of this model. + """ + + cache.delete_many([instance_key(model, x) for x in instance_or_pk]) + +def instance_key(model, instance_or_pk): + """ + Returns the cache key for this (model, instance) pair. 
+ """ + + return '%s.%s:%d' % ( + model._meta.app_label, + model._meta.module_name, + getattr(instance_or_pk, 'pk', instance_or_pk), + ) diff --git a/cache_toolbox/middleware.py b/cache_toolbox/middleware.py new file mode 100644 index 0000000000..97f0bdb2af --- /dev/null +++ b/cache_toolbox/middleware.py @@ -0,0 +1,97 @@ +""" +Cache-backed ``AuthenticationMiddleware`` +----------------------------------------- + +``CacheBackedAuthenticationMiddleware`` is an +``django.contrib.auth.middleware.AuthenticationMiddleware`` replacement to +avoid querying the database for a ``User`` instance in each request. + +Whilst the built-in ``AuthenticationMiddleware`` mechanism will only obtain the +``User`` instance when it is required, the vast majority of sites will do so on +every page to render "Logged in as 'X'" text as well to evaluate the result of +``user.is_authenticated()`` and ``user.is_superuser`` to provide conditional +functionality. + +This middleware eliminates the cost of retrieving this ``User`` instance by +caching it using the ``cache_toolbox`` instance caching mechanisms. + +Depending on your average number of queries per page, saving one query per +request can---in aggregate---reduce load on your database. In addition, +avoiding the database entirely for pages can avoid incurring any connection +latency in your environment, resulting in faster page loads for your users. + +Saving this data in the cache can also be used as a way of authenticating users +in systems outside of Django that should not access your database. For +example, a "maintenance mode" page would be able to render a personalised +message without touching the database at all but rather authenticating via the +cache. + +``CacheBackedAuthenticationMiddleware`` is ``AUTHENTICATION_BACKENDS`` agnostic. 
+ +Implementation +~~~~~~~~~~~~~~ + +The cache and session backends are still accessed on each request - we are +simply assuming that they are cheaper (or otherwise more preferable) to access +than your database. (In the future, signed cookies may allow us to avoid this +lookup altogether -- whilst we could not safely save ``User.password`` in a +cookie, we could use delayed loading to pull it out when needed.) + +Another alternative solution would be to store the attributes in the user's +session instead of in the cache. This would save the cache hit on every request +as all the relevant data would be pulled in one go from the session backend. +However, this has two main disadvantages: + + * Session keys are not deterministic -- after making changes to an + ``auth_user`` row in the database, you cannot determine the user's session + key to flush the now out-of-sync data (and doing so would log them out + anyway). + + * Stores data per-session rather than per-user -- if a user logs in from + multiple computers the data is duplicated in each session. This problem is + compounded by most projects wishing to avoid expiring session data as long + as possible (in addition to storing sessions in persistent stores). + +Usage +~~~~~ + +To use, find ``MIDDLEWARE_CLASSES`` in your ``settings.py`` and replace:: + + MIDDLEWARE_CLASSES = [ + ... + 'django.contrib.auth.middleware.AuthenticationMiddleware', + ... + ] + +with:: + + MIDDLEWARE_CLASSES = [ + ... + 'cache_toolbox.middleware.CacheBackedAuthenticationMiddleware', + ... + ] + +You should confirm you are using a ``SESSION_ENGINE`` that doesn't query the +database for each request. The built-in ``cached_db`` engine is the safest +choice for most environments but you may be happy with the trade-offs of the +``memcached`` backend - see the Django documentation for more details. 
+ +""" + +from django.contrib.auth import SESSION_KEY +from django.contrib.auth.models import User +from django.contrib.auth.middleware import AuthenticationMiddleware + +from .model import cache_model + +class CacheBackedAuthenticationMiddleware(AuthenticationMiddleware): + def __init__(self): + cache_model(User) + + def process_request(self, request): + try: + # Try and construct a User instance from data stored in the cache + request.user = User.get_cached(request.session[SESSION_KEY]) + except Exception: + # Fallback to constructing the User from the database. + super(CacheBackedAuthenticationMiddleware, self).process_request(request) diff --git a/cache_toolbox/model.py b/cache_toolbox/model.py new file mode 100644 index 0000000000..8ac8f0d249 --- /dev/null +++ b/cache_toolbox/model.py @@ -0,0 +1,78 @@ +""" +Caching model instances +----------------------- + +``cache_model`` adds utility methods to a model to obtain ``ForeignKey`` +instances via the cache. + +Usage +~~~~~ + +:: + + from django.db import models + from django.contrib.auth.models import User + + class Foo(models.Model): + name = models.CharField(max_length=20) + + cache_model(Foo) + +:: + + >>> a = Foo.objects.create(name='a') + >>> a + + >>> Foo.get_cached(a.pk) # Cache miss + + >>> a = Foo.get_cached(a.pk) # Cache hit + >>> a.name + u'a' + +Instances returned from ``get_cached`` are real model instances:: + + >>> a = Foo.get_cached(a.pk) # Cache hit + >>> type(a) + + >>> a.pk + 1L + +Invalidation +~~~~~~~~~~~~ + +Invalidation is performed automatically upon saving or deleting a ``Foo`` +instance:: + + >>> a = Foo.objects.create(name='a') + >>> a.name = 'b' + >>> a.save() + >>> a = Foo.get_cached(a.pk) + >>> a.name + u'b' + >>> a.delete() + >>> a = Foo.get_cached(a.pk) + ... 
Foo.DoesNotExist +""" + +from django.db.models.signals import post_save, post_delete + +from .core import get_instance, delete_instance + +def cache_model(model, timeout=None): + if hasattr(model, 'get_cached'): + # Already patched + return + + def clear_cache(sender, instance, *args, **kwargs): + delete_instance(sender, instance) + + post_save.connect(clear_cache, sender=model, weak=False) + post_delete.connect(clear_cache, sender=model, weak=False) + + @classmethod + def get(cls, pk, using=None): + if pk is None: + return None + return get_instance(cls, pk, timeout, using) + + model.get_cached = get diff --git a/cache_toolbox/relation.py b/cache_toolbox/relation.py new file mode 100644 index 0000000000..eae7d93f6f --- /dev/null +++ b/cache_toolbox/relation.py @@ -0,0 +1,118 @@ +""" +Caching instances via ``related_name`` +-------------------------------------- + +``cache_relation`` adds utility methods to a model to obtain ``related_name`` +instances via the cache. + +Usage +~~~~~ + +:: + + from django.db import models + from django.contrib.auth.models import User + + class Foo(models.Model): + user = models.OneToOneField( + User, + primary_key=True, + related_name='foo', + ) + + name = models.CharField(max_length=20) + + cache_relation(User.foo) + +:: + + >>> user = User.objects.get(pk=1) + >>> user.foo_cache # Cache miss - hits the database + + >>> user = User.objects.get(pk=1) + >>> user.foo_cache # Cache hit - no database access + + >>> user = User.objects.get(pk=2) + >>> user.foo # Regular lookup - hits the database + + >>> user.foo_cache # Special-case: Will not hit cache or database. + + +Accessing ``user_instance.foo_cache`` (note the "_cache" suffix) will now +obtain the related ``Foo`` instance via the cache. Accessing the original +``user_instance.foo`` attribute will perform the lookup as normal. + +Invalidation +~~~~~~~~~~~~ + +Upon saving (or deleting) the instance, the cache is cleared. 
For example:: + + >>> user = User.objects.get(pk=1) + >>> foo = user.foo_cache # (Assume cache hit from previous session) + >>> foo.name = "New name" + >>> foo.save() # Cache is cleared on save + >>> user = User.objects.get(pk=1) + >>> user.foo_cache # Cache miss. + + +Manual invalidation may also be performed using the following methods:: + + >>> user_instance.foo_cache_clear() + >>> User.foo_cache_clear_pk(user_instance_pk) + +Manual invalidation is required if you use ``.update()`` methods which the +``post_save`` and ``post_delete`` hooks cannot intercept. + +Support +~~~~~~~ + +``cache_relation`` currently only works with ``OneToOneField`` fields. Support +for regular ``ForeignKey`` fields is planned. +""" + +from django.db.models.signals import post_save, post_delete + +from .core import get_instance, delete_instance + +def cache_relation(descriptor, timeout=None): + rel = descriptor.related + related_name = '%s_cache' % rel.field.related_query_name() + + @property + def get(self): + # Always use the cached "real" instance if available + try: + return getattr(self, descriptor.cache_name) + except AttributeError: + pass + + # Lookup cached instance + try: + return getattr(self, '_%s_cache' % related_name) + except AttributeError: + pass + + instance = get_instance(rel.model, self.pk, timeout) + + setattr(self, '_%s_cache' % related_name, instance) + + return instance + setattr(rel.parent_model, related_name, get) + + # Clearing cache + + def clear(self): + delete_instance(rel.model, self) + + @classmethod + def clear_pk(cls, *instances_or_pk): + delete_instance(rel.model, *instances_or_pk) + + def clear_cache(sender, instance, *args, **kwargs): + delete_instance(rel.model, instance) + + setattr(rel.parent_model, '%s_clear' % related_name, clear) + setattr(rel.parent_model, '%s_clear_pk' % related_name, clear_pk) + + post_save.connect(clear_cache, sender=rel.model, weak=False) + post_delete.connect(clear_cache, sender=rel.model, weak=False) diff --git 
a/cache_toolbox/templatetags/__init__.py b/cache_toolbox/templatetags/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cache_toolbox/templatetags/cache_toolbox.py b/cache_toolbox/templatetags/cache_toolbox.py new file mode 100644 index 0000000000..feea2af1c8 --- /dev/null +++ b/cache_toolbox/templatetags/cache_toolbox.py @@ -0,0 +1,69 @@ +from django import template +from django.core.cache import cache +from django.template import Node, TemplateSyntaxError, Variable +from django.template import resolve_variable + +register = template.Library() + +class CacheNode(Node): + def __init__(self, nodelist, expire_time, key): + self.nodelist = nodelist + self.expire_time = Variable(expire_time) + self.key = key + + def render(self, context): + key = resolve_variable(self.key, context) + expire_time = int(self.expire_time.resolve(context)) + + value = cache.get(key) + if value is None: + value = self.nodelist.render(context) + cache.set(key, value, expire_time) + return value + +@register.tag +def cachedeterministic(parser, token): + """ + This will cache the contents of a template fragment for a given amount of + time, just like {% cache .. %} except that the key is deterministic and not + mangled or run through MD5. + + Usage:: + + {% cachedeterministic [expire_time] [key] %} + .. some expensive processing .. + {% endcachedeterministic %} + + """ + nodelist = parser.parse(('endcachedeterministic',)) + parser.delete_first_token() + tokens = token.contents.split() + if len(tokens) != 3: + raise TemplateSyntaxError(u"'%r' tag requires 2 arguments." % tokens[0]) + return CacheNode(nodelist, tokens[1], tokens[2]) + +class ShowIfCachedNode(Node): + def __init__(self, key): + self.key = key + + def render(self, context): + key = resolve_variable(self.key, context) + return cache.get(key) or '' + +@register.tag +def showifcached(parser, token): + """ + Show content if it exists in the cache, otherwise display nothing. 
+ + The key is entirely deterministic and not mangled or run through MD5 (cf. + {% cache %}) + + Usage:: + + {% showifcached [key] %} + + """ + tokens = token.contents.split() + if len(tokens) != 2: + raise TemplateSyntaxError(u"'%r' tag requires 1 argument." % tokens[0]) + return ShowIfCachedNode(tokens[1]) diff --git a/settings.py b/settings.py index e8f4342168..4660163e8d 100644 --- a/settings.py +++ b/settings.py @@ -105,10 +105,13 @@ TEMPLATE_LOADERS = ( ) MIDDLEWARE_CLASSES = ( + 'util.middleware.ExceptionLoggingMiddleware', + 'django.middleware.cache.UpdateCacheMiddleware', 'django.middleware.common.CommonMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', + #'django.contrib.auth.middleware.AuthenticationMiddleware', + 'cache_toolbox.middleware.CacheBackedAuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'track.middleware.TrackMiddleware', 'mitxmako.middleware.MakoMiddleware', @@ -283,7 +286,6 @@ site.addsitedir(os.path.join(os.path.dirname(askbot.__file__), 'deps')) TEMPLATE_LOADERS = TEMPLATE_LOADERS + ('askbot.skins.loaders.filesystem_load_template_source',) MIDDLEWARE_CLASSES = MIDDLEWARE_CLASSES + ( - 'util.middleware.ExceptionLoggingMiddleware', 'askbot.middleware.anon_user.ConnectToSessionMessagesMiddleware', 'askbot.middleware.forum_mode.ForumModeMiddleware', 'askbot.middleware.cancel.CancelActionMiddleware', @@ -292,6 +294,7 @@ MIDDLEWARE_CLASSES = MIDDLEWARE_CLASSES + ( 'askbot.middleware.view_log.ViewLogMiddleware', 'askbot.middleware.spaceless.SpacelessMiddleware', # 'askbot.middleware.pagesize.QuestionsPageSizeMiddleware', + 'django.middleware.cache.FetchFromCacheMiddleware', ) FILE_UPLOAD_TEMP_DIR = os.path.join(os.path.dirname(__file__), 'tmp').replace('\\','/')