From 88ff84dd2e76c2f275fe6515b6aebbf5b444601a Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Fri, 25 May 2012 09:30:04 -0400 Subject: [PATCH 1/3] Use a key function that is safe for use w/ memcache --- envs/dev.py | 4 +++- envs/devplus.py | 2 ++ envs/static.py | 4 +++- envs/test.py | 4 +++- lib/util/memcache.py | 18 ++++++++++++++++++ 5 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 lib/util/memcache.py diff --git a/envs/dev.py b/envs/dev.py index 9d65059447..cf430dab86 100644 --- a/envs/dev.py +++ b/envs/dev.py @@ -31,7 +31,8 @@ CACHES = { # In staging/prod envs, the sessions also live here. 'default': { 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', - 'LOCATION': 'mitx_loc_mem_cache' + 'LOCATION': 'mitx_loc_mem_cache', + 'KEY_FUNCTION': 'util.memcache.safe_key', }, # The general cache is what you get if you use our util.cache. It's used for @@ -43,6 +44,7 @@ CACHES = { 'BACKEND': 'django.core.cache.backends.dummy.DummyCache', 'KEY_PREFIX': 'general', 'VERSION': 4, + 'KEY_FUNCTION': 'util.cache.memcache_safe_key', } } diff --git a/envs/devplus.py b/envs/devplus.py index 4f763b925c..1c81bdd7b2 100644 --- a/envs/devplus.py +++ b/envs/devplus.py @@ -30,12 +30,14 @@ CACHES = { 'default': { 'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache', 'LOCATION': '127.0.0.1:11211', + 'KEY_FUNCTION': 'util.memcache.safe_key', }, 'general': { 'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache', 'LOCATION': '127.0.0.1:11211', 'KEY_PREFIX' : 'general', 'VERSION' : 5, + 'KEY_FUNCTION': 'util.memcache.safe_key', } } diff --git a/envs/static.py b/envs/static.py index 65309c4795..55469ce332 100644 --- a/envs/static.py +++ b/envs/static.py @@ -30,7 +30,8 @@ CACHES = { # In staging/prod envs, the sessions also live here. 'default': { 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', - 'LOCATION': 'mitx_loc_mem_cache' + 'LOCATION': 'mitx_loc_mem_cache', + 'KEY_FUNCTION': 'util.memcache.safe_key', }, # The general cache is what you get if you use our util.cache. It's used for @@ -42,6 +43,7 @@ CACHES = { 'BACKEND': 'django.core.cache.backends.dummy.DummyCache', 'KEY_PREFIX': 'general', 'VERSION': 4, + 'KEY_FUNCTION': 'util.memcache.safe_key', } } diff --git a/envs/test.py b/envs/test.py index bf83511786..6702a3c852 100644 --- a/envs/test.py +++ b/envs/test.py @@ -54,7 +54,8 @@ CACHES = { # In staging/prod envs, the sessions also live here. 'default': { 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', - 'LOCATION': 'mitx_loc_mem_cache' + 'LOCATION': 'mitx_loc_mem_cache', + 'KEY_FUNCTION': 'util.memcache.safe_key', }, # The general cache is what you get if you use our util.cache. It's used for @@ -66,6 +67,7 @@ CACHES = { 'BACKEND': 'django.core.cache.backends.dummy.DummyCache', 'KEY_PREFIX': 'general', 'VERSION': 4, + 'KEY_FUNCTION': 'util.memcache.safe_key', } } diff --git a/lib/util/memcache.py b/lib/util/memcache.py new file mode 100644 index 0000000000..e0c6037b0c --- /dev/null +++ b/lib/util/memcache.py @@ -0,0 +1,18 @@ +""" +This module provides a KEY_FUNCTION suitable for use with a memcache backend +so that we can cache any keys, not just ones that memcache would ordinarily accept +""" +from django.utils.hashcompat import md5_constructor +from django.utils.encoding import smart_str +import string + +def safe_key(key, key_prefix, version): + safe_key = smart_str(key) + for char in safe_key: + if ord(char) < 33 or ord(char) == 127: + safe_key = safe_key.replace(char, '_') + + if len(safe_key) > 250: + safe_key = md5_constructor(safe_key) + + return ":".join([key_prefix, str(version), safe_key]) From dea0d28ace79b363ac828aca650383fd30543e57 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Fri, 25 May 2012 10:52:04 -0400 Subject: [PATCH 2/3] Url encode memcache keys, rather than writing a new encoding scheme. Also use fasthash, rather than md5 --- djangoapps/courseware/content_parser.py | 14 +++++--------- lib/util/memcache.py | 16 +++++++++------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/djangoapps/courseware/content_parser.py b/djangoapps/courseware/content_parser.py index adb10f7dfc..dbd3befae8 100644 --- a/djangoapps/courseware/content_parser.py +++ b/djangoapps/courseware/content_parser.py @@ -7,7 +7,6 @@ Does some caching (to be explained). ''' -import hashlib import logging import os import re @@ -16,6 +15,7 @@ import urllib from datetime import timedelta from lxml import etree +from util.memcache import fasthash try: # This lets us do __name__ == ='__main__' from django.conf import settings @@ -57,10 +57,6 @@ def parse_timedelta(time_str): time_params[name] = int(param) return timedelta(**time_params) -def fasthash(string): - m = hashlib.new("md4") - m.update(string) - return "id"+m.hexdigest() def xpath(xml, query_string, **args): ''' Safe xpath query into an xml tree: @@ -160,11 +156,11 @@ def user_groups(user): cache_expiration = 60 * 60 # one hour # Kill caching on dev machines -- we switch groups a lot - group_names = cache.get(fasthash(key)) + group_names = cache.get(key) if group_names is None: group_names = [u.name for u in UserTestGroup.objects.filter(users=user)] - cache.set(fasthash(key), group_names, cache_expiration) + cache.set(key, group_names, cache_expiration) return group_names @@ -203,7 +199,7 @@ def course_file(user,coursename=None): cache_key = filename + "_processed?dev_content:" + str(options['dev_content']) + "&groups:" + str(sorted(groups)) if "dev" not in settings.DEFAULT_GROUPS: - tree_string = cache.get(fasthash(cache_key)) + tree_string = cache.get(cache_key) else: tree_string = None @@ -211,7 +207,7 @@ def course_file(user,coursename=None): tree = course_xml_process(etree.XML(render_to_string(filename, options, namespace = 'course'))) tree_string = etree.tostring(tree) - cache.set(fasthash(cache_key), tree_string, 60) + cache.set(cache_key, tree_string, 60) else: tree = etree.XML(tree_string) diff --git a/lib/util/memcache.py b/lib/util/memcache.py index e0c6037b0c..7af5581406 100644 --- a/lib/util/memcache.py +++ b/lib/util/memcache.py @@ -2,17 +2,19 @@ This module provides a KEY_FUNCTION suitable for use with a memcache backend so that we can cache any keys, not just ones that memcache would ordinarily accept """ -from django.utils.hashcompat import md5_constructor from django.utils.encoding import smart_str -import string +import hashlib +import urllib + +def fasthash(string): + m = hashlib.new("md4") + m.update(string) + return "id"+m.hexdigest() def safe_key(key, key_prefix, version): - safe_key = smart_str(key) - for char in safe_key: - if ord(char) < 33 or ord(char) == 127: - safe_key = safe_key.replace(char, '_') + safe_key = urllib.quote_plus(smart_str(key)) if len(safe_key) > 250: - safe_key = md5_constructor(safe_key) + safe_key = fasthash(safe_key) return ":".join([key_prefix, str(version), safe_key]) From 1b5a9bc547de255449209856f1e2edb393e57d82 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Fri, 25 May 2012 11:04:05 -0400 Subject: [PATCH 3/3] Moving the 'id' prefix back into content_parser where it exists --- djangoapps/courseware/content_parser.py | 2 +- lib/util/memcache.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/djangoapps/courseware/content_parser.py b/djangoapps/courseware/content_parser.py index dbd3befae8..d557e5ac36 100644 --- a/djangoapps/courseware/content_parser.py +++ b/djangoapps/courseware/content_parser.py @@ -118,7 +118,7 @@ def id_tag(course): new_id = default_ids[elem.tag] + new_id elem.set('id', new_id) else: - elem.set('id', fasthash(etree.tostring(elem))) + elem.set('id', "id"+fasthash(etree.tostring(elem))) def propogate_downward_tag(element, attribute_name, parent_attribute = None): ''' This call is to pass down an attribute to all children. If an element diff --git a/lib/util/memcache.py b/lib/util/memcache.py index 7af5581406..3da65a1b51 100644 --- a/lib/util/memcache.py +++ b/lib/util/memcache.py @@ -9,7 +9,7 @@ import urllib def fasthash(string): m = hashlib.new("md4") m.update(string) - return "id"+m.hexdigest() + return m.hexdigest() def safe_key(key, key_prefix, version): safe_key = urllib.quote_plus(smart_str(key))