PLAT-1957 Better memory leak diagnostic tooling
This commit is contained in:
@@ -43,6 +43,7 @@ from __future__ import absolute_import
|
||||
|
||||
import imp
|
||||
import os
|
||||
import tempfile
|
||||
import sys
|
||||
from datetime import timedelta
|
||||
|
||||
@@ -322,7 +323,6 @@ GEOIPV6_PATH = REPO_ROOT / "common/static/data/geoip/GeoIPv6.dat"
|
||||
|
||||
############################# TEMPLATE CONFIGURATION #############################
|
||||
# Mako templating
|
||||
import tempfile
|
||||
MAKO_MODULE_DIR = os.path.join(tempfile.gettempdir(), 'mako_cms')
|
||||
MAKO_TEMPLATE_DIRS_BASE = [
|
||||
PROJECT_ROOT / 'templates',
|
||||
@@ -447,11 +447,10 @@ else:
|
||||
_csrf_middleware = 'django.middleware.csrf.CsrfViewMiddleware'
|
||||
|
||||
MIDDLEWARE_CLASSES = [
|
||||
'openedx.core.djangoapps.monitoring_utils.middleware.MonitoringMemoryMiddleware',
|
||||
'crum.CurrentRequestUserMiddleware',
|
||||
'openedx.core.djangoapps.request_cache.middleware.RequestCache',
|
||||
|
||||
'openedx.core.djangoapps.monitoring_utils.middleware.MonitoringMemoryMiddleware',
|
||||
|
||||
'openedx.core.djangoapps.header_control.middleware.HeaderControlMiddleware',
|
||||
'django.middleware.cache.UpdateCacheMiddleware',
|
||||
'django.middleware.common.CommonMiddleware',
|
||||
@@ -515,6 +514,9 @@ X_FRAME_OPTIONS = 'ALLOW'
|
||||
# Platform for Privacy Preferences header
|
||||
P3P_HEADER = 'CP="Open EdX does not have a P3P policy."'
|
||||
|
||||
# Let MemoryUsageData create the directory as needed
|
||||
MEMORY_GRAPH_DIRECTORY = None
|
||||
|
||||
############# XBlock Configuration ##########
|
||||
|
||||
# Import after sys.path fixup
|
||||
@@ -1127,6 +1129,9 @@ INSTALLED_APPS = [
|
||||
|
||||
# Asset management for mako templates
|
||||
'pipeline_mako',
|
||||
|
||||
# Memory leak diagnostics
|
||||
'openedx.core.djangoapps.monitoring_utils.apps.MonitoringUtilsConfig'
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Specific overrides to the base prod settings to make development easier.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from os.path import abspath, dirname, join
|
||||
|
||||
from .aws import * # pylint: disable=wildcard-import, unused-wildcard-import
|
||||
@@ -18,8 +19,6 @@ HTTPS = 'off'
|
||||
|
||||
################################ LOGGERS ######################################
|
||||
|
||||
import logging
|
||||
|
||||
# Disable noisy loggers
|
||||
for pkg_name in ['track.contexts', 'track.middleware', 'dd.dogapi']:
|
||||
logging.getLogger(pkg_name).setLevel(logging.CRITICAL)
|
||||
@@ -95,16 +94,22 @@ DEBUG_TOOLBAR_CONFIG = {
|
||||
|
||||
|
||||
def should_show_debug_toolbar(request):
|
||||
# We always want the toolbar on devstack unless running tests from another Docker container
|
||||
# We always want the toolbar on devstack unless running tests from another
|
||||
# Docker container or actively diagnosing a memory leak
|
||||
if request.get_host().startswith('edx.devstack.studio:'):
|
||||
return False
|
||||
return True
|
||||
from openedx.core.djangoapps.monitoring_utils import MemoryUsageData
|
||||
return not MemoryUsageData.tables_are_enabled
|
||||
|
||||
|
||||
# To see stacktraces for MongoDB queries, set this to True.
|
||||
# Stacktraces slow down page loads drastically (for pages with lots of queries).
|
||||
DEBUG_TOOLBAR_MONGO_STACKTRACES = False
|
||||
|
||||
############################## MEMORY MONITORING ##############################
|
||||
|
||||
MEMORY_GRAPH_DIRECTORY = REPO_ROOT / 'test_root' / 'log' / 'memory_graphs' / 'cms_{}'.format(os.getpid())
|
||||
WSGI_APPLICATION = 'openedx.core.djangoapps.monitoring_utils.WSGIServer'
|
||||
|
||||
################################ MILESTONES ################################
|
||||
FEATURES['MILESTONES_APP'] = True
|
||||
|
||||
@@ -32,6 +32,7 @@ Longer TODO:
|
||||
import imp
|
||||
import sys
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import django
|
||||
from path import Path as path
|
||||
@@ -543,7 +544,6 @@ OAUTH2_PROVIDER_APPLICATION_MODEL = 'oauth2_provider.Application'
|
||||
|
||||
################################## TEMPLATE CONFIGURATION #####################################
|
||||
# Mako templating
|
||||
import tempfile
|
||||
MAKO_MODULE_DIR = os.path.join(tempfile.gettempdir(), 'mako_lms')
|
||||
MAKO_TEMPLATE_DIRS_BASE = [
|
||||
PROJECT_ROOT / 'templates',
|
||||
@@ -1341,6 +1341,9 @@ X_FRAME_OPTIONS = 'ALLOW'
|
||||
# Platform for Privacy Preferences header
|
||||
P3P_HEADER = 'CP="Open EdX does not have a P3P policy."'
|
||||
|
||||
# Let MemoryUsageData create the directory as needed
|
||||
MEMORY_GRAPH_DIRECTORY = None
|
||||
|
||||
############################### PIPELINE #######################################
|
||||
|
||||
PIPELINE_ENABLED = True
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""
|
||||
Specific overrides to the base prod settings to make development easier.
|
||||
"""
|
||||
import logging
|
||||
from os.path import abspath, dirname, join
|
||||
|
||||
from .aws import * # pylint: disable=wildcard-import, unused-wildcard-import
|
||||
@@ -26,7 +27,6 @@ ENTERPRISE_API_URL = LMS_INTERNAL_ROOT_URL + '/enterprise/api/v1/'
|
||||
################################ LOGGERS ######################################
|
||||
|
||||
# Silence noisy logs
|
||||
import logging
|
||||
LOG_OVERRIDES = [
|
||||
('track.contexts', logging.CRITICAL),
|
||||
('track.middleware', logging.CRITICAL),
|
||||
@@ -88,10 +88,20 @@ DEBUG_TOOLBAR_CONFIG = {
|
||||
|
||||
|
||||
def should_show_debug_toolbar(request):
|
||||
# We always want the toolbar on devstack unless running tests from another Docker container
|
||||
# We always want the toolbar on devstack unless running tests from another
|
||||
# Docker container or actively diagnosing a memory leak
|
||||
if request.get_host().startswith('edx.devstack.lms:'):
|
||||
return False
|
||||
return True
|
||||
from openedx.core.djangoapps.monitoring_utils import MemoryUsageData
|
||||
return not MemoryUsageData.tables_are_enabled
|
||||
|
||||
|
||||
############################## MEMORY MONITORING ##############################
|
||||
|
||||
INSTALLED_APPS.append('openedx.core.djangoapps.monitoring_utils.apps.MonitoringUtilsConfig')
|
||||
MEMORY_GRAPH_DIRECTORY = REPO_ROOT / 'test_root' / 'log' / 'memory_graphs' / 'lms_{}'.format(os.getpid())
|
||||
MIDDLEWARE_CLASSES.insert(0, 'openedx.core.djangoapps.monitoring_utils.middleware.MonitoringMemoryMiddleware')
|
||||
WSGI_APPLICATION = 'openedx.core.djangoapps.monitoring_utils.WSGIServer'
|
||||
|
||||
########################### PIPELINE #################################
|
||||
|
||||
|
||||
@@ -20,4 +20,6 @@ EXCLUDE_CSRF = lambda elem: elem not in [
|
||||
DEFAULT_TEMPLATE_ENGINE['OPTIONS']['context_processors'] = filter(
|
||||
EXCLUDE_CSRF, DEFAULT_TEMPLATE_ENGINE['OPTIONS']['context_processors']
|
||||
)
|
||||
INSTALLED_APPS.append('openedx.core.djangoapps.monitoring_utils.apps.MonitoringUtilsConfig')
|
||||
MIDDLEWARE_CLASSES.insert(0, 'openedx.core.djangoapps.monitoring_utils.middleware.MonitoringMemoryMiddleware')
|
||||
MIDDLEWARE_CLASSES = filter(EXCLUDE_CSRF, MIDDLEWARE_CLASSES)
|
||||
|
||||
@@ -23,6 +23,7 @@ TODO: supply additional public functions for storing strings and booleans.
|
||||
from contextlib import contextmanager
|
||||
|
||||
from . import middleware
|
||||
from .utils import MemoryUsageData, WSGIServer
|
||||
try:
|
||||
import newrelic.agent
|
||||
except ImportError:
|
||||
|
||||
16
openedx/core/djangoapps/monitoring_utils/apps.py
Normal file
16
openedx/core/djangoapps/monitoring_utils/apps.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
Monitoring Utilities Configuration
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class MonitoringUtilsConfig(AppConfig):
    """
    Default configuration for the "openedx.core.djangoapps.monitoring_utils" Django application.
    """
    name = u'openedx.core.djangoapps.monitoring_utils'

    def ready(self):
        """
        Hook up the request lifecycle signal handlers once the app registry
        is ready.
        """
        # Importing the module is enough: it registers its @receiver
        # handlers (e.g. for request_started) with Django as a side effect.
        from . import signals  # pylint: disable=unused-variable
|
||||
@@ -13,6 +13,7 @@ import logging
|
||||
from uuid import uuid4
|
||||
|
||||
import psutil
|
||||
from six import text_type
|
||||
|
||||
from openedx.core.djangoapps.request_cache import get_cache
|
||||
from openedx.core.djangoapps.waffle_utils import WaffleSwitchNamespace
|
||||
@@ -89,27 +90,26 @@ class MonitoringMemoryMiddleware(object):
|
||||
guid_key = u'guid_key'
|
||||
|
||||
def process_request(self, request):
|
||||
"""
|
||||
Record pre-request memory usage data
|
||||
"""
|
||||
if self._is_enabled():
|
||||
self._cache[self.guid_key] = unicode(uuid4())
|
||||
setattr(request, self.guid_key, text_type(uuid4()))
|
||||
log_prefix = self._log_prefix(u"Before", request)
|
||||
self._cache[self.memory_data_key] = self._memory_data(log_prefix)
|
||||
setattr(request, self.memory_data_key, self._memory_data(log_prefix))
|
||||
|
||||
def process_response(self, request, response):
|
||||
"""
|
||||
Record post-request memory usage data
|
||||
"""
|
||||
if self._is_enabled():
|
||||
log_prefix = self._log_prefix(u"After", request)
|
||||
new_memory_data = self._memory_data(log_prefix)
|
||||
|
||||
log_prefix = self._log_prefix(u"Diff", request)
|
||||
self._log_diff_memory_data(log_prefix, new_memory_data, self._cache.get(self.memory_data_key))
|
||||
self._log_diff_memory_data(log_prefix, new_memory_data, getattr(request, self.memory_data_key))
|
||||
return response
|
||||
|
||||
@property
|
||||
def _cache(self):
|
||||
"""
|
||||
Namespaced request cache for tracking memory usage.
|
||||
"""
|
||||
return get_cache(name='monitoring_memory')
|
||||
|
||||
def _log_prefix(self, prefix, request):
|
||||
"""
|
||||
Returns a formatted prefix for logging for the given request.
|
||||
@@ -117,7 +117,7 @@ class MonitoringMemoryMiddleware(object):
|
||||
# After a celery task runs, the request cache is cleared. So if celery
|
||||
# tasks are running synchronously (CELERY_ALWAYS_EAGER), "guid_key"
|
||||
# will no longer be in the request cache when process_response executes.
|
||||
cached_guid = self._cache.get(self.guid_key) or u"without_guid"
|
||||
cached_guid = getattr(request, self.guid_key) or u"without_guid"
|
||||
return u"{} request '{} {} {}'".format(prefix, request.method, request.path, cached_guid)
|
||||
|
||||
def _memory_data(self, log_prefix):
|
||||
|
||||
20
openedx/core/djangoapps/monitoring_utils/signals.py
Normal file
20
openedx/core/djangoapps/monitoring_utils/signals.py
Normal file
@@ -0,0 +1,20 @@
|
||||
"""
|
||||
Memory leak troubleshooting via request lifecycle signals.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from django.core.signals import request_started
|
||||
from django.dispatch import receiver
|
||||
|
||||
from . import MemoryUsageData
|
||||
|
||||
|
||||
@receiver(request_started)
def reset_memory_statistics(sender, **kwargs):  # pylint: disable=unused-argument
    """
    Use Django's signal for the start of request processing as the trigger to
    start tracking new objects in memory when the
    ``monitoring_utils.log_memory_tables`` Waffle switch is enabled.

    Note: this handler runs for every request; the Waffle switch check itself
    happens inside ``MemoryUsageData.start_counting()``.
    """
    MemoryUsageData.start_counting()
|
||||
495
openedx/core/djangoapps/monitoring_utils/utils.py
Normal file
495
openedx/core/djangoapps/monitoring_utils/utils.py
Normal file
@@ -0,0 +1,495 @@
|
||||
"""
|
||||
Monitoring utilities which aren't used by the application by default, but can
|
||||
be used as needed to troubleshoot problems.
|
||||
"""
|
||||
from __future__ import absolute_import, print_function
|
||||
|
||||
import gc
|
||||
import itertools
|
||||
import logging
|
||||
import operator
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import tempfile
|
||||
from StringIO import StringIO
|
||||
from collections import defaultdict
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.servers.basehttp import WSGIServer as DjangoWSGIServer
|
||||
from django.utils.lru_cache import lru_cache
|
||||
|
||||
import gunicorn.util
|
||||
from objgraph import (
|
||||
_long_typename,
|
||||
_short_typename,
|
||||
at_addrs,
|
||||
show_backrefs,
|
||||
show_refs,
|
||||
)
|
||||
|
||||
from openedx.core.djangoapps.waffle_utils import WaffleSwitchNamespace
|
||||
|
||||
# Per-label counters used to number successive output files generated for the
# same label (see show_memory_leaks(), which appends the index to filenames).
indices = defaultdict(itertools.count)

# The directory in which graph files will be created.
GRAPH_DIRECTORY_PATH = settings.MEMORY_GRAPH_DIRECTORY

# The max number of object types for which to show data on the console
MAX_CONSOLE_ROWS = 30

# The max number of object types for which to generate reference graphs
MAX_GRAPHED_OBJECT_TYPES = 5

# Maximum depth of forward reference graphs
REFS_DEPTH = 3

# Maximum depth of backward reference graphs
BACK_REFS_DEPTH = 8

# Max number of objects per type to use as starting points in the reference graphs
MAX_OBJECTS_PER_TYPE = 10

# Object type names for which table rows and graphs should not be generated if
# the new object count is below the given threshold.  "set" is ignored by
# default because many sets are created in the course of tracking the number
# of new objects of each type.  "ApdexStats", "SplitResult", and "TimeStats"
# are New Relic data which sometimes outlives the duration of the request but
# usually doesn't stick around long-term.
IGNORE_THRESHOLDS = {
    'ApdexStats': 10,
    'SplitResult': 50,
    'TimeStats': 500,
    'set': 10000,
}

# Namespace prefix for the Waffle switches consulted by MemoryUsageData
# ("monitoring_utils.log_memory_tables", "monitoring_utils.create_memory_graphs").
WAFFLE_NAMESPACE = 'monitoring_utils'

log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def show_memory_leaks(
        label=u'memory_leaks',
        max_console_rows=MAX_CONSOLE_ROWS,
        max_graphed_object_types=MAX_GRAPHED_OBJECT_TYPES,
        refs_depth=REFS_DEPTH,
        back_refs_depth=BACK_REFS_DEPTH,
        max_objects_per_type=MAX_OBJECTS_PER_TYPE,
        ignore_thresholds=None,
        graph_directory_path=GRAPH_DIRECTORY_PATH,
        memory_table_buffer=None,
        skip_first_graphs=True):
    """
    Call this function to get data about memory leaks; what objects are being
    leaked, where did they come from, and what do they contain?  The leaks
    are measured from the last call to ``get_new_ids()`` (which is called
    within this function).  Some data is printed to stdout, and more details
    are available in graphs stored at the paths printed to stdout.  Subsequent
    calls with the same label are indicated by an increasing index in the
    filename.

    Args:
        label (unicode): The start of the filename for each graph
        max_console_rows (int): The max number of object types for which to
            show data on the console
        max_graphed_object_types (int): The max number of object types for
            which to generate reference graphs
        refs_depth (int): Maximum depth of forward reference graphs
        back_refs_depth (int): Maximum depth of backward reference graphs
        max_objects_per_type (int): Max number of objects per type to use as
            starting points in the reference graphs
        ignore_thresholds (dict): Object type names for which table rows and
            graphs should not be generated if the new object count is below
            the corresponding number.
        graph_directory_path (unicode): The directory in which graph files
            will be created.  It will be created if it doesn't already exist.
        memory_table_buffer (StringIO): Storage for the generated table of
            memory statistics.  Ideally, create this before starting to
            count newly allocated objects.
        skip_first_graphs (bool): True if the first call to this function for
            a given label should not produce graphs (the default behavior).
            The first call to a given block of code often initializes an
            assortment of objects which aren't really leaked memory.
    """
    # MEMORY_GRAPH_DIRECTORY may be None in settings (devstack leaves it so);
    # fall back to a per-process temp directory in that case.
    if graph_directory_path is None:
        graph_directory_path = MemoryUsageData.graph_directory_path()
    if ignore_thresholds is None:
        ignore_thresholds = IGNORE_THRESHOLDS
    if memory_table_buffer is None:
        memory_table_buffer = StringIO()
    new_ids = get_new_ids(limit=max_console_rows, ignore_thresholds=ignore_thresholds,
                          output=memory_table_buffer)
    memory_table_text = memory_table_buffer.getvalue()
    log.info('\n' + memory_table_text)

    if not os.path.exists(graph_directory_path):
        os.makedirs(graph_directory_path)
    # Colons appear in view names (e.g. namespaced URLs); keep filenames safe.
    label = label.replace(':', '_')
    # Python 2 iterator protocol; use the next() builtin if porting to Python 3.
    index = indices[label].next() + 1
    data = {'label': label, 'index': index}
    path = os.path.join(graph_directory_path, u'{label}_{index}.txt'.format(**data))
    with open(path, 'w') as f:
        f.write(memory_table_text)

    # The first pass for a label usually just reflects lazy initialization,
    # not a leak, so graphing it is optional noise.
    if index == 1 and skip_first_graphs:
        return

    graphed_types = 0
    # Graph the types with the most new object IDs first.
    sorted_by_count = sorted(new_ids.items(), key=lambda entry: len(entry[1]), reverse=True)
    for item in sorted_by_count:
        type_name = item[0]
        object_ids = new_ids[type_name]
        if not object_ids:
            continue
        # Resolve a bounded sample of the leaked IDs back into live objects.
        objects = at_addrs(list(object_ids)[:max_objects_per_type])
        data['type_name'] = type_name

        if back_refs_depth > 0:
            path = os.path.join(graph_directory_path, u'{label}_{index}_{type_name}_backrefs.dot'.format(**data))
            show_backrefs(objects, max_depth=back_refs_depth, filename=path)
            log.info('Generated memory graph at {}'.format(path))

        if refs_depth > 0:
            path = os.path.join(graph_directory_path, u'{label}_{index}_{type_name}_refs.dot'.format(**data))
            show_refs(objects, max_depth=refs_depth, filename=path)
            log.info('Generated memory graph at {}'.format(path))

        graphed_types += 1
        if graphed_types >= max_graphed_object_types:
            break
|
||||
|
||||
|
||||
class MemoryUsageData(object):
    """
    Memory analysis data and configuration options for the current request.
    Do *NOT* use this in production; it slows down most requests by about an
    order of magnitude, even the ones which aren't being specifically studied.

    Call ``MemoryUsageData.analyze()`` from a view and enable the appropriate
    waffle switch(es) to start generating memory leak diagnostic information:

    * monitoring_utils.log_memory_tables - Log a table of data on object types
      for which the total number in memory increased during the request.
      Enabling this switch disables Django Debug Toolbar, since it leaks
      many objects with every request.

    * monitoring_utils.create_memory_graphs - Also generate reference graphs
      for some of the apparently leaked objects.

    When using this in development via Django's runserver command, be sure to
    pass it the ``--nothreading`` option to avoid concurrent memory changes
    while serving static assets.  In devstack, do this in docker-compose.yml.

    To use this feature on a sandbox, you also need to append
    ``openedx.core.djangoapps.monitoring_utils.apps.MonitoringUtilsConfig`` to
    the end of the ``INSTALLED_APPS`` Django setting.  This is present by
    default for devstack and load test environments, but absent from the
    ``aws`` settings module to avoid a little overhead at the start of each
    request even when the Waffle switches are disabled (mainly just to load
    the switch from the database).

    Configuration options for the depth of the graphs, how many leaked
    objects of each type to graph, and so forth are currently set as constants
    in ``monitoring_utils.utils``.  The graphs are saved as GraphViz .dot
    files in the directory specified by the ``MEMORY_GRAPH_DIRECTORY`` Django
    setting.  These can be viewed directly using xdot (Linux) or ZGRViewer
    (macOS), or converted to a standard image format such as PNG or SVG using
    GraphViz.
    """
    # Process-global, per-request state; reset by _reset() at the end of each
    # analyzed request.  Safe only because requests are handled one at a time
    # (hence the --nothreading advice above).
    graphs_are_enabled = False      # generate reference graphs this request?
    tables_are_enabled = False      # log the leaked-object table this request?
    table_buffer = None             # StringIO collecting the table output
    view_name = None                # set by analyze(); no output unless set
    gunicorn_is_patched = False     # has gunicorn.util.close been replaced yet?

    @classmethod
    def start_counting(cls):
        """
        Prepare to collect memory usage data for a new request.

        Does nothing (and costs almost nothing) unless the
        ``monitoring_utils.log_memory_tables`` Waffle switch is on.
        """
        if cls._is_switch_enabled(u'log_memory_tables'):
            # Patch lazily, once per process, so the analysis hook runs after
            # gunicorn closes the client socket.
            if not cls.gunicorn_is_patched:
                cls._patch_gunicorn()
            cls.tables_are_enabled = True
            cls.table_buffer = StringIO()
            cls.graphs_are_enabled = cls._is_switch_enabled(u'create_memory_graphs')
            # Snapshot current object IDs so only objects created during this
            # request count as "new".
            cls._set_memory_leak_baseline()

    @classmethod
    def analyze(cls, request):
        """
        Call this anywhere in a view to record memory usage data at the end
        of the request.
        """
        # The view name doubles as the output file label and as the flag that
        # stop_counting() checks before producing any output.
        cls.view_name = request.resolver_match.view_name

    @classmethod
    def stop_counting(cls):
        """
        Stop collecting memory usage data for the current request, and
        generate any requested output for it.
        """
        if cls.tables_are_enabled and cls.view_name:
            if cls.graphs_are_enabled:
                show_memory_leaks(cls.view_name, memory_table_buffer=cls.table_buffer)
            else:
                # Depth 0 in both directions suppresses graph generation;
                # only the table is produced.
                show_memory_leaks(cls.view_name, refs_depth=0,
                                  back_refs_depth=0, memory_table_buffer=cls.table_buffer)
        cls._reset()

    @classmethod
    @lru_cache()
    def graph_directory_path(cls):
        """
        Get the default temporary directory for the current process in which
        to store memory reference graphs.

        The lru_cache means mkdtemp() runs only once per process, so all
        graphs for this process land in the same directory.
        """
        if settings.ROOT_URLCONF == 'lms.urls':
            service = 'lms'
        else:
            service = 'cms'
        return os.path.join(tempfile.mkdtemp(prefix='memory_graphs'),
                            '{service}_{pid}'.format(service=service, pid=os.getpid()))

    @staticmethod
    def _is_switch_enabled(name):
        # Checks the "monitoring_utils.<name>" Waffle switch.
        return WaffleSwitchNamespace(name=WAFFLE_NAMESPACE).is_enabled(name)

    @classmethod
    def _patch_gunicorn(cls):
        """
        Patch gunicorn to record memory usage data when appropriate.  Django's
        ``request_finished`` signal and gunicorn's ``post_request`` hook
        aren't called late enough to be useful for this; the response is still
        in scope, so none of the objects attached to it can be garbage
        collected yet.
        """
        gunicorn.util.close = gunicorn_util_close
        cls.gunicorn_is_patched = True

    @classmethod
    def _reset(cls):
        """
        Reset all the attributes to their default values in preparation for a
        new request.

        ``gunicorn_is_patched`` is deliberately NOT reset: the monkeypatch
        persists for the lifetime of the process.
        """
        cls.graphs_are_enabled = False
        cls.tables_are_enabled = False
        cls.table_buffer = None
        cls.view_name = None

    @classmethod
    def _set_memory_leak_baseline(cls):
        """
        Reset the starting point from which the next call to
        ``objgraph.get_new_ids()`` will count newly created objects.
        """
        # The table output from this priming call is noise; discard it.
        with open(os.devnull, 'w') as devnull:
            get_new_ids(output=devnull)
|
||||
|
||||
|
||||
def gunicorn_util_close(sock):
    """
    Replacement for gunicorn.util.close() which does memory usage analysis if
    the relevant Waffle switch was active at the start of the request.

    This monkeypatch is appropriate for gunicorn==0.17.4 and should be updated
    as needed when upgrading gunicorn.
    """
    # Close the client socket first; errors are swallowed (presumably
    # mirroring the upstream gunicorn implementation -- confirm against
    # gunicorn 0.17.4 when upgrading).
    try:
        sock.close()
    except socket.error:
        pass
    # Installed by MemoryUsageData._patch_gunicorn(); this point is late
    # enough in the request lifecycle that the response is out of scope and
    # its objects can be garbage collected before the analysis runs.
    MemoryUsageData.stop_counting()
|
||||
|
||||
|
||||
class WSGIServer(DjangoWSGIServer):
    """
    A WSGI server to be used by Django's runserver management command so that
    memory usage can be analyzed after the response is garbage collected.

    NOTE(review): the devstack settings point ``WSGI_APPLICATION`` at this
    class; the previously documented ``WSGI_SERVER`` setting name doesn't
    match any assignment visible in this change -- confirm which setting
    actually selects this server class.
    """

    def close_request(self, request):
        """
        Run the end-of-request memory analysis, then close the connection.

        Bug fix: the original override never called the parent class's
        ``close_request()``, which is what actually closes the per-connection
        socket, so every connection socket was left open until garbage
        collection reclaimed it.
        """
        MemoryUsageData.stop_counting()
        super(WSGIServer, self).close_request(request)
|
||||
|
||||
|
||||
# The following is copied and modified from objgraph, since it doesn't yet
|
||||
# provide good hooks for customizing this operation
|
||||
|
||||
def get_new_ids(skip_update=False, limit=10, sortby='deltas',  # pylint: disable=dangerous-default-value
                shortnames=None, ignore_thresholds=IGNORE_THRESHOLDS,
                output=None, _state={}):
    """Find and display new objects allocated since last call.

    Shows the increase in object counts since last call to this
    function and returns the memory address ids for new objects.

    Returns a dictionary mapping object type names to sets of object IDs
    that have been created since the last time this function was called.

    ``skip_update`` (bool): If True, returns the same dictionary that
    was returned during the previous call without updating the internal
    state or examining the objects currently in memory.

    ``limit`` (int): The maximum number of rows that you want to print
    data for.  Use 0 to suppress the printing.  Use None to print everything.

    ``sortby`` (str): This is the column that you want to sort by in
    descending order.  Possible values are: 'old', 'current', 'new',
    'deltas'

    ``shortnames`` (bool): If True, classes with the same name but
    defined in different modules will be lumped together.  If False,
    all type names will be qualified with the module name.  If None (default),
    ``get_new_ids`` will remember the value from previous calls, so it's
    enough to prime this once.  By default the primed value is True.

    ``ignore_thresholds`` (dict): Object type names for which rows should
    not be reported if the current object count is below the corresponding
    number.  Defaults to ``IGNORE_THRESHOLDS``.

    ``output`` (stream): The stream to print results to (stdout if None).

    ``_state`` (dict): Stores old, current, and new_ids in memory.
    It is used by the function to store the internal state between calls.
    Never pass in this argument unless you know what you're doing.

    The caveats documented in :func:`growth` apply.

    When one gets new_ids from :func:`get_new_ids`, one can use
    :func:`at_addrs` to get a list of those objects.  Then one can iterate over
    the new objects, print out what they are, and call :func:`show_backrefs` or
    :func:`show_chain` to see where they are referenced.

    Example:

        >>> _ = get_new_ids()  # store current objects in _state
        >>> _ = get_new_ids()  # current_ids become old_ids in _state
        >>> a = [0, 1, 2]  # list we don't know about
        >>> b = [3, 4, 5]  # list we don't know about
        >>> new_ids = get_new_ids(limit=3)  # we see new lists
        ======================================================================
        Type                  Old_ids  Current_ids      New_ids Count_Deltas
        ======================================================================
        list                      324          326           +3           +2
        dict                     1125         1125           +0           +0
        wrapper_descriptor       1001         1001           +0           +0
        ======================================================================
        >>> new_lists = at_addrs(new_ids['list'])
        >>> a in new_lists
        True
        >>> b in new_lists
        True
    """
    # Copied and modified from objgraph (see module comment above); the
    # mutable default ``_state`` is deliberate -- it persists between calls.
    if ignore_thresholds is None:
        ignore_thresholds = IGNORE_THRESHOLDS
    _initialize_state(_state)
    new_ids = _state['new']
    if skip_update:
        return new_ids
    old_ids = _state['old']
    current_ids = _state['current']
    if shortnames is None:
        shortnames = _state['shortnames']
    else:
        _state['shortnames'] = shortnames
    # Collect garbage first so objects awaiting collection don't count as live.
    gc.collect()
    objects = gc.get_objects()
    # Shift current -> old.  Sets are cleared and refilled in place (rather
    # than replaced) so existing references into the defaultdicts stay valid.
    for class_name in old_ids:
        old_ids[class_name].clear()
    for class_name, ids_set in current_ids.items():
        old_ids[class_name].update(ids_set)
    for class_name in current_ids:
        current_ids[class_name].clear()
    # Re-scan everything the garbage collector tracks, bucketed by type name.
    for o in objects:
        if shortnames:
            class_name = _short_typename(o)
        else:
            class_name = _long_typename(o)
        id_number = id(o)
        current_ids[class_name].add(id_number)
    for class_name in new_ids:
        new_ids[class_name].clear()
    rows = []
    keys_to_remove = []
    for class_name in current_ids:
        num_old = len(old_ids[class_name])
        num_current = len(current_ids[class_name])
        if num_old == 0 and num_current == 0:
            # remove the key from our dicts if we don't have any old or
            # current class_name objects
            keys_to_remove.append(class_name)
            continue
        new_ids_set = current_ids[class_name] - old_ids[class_name]
        new_ids[class_name].update(new_ids_set)
        num_new = len(new_ids_set)
        num_delta = num_current - num_old
        if num_delta < 1 or (class_name in ignore_thresholds and num_current < ignore_thresholds[class_name]):
            # ignore types with no net increase or whose overall count isn't large enough to worry us
            if class_name in new_ids:
                del new_ids[class_name]
            continue
        row = (class_name, num_old, num_current, num_new, num_delta)
        rows.append(row)
    # Deleting dead keys outside the loop above avoids mutating the dicts
    # while iterating over them.
    for key in keys_to_remove:
        del old_ids[key]
        del current_ids[key]
        if key in new_ids:
            del new_ids[key]
    # Sort by the requested numeric column, then type name, descending.
    index_by_sortby = {'old': 1, 'current': 2, 'new': 3, 'deltas': 4}
    rows.sort(key=operator.itemgetter(index_by_sortby[sortby], 0),
              reverse=True)
    _show_results(rows, limit, output)
    return new_ids
|
||||
|
||||
|
||||
def _initialize_state(state):
|
||||
"""
|
||||
Initialize the object ID tracking data if it hasn't been done yet.
|
||||
"""
|
||||
if not state:
|
||||
state['old'] = defaultdict(set)
|
||||
state['current'] = defaultdict(set)
|
||||
state['new'] = defaultdict(set)
|
||||
state['shortnames'] = True
|
||||
|
||||
|
||||
def _show_results(rows, limit, output):
    """
    Dispatch display of the leak data gathered by ``get_new_ids()``.

    Args:
        rows (list): The data rows to be displayed (if any)
        limit (int): The max number of rows to display
        output (stream): The output stream to send results to (stdout if None)
    """
    stream = sys.stdout if output is None else output
    if rows:
        _show_leaks_table(rows, limit, stream)
    else:
        _show_no_leaks_message(stream)
|
||||
|
||||
|
||||
def _show_no_leaks_message(output):
|
||||
"""
|
||||
Print a message, indicating that no memory leaks were found, to the given
|
||||
output stream.
|
||||
"""
|
||||
print('=' * 51, file=output)
|
||||
print('No object types increased their net count in memory', file=output)
|
||||
print('=' * 51, file=output)
|
||||
|
||||
|
||||
def _show_leaks_table(rows, limit, output):
|
||||
"""
|
||||
Print a summary table of the leaked objects to the given output stream.
|
||||
"""
|
||||
if limit is not None:
|
||||
rows = rows[:limit]
|
||||
width = max(len(row[0]) for row in rows)
|
||||
print('=' * (width + 13 * 4), file=output)
|
||||
print('%-*s%13s%13s%13s%13s' %
|
||||
(width, 'Type', 'Old_ids', 'Current_ids', 'New_ids', 'Count_Deltas'),
|
||||
file=output)
|
||||
print('=' * (width + 13 * 4), file=output)
|
||||
for row_class, old, current, new, delta in rows:
|
||||
print('%-*s%13d%13d%+13d%+13d' %
|
||||
(width, row_class, old, current, new, delta), file=output)
|
||||
print('=' * (width + 13 * 4), file=output)
|
||||
@@ -83,6 +83,7 @@ INSTALLED_APPS = (
|
||||
LMS_ROOT_URL = 'http://localhost:8000'
|
||||
|
||||
MEDIA_ROOT = tempfile.mkdtemp()
|
||||
MEMORY_GRAPH_DIRECTORY = tempfile.mkdtemp(prefix='memory_graphs')
|
||||
|
||||
MICROSITE_BACKEND = 'microsite_configuration.backends.filebased.FilebasedMicrositeBackend'
|
||||
MICROSITE_TEMPLATE_BACKEND = 'microsite_configuration.backends.filebased.FilebasedMicrositeTemplateBackend'
|
||||
|
||||
@@ -75,6 +75,7 @@ fs-s3fs==0.1.5
|
||||
futures==3.2.0 ; python_version == "2.7"
|
||||
GitPython==0.3.2.RC1
|
||||
glob2==0.3
|
||||
graphviz==0.8.2
|
||||
gunicorn==0.17.4
|
||||
help-tokens==1.0.3
|
||||
httpretty==0.8.14
|
||||
@@ -84,6 +85,7 @@ mako==1.0.2
|
||||
Markdown>=2.6,<2.7
|
||||
mongoengine==0.10.0
|
||||
MySQL-python==1.2.5
|
||||
objgraph==3.4.0
|
||||
networkx==1.7
|
||||
nltk==3.2.5
|
||||
nose-xunitmp==0.3.2
|
||||
|
||||
Reference in New Issue
Block a user