diff --git a/common/lib/xmodule/xmodule/course_module.py b/common/lib/xmodule/xmodule/course_module.py index f8e18a9355..4d3b2b1e00 100644 --- a/common/lib/xmodule/xmodule/course_module.py +++ b/common/lib/xmodule/xmodule/course_module.py @@ -6,6 +6,7 @@ from xmodule.modulestore import Location from xmodule.seq_module import SequenceDescriptor, SequenceModule from xmodule.timeparse import parse_time, stringify_time from xmodule.util.decorators import lazyproperty +from datetime import datetime import json import logging import requests @@ -18,6 +19,8 @@ log = logging.getLogger(__name__) edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False, remove_comments=True, remove_blank_text=True) +_cached_toc = {} + class CourseDescriptor(SequenceDescriptor): module_class = SequenceModule @@ -50,6 +53,24 @@ class CourseDescriptor(SequenceDescriptor): """ toc_url = self.book_url + 'toc.xml' + # cdodge: I've added this caching of TOC because in Mongo-backed instances (but not Filesystem stores) + # course modules have a very short lifespan and are constantly being created and torn down. + # Since this module in the __init__() method does a synchronous call to AWS to get the TOC + # this is causing a big performance problem. So let's be a bit smarter about this and cache + # each fetch and store in-mem for 10 minutes. + # NOTE: I have to get this onto sandbox ASAP as we're having runtime failures. I'd like to swing back and + # rewrite to use the traditional Django in-memory cache. + try: + # see if we already fetched this + if toc_url in _cached_toc: + (table_of_contents, timestamp) = _cached_toc[toc_url] + age = datetime.now() - timestamp + # expire every 10 minutes + if age.seconds < 600: + return table_of_contents + except Exception as err: + pass + # Get the table of contents from S3 log.info("Retrieving textbook table of contents from %s" % toc_url) try: @@ -62,6 +83,7 @@ class CourseDescriptor(SequenceDescriptor): # TOC is XML. Parse it try: table_of_contents = etree.fromstring(r.text) + _cached_toc[toc_url] = (table_of_contents, datetime.now()) except Exception as err: msg = 'Error %s: Unable to parse XML for textbook table of contents at %s' % (err, toc_url) log.error(msg)