diff --git a/README b/README
index 6860772c35..a76ea66690 100644
--- a/README
+++ b/README
@@ -1,2 +1 @@
-This branch (re-)adds dynamic math and symbolicresponse.
-Test cases included.
+see doc/ for documentation.
diff --git a/common/lib/mitxmako/shortcuts.py b/common/lib/mitxmako/shortcuts.py
index 9f6044b81e..72a9b81e3c 100644
--- a/common/lib/mitxmako/shortcuts.py
+++ b/common/lib/mitxmako/shortcuts.py
@@ -12,13 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import logging
+
+log = logging.getLogger("mitx.common.lib.mitxmako")
+
from django.template import Context
from django.http import HttpResponse
from . import middleware
from django.conf import settings
-
def render_to_string(template_name, dictionary, context=None, namespace='main'):
context_instance = Context(dictionary)
# add dictionary to context_instance
@@ -27,8 +30,11 @@ def render_to_string(template_name, dictionary, context=None, namespace='main'):
context_dictionary = {}
context_instance['settings'] = settings
context_instance['MITX_ROOT_URL'] = settings.MITX_ROOT_URL
- for d in middleware.requestcontext:
- context_dictionary.update(d)
+
+ # In various testing contexts, there might not be a current request context.
+ if middleware.requestcontext is not None:
+ for d in middleware.requestcontext:
+ context_dictionary.update(d)
for d in context_instance:
context_dictionary.update(d)
if context:
diff --git a/common/test/README.md b/common/test/README.md
new file mode 100644
index 0000000000..07c49f7d92
--- /dev/null
+++ b/common/test/README.md
@@ -0,0 +1,6 @@
+Common test infrastructure for LMS + CMS
+===========================
+
+data/ has some test course data.
+
+Once the course validation is separated from django, we should have scripts here that checks that a course consists only of xml that we understand.
diff --git a/common/test/data/full/README.md b/common/test/data/full/README.md
new file mode 100644
index 0000000000..812ca471ce
--- /dev/null
+++ b/common/test/data/full/README.md
@@ -0,0 +1 @@
+This is a realistic course, with many different module types and a lot of structure. It is based on 6.002x.
diff --git a/common/test/data/simple/README.md b/common/test/data/simple/README.md
new file mode 100644
index 0000000000..69ff6b4ed0
--- /dev/null
+++ b/common/test/data/simple/README.md
@@ -0,0 +1,2 @@
+This is a simple, but non-trivial, course using multiple module types and some nested structure.
+
diff --git a/common/test/data/toy/README.md b/common/test/data/toy/README.md
new file mode 100644
index 0000000000..59ab392ed3
--- /dev/null
+++ b/common/test/data/toy/README.md
@@ -0,0 +1 @@
+This is a very very simple course, useful for initial debugging of processing code.
diff --git a/common/test/data/toy/toy_course.xml b/common/test/data/toy/toy_course.xml
new file mode 100644
index 0000000000..ecac9a4776
--- /dev/null
+++ b/common/test/data/toy/toy_course.xml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
diff --git a/lms/djangoapps/courseware/content_parser.py b/lms/djangoapps/courseware/content_parser.py
index 4f96db5284..62163c86b0 100644
--- a/lms/djangoapps/courseware/content_parser.py
+++ b/lms/djangoapps/courseware/content_parser.py
@@ -26,10 +26,13 @@ import xmodule
''' This file will eventually form an abstraction layer between the
course XML file and the rest of the system.
-
-TODO: Shift everything from xml.dom.minidom to XPath (or XQuery)
'''
+# ==== This section has no direct dependencies on django ====================================
+# NOTE: it does still have some indirect dependencies:
+# util.memcache.fasthash (which does not depend on memcache at all)
+#
+
class ContentException(Exception):
pass
@@ -38,29 +41,6 @@ log = logging.getLogger("mitx.courseware")
def format_url_params(params):
return [ urllib.quote(string.replace(' ','_')) for string in params ]
-def xpath(xml, query_string, **args):
- ''' Safe xpath query into an xml tree:
- * xml is the tree.
- * query_string is the query
- * args are the parameters. Substitute for {params}.
- We should remove this with the move to lxml.
- We should also use lxml argument passing. '''
- doc = etree.fromstring(xml)
- #print type(doc)
- def escape(x):
- # TODO: This should escape the string. For now, we just assume it's made of valid characters.
- # Couldn't figure out how to escape for lxml in a few quick Googles
- valid_chars="".join(map(chr, range(ord('a'),ord('z')+1)+range(ord('A'),ord('Z')+1)+range(ord('0'), ord('9')+1)))+"_ "
- for e in x:
- if e not in valid_chars:
- raise Exception("Invalid char in xpath expression. TODO: Escape")
- return x
-
- args=dict( ((k, escape(args[k])) for k in args) )
- #print args
- results = doc.xpath(query_string.format(**args))
- return results
-
def xpath_remove(tree, path):
''' Remove all items matching path from lxml tree. Works in
place.'''
@@ -69,35 +49,34 @@ def xpath_remove(tree, path):
item.getparent().remove(item)
return tree
-if __name__=='__main__':
- print xpath('', '/{search}/problem[@name="{name}"]',
- search='html', name="Bob")
-
def id_tag(course):
''' Tag all course elements with unique IDs '''
default_ids = xmodule.get_default_ids()
# Tag elements with unique IDs
- elements = course.xpath("|".join(['//'+c for c in default_ids]))
+ elements = course.xpath("|".join('//' + c for c in default_ids))
for elem in elements:
if elem.get('id'):
pass
elif elem.get(default_ids[elem.tag]):
- new_id = elem.get(default_ids[elem.tag])
- new_id = "".join([a for a in new_id if a.isalnum()]) # Convert to alphanumeric
- # Without this, a conflict may occur between an hmtl or youtube id
+ new_id = elem.get(default_ids[elem.tag])
+ # Convert to alphanumeric
+ new_id = "".join(a for a in new_id if a.isalnum())
+
+ # Without this, a conflict may occur between an html or youtube id
new_id = default_ids[elem.tag] + new_id
elem.set('id', new_id)
else:
- elem.set('id', "id"+fasthash(etree.tostring(elem)))
+ elem.set('id', "id" + fasthash(etree.tostring(elem)))
def propogate_downward_tag(element, attribute_name, parent_attribute = None):
''' This call is to pass down an attribute to all children. If an element
has this attribute, it will be "inherited" by all of its children. If a
child (A) already has that attribute, A will keep the same attribute and
all of A's children will inherit A's attribute. This is a recursive call.'''
-
- if (parent_attribute is None): #This is the entry call. Select all elements with this attribute
+
+ if (parent_attribute is None):
+ #This is the entry call. Select all elements with this attribute
all_attributed_elements = element.xpath("//*[@" + attribute_name +"]")
for attributed_element in all_attributed_elements:
attribute_value = attributed_element.get(attribute_name)
@@ -118,6 +97,164 @@ def propogate_downward_tag(element, attribute_name, parent_attribute = None):
#to its children later.
return
+
+def course_xml_process(tree):
+ ''' Do basic pre-processing of an XML tree. Assign IDs to all
+ items without. Propagate due dates, grace periods, etc. to child
+ items.
+ '''
+ replace_custom_tags(tree)
+ id_tag(tree)
+ propogate_downward_tag(tree, "due")
+ propogate_downward_tag(tree, "graded")
+ propogate_downward_tag(tree, "graceperiod")
+ propogate_downward_tag(tree, "showanswer")
+ propogate_downward_tag(tree, "rerandomize")
+ return tree
+
+
+def toc_from_xml(dom, active_chapter, active_section):
+ '''
+ Create a table of contents from the course xml.
+
+ Return format:
+ [ {'name': name, 'sections': SECTIONS, 'active': bool}, ... ]
+
+ where SECTIONS is a list
+ [ {'name': name, 'format': format, 'due': due, 'active' : bool}, ...]
+
+ active is set for the section and chapter corresponding to the passed
+ parameters. Everything else comes from the xml, or defaults to "".
+
+ chapters with name 'hidden' are skipped.
+ '''
+ name = dom.xpath('//course/@name')[0]
+
+ chapters = dom.xpath('//course[@name=$name]/chapter', name=name)
+ ch = list()
+ for c in chapters:
+ if c.get('name') == 'hidden':
+ continue
+ sections = list()
+ for s in dom.xpath('//course[@name=$name]/chapter[@name=$chname]/section',
+ name=name, chname=c.get('name')):
+
+ format = s.get("subtitle") if s.get("subtitle") else s.get("format") or ""
+ active = (c.get("name") == active_chapter and
+ s.get("name") == active_section)
+
+ sections.append({'name': s.get("name") or "",
+ 'format': format,
+ 'due': s.get("due") or "",
+ 'active': active})
+
+ ch.append({'name': c.get("name"),
+ 'sections': sections,
+ 'active': c.get("name") == active_chapter})
+ return ch
+
+
+def replace_custom_tags_dir(tree, dir):
+ '''
+ Process tree to replace all custom tags defined in dir.
+ '''
+ tags = os.listdir(dir)
+ for tag in tags:
+ for element in tree.iter(tag):
+ element.tag = 'customtag'
+ impl = etree.SubElement(element, 'impl')
+ impl.text = tag
+
+def parse_course_file(filename, options, namespace):
+ '''
+ Parse a course file with the given options, and return the resulting
+ xml tree object.
+
+ Options should be a dictionary including keys
+ 'dev_content': bool,
+ 'groups' : [list, of, user, groups]
+
+ namespace is used to in searching for the file. Could be e.g. 'course',
+ 'sections'.
+ '''
+ xml = etree.XML(render_to_string(filename, options, namespace=namespace))
+ return course_xml_process(xml)
+
+
+def get_section(section, options, dirname):
+ '''
+ Given the name of a section, an options dict containing keys
+ 'dev_content' and 'groups', and a directory to look in,
+ returns the xml tree for the section, or None if there's no
+ such section.
+ '''
+ filename = section + ".xml"
+
+ if filename not in os.listdir(dirname):
+ log.error(filename + " not in " + str(os.listdir(dirname)))
+ return None
+
+ tree = parse_course_file(filename, options, namespace='sections')
+ return tree
+
+
+def get_module(tree, module, id_tag, module_id, sections_dirname, options):
+ '''
+ Given the xml tree of the course, get the xml string for a module
+ with the specified module type, id_tag, module_id. Looks in
+ sections_dirname for sections.
+
+ id_tag -- use id_tag if the place the module stores its id is not 'id'
+ '''
+ # Sanitize input
+ if not module.isalnum():
+ raise Exception("Module is not alphanumeric")
+
+ if not module_id.isalnum():
+ raise Exception("Module ID is not alphanumeric")
+
+ # Generate search
+ xpath_search='//{module}[(@{id_tag} = "{id}") or (@id = "{id}")]'.format(
+ module=module,
+ id_tag=id_tag,
+ id=module_id)
+
+
+ result_set = tree.xpath(xpath_search)
+ if len(result_set) < 1:
+ # Not found in main tree. Let's look in the section files.
+ section_list = (s[:-4] for s in os.listdir(sections_dirname) if s[-4:]=='.xml')
+ for section in section_list:
+ try:
+ s = get_section(section, options, sections_dirname)
+ except etree.XMLSyntaxError:
+ ex = sys.exc_info()
+ raise ContentException("Malformed XML in " + section +
+ "(" + str(ex[1].msg) + ")")
+ result_set = s.xpath(xpath_search)
+ if len(result_set) != 0:
+ break
+
+ if len(result_set) > 1:
+ log.error("WARNING: Potentially malformed course file", module, module_id)
+
+ if len(result_set)==0:
+ log.error('[content_parser.get_module] cannot find %s in course.xml tree',
+ xpath_search)
+ log.error('tree = %s' % etree.tostring(tree, pretty_print=True))
+ return None
+
+ # log.debug('[courseware.content_parser.module_xml] found %s' % result_set)
+
+ return etree.tostring(result_set[0])
+
+
+
+
+
+
+# ==== All Django-specific code below =============================================
+
def user_groups(user):
if not user.is_authenticated():
return []
@@ -135,154 +272,94 @@ def user_groups(user):
return group_names
- # return [u.name for u in UserTestGroup.objects.raw("select * from auth_user, student_usertestgroup, student_usertestgroup_users where auth_user.id = student_usertestgroup_users.user_id and student_usertestgroup_users.usertestgroup_id = student_usertestgroup.id and auth_user.id = %s", [user.id])]
+
+def get_options(user):
+ return {'dev_content': settings.DEV_CONTENT,
+ 'groups': user_groups(user)}
+
def replace_custom_tags(tree):
- tags = os.listdir(settings.DATA_DIR+'/custom_tags')
- for tag in tags:
- for element in tree.iter(tag):
- element.tag = 'customtag'
- impl = etree.SubElement(element, 'impl')
- impl.text = tag
+ '''Replace custom tags defined in our custom_tags dir'''
+ replace_custom_tags_dir(tree, settings.DATA_DIR+'/custom_tags')
-def course_xml_process(tree):
- ''' Do basic pre-processing of an XML tree. Assign IDs to all
- items without. Propagate due dates, grace periods, etc. to child
- items.
+
+def course_file(user, coursename=None):
+ ''' Given a user, return an xml tree object for the course file.
+
+ Handles getting the right file, and processing it depending on the
+ groups the user is in. Does caching of the xml strings.
'''
- replace_custom_tags(tree)
- id_tag(tree)
- propogate_downward_tag(tree, "due")
- propogate_downward_tag(tree, "graded")
- propogate_downward_tag(tree, "graceperiod")
- propogate_downward_tag(tree, "showanswer")
- propogate_downward_tag(tree, "rerandomize")
- return tree
-
-def course_file(user,coursename=None):
- ''' Given a user, return course.xml'''
if user.is_authenticated():
- filename = UserProfile.objects.get(user=user).courseware # user.profile_cache.courseware
+ # use user.profile_cache.courseware?
+ filename = UserProfile.objects.get(user=user).courseware
else:
filename = 'guest_course.xml'
- # if a specific course is specified, then use multicourse to get the right path to the course XML directory
+ # if a specific course is specified, then use multicourse to get
+ # the right path to the course XML directory
if coursename and settings.ENABLE_MULTICOURSE:
xp = multicourse_settings.get_course_xmlpath(coursename)
filename = xp + filename # prefix the filename with the path
groups = user_groups(user)
- options = {'dev_content':settings.DEV_CONTENT,
- 'groups' : groups}
+ options = get_options(user)
+ # Try the cache...
+ cache_key = "{0}_processed?dev_content:{1}&groups:{2}".format(
+ filename,
+ options['dev_content'],
+ sorted(groups))
- cache_key = filename + "_processed?dev_content:" + str(options['dev_content']) + "&groups:" + str(sorted(groups))
- if "dev" not in settings.DEFAULT_GROUPS:
- tree_string = cache.get(cache_key)
- else:
+ if "dev" in settings.DEFAULT_GROUPS:
tree_string = None
+ else:
+ tree_string = cache.get(cache_key)
- if settings.DEBUG:
- log.info('[courseware.content_parser.course_file] filename=%s, cache_key=%s' % (filename,cache_key))
- # print '[courseware.content_parser.course_file] tree_string = ',tree_string
-
- if not tree_string:
- tree = course_xml_process(etree.XML(render_to_string(filename, options, namespace = 'course')))
- tree_string = etree.tostring(tree)
-
- cache.set(cache_key, tree_string, 60)
- else:
+ if tree_string:
tree = etree.XML(tree_string)
+ else:
+ tree = parse_course_file(filename, options, namespace='course')
+ # Cache it
+ tree_string = etree.tostring(tree)
+ cache.set(cache_key, tree_string, 60)
return tree
-def section_file(user, section, coursename=None, dironly=False):
+
+def sections_dir(coursename=None):
+ ''' Get directory where sections information is stored.
+ '''
+ # if a specific course is specified, then use multicourse to get the
+ # right path to the course XML directory
+ xp = ''
+ if coursename and settings.ENABLE_MULTICOURSE:
+ xp = multicourse_settings.get_course_xmlpath(coursename)
+
+ return settings.DATA_DIR + xp + '/sections/'
+
+
+
+def section_file(user, section, coursename=None):
'''
Given a user and the name of a section, return that section.
This is done specific to each course.
- If dironly=True then return the sections directory.
- TODO: This is a bit weird; dironly should be scrapped.
+
+ Returns the xml tree for the section, or None if there's no such section.
'''
- filename = section+".xml"
+ dirname = sections_dir(coursename)
- # if a specific course is specified, then use multicourse to get the right path to the course XML directory
- xp = ''
- if coursename and settings.ENABLE_MULTICOURSE: xp = multicourse_settings.get_course_xmlpath(coursename)
- dirname = settings.DATA_DIR + xp + '/sections/'
-
- if dironly: return dirname
-
- if filename not in os.listdir(dirname):
- log.error(filename+" not in "+str(os.listdir(dirname)))
- return None
-
- options = {'dev_content':settings.DEV_CONTENT,
- 'groups' : user_groups(user)}
-
- tree = course_xml_process(etree.XML(render_to_string(filename, options, namespace = 'sections')))
- return tree
+ return get_section(section, options, dirname)
def module_xml(user, module, id_tag, module_id, coursename=None):
''' Get XML for a module based on module and module_id. Assumes
- module occurs once in courseware XML file or hidden section. '''
- # Sanitize input
- if not module.isalnum():
- raise Exception("Module is not alphanumeric")
- if not module_id.isalnum():
- raise Exception("Module ID is not alphanumeric")
- # Generate search
- xpath_search='//{module}[(@{id_tag} = "{id}") or (@id = "{id}")]'.format(module=module,
- id_tag=id_tag,
- id=module_id)
- #result_set=doc.xpathEval(xpath_search)
- doc = course_file(user,coursename)
- sdirname = section_file(user,'',coursename,True) # get directory where sections information is stored
- section_list = (s[:-4] for s in os.listdir(sdirname) if s[-4:]=='.xml')
+ module occurs once in courseware XML file or hidden section.
+ '''
+ tree = course_file(user, coursename)
+ sdirname = sections_dir(coursename)
+ options = get_options(user)
- result_set=doc.xpath(xpath_search)
- if len(result_set)<1:
- for section in section_list:
- try:
- s = section_file(user, section, coursename)
- except etree.XMLSyntaxError:
- ex= sys.exc_info()
- raise ContentException("Malformed XML in " + section+ "("+str(ex[1].msg)+")")
- result_set = s.xpath(xpath_search)
- if len(result_set) != 0:
- break
-
- if len(result_set)>1:
- log.error("WARNING: Potentially malformed course file", module, module_id)
- if len(result_set)==0:
- if settings.DEBUG:
- log.error('[courseware.content_parser.module_xml] cannot find %s in course.xml tree' % xpath_search)
- log.error('tree = %s' % etree.tostring(doc,pretty_print=True))
- return None
- if settings.DEBUG:
- log.info('[courseware.content_parser.module_xml] found %s' % result_set)
- return etree.tostring(result_set[0])
- #return result_set[0].serialize()
-
-def toc_from_xml(dom, active_chapter, active_section):
- name = dom.xpath('//course/@name')[0]
-
- chapters = dom.xpath('//course[@name=$name]/chapter', name=name)
- ch=list()
- for c in chapters:
- if c.get('name') == 'hidden':
- continue
- sections=list()
- for s in dom.xpath('//course[@name=$name]/chapter[@name=$chname]/section', name=name, chname=c.get('name')):
- sections.append({'name':s.get("name") or "",
- 'format':s.get("subtitle") if s.get("subtitle") else s.get("format") or "",
- 'due':s.get("due") or "",
- 'active':(c.get("name")==active_chapter and \
- s.get("name")==active_section)})
- ch.append({'name':c.get("name"),
- 'sections':sections,
- 'active':(c.get("name")==active_chapter)})
- return ch
+ return get_module(tree, module, id_tag, module_id, sdirname, options)
diff --git a/lms/djangoapps/courseware/management/commands/check_course.py b/lms/djangoapps/courseware/management/commands/check_course.py
index 755cf089f4..8de29bdd8b 100644
--- a/lms/djangoapps/courseware/management/commands/check_course.py
+++ b/lms/djangoapps/courseware/management/commands/check_course.py
@@ -10,46 +10,98 @@ from courseware.content_parser import course_file
import courseware.module_render
import xmodule
+import mitxmako.middleware as middleware
+middleware.MakoMiddleware()
+
+def check_names(user, course):
+ '''
+ Complain if any problems have alphanumeric names.
+ TODO (vshnayder): there are some in 6.002x that don't. Is that actually a problem?
+ '''
+ all_ok = True
+ print "Confirming all problems have alphanumeric names"
+ for problem in course.xpath('//problem'):
+ filename = problem.get('filename')
+ if not filename.isalnum():
+ print "==============> Invalid (non-alphanumeric) filename", filename
+ all_ok = False
+ return all_ok
+
+def check_rendering(user, course):
+ '''Check that all modules render'''
+ all_ok = True
+ print "Confirming all modules render. Nothing should print during this step. "
+ for module in course.xpath('//problem|//html|//video|//vertical|//sequential|/tab'):
+ module_class = xmodule.modx_modules[module.tag]
+ # TODO: Abstract this out in render_module.py
+ try:
+ module_class(etree.tostring(module),
+ module.get('id'),
+ ajax_url='',
+ state=None,
+ track_function = lambda x,y,z:None,
+ render_function = lambda x: {'content':'','type':'video'})
+ except Exception as ex:
+ print "==============> Error in ", etree.tostring(module)
+ print ""
+ print ex
+ all_ok = False
+ print "Module render check finished"
+ return all_ok
+
+def check_sections(user, course):
+ all_ok = True
+ sections_dir = settings.DATA_DIR + "/sections"
+ print "Checking that all sections exist and parse properly"
+ if os.path.exists(sections_dir):
+ print "Checking all section includes are valid XML"
+ for f in os.listdir(sections_dir):
+ sectionfile = sections_dir + '/' + f
+ #print sectionfile
+ # skip non-xml files:
+ if not sectionfile.endswith('xml'):
+ continue
+ try:
+ etree.parse(sectionfile)
+ except Exception as ex:
+ print "================> Error parsing ", sectionfile
+ print ex
+ all_ok = False
+ print "checked all sections"
+ else:
+ print "Skipping check of include files -- no section includes dir ("+sections_dir+")"
+ return all_ok
+
class Command(BaseCommand):
help = "Does basic validity tests on course.xml."
def handle(self, *args, **options):
- check = True
+ all_ok = True
+
+ # TODO (vshnayder): create dummy user objects. Anon, authenticated, staff.
+ # Check that everything works for each.
+ # The objects probably shouldn't be actual django users to avoid unneeded
+ # dependency on django.
+
+ # TODO: use args as list of files to check. Fix loading to work for other files.
+
sample_user = User.objects.all()[0]
+
+
print "Attempting to load courseware"
course = course_file(sample_user)
- print "Confirming all problems have alphanumeric names"
- for problem in course.xpath('//problem'):
- filename = problem.get('filename')
- if not filename.isalnum():
- print "==============> Invalid (non-alphanumeric) filename", filename
- check = False
- print "Confirming all modules render. Nothing should print during this step. "
- for module in course.xpath('//problem|//html|//video|//vertical|//sequential|/tab'):
- module_class = xmodule.modx_modules[module.tag]
- # TODO: Abstract this out in render_module.py
- try:
- module_class(etree.tostring(module),
- module.get('id'),
- ajax_url='',
- state=None,
- track_function = lambda x,y,z:None,
- render_function = lambda x: {'content':'','type':'video'})
- except:
- print "==============> Error in ", etree.tostring(module)
- check = False
- print "Module render check finished"
- sections_dir = settings.DATA_DIR+"sections"
- if os.path.exists(sections_dir):
- print "Checking all section includes are valid XML"
- for f in os.listdir(sections_dir):
- print f
- etree.parse(sections_dir+'/'+f)
- else:
- print "Skipping check of include files -- no section includes dir ("+sections_dir+")"
+
+ to_run = [check_names,
+ # TODO (vshnayder) : make check_rendering work (use module_render.py),
+ # turn it on
+ # check_rendering,
+ check_sections,
+ ]
+ for check in to_run:
+ all_ok = check(sample_user, course) and all_ok
+
# TODO: print "Checking course properly annotated with preprocess.py"
-
- if check:
+ if all_ok:
print 'Courseware passes all checks!'
else:
print "Courseware fails some checks"