'''
This file will eventually form an abstraction layer between the
course XML file and the rest of the system.

TODO: Shift everything from xml.dom.minidom to XPath (or XQuery)
'''

import hashlib
import json
import logging
import string

from lxml import etree

try:
    from django.conf import settings
    from auth.models import UserProfile
except Exception:
    # Best effort: allow the module to be imported (e.g. run as a script)
    # without a configured Django project. course_file() is unusable then.
    settings = None
    UserProfile = None

log = logging.getLogger("mitx.courseware")

# Characters accepted in values substituted into xpath() query templates.
_XPATH_VALID_CHARS = string.ascii_letters + string.digits + "_ "


def fasthash(string):
    ''' Return "id" followed by the hex MD4 digest of string.

    Text (non-bytes) input is encoded as UTF-8 before hashing.

    NOTE(review): md4 is kept so generated IDs do not change; it may be
    unavailable in some OpenSSL builds, in which case hashlib.new raises
    ValueError -- confirm for the deployment platform.
    '''
    if not isinstance(string, bytes):
        string = string.encode("utf-8")
    m = hashlib.new("md4")
    m.update(string)
    return "id" + m.hexdigest()


def xpath(xml, query_string, **args):
    ''' Safe xpath query into an xml document:
        * xml is the document source; it is parsed with etree.fromstring.
        * query_string is the query template.
        * args are the parameters. Substitute for {params}.
        We should remove this with the move to lxml.
        We should also use lxml argument passing.

        Returns the result of running the formatted query on the parsed
        document. Raises Exception if a parameter value contains a
        character outside letters, digits, "_" and " ".
    '''
    doc = etree.fromstring(xml)
    log.debug("xpath: parsed document root type %s", type(doc))

    def escape(x):
        # TODO: This should escape the string. For now, we just assume
        # it's made of valid characters.
        # Couldn't figure out how to escape for lxml in a few quick Googles
        for e in x:
            if e not in _XPATH_VALID_CHARS:
                raise Exception("Invalid char in xpath expression. TODO: Escape")
        return x

    args = dict((k, escape(v)) for k, v in args.items())
    log.debug("xpath: query arguments %s", args)
    return doc.xpath(query_string.format(**args))


def xpath_remove(tree, path):
    ''' Remove all items matching path from lxml tree.
        Works in place.

        Returns the same tree object that was passed in.
    '''
    items = tree.xpath(path)
    for item in items:
        item.getparent().remove(item)
    return tree


def item(l, default="", process=lambda x: x):
    ''' Collapse a list of zero or one elements into a single value.

        * An empty l gives default.
        * A one-element l gives process(l[0]).
        * Anything longer raises Exception('Malformed XML').
    '''
    if len(l) == 0:
        return default
    elif len(l) == 1:
        return process(l[0])
    else:
        raise Exception('Malformed XML')


def id_tag(course):
    ''' Tag all course elements with unique IDs.

        Works in place. Elements that already have a non-empty 'id' are
        left alone. Otherwise the id is the element's default attribute
        (see default_ids) reduced to its alphanumeric characters, or a
        hash of the serialized element when that yields nothing.
    '''
    default_ids = {'video': 'youtube',
                   'problem': 'filename',
                   'sequential': 'id',
                   'html': 'filename',
                   'vertical': 'id',
                   'tab': 'id',
                   'schematic': 'id'}

    # Tag elements with unique IDs
    elements = course.xpath("|".join('//' + c for c in default_ids))
    for elem in elements:
        if elem.get('id'):
            continue
        # Convert to alphanumeric
        new_id = elem.get(default_ids[elem.tag]) or ""
        new_id = "".join(a for a in new_id if a.isalnum())
        if not new_id:
            # No usable attribute (missing, or nothing alphanumeric in it):
            # fall back to a content hash rather than an empty id.
            new_id = fasthash(etree.tostring(elem))
        elem.set('id', new_id)


def due_tag(course):
    ''' Propagate due dates through the course tree, in place.

        Sections without a 'due' attribute receive the most recent due
        date seen so far (starting from the first section that has one),
        and every problem below a section gets that section's due date.
        Does nothing when no section carries a 'due' attribute.
    '''
    # The primary purpose of this tagging is to make sure that each problem
    # inherits the due date from the section that it is in. We also make
    # sure that each section has a due date. If it does not, it inherits
    # the last section's due date. This is to make sure that the sections
    # are in chronological order. It is an exception to have a later section
    # due before an earlier one.

    # How are due dates handled for different time zones? What _time_ are
    # things due?

    # First, we grab the first due date to occur. This is our starting date.
    # (In XPath, "//section[@due][1]" picks the first match under each
    # parent; "(//section[@due])[1]" picks the first in document order.
    # Indexing the result list avoids the ambiguity.)
    dated_sections = course.xpath("//section[@due]")
    if not dated_sections:
        # No due date anywhere, so there is nothing to inherit.
        return

    # All new dates must be further than currentDate
    currentDate = dated_sections[0].get('due')

    for section in course.xpath("//section"):
        existingDate = section.get('due')
        if existingDate:
            # TODO: Make sure existing date is further into the future
            # than currentDate
            currentDate = existingDate
        else:
            section.set('due', currentDate)

        # Query relative to this section so that sections sharing a name
        # (or lacking one) only affect their own problems.
        for problem in section.xpath('.//problem'):
            problem.set('due', currentDate)


def course_file(user):
    ''' Parse the courseware file for user and return the tagged tree.

        The file path is settings.DATA_DIR concatenated with the
        courseware field of the user's UserProfile. The parsed tree is
        passed through id_tag and due_tag before being returned.
    '''
    # TODO: Cache.
    tree = etree.parse(settings.DATA_DIR + UserProfile.objects.get(user=user).courseware)
    id_tag(tree)
    due_tag(tree)
    return tree


def module_xml(coursefile, module, id_tag, module_id):
    ''' Get XML for a module based on module and module_id. Assumes
        module occurs once in courseware XML file..

        * coursefile is the tree to search.
        * module is the element tag name; must be alphanumeric.
        * id_tag is the attribute compared against module_id (the 'id'
          attribute is always checked as well). It is not validated here.
        * module_id is the identifier to match; must be alphanumeric.

        Returns the serialized first match, or None if nothing matches.
        Raises Exception if module or module_id is not alphanumeric.
    '''
    doc = coursefile

    # Sanitize input
    if not module.isalnum():
        raise Exception("Module is not alphanumeric")
    if not module_id.isalnum():
        raise Exception("Module ID is not alphanumeric")

    xpath_search = '//*/{module}[(@{id_tag} = "{id}") or (@id = "{id}")]'.format(
        module=module, id_tag=id_tag, id=module_id)
    result_set = doc.xpath(xpath_search)
    if len(result_set) > 1:
        log.warning("Potentially malformed course file: %s %s", module, module_id)
    if len(result_set) == 0:
        return None
    return etree.tostring(result_set[0])


def toc_from_xml(dom, active_chapter, active_section):
    ''' Build the table of contents for the course in dom.

        Returns a list with one dict per chapter ('name', 'sections',
        'active'). Each entry of 'sections' is a dict with 'name',
        'time', 'format', 'due' and 'active'. Chapters named 'hidden'
        are skipped. 'active' flags are set by comparing names against
        active_chapter and active_section.
    '''
    name = dom.xpath('//course/@name')[0]

    chapters = dom.xpath('//course[@name=$name]/chapter', name=name)
    ch = list()
    for c in chapters:
        chapter_name = c.get('name')
        if chapter_name == 'hidden':
            continue
        chapter_active = (chapter_name == active_chapter)

        sections = list()
        for s in dom.xpath('//course[@name=$name]/chapter[@name=$chname]/section',
                           name=name, chname=chapter_name):
            sections.append({'name': s.get("name") or "",
                             'time': s.get("time") or "",
                             'format': s.get("format") or "",
                             'due': s.get("due") or "",
                             'active': (chapter_active and
                                        s.get("name") == active_section)})
        ch.append({'name': chapter_name,
                   'sections': sections,
                   'active': chapter_active})
    return ch


if __name__ == '__main__':
    # Minimal self-test of the xpath() template substitution.
    print(xpath('<html><problem name="Bob"/></html>',
                '/{search}/problem[@name="{name}"]',
                search='html', name="Bob"))