diff --git a/README b/README index 6860772c35..a76ea66690 100644 --- a/README +++ b/README @@ -1,2 +1 @@ -This branch (re-)adds dynamic math and symbolicresponse. -Test cases included. +see doc/ for documentation. diff --git a/common/lib/mitxmako/shortcuts.py b/common/lib/mitxmako/shortcuts.py index 9f6044b81e..72a9b81e3c 100644 --- a/common/lib/mitxmako/shortcuts.py +++ b/common/lib/mitxmako/shortcuts.py @@ -12,13 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging + +log = logging.getLogger("mitx.common.lib.mitxmako") + from django.template import Context from django.http import HttpResponse from . import middleware from django.conf import settings - def render_to_string(template_name, dictionary, context=None, namespace='main'): context_instance = Context(dictionary) # add dictionary to context_instance @@ -27,8 +30,11 @@ def render_to_string(template_name, dictionary, context=None, namespace='main'): context_dictionary = {} context_instance['settings'] = settings context_instance['MITX_ROOT_URL'] = settings.MITX_ROOT_URL - for d in middleware.requestcontext: - context_dictionary.update(d) + + # In various testing contexts, there might not be a current request context. + if middleware.requestcontext is not None: + for d in middleware.requestcontext: + context_dictionary.update(d) for d in context_instance: context_dictionary.update(d) if context: diff --git a/common/test/README.md b/common/test/README.md new file mode 100644 index 0000000000..07c49f7d92 --- /dev/null +++ b/common/test/README.md @@ -0,0 +1,6 @@ +Common test infrastructure for LMS + CMS +=========================== + +data/ has some test course data. + +Once the course validation is separated from django, we should have scripts here that checks that a course consists only of xml that we understand. diff --git a/common/test/data/full/README.md b/common/test/data/full/README.md new file mode 100644 index 0000000000..812ca471ce --- /dev/null +++ b/common/test/data/full/README.md @@ -0,0 +1 @@ +This is a realistic course, with many different module types and a lot of structure. It is based on 6.002x. diff --git a/common/test/data/simple/README.md b/common/test/data/simple/README.md new file mode 100644 index 0000000000..69ff6b4ed0 --- /dev/null +++ b/common/test/data/simple/README.md @@ -0,0 +1,2 @@ +This is a simple, but non-trivial, course using multiple module types and some nested structure. + diff --git a/common/test/data/toy/README.md b/common/test/data/toy/README.md new file mode 100644 index 0000000000..59ab392ed3 --- /dev/null +++ b/common/test/data/toy/README.md @@ -0,0 +1 @@ +This is a very very simple course, useful for initial debugging of processing code. diff --git a/common/test/data/toy/toy_course.xml b/common/test/data/toy/toy_course.xml new file mode 100644 index 0000000000..ecac9a4776 --- /dev/null +++ b/common/test/data/toy/toy_course.xml @@ -0,0 +1,11 @@ + + +
+
+
+ +
+
+
diff --git a/lms/djangoapps/courseware/content_parser.py b/lms/djangoapps/courseware/content_parser.py index 4f96db5284..62163c86b0 100644 --- a/lms/djangoapps/courseware/content_parser.py +++ b/lms/djangoapps/courseware/content_parser.py @@ -26,10 +26,13 @@ import xmodule ''' This file will eventually form an abstraction layer between the course XML file and the rest of the system. - -TODO: Shift everything from xml.dom.minidom to XPath (or XQuery) ''' +# ==== This section has no direct dependencies on django ==================================== +# NOTE: it does still have some indirect dependencies: +# util.memcache.fasthash (which does not depend on memcache at all) +# + class ContentException(Exception): pass @@ -38,29 +41,6 @@ log = logging.getLogger("mitx.courseware") def format_url_params(params): return [ urllib.quote(string.replace(' ','_')) for string in params ] -def xpath(xml, query_string, **args): - ''' Safe xpath query into an xml tree: - * xml is the tree. - * query_string is the query - * args are the parameters. Substitute for {params}. - We should remove this with the move to lxml. - We should also use lxml argument passing. ''' - doc = etree.fromstring(xml) - #print type(doc) - def escape(x): - # TODO: This should escape the string. For now, we just assume it's made of valid characters. - # Couldn't figure out how to escape for lxml in a few quick Googles - valid_chars="".join(map(chr, range(ord('a'),ord('z')+1)+range(ord('A'),ord('Z')+1)+range(ord('0'), ord('9')+1)))+"_ " - for e in x: - if e not in valid_chars: - raise Exception("Invalid char in xpath expression. TODO: Escape") - return x - - args=dict( ((k, escape(args[k])) for k in args) ) - #print args - results = doc.xpath(query_string.format(**args)) - return results - def xpath_remove(tree, path): ''' Remove all items matching path from lxml tree. Works in place.''' @@ -69,35 +49,34 @@ def xpath_remove(tree, path): item.getparent().remove(item) return tree -if __name__=='__main__': - print xpath('', '/{search}/problem[@name="{name}"]', - search='html', name="Bob") - def id_tag(course): ''' Tag all course elements with unique IDs ''' default_ids = xmodule.get_default_ids() # Tag elements with unique IDs - elements = course.xpath("|".join(['//'+c for c in default_ids])) + elements = course.xpath("|".join('//' + c for c in default_ids)) for elem in elements: if elem.get('id'): pass elif elem.get(default_ids[elem.tag]): - new_id = elem.get(default_ids[elem.tag]) - new_id = "".join([a for a in new_id if a.isalnum()]) # Convert to alphanumeric - # Without this, a conflict may occur between an hmtl or youtube id + new_id = elem.get(default_ids[elem.tag]) + # Convert to alphanumeric + new_id = "".join(a for a in new_id if a.isalnum()) + + # Without this, a conflict may occur between an html or youtube id new_id = default_ids[elem.tag] + new_id elem.set('id', new_id) else: - elem.set('id', "id"+fasthash(etree.tostring(elem))) + elem.set('id', "id" + fasthash(etree.tostring(elem))) def propogate_downward_tag(element, attribute_name, parent_attribute = None): ''' This call is to pass down an attribute to all children. If an element has this attribute, it will be "inherited" by all of its children. If a child (A) already has that attribute, A will keep the same attribute and all of A's children will inherit A's attribute. This is a recursive call.''' - - if (parent_attribute is None): #This is the entry call. Select all elements with this attribute + + if (parent_attribute is None): + #This is the entry call. Select all elements with this attribute all_attributed_elements = element.xpath("//*[@" + attribute_name +"]") for attributed_element in all_attributed_elements: attribute_value = attributed_element.get(attribute_name) @@ -118,6 +97,164 @@ def propogate_downward_tag(element, attribute_name, parent_attribute = None): #to its children later. return + +def course_xml_process(tree): + ''' Do basic pre-processing of an XML tree. Assign IDs to all + items without. Propagate due dates, grace periods, etc. to child + items. + ''' + replace_custom_tags(tree) + id_tag(tree) + propogate_downward_tag(tree, "due") + propogate_downward_tag(tree, "graded") + propogate_downward_tag(tree, "graceperiod") + propogate_downward_tag(tree, "showanswer") + propogate_downward_tag(tree, "rerandomize") + return tree + + +def toc_from_xml(dom, active_chapter, active_section): + ''' + Create a table of contents from the course xml. + + Return format: + [ {'name': name, 'sections': SECTIONS, 'active': bool}, ... ] + + where SECTIONS is a list + [ {'name': name, 'format': format, 'due': due, 'active' : bool}, ...] + + active is set for the section and chapter corresponding to the passed + parameters. Everything else comes from the xml, or defaults to "". + + chapters with name 'hidden' are skipped. + ''' + name = dom.xpath('//course/@name')[0] + + chapters = dom.xpath('//course[@name=$name]/chapter', name=name) + ch = list() + for c in chapters: + if c.get('name') == 'hidden': + continue + sections = list() + for s in dom.xpath('//course[@name=$name]/chapter[@name=$chname]/section', + name=name, chname=c.get('name')): + + format = s.get("subtitle") if s.get("subtitle") else s.get("format") or "" + active = (c.get("name") == active_chapter and + s.get("name") == active_section) + + sections.append({'name': s.get("name") or "", + 'format': format, + 'due': s.get("due") or "", + 'active': active}) + + ch.append({'name': c.get("name"), + 'sections': sections, + 'active': c.get("name") == active_chapter}) + return ch + + +def replace_custom_tags_dir(tree, dir): + ''' + Process tree to replace all custom tags defined in dir. + ''' + tags = os.listdir(dir) + for tag in tags: + for element in tree.iter(tag): + element.tag = 'customtag' + impl = etree.SubElement(element, 'impl') + impl.text = tag + +def parse_course_file(filename, options, namespace): + ''' + Parse a course file with the given options, and return the resulting + xml tree object. + + Options should be a dictionary including keys + 'dev_content': bool, + 'groups' : [list, of, user, groups] + + namespace is used to in searching for the file. Could be e.g. 'course', + 'sections'. + ''' + xml = etree.XML(render_to_string(filename, options, namespace=namespace)) + return course_xml_process(xml) + + +def get_section(section, options, dirname): + ''' + Given the name of a section, an options dict containing keys + 'dev_content' and 'groups', and a directory to look in, + returns the xml tree for the section, or None if there's no + such section. + ''' + filename = section + ".xml" + + if filename not in os.listdir(dirname): + log.error(filename + " not in " + str(os.listdir(dirname))) + return None + + tree = parse_course_file(filename, options, namespace='sections') + return tree + + +def get_module(tree, module, id_tag, module_id, sections_dirname, options): + ''' + Given the xml tree of the course, get the xml string for a module + with the specified module type, id_tag, module_id. Looks in + sections_dirname for sections. + + id_tag -- use id_tag if the place the module stores its id is not 'id' + ''' + # Sanitize input + if not module.isalnum(): + raise Exception("Module is not alphanumeric") + + if not module_id.isalnum(): + raise Exception("Module ID is not alphanumeric") + + # Generate search + xpath_search='//{module}[(@{id_tag} = "{id}") or (@id = "{id}")]'.format( + module=module, + id_tag=id_tag, + id=module_id) + + + result_set = tree.xpath(xpath_search) + if len(result_set) < 1: + # Not found in main tree. Let's look in the section files. + section_list = (s[:-4] for s in os.listdir(sections_dirname) if s[-4:]=='.xml') + for section in section_list: + try: + s = get_section(section, options, sections_dirname) + except etree.XMLSyntaxError: + ex = sys.exc_info() + raise ContentException("Malformed XML in " + section + + "(" + str(ex[1].msg) + ")") + result_set = s.xpath(xpath_search) + if len(result_set) != 0: + break + + if len(result_set) > 1: + log.error("WARNING: Potentially malformed course file", module, module_id) + + if len(result_set)==0: + log.error('[content_parser.get_module] cannot find %s in course.xml tree', + xpath_search) + log.error('tree = %s' % etree.tostring(tree, pretty_print=True)) + return None + + # log.debug('[courseware.content_parser.module_xml] found %s' % result_set) + + return etree.tostring(result_set[0]) + + + + + + +# ==== All Django-specific code below ============================================= + def user_groups(user): if not user.is_authenticated(): return [] @@ -135,154 +272,94 @@ def user_groups(user): return group_names - # return [u.name for u in UserTestGroup.objects.raw("select * from auth_user, student_usertestgroup, student_usertestgroup_users where auth_user.id = student_usertestgroup_users.user_id and student_usertestgroup_users.usertestgroup_id = student_usertestgroup.id and auth_user.id = %s", [user.id])] + +def get_options(user): + return {'dev_content': settings.DEV_CONTENT, + 'groups': user_groups(user)} + def replace_custom_tags(tree): - tags = os.listdir(settings.DATA_DIR+'/custom_tags') - for tag in tags: - for element in tree.iter(tag): - element.tag = 'customtag' - impl = etree.SubElement(element, 'impl') - impl.text = tag + '''Replace custom tags defined in our custom_tags dir''' + replace_custom_tags_dir(tree, settings.DATA_DIR+'/custom_tags') -def course_xml_process(tree): - ''' Do basic pre-processing of an XML tree. Assign IDs to all - items without. Propagate due dates, grace periods, etc. to child - items. + +def course_file(user, coursename=None): + ''' Given a user, return an xml tree object for the course file. + + Handles getting the right file, and processing it depending on the + groups the user is in. Does caching of the xml strings. ''' - replace_custom_tags(tree) - id_tag(tree) - propogate_downward_tag(tree, "due") - propogate_downward_tag(tree, "graded") - propogate_downward_tag(tree, "graceperiod") - propogate_downward_tag(tree, "showanswer") - propogate_downward_tag(tree, "rerandomize") - return tree - -def course_file(user,coursename=None): - ''' Given a user, return course.xml''' if user.is_authenticated(): - filename = UserProfile.objects.get(user=user).courseware # user.profile_cache.courseware + # use user.profile_cache.courseware? + filename = UserProfile.objects.get(user=user).courseware else: filename = 'guest_course.xml' - # if a specific course is specified, then use multicourse to get the right path to the course XML directory + # if a specific course is specified, then use multicourse to get + # the right path to the course XML directory if coursename and settings.ENABLE_MULTICOURSE: xp = multicourse_settings.get_course_xmlpath(coursename) filename = xp + filename # prefix the filename with the path groups = user_groups(user) - options = {'dev_content':settings.DEV_CONTENT, - 'groups' : groups} + options = get_options(user) + # Try the cache... + cache_key = "{0}_processed?dev_content:{1}&groups:{2}".format( + filename, + options['dev_content'], + sorted(groups)) - cache_key = filename + "_processed?dev_content:" + str(options['dev_content']) + "&groups:" + str(sorted(groups)) - if "dev" not in settings.DEFAULT_GROUPS: - tree_string = cache.get(cache_key) - else: + if "dev" in settings.DEFAULT_GROUPS: tree_string = None + else: + tree_string = cache.get(cache_key) - if settings.DEBUG: - log.info('[courseware.content_parser.course_file] filename=%s, cache_key=%s' % (filename,cache_key)) - # print '[courseware.content_parser.course_file] tree_string = ',tree_string - - if not tree_string: - tree = course_xml_process(etree.XML(render_to_string(filename, options, namespace = 'course'))) - tree_string = etree.tostring(tree) - - cache.set(cache_key, tree_string, 60) - else: + if tree_string: tree = etree.XML(tree_string) + else: + tree = parse_course_file(filename, options, namespace='course') + # Cache it + tree_string = etree.tostring(tree) + cache.set(cache_key, tree_string, 60) return tree -def section_file(user, section, coursename=None, dironly=False): + +def sections_dir(coursename=None): + ''' Get directory where sections information is stored. + ''' + # if a specific course is specified, then use multicourse to get the + # right path to the course XML directory + xp = '' + if coursename and settings.ENABLE_MULTICOURSE: + xp = multicourse_settings.get_course_xmlpath(coursename) + + return settings.DATA_DIR + xp + '/sections/' + + + +def section_file(user, section, coursename=None): ''' Given a user and the name of a section, return that section. This is done specific to each course. - If dironly=True then return the sections directory. - TODO: This is a bit weird; dironly should be scrapped. + + Returns the xml tree for the section, or None if there's no such section. ''' - filename = section+".xml" + dirname = sections_dir(coursename) - # if a specific course is specified, then use multicourse to get the right path to the course XML directory - xp = '' - if coursename and settings.ENABLE_MULTICOURSE: xp = multicourse_settings.get_course_xmlpath(coursename) - dirname = settings.DATA_DIR + xp + '/sections/' - - if dironly: return dirname - - if filename not in os.listdir(dirname): - log.error(filename+" not in "+str(os.listdir(dirname))) - return None - - options = {'dev_content':settings.DEV_CONTENT, - 'groups' : user_groups(user)} - - tree = course_xml_process(etree.XML(render_to_string(filename, options, namespace = 'sections'))) - return tree + return get_section(section, options, dirname) def module_xml(user, module, id_tag, module_id, coursename=None): ''' Get XML for a module based on module and module_id. Assumes - module occurs once in courseware XML file or hidden section. ''' - # Sanitize input - if not module.isalnum(): - raise Exception("Module is not alphanumeric") - if not module_id.isalnum(): - raise Exception("Module ID is not alphanumeric") - # Generate search - xpath_search='//{module}[(@{id_tag} = "{id}") or (@id = "{id}")]'.format(module=module, - id_tag=id_tag, - id=module_id) - #result_set=doc.xpathEval(xpath_search) - doc = course_file(user,coursename) - sdirname = section_file(user,'',coursename,True) # get directory where sections information is stored - section_list = (s[:-4] for s in os.listdir(sdirname) if s[-4:]=='.xml') + module occurs once in courseware XML file or hidden section. + ''' + tree = course_file(user, coursename) + sdirname = sections_dir(coursename) + options = get_options(user) - result_set=doc.xpath(xpath_search) - if len(result_set)<1: - for section in section_list: - try: - s = section_file(user, section, coursename) - except etree.XMLSyntaxError: - ex= sys.exc_info() - raise ContentException("Malformed XML in " + section+ "("+str(ex[1].msg)+")") - result_set = s.xpath(xpath_search) - if len(result_set) != 0: - break - - if len(result_set)>1: - log.error("WARNING: Potentially malformed course file", module, module_id) - if len(result_set)==0: - if settings.DEBUG: - log.error('[courseware.content_parser.module_xml] cannot find %s in course.xml tree' % xpath_search) - log.error('tree = %s' % etree.tostring(doc,pretty_print=True)) - return None - if settings.DEBUG: - log.info('[courseware.content_parser.module_xml] found %s' % result_set) - return etree.tostring(result_set[0]) - #return result_set[0].serialize() - -def toc_from_xml(dom, active_chapter, active_section): - name = dom.xpath('//course/@name')[0] - - chapters = dom.xpath('//course[@name=$name]/chapter', name=name) - ch=list() - for c in chapters: - if c.get('name') == 'hidden': - continue - sections=list() - for s in dom.xpath('//course[@name=$name]/chapter[@name=$chname]/section', name=name, chname=c.get('name')): - sections.append({'name':s.get("name") or "", - 'format':s.get("subtitle") if s.get("subtitle") else s.get("format") or "", - 'due':s.get("due") or "", - 'active':(c.get("name")==active_chapter and \ - s.get("name")==active_section)}) - ch.append({'name':c.get("name"), - 'sections':sections, - 'active':(c.get("name")==active_chapter)}) - return ch + return get_module(tree, module, id_tag, module_id, sdirname, options) diff --git a/lms/djangoapps/courseware/management/commands/check_course.py b/lms/djangoapps/courseware/management/commands/check_course.py index 755cf089f4..8de29bdd8b 100644 --- a/lms/djangoapps/courseware/management/commands/check_course.py +++ b/lms/djangoapps/courseware/management/commands/check_course.py @@ -10,46 +10,98 @@ from courseware.content_parser import course_file import courseware.module_render import xmodule +import mitxmako.middleware as middleware +middleware.MakoMiddleware() + +def check_names(user, course): + ''' + Complain if any problems have alphanumeric names. + TODO (vshnayder): there are some in 6.002x that don't. Is that actually a problem? + ''' + all_ok = True + print "Confirming all problems have alphanumeric names" + for problem in course.xpath('//problem'): + filename = problem.get('filename') + if not filename.isalnum(): + print "==============> Invalid (non-alphanumeric) filename", filename + all_ok = False + return all_ok + +def check_rendering(user, course): + '''Check that all modules render''' + all_ok = True + print "Confirming all modules render. Nothing should print during this step. " + for module in course.xpath('//problem|//html|//video|//vertical|//sequential|/tab'): + module_class = xmodule.modx_modules[module.tag] + # TODO: Abstract this out in render_module.py + try: + module_class(etree.tostring(module), + module.get('id'), + ajax_url='', + state=None, + track_function = lambda x,y,z:None, + render_function = lambda x: {'content':'','type':'video'}) + except Exception as ex: + print "==============> Error in ", etree.tostring(module) + print "" + print ex + all_ok = False + print "Module render check finished" + return all_ok + +def check_sections(user, course): + all_ok = True + sections_dir = settings.DATA_DIR + "/sections" + print "Checking that all sections exist and parse properly" + if os.path.exists(sections_dir): + print "Checking all section includes are valid XML" + for f in os.listdir(sections_dir): + sectionfile = sections_dir + '/' + f + #print sectionfile + # skip non-xml files: + if not sectionfile.endswith('xml'): + continue + try: + etree.parse(sectionfile) + except Exception as ex: + print "================> Error parsing ", sectionfile + print ex + all_ok = False + print "checked all sections" + else: + print "Skipping check of include files -- no section includes dir ("+sections_dir+")" + return all_ok + class Command(BaseCommand): help = "Does basic validity tests on course.xml." def handle(self, *args, **options): - check = True + all_ok = True + + # TODO (vshnayder): create dummy user objects. Anon, authenticated, staff. + # Check that everything works for each. + # The objects probably shouldn't be actual django users to avoid unneeded + # dependency on django. + + # TODO: use args as list of files to check. Fix loading to work for other files. + sample_user = User.objects.all()[0] + + print "Attempting to load courseware" course = course_file(sample_user) - print "Confirming all problems have alphanumeric names" - for problem in course.xpath('//problem'): - filename = problem.get('filename') - if not filename.isalnum(): - print "==============> Invalid (non-alphanumeric) filename", filename - check = False - print "Confirming all modules render. Nothing should print during this step. " - for module in course.xpath('//problem|//html|//video|//vertical|//sequential|/tab'): - module_class = xmodule.modx_modules[module.tag] - # TODO: Abstract this out in render_module.py - try: - module_class(etree.tostring(module), - module.get('id'), - ajax_url='', - state=None, - track_function = lambda x,y,z:None, - render_function = lambda x: {'content':'','type':'video'}) - except: - print "==============> Error in ", etree.tostring(module) - check = False - print "Module render check finished" - sections_dir = settings.DATA_DIR+"sections" - if os.path.exists(sections_dir): - print "Checking all section includes are valid XML" - for f in os.listdir(sections_dir): - print f - etree.parse(sections_dir+'/'+f) - else: - print "Skipping check of include files -- no section includes dir ("+sections_dir+")" + + to_run = [check_names, + # TODO (vshnayder) : make check_rendering work (use module_render.py), + # turn it on + # check_rendering, + check_sections, + ] + for check in to_run: + all_ok = check(sample_user, course) and all_ok + # TODO: print "Checking course properly annotated with preprocess.py" - - if check: + if all_ok: print 'Courseware passes all checks!' else: print "Courseware fails some checks"