#!/usr/bin/env python

"""
Victor's xml cleanup script.  A big pile of useful hacks.  Do not use
without carefully reading the code and deciding that this is what you want.

In particular, the remove-meta option is only intended to be used after
pulling out a policy using the metadata_to_json management command.
"""

import fnmatch
import os
import re
import sys
from collections import defaultdict

# Matches every character that is not a word character, a dot or a dash.
INVALID_CHARS = re.compile(r"[^\w.-]")


def clean(value):
    """
    Return value, made into a form legal for locations.

    Every character matched by INVALID_CHARS becomes an underscore, and
    any run of consecutive underscores is then collapsed into one.
    """
    return re.sub('_+', '_', INVALID_CHARS.sub('_', value))


# category -> set of url_names for that category that we've already seen
used_names = defaultdict(set)


def clean_unique(category, name):
    """
    Return a cleaned version of name that is unused within category.

    The cleaned name is returned as-is the first time it is seen for the
    category.  On a collision the smallest integer suffix (1, 2, ...) that
    yields an unused name is appended.  The returned name is recorded in
    the module-level used_names registry.
    """
    cleaned = clean(name)
    if cleaned not in used_names[category]:
        used_names[category].add(cleaned)
        return cleaned

    x = 1
    while cleaned + str(x) in used_names[category]:
        x += 1

    # Found one!
    cleaned = cleaned + str(x)
    used_names[category].add(cleaned)
    return cleaned


def cleanup(filepath, remove_meta):
    """
    Rewrite the xml file at filepath in place.

    For every element:
      * an existing url_name is recorded as used for that tag;
      * a name attribute is replaced by an identical display_name plus a
        unique url_name derived from it;
      * a warning is printed if both slug and url_name are present;
      * if url_name and filename are the only attributes and are equal,
        filename is dropped (the element is really a pointer tag);
      * if remove_meta is true, the attributes that get exported to the
        policy file are deleted.

    A file that cannot be read or parsed is reported and left untouched.
    """
    # Imported here rather than at module level so that the name-cleaning
    # helpers above stay importable when lxml is not installed.
    from lxml import etree

    # Keys that are exported to the policy file, and so
    # can be removed from the xml afterward
    to_remove = ('format', 'display_name',
                 'graceperiod', 'showanswer', 'rerandomize',
                 'start', 'due', 'graded', 'hide_from_toc',
                 'ispublic', 'xqa_key')

    print("Cleaning {0}".format(filepath))
    try:
        parser = etree.XMLParser(remove_comments=False)
        xml = etree.parse(filepath, parser=parser)
    except (etree.LxmlError, EnvironmentError):
        # Best effort: skip files that are unreadable or not well-formed,
        # but let KeyboardInterrupt / SystemExit propagate.
        print("Error parsing file {0}".format(filepath))
        return

    # tag=etree.Element restricts iteration to real elements.
    for node in xml.iter(tag=etree.Element):
        attrs = node.attrib
        if 'url_name' in attrs:
            used_names[node.tag].add(attrs['url_name'])
        if 'name' in attrs:
            # Replace name with an identical display_name, and a unique url_name
            name = attrs['name']
            attrs['display_name'] = name
            attrs['url_name'] = clean_unique(node.tag, name)
            del attrs['name']

        if 'url_name' in attrs and 'slug' in attrs:
            print("WARNING: {0} has both slug and url_name".format(node))

        if ('url_name' in attrs and 'filename' in attrs and
                len(attrs) == 2 and attrs['url_name'] == attrs['filename']):
            # This is a pointer tag in disguise.  Get rid of the filename.
            print('turning {0}.{1} into a pointer tag'.format(
                node.tag, attrs['url_name']))
            del attrs['filename']

        if remove_meta:
            for attr in to_remove:
                if attr in attrs:
                    del attrs[attr]

    # etree.tostring returns bytes, so the file must be opened in binary mode.
    with open(filepath, "wb") as f:
        f.write(etree.tostring(xml))


def find_replace(directory, filePattern, remove_meta):
    """
    Run cleanup on every file under directory whose name matches filePattern.

    The directory tree is walked recursively; remove_meta is passed through
    to cleanup unchanged.
    """
    for path, dirs, files in os.walk(os.path.abspath(directory)):
        for filename in fnmatch.filter(files, filePattern):
            filepath = os.path.join(path, filename)
            cleanup(filepath, remove_meta)


def main(args):
    """
    Command-line entry point.

    args is the argument list without the program name: a directory,
    optionally followed by the literal word 'remove-meta'.  Anything else
    prints the usage string and returns without doing any work.
    """
    usage = "xml_cleanup [dir] [remove-meta]"
    n = len(args)
    if n < 1 or n > 2 or (n == 2 and args[1] != 'remove-meta'):
        print(usage)
        return

    remove_meta = False
    if n == 2:
        remove_meta = True

    find_replace(args[0], '*.xml', remove_meta)


if __name__ == '__main__':
    main(sys.argv[1:])