From ee5dae2fa782957dde6f843bf75bc797fe7c5743 Mon Sep 17 00:00:00 2001 From: stv Date: Tue, 25 Nov 2014 22:49:36 -0500 Subject: [PATCH] Remove antiquated cleanup script This script was originally written to clean up XML scripts. In the more than two years since it was originally written, the only updates it has received have been to remove PEP8/PyLint/quality violations. It's hard to imagine that this is still useful to anyone, though I'm open to discussion. --- common/xml_cleanup.py | 115 ------------------------------------------ 1 file changed, 115 deletions(-) delete mode 100755 common/xml_cleanup.py diff --git a/common/xml_cleanup.py b/common/xml_cleanup.py deleted file mode 100755 index 050189a7a3..0000000000 --- a/common/xml_cleanup.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python - -""" -Victor's xml cleanup script. A big pile of useful hacks. Do not use -without carefully reading the code and deciding that this is what you want. - -In particular, the remove-meta option is only intended to be used after pulling out a policy -using the metadata_to_json management command. -""" - -import os -import fnmatch -import re -import sys -from lxml import etree -from collections import defaultdict - -INVALID_CHARS = re.compile(r"[^\w.-]") - - -def clean(value): - """ - Return value, made into a form legal for locations - """ - return re.sub('_+', '_', INVALID_CHARS.sub('_', value)) - - -# category -> set of url_names for that category that we've already seen -used_names = defaultdict(set) - - -def clean_unique(category, name): - cleaned = clean(name) - if cleaned not in used_names[category]: - used_names[category].add(cleaned) - return cleaned - x = 1 - while cleaned + str(x) in used_names[category]: - x += 1 - - # Found one! - cleaned = cleaned + str(x) - used_names[category].add(cleaned) - return cleaned - - -def cleanup(filepath, remove_meta): - # Keys that are exported to the policy file, and so - # can be removed from the xml afterward - to_remove = ('format', 'display_name', - 'graceperiod', 'showanswer', 'rerandomize', - 'start', 'due', 'graded', 'hide_from_toc', - 'ispublic', 'xqa_key') - - try: - print "Cleaning {0}".format(filepath) - with open(filepath) as f: - parser = etree.XMLParser(remove_comments=False) - xml = etree.parse(filepath, parser=parser) - except: - print "Error parsing file {0}".format(filepath) - return - - for node in xml.iter(tag=etree.Element): - attrs = node.attrib - if 'url_name' in attrs: - used_names[node.tag].add(attrs['url_name']) - if 'name' in attrs: - # Replace name with an identical display_name, and a unique url_name - name = attrs['name'] - attrs['display_name'] = name - attrs['url_name'] = clean_unique(node.tag, name) - del attrs['name'] - - if 'url_name' in attrs and 'slug' in attrs: - print "WARNING: {0} has both slug and url_name".format(node) - - if ('url_name' in attrs and 'filename' in attrs and - len(attrs) == 2 and attrs['url_name'] == attrs['filename']): - # This is a pointer tag in disguise. Get rid of the filename. - print 'turning {0}.{1} into a pointer tag'.format(node.tag, attrs['url_name']) - del attrs['filename'] - - if remove_meta: - for attr in to_remove: - if attr in attrs: - del attrs[attr] - - with open(filepath, "w") as f: - f.write(etree.tostring(xml)) - - -def find_replace(directory, filePattern, remove_meta): - for path, dirs, files in os.walk(os.path.abspath(directory)): - for filename in fnmatch.filter(files, filePattern): - filepath = os.path.join(path, filename) - cleanup(filepath, remove_meta) - - -def main(args): - usage = "xml_cleanup [dir] [remove-meta]" - n = len(args) - if n < 1 or n > 2 or (n == 2 and args[1] != 'remove-meta'): - print usage - return - - remove_meta = False - if n == 2: - remove_meta = True - - find_replace(args[0], '*.xml', remove_meta) - - -if __name__ == '__main__': - main(sys.argv[1:])