Remove antiquated cleanup script
This script was originally written to clean up XML scripts. In the more than two years since it was originally written, the only updates it has received have been to remove PEP8/PyLint/quality violations. It's hard to imagine that this is still useful to anyone, though I'm open to discussion.
This commit is contained in:
@@ -1,115 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
Victor's xml cleanup script. A big pile of useful hacks. Do not use
|
||||
without carefully reading the code and deciding that this is what you want.
|
||||
|
||||
In particular, the remove-meta option is only intended to be used after pulling out a policy
|
||||
using the metadata_to_json management command.
|
||||
"""
|
||||
|
||||
import os
|
||||
import fnmatch
|
||||
import re
|
||||
import sys
|
||||
from lxml import etree
|
||||
from collections import defaultdict
|
||||
|
||||
# Matches every character that is not a word character, a dot or a hyphen.
INVALID_CHARS = re.compile(r"[^\w.-]")


def clean(value):
    """
    Return value, made into a form legal for locations.

    Each character matched by INVALID_CHARS is replaced with an underscore,
    and every run of consecutive underscores (including runs already present
    in the input) is then collapsed into a single underscore.
    """
    underscored = INVALID_CHARS.sub('_', value)
    collapsed = re.sub('_+', '_', underscored)
    return collapsed
|
||||
|
||||
|
||||
# category -> set of url_names for that category that we've already seen
used_names = defaultdict(set)


def clean_unique(category, name):
    """
    Return a cleaned form of name that has not yet been used in category.

    The cleaned name is returned as-is when it is still free. Otherwise the
    smallest positive integer suffix that yields an unused name is appended.
    Either way, the returned name is recorded in used_names[category].
    """
    taken = used_names[category]
    candidate = clean(name)

    if candidate in taken:
        # Probe base1, base2, ... until a free name turns up.
        base = candidate
        suffix = 1
        while base + str(suffix) in taken:
            suffix += 1
        candidate = base + str(suffix)

    taken.add(candidate)
    return candidate
|
||||
|
||||
|
||||
def cleanup(filepath, remove_meta):
    """
    Rewrite the attributes of every element in one XML file, in place.

    For each element in the file:

    * an existing ``url_name`` is recorded in ``used_names`` for its tag;
    * a ``name`` attribute is replaced by an identical ``display_name`` plus
      a unique ``url_name`` produced by ``clean_unique``;
    * a warning is printed when both ``slug`` and ``url_name`` are present;
    * when ``url_name`` and ``filename`` are the only two attributes and are
      equal, ``filename`` is dropped;
    * when ``remove_meta`` is true, the attributes listed in ``to_remove``
      are deleted.

    The modified tree is then written back over ``filepath``. If the file
    cannot be parsed, an error is printed and the file is left untouched.

    Arguments:
        filepath: path of the XML file to clean.
        remove_meta: whether to strip the policy-exported attributes.
    """
    # Keys that are exported to the policy file, and so
    # can be removed from the xml afterward
    to_remove = ('format', 'display_name',
                 'graceperiod', 'showanswer', 'rerandomize',
                 'start', 'due', 'graded', 'hide_from_toc',
                 'ispublic', 'xqa_key')

    print("Cleaning {0}".format(filepath))
    try:
        # etree.parse reads straight from the path, so no separate file
        # handle is needed.
        parser = etree.XMLParser(remove_comments=False)
        xml = etree.parse(filepath, parser=parser)
    except Exception:  # pylint: disable=broad-except
        # Best effort: skip any file that cannot be read or parsed, but let
        # KeyboardInterrupt / SystemExit propagate so the run can be aborted.
        print("Error parsing file {0}".format(filepath))
        return

    for node in xml.iter(tag=etree.Element):
        attrs = node.attrib
        if 'url_name' in attrs:
            used_names[node.tag].add(attrs['url_name'])
        if 'name' in attrs:
            # Replace name with an identical display_name, and a unique url_name
            name = attrs['name']
            attrs['display_name'] = name
            attrs['url_name'] = clean_unique(node.tag, name)
            del attrs['name']

        if 'url_name' in attrs and 'slug' in attrs:
            print("WARNING: {0} has both slug and url_name".format(node))

        if ('url_name' in attrs and 'filename' in attrs and
                len(attrs) == 2 and attrs['url_name'] == attrs['filename']):
            # This is a pointer tag in disguise. Get rid of the filename.
            print('turning {0}.{1} into a pointer tag'.format(node.tag, attrs['url_name']))
            del attrs['filename']

        if remove_meta:
            for attr in to_remove:
                if attr in attrs:
                    del attrs[attr]

    # etree.tostring returns a byte string by default, so write in binary
    # mode to avoid newline translation and text/bytes mismatches.
    with open(filepath, "wb") as f:
        f.write(etree.tostring(xml))
|
||||
|
||||
|
||||
def find_replace(directory, filePattern, remove_meta):
    """
    Run cleanup on every file under directory whose name matches filePattern.

    The directory is made absolute and walked recursively; filePattern is an
    fnmatch-style pattern applied to the bare file names in each folder.
    remove_meta is passed through unchanged to cleanup.
    """
    root = os.path.abspath(directory)
    for folder, _subdirs, names in os.walk(root):
        matching = fnmatch.filter(names, filePattern)
        for name in matching:
            cleanup(os.path.join(folder, name), remove_meta)
|
||||
|
||||
|
||||
def main(args):
    """
    Command-line entry point: validate args and clean a directory tree.

    args must hold a directory, optionally followed by the literal
    'remove-meta'. Anything else prints the usage string and returns
    without doing any work. Otherwise every '*.xml' file under the
    directory is cleaned, with metadata removal enabled only when the
    'remove-meta' flag was given.
    """
    usage = "xml_cleanup [dir] [remove-meta]"
    arg_count = len(args)
    remove_meta = arg_count == 2

    # Valid forms: exactly one argument, or two where the second is the flag.
    valid = arg_count in (1, 2) and (not remove_meta or args[1] == 'remove-meta')
    if not valid:
        print(usage)
        return

    find_replace(args[0], '*.xml', remove_meta)
|
||||
|
||||
|
||||
# Run the cleanup only when executed as a script, not when imported.
if __name__ == '__main__':
    cli_args = sys.argv[1:]  # drop the program name
    main(cli_args)
|
||||
Reference in New Issue
Block a user