Merge pull request #429 from MITx/feature/victor/strip-metadata
Feature/victor/strip metadata
This commit is contained in:
@@ -21,6 +21,7 @@ def process_includes(fn):
|
||||
xml_object = etree.fromstring(xml_data)
|
||||
next_include = xml_object.find('include')
|
||||
while next_include is not None:
|
||||
system.error_tracker("WARNING: the <include> tag is deprecated, and will go away.")
|
||||
file = next_include.get('file')
|
||||
parent = next_include.getparent()
|
||||
|
||||
@@ -67,6 +68,8 @@ class SemanticSectionDescriptor(XModuleDescriptor):
|
||||
the child element
|
||||
"""
|
||||
xml_object = etree.fromstring(xml_data)
|
||||
system.error_tracker("WARNING: the <{}> tag is deprecated. Please do not use in new content."
|
||||
.format(xml_object.tag))
|
||||
|
||||
if len(xml_object) == 1:
|
||||
for (key, val) in xml_object.items():
|
||||
@@ -74,7 +77,7 @@ class SemanticSectionDescriptor(XModuleDescriptor):
|
||||
|
||||
return system.process_xml(etree.tostring(xml_object[0]))
|
||||
else:
|
||||
xml_object.tag = 'sequence'
|
||||
xml_object.tag = 'sequential'
|
||||
return system.process_xml(etree.tostring(xml_object))
|
||||
|
||||
|
||||
@@ -83,10 +86,14 @@ class TranslateCustomTagDescriptor(XModuleDescriptor):
|
||||
def from_xml(cls, xml_data, system, org=None, course=None):
|
||||
"""
|
||||
Transforms the xml_data from <$custom_tag attr="" attr=""/> to
|
||||
<customtag attr="" attr=""><impl>$custom_tag</impl></customtag>
|
||||
<customtag attr="" attr="" impl="$custom_tag"/>
|
||||
"""
|
||||
|
||||
xml_object = etree.fromstring(xml_data)
|
||||
system.error_tracker('WARNING: the <{tag}> tag is deprecated. '
|
||||
'Instead, use <customtag impl="{tag}" attr1="..." attr2="..."/>. '
|
||||
.format(tag=xml_object.tag))
|
||||
|
||||
tag = xml_object.tag
|
||||
xml_object.tag = 'customtag'
|
||||
xml_object.attrib['impl'] = tag
|
||||
|
||||
@@ -237,7 +237,7 @@ class CapaModule(XModule):
|
||||
else:
|
||||
raise
|
||||
|
||||
content = {'name': self.metadata['display_name'],
|
||||
content = {'name': self.display_name,
|
||||
'html': html,
|
||||
'weight': self.weight,
|
||||
}
|
||||
@@ -467,7 +467,7 @@ class CapaModule(XModule):
|
||||
return {'success': msg}
|
||||
log.exception("Error in capa_module problem checking")
|
||||
raise Exception("error in capa_module")
|
||||
|
||||
|
||||
self.attempts = self.attempts + 1
|
||||
self.lcp.done = True
|
||||
|
||||
|
||||
@@ -140,7 +140,7 @@ class CourseDescriptor(SequenceDescriptor):
|
||||
|
||||
@property
|
||||
def title(self):
|
||||
return self.metadata['display_name']
|
||||
return self.display_name
|
||||
|
||||
@property
|
||||
def number(self):
|
||||
|
||||
@@ -112,8 +112,8 @@ class TestMongoModuleStore(object):
|
||||
should_work = (
|
||||
("i4x://edX/toy/video/Welcome",
|
||||
("edX/toy/2012_Fall", "Overview", "Welcome", None)),
|
||||
("i4x://edX/toy/html/toylab",
|
||||
("edX/toy/2012_Fall", "Overview", "Toy_Videos", None)),
|
||||
("i4x://edX/toy/chapter/Overview",
|
||||
("edX/toy/2012_Fall", "Overview", None, None)),
|
||||
)
|
||||
for location, expected in should_work:
|
||||
assert_equals(path_to_location(self.store, location), expected)
|
||||
|
||||
@@ -31,7 +31,8 @@ def clean_out_mako_templating(xml_string):
|
||||
return xml_string
|
||||
|
||||
class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
|
||||
def __init__(self, xmlstore, org, course, course_dir, error_tracker, **kwargs):
|
||||
def __init__(self, xmlstore, org, course, course_dir,
|
||||
policy, error_tracker, **kwargs):
|
||||
"""
|
||||
A class that handles loading from xml. Does some munging to ensure that
|
||||
all elements have unique slugs.
|
||||
@@ -97,7 +98,7 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
|
||||
MakoDescriptorSystem.__init__(self, load_item, resources_fs,
|
||||
error_tracker, render_template, **kwargs)
|
||||
XMLParsingSystem.__init__(self, load_item, resources_fs,
|
||||
error_tracker, process_xml, **kwargs)
|
||||
error_tracker, process_xml, policy, **kwargs)
|
||||
|
||||
|
||||
class XMLModuleStore(ModuleStoreBase):
|
||||
@@ -184,6 +185,7 @@ class XMLModuleStore(ModuleStoreBase):
|
||||
if not os.path.exists(policy_path):
|
||||
return {}
|
||||
try:
|
||||
log.debug("Loading policy from {}".format(policy_path))
|
||||
with open(policy_path) as f:
|
||||
return json.load(f)
|
||||
except (IOError, ValueError) as err:
|
||||
@@ -232,13 +234,16 @@ class XMLModuleStore(ModuleStoreBase):
|
||||
tracker(msg)
|
||||
course = course_dir
|
||||
|
||||
system = ImportSystem(self, org, course, course_dir, tracker)
|
||||
url_name = course_data.get('url_name')
|
||||
if url_name:
|
||||
policy_path = self.data_dir / course_dir / 'policies' / '{}.json'.format(url_name)
|
||||
policy = self.load_policy(policy_path, tracker)
|
||||
else:
|
||||
policy = {}
|
||||
|
||||
system = ImportSystem(self, org, course, course_dir, policy, tracker)
|
||||
|
||||
course_descriptor = system.process_xml(etree.tostring(course_data))
|
||||
policy_path = self.data_dir / course_dir / 'policy.json'
|
||||
|
||||
policy = self.load_policy(policy_path, tracker)
|
||||
XModuleDescriptor.apply_policy(course_descriptor, policy)
|
||||
|
||||
# NOTE: The descriptors end up loading somewhat bottom up, which
|
||||
# breaks metadata inheritance via get_children(). Instead
|
||||
|
||||
@@ -76,7 +76,7 @@ class SequenceModule(XModule):
|
||||
contents.append({
|
||||
'content': child.get_html(),
|
||||
'title': "\n".join(
|
||||
grand_child.metadata['display_name'].strip()
|
||||
grand_child.display_name.strip()
|
||||
for grand_child in child.get_children()
|
||||
if 'display_name' in grand_child.metadata
|
||||
),
|
||||
@@ -107,7 +107,7 @@ class SequenceModule(XModule):
|
||||
class SequenceDescriptor(MakoModuleDescriptor, XmlDescriptor):
|
||||
mako_template = 'widgets/sequence-edit.html'
|
||||
module_class = SequenceModule
|
||||
|
||||
|
||||
stores_state = True # For remembering where in the sequence the student is
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -42,9 +42,9 @@ class DummySystem(XMLParsingSystem):
|
||||
descriptor.get_children()
|
||||
return descriptor
|
||||
|
||||
|
||||
policy = {}
|
||||
XMLParsingSystem.__init__(self, load_item, self.resources_fs,
|
||||
self.errorlog.tracker, process_xml)
|
||||
self.errorlog.tracker, process_xml, policy)
|
||||
|
||||
def render_template(self, template, context):
|
||||
raise Exception("Shouldn't be called")
|
||||
|
||||
@@ -425,23 +425,6 @@ class XModuleDescriptor(Plugin, HTMLSnippet):
|
||||
if k not in self._inherited_metadata)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def apply_policy(node, policy):
|
||||
"""
|
||||
Given a descriptor, traverse all its descendants and update its metadata
|
||||
with the policy.
|
||||
|
||||
Notes:
|
||||
- this does not propagate inherited metadata. The caller should
|
||||
call compute_inherited_metadata after applying the policy.
|
||||
- metadata specified in the policy overrides metadata in the xml
|
||||
"""
|
||||
k = policy_key(node.location)
|
||||
if k in policy:
|
||||
node.metadata.update(policy[k])
|
||||
for c in node.get_children():
|
||||
XModuleDescriptor.apply_policy(c, policy)
|
||||
|
||||
@staticmethod
|
||||
def compute_inherited_metadata(node):
|
||||
"""Given a descriptor, traverse all of its descendants and do metadata
|
||||
@@ -697,16 +680,19 @@ class DescriptorSystem(object):
|
||||
|
||||
|
||||
class XMLParsingSystem(DescriptorSystem):
|
||||
def __init__(self, load_item, resources_fs, error_tracker, process_xml, **kwargs):
|
||||
def __init__(self, load_item, resources_fs, error_tracker, process_xml, policy, **kwargs):
|
||||
"""
|
||||
load_item, resources_fs, error_tracker: see DescriptorSystem
|
||||
|
||||
policy: a policy dictionary for overriding xml metadata
|
||||
|
||||
process_xml: Takes an xml string, and returns a XModuleDescriptor
|
||||
created from that xml
|
||||
"""
|
||||
DescriptorSystem.__init__(self, load_item, resources_fs, error_tracker,
|
||||
**kwargs)
|
||||
self.process_xml = process_xml
|
||||
self.policy = policy
|
||||
|
||||
|
||||
class ModuleSystem(object):
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from xmodule.x_module import XModuleDescriptor
|
||||
from xmodule.x_module import (XModuleDescriptor, policy_key)
|
||||
from xmodule.modulestore import Location
|
||||
from lxml import etree
|
||||
import json
|
||||
@@ -270,6 +270,11 @@ class XmlDescriptor(XModuleDescriptor):
|
||||
log.debug('Error %s in loading metadata %s' % (err,dmdata))
|
||||
metadata['definition_metadata_err'] = str(err)
|
||||
|
||||
# Set/override any metadata specified by policy
|
||||
k = policy_key(location)
|
||||
if k in system.policy:
|
||||
metadata.update(system.policy[k])
|
||||
|
||||
return cls(
|
||||
system,
|
||||
definition,
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
<course name="Toy Course" org="edX" course="toy" graceperiod="1 day 5 hours 59 minutes 59 seconds" slug="2012_Fall" start="2015-07-17T12:00">
|
||||
<chapter name="Overview">
|
||||
<videosequence format="Lecture Sequence" name="Toy Videos">
|
||||
<html name="toylab" filename="toylab"/>
|
||||
<video name="Video Resources" youtube="1.0:1bK-WdDi6Qw"/>
|
||||
</videosequence>
|
||||
<video name="Welcome" youtube="1.0:p2Q6BrNhdh8"/>
|
||||
</chapter>
|
||||
</course>
|
||||
1
common/test/data/toy/course.xml
Symbolic link
1
common/test/data/toy/course.xml
Symbolic link
@@ -0,0 +1 @@
|
||||
roots/2012_Fall.xml
|
||||
9
common/test/data/toy/course/2012_Fall.xml
Normal file
9
common/test/data/toy/course/2012_Fall.xml
Normal file
@@ -0,0 +1,9 @@
|
||||
<course>
|
||||
<chapter url_name="Overview">
|
||||
<videosequence url_name="Toy_Videos">
|
||||
<html url_name="toylab"/>
|
||||
<video url_name="Video_Resources" youtube="1.0:1bK-WdDi6Qw"/>
|
||||
</videosequence>
|
||||
<video url_name="Welcome" youtube="1.0:p2Q6BrNhdh8"/>
|
||||
</chapter>
|
||||
</course>
|
||||
23
common/test/data/toy/policies/2012_Fall.json
Normal file
23
common/test/data/toy/policies/2012_Fall.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"course/2012_Fall": {
|
||||
"graceperiod": "2 days 5 hours 59 minutes 59 seconds",
|
||||
"start": "2015-07-17T12:00",
|
||||
"display_name": "Toy Course"
|
||||
},
|
||||
"chapter/Overview": {
|
||||
"display_name": "Overview"
|
||||
},
|
||||
"videosequence/Toy_Videos": {
|
||||
"display_name": "Toy Videos",
|
||||
"format": "Lecture Sequence"
|
||||
},
|
||||
"html/toylab": {
|
||||
"display_name": "Toy lab"
|
||||
},
|
||||
"video/Video_Resources": {
|
||||
"display_name": "Video Resources"
|
||||
},
|
||||
"video/Welcome": {
|
||||
"display_name": "Welcome"
|
||||
}
|
||||
}
|
||||
1
common/test/data/toy/roots/2012_Fall.xml
Normal file
1
common/test/data/toy/roots/2012_Fall.xml
Normal file
@@ -0,0 +1 @@
|
||||
<course org="edX" course="toy" url_name="2012_Fall"/>
|
||||
112
common/xml_cleanup.py
Executable file
112
common/xml_cleanup.py
Executable file
@@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
Victor's xml cleanup script. A big pile of useful hacks. Do not use
|
||||
without carefully reading the code and deciding that this is what you want.
|
||||
|
||||
In particular, the remove-meta option is only intended to be used after pulling out a policy
|
||||
using the metadata_to_json management command.
|
||||
"""
|
||||
|
||||
import os, fnmatch, re, sys
|
||||
from lxml import etree
|
||||
from collections import defaultdict
|
||||
|
||||
INVALID_CHARS = re.compile(r"[^\w.-]")
|
||||
|
||||
def clean(value):
|
||||
"""
|
||||
Return value, made into a form legal for locations
|
||||
"""
|
||||
return re.sub('_+', '_', INVALID_CHARS.sub('_', value))
|
||||
|
||||
|
||||
# category -> set of url_names for that category that we've already seen
|
||||
used_names = defaultdict(set)
|
||||
|
||||
def clean_unique(category, name):
|
||||
cleaned = clean(name)
|
||||
if cleaned not in used_names[category]:
|
||||
used_names[category].add(cleaned)
|
||||
return cleaned
|
||||
x = 1
|
||||
while cleaned + str(x) in used_names[category]:
|
||||
x += 1
|
||||
|
||||
# Found one!
|
||||
cleaned = cleaned + str(x)
|
||||
used_names[category].add(cleaned)
|
||||
return cleaned
|
||||
|
||||
def cleanup(filepath, remove_meta):
|
||||
# Keys that are exported to the policy file, and so
|
||||
# can be removed from the xml afterward
|
||||
to_remove = ('format', 'display_name',
|
||||
'graceperiod', 'showanswer', 'rerandomize',
|
||||
'start', 'due', 'graded', 'hide_from_toc',
|
||||
'ispublic', 'xqa_key')
|
||||
|
||||
try:
|
||||
print "Cleaning {}".format(filepath)
|
||||
with open(filepath) as f:
|
||||
parser = etree.XMLParser(remove_comments=False)
|
||||
xml = etree.parse(filepath, parser=parser)
|
||||
except:
|
||||
print "Error parsing file {}".format(filepath)
|
||||
return
|
||||
|
||||
for node in xml.iter(tag=etree.Element):
|
||||
attrs = node.attrib
|
||||
if 'url_name' in attrs:
|
||||
used_names[node.tag].add(attrs['url_name'])
|
||||
if 'name' in attrs:
|
||||
# Replace name with an identical display_name, and a unique url_name
|
||||
name = attrs['name']
|
||||
attrs['display_name'] = name
|
||||
attrs['url_name'] = clean_unique(node.tag, name)
|
||||
del attrs['name']
|
||||
|
||||
if 'url_name' in attrs and 'slug' in attrs:
|
||||
print "WARNING: {} has both slug and url_name"
|
||||
|
||||
if ('url_name' in attrs and 'filename' in attrs and
|
||||
len(attrs)==2 and attrs['url_name'] == attrs['filename']):
|
||||
# This is a pointer tag in disguise. Get rid of the filename.
|
||||
print 'turning {}.{} into a pointer tag'.format(node.tag, attrs['url_name'])
|
||||
del attrs['filename']
|
||||
|
||||
if remove_meta:
|
||||
for attr in to_remove:
|
||||
if attr in attrs:
|
||||
del attrs[attr]
|
||||
|
||||
|
||||
with open(filepath, "w") as f:
|
||||
f.write(etree.tostring(xml))
|
||||
|
||||
|
||||
def find_replace(directory, filePattern, remove_meta):
|
||||
for path, dirs, files in os.walk(os.path.abspath(directory)):
|
||||
for filename in fnmatch.filter(files, filePattern):
|
||||
filepath = os.path.join(path, filename)
|
||||
cleanup(filepath, remove_meta)
|
||||
|
||||
|
||||
def main(args):
|
||||
usage = "xml_cleanup [dir] [remove-meta]"
|
||||
n = len(args)
|
||||
if n < 1 or n > 2 or (n == 2 and args[1] != 'remove-meta'):
|
||||
print usage
|
||||
return
|
||||
|
||||
remove_meta = False
|
||||
if n == 2:
|
||||
remove_meta = True
|
||||
|
||||
find_replace(args[0], '*.xml', remove_meta)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(sys.argv[1:])
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ def manage_modulestores(request,reload_dir=None):
|
||||
ip = request.META.get('HTTP_X_REAL_IP','') # nginx reverse proxy
|
||||
if not ip:
|
||||
ip = request.META.get('REMOTE_ADDR','None')
|
||||
|
||||
|
||||
if LOCAL_DEBUG:
|
||||
html += '<h3>IP address: %s ' % ip
|
||||
html += '<h3>User: %s ' % request.user
|
||||
@@ -48,7 +48,7 @@ def manage_modulestores(request,reload_dir=None):
|
||||
html += 'Permission denied'
|
||||
html += "</body></html>"
|
||||
log.debug('request denied, ALLOWED_IPS=%s' % ALLOWED_IPS)
|
||||
return HttpResponse(html)
|
||||
return HttpResponse(html)
|
||||
|
||||
#----------------------------------------
|
||||
# reload course if specified
|
||||
@@ -74,10 +74,10 @@ def manage_modulestores(request,reload_dir=None):
|
||||
#----------------------------------------
|
||||
|
||||
dumpfields = ['definition','location','metadata']
|
||||
|
||||
|
||||
for cdir, course in def_ms.courses.items():
|
||||
html += '<hr width="100%"/>'
|
||||
html += '<h2>Course: %s (%s)</h2>' % (course.metadata['display_name'],cdir)
|
||||
html += '<h2>Course: %s (%s)</h2>' % (course.display_name,cdir)
|
||||
|
||||
for field in dumpfields:
|
||||
data = getattr(course,field)
|
||||
@@ -89,7 +89,7 @@ def manage_modulestores(request,reload_dir=None):
|
||||
html += '</ul>'
|
||||
else:
|
||||
html += '<ul><li>%s</li></ul>' % escape(data)
|
||||
|
||||
|
||||
|
||||
#----------------------------------------
|
||||
|
||||
@@ -107,4 +107,4 @@ def manage_modulestores(request,reload_dir=None):
|
||||
log.debug('def_ms=%s' % unicode(def_ms))
|
||||
|
||||
html += "</body></html>"
|
||||
return HttpResponse(html)
|
||||
return HttpResponse(html)
|
||||
|
||||
Reference in New Issue
Block a user