From f81f94ec674305d03ed877e34d1aa024ef922205 Mon Sep 17 00:00:00 2001
From: Victor Shnayder <victor@mitx.mit.edu>
Date: Wed, 22 Aug 2012 09:59:01 -0400
Subject: [PATCH 1/2] Allow location url_names to contain ':', mapping to / on
 file load

* New format: {tag}://{org}/{course}/{category}/{name}[@{revision}]
* Updated tests, code
* Added test chapter with : in url_name in toy course
* added test html page with : in url_name
* added a note to docs
---
 cms/djangoapps/github_sync/tests/__init__.py  |  2 +-
 common/lib/xmodule/xmodule/html_module.py     | 30 ++++++++++++----
 .../xmodule/xmodule/modulestore/__init__.py   | 36 +++++++++++++------
 .../modulestore/tests/test_location.py        | 32 +++++++++++------
 .../lib/xmodule/xmodule/tests/test_import.py  | 34 ++++++++++++++++++
 common/lib/xmodule/xmodule/xml_module.py      | 16 ++++++---
 common/test/data/toy/chapter/secret/magic.xml |  3 ++
 common/test/data/toy/course/2012_Fall.xml     |  3 +-
 .../data/toy/html/{ => secret}/toylab.html    |  0
 .../data/toy/html/{ => secret}/toylab.xml     |  0
 common/test/data/toy/policies/2012_Fall.json  |  2 +-
 11 files changed, 123 insertions(+), 35 deletions(-)
 create mode 100644 common/test/data/toy/chapter/secret/magic.xml
 rename common/test/data/toy/html/{ => secret}/toylab.html (100%)
 rename common/test/data/toy/html/{ => secret}/toylab.xml (100%)

diff --git a/cms/djangoapps/github_sync/tests/__init__.py b/cms/djangoapps/github_sync/tests/__init__.py
index 581ac3cb25..e2b9a909a7 100644
--- a/cms/djangoapps/github_sync/tests/__init__.py
+++ b/cms/djangoapps/github_sync/tests/__init__.py
@@ -61,7 +61,7 @@ class GithubSyncTestCase(TestCase):
         self.assertIn(
             Location('i4x://edX/toy/chapter/Overview'),
             [child.location for child in self.import_course.get_children()])
-        self.assertEquals(1, len(self.import_course.get_children()))
+        self.assertEquals(2, len(self.import_course.get_children()))
 
     @patch('github_sync.sync_with_github')
     def test_sync_all_with_github(self, sync_with_github):
diff --git a/common/lib/xmodule/xmodule/html_module.py b/common/lib/xmodule/xmodule/html_module.py
index 6c424c26f2..3f834c335c 100644
--- a/common/lib/xmodule/xmodule/html_module.py
+++ b/common/lib/xmodule/xmodule/html_module.py
@@ -4,9 +4,10 @@ import logging
 import os
 import sys
 from lxml import etree
+from path import path
 
 from .x_module import XModule
-from .xml_module import XmlDescriptor
+from .xml_module import XmlDescriptor, name_to_pathname
 from .editing_module import EditingDescriptor
 from .stringify import stringify_children
 from .html_checker import check_html
@@ -75,9 +76,19 @@ class HtmlDescriptor(XmlDescriptor, EditingDescriptor):
             cls.clean_metadata_from_xml(definition_xml)
             return {'data': stringify_children(definition_xml)}
         else:
-            # html is special.  cls.filename_extension is 'xml', but if 'filename' is in the definition,
-            # that means to load from .html
-            filepath = "{category}/{name}.html".format(category='html', name=filename)
+            # html is special.  cls.filename_extension is 'xml', but
+            # if 'filename' is in the definition, that means to load
+            # from .html
+            # 'filename' in html pointers is a relative path
+            # (not same as 'html/blah.html' when the pointer is in a directory itself)
+            pointer_path = "{category}/{url_path}".format(category='html',
+                                                  url_path=name_to_pathname(location.name))
+            base = path(pointer_path).dirname()
+            #log.debug("base = {0}, base.dirname={1}, filename={2}".format(base, base.dirname(), filename))
+            filepath = "{base}/{name}.html".format(base=base, name=filename)
+            #log.debug("looking for html file for {0} at {1}".format(location, filepath))
+
+
 
             # VS[compat]
             # TODO (cpennington): If the file doesn't exist at the right path,
@@ -128,13 +139,18 @@ class HtmlDescriptor(XmlDescriptor, EditingDescriptor):
             pass
 
         # Not proper format.  Write html to file, return an empty tag
-        filepath = u'{category}/{name}.html'.format(category=self.category,
-                                                    name=self.url_name)
+        pathname = name_to_pathname(self.url_name)
+        pathdir = path(pathname).dirname()
+        filepath = u'{category}/{pathname}.html'.format(category=self.category,
+                                                    pathname=pathname)
 
         resource_fs.makedir(os.path.dirname(filepath), allow_recreate=True)
         with resource_fs.open(filepath, 'w') as file:
             file.write(self.definition['data'])
 
+        # write out the relative name
+        relname = path(pathname).basename()
+
         elt = etree.Element('html')
-        elt.set("filename", self.url_name)
+        elt.set("filename", relname)
         return elt
diff --git a/common/lib/xmodule/xmodule/modulestore/__init__.py b/common/lib/xmodule/xmodule/modulestore/__init__.py
index d6dd85deea..c3bbc1e508 100644
--- a/common/lib/xmodule/xmodule/modulestore/__init__.py
+++ b/common/lib/xmodule/xmodule/modulestore/__init__.py
@@ -13,18 +13,21 @@ from xmodule.errortracker import ErrorLog, make_error_tracker
 
 log = logging.getLogger('mitx.' + 'modulestore')
 
+
 URL_RE = re.compile("""
     (?P<tag>[^:]+)://
     (?P<org>[^/]+)/
     (?P<course>[^/]+)/
     (?P<category>[^/]+)/
-    (?P<name>[^/]+)
-    (/(?P<revision>[^/]+))?
+    (?P<name>[^@]+)
+    (@(?P<revision>[^/]+))?
     """, re.VERBOSE)
 
 # TODO (cpennington): We should decide whether we want to expand the
 # list of valid characters in a location
 INVALID_CHARS = re.compile(r"[^\w.-]")
+# Names are allowed to have colons.
+INVALID_CHARS_NAME = re.compile(r"[^\w.:-]")
 
 _LocationBase = namedtuple('LocationBase', 'tag org course category name revision')
 
@@ -34,7 +37,7 @@ class Location(_LocationBase):
     Encodes a location.
 
     Locations representations of URLs of the
-    form {tag}://{org}/{course}/{category}/{name}[/{revision}]
+    form {tag}://{org}/{course}/{category}/{name}[@{revision}]
 
     However, they can also be represented a dictionaries (specifying each component),
     tuples or list (specified in order), or as strings of the url
@@ -81,7 +84,7 @@ class Location(_LocationBase):
 
         location - Can be any of the following types:
             string: should be of the form
-                    {tag}://{org}/{course}/{category}/{name}[/{revision}]
+                    {tag}://{org}/{course}/{category}/{name}[@{revision}]
 
             list: should be of the form [tag, org, course, category, name, revision]
 
@@ -99,10 +102,11 @@ class Location(_LocationBase):
         ommitted.
 
         Components must be composed of alphanumeric characters, or the
-        characters '_', '-', and '.'
+        characters '_', '-', and '.'.  The name component is additionally allowed to have ':',
+        which is interpreted specially for xml storage.
 
-        Components may be set to None, which may be interpreted by some contexts
-        to mean wildcard selection
+        Components may be set to None, which may be interpreted in some contexts
+        to mean wildcard selection.
         """
 
 
@@ -116,14 +120,23 @@ class Location(_LocationBase):
             return _LocationBase.__new__(_cls, *([None] * 6))
 
         def check_dict(dict_):
-            check_list(dict_.itervalues())
+            # Order matters, so flatten out into a list
+            keys = ['tag', 'org', 'course', 'category', 'name', 'revision']
+            list_ = [dict_[k] for k in keys]
+            check_list(list_)
 
         def check_list(list_):
-            for val in list_:
-                if val is not None and INVALID_CHARS.search(val) is not None:
+            def check(val, regexp):
+                if val is not None and regexp.search(val) is not None:
                     log.debug('invalid characters val="%s", list_="%s"' % (val, list_))
                     raise InvalidLocationError(location)
 
+            list_ = list(list_)
+            for val in list_[:4] + [list_[5]]:
+                check(val, INVALID_CHARS)
+            # names allow colons
+            check(list_[4], INVALID_CHARS_NAME)
+
         if isinstance(location, basestring):
             match = URL_RE.match(location)
             if match is None:
@@ -162,7 +175,7 @@ class Location(_LocationBase):
         """
         url = "{tag}://{org}/{course}/{category}/{name}".format(**self.dict())
         if self.revision:
-            url += "/" + self.revision
+            url += "@" + self.revision
         return url
 
     def html_id(self):
@@ -170,6 +183,7 @@ class Location(_LocationBase):
         Return a string with a version of the location that is safe for use in
         html id attributes
         """
+        # TODO: is ':' ok in html ids?
         return "-".join(str(v) for v in self.list()
                         if v is not None).replace('.', '_')
 
diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_location.py b/common/lib/xmodule/xmodule/modulestore/tests/test_location.py
index 70c6351685..529b1f88eb 100644
--- a/common/lib/xmodule/xmodule/modulestore/tests/test_location.py
+++ b/common/lib/xmodule/xmodule/modulestore/tests/test_location.py
@@ -10,7 +10,7 @@ def check_string_roundtrip(url):
 
 def test_string_roundtrip():
     check_string_roundtrip("tag://org/course/category/name")
-    check_string_roundtrip("tag://org/course/category/name/revision")
+    check_string_roundtrip("tag://org/course/category/name@revision")
 
 
 input_dict = {
@@ -21,18 +21,28 @@ input_dict = {
     'org': 'org'
 }
 
+
+also_valid_dict = {
+    'tag': 'tag',
+    'course': 'course',
+    'category': 'category',
+    'name': 'name:more_name',
+    'org': 'org'
+}
+
+
 input_list = ['tag', 'org', 'course', 'category', 'name']
 
 input_str = "tag://org/course/category/name"
-input_str_rev = "tag://org/course/category/name/revision"
+input_str_rev = "tag://org/course/category/name@revision"
 
-valid = (input_list, input_dict, input_str, input_str_rev)
+valid = (input_list, input_dict, input_str, input_str_rev, also_valid_dict)
 
 invalid_dict = {
     'tag': 'tag',
     'course': 'course',
     'category': 'category',
-    'name': 'name/more_name',
+    'name': 'name@more_name',
     'org': 'org'
 }
 
@@ -45,8 +55,9 @@ invalid_dict2 = {
 }
 
 invalid = ("foo", ["foo"], ["foo", "bar"],
-           ["foo", "bar", "baz", "blat", "foo/bar"],
-           "tag://org/course/category/name with spaces/revision",
+           ["foo", "bar", "baz", "blat:blat", "foo:bar"],  # ':' ok in name, not in category
+           "tag://org/course/category/name with spaces@revision",
+           "tag://org/course/category/name/with/slashes@revision",
            invalid_dict,
            invalid_dict2)
 
@@ -62,16 +73,15 @@ def test_dict():
     assert_equals(dict(revision=None, **input_dict), Location(input_dict).dict())
 
     input_dict['revision'] = 'revision'
-    assert_equals("tag://org/course/category/name/revision", Location(input_dict).url())
+    assert_equals("tag://org/course/category/name@revision", Location(input_dict).url())
     assert_equals(input_dict, Location(input_dict).dict())
 
-
 def test_list():
     assert_equals("tag://org/course/category/name", Location(input_list).url())
     assert_equals(input_list + [None], Location(input_list).list())
 
     input_list.append('revision')
-    assert_equals("tag://org/course/category/name/revision", Location(input_list).url())
+    assert_equals("tag://org/course/category/name@revision", Location(input_list).url())
     assert_equals(input_list, Location(input_list).list())
 
 
@@ -87,8 +97,10 @@ def test_none():
 def test_invalid_locations():
     assert_raises(InvalidLocationError, Location, "foo")
     assert_raises(InvalidLocationError, Location, ["foo", "bar"])
+    assert_raises(InvalidLocationError, Location, ["foo", "bar", "baz", "blat/blat", "foo"])
     assert_raises(InvalidLocationError, Location, ["foo", "bar", "baz", "blat", "foo/bar"])
-    assert_raises(InvalidLocationError, Location, "tag://org/course/category/name with spaces/revision")
+    assert_raises(InvalidLocationError, Location, "tag://org/course/category/name with spaces@revision")
+    assert_raises(InvalidLocationError, Location, "tag://org/course/category/name/revision")
 
 
 def test_equality():
diff --git a/common/lib/xmodule/xmodule/tests/test_import.py b/common/lib/xmodule/xmodule/tests/test_import.py
index 3454366c1a..a369850209 100644
--- a/common/lib/xmodule/xmodule/tests/test_import.py
+++ b/common/lib/xmodule/xmodule/tests/test_import.py
@@ -255,3 +255,37 @@ class ImportTestCase(unittest.TestCase):
         two_toy_video =  modulestore.get_instance(two_toy_id, location)
         self.assertEqual(toy_video.metadata['youtube'], "1.0:p2Q6BrNhdh8")
         self.assertEqual(two_toy_video.metadata['youtube'], "1.0:p2Q6BrNhdh9")
+
+
+    def test_colon_in_url_name(self):
+        """Ensure that colons in url_names convert to file paths properly"""
+
+        print "Starting import"
+        modulestore = XMLModuleStore(DATA_DIR, eager=True, course_dirs=['toy'])
+
+        courses = modulestore.get_courses()
+        self.assertEquals(len(courses), 1)
+        course = courses[0]
+        course_id = course.id
+
+        print "course errors:"
+        for (msg, err) in modulestore.get_item_errors(course.location):
+            print msg
+            print err
+
+        chapters = course.get_children()
+        self.assertEquals(len(chapters), 2)
+
+        ch2 = chapters[1]
+        self.assertEquals(ch2.url_name, "secret:magic")
+
+        print "Ch2 location: ", ch2.location
+
+        also_ch2 = modulestore.get_instance(course_id, ch2.location)
+        self.assertEquals(ch2, also_ch2)
+
+        print "making sure html loaded"
+        cloc = course.location
+        loc = Location(cloc.tag, cloc.org, cloc.course, 'html', 'secret:toylab')
+        html = modulestore.get_instance(course_id, loc)
+        self.assertEquals(html.display_name, "Toy lab")
diff --git a/common/lib/xmodule/xmodule/xml_module.py b/common/lib/xmodule/xmodule/xml_module.py
index b6f791ffc6..25dc4e0c6e 100644
--- a/common/lib/xmodule/xmodule/xml_module.py
+++ b/common/lib/xmodule/xmodule/xml_module.py
@@ -12,6 +12,12 @@ import sys
 
 log = logging.getLogger(__name__)
 
+def name_to_pathname(name):
+    """
+    Convert a location name for use in a path: replace ':' with '/'.
+    This allows users of the xml format to organize content into directories
+    """
+    return name.replace(':', '/')
 
 def is_pointer_tag(xml_obj):
     """
@@ -245,8 +251,8 @@ class XmlDescriptor(XModuleDescriptor):
         # VS[compat] -- detect new-style each-in-a-file mode
         if is_pointer_tag(xml_object):
             # new style:
-            # read the actual definition file--named using url_name
-            filepath = cls._format_filepath(xml_object.tag, url_name)
+            # read the actual definition file--named using url_name.replace(':','/')
+            filepath = cls._format_filepath(xml_object.tag, name_to_pathname(url_name))
             definition_xml = cls.load_file(filepath, system.resources_fs, location)
         else:
             definition_xml = xml_object	# this is just a pointer, not the real definition content
@@ -292,7 +298,8 @@ class XmlDescriptor(XModuleDescriptor):
         """If this returns True, write the definition of this descriptor to a separate
         file.
 
-        NOTE: Do not override this without a good reason.  It is here specifically for customtag...
+        NOTE: Do not override this without a good reason.  It is here
+        specifically for customtag...
         """
         return True
 
@@ -335,7 +342,8 @@ class XmlDescriptor(XModuleDescriptor):
 
         if self.export_to_file():
             # Write the definition to a file
-            filepath = self.__class__._format_filepath(self.category, self.url_name)
+            url_path = name_to_pathname(self.url_name)
+            filepath = self.__class__._format_filepath(self.category, url_path)
             resource_fs.makedir(os.path.dirname(filepath), allow_recreate=True)
             with resource_fs.open(filepath, 'w') as file:
                 file.write(etree.tostring(xml_object, pretty_print=True))
diff --git a/common/test/data/toy/chapter/secret/magic.xml b/common/test/data/toy/chapter/secret/magic.xml
new file mode 100644
index 0000000000..dd16380a70
--- /dev/null
+++ b/common/test/data/toy/chapter/secret/magic.xml
@@ -0,0 +1,3 @@
+<chapter>
+    <video url_name="toyvideo" youtube="blahblah"/>
+</chapter>
diff --git a/common/test/data/toy/course/2012_Fall.xml b/common/test/data/toy/course/2012_Fall.xml
index d34eb9d56a..c87fccd9ab 100644
--- a/common/test/data/toy/course/2012_Fall.xml
+++ b/common/test/data/toy/course/2012_Fall.xml
@@ -1,9 +1,10 @@
 <course>
   <chapter url_name="Overview">
     <videosequence url_name="Toy_Videos">
-      <html url_name="toylab"/>
+      <html url_name="secret:toylab"/>
       <video url_name="Video_Resources" youtube="1.0:1bK-WdDi6Qw"/>
     </videosequence>
     <video url_name="Welcome" youtube="1.0:p2Q6BrNhdh8"/>
   </chapter>
+  <chapter url_name="secret:magic"/>
 </course>
diff --git a/common/test/data/toy/html/toylab.html b/common/test/data/toy/html/secret/toylab.html
similarity index 100%
rename from common/test/data/toy/html/toylab.html
rename to common/test/data/toy/html/secret/toylab.html
diff --git a/common/test/data/toy/html/toylab.xml b/common/test/data/toy/html/secret/toylab.xml
similarity index 100%
rename from common/test/data/toy/html/toylab.xml
rename to common/test/data/toy/html/secret/toylab.xml
diff --git a/common/test/data/toy/policies/2012_Fall.json b/common/test/data/toy/policies/2012_Fall.json
index 6c501d66f8..5ea437a8e4 100644
--- a/common/test/data/toy/policies/2012_Fall.json
+++ b/common/test/data/toy/policies/2012_Fall.json
@@ -11,7 +11,7 @@
         "display_name": "Toy Videos",
         "format": "Lecture Sequence"
     },
-    "html/toylab": {
+    "html/secret:toylab": {
         "display_name": "Toy lab"
     },
     "video/Video_Resources": {

From 84732d03b6d4856f40297a42610a4a22ea63d444 Mon Sep 17 00:00:00 2001
From: Victor Shnayder <victor@mitx.mit.edu>
Date: Mon, 27 Aug 2012 11:43:47 -0400
Subject: [PATCH 2/2] add note about ":" in format docs

---
 doc/xml-format.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/doc/xml-format.md b/doc/xml-format.md
index 55bcda4480..256d4839bf 100644
--- a/doc/xml-format.md
+++ b/doc/xml-format.md
@@ -189,7 +189,13 @@ This video has been encoded at 4 different speeds: 0.75x, 1x, 1.25x, and 1.5x.
 
 ## More on `url_name`s
 
-Every content element (within a course) should have a unique id.  This id is formed as `{category}/{url_name}`, or automatically generated from the content if `url_name` is not specified.  Categories are the different tag types ('chapter', 'problem', 'html', 'sequential', etc).  Url_name is a string containing a-z, A-Z, dot (.) and _.  This is what appears in urls that point to this object.
+Every content element (within a course) should have a unique id.  This id is formed as `{category}/{url_name}`, or automatically generated from the content if `url_name` is not specified.  Categories are the different tag types ('chapter', 'problem', 'html', 'sequential', etc).  Url_name is a string containing a-z, A-Z, dot (.), underscore (_), and ':'.  This is what appears in urls that point to this object.
+
+Colon (':') is special--when looking for the content definition in an xml, ':' will be replaced with '/'.  This allows organizing content into folders.  For example, given the pointer tag
+
+    <problem url_name="conceptual:add_apples_and_oranges"/>
+
+we would look for the problem definition in `problem/conceptual/add_apples_and_oranges.xml`.   (There is a technical reason why we can't just allow '/' in the url_name directly.)
 
 __IMPORTANT__: A student's state for a particular content element is tied to the element id, so the automatic id generation if only ok for elements that do not need to store any student state (e.g. verticals or customtags).  For problems, sequentials, and videos, and any other element where we keep track of what the student has done and where they are at, you should specify a unique `url_name`.  Of course, any content element that is split out into a file will need a `url_name` to specify where to find the definition.  When the CMS comes online, it will use these ids to enable content reuse, so if there is a logical name for something, please do specify it.