diff --git a/i18n/converter.py b/i18n/converter.py
index e873dcb2a4..d3987bebe2 100644
--- a/i18n/converter.py
+++ b/i18n/converter.py
@@ -20,7 +20,16 @@ class Converter(object):
# matches tags like these:
# HTML: <B>, </B>, <BR/>, <textformat leading="10">
# Python: %(date)s, %(name)s
- tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\([^)]*\)\w)', re.I)
+ tag_pattern = re.compile(r'''
+ (<[-\w" .:?=/]*>) | # <tag>
+ ({[^}]*}) | # {tag}
+ (%\([^)]*\)\w) | # %(tag)s
+ (&\w+;) | # &entity;
+ (&\#\d+;) | # &#1234;
+ (&\#x[0-9a-f]+;) # &#xABCD;
+ ''',
+ re.IGNORECASE|re.VERBOSE
+ )
def convert(self, string):
"""Returns: a converted tagged string
diff --git a/i18n/dummy.py b/i18n/dummy.py
index 76283d8704..e82429dcbd 100644
--- a/i18n/dummy.py
+++ b/i18n/dummy.py
@@ -34,8 +34,11 @@ TABLE = {'A': u'\xC0',
'I': U'\xCC',
'i': u'\xEF',
'O': u'\xD8',
- 'o': u'\xF6',
- 'u': u'\xFC'
+ 'o': u'\xF8',
+ 'U': u'\xDB',
+ 'u': u'\xFC',
+ 'Y': u'\xDD',
+ 'y': u'\xFD',
}
diff --git a/i18n/tests/test_converter.py b/i18n/tests/test_converter.py
index 69cfc40260..b1989ede94 100644
--- a/i18n/tests/test_converter.py
+++ b/i18n/tests/test_converter.py
@@ -22,7 +22,7 @@ class TestConverter(TestCase):
Assert that embedded HTML and python tags are not converted.
"""
c = UpcaseConverter()
- test_cases = (
+ test_cases = [
# no tags
('big bad wolf', 'BIG BAD WOLF'),
# one html tag
@@ -36,7 +36,11 @@ class TestConverter(TestCase):
# both kinds of tags
('big %(adjective)s %(noun)s',
'BIG %(adjective)s %(noun)s'),
- )
- for (source, expected) in test_cases:
+ # .format-style tags
+ ('The {0} barn is {1!r}.', 'THE {0} BARN IS {1!r}.'),
+ # HTML entities
+ ('© 2013 edX, ', '© 2013 EDX, '),
+ ]
+ for source, expected in test_cases:
result = c.convert(source)
self.assertEquals(result, expected)
diff --git a/i18n/tests/test_dummy.py b/i18n/tests/test_dummy.py
index 88addb5a95..4670fe5635 100644
--- a/i18n/tests/test_dummy.py
+++ b/i18n/tests/test_dummy.py
@@ -18,23 +18,24 @@ class TestDummy(TestCase):
Tests with a dummy converter (adds spurious accents to strings).
Assert that embedded HTML and python tags are not converted.
"""
- test_cases = (("hello my name is Bond, James Bond",
- u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#'),
+ test_cases = [
+ ("hello my name is Bond, James Bond",
+ u'h\xe9ll\xf8 m\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd Lorem i#'),
- ('don\'t convert tag ids',
- u'd\xf6n\'t \xe7\xf6nv\xe9rt t\xe4g \xefds Lorem ipsu#'),
-
- ('don\'t convert %(name)s tags on %(date)s',
- u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#")
- )
- for (source, expected) in test_cases:
+ ('don\'t convert tag ids',
+ u'd\xf8n\'t \xe7\xf8nv\xe9rt t\xe4g \xefds Lorem ipsu#'),
+
+ ('don\'t convert %(name)s tags on %(date)s',
+ u"d\xf8n't \xe7\xf8nv\xe9rt %(name)s t\xe4gs \xf8n %(date)s Lorem ips#")
+ ]
+ for source, expected in test_cases:
result = self.converter.convert(source)
self.assertEquals(result, expected)
def test_singular(self):
entry = POEntry()
entry.msgid = 'A lovely day for a cup of tea.'
- expected = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r \xe4 \xe7\xfcp \xf6f t\xe9\xe4. Lorem i#'
+ expected = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
self.converter.convert_msg(entry)
self.assertEquals(entry.msgstr, expected)
@@ -42,8 +43,8 @@ class TestDummy(TestCase):
entry = POEntry()
entry.msgid = 'A lovely day for a cup of tea.'
entry.msgid_plural = 'A lovely day for some cups of tea.'
- expected_s = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r \xe4 \xe7\xfcp \xf6f t\xe9\xe4. Lorem i#'
- expected_p = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r s\xf6m\xe9 \xe7\xfcps \xf6f t\xe9\xe4. Lorem ip#'
+ expected_s = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
+ expected_p = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r s\xf8m\xe9 \xe7\xfcps \xf8f t\xe9\xe4. Lorem ip#'
self.converter.convert_msg(entry)
result = entry.msgstr_plural
self.assertEquals(result['0'], expected_s)