From f1f76a9ad102580b5f0c4c111d544c4eb2c20eed Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Fri, 20 Dec 2013 10:25:32 -0500 Subject: [PATCH] Dummy text has more accents, and properly ignores more non-text things. --- i18n/converter.py | 11 ++++++++++- i18n/dummy.py | 7 +++++-- i18n/tests/test_converter.py | 10 +++++++--- i18n/tests/test_dummy.py | 25 +++++++++++++------------ 4 files changed, 35 insertions(+), 18 deletions(-) diff --git a/i18n/converter.py b/i18n/converter.py index e873dcb2a4..d3987bebe2 100644 --- a/i18n/converter.py +++ b/i18n/converter.py @@ -20,7 +20,16 @@ class Converter(object): # matches tags like these: # HTML: <B>, </B>, <BR/>
, <textformat leading="10"> # Python: %(date)s, %(name)s - tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\([^)]*\)\w)', re.I) + tag_pattern = re.compile(r''' + (<[-\w" .:?=/]*>) | # <tag> + ({[^}]*}) | # {tag} + (%\([^)]*\)\w) | # %(tag)s + (&\w+;) | # &entity; + (&\#\d+;) | # &#1234; + (&\#x[0-9a-f]+;) # &#xABCD; + ''', + re.IGNORECASE|re.VERBOSE + ) def convert(self, string): """Returns: a converted tagged string diff --git a/i18n/dummy.py b/i18n/dummy.py index 76283d8704..e82429dcbd 100644 --- a/i18n/dummy.py +++ b/i18n/dummy.py @@ -34,8 +34,11 @@ TABLE = {'A': u'\xC0', 'I': U'\xCC', 'i': u'\xEF', 'O': u'\xD8', - 'o': u'\xF6', - 'u': u'\xFC' + 'o': u'\xF8', + 'U': u'\xDB', + 'u': u'\xFC', + 'Y': u'\xDD', + 'y': u'\xFD', } diff --git a/i18n/tests/test_converter.py b/i18n/tests/test_converter.py index 69cfc40260..b1989ede94 100644 --- a/i18n/tests/test_converter.py +++ b/i18n/tests/test_converter.py @@ -22,7 +22,7 @@ class TestConverter(TestCase): Assert that embedded HTML and python tags are not converted. """ c = UpcaseConverter() - test_cases = ( + test_cases = [ # no tags ('big bad wolf', 'BIG BAD WOLF'), # one html tag @@ -36,7 +36,11 @@ class TestConverter(TestCase): # both kinds of tags ('big %(adjective)s %(noun)s', 'BIG %(adjective)s %(noun)s'), - ) - for (source, expected) in test_cases: + # .format-style tags + ('The {0} barn is {1!r}.', 'THE {0} BARN IS {1!r}.'), + # HTML entities + ('&copy; 2013 edX, &#xa0;', '&copy; 2013 EDX, &#xa0;'), + ] + for source, expected in test_cases: result = c.convert(source) self.assertEquals(result, expected) diff --git a/i18n/tests/test_dummy.py b/i18n/tests/test_dummy.py index 88addb5a95..4670fe5635 100644 --- a/i18n/tests/test_dummy.py +++ b/i18n/tests/test_dummy.py @@ -18,23 +18,24 @@ class TestDummy(TestCase): Tests with a dummy converter (adds spurious accents to strings). Assert that embedded HTML and python tags are not converted. 
""" - test_cases = (("hello my name is Bond, James Bond", - u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#'), + test_cases = [ + ("hello my name is Bond, James Bond", + u'h\xe9ll\xf8 m\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd Lorem i#'), - ('don\'t convert <a href="href">tag ids</a>', - u'd\xf6n\'t \xe7\xf6nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'), - - ('don\'t convert %(name)s tags on %(date)s', - u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#") - ) - for (source, expected) in test_cases: + ('don\'t convert <a href="href">tag ids</a>', + u'd\xf8n\'t \xe7\xf8nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'), + + ('don\'t convert %(name)s tags on %(date)s', + u"d\xf8n't \xe7\xf8nv\xe9rt %(name)s t\xe4gs \xf8n %(date)s Lorem ips#") + ] + for source, expected in test_cases: result = self.converter.convert(source) self.assertEquals(result, expected) def test_singular(self): entry = POEntry() entry.msgid = 'A lovely day for a cup of tea.' - expected = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r \xe4 \xe7\xfcp \xf6f t\xe9\xe4. Lorem i#' + expected = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#' self.converter.convert_msg(entry) self.assertEquals(entry.msgstr, expected) @@ -42,8 +43,8 @@ class TestDummy(TestCase): entry = POEntry() entry.msgid = 'A lovely day for a cup of tea.' entry.msgid_plural = 'A lovely day for some cups of tea.' - expected_s = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r \xe4 \xe7\xfcp \xf6f t\xe9\xe4. Lorem i#' - expected_p = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r s\xf6m\xe9 \xe7\xfcps \xf6f t\xe9\xe4. Lorem ip#' + expected_s = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#' + expected_p = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r s\xf8m\xe9 \xe7\xfcps \xf8f t\xe9\xe4. Lorem ip#' self.converter.convert_msg(entry) result = entry.msgstr_plural self.assertEquals(result['0'], expected_s)