From 209f8cc3dbc8c227f0e37cc4e9ab3f1cc6063e27 Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Fri, 27 Dec 2013 16:34:43 -0500 Subject: [PATCH] Lorem is now fancy, and HTML tags are recognized with single-quote attributes. --- i18n/converter.py | 6 +- i18n/dummy.py | 106 ++++++++++++++++++++--------------- i18n/tests/test_converter.py | 3 + i18n/tests/test_dummy.py | 40 ++++++++----- lms/templates/login.html | 2 +- 5 files changed, 94 insertions(+), 63 deletions(-) diff --git a/i18n/converter.py b/i18n/converter.py index d3987bebe2..9a982347ee 100644 --- a/i18n/converter.py +++ b/i18n/converter.py @@ -21,9 +21,9 @@ class Converter(object): # HTML: , ,
, # Python: %(date)s, %(name)s tag_pattern = re.compile(r''' - (<[-\w" .:?=/]*>) | # <tag> - ({[^}]*}) | # {tag} - (%\([^)]*\)\w) | # %(tag)s + (<[^>]+>) | # <tag> + ({[^}]+}) | # {tag} + (%\([\w]+\)\w) | # %(tag)s (&\w+;) | # &entity; (&\#\d+;) | # &#1234; (&\#x[0-9a-f]+;) # &#xABCD; diff --git a/i18n/dummy.py b/i18n/dummy.py index e82429dcbd..987c971447 100644 --- a/i18n/dummy.py +++ b/i18n/dummy.py @@ -1,56 +1,70 @@ +# -*- coding: utf-8 -*- +r""" +Creates new localization properties files in a dummy language. + +Each property file is derived from the equivalent en_US file, with these +transformations applied: + +1. Every vowel is replaced with an equivalent with extra accent marks. + +2. Every string is padded out to +30% length to simulate verbose languages + (such as German) to see if layout and flows work properly. + +3. Every string is terminated with a '#' character to make it easier to detect + truncation. + +Example use:: + + >>> from dummy import Dummy + >>> c = Dummy() + >>> c.convert("My name is Bond, James Bond") + u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2c60\u03c3\u044f\u0454\u043c \u03b9\u03c1#' + >>> print c.convert("My name is Bond, James Bond") + Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ# + >>> print c.convert("don't convert tag ids") + døn't çønvért täg ïds Ⱡσяєм ιρѕυ# + >>> print c.convert("don't convert %(name)s tags on %(date)s") + døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ# + +""" + from converter import Converter -# Creates new localization properties files in a dummy language -# Each property file is derived from the equivalent en_US file, except -# 1. Every vowel is replaced with an equivalent with extra accent marks -# 2. Every string is padded out to +30% length to simulate verbose languages (e.g. German) -# to see if layout and flows work properly -# 3. 
Every string is terminated with a '#' character to make it easier to detect truncation - - -# -------------------------------- -# Example use: -# >>> from dummy import Dummy -# >>> c = Dummy() -# >>> c.convert("hello my name is Bond, James Bond") -# u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#' -# -# >>> c.convert('don\'t convert tag ids') -# u'd\xf6n\'t \xe7\xf6nv\xe9rt t\xe4g \xefds Lorem ipsu#' -# -# >>> c.convert('don\'t convert %(name)s tags on %(date)s') -# u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#" - - # Substitute plain characters with accented lookalikes. # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent -TABLE = {'A': u'\xC0', - 'a': u'\xE4', - 'b': u'\xDF', - 'C': u'\xc7', - 'c': u'\xE7', - 'E': u'\xC9', - 'e': u'\xE9', - 'I': U'\xCC', - 'i': u'\xEF', - 'O': u'\xD8', - 'o': u'\xF8', - 'U': u'\xDB', - 'u': u'\xFC', - 'Y': u'\xDD', - 'y': u'\xFD', - } - +TABLE = { + 'A': u'À', + 'a': u'ä', + 'b': u'ß', + 'C': u'Ç', + 'c': u'ç', + 'E': u'É', + 'e': u'é', + 'I': u'Ì', + 'i': u'ï', + 'O': u'Ø', + 'o': u'ø', + 'U': u'Û', + 'u': u'ü', + 'Y': u'Ý', + 'y': u'ý', +} # The print industry's standard dummy text, in use since the 1500s -# see http://www.lipsum.com/ -LOREM = ' Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed ' \ - 'do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad ' \ - 'minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ' \ - 'ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate ' \ - 'velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat ' \ - 'cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. ' +# see http://www.lipsum.com/, then fed through a "fancy-text" converter. +# The string should start with a space. +LOREM = " " + " ".join( # join and split just make the string easier here. 
+ u""" + Ⱡσяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂ + тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм + νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα + ¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє + νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт + ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂ + єѕт łαвσяυм. + """.split() +) # To simulate more verbose languages (like German), pad the length of a string # by a multiple of PAD_FACTOR diff --git a/i18n/tests/test_converter.py b/i18n/tests/test_converter.py index b1989ede94..f2fec593d4 100644 --- a/i18n/tests/test_converter.py +++ b/i18n/tests/test_converter.py @@ -29,6 +29,9 @@ class TestConverter(TestCase): ('big bad wolf', 'BIG BAD WOLF'), # two html tags ('big bad wolf', 'BIG BAD WOLF'), + # html tags with attributes + ('bar baz', 'BAR BAZ'), + ("bar baz", "BAR BAZ"), # one python tag ('big %(adjective)s wolf', 'BIG %(adjective)s WOLF'), # two python tags diff --git a/i18n/tests/test_dummy.py b/i18n/tests/test_dummy.py index 4670fe5635..fbef3910ab 100644 --- a/i18n/tests/test_dummy.py +++ b/i18n/tests/test_dummy.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import os, string, random from unittest import TestCase from polib import POEntry @@ -13,39 +14,52 @@ class TestDummy(TestCase): def setUp(self): self.converter = dummy.Dummy() + def assertUnicodeEquals(self, str1, str2): + """Just like assertEquals, but doesn't put Unicode into the fail message. + + Either nose, or rake, or something, deals very badly with unusual + Unicode characters in the assertions, so we use repr here to keep + things safe. + + """ + self.assertEquals( + str1, str2, + "Mismatch: %r != %r" % (str1, str2), + ) + def test_dummy(self): """ Tests with a dummy converter (adds spurious accents to strings). Assert that embedded HTML and python tags are not converted. 
""" test_cases = [ - ("hello my name is Bond, James Bond", - u'h\xe9ll\xf8 m\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd Lorem i#'), + (u"hello my name is Bond, James Bond", + u"héllø mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ι#"), - ('don\'t convert tag ids', - u'd\xf8n\'t \xe7\xf8nv\xe9rt t\xe4g \xefds Lorem ipsu#'), + (u"don't convert tag ids", + u"døn't çønvért täg ïds Ⱡσяєм ιρѕυ#"), - ('don\'t convert %(name)s tags on %(date)s', - u"d\xf8n't \xe7\xf8nv\xe9rt %(name)s t\xe4gs \xf8n %(date)s Lorem ips#") + (u"don't convert %(name)s tags on %(date)s", + u"døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#"), ] for source, expected in test_cases: result = self.converter.convert(source) - self.assertEquals(result, expected) + self.assertUnicodeEquals(result, expected) def test_singular(self): entry = POEntry() entry.msgid = 'A lovely day for a cup of tea.' - expected = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#' + expected = u'À løvélý däý før ä çüp øf téä. Ⱡσяєм ι#' self.converter.convert_msg(entry) - self.assertEquals(entry.msgstr, expected) + self.assertUnicodeEquals(entry.msgstr, expected) def test_plural(self): entry = POEntry() entry.msgid = 'A lovely day for a cup of tea.' entry.msgid_plural = 'A lovely day for some cups of tea.' - expected_s = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#' - expected_p = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r s\xf8m\xe9 \xe7\xfcps \xf8f t\xe9\xe4. Lorem ip#' + expected_s = u'À løvélý däý før ä çüp øf téä. Ⱡσяєм ι#' + expected_p = u'À løvélý däý før sømé çüps øf téä. 
Ⱡσяєм ιρ#' self.converter.convert_msg(entry) result = entry.msgstr_plural - self.assertEquals(result['0'], expected_s) - self.assertEquals(result['1'], expected_p) + self.assertUnicodeEquals(result['0'], expected_s) + self.assertUnicodeEquals(result['1'], expected_p) diff --git a/lms/templates/login.html b/lms/templates/login.html index 72d903eed2..877751495b 100644 --- a/lms/templates/login.html +++ b/lms/templates/login.html @@ -89,7 +89,7 @@ $submitButton. addClass('is-disabled'). prop('disabled', true). - html(gettext('Processing your account information …')); + html("${_(u'Processing your account information…')}"); } }