diff --git a/.gitignore b/.gitignore index f3eaf7c976..76834aeb55 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,8 @@ codekit-config.json !djangojs.mo conf/locale/en/LC_MESSAGES/*.po conf/locale/en/LC_MESSAGES/*.mo +conf/locale/fake*/LC_MESSAGES/*.po +conf/locale/fake*/LC_MESSAGES/*.mo conf/locale/messages.mo ### Testing artifacts diff --git a/conf/locale/config.yaml b/conf/locale/config.yaml index 35d0272a4b..59c539f607 100644 --- a/conf/locale/config.yaml +++ b/conf/locale/config.yaml @@ -51,8 +51,10 @@ locales: - zh_TW # Chinese (Taiwan) -# The locale used for fake-accented English, for testing. -dummy-locale: eo +# The locales used for fake-accented English, for testing. +dummy_locales: + - eo + - fake2 # Directories we don't search for strings. ignore_dirs: diff --git a/i18n/config.py b/i18n/config.py index be712c0886..982fffaaac 100644 --- a/i18n/config.py +++ b/i18n/config.py @@ -17,6 +17,7 @@ class Configuration(object): Reads localization configuration in json format. """ DEFAULTS = { + 'dummy_locales': [], 'generate_merge': {}, 'ignore_dirs': [], 'locales': ['en'], @@ -42,18 +43,6 @@ class Configuration(object): return self._config.get(name, self.DEFAULTS[name]) raise AttributeError("Configuration has no such setting: {!r}".format(name)) - @property - def dummy_locale(self): - """ - Returns a locale to use for the dummy text, e.g. 'eo'. - Throws exception if no dummy-locale is declared. - The locale is a string. - """ - dummy = self._config.get('dummy-locale', None) - if not dummy: - raise Exception('Could not read dummy-locale from configuration file.') - return dummy - def get_messages_dir(self, locale): """ Returns the name of the directory holding the po files for locale. diff --git a/i18n/dummy.py b/i18n/dummy.py index fcf91d6b7b..d72c31eb55 100755 --- a/i18n/dummy.py +++ b/i18n/dummy.py @@ -31,98 +31,22 @@ from i18n.config import CONFIGURATION from i18n.execute import create_dir_if_necessary from i18n.converter import Converter -# Substitute plain characters with accented lookalikes. -# http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent -TABLE = { - 'A': u'À', - 'a': u'ä', - 'b': u'ß', - 'C': u'Ç', - 'c': u'ç', - 'E': u'É', - 'e': u'é', - 'I': u'Ì', - 'i': u'ï', - 'O': u'Ø', - 'o': u'ø', - 'U': u'Û', - 'u': u'ü', - 'Y': u'Ý', - 'y': u'ý', -} +class BaseDummyConverter(Converter): + """Base class for dummy converters. -# The print industry's standard dummy text, in use since the 1500s -# see http://www.lipsum.com/, then fed through a "fancy-text" converter. -# The string should start with a space, so that it joins nicely with the text -# that precedes it. The Lorem contains an apostrophe since French often does, -# and translated strings get put into single-quoted strings, which then break. -LOREM = " " + " ".join( # join and split just make the string easier here. - u""" - Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂ - тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм - νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα - ¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє - νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт - ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂ - єѕт łαвσяυм. - """.split() -) + String conversion goes through a character map, then gets padded. -# To simulate more verbose languages (like German), pad the length of a string -# by a multiple of PAD_FACTOR -PAD_FACTOR = 1.33 - - -class Dummy(Converter): - r""" - Creates new localization properties files in a dummy language. - - Each property file is derived from the equivalent en_US file, with these - transformations applied: - - 1. Every vowel is replaced with an equivalent with extra accent marks. - - 2. Every string is padded out to +30% length to simulate verbose languages - (such as German) to see if layout and flows work properly. - - 3. Every string is terminated with a '#' character to make it easier to detect - truncation. - - Example use:: - - >>> from dummy import Dummy - >>> c = Dummy() - >>> c.convert("My name is Bond, James Bond") - u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#' - >>> print c.convert("My name is Bond, James Bond") - Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ# - >>> print c.convert("don't convert tag ids") - døn't çønvért täg ïds Ⱡσяєм ιρѕυ# - >>> print c.convert("don't convert %(name)s tags on %(date)s") - døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ# """ - def convert(self, string): - result = Converter.convert(self, string) - return self.pad(result) + TABLE = {} def inner_convert_string(self, string): - for k, v in TABLE.items(): - string = string.replace(k, v) - return string + for old, new in self.TABLE.items(): + string = string.replace(old, new) + return self.pad(string) def pad(self, string): - """add some lorem ipsum text to the end of string""" - size = len(string) - if size < 7: - target = size * 3 - else: - target = int(size*PAD_FACTOR) - return string + self.terminate(LOREM[:(target-size)]) - - def terminate(self, string): - """replaces the final char of string with #""" - return string[:-1] + '#' + return string def convert_msg(self, msg): """ @@ -159,15 +83,95 @@ class Dummy(Converter): return translated -def make_dummy(file, locale): +class Dummy(BaseDummyConverter): + r""" + Creates new localization properties files in a dummy language. + + Each property file is derived from the equivalent en_US file, with these + transformations applied: + + 1. Every vowel is replaced with an equivalent with extra accent marks. + + 2. Every string is padded out to +30% length to simulate verbose languages + (such as German) to see if layout and flows work properly. + + 3. Every string is terminated with a '#' character to make it easier to detect + truncation. + + Example use:: + + >>> from dummy import Dummy + >>> c = Dummy() + >>> c.convert("My name is Bond, James Bond") + u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#' + >>> print c.convert("My name is Bond, James Bond") + Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ# + >>> print c.convert("don't convert tag ids") + døn't çønvért täg ïds Ⱡσяєм ιρѕυ# + >>> print c.convert("don't convert %(name)s tags on %(date)s") + døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ# + + """ + # Substitute plain characters with accented lookalikes. + # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent + TABLE = dict(zip( + u"AabCcEeIiOoUuYy", + u"ÀäßÇçÉéÌïÖöÛüÝý" + )) + + # The print industry's standard dummy text, in use since the 1500s + # see http://www.lipsum.com/, then fed through a "fancy-text" converter. + # The string should start with a space, so that it joins nicely with the text + # that precedes it. The Lorem contains an apostrophe since French often does, + # and translated strings get put into single-quoted strings, which then break. + LOREM = " " + " ".join( # join and split just make the string easier here. + u""" + Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂ + тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм + νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα + ¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє + νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт + ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂ + єѕт łαвσяυм. + """.split() + ) + + # To simulate more verbose languages (like German), pad the length of a string + # by a multiple of PAD_FACTOR + PAD_FACTOR = 1.33 + + def pad(self, string): + """add some lorem ipsum text to the end of string""" + size = len(string) + if size < 7: + target = size * 3 + else: + target = int(size * self.PAD_FACTOR) + pad_len = target - size - 1 + return string + self.LOREM[:pad_len] + "#" + + +class Dummy2(BaseDummyConverter): + """A second dummy converter. + + Like Dummy, but uses a different obvious but readable automatic conversion: + Strikes-through many letters, and turns lower-case letters upside-down. + + """ + TABLE = dict(zip( + u"ABCDEGHIJKLOPRTUYZabcdefghijklmnopqrstuvwxyz", + u"ȺɃȻĐɆǤĦƗɈꝀŁØⱣɌŦɄɎƵɐqɔpǝɟƃɥᴉɾʞlɯuødbɹsʇnʌʍxʎz" + )) + + +def make_dummy(filename, locale, converter): """ Takes a source po file, reads it, and writes out a new po file in :param locale: containing a dummy translation. """ - if not path(file).exists(): - raise IOError('File does not exist: %s' % file) - pofile = polib.pofile(file) - converter = Dummy() + if not path(filename).exists(): + raise IOError('File does not exist: %r' % filename) + pofile = polib.pofile(filename) for msg in pofile: converter.convert_msg(msg) @@ -175,7 +179,7 @@ def make_dummy(file, locale): # do something reasonable. pofile.metadata['Plural-Forms'] = 'nplurals=2; plural=(n != 1);' - new_file = new_filename(file, locale) + new_file = new_filename(filename, locale) create_dir_if_necessary(new_file) pofile.save(new_file) @@ -191,12 +195,12 @@ def main(): """ Generate dummy strings for all source po files. """ - LOCALE = CONFIGURATION.dummy_locale SOURCE_MSGS_DIR = CONFIGURATION.source_messages_dir - print "Processing source language files into dummy strings:" - for source_file in CONFIGURATION.source_messages_dir.walkfiles('*.po'): - print ' ', source_file.relpath() - make_dummy(SOURCE_MSGS_DIR.joinpath(source_file), LOCALE) + for locale, converter in zip(CONFIGURATION.dummy_locales, [Dummy(), Dummy2()]): + print "Processing source language files into dummy strings, locale {}:".format(locale) + for source_file in CONFIGURATION.source_messages_dir.walkfiles('*.po'): + print ' ', source_file.relpath() + make_dummy(SOURCE_MSGS_DIR.joinpath(source_file), locale, converter) print diff --git a/i18n/generate.py b/i18n/generate.py index b8e9f32db7..ff70127a65 100755 --- a/i18n/generate.py +++ b/i18n/generate.py @@ -115,7 +115,8 @@ def main(argv=None): for locale in CONFIGURATION.translated_locales: merge_files(locale, fail_if_missing=args.strict) # Dummy text is not required. Don't raise exception if files are missing. - merge_files(CONFIGURATION.dummy_locale, fail_if_missing=False) + for locale in CONFIGURATION.dummy_locales: + merge_files(locale, fail_if_missing=False) compile_cmd = 'django-admin.py compilemessages' execute(compile_cmd, working_directory=BASE_DIR) diff --git a/i18n/tests/test_config.py b/i18n/tests/test_config.py index d82b050e07..c05694faef 100644 --- a/i18n/tests/test_config.py +++ b/i18n/tests/test_config.py @@ -29,5 +29,5 @@ class TestConfiguration(TestCase): self.assertIsNotNone(locales) self.assertIsInstance(locales, list) self.assertIn('en', locales) - self.assertEqual('eo', CONFIGURATION.dummy_locale) + self.assertEqual('eo', CONFIGURATION.dummy_locales[0]) self.assertEqual('en', CONFIGURATION.source_locale) diff --git a/i18n/tests/test_dummy.py b/i18n/tests/test_dummy.py index a641f146b1..7f3e8ca07c 100644 --- a/i18n/tests/test_dummy.py +++ b/i18n/tests/test_dummy.py @@ -33,13 +33,13 @@ class TestDummy(TestCase): @ddt.data( (u"hello my name is Bond, James Bond", - u"héllø mý nämé ïs Bønd, Jämés Bønd Ⱡ'σяєм ι#"), + u"héllö mý nämé ïs Bönd, Jämés Bönd Ⱡ'σяєм ι#"), (u"don't convert tag ids", - u"døn't çønvért täg ïds Ⱡ'σяєм ιρѕυ#"), + u"dön't çönvért täg ïds Ⱡ'σяєм#"), (u"don't convert %(name)s tags on %(date)s", - u"døn't çønvért %(name)s tägs øn %(date)s Ⱡ'σяєм ιρѕ#"), + u"dön't çönvért %(name)s tägs ön %(date)s Ⱡ'σяєм #"), ) def test_dummy(self, data): """ @@ -53,7 +53,7 @@ class TestDummy(TestCase): def test_singular(self): entry = POEntry() entry.msgid = "A lovely day for a cup of tea." - expected = u"À løvélý däý før ä çüp øf téä. Ⱡ'σяєм #" + expected = u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #" self.converter.convert_msg(entry) self.assertUnicodeEquals(entry.msgstr, expected) @@ -61,8 +61,8 @@ class TestDummy(TestCase): entry = POEntry() entry.msgid = "A lovely day for a cup of tea." entry.msgid_plural = "A lovely day for some cups of tea." - expected_s = u"À løvélý däý før ä çüp øf téä. Ⱡ'σяєм #" - expected_p = u"À løvélý däý før sømé çüps øf téä. Ⱡ'σяєм ιρ#" + expected_s = u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #" + expected_p = u"À lövélý däý för sömé çüps öf téä. Ⱡ'σяєм ιρ#" self.converter.convert_msg(entry) result = entry.msgstr_plural self.assertUnicodeEquals(result['0'], expected_s) diff --git a/lms/envs/common.py b/lms/envs/common.py index 5748f65cb8..73e6902bd4 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -498,7 +498,8 @@ LANGUAGE_CODE = 'en' # http://www.i18nguy.com/unicode/language-identifiers.html # Sourced from http://www.localeplanet.com/icu/ and wikipedia LANGUAGES = ( - ('eo', u'Dummy Language (Esperanto)'), # Dummy languaged used for testing + ('eo', u'Dummy Language (Esperanto)'), # Dummy language used for testing + ('fake2', u'Fake translations'), # Another dummy language for testing (not pushed to prod) ('ach', u'Acholi'), # Acoli ('ar', u'العربية'), # Arabic