More than one dummy language
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -37,6 +37,8 @@ codekit-config.json
|
||||
!djangojs.mo
|
||||
conf/locale/en/LC_MESSAGES/*.po
|
||||
conf/locale/en/LC_MESSAGES/*.mo
|
||||
conf/locale/fake*/LC_MESSAGES/*.po
|
||||
conf/locale/fake*/LC_MESSAGES/*.mo
|
||||
conf/locale/messages.mo
|
||||
|
||||
### Testing artifacts
|
||||
|
||||
@@ -51,8 +51,10 @@ locales:
|
||||
- zh_TW # Chinese (Taiwan)
|
||||
|
||||
|
||||
# The locale used for fake-accented English, for testing.
|
||||
dummy-locale: eo
|
||||
# The locales used for fake-accented English, for testing.
|
||||
dummy_locales:
|
||||
- eo
|
||||
- fake2
|
||||
|
||||
# Directories we don't search for strings.
|
||||
ignore_dirs:
|
||||
|
||||
@@ -17,6 +17,7 @@ class Configuration(object):
|
||||
Reads localization configuration in json format.
|
||||
"""
|
||||
DEFAULTS = {
|
||||
'dummy_locales': [],
|
||||
'generate_merge': {},
|
||||
'ignore_dirs': [],
|
||||
'locales': ['en'],
|
||||
@@ -42,18 +43,6 @@ class Configuration(object):
|
||||
return self._config.get(name, self.DEFAULTS[name])
|
||||
raise AttributeError("Configuration has no such setting: {!r}".format(name))
|
||||
|
||||
@property
|
||||
def dummy_locale(self):
|
||||
"""
|
||||
Returns a locale to use for the dummy text, e.g. 'eo'.
|
||||
Throws exception if no dummy-locale is declared.
|
||||
The locale is a string.
|
||||
"""
|
||||
dummy = self._config.get('dummy-locale', None)
|
||||
if not dummy:
|
||||
raise Exception('Could not read dummy-locale from configuration file.')
|
||||
return dummy
|
||||
|
||||
def get_messages_dir(self, locale):
|
||||
"""
|
||||
Returns the name of the directory holding the po files for locale.
|
||||
|
||||
194
i18n/dummy.py
194
i18n/dummy.py
@@ -31,98 +31,22 @@ from i18n.config import CONFIGURATION
|
||||
from i18n.execute import create_dir_if_necessary
|
||||
from i18n.converter import Converter
|
||||
|
||||
# Substitute plain characters with accented lookalikes.
|
||||
# http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
|
||||
TABLE = {
|
||||
'A': u'À',
|
||||
'a': u'ä',
|
||||
'b': u'ß',
|
||||
'C': u'Ç',
|
||||
'c': u'ç',
|
||||
'E': u'É',
|
||||
'e': u'é',
|
||||
'I': u'Ì',
|
||||
'i': u'ï',
|
||||
'O': u'Ø',
|
||||
'o': u'ø',
|
||||
'U': u'Û',
|
||||
'u': u'ü',
|
||||
'Y': u'Ý',
|
||||
'y': u'ý',
|
||||
}
|
||||
|
||||
class BaseDummyConverter(Converter):
|
||||
"""Base class for dummy converters.
|
||||
|
||||
# The print industry's standard dummy text, in use since the 1500s
|
||||
# see http://www.lipsum.com/, then fed through a "fancy-text" converter.
|
||||
# The string should start with a space, so that it joins nicely with the text
|
||||
# that precedes it. The Lorem contains an apostrophe since French often does,
|
||||
# and translated strings get put into single-quoted strings, which then break.
|
||||
LOREM = " " + " ".join( # join and split just make the string easier here.
|
||||
u"""
|
||||
Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
|
||||
тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
|
||||
νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
|
||||
¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
|
||||
νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
|
||||
¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
|
||||
єѕт łαвσяυм.
|
||||
""".split()
|
||||
)
|
||||
String conversion goes through a character map, then gets padded.
|
||||
|
||||
# To simulate more verbose languages (like German), pad the length of a string
|
||||
# by a multiple of PAD_FACTOR
|
||||
PAD_FACTOR = 1.33
|
||||
|
||||
|
||||
class Dummy(Converter):
|
||||
r"""
|
||||
Creates new localization properties files in a dummy language.
|
||||
|
||||
Each property file is derived from the equivalent en_US file, with these
|
||||
transformations applied:
|
||||
|
||||
1. Every vowel is replaced with an equivalent with extra accent marks.
|
||||
|
||||
2. Every string is padded out to +30% length to simulate verbose languages
|
||||
(such as German) to see if layout and flows work properly.
|
||||
|
||||
3. Every string is terminated with a '#' character to make it easier to detect
|
||||
truncation.
|
||||
|
||||
Example use::
|
||||
|
||||
>>> from dummy import Dummy
|
||||
>>> c = Dummy()
|
||||
>>> c.convert("My name is Bond, James Bond")
|
||||
u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#'
|
||||
>>> print c.convert("My name is Bond, James Bond")
|
||||
Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ#
|
||||
>>> print c.convert("don't convert <a href='href'>tag ids</a>")
|
||||
døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#
|
||||
>>> print c.convert("don't convert %(name)s tags on %(date)s")
|
||||
døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#
|
||||
"""
|
||||
def convert(self, string):
|
||||
result = Converter.convert(self, string)
|
||||
return self.pad(result)
|
||||
TABLE = {}
|
||||
|
||||
def inner_convert_string(self, string):
|
||||
for k, v in TABLE.items():
|
||||
string = string.replace(k, v)
|
||||
return string
|
||||
for old, new in self.TABLE.items():
|
||||
string = string.replace(old, new)
|
||||
return self.pad(string)
|
||||
|
||||
def pad(self, string):
|
||||
"""add some lorem ipsum text to the end of string"""
|
||||
size = len(string)
|
||||
if size < 7:
|
||||
target = size * 3
|
||||
else:
|
||||
target = int(size*PAD_FACTOR)
|
||||
return string + self.terminate(LOREM[:(target-size)])
|
||||
|
||||
def terminate(self, string):
|
||||
"""replaces the final char of string with #"""
|
||||
return string[:-1] + '#'
|
||||
return string
|
||||
|
||||
def convert_msg(self, msg):
|
||||
"""
|
||||
@@ -159,15 +83,95 @@ class Dummy(Converter):
|
||||
return translated
|
||||
|
||||
|
||||
def make_dummy(file, locale):
|
||||
class Dummy(BaseDummyConverter):
    r"""
    Creates new localization properties files in a dummy language.

    Each property file is derived from the equivalent en_US file, with these
    transformations applied:

    1. Every character listed in TABLE (the vowels, plus b, c and y) is
       replaced with an accented lookalike.

    2. Every string is padded with fake Lorem Ipsum text up to PAD_FACTOR
       times its length (three times its length for strings shorter than
       7 characters) to simulate verbose languages (such as German) to see
       if layout and flows work properly.

    3. Every string is terminated with a '#' character to make it easier to
       detect truncation.

    Example use::

        >>> from dummy import Dummy
        >>> c = Dummy()
        >>> c.convert("My name is Bond, James Bond")
        u"M\xfd n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd \u2c60'\u03c3\u044f\u0454\u043c#"
        >>> print c.convert("My name is Bond, James Bond")
        Mý nämé ïs Bönd, Jämés Bönd Ⱡ'σяєм#
        >>> print c.convert("don't convert <a href='href'>tag ids</a>")
        dön't çönvért <a href='href'>täg ïds</a> Ⱡ'σяєм#
        >>> print c.convert("don't convert %(name)s tags on %(date)s")
        dön't çönvért %(name)s tägs ön %(date)s Ⱡ'σяєм #

    """
    # Substitute plain characters with accented lookalikes.
    # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
    TABLE = dict(zip(
        u"AabCcEeIiOoUuYy",
        u"ÀäßÇçÉéÌïÖöÛüÝý"
    ))

    # The print industry's standard dummy text, in use since the 1500s
    # see http://www.lipsum.com/, then fed through a "fancy-text" converter.
    # The string should start with a space, so that it joins nicely with the text
    # that precedes it.  The Lorem contains an apostrophe since French often does,
    # and translated strings get put into single-quoted strings, which then break.
    LOREM = " " + " ".join(     # join and split just make the string easier here.
        u"""
        Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
        тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
        νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
        ¢σммσ∂σ ¢σηѕєqυαт.  ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
        νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
        ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
        єѕт łαвσяυм.
        """.split()
    )

    # To simulate more verbose languages (like German), pad the length of a string
    # by a multiple of PAD_FACTOR
    PAD_FACTOR = 1.33

    def pad(self, string):
        """Add some lorem ipsum text to the end of string, ending with '#'.

        Strings shorter than 7 characters are grown to three times their
        length; longer strings are grown to int(len * PAD_FACTOR).  The
        final character of the padded result is always '#', so the returned
        string is exactly the target length (or just '#' for an empty
        string).
        """
        size = len(string)
        if size < 7:
            target = size * 3
        else:
            target = int(size * self.PAD_FACTOR)
        # One character of the target length is reserved for the '#'.
        # Clamp at zero: for an empty string the difference is -1, and a
        # negative slice bound would append almost the entire LOREM text
        # instead of nothing.
        pad_len = max(target - size - 1, 0)
        return string + self.LOREM[:pad_len] + "#"
|
||||
|
||||
|
||||
class Dummy2(BaseDummyConverter):
    """A second dummy converter.

    Like Dummy, but uses a different obvious but readable automatic conversion:
    Strikes-through many letters, and turns lower-case letters upside-down.

    """
    TABLE = dict(zip(
        u"ABCDEGHIJKLOPRTUYZabcdefghijklmnopqrstuvwxyz",
        u"ȺɃȻĐɆǤĦƗɈꝀŁØⱣɌŦɄɎƵɐqɔpǝɟƃɥᴉɾʞlɯuødbɹsʇnʌʍxʎz"
    ))

    def inner_convert_string(self, string):
        """Map each character of string through TABLE once, then pad.

        TABLE contains reciprocal pairs (b <-> q, d <-> p, n <-> u).
        Applying the table as a series of whole-string replacements would
        re-substitute characters that an earlier replacement had just
        produced, with a result that depends on dict ordering.  Looking up
        each input character exactly once avoids that.
        """
        converted = u"".join(self.TABLE.get(char, char) for char in string)
        return self.pad(converted)
|
||||
|
||||
|
||||
def make_dummy(filename, locale, converter):
|
||||
"""
|
||||
Takes a source po file, reads it, and writes out a new po file
|
||||
in :param locale: containing a dummy translation.
|
||||
"""
|
||||
if not path(file).exists():
|
||||
raise IOError('File does not exist: %s' % file)
|
||||
pofile = polib.pofile(file)
|
||||
converter = Dummy()
|
||||
if not path(filename).exists():
|
||||
raise IOError('File does not exist: %r' % filename)
|
||||
pofile = polib.pofile(filename)
|
||||
for msg in pofile:
|
||||
converter.convert_msg(msg)
|
||||
|
||||
@@ -175,7 +179,7 @@ def make_dummy(file, locale):
|
||||
# do something reasonable.
|
||||
pofile.metadata['Plural-Forms'] = 'nplurals=2; plural=(n != 1);'
|
||||
|
||||
new_file = new_filename(file, locale)
|
||||
new_file = new_filename(filename, locale)
|
||||
create_dir_if_necessary(new_file)
|
||||
pofile.save(new_file)
|
||||
|
||||
@@ -191,12 +195,12 @@ def main():
|
||||
"""
|
||||
Generate dummy strings for all source po files.
|
||||
"""
|
||||
LOCALE = CONFIGURATION.dummy_locale
|
||||
SOURCE_MSGS_DIR = CONFIGURATION.source_messages_dir
|
||||
print "Processing source language files into dummy strings:"
|
||||
for source_file in CONFIGURATION.source_messages_dir.walkfiles('*.po'):
|
||||
print ' ', source_file.relpath()
|
||||
make_dummy(SOURCE_MSGS_DIR.joinpath(source_file), LOCALE)
|
||||
for locale, converter in zip(CONFIGURATION.dummy_locales, [Dummy(), Dummy2()]):
|
||||
print "Processing source language files into dummy strings, locale {}:".format(locale)
|
||||
for source_file in CONFIGURATION.source_messages_dir.walkfiles('*.po'):
|
||||
print ' ', source_file.relpath()
|
||||
make_dummy(SOURCE_MSGS_DIR.joinpath(source_file), locale, converter)
|
||||
print
|
||||
|
||||
|
||||
|
||||
@@ -115,7 +115,8 @@ def main(argv=None):
|
||||
for locale in CONFIGURATION.translated_locales:
|
||||
merge_files(locale, fail_if_missing=args.strict)
|
||||
# Dummy text is not required. Don't raise exception if files are missing.
|
||||
merge_files(CONFIGURATION.dummy_locale, fail_if_missing=False)
|
||||
for locale in CONFIGURATION.dummy_locales:
|
||||
merge_files(locale, fail_if_missing=False)
|
||||
|
||||
compile_cmd = 'django-admin.py compilemessages'
|
||||
execute(compile_cmd, working_directory=BASE_DIR)
|
||||
|
||||
@@ -29,5 +29,5 @@ class TestConfiguration(TestCase):
|
||||
self.assertIsNotNone(locales)
|
||||
self.assertIsInstance(locales, list)
|
||||
self.assertIn('en', locales)
|
||||
self.assertEqual('eo', CONFIGURATION.dummy_locale)
|
||||
self.assertEqual('eo', CONFIGURATION.dummy_locales[0])
|
||||
self.assertEqual('en', CONFIGURATION.source_locale)
|
||||
|
||||
@@ -33,13 +33,13 @@ class TestDummy(TestCase):
|
||||
|
||||
@ddt.data(
|
||||
(u"hello my name is Bond, James Bond",
|
||||
u"héllø mý nämé ïs Bønd, Jämés Bønd Ⱡ'σяєм ι#"),
|
||||
u"héllö mý nämé ïs Bönd, Jämés Bönd Ⱡ'σяєм ι#"),
|
||||
|
||||
(u"don't convert <a href='href'>tag ids</a>",
|
||||
u"døn't çønvért <a href='href'>täg ïds</a> Ⱡ'σяєм ιρѕυ#"),
|
||||
u"dön't çönvért <a href='href'>täg ïds</a> Ⱡ'σяєм#"),
|
||||
|
||||
(u"don't convert %(name)s tags on %(date)s",
|
||||
u"døn't çønvért %(name)s tägs øn %(date)s Ⱡ'σяєм ιρѕ#"),
|
||||
u"dön't çönvért %(name)s tägs ön %(date)s Ⱡ'σяєм #"),
|
||||
)
|
||||
def test_dummy(self, data):
|
||||
"""
|
||||
@@ -53,7 +53,7 @@ class TestDummy(TestCase):
|
||||
def test_singular(self):
|
||||
entry = POEntry()
|
||||
entry.msgid = "A lovely day for a cup of tea."
|
||||
expected = u"À løvélý däý før ä çüp øf téä. Ⱡ'σяєм #"
|
||||
expected = u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #"
|
||||
self.converter.convert_msg(entry)
|
||||
self.assertUnicodeEquals(entry.msgstr, expected)
|
||||
|
||||
@@ -61,8 +61,8 @@ class TestDummy(TestCase):
|
||||
entry = POEntry()
|
||||
entry.msgid = "A lovely day for a cup of tea."
|
||||
entry.msgid_plural = "A lovely day for some cups of tea."
|
||||
expected_s = u"À løvélý däý før ä çüp øf téä. Ⱡ'σяєм #"
|
||||
expected_p = u"À løvélý däý før sømé çüps øf téä. Ⱡ'σяєм ιρ#"
|
||||
expected_s = u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #"
|
||||
expected_p = u"À lövélý däý för sömé çüps öf téä. Ⱡ'σяєм ιρ#"
|
||||
self.converter.convert_msg(entry)
|
||||
result = entry.msgstr_plural
|
||||
self.assertUnicodeEquals(result['0'], expected_s)
|
||||
|
||||
@@ -498,7 +498,8 @@ LANGUAGE_CODE = 'en' # http://www.i18nguy.com/unicode/language-identifiers.html
|
||||
|
||||
# Sourced from http://www.localeplanet.com/icu/ and wikipedia
|
||||
LANGUAGES = (
|
||||
('eo', u'Dummy Language (Esperanto)'), # Dummy languaged used for testing
|
||||
('eo', u'Dummy Language (Esperanto)'), # Dummy language used for testing
|
||||
('fake2', u'Fake translations'), # Another dummy language for testing (not pushed to prod)
|
||||
|
||||
('ach', u'Acholi'), # Acoli
|
||||
('ar', u'العربية'), # Arabic
|
||||
|
||||
Reference in New Issue
Block a user