222 lines
8.1 KiB
Python
Executable File
222 lines
8.1 KiB
Python
Executable File
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
|
||
"""
|
||
Generate test translation files from human-readable po files.
|
||
|
||
Dummy language is specified in configuration file (see config.py)
|
||
two letter language codes reference:
|
||
see http://www.loc.gov/standards/iso639-2/php/code_list.php
|
||
|
||
Django will not localize in languages that django itself has not been
|
||
localized for. So we are using a well-known language (default='eo').
|
||
Django languages are listed in django.conf.global_settings.LANGUAGES
|
||
|
||
po files can be generated with this:
|
||
django-admin.py makemessages --all --extension html -l en
|
||
|
||
Usage:
|
||
|
||
$ ./dummy.py
|
||
|
||
generates output conf/locale/$DUMMY_LOCALE/LC_MESSAGES,
|
||
where $DUMMY_LOCALE is each of the dummy_locales values set in the i18n config
|
||
"""
|
||
from __future__ import print_function
|
||
import re
|
||
import sys
|
||
import argparse
|
||
|
||
import polib
|
||
from path import path
|
||
|
||
from i18n.config import CONFIGURATION
|
||
from i18n.converter import Converter
|
||
|
||
|
||
class BaseDummyConverter(Converter):
    """Base class for dummy converters.

    String conversion goes through a character map, then gets padded.

    Subclasses customise the result by overriding ``TABLE`` (the character
    map) and/or ``pad`` (the padding step).

    """
    # Maps each source substring to its replacement; empty in the base class.
    TABLE = {}

    def inner_convert_string(self, string):
        """Return `string` with TABLE applied in a single pass, then padded.

        The substitution is done in one left-to-right pass so that text
        produced by one replacement is never rewritten by another entry.
        Chained ``str.replace`` calls would collapse a table containing both
        b->q and q->b into a single glyph.
        """
        table = self.TABLE
        if not table:
            # Nothing to substitute; an empty alternation would match everywhere.
            return self.pad(string)

        # Longest keys first so a multi-character key wins over its own prefix.
        keys = sorted(table, key=len, reverse=True)
        pattern = u"|".join(re.escape(key) for key in keys)
        converted = re.sub(pattern, lambda match: table[match.group(0)], string)
        return self.pad(converted)

    def pad(self, string):
        """Return `string` unchanged; subclasses override this to add padding."""
        return string

    def convert_msg(self, msg):
        """
        Takes one POEntry object and converts it (adds a dummy translation to it)
        msg is an instance of polib.POEntry

        Entries with an empty msgid are left untouched. Entries with a plural
        form get ``msgstr_plural`` set (keys '0' and '1'); all others get
        ``msgstr`` set. The entry is modified in place and nothing is returned.
        """
        source = msg.msgid
        if not source:
            # don't translate empty string
            return

        plural_source = msg.msgid_plural
        if not plural_source:
            msg.msgstr = self.final_newline(source, self.convert(source))
            return

        # translate singular and plural
        foreign_single = self.convert(source)
        foreign_plural = self.convert(plural_source)
        msg.msgstr_plural = {
            '0': self.final_newline(source, foreign_single),
            '1': self.final_newline(plural_source, foreign_plural),
        }

    def final_newline(self, original, translated):
        """ Returns a new translated string.
            If last char of original is a newline, make sure translation
            has a newline too.

            An empty `translated` is handled as well (it simply gains the
            newline) instead of failing on an index lookup.
        """
        if original and original.endswith('\n') and not translated.endswith('\n'):
            translated += '\n'
        return translated
|
||
|
||
|
||
class Dummy(BaseDummyConverter):
    r"""
    Creates new localization properties files in a dummy language.

    Each property file is derived from the equivalent en_US file, with these
    transformations applied:

    1. Every character listed in TABLE is replaced with an accented lookalike.

    2. Every string is padded out to roughly +33% length (strings shorter
       than 7 characters are tripled) to simulate verbose languages
       (such as German) to see if layout and flows work properly.

    3. Every string is terminated with a '#' character to make it easier to detect
       truncation.

    Example use::

        >>> from dummy import Dummy
        >>> c = Dummy()
        >>> c.convert("My name is Bond, James Bond")
        u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#'
        >>> print(c.convert("My name is Bond, James Bond"))
        Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ#
        >>> print(c.convert("don't convert <a href='href'>tag ids</a>"))
        døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#
        >>> print(c.convert("don't convert %(name)s tags on %(date)s"))
        døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#

    NOTE(review): the outputs shown above are illustrative only. They do not
    match the current TABLE (which maps 'o' to 'ö', not 'ø') or the current
    LOREM (which starts with "Ⱡ'"); regenerate them before running these
    examples as doctests.

    """
    # Substitute plain characters with accented lookalikes.
    # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
    TABLE = dict(zip(
        u"AabCcEeIiOoUuYy",
        u"ÀäßÇçÉéÌïÖöÛüÝý"
    ))

    # The print industry's standard dummy text, in use since the 1500s
    # see http://www.lipsum.com/, then fed through a "fancy-text" converter.
    # The string should start with a space, so that it joins nicely with the text
    # that precedes it.  The Lorem contains an apostrophe since French often does,
    # and translated strings get put into single-quoted strings, which then break.
    LOREM = " " + " ".join(     # join and split just make the string easier here.
        u"""
        Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
        тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
        νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
        ¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
        νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
        ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
        єѕт łαвσяυм.
        """.split()
    )

    # To simulate more verbose languages (like German), pad the length of a string
    # by a multiple of PAD_FACTOR
    PAD_FACTOR = 1.33

    def pad(self, string):
        """add some lorem ipsum text to the end of string

        Strings shorter than 7 characters are grown to three times their
        length, longer ones to PAD_FACTOR times their length. The final
        character of the result is always '#', so one slot of the target
        length is reserved for it.
        """
        size = len(string)
        if size < 7:
            target = size * 3
        else:
            target = int(size * self.PAD_FACTOR)
        # Clamp at zero: for an empty string the raw value is -1, and a
        # negative slice end would append almost the entire LOREM text.
        pad_len = max(0, target - size - 1)
        return string + self.LOREM[:pad_len] + "#"
|
||
|
||
|
||
class Dummy2(BaseDummyConverter):
    """An alternative dummy converter.

    Works like Dummy, but relies on a different conversion that is just as
    obvious and still readable: many letters are struck through, and
    lower-case letters are turned upside-down.

    """
    # Pairs are built positionally: the n-th plain character maps to the
    # n-th character of the second string.
    TABLE = {
        plain: fancy
        for plain, fancy in zip(
            u"ABCDEGHIJKLOPRTUYZabcdefghijklmnopqrstuvwxyz",
            u"ȺɃȻĐɆǤĦƗɈꝀŁØⱣɌŦɄɎƵɐqɔpǝɟƃɥᴉɾʞlɯuødbɹsʇnʌʍxʎz",
        )
    }
|
||
|
||
|
||
def make_dummy(filename, locale, converter):
    """
    Read the po file `filename`, add a dummy translation to its entries
    using `converter`, and save the result as a new po file for `locale`.

    Raises IOError if `filename` does not exist.
    """
    source_path = path(filename)
    if not source_path.exists():
        raise IOError('File does not exist: %r' % filename)

    catalog = polib.pofile(filename)

    # Some msgids are really names of formatting settings; dummy-ifying them
    # would make dates look like "DÀTÉ_TÌMÉ_FÖRMÀT Ⱡ'σ# EST", so skip those.
    is_format_name = re.compile(r"^[A-Z_]+_FORMAT$").match
    for entry in catalog:
        if not is_format_name(entry.msgid):
            converter.convert_msg(entry)

    # Declare English pluralization rules so that ngettext will do
    # something reasonable.
    catalog.metadata['Plural-Forms'] = 'nplurals=2; plural=(n != 1);'

    destination = new_filename(filename, locale)
    destination.parent.makedirs_p()
    catalog.save(destination)
|
||
|
||
|
||
def new_filename(original_filename, new_locale):
    """Return the absolute path that mirrors original_filename under new_locale.

    The last two path components (containing directory and file name) are
    kept; the directory above them is swapped for `new_locale`.
    """
    source = path(original_filename)
    messages_dir = source.parent
    locales_root = messages_dir.parent.parent
    target = locales_root / new_locale / messages_dir.name / source.name
    return target.abspath()
|
||
|
||
|
||
def main(verbosity=1):
    """
    Write dummy translations of every source po file, once per dummy locale.

    Each configured dummy locale is paired, in order, with one converter
    (Dummy first, then Dummy2). Progress is printed when `verbosity` is truthy.
    """
    SOURCE_MSGS_DIR = CONFIGURATION.source_messages_dir
    locale_converters = zip(CONFIGURATION.dummy_locales, [Dummy(), Dummy2()])
    verbose = bool(verbosity)

    for locale, converter in locale_converters:
        if verbose:
            print('Processing source language files into dummy strings, locale "{}"'.format(locale))

        for source_file in CONFIGURATION.source_messages_dir.walkfiles('*.po'):
            if verbose:
                print('   ', source_file.relpath())
            po_path = SOURCE_MSGS_DIR.joinpath(source_file)
            make_dummy(po_path, locale, converter)

    # Trailing blank line separates this output from whatever follows.
    if verbose:
        print()
|
||
|
||
|
||
if __name__ == '__main__':
    # pylint: disable=invalid-name
    # Script entry point: each -v / --verbose on the command line raises the
    # verbosity count handed to main(); with none given it is 0 (quiet).
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--verbose", "-v", action="count", default=0)
    options = cli.parse_args()
    main(verbosity=options.verbose)
|