Moving these tools to i18n-tools repo
This commit is contained in:
@@ -1,36 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Utility for cleaning up your local directory after switching between
|
||||
branches with different translation levels (eg master branch, with only
|
||||
reviewed translations, versus dev branch, with all translations)
|
||||
"""
|
||||
from __future__ import print_function
|
||||
import os
|
||||
|
||||
from i18n.config import CONFIGURATION
|
||||
from i18n.execute import execute
|
||||
|
||||
|
||||
def clean_conf_folder(locale):
    """Remove the configuration directory for `locale`"""
    target_dir = CONFIGURATION.get_messages_dir(locale)
    command = "rm -rf {}".format(target_dir)
    print(command)
    # Best-effort removal: report any failure and keep going so that the
    # remaining locales still get cleaned.
    try:
        execute(command)
    except Exception as err:
        print("Encountered error {}; continuing...".format(err))
    return
|
||||
|
||||
|
||||
def clean_configuration_directory():
    """
    Remove the configuration directories for all locales
    in CONFIGURATION.translated_locales
    """
    for translated_locale in CONFIGURATION.translated_locales:
        clean_conf_folder(translated_locale)
|
||||
|
||||
|
||||
# Script entry point: wipe the generated message directories for every
# translated locale listed in the i18n configuration.
if __name__ == '__main__':
    clean_configuration_directory()
|
||||
@@ -1,69 +0,0 @@
|
||||
import os
|
||||
|
||||
import yaml
|
||||
from path import path
|
||||
|
||||
# BASE_DIR is the working directory to execute django-admin commands from.
|
||||
# Typically this should be the 'edx-platform' directory.
|
||||
BASE_DIR = path(__file__).abspath().dirname().dirname()
|
||||
|
||||
# LOCALE_DIR contains the locale files.
|
||||
# Typically this should be 'edx-platform/conf/locale'
|
||||
LOCALE_DIR = BASE_DIR.joinpath('conf', 'locale')
|
||||
|
||||
|
||||
class Configuration(object):
    """
    Reads localization configuration from a YAML file.
    """
    # Fallback values used when a setting is absent from the config file.
    DEFAULTS = {
        'dummy_locales': [],
        'generate_merge': {},
        'ignore_dirs': [],
        'locales': ['en'],
        'segment': {},
        'source_locale': 'en',
        'third_party': [],
    }

    def __init__(self, filename):
        self._filename = filename
        self._config = self.read_config(filename)

    def read_config(self, filename):
        """
        Returns data found in config file (as dict), or raises exception if file not found
        """
        if not os.path.exists(filename):
            raise Exception("Configuration file cannot be found: %s" % filename)
        with open(filename) as stream:
            return yaml.safe_load(stream)

    def __getattr__(self, name):
        # Only settings with a declared default are exposed as attributes;
        # anything else is a genuine attribute error.
        if name not in self.DEFAULTS:
            raise AttributeError("Configuration has no such setting: {!r}".format(name))
        return self._config.get(name, self.DEFAULTS[name])

    def get_messages_dir(self, locale):
        """
        Returns the name of the directory holding the po files for locale.
        Example: edx-platform/conf/locale/fr/LC_MESSAGES
        """
        return LOCALE_DIR.joinpath(locale, 'LC_MESSAGES')

    @property
    def source_messages_dir(self):
        """
        Returns the name of the directory holding the source-language po files (English).
        Example: edx-platform/conf/locale/en/LC_MESSAGES
        """
        return self.get_messages_dir(self.source_locale)

    @property
    def translated_locales(self):
        """
        Returns the set of locales to be translated (ignoring the source_locale).
        """
        all_locales = set(self.locales)
        return sorted(all_locales - set([self.source_locale]))
|
||||
|
||||
CONFIGURATION = Configuration(LOCALE_DIR.joinpath('config.yaml').normpath())
|
||||
@@ -1,74 +0,0 @@
|
||||
import re
|
||||
import itertools
|
||||
|
||||
|
||||
class Converter(object):
    """Converter is an abstract class that transforms strings.

    It hides embedded tags (HTML or Python sequences) from transformation.

    To implement Converter, provide implementation for inner_convert_string().

    Strategy:
        1. extract tags embedded in the string
           a. use the index of each extracted tag to re-insert it later
           b. replace tags in string with numbers (<0>, <1>, etc.)
           c. save extracted tags in a separate list
        2. convert string
        3. re-insert the extracted tags
    """

    # matches tags like these:
    #    HTML:   <B>, </B>, <BR/>, <textformat leading="10">
    #    Python: %(date)s, %(name)s
    tag_pattern = re.compile(
        r'''
        (<[^>]+>)        | # <tag>
        ({[^}]+})        | # {tag}
        (%\([\w]+\)\w)   | # %(tag)s
        (&\w+;)          | # &entity;
        (&\#\d+;)        | # &#x0123;  decimal entity
        (&\#x[0-9a-f]+;)   # &#xABCD;  hex entity
        ''',
        re.IGNORECASE | re.VERBOSE
    )

    def convert(self, string):
        """Returns: a converted tagged string
        param: string (contains html tags)

        Don't replace characters inside tags
        """
        (string, tags) = self.detag_string(string)
        string = self.inner_convert_string(string)
        string = self.retag_string(string, tags)
        return string

    def detag_string(self, string):
        """Extracts tags from string.

        returns (string, list) where
        string: string has tags replaced by indices (<BR>... => <0>, <1>, <2>, etc.)
        list: list of the removed tags ('<BR>', '<I>', '</I>')
        """
        counter = itertools.count(0)

        def number_tag(_match):
            # FIX: `counter.next()` is Python-2-only; the `next()` builtin
            # works on both Python 2 and 3.
            return '<%s>' % next(counter)

        tags = self.tag_pattern.findall(string)
        # findall returns one tuple per match (one slot per alternation
        # group); exactly one slot is non-empty, so joining recovers the tag.
        tags = [''.join(tag) for tag in tags]
        (new, nfound) = self.tag_pattern.subn(number_tag, string)
        if len(tags) != nfound:
            raise Exception('tags dont match:' + string)
        return (new, tags)

    def retag_string(self, string, tags):
        """substitutes each tag back into string, into occurrences of <0>, <1> etc"""
        for (i, tag) in enumerate(tags):
            p = '<%s>' % i
            string = re.sub(p, tag, string, 1)
        return string

    # ------------------------------
    # Customize this in subclasses of Converter

    def inner_convert_string(self, string):
        return string  # do nothing by default
|
||||
221
i18n/dummy.py
221
i18n/dummy.py
@@ -1,221 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
Generate test translation files from human-readable po files.
|
||||
|
||||
Dummy language is specified in configuration file (see config.py)
|
||||
two letter language codes reference:
|
||||
see http://www.loc.gov/standards/iso639-2/php/code_list.php
|
||||
|
||||
Django will not localize in languages that django itself has not been
|
||||
localized for. So we are using a well-known language (default='eo').
|
||||
Django languages are listed in django.conf.global_settings.LANGUAGES
|
||||
|
||||
po files can be generated with this:
|
||||
django-admin.py makemessages --all --extension html -l en
|
||||
|
||||
Usage:
|
||||
|
||||
$ ./dummy.py
|
||||
|
||||
generates output conf/locale/$DUMMY_LOCALE/LC_MESSAGES,
|
||||
where $DUMMY_LOCALE is the dummy_locale value set in the i18n config
|
||||
"""
|
||||
from __future__ import print_function
|
||||
import re
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
import polib
|
||||
from path import path
|
||||
|
||||
from i18n.config import CONFIGURATION
|
||||
from i18n.converter import Converter
|
||||
|
||||
|
||||
class BaseDummyConverter(Converter):
    """Base class for dummy converters.

    String conversion goes through a character map, then gets padded.
    """
    # Subclasses supply a plain-char -> fancy-char substitution map.
    TABLE = {}

    def inner_convert_string(self, string):
        for plain, fancy in self.TABLE.items():
            string = string.replace(plain, fancy)
        return self.pad(string)

    def pad(self, string):
        # No padding by default; subclasses may override.
        return string

    def convert_msg(self, msg):
        """
        Takes one POEntry object and converts it (adds a dummy translation to it)
        msg is an instance of polib.POEntry
        """
        source = msg.msgid
        if not source:
            # don't translate empty string
            return

        plural = msg.msgid_plural
        if not plural:
            translated = self.convert(source)
            msg.msgstr = self.final_newline(source, translated)
            return

        # translate singular and plural
        singular_dummy = self.convert(source)
        plural_dummy = self.convert(plural)
        msg.msgstr_plural = {
            '0': self.final_newline(source, singular_dummy),
            '1': self.final_newline(plural, plural_dummy),
        }

    def final_newline(self, original, translated):
        """ Returns a new translated string.
        If last char of original is a newline, make sure translation
        has a newline too.
        """
        if original and original[-1] == '\n' and translated[-1] != '\n':
            translated += '\n'
        return translated
|
||||
|
||||
|
||||
class Dummy(BaseDummyConverter):
    r"""
    Creates new localization properties files in a dummy language.

    Each property file is derived from the equivalent en_US file, with these
    transformations applied:

    1. Every vowel is replaced with an equivalent with extra accent marks.
    2. Every string is padded out to +30% length to simulate verbose languages
       (such as German) to see if layout and flows work properly.
    3. Every string is terminated with a '#' character to make it easier to
       detect truncation.

    Example::

        >>> Dummy().convert("My name is Bond, James Bond")
        u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#'

    Embedded HTML tags and %(name)s placeholders are left untouched.
    """
    # Substitute plain characters with accented lookalikes.
    # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
    TABLE = dict(zip(
        u"AabCcEeIiOoUuYy",
        u"ÀäßÇçÉéÌïÖöÛüÝý"
    ))

    # The print industry's standard dummy text, in use since the 1500s
    # (see http://www.lipsum.com/), fed through a "fancy-text" converter.
    # The string starts with a space so it joins nicely with the text that
    # precedes it, and contains an apostrophe (as French often does) so that
    # breakage from single-quoted strings shows up early.
    LOREM = " " + " ".join(  # join and split just make the string easier here.
        u"""
        Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
        тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
        νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
        ¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
        νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
        ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
        єѕт łαвσяυм.
        """.split()
    )

    # To simulate more verbose languages (like German), pad the length of a
    # string by a multiple of PAD_FACTOR.
    PAD_FACTOR = 1.33

    def pad(self, string):
        """add some lorem ipsum text to the end of string"""
        size = len(string)
        # Very short strings grow proportionally more so the padding is visible.
        target = size * 3 if size < 7 else int(size * self.PAD_FACTOR)
        pad_len = target - size - 1
        return string + self.LOREM[:pad_len] + "#"
|
||||
|
||||
|
||||
class Dummy2(BaseDummyConverter):
    """A second dummy converter.

    Like Dummy, but uses a different obvious-yet-readable automatic
    conversion: many letters are struck through, and lower-case letters are
    turned upside-down.
    """
    TABLE = dict(zip(
        u"ABCDEGHIJKLOPRTUYZabcdefghijklmnopqrstuvwxyz",
        u"ȺɃȻĐɆǤĦƗɈꝀŁØⱣɌŦɄɎƵɐqɔpǝɟƃɥᴉɾʞlɯuødbɹsʇnʌʍxʎz"
    ))
|
||||
|
||||
|
||||
def make_dummy(filename, locale, converter):
    """
    Takes a source po file, reads it, and writes out a new po file
    in :param locale: containing a dummy translation.
    """
    if not path(filename).exists():
        raise IOError('File does not exist: %r' % filename)

    source_po = polib.pofile(filename)
    for msg in source_po:
        # Some strings are actually formatting strings, don't dummy-ify them,
        # or dates will look like "DÀTÉ_TÌMÉ_FÖRMÀT Ⱡ'σ# EST"
        if re.match(r"^[A-Z_]+_FORMAT$", msg.msgid):
            continue
        converter.convert_msg(msg)

    # Apply declaration for English pluralization rules so that ngettext will
    # do something reasonable.
    source_po.metadata['Plural-Forms'] = 'nplurals=2; plural=(n != 1);'

    target = new_filename(filename, locale)
    target.parent.makedirs_p()
    source_po.save(target)
|
||||
|
||||
|
||||
def new_filename(original_filename, new_locale):
    """Returns a filename derived from original_filename, using new_locale as the locale"""
    orig = path(original_filename)
    # .../<locale>/LC_MESSAGES/<name> -> .../<new_locale>/LC_MESSAGES/<name>
    relocated = orig.parent.parent.parent / new_locale / orig.parent.name / orig.name
    return relocated.abspath()
|
||||
|
||||
|
||||
def main(verbosity=1):
    """
    Generate dummy strings for all source po files.
    """
    source_dir = CONFIGURATION.source_messages_dir
    # Pair each configured dummy locale with its converter, in order.
    for locale, converter in zip(CONFIGURATION.dummy_locales, [Dummy(), Dummy2()]):
        if verbosity:
            print('Processing source language files into dummy strings, locale "{}"'.format(locale))
        for source_file in CONFIGURATION.source_messages_dir.walkfiles('*.po'):
            if verbosity:
                print('   ', source_file.relpath())
            make_dummy(source_dir.joinpath(source_file), locale, converter)
        if verbosity:
            print()
|
||||
|
||||
|
||||
# Command-line entry point: -v/--verbose may be repeated to raise verbosity.
if __name__ == '__main__':
    # pylint: disable=invalid-name
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--verbose", "-v", action="count", default=0)
    args = parser.parse_args()
    main(verbosity=args.verbose)
|
||||
@@ -1,49 +0,0 @@
|
||||
"""
|
||||
Utility library file for executing shell commands
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import logging
|
||||
|
||||
from i18n.config import BASE_DIR
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def execute(command, working_directory=BASE_DIR, stderr=subprocess.STDOUT):
    """
    Executes shell command in a given working_directory.
    Command is a string to pass to the shell.
    Output is ignored; a non-zero exit status raises CalledProcessError.
    """
    LOG.info("Executing in %s ...", working_directory)
    LOG.info(command)
    # NOTE(review): shell=True with a string command — callers must never pass
    # untrusted input here.
    subprocess.check_call(command, shell=True, cwd=working_directory, stderr=stderr)
|
||||
|
||||
|
||||
def call(command, working_directory=BASE_DIR):
    """
    Executes shell command in a given working_directory.
    Command is a list of strings to execute as a command line.
    Returns a tuple of two strings: (stdout, stderr)
    """
    LOG.info(command)
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=working_directory,
        shell=True,
    )
    stdout, stderr = process.communicate()
    return (stdout, stderr)
|
||||
|
||||
|
||||
def remove_file(filename, verbose=True):
    """
    Attempt to delete filename.

    verbose is boolean. If true, removal is logged.
    Log a warning if file does not exist.
    Logging filenames are relative to BASE_DIR to cut down on noise in output.
    """
    # (docstring fixed: the flag parameter is `verbose`, not `log`.)
    if verbose:
        # Lazy %-style args: the relpath is only formatted if the record is emitted.
        LOG.info('Deleting file %s', os.path.relpath(filename, BASE_DIR))
    if not os.path.exists(filename):
        # LOG.warn() is deprecated; warning() is the supported spelling.
        LOG.warning("File does not exist: %s", os.path.relpath(filename, BASE_DIR))
    else:
        os.remove(filename)
|
||||
233
i18n/extract.py
233
i18n/extract.py
@@ -1,233 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow
|
||||
|
||||
This task extracts all English strings from all source code
|
||||
and produces three human-readable files:
|
||||
conf/locale/en/LC_MESSAGES/django-partial.po
|
||||
conf/locale/en/LC_MESSAGES/djangojs-partial.po
|
||||
conf/locale/en/LC_MESSAGES/mako.po
|
||||
|
||||
This task will clobber any existing django.po file.
|
||||
This is because django-admin.py makemessages hardcodes this filename
|
||||
and it cannot be overridden.
|
||||
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
import importlib
|
||||
import os
|
||||
import os.path
|
||||
import logging
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from path import path
|
||||
from polib import pofile
|
||||
|
||||
from i18n.config import BASE_DIR, LOCALE_DIR, CONFIGURATION
|
||||
from i18n.execute import execute, remove_file
|
||||
from i18n.segment import segment_pofiles
|
||||
|
||||
|
||||
EDX_MARKER = "edX translation file"
|
||||
LOG = logging.getLogger(__name__)
|
||||
DEVNULL = open(os.devnull, 'wb')
|
||||
|
||||
|
||||
def base(path1, *paths):
    """Return a relative path from BASE_DIR to path1 / paths[0] / ... """
    joined = path1.joinpath(*paths)
    return BASE_DIR.relpathto(joined)
|
||||
|
||||
|
||||
def main(verbosity=1):
    """
    Main entry point of script

    Extracts English strings from mako templates, underscore templates,
    Django source (python + html), Javascript, and configured third-party
    apps, then segments and cleans the resulting .po files.
    """
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    LOCALE_DIR.parent.makedirs_p()
    source_msgs_dir = CONFIGURATION.source_messages_dir
    # makemessages refuses to run if a stale django.po is present; clear it.
    remove_file(source_msgs_dir.joinpath('django.po'))

    # Extract strings from mako templates.
    verbosity_map = {
        0: "-q",
        1: "",
        2: "-v",
    }
    babel_verbosity = verbosity_map.get(verbosity, "")

    if verbosity:
        stderr = None
    else:
        stderr = DEVNULL

    babel_cmd_template = 'pybabel {verbosity} extract -F {config} -c "Translators:" . -o {output}'

    babel_mako_cmd = babel_cmd_template.format(
        verbosity=babel_verbosity,
        config=base(LOCALE_DIR, 'babel_mako.cfg'),
        output=base(CONFIGURATION.source_messages_dir, 'mako.po'),
    )
    execute(babel_mako_cmd, working_directory=BASE_DIR, stderr=stderr)

    # Extract strings from underscore templates with a separate babel config.
    babel_underscore_cmd = babel_cmd_template.format(
        verbosity=babel_verbosity,
        config=base(LOCALE_DIR, 'babel_underscore.cfg'),
        output=base(CONFIGURATION.source_messages_dir, 'underscore.po'),
    )
    execute(babel_underscore_cmd, working_directory=BASE_DIR, stderr=stderr)

    makemessages = "django-admin.py makemessages -l en -v{}".format(verbosity)
    ignores = " ".join('--ignore="{}/*"'.format(d) for d in CONFIGURATION.ignore_dirs)
    if ignores:
        makemessages += " " + ignores

    # Extract strings from django source files, including .py files.
    make_django_cmd = makemessages + ' --extension html'
    execute(make_django_cmd, working_directory=BASE_DIR, stderr=stderr)

    # Extract strings from Javascript source files.
    make_djangojs_cmd = makemessages + ' -d djangojs --extension js'
    execute(make_djangojs_cmd, working_directory=BASE_DIR, stderr=stderr)

    # makemessages creates 'django.po'. This filename is hardcoded.
    # Rename it to django-partial.po to enable merging into django.po later.
    os.rename(
        source_msgs_dir.joinpath('django.po'),
        source_msgs_dir.joinpath('django-partial.po')
    )

    # makemessages creates 'djangojs.po'. This filename is hardcoded.
    # Rename it to djangojs-partial.po to enable merging into djangojs.po later.
    os.rename(
        source_msgs_dir.joinpath('djangojs.po'),
        source_msgs_dir.joinpath('djangojs-partial.po')
    )

    files_to_clean = set()

    # Extract strings from third-party applications.
    for app_name in CONFIGURATION.third_party:
        # Import the app to find out where it is. Then use pybabel to extract
        # from that directory.
        app_module = importlib.import_module(app_name)
        app_dir = path(app_module.__file__).dirname().dirname()
        output_file = source_msgs_dir / (app_name + ".po")
        files_to_clean.add(output_file)

        babel_cmd = 'pybabel {verbosity} extract -F {config} -c "Translators:" {app} -o {output}'
        babel_cmd = babel_cmd.format(
            verbosity=babel_verbosity,
            config=LOCALE_DIR / 'babel_third_party.cfg',
            app=app_name,
            output=output_file,
        )
        execute(babel_cmd, working_directory=app_dir, stderr=stderr)

    # Segment the generated files.
    segmented_files = segment_pofiles("en")
    files_to_clean.update(segmented_files)

    # Finish each file.
    for filename in files_to_clean:
        LOG.info('Cleaning %s' % filename)
        po = pofile(source_msgs_dir.joinpath(filename))
        # replace default headers with edX headers
        fix_header(po)
        # replace default metadata with edX metadata
        fix_metadata(po)
        # remove key strings which belong in messages.po
        strip_key_strings(po)
        po.save()
|
||||
|
||||
|
||||
def fix_header(po):
    """
    Replace default headers with edX headers
    """
    # By default, django-admin.py makemessages creates this header:
    #
    #   SOME DESCRIPTIVE TITLE.
    #   Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
    #   This file is distributed under the same license as the PACKAGE package.
    #   FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.

    po.metadata_is_fuzzy = []  # remove [u'fuzzy']
    # Each pair is (boilerplate text, edX replacement); applied in order.
    fixes = (
        ('SOME DESCRIPTIVE TITLE', EDX_MARKER),
        ('Translations template for PROJECT.', EDX_MARKER),
        ('YEAR', str(datetime.utcnow().year)),
        ('ORGANIZATION', 'edX'),
        ("THE PACKAGE'S COPYRIGHT HOLDER", "EdX"),
        (
            'This file is distributed under the same license as the PROJECT project.',
            'This file is distributed under the GNU AFFERO GENERAL PUBLIC LICENSE.'
        ),
        (
            'This file is distributed under the same license as the PACKAGE package.',
            'This file is distributed under the GNU AFFERO GENERAL PUBLIC LICENSE.'
        ),
        ('FIRST AUTHOR <EMAIL@ADDRESS>', 'EdX Team <info@edx.org>'),
    )
    header = po.header
    for boilerplate, replacement in fixes:
        header = header.replace(boilerplate, replacement)
    po.header = header
|
||||
|
||||
|
||||
def fix_metadata(po):
    """
    Replace default metadata with edX metadata
    """
    # By default, django-admin.py makemessages creates metadata full of
    # placeholders such as:
    #
    #   {u'PO-Revision-Date': u'YEAR-MO-DA HO:MI+ZONE',
    #    u'Project-Id-Version': u'PACKAGE VERSION',
    #    u'Last-Translator': u'FULL NAME <EMAIL@ADDRESS>',
    #    u'Language-Team': u'LANGUAGE <LL@li.org>', ...}
    #
    # Overwrite those placeholders with real edX values.
    edx_metadata = {
        'PO-Revision-Date': datetime.utcnow(),
        'Report-Msgid-Bugs-To': 'openedx-translation@googlegroups.com',
        'Project-Id-Version': '0.1a',
        'Language': 'en',
        'Last-Translator': '',
        'Language-Team': 'openedx-translation <openedx-translation@googlegroups.com>',
    }
    po.metadata.update(edx_metadata)
|
||||
|
||||
|
||||
def strip_key_strings(po):
    """
    Removes all entries in PO which are key strings.
    These entries should appear only in messages.po, not in any other po files.
    """
    kept_entries = [entry for entry in po if not is_key_string(entry.msgid)]
    # Clear and refill in place so the po object's identity is preserved.
    del po[:]
    po += kept_entries
|
||||
|
||||
|
||||
def is_key_string(string):
    """
    returns True if string is a key string.
    Key strings begin with underscore (and have at least one more character).
    """
    return len(string) > 1 and string.startswith('_')
|
||||
|
||||
|
||||
# Command-line entry point: -v/--verbose may be repeated to raise verbosity.
if __name__ == '__main__':
    # pylint: disable=invalid-name
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--verbose', '-v', action='count', default=0)
    args = parser.parse_args()
    main(verbosity=args.verbose)
|
||||
141
i18n/generate.py
141
i18n/generate.py
@@ -1,141 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow
|
||||
|
||||
This task merges and compiles the human-readable .po files on the
|
||||
local filesystem into machine-readable .mo files. This is typically
|
||||
necessary as part of the build process since these .mo files are
|
||||
needed by Django when serving the web app.
|
||||
|
||||
The configuration file (in edx-platform/conf/locale/config.yaml) specifies which
|
||||
languages to generate.
|
||||
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from polib import pofile
|
||||
|
||||
from i18n.config import BASE_DIR, CONFIGURATION
|
||||
from i18n.execute import execute
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
DEVNULL = open(os.devnull, "wb")
|
||||
|
||||
|
||||
def merge(locale, target='django.po', sources=('django-partial.po',), fail_if_missing=True):
    """
    For the given locale, merge the `sources` files to become the `target`
    file. Note that the target file might also be one of the sources.

    If fail_if_missing is true, and the files to be merged are missing,
    throw an Exception, otherwise return silently.

    If fail_if_missing is false, and the files to be merged are missing,
    just return silently.
    """
    LOG.info('Merging {target} for locale {locale}'.format(target=target, locale=locale))
    locale_directory = CONFIGURATION.get_messages_dir(locale)
    try:
        validate_files(locale_directory, sources)
    except Exception:
        # FIX: `except Exception, e:` is Python-2-only syntax (a SyntaxError
        # on Python 3); the bound name was unused, so it is dropped.
        if not fail_if_missing:
            return
        raise

    # merged file is merged.po
    merge_cmd = 'msgcat -o merged.po ' + ' '.join(sources)
    execute(merge_cmd, working_directory=locale_directory)

    # clean up redundancies in the metadata
    merged_filename = locale_directory.joinpath('merged.po')
    clean_pofile(merged_filename)

    # rename merged.po -> django.po (default)
    target_filename = locale_directory.joinpath(target)
    os.rename(merged_filename, target_filename)
|
||||
|
||||
|
||||
def merge_files(locale, fail_if_missing=True):
    """
    Merge all the files in `locale`, as specified in config.yaml.
    """
    # generate_merge maps each target filename to the list of source files
    # that should be concatenated into it.
    for target_file, source_files in CONFIGURATION.generate_merge.items():
        merge(locale, target_file, source_files, fail_if_missing)
|
||||
|
||||
|
||||
def clean_pofile(file):
    """
    Clean various aspect of a .po file.

    Fixes:

    - Removes the ,fuzzy flag on metadata.

    - Removes occurrence line numbers so that the generated files don't
      generate a lot of line noise when they're committed.

    - Removes any flags ending with "-format". Mac gettext seems to add
      these flags, Linux does not, and we don't seem to need them. By
      removing them, we reduce the unimportant differences that clutter
      diffs as different developers work on the files.
    """
    # Reading in the .po file and saving it again fixes redundancies.
    pomsgs = pofile(file)
    # The msgcat tool marks the metadata as fuzzy, but it's ok as it is.
    pomsgs.metadata_is_fuzzy = False
    for entry in pomsgs:
        # Drop line numbers from occurrences; keep only the source filenames.
        entry.occurrences = [(occurrence_file, None) for occurrence_file, _lineno in entry.occurrences]
        # Strip platform-dependent "-format" flags.
        entry.flags = [flag for flag in entry.flags if not flag.endswith("-format")]
    pomsgs.save()
|
||||
|
||||
|
||||
def validate_files(dir, files_to_merge):
    """
    Asserts that the given files exist.
    files_to_merge is a list of file names (no directories).
    dir is the directory (a path object from path.py) in which the files should appear.
    raises an Exception if any of the files are not in dir.
    """
    for file_name in files_to_merge:  # renamed from `path`, which shadowed the path import
        candidate = dir.joinpath(file_name)
        if not candidate.exists():
            raise Exception("I18N: Cannot generate because file not found: {0}".format(candidate))
|
||||
|
||||
|
||||
def main(strict=True, verbosity=1):
    """
    Main entry point for script
    """
    for locale in CONFIGURATION.translated_locales:
        merge_files(locale, fail_if_missing=strict)
    # Dummy text is not required. Don't raise exception if files are missing.
    for dummy_locale in CONFIGURATION.dummy_locales:
        merge_files(dummy_locale, fail_if_missing=False)

    compile_cmd = 'django-admin.py compilemessages -v{}'.format(verbosity)
    stderr = None if verbosity else DEVNULL
    execute(compile_cmd, working_directory=BASE_DIR, stderr=stderr)
|
||||
|
||||
|
||||
# Command-line entry point for generating merged/compiled message files.
if __name__ == '__main__':
    # Log to stdout so build output captures merge/compile progress.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # pylint: disable=invalid-name
    parser = argparse.ArgumentParser(description="Generate merged and compiled message files.")
    parser.add_argument("--strict", action='store_true', help="Complain about missing files.")
    parser.add_argument("--verbose", "-v", action="count", default=0)
    args = parser.parse_args()

    main(strict=args.strict, verbosity=args.verbose)
|
||||
148
i18n/segment.py
148
i18n/segment.py
@@ -1,148 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Segment a .po file to produce smaller files based on the locations of the
|
||||
messages.
|
||||
"""
|
||||
|
||||
import copy
|
||||
import fnmatch
|
||||
import logging
|
||||
import sys
|
||||
import argparse
|
||||
import polib
|
||||
import textwrap
|
||||
|
||||
from i18n.config import CONFIGURATION
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def segment_pofiles(locale):
    """Segment all the pofiles for `locale`.

    Returns a set of filenames, all the segment files written.
    """
    written = set()
    messages_dir = CONFIGURATION.get_messages_dir(locale)
    for po_name, segments in CONFIGURATION.segment.items():
        written.update(segment_pofile(messages_dir / po_name, segments))
    return written
|
||||
|
||||
|
||||
def segment_pofile(filename, segments):
    """Segment a .po file using patterns in `segments`.

    The .po file at `filename` is read, and the occurrence locations of its
    messages are examined.  `segments` is a dictionary: the keys are segment
    .po filenames, the values are lists of patterns::

        {
            'django-studio.po': [
                'cms/*',
                'some-other-studio-place/*',
            ],
            'django-weird.po': [
                '*/weird_*.*',
            ],
        }

    If all a message's occurrences match the patterns for a segment, then that
    message is written to the new segmented .po file.

    Any message that matches no segments, or more than one, is written back to
    the original file.

    Arguments:
        filename (path.path): a path object referring to the original .po file.
        segments (dict): specification of the segments to create.

    Returns:
        a set of path objects, all the segment files written.

    """
    source_po = polib.pofile(filename)
    LOG.info("Reading {num} entries from {file}".format(file=filename, num=len(source_po)))

    # An empty clone of the source: same headers and metadata, no messages.
    # Anything that can't be cleanly segmented ends up in this file.
    leftover_po = copy.deepcopy(source_po)
    leftover_po[:] = []

    # `outputs` maps each output filename (including the original) to the
    # pofile collecting its messages; `pattern_pairs` flattens the segment
    # spec into (pattern, segment filename) tuples.
    outputs = {filename: leftover_po}
    pattern_pairs = []
    for seg_name, patterns in segments.items():
        outputs[seg_name] = copy.deepcopy(leftover_po)
        pattern_pairs.extend((pattern, seg_name) for pattern in patterns)

    def segment_for(occurrence_path):
        """Return the segment an occurrence maps to, or `filename` if none match."""
        for pattern, seg_name in pattern_pairs:
            if fnmatch.fnmatch(occurrence_path, pattern):
                return seg_name
        return filename

    # A message lands in a segment only when every one of its occurrences maps
    # to that same single destination; otherwise it stays in the original file.
    for entry in source_po:
        destinations = set(segment_for(occ_path) for occ_path, _ in entry.occurrences)
        assert destinations
        if len(destinations) == 1:
            outputs[destinations.pop()].append(entry)
        else:
            leftover_po.append(entry)

    # Write out the results, skipping (and complaining about) empty outputs.
    files_written = set()
    for seg_name, po_out in outputs.items():
        out_file = filename.dirname() / seg_name
        if len(po_out) == 0:
            LOG.error("No messages to write to {file}, did you run segment twice?".format(file=out_file))
        else:
            LOG.info("Writing {num} entries to {file}".format(file=out_file, num=len(po_out)))
            po_out.save(out_file)
            files_written.add(out_file)

    return files_written
|
||||
|
||||
def main(locales=None, verbosity=1):  # pylint: disable=unused-argument
    """
    Main entry point of script
    """
    # This is used as a tool only to segment translation files when adding a
    # new segment. In the regular workflow, the work is done by the extract
    # phase calling the functions above.
    for locale in (locales or []):
        segment_pofiles(locale)
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
||||
|
||||
# pylint: disable=invalid-name
|
||||
description = textwrap.dedent("""
|
||||
Segment the .po files in LOCALE(s) based on the segmenting rules in
|
||||
config.yaml.
|
||||
|
||||
Note that segmenting is *not* idempotent: it modifies the input file, so
|
||||
be careful that you don't run it twice on the same file.
|
||||
""".strip())
|
||||
|
||||
parser = argparse.ArgumentParser(description=description)
|
||||
parser.add_argument("locale", nargs="+", help="a locale to segment")
|
||||
parser.add_argument("--verbose", "-v", action="count", default=0)
|
||||
args = parser.parse_args()
|
||||
main(locales=args.locale, verbosity=args.verbose)
|
||||
@@ -1,37 +0,0 @@
|
||||
# This is test data.
|
||||
#
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: 0.1a\n"
|
||||
"Report-Msgid-Bugs-To: openedx-translation@googlegroups.com\n"
|
||||
"POT-Creation-Date: 2014-01-22 15:35-0500\n"
|
||||
"PO-Revision-Date: 2014-01-22 20:35:52.096456\n"
|
||||
"Last-Translator: \n"
|
||||
"Language-Team: openedx-translation <openedx-translation@googlegroups.com>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Language: en\n"
|
||||
|
||||
#: cms/djangoapps/contentstore/views/tabs.py:39
|
||||
#: lms/djangoapps/instructor/views/instructor_dashboard.py:111
|
||||
msgid "Course Info"
|
||||
msgstr "stuff about the course"
|
||||
|
||||
#: common/djangoapps/course_modes/models.py:43
|
||||
msgid "Honor Code Certificate"
|
||||
msgstr "your paper"
|
||||
|
||||
#: common/djangoapps/course_modes/views.py:81
|
||||
#: common/djangoapps/student/views.py:478
|
||||
msgid "Enrollment is closed"
|
||||
msgstr "no way, dude"
|
||||
|
||||
#: common/static/js/vendor/mathjax-MathJax-c9db6ac/docs/source/mjtheme/layout.html:129
|
||||
#: lms/templates/wiki/plugins/attachments/index.html:40
|
||||
msgid "Search"
|
||||
msgstr "find it!"
|
||||
|
||||
#: lms/djangoapps/courseware/features/video.py:111
|
||||
msgid "ERROR: No playable video sources found!"
|
||||
msgstr "try youtube, dude!"
|
||||
@@ -1,52 +0,0 @@
|
||||
# This is test data.
|
||||
#
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: 0.1a\n"
|
||||
"Report-Msgid-Bugs-To: openedx-translation@googlegroups.com\n"
|
||||
"POT-Creation-Date: 2014-01-22 15:35-0500\n"
|
||||
"PO-Revision-Date: 2014-01-22 20:35:52.096456\n"
|
||||
"Last-Translator: \n"
|
||||
"Language-Team: openedx-translation <openedx-translation@googlegroups.com>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Language: en\n"
|
||||
|
||||
#: cms/djangoapps/contentstore/views/tabs.py:39
|
||||
#: lms/djangoapps/instructor/views/instructor_dashboard.py:111
|
||||
msgid "Course Info"
|
||||
msgstr "stuff about the course"
|
||||
|
||||
#: common/djangoapps/course_modes/models.py:43
|
||||
msgid "Honor Code Certificate"
|
||||
msgstr "your paper"
|
||||
|
||||
#: common/djangoapps/course_modes/views.py:81
|
||||
#: common/djangoapps/student/views.py:478
|
||||
msgid "Enrollment is closed"
|
||||
msgstr "no way, dude"
|
||||
|
||||
#: cms/djangoapps/contentstore/views/course.py:237
|
||||
msgid ""
|
||||
"There is already a course defined with the same organization, course number,"
|
||||
" and course run. Please change either organization or course number to be "
|
||||
"unique."
|
||||
msgstr "org/course/run, wtf??"
|
||||
|
||||
#: cms/djangoapps/contentstore/views/course.py:243
|
||||
#: cms/djangoapps/contentstore/views/course.py:247
|
||||
#: other_cms/djangoapps/contentstore/views/course.py:269
|
||||
#: cms/djangoapps/contentstore/views/course.py:272
|
||||
msgid ""
|
||||
"Please change either the organization or course number so that it is unique."
|
||||
msgstr "pick again!"
|
||||
|
||||
#: common/static/js/vendor/mathjax-MathJax-c9db6ac/docs/source/mjtheme/layout.html:129
|
||||
#: lms/templates/wiki/plugins/attachments/index.html:40
|
||||
msgid "Search"
|
||||
msgstr "find it!"
|
||||
|
||||
#: lms/djangoapps/courseware/features/video.py:111
|
||||
msgid "ERROR: No playable video sources found!"
|
||||
msgstr "try youtube, dude!"
|
||||
@@ -1,29 +0,0 @@
|
||||
# This is test data.
|
||||
#
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: 0.1a\n"
|
||||
"Report-Msgid-Bugs-To: openedx-translation@googlegroups.com\n"
|
||||
"POT-Creation-Date: 2014-01-22 15:35-0500\n"
|
||||
"PO-Revision-Date: 2014-01-22 20:35:52.096456\n"
|
||||
"Last-Translator: \n"
|
||||
"Language-Team: openedx-translation <openedx-translation@googlegroups.com>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Language: en\n"
|
||||
|
||||
#: cms/djangoapps/contentstore/views/course.py:237
|
||||
msgid ""
|
||||
"There is already a course defined with the same organization, course number,"
|
||||
" and course run. Please change either organization or course number to be "
|
||||
"unique."
|
||||
msgstr "org/course/run, wtf??"
|
||||
|
||||
#: cms/djangoapps/contentstore/views/course.py:243
|
||||
#: cms/djangoapps/contentstore/views/course.py:247
|
||||
#: other_cms/djangoapps/contentstore/views/course.py:269
|
||||
#: cms/djangoapps/contentstore/views/course.py:272
|
||||
msgid ""
|
||||
"Please change either the organization or course number so that it is unique."
|
||||
msgstr "pick again!"
|
||||
@@ -1,58 +0,0 @@
|
||||
"""
|
||||
Test that the compiled .mo files match the translations in the
|
||||
uncompiled .po files.
|
||||
|
||||
This is required because we are checking in the .mo files into
|
||||
the repo, but compiling them is a manual process. We want to make
|
||||
sure that we find out if someone forgets the compilation step.
|
||||
"""
|
||||
|
||||
import ddt
|
||||
import polib
|
||||
from unittest import TestCase
|
||||
|
||||
from i18n.config import CONFIGURATION, LOCALE_DIR
|
||||
|
||||
@ddt.ddt
class TestCompiledMessages(TestCase):
    """
    Test that mo files match their source po files
    """

    # The compiled catalogs checked for every translated locale.
    PO_FILES = ['django.po', 'djangojs.po']

    @ddt.data(*CONFIGURATION.translated_locales)
    def test_translated_messages(self, locale):
        """Compare each .po file's translated entries with its compiled .mo twin."""
        message_dir = LOCALE_DIR / locale / 'LC_MESSAGES'
        for pofile_name in self.PO_FILES:
            pofile_path = message_dir / pofile_name
            pofile = polib.pofile(pofile_path)
            mofile = polib.mofile(pofile_path.stripext() + '.mo')

            po_entries = {entry.msgid: entry for entry in pofile.translated_entries()}
            mo_entries = {entry.msgid: entry for entry in mofile.translated_entries()}

            # Check that there are no entries in po that aren't in mo, and vice-versa.
            # (set() comparison instead of dict.viewkeys(), which is Python-2-only.)
            self.assertEqual(set(po_entries), set(mo_entries))

            # iteritems() is Python-2-only; items() behaves the same here.
            for entry_id, po_entry in po_entries.items():
                mo_entry = mo_entries[entry_id]
                for attr in ('msgstr', 'msgid_plural', 'msgstr_plural', 'msgctxt', 'obsolete', 'encoding'):
                    po_attr = getattr(po_entry, attr)
                    mo_attr = getattr(mo_entry, attr)

                    # The msgstr_plural in the mo_file is keyed on ints, but in the po_file it's
                    # keyed on strings. This normalizes them.
                    if attr == 'msgstr_plural':
                        po_attr = {int(key): val for (key, val) in po_attr.items()}

                    # assertEqual, not the deprecated assertEquals alias.
                    self.assertEqual(
                        po_attr,
                        mo_attr,
                        "When comparing {} for entry {!r}, {!r} from the .po file doesn't match {!r} from the .mo file".format(
                            attr,
                            entry_id,
                            po_attr,
                            mo_attr,
                        )
                    )
@@ -1,33 +0,0 @@
|
||||
import os
|
||||
from unittest import TestCase
|
||||
|
||||
from i18n.config import Configuration, LOCALE_DIR, CONFIGURATION
|
||||
|
||||
class TestConfiguration(TestCase):
    """
    Tests functionality of i18n/config.py
    """

    def test_config(self):
        """A valid config file loads and reports the expected source locale."""
        yaml_path = os.path.normpath(os.path.join(LOCALE_DIR, 'config.yaml'))
        loaded = Configuration(yaml_path)
        self.assertEqual(loaded.source_locale, 'en')

    def test_no_config(self):
        """Loading a nonexistent config file raises."""
        missing_path = os.path.normpath(os.path.join(LOCALE_DIR, 'no_such_file'))
        with self.assertRaises(Exception):
            Configuration(missing_path)

    def test_valid_configuration(self):
        """
        Make sure we have a valid configuration file,
        and that it contains an 'en' locale.
        Also check values of dummy_locale and source_locale.
        """
        self.assertIsNotNone(CONFIGURATION)
        configured_locales = CONFIGURATION.locales
        self.assertIsNotNone(configured_locales)
        self.assertIsInstance(configured_locales, list)
        self.assertIn('en', configured_locales)
        self.assertEqual('eo', CONFIGURATION.dummy_locales[0])
        self.assertEqual('en', CONFIGURATION.source_locale)
@@ -1,61 +0,0 @@
|
||||
"""Tests of i18n/converter.py"""
|
||||
|
||||
from unittest import TestCase
|
||||
|
||||
import ddt
|
||||
|
||||
from i18n import converter
|
||||
|
||||
class UpcaseConverter(converter.Converter):
    """
    Test double for Converter: uppercases the translatable text while the
    base class protects embedded tags from conversion.
    """
    def inner_convert_string(self, string):
        """Return `string` converted to uppercase."""
        return string.upper()
||||
|
||||
|
||||
@ddt.ddt
class TestConverter(TestCase):
    """
    Tests functionality of i18n/converter.py
    """

    @ddt.data(
        # no tags
        ('big bad wolf',
         'BIG BAD WOLF'),
        # one html tag
        ('big <strong>bad</strong> wolf',
         'BIG <strong>BAD</strong> WOLF'),
        # two html tags
        ('big <b>bad</b> gray <i>wolf</i>',
         'BIG <b>BAD</b> GRAY <i>WOLF</i>'),
        # html tags with attributes
        ('<a href="foo">bar</a> baz',
         '<a href="foo">BAR</a> BAZ'),
        ("<a href='foo'>bar</a> baz",
         "<a href='foo'>BAR</a> BAZ"),
        # one python tag
        ('big %(adjective)s wolf',
         'BIG %(adjective)s WOLF'),
        # two python tags
        ('big %(adjective)s gray %(noun)s',
         'BIG %(adjective)s GRAY %(noun)s'),
        # both kinds of tags
        ('<strong>big</strong> %(adjective)s %(noun)s',
         '<strong>BIG</strong> %(adjective)s %(noun)s'),
        # .format-style tags
        ('The {0} barn is {1!r}.',
         'THE {0} BARN IS {1!r}.'),
        # HTML entities
        ('<b>&copy; 2013 edX, &#xae;</b>',
         '<b>&copy; 2013 EDX, &#xae;</b>'),
    )
    def test_converter(self, data):
        """
        Tests with a simple converter (converts strings to uppercase).
        Assert that embedded HTML and python tags are not converted.
        """
        source, expected = data
        result = UpcaseConverter().convert(source)
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(result, expected)
||||
@@ -1,69 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Tests of i18n/dummy.py"""
|
||||
|
||||
from unittest import TestCase
|
||||
|
||||
import ddt
|
||||
from polib import POEntry
|
||||
|
||||
from i18n import dummy
|
||||
|
||||
|
||||
@ddt.ddt
class TestDummy(TestCase):
    """
    Tests functionality of i18n/dummy.py
    """

    def setUp(self):
        self.converter = dummy.Dummy()

    def assertUnicodeEquals(self, str1, str2):
        """Just like assertEqual, but doesn't put Unicode into the fail message.

        Either nose, or rake, or something, deals very badly with unusual
        Unicode characters in the assertions, so we use repr here to keep
        things safe.

        """
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(
            str1, str2,
            "Mismatch: %r != %r" % (str1, str2),
        )

    @ddt.data(
        (u"hello my name is Bond, James Bond",
         u"héllö mý nämé ïs Bönd, Jämés Bönd Ⱡ'σяєм ι#"),

        (u"don't convert <a href='href'>tag ids</a>",
         u"dön't çönvért <a href='href'>täg ïds</a> Ⱡ'σяєм#"),

        (u"don't convert %(name)s tags on %(date)s",
         u"dön't çönvért %(name)s tägs ön %(date)s Ⱡ'σяєм #"),
    )
    def test_dummy(self, data):
        """
        Tests with a dummy converter (adds spurious accents to strings).
        Assert that embedded HTML and python tags are not converted.
        """
        source, expected = data
        result = self.converter.convert(source)
        self.assertUnicodeEquals(result, expected)

    def test_singular(self):
        """A singular entry gets an accented msgstr."""
        entry = POEntry()
        entry.msgid = "A lovely day for a cup of tea."
        expected = u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #"
        self.converter.convert_msg(entry)
        self.assertUnicodeEquals(entry.msgstr, expected)

    def test_plural(self):
        """A plural entry gets accented msgstr_plural forms for both counts."""
        entry = POEntry()
        entry.msgid = "A lovely day for a cup of tea."
        entry.msgid_plural = "A lovely day for some cups of tea."
        expected_s = u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #"
        expected_p = u"À lövélý däý för sömé çüps öf téä. Ⱡ'σяєм ιρ#"
        self.converter.convert_msg(entry)
        result = entry.msgstr_plural
        self.assertUnicodeEquals(result['0'], expected_s)
        self.assertUnicodeEquals(result['1'], expected_p)
@@ -1,91 +0,0 @@
|
||||
from datetime import datetime, timedelta
|
||||
import os
|
||||
from unittest import TestCase
|
||||
|
||||
from nose.plugins.skip import SkipTest
|
||||
import polib
|
||||
from pytz import UTC
|
||||
|
||||
from i18n import extract
|
||||
from i18n.config import CONFIGURATION
|
||||
|
||||
# Make sure setup runs only once
|
||||
SETUP_HAS_RUN = False
|
||||
|
||||
|
||||
class TestExtract(TestCase):
    """
    Tests functionality of i18n/extract.py
    """
    generated_files = ('django-partial.po', 'djangojs-partial.po', 'mako.po')

    def setUp(self):
        # Skip this test because it takes too long (>1 minute)
        # TODO: figure out how to declare a "long-running" test suite
        # and add this test to it.
        raise SkipTest()

        global SETUP_HAS_RUN

        # Subtract 1 second to help comparisons with file-modify time succeed,
        # since os.path.getmtime() is not millisecond-accurate
        self.start_time = datetime.now(UTC) - timedelta(seconds=1)
        super(TestExtract, self).setUp()
        if not SETUP_HAS_RUN:
            # Run extraction script. Warning, this takes 1 minute or more
            extract.main(verbosity=0)
            SETUP_HAS_RUN = True

    def get_files(self):
        """
        This is a generator.
        Returns the fully expanded filenames for all extracted files
        Fails assertion if one of the files doesn't exist.
        """
        for filename in self.generated_files:
            path = os.path.join(CONFIGURATION.source_messages_dir, filename)
            exists = os.path.exists(path)
            self.assertTrue(exists, msg='Missing file: %s' % filename)
            if exists:
                yield path

    def test_files(self):
        """
        Asserts that each auto-generated file has been modified since 'extract' was launched.
        Intended to show that the file has been touched by 'extract'.
        """
        for path in self.get_files():
            # BUGFIX: self.start_time is timezone-aware (UTC), so the file
            # mtime must be made aware too; comparing naive vs aware
            # datetimes raises TypeError.
            modified = datetime.fromtimestamp(os.path.getmtime(path), UTC)
            self.assertTrue(modified > self.start_time,
                            msg='File not recently modified: %s' % os.path.basename(path))

    def test_is_keystring(self):
        """
        Verifies is_keystring predicate
        """
        entry1 = polib.POEntry()
        entry2 = polib.POEntry()
        entry1.msgid = "_.lms.admin.warning.keystring"
        entry2.msgid = "This is not a keystring"
        self.assertTrue(extract.is_key_string(entry1.msgid))
        self.assertFalse(extract.is_key_string(entry2.msgid))

    def test_headers(self):
        """Verify all headers have been modified"""
        for path in self.get_files():
            po = polib.pofile(path)
            header = po.header
            self.assertEqual(
                header.find('edX translation file'),
                0,
                msg='Missing header in %s:\n"%s"' % (os.path.basename(path), header)
            )

    def test_metadata(self):
        """Verify all metadata has been modified"""
        for path in self.get_files():
            po = polib.pofile(path)
            metadata = po.metadata
            value = metadata['Report-Msgid-Bugs-To']
            expected = 'openedx-translation@googlegroups.com'
            # assertEqual, not the deprecated assertEquals alias.
            self.assertEqual(expected, value)
@@ -1,96 +0,0 @@
|
||||
from datetime import datetime, timedelta
|
||||
import os
|
||||
import sys
|
||||
import string
|
||||
import random
|
||||
import re
|
||||
|
||||
from unittest import TestCase
|
||||
from mock import patch
|
||||
from polib import pofile
|
||||
from pytz import UTC
|
||||
|
||||
from i18n import extract
|
||||
from i18n import generate
|
||||
from i18n import dummy
|
||||
from i18n.config import CONFIGURATION
|
||||
|
||||
|
||||
class TestGenerate(TestCase):
    """
    Tests functionality of i18n/generate.py
    """
    generated_files = ('django-partial.po', 'djangojs-partial.po', 'mako.po')

    @classmethod
    def setUpClass(cls):
        # Extraction + dummy generation are slow; warn the user up front.
        sys.stderr.write(
            "\nExtracting i18n strings and generating dummy translations; "
            "this may take a few minutes\n"
        )
        sys.stderr.flush()
        extract.main(verbosity=0)
        dummy.main(verbosity=0)

    def setUp(self):
        # Subtract 1 second to help comparisons with file-modify time succeed,
        # since os.path.getmtime() is not millisecond-accurate
        self.start_time = datetime.now(UTC) - timedelta(seconds=1)

    def test_merge(self):
        """
        Tests merge script on English source files.
        """
        target_file = os.path.join(CONFIGURATION.source_messages_dir, random_name())
        generate.merge(CONFIGURATION.source_locale, target=target_file)
        self.assertTrue(os.path.exists(target_file))
        os.remove(target_file)

    # Patch dummy_locales to not have esperanto present
    @patch.object(CONFIGURATION, 'dummy_locales', ['fake2'])
    def test_main(self):
        """
        Runs generate.main() which should merge source files,
        then compile all sources in all configured languages.
        Validates output by checking all .mo files in all configured languages.
        .mo files should exist, and be recently created (modified
        after start of test suite)
        """
        generate.main(verbosity=0, strict=False)
        for locale in CONFIGURATION.translated_locales:
            messages_dir = CONFIGURATION.get_messages_dir(locale)
            for base_name in ('django', 'djangojs'):
                mofile = base_name + '.mo'
                path = os.path.join(messages_dir, mofile)
                exists = os.path.exists(path)
                self.assertTrue(exists, msg='Missing file in locale %s: %s' % (locale, mofile))
                modified = datetime.fromtimestamp(os.path.getmtime(path), UTC)
                self.assertTrue(modified >= self.start_time,
                                msg='File not recently modified: %s' % path)
            # Segmenting means that the merge headers don't work they way they
            # used to, so don't make this check for now. I'm not sure if we'll
            # get the merge header back eventually, or delete this code eventually.
            # self.assert_merge_headers(locale)

    def assert_merge_headers(self, locale):
        """
        This is invoked by test_main to ensure that it runs after
        calling generate.main().

        There should be exactly three merge comment headers
        in our merged .po file. This counts them to be sure.
        A merge comment looks like this:
        # #-#-#-#-#  django-partial.po (0.1a)  #-#-#-#-#

        """
        merged_path = os.path.join(CONFIGURATION.get_messages_dir(locale), 'django.po')
        merged_po = pofile(merged_path)
        merge_comments = re.compile('^#-#-#-#-#', re.M).findall(merged_po.header)
        self.assertEqual(len(merge_comments), 3,
                         msg="Found %s (should be 3) merge comments in the header for %s" % \
                             (len(merge_comments), merged_path))
|
||||
|
||||
def random_name(size=6):
    """Returns random filename as string, like test-4BZ81W"""
    alphabet = string.ascii_uppercase + string.digits
    suffix = ''.join(random.choice(alphabet) for _ in range(size))
    return 'test-' + suffix
||||
@@ -1,58 +0,0 @@
|
||||
"""Test i18n/segment.py"""
|
||||
|
||||
import os.path
|
||||
import shutil
|
||||
import unittest
|
||||
|
||||
from path import path
|
||||
import polib
|
||||
|
||||
from i18n.segment import segment_pofile
|
||||
|
||||
|
||||
HERE = path(__file__).dirname()
|
||||
TEST_DATA = HERE / "data"
|
||||
WORK = HERE / "work"
|
||||
|
||||
|
||||
class SegmentTest(unittest.TestCase):
    """Test segment_pofile."""

    def setUp(self):
        # Each test gets a scratch directory that is removed afterwards.
        if not os.path.exists(WORK):
            os.mkdir(WORK)
        self.addCleanup(shutil.rmtree, WORK)

    def assert_pofile_same(self, pofile1, pofile2):
        """The paths `p1` and `p2` should be identical pofiles."""
        parsed_one = polib.pofile(pofile1)
        parsed_two = polib.pofile(pofile2)
        self.assertEqual(parsed_one, parsed_two)

    def test_sample_data(self):
        """Segmenting the sample file splits it into the expected two files."""
        work_file = WORK / "django.po"
        shutil.copyfile(TEST_DATA / "django_before.po", work_file)
        original_pofile = polib.pofile(work_file)

        written = segment_pofile(
            work_file,
            {
                'studio.po': [
                    'cms/*',
                    'other_cms/*',
                ],
            }
        )

        self.assertEqual(written, set([WORK / "django.po", WORK / "studio.po"]))

        # No message may be lost or duplicated by segmenting.
        segmented = [polib.pofile(written_file) for written_file in written]
        total_after = sum(len(po) for po in segmented)
        self.assertEqual(len(original_pofile), total_after)

        ids_before = set(entry.msgid for entry in original_pofile)
        ids_after = set(entry.msgid for po in segmented for entry in po)
        self.assertEqual(ids_before, ids_after)

        self.assert_pofile_same(WORK / "django.po", TEST_DATA / "django_after.po")
        self.assert_pofile_same(WORK / "studio.po", TEST_DATA / "studio.po")
@@ -1,87 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
from polib import pofile
|
||||
import argparse
|
||||
|
||||
from i18n.config import CONFIGURATION
|
||||
from i18n.execute import execute
|
||||
from i18n.extract import EDX_MARKER
|
||||
|
||||
TRANSIFEX_HEADER = u'edX community translations have been downloaded from {}'
|
||||
TRANSIFEX_URL = 'https://www.transifex.com/projects/p/edx-platform/'
|
||||
|
||||
|
||||
def push():
    """Push source strings up to Transifex."""
    execute('tx push -s')
||||
|
||||
def pull():
    """Pull reviewed translations for all languages down from Transifex."""
    print("Pulling languages from transifex...")
    # Pull translations from all languages where there is
    # at least 10% reviewed translations
    execute('tx pull --mode=reviewed --all')
    clean_translated_locales()
|
||||
|
||||
def clean_translated_locales():
    """
    Strips out the warning from all translated po files
    about being an English source file.
    """
    for translated_locale in CONFIGURATION.translated_locales:
        clean_locale(translated_locale)
|
||||
|
||||
def clean_locale(locale):
    """
    Strips out the warning from all of a locale's translated po files
    about being an English source file.
    Iterates over machine-generated files.
    """
    messages_dir = CONFIGURATION.get_messages_dir(locale)
    generated_names = ('django-partial.po', 'djangojs-partial.po', 'mako.po')
    for po_name in generated_names:
        clean_file(messages_dir.joinpath(po_name))
|
||||
|
||||
def clean_file(filename):
    """
    Strips out the warning from a translated po file about being an English source file.
    Replaces warning with a note about coming from Transifex.
    """
    try:
        po = pofile(filename)
    except Exception as exc:  # pylint: disable=broad-except
        # An exception can occur when a language is deleted from Transifex.
        # Don't totally fail here.
        print("Encountered error {} with filename {} - language project may no longer exist on Transifex".format(exc, filename))
        return
    # Swap the English-source marker for the Transifex attribution header.
    if EDX_MARKER in po.header:
        po.header = po.header.replace(EDX_MARKER, get_new_header(po))
        po.save()
|
||||
|
||||
def get_new_header(po):
    """Return the Transifex attribution header, crediting the language team if known."""
    team = po.metadata.get('Language-Team', None)
    return TRANSIFEX_HEADER.format(team if team else TRANSIFEX_URL)
|
||||
|
||||
if __name__ == '__main__':
    # pylint: disable=invalid-name
    parser = argparse.ArgumentParser()
    parser.add_argument("command", help="push or pull")
    parser.add_argument("--verbose", "-v")
    args = parser.parse_args()
    # pylint: enable=invalid-name

    # Dispatch table instead of an if/elif chain.
    handlers = {"push": push, "pull": pull}
    handler = handlers.get(args.command)
    if handler is None:
        raise Exception("unknown command ({cmd})".format(cmd=args.command))
    handler()
||||
218
i18n/validate.py
218
i18n/validate.py
@@ -1,218 +0,0 @@
|
||||
"""Tests that validate .po files."""
|
||||
|
||||
import argparse
|
||||
import codecs
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import textwrap
|
||||
|
||||
import polib
|
||||
|
||||
from i18n.config import LOCALE_DIR
|
||||
from i18n.execute import call
|
||||
from i18n.converter import Converter
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def validate_po_files(root, report_empty=False):
    """
    Validate all of the po files found in the root directory.
    """
    for dirpath, _dirs, filenames in os.walk(root):
        for name in filenames:
            _base, ext = os.path.splitext(name)
            if ext.lower() != '.po':
                continue
            po_path = os.path.join(dirpath, name)
            # First validate the format of this file
            msgfmt_check_po_file(po_path)
            # Now, check that the translated strings are valid, and optionally check for empty translations
            check_messages(po_path, report_empty)
|
||||
|
||||
def msgfmt_check_po_file(filename):
    """
    Call GNU msgfmt -c on each .po file to validate its format.
    Any errors caught by msgfmt are logged to log.
    """
    # Use relative paths to make output less noisy.
    rfile = os.path.relpath(filename, LOCALE_DIR)
    out, err = call('msgfmt -c {}'.format(rfile), working_directory=LOCALE_DIR)
    if err != '':
        log.info('\n' + out)
        # log.warn is a deprecated alias for log.warning.
        log.warning('\n' + err)
|
||||
|
||||
def tags_in_string(msg):
    """
    Return the set of tags in a message string.

    Tags includes HTML tags, data placeholders, etc.

    Skips tags that might change due to translations: HTML entities, <abbr>,
    and so on.

    """
    def is_linguistic_tag(tag):
        """Is this tag one that can change with the language?"""
        return tag.startswith("&") or any(
            fragment in tag for fragment in ("<abbr>", "<abbr ", "</abbr>")
        )

    _text, found_tags = Converter().detag_string(msg)
    return set(tag for tag in found_tags if not is_linguistic_tag(tag))
|
||||
|
||||
def astral(msg):
    """Does `msg` have characters outside the Basic Multilingual Plane?"""
    return any(0xFFFF < ord(char) for char in msg)
|
||||
|
||||
def check_messages(filename, report_empty=False):
    """
    Checks messages in various ways:

    Translations must have the same slots as the English. Messages can't have astral
    characters in them.

    If report_empty is True, will also report empty translation strings.

    Problems are written to a sibling ".prob" file next to `filename`, and a
    summary is logged.  English catalogs are skipped entirely.
    """
    # Don't check English files.
    if "/locale/en/" in filename:
        return

    # problems will be a list of tuples. Each is a description, and a msgid,
    # and then zero or more translations.
    problems = []
    pomsgs = polib.pofile(filename)
    for msg in pomsgs:
        # Check for characters Javascript can't support.
        # https://code.djangoproject.com/ticket/21725
        if astral(msg.msgstr):
            problems.append(("Non-BMP char", msg.msgid, msg.msgstr))

        if msg.msgid_plural:
            # Plurals: two strings in, N strings out.
            # msgstr_plural maps plural-form index -> translation; sorting by
            # key keeps the joined forms in index order.
            source = msg.msgid + " | " + msg.msgid_plural
            translation = " | ".join(v for k, v in sorted(msg.msgstr_plural.items()))
            # A plural entry counts as empty if ANY of its forms is blank.
            empty = any(not t.strip() for t in msg.msgstr_plural.values())
        else:
            # Singular: just one string in and one string out.
            source = msg.msgid
            translation = msg.msgstr
            empty = not msg.msgstr.strip()

        if empty:
            # Empty translations are only a problem when explicitly requested;
            # otherwise they are skipped (no tag comparison is possible).
            if report_empty:
                problems.append(("Empty translation", source))
        else:
            id_tags = tags_in_string(source)
            tx_tags = tags_in_string(translation)

            # Check if tags don't match
            if id_tags != tx_tags:
                # Describe the asymmetric difference in both directions.
                id_has = u", ".join(u'"{}"'.format(t) for t in id_tags - tx_tags)
                tx_has = u", ".join(u'"{}"'.format(t) for t in tx_tags - id_tags)
                if id_has and tx_has:
                    diff = u"{} vs {}".format(id_has, tx_has)
                elif id_has:
                    diff = u"{} missing".format(id_has)
                else:
                    diff = u"{} added".format(tx_has)
                problems.append((
                    "Different tags in source and translation",
                    source,
                    translation,
                    diff
                ))

    if problems:
        # NOTE(review): str.replace swaps EVERY ".po" occurrence in the path,
        # not just the extension — confirm no locale path contains ".po".
        problem_file = filename.replace(".po", ".prob")
        # Wrappers align msgid and translations into a readable report:
        # 9-space hanging indent matches the width of the initial markers.
        id_filler = textwrap.TextWrapper(width=79, initial_indent=" msgid: ", subsequent_indent=" " * 9)
        tx_filler = textwrap.TextWrapper(width=79, initial_indent=" -----> ", subsequent_indent=" " * 9)
        with codecs.open(problem_file, "w", encoding="utf8") as prob_file:
            for problem in problems:
                # First two slots are always (description, msgid); any further
                # slots are translations/diff text printed beneath them.
                desc, msgid = problem[:2]
                prob_file.write(u"{}\n{}\n".format(desc, id_filler.fill(msgid)))
                for translation in problem[2:]:
                    prob_file.write(u"{}\n".format(tx_filler.fill(translation)))
                prob_file.write(u"\n")

        log.error(" {0} problems in {1}, details in .prob file".format(len(problems), filename))
    else:
        log.info(" No problems found in {0}".format(filename))
|
||||
|
||||
|
||||
def get_parser():
    """
    Build and return the command-line argument parser for this script.
    """
    description = (
        "Automatically finds translation errors in all edx-platform *.po files, "
        "for all languages, unless one or more language(s) is specified to check."
    )
    arg_parser = argparse.ArgumentParser(description=description)  # pylint: disable=redefined-outer-name

    # Zero or more language codes; absent means "check everything".
    arg_parser.add_argument(
        '-l', '--language',
        type=str,
        nargs='*',
        help="Specify one or more specific language code(s) to check (eg 'ko_KR').",
    )
    arg_parser.add_argument(
        '-e', '--empty',
        action='store_true',
        help="Includes empty translation strings in .prob files.",
    )
    # Counted flag so repeated -v can raise verbosity further.
    arg_parser.add_argument(
        '-v', '--verbose',
        action='count',
        default=0,
        help="Turns on info-level logging.",
    )

    return arg_parser
|
||||
|
||||
|
||||
def main(languages=None, empty=False, verbosity=1):  # pylint: disable=unused-argument
    """
    Main entry point: validate .po files for the given language codes,
    or for everything under LOCALE_DIR when no languages are given.
    """
    if not languages:
        # No specific languages requested: walk the whole locale tree.
        validate_po_files(LOCALE_DIR, empty)
        return

    # languages is a list of language codes; test each one in turn.
    for language in languages:
        language_dir = LOCALE_DIR / language
        if language_dir.isdir():
            # Found the language code's directory — validate its files.
            validate_po_files(language_dir, empty)
        else:
            log.error(" {0} is not a valid directory.\nSkipping language '{1}'".format(language_dir, language))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # pylint: disable=invalid-name
    args = get_parser().parse_args()
    # Any -v raises verbosity from the default WARNING up to INFO.
    log_level = logging.INFO if args.verbose else logging.WARNING
    logging.basicConfig(stream=sys.stdout, level=log_level)
    # pylint: enable=invalid-name

    print("Validating languages...")
    main(languages=args.language, empty=args.empty, verbosity=args.verbose)
    print("Finished validating languages")
|
||||
@@ -28,4 +28,5 @@
|
||||
-e git+https://github.com/edx/acid-block.git@459aff7b63db8f2c5decd1755706c1a64fb4ebb1#egg=acid-xblock
|
||||
-e git+https://github.com/edx/edx-ora2.git@release-2014-06-13T11.52#egg=edx-ora2
|
||||
-e git+https://github.com/edx/opaque-keys.git@5929789900b3d0a354ce7274bde74edfd0430f03#egg=opaque-keys
|
||||
git+https://github.com/edx/ease.git@a990b25ed4238acb1b15ee6f027465db3a10960e#egg=ease
|
||||
-e git+https://github.com/edx/i18n-tools.git@c186d9d877773734908e49ccc5c01407e6ad8199#egg=i18n-tools
|
||||
-e git+https://github.com/edx/ease.git@a990b25ed4238acb1b15ee6f027465db3a10960e#egg=ease
|
||||
|
||||
Reference in New Issue
Block a user