Merge pull request #2017 from edx/ned/i18n-minor-improvements
Minor i18n improvements
This commit is contained in:
@@ -84,7 +84,8 @@ urlpatterns += patterns(
|
||||
|
||||
js_info_dict = {
|
||||
'domain': 'djangojs',
|
||||
'packages': ('cms',),
|
||||
# No packages needed, we get LOCALE_PATHS anyway.
|
||||
'packages': (),
|
||||
}
|
||||
|
||||
urlpatterns += patterns('',
|
||||
|
||||
@@ -11,7 +11,7 @@ BASE_DIR = path(__file__).abspath().dirname().joinpath('..').normpath()
|
||||
LOCALE_DIR = BASE_DIR.joinpath('conf', 'locale')
|
||||
|
||||
|
||||
class Configuration:
|
||||
class Configuration(object):
|
||||
"""
|
||||
# Reads localization configuration in json format
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import re
|
||||
import itertools
|
||||
|
||||
class Converter:
|
||||
class Converter(object):
|
||||
"""Converter is an abstract class that transforms strings.
|
||||
It hides embedded tags (HTML or Python sequences) from transformation
|
||||
|
||||
|
||||
To implement Converter, provide implementation for inner_convert_string()
|
||||
|
||||
Strategy:
|
||||
@@ -16,16 +16,25 @@ class Converter:
|
||||
3. re-insert the extracted tags
|
||||
|
||||
"""
|
||||
|
||||
|
||||
# matches tags like these:
|
||||
# HTML: <B>, </B>, <BR/>, <textformat leading="10">
|
||||
# Python: %(date)s, %(name)s
|
||||
tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\([^)]*\)\w)', re.I)
|
||||
tag_pattern = re.compile(r'''
|
||||
(<[-\w" .:?=/]*>) | # <tag>
|
||||
({[^}]*}) | # {tag}
|
||||
(%\([^)]*\)\w) | # %(tag)s
|
||||
(&\w+;) | # &entity;
|
||||
(&\#\d+;) | # &#1234;
|
||||
(&\#x[0-9a-f]+;) # &#xabcd;
|
||||
''',
|
||||
re.IGNORECASE|re.VERBOSE
|
||||
)
|
||||
|
||||
def convert(self, string):
|
||||
"""Returns: a converted tagged string
|
||||
param: string (contains html tags)
|
||||
|
||||
|
||||
Don't replace characters inside tags
|
||||
"""
|
||||
(string, tags) = self.detag_string(string)
|
||||
@@ -35,7 +44,7 @@ class Converter:
|
||||
|
||||
def detag_string(self, string):
|
||||
"""Extracts tags from string.
|
||||
|
||||
|
||||
returns (string, list) where
|
||||
string: string has tags replaced by indices (<BR>... => <0>, <1>, <2>, etc.)
|
||||
list: list of the removed tags ('<BR>', '<I>', '</I>')
|
||||
@@ -62,4 +71,3 @@ class Converter:
|
||||
|
||||
def inner_convert_string(self, string):
|
||||
return string # do nothing by default
|
||||
|
||||
|
||||
@@ -34,8 +34,11 @@ TABLE = {'A': u'\xC0',
|
||||
'I': U'\xCC',
|
||||
'i': u'\xEF',
|
||||
'O': u'\xD8',
|
||||
'o': u'\xF6',
|
||||
'u': u'\xFC'
|
||||
'o': u'\xF8',
|
||||
'U': u'\xDB',
|
||||
'u': u'\xFC',
|
||||
'Y': u'\xDD',
|
||||
'y': u'\xFD',
|
||||
}
|
||||
|
||||
|
||||
@@ -54,49 +57,47 @@ LOREM = ' Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed ' \
|
||||
PAD_FACTOR = 1.3
|
||||
|
||||
|
||||
class Dummy (Converter):
|
||||
class Dummy(Converter):
|
||||
"""
|
||||
A string converter that generates dummy strings with fake accents
|
||||
and lorem ipsum padding.
|
||||
"""
|
||||
|
||||
"""
|
||||
def convert(self, string):
|
||||
result = Converter.convert(self, string)
|
||||
return self.pad(result)
|
||||
|
||||
def inner_convert_string(self, string):
|
||||
for (k,v) in TABLE.items():
|
||||
for k, v in TABLE.items():
|
||||
string = string.replace(k, v)
|
||||
return string
|
||||
|
||||
|
||||
def pad(self, string):
|
||||
"""add some lorem ipsum text to the end of string"""
|
||||
size = len(string)
|
||||
if size < 7:
|
||||
target = size*3
|
||||
target = size * 3
|
||||
else:
|
||||
target = int(size*PAD_FACTOR)
|
||||
return string + self.terminate(LOREM[:(target-size)])
|
||||
|
||||
def terminate(self, string):
|
||||
"""replaces the final char of string with #"""
|
||||
return string[:-1]+'#'
|
||||
return string[:-1] + '#'
|
||||
|
||||
def init_msgs(self, msgs):
|
||||
"""
|
||||
Make sure the first msg in msgs has a plural property.
|
||||
msgs is list of instances of polib.POEntry
|
||||
"""
|
||||
if len(msgs)==0:
|
||||
if not msgs:
|
||||
return
|
||||
headers = msgs[0].get_property('msgstr')
|
||||
has_plural = len([header for header in headers if header.find('Plural-Forms:') == 0])>0
|
||||
has_plural = any(header.startswith('Plural-Forms:') for header in headers)
|
||||
if not has_plural:
|
||||
# Apply declaration for English pluralization rules
|
||||
plural = "Plural-Forms: nplurals=2; plural=(n != 1);\\n"
|
||||
headers.append(plural)
|
||||
|
||||
|
||||
def convert_msg(self, msg):
|
||||
"""
|
||||
@@ -104,19 +105,18 @@ class Dummy (Converter):
|
||||
msg is an instance of polib.POEntry
|
||||
"""
|
||||
source = msg.msgid
|
||||
if len(source)==0:
|
||||
if not source:
|
||||
# don't translate empty string
|
||||
return
|
||||
|
||||
plural = msg.msgid_plural
|
||||
if len(plural)>0:
|
||||
if plural:
|
||||
# translate singular and plural
|
||||
foreign_single = self.convert(source)
|
||||
foreign_plural = self.convert(plural)
|
||||
plural = {'0': self.final_newline(source, foreign_single),
|
||||
'1': self.final_newline(plural, foreign_plural)}
|
||||
msg.msgstr_plural = plural
|
||||
return
|
||||
else:
|
||||
foreign = self.convert(source)
|
||||
msg.msgstr = self.final_newline(source, foreign)
|
||||
@@ -126,7 +126,7 @@ class Dummy (Converter):
|
||||
If last char of original is a newline, make sure translation
|
||||
has a newline too.
|
||||
"""
|
||||
if len(original)>1:
|
||||
if original[-1]=='\n' and translated[-1]!='\n':
|
||||
return translated + '\n'
|
||||
if original:
|
||||
if original[-1] == '\n' and translated[-1] != '\n':
|
||||
translated += '\n'
|
||||
return translated
|
||||
|
||||
@@ -11,13 +11,13 @@ def execute(command, working_directory=BASE_DIR):
|
||||
Output is ignored.
|
||||
"""
|
||||
LOG.info(command)
|
||||
subprocess.call(command.split(' '), cwd=working_directory)
|
||||
subprocess.check_output(command.split(' '), cwd=working_directory, stderr=subprocess.STDOUT)
|
||||
|
||||
|
||||
def call(command, working_directory=BASE_DIR):
|
||||
"""
|
||||
Executes shell command in a given working_directory.
|
||||
Command is a string to pass to the shell.
|
||||
Command is a list of strings to execute as a command line.
|
||||
Returns a tuple of two strings: (stdout, stderr)
|
||||
|
||||
"""
|
||||
@@ -25,7 +25,8 @@ def call(command, working_directory=BASE_DIR):
|
||||
p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_directory)
|
||||
out, err = p.communicate()
|
||||
return (out, err)
|
||||
|
||||
|
||||
|
||||
def create_dir_if_necessary(pathname):
|
||||
dirname = os.path.dirname(pathname)
|
||||
if not os.path.exists(dirname):
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
|
||||
"""
|
||||
See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow
|
||||
|
||||
This task extracts all English strings from all source code
|
||||
and produces three human-readable files:
|
||||
This task extracts all English strings from all source code
|
||||
and produces three human-readable files:
|
||||
conf/locale/en/LC_MESSAGES/django-partial.po
|
||||
conf/locale/en/LC_MESSAGES/djangojs.po
|
||||
conf/locale/en/LC_MESSAGES/mako.po
|
||||
|
||||
This task will clobber any existing django.po file.
|
||||
This is because django-admin.py makemessages hardcodes this filename
|
||||
and it cannot be overridden.
|
||||
|
||||
This task will clobber any existing django.po file.
|
||||
This is because django-admin.py makemessages hardcodes this filename
|
||||
and it cannot be overridden.
|
||||
|
||||
"""
|
||||
|
||||
import os, sys, logging
|
||||
@@ -34,7 +34,7 @@ SOURCE_WARN = 'This English source file is machine-generated. Do not check it in
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
def main ():
|
||||
def main():
|
||||
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
||||
create_dir_if_necessary(LOCALE_DIR)
|
||||
source_msgs_dir = CONFIGURATION.source_messages_dir
|
||||
@@ -44,23 +44,28 @@ def main ():
|
||||
for filename in generated_files:
|
||||
remove_file(source_msgs_dir.joinpath(filename))
|
||||
|
||||
|
||||
# Extract strings from mako templates
|
||||
# Extract strings from mako templates.
|
||||
babel_mako_cmd = 'pybabel extract -F %s -c "TRANSLATORS:" . -o %s' % (BABEL_CONFIG, BABEL_OUT)
|
||||
|
||||
# Extract strings from django source files
|
||||
make_django_cmd = 'django-admin.py makemessages -l en --ignore=src/* --ignore=i18n/* ' \
|
||||
+ '--extension html'
|
||||
|
||||
# Extract strings from javascript source files
|
||||
make_djangojs_cmd = 'django-admin.py makemessages -l en -d djangojs --ignore=src/* ' \
|
||||
+ '--ignore=i18n/* --extension js'
|
||||
# Extract strings from django source files.
|
||||
make_django_cmd = (
|
||||
'django-admin.py makemessages -l en --ignore=src/* --ignore=i18n/* '
|
||||
'--extension html'
|
||||
)
|
||||
# Extract strings from Javascript source files.
|
||||
make_djangojs_cmd = (
|
||||
'django-admin.py makemessages -l en --ignore=src/* --ignore=i18n/* '
|
||||
'-d djangojs --extension js'
|
||||
)
|
||||
execute(babel_mako_cmd, working_directory=BASE_DIR)
|
||||
execute(make_django_cmd, working_directory=BASE_DIR)
|
||||
|
||||
# makemessages creates 'django.po'. This filename is hardcoded.
|
||||
# Rename it to django-partial.po to enable merging into django.po later.
|
||||
os.rename(source_msgs_dir.joinpath('django.po'),
|
||||
source_msgs_dir.joinpath('django-partial.po'))
|
||||
os.rename(
|
||||
source_msgs_dir.joinpath('django.po'),
|
||||
source_msgs_dir.joinpath('django-partial.po')
|
||||
)
|
||||
execute(make_djangojs_cmd, working_directory=BASE_DIR)
|
||||
|
||||
for filename in generated_files:
|
||||
@@ -101,7 +106,7 @@ def fix_header(po):
|
||||
('FIRST AUTHOR <EMAIL@ADDRESS>',
|
||||
'EdX Team <info@edx.org>')
|
||||
)
|
||||
for (src, dest) in fixes:
|
||||
for src, dest in fixes:
|
||||
header = header.replace(src, dest)
|
||||
po.header = header
|
||||
|
||||
@@ -112,12 +117,12 @@ def fix_header(po):
|
||||
u'Content-Transfer-Encoding': u'8bit',
|
||||
u'Project-Id-Version': u'PACKAGE VERSION',
|
||||
u'Report-Msgid-Bugs-To': u'',
|
||||
u'Last-Translator': u'FULL NAME <EMAIL@ADDRESS>',
|
||||
u'Last-Translator': u'FULL NAME <EMAIL@ADDRESS>',
|
||||
u'Language-Team': u'LANGUAGE <LL@li.org>',
|
||||
u'POT-Creation-Date': u'2013-04-25 14:14-0400',
|
||||
u'Content-Type': u'text/plain; charset=UTF-8',
|
||||
u'MIME-Version': u'1.0'}
|
||||
"""
|
||||
"""
|
||||
|
||||
def fix_metadata(po):
|
||||
"""
|
||||
@@ -146,7 +151,7 @@ def is_key_string(string):
|
||||
returns True if string is a key string.
|
||||
Key strings begin with underscore.
|
||||
"""
|
||||
return len(string)>1 and string[0]=='_'
|
||||
return len(string) > 1 and string[0] == '_'
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow
|
||||
See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow
|
||||
|
||||
This task merges and compiles the human-readable .po files on the
|
||||
local filesystem into machine-readable .mo files. This is typically
|
||||
necessary as part of the build process since these .mo files are
|
||||
needed by Django when serving the web app.
|
||||
|
||||
This task merges and compiles the human-readable .pofiles on the
|
||||
local filesystem into machine-readable .mofiles. This is typically
|
||||
necessary as part of the build process since these .mofiles are
|
||||
needed by Django when serving the web app.
|
||||
The configuration file (in edx-platform/conf/locale/config) specifies which
|
||||
languages to generate.
|
||||
|
||||
The configuration file (in edx-platform/conf/locale/config) specifies which
|
||||
languages to generate.
|
||||
"""
|
||||
|
||||
import os, sys, logging
|
||||
@@ -26,10 +26,13 @@ def merge(locale, target='django.po', fail_if_missing=True):
|
||||
"""
|
||||
For the given locale, merge django-partial.po, messages.po, mako.po -> django.po
|
||||
target is the resulting filename
|
||||
If fail_if_missing is True, and the files to be merged are missing,
|
||||
throw an Exception.
|
||||
If fail_if_missing is False, and the files to be merged are missing,
|
||||
|
||||
If fail_if_missing is true, and the files to be merged are missing,
|
||||
throw an Exception, otherwise return silently.
|
||||
|
||||
If fail_if_missing is false, and the files to be merged are missing,
|
||||
just return silently.
|
||||
|
||||
"""
|
||||
LOG.info('Merging locale={0}'.format(locale))
|
||||
locale_directory = CONFIGURATION.get_messages_dir(locale)
|
||||
|
||||
@@ -51,11 +51,7 @@ def new_filename(original_filename, new_locale):
|
||||
orig_dir = os.path.dirname(original_filename)
|
||||
msgs_dir = os.path.basename(orig_dir)
|
||||
orig_file = os.path.basename(original_filename)
|
||||
return os.path.abspath(os.path.join(orig_dir,
|
||||
'../..',
|
||||
new_locale,
|
||||
msgs_dir,
|
||||
orig_file))
|
||||
return os.path.abspath(os.path.join(orig_dir, '../..', new_locale, msgs_dir, orig_file))
|
||||
|
||||
if __name__ == '__main__':
|
||||
# required arg: file
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
from test_config import TestConfiguration
|
||||
from test_extract import TestExtract
|
||||
from test_generate import TestGenerate
|
||||
from test_converter import TestConverter
|
||||
from test_dummy import TestDummy
|
||||
import test_validate
|
||||
|
||||
@@ -17,7 +17,7 @@ class TestConfiguration(TestCase):
|
||||
config_filename = os.path.normpath(os.path.join(LOCALE_DIR, 'no_such_file'))
|
||||
with self.assertRaises(Exception):
|
||||
Configuration(config_filename)
|
||||
|
||||
|
||||
def test_valid_configuration(self):
|
||||
"""
|
||||
Make sure we have a valid configuration file,
|
||||
|
||||
@@ -3,7 +3,7 @@ from unittest import TestCase
|
||||
|
||||
import converter
|
||||
|
||||
class UpcaseConverter (converter.Converter):
|
||||
class UpcaseConverter(converter.Converter):
|
||||
"""
|
||||
Converts a string to uppercase. Just used for testing.
|
||||
"""
|
||||
@@ -22,7 +22,7 @@ class TestConverter(TestCase):
|
||||
Assert that embedded HTML and python tags are not converted.
|
||||
"""
|
||||
c = UpcaseConverter()
|
||||
test_cases = (
|
||||
test_cases = [
|
||||
# no tags
|
||||
('big bad wolf', 'BIG BAD WOLF'),
|
||||
# one html tag
|
||||
@@ -36,7 +36,11 @@ class TestConverter(TestCase):
|
||||
# both kinds of tags
|
||||
('<strong>big</strong> %(adjective)s %(noun)s',
|
||||
'<strong>BIG</strong> %(adjective)s %(noun)s'),
|
||||
)
|
||||
for (source, expected) in test_cases:
|
||||
# .format-style tags
|
||||
('The {0} barn is {1!r}.', 'THE {0} BARN IS {1!r}.'),
|
||||
# HTML entities
|
||||
('<b>&copy; 2013 edX, &#xa0;</b>', '<b>&copy; 2013 EDX, &#xa0;</b>'),
|
||||
]
|
||||
for source, expected in test_cases:
|
||||
result = c.convert(source)
|
||||
self.assertEquals(result, expected)
|
||||
|
||||
@@ -18,23 +18,24 @@ class TestDummy(TestCase):
|
||||
Tests with a dummy converter (adds spurious accents to strings).
|
||||
Assert that embedded HTML and python tags are not converted.
|
||||
"""
|
||||
test_cases = (("hello my name is Bond, James Bond",
|
||||
u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#'),
|
||||
test_cases = [
|
||||
("hello my name is Bond, James Bond",
|
||||
u'h\xe9ll\xf8 m\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd Lorem i#'),
|
||||
|
||||
('don\'t convert <a href="href">tag ids</a>',
|
||||
u'd\xf6n\'t \xe7\xf6nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'),
|
||||
|
||||
('don\'t convert %(name)s tags on %(date)s',
|
||||
u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#")
|
||||
)
|
||||
for (source, expected) in test_cases:
|
||||
('don\'t convert <a href="href">tag ids</a>',
|
||||
u'd\xf8n\'t \xe7\xf8nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'),
|
||||
|
||||
('don\'t convert %(name)s tags on %(date)s',
|
||||
u"d\xf8n't \xe7\xf8nv\xe9rt %(name)s t\xe4gs \xf8n %(date)s Lorem ips#")
|
||||
]
|
||||
for source, expected in test_cases:
|
||||
result = self.converter.convert(source)
|
||||
self.assertEquals(result, expected)
|
||||
|
||||
def test_singular(self):
|
||||
entry = POEntry()
|
||||
entry.msgid = 'A lovely day for a cup of tea.'
|
||||
expected = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r \xe4 \xe7\xfcp \xf6f t\xe9\xe4. Lorem i#'
|
||||
expected = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
|
||||
self.converter.convert_msg(entry)
|
||||
self.assertEquals(entry.msgstr, expected)
|
||||
|
||||
@@ -42,8 +43,8 @@ class TestDummy(TestCase):
|
||||
entry = POEntry()
|
||||
entry.msgid = 'A lovely day for a cup of tea.'
|
||||
entry.msgid_plural = 'A lovely day for some cups of tea.'
|
||||
expected_s = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r \xe4 \xe7\xfcp \xf6f t\xe9\xe4. Lorem i#'
|
||||
expected_p = u'\xc0 l\xf6v\xe9ly d\xe4y f\xf6r s\xf6m\xe9 \xe7\xfcps \xf6f t\xe9\xe4. Lorem ip#'
|
||||
expected_s = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
|
||||
expected_p = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r s\xf8m\xe9 \xe7\xfcps \xf8f t\xe9\xe4. Lorem ip#'
|
||||
self.converter.convert_msg(entry)
|
||||
result = entry.msgstr_plural
|
||||
self.assertEquals(result['0'], expected_s)
|
||||
|
||||
@@ -4,14 +4,14 @@ from nose.plugins.skip import SkipTest
|
||||
|
||||
from config import LOCALE_DIR
|
||||
from execute import call
|
||||
|
||||
|
||||
def test_po_files(root=LOCALE_DIR):
|
||||
"""
|
||||
This is a generator. It yields all of the .po files under root, and tests each one.
|
||||
"""
|
||||
log = logging.getLogger(__name__)
|
||||
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
||||
|
||||
|
||||
for (dirpath, dirnames, filenames) in os.walk(root):
|
||||
for name in filenames:
|
||||
(base, ext) = os.path.splitext(name)
|
||||
@@ -24,11 +24,8 @@ def validate_po_file(filename, log):
|
||||
Call GNU msgfmt -c on each .po file to validate its format.
|
||||
Any errors caught by msgfmt are logged to log.
|
||||
"""
|
||||
# Skip this test for now because it's very noisy
|
||||
raise SkipTest()
|
||||
# Use relative paths to make output less noisy.
|
||||
rfile = os.path.relpath(filename, LOCALE_DIR)
|
||||
(out, err) = call(['msgfmt','-c', rfile], working_directory=LOCALE_DIR)
|
||||
if err != '':
|
||||
log.warn('\n'+err)
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ def clean_translated_locales():
|
||||
for locale in CONFIGURATION.locales:
|
||||
if locale != CONFIGURATION.source_locale:
|
||||
clean_locale(locale)
|
||||
|
||||
|
||||
def clean_locale(locale):
|
||||
"""
|
||||
Strips out the warning from all of a locale's translated po files
|
||||
@@ -58,7 +58,7 @@ def get_new_header(po):
|
||||
return TRANSIFEX_HEADER % team
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv)<2:
|
||||
if len(sys.argv) < 2:
|
||||
raise Exception("missing argument: push or pull")
|
||||
arg = sys.argv[1]
|
||||
if arg == 'push':
|
||||
@@ -67,4 +67,3 @@ if __name__ == '__main__':
|
||||
pull()
|
||||
else:
|
||||
raise Exception("unknown argument: (%s)" % arg)
|
||||
|
||||
|
||||
@@ -72,7 +72,8 @@ urlpatterns += (
|
||||
|
||||
js_info_dict = {
|
||||
'domain': 'djangojs',
|
||||
'packages': ('lms',),
|
||||
# No packages needed, we get LOCALE_PATHS anyway.
|
||||
'packages': (),
|
||||
}
|
||||
|
||||
urlpatterns += (
|
||||
|
||||
Reference in New Issue
Block a user