diff --git a/common/lib/xmodule/xmodule/js/src/capa/display.coffee b/common/lib/xmodule/xmodule/js/src/capa/display.coffee
index 91b83e21f3..f65860b7ab 100644
--- a/common/lib/xmodule/xmodule/js/src/capa/display.coffee
+++ b/common/lib/xmodule/xmodule/js/src/capa/display.coffee
@@ -405,7 +405,7 @@ class @Problem
formulaequationinput: (element) ->
$(element).find('input').on 'input', ->
$p = $(element).find('p.status')
- `// Translators: the word Answer here is about answering a problem the student must solve.`
+ `// Translators: the word unanswered here is about answering a problem the student must solve.`
$p.text gettext("unanswered")
$p.parent().removeClass().addClass "unanswered"
@@ -434,7 +434,7 @@ class @Problem
textline: (element) ->
$(element).find('input').on 'input', ->
$p = $(element).find('p.status')
- `// Translators: the word Answer here is about answering a problem the student must solve.`
+ `// Translators: the word unanswered here is about answering a problem the student must solve.`
$p.text gettext("unanswered")
$p.parent().removeClass().addClass "unanswered"
diff --git a/i18n/converter.py b/i18n/converter.py
index d3987bebe2..9a982347ee 100644
--- a/i18n/converter.py
+++ b/i18n/converter.py
@@ -21,9 +21,9 @@ class Converter(object):
# HTML: <B>, </B>, <BR/>, <textformat leading="10">
# Python: %(date)s, %(name)s
tag_pattern = re.compile(r'''
- (<[-\w" .:?=/]*>) | # <tag>
- ({[^}]*}) | # {tag}
- (%\([^)]*\)\w) | # %(tag)s
+ (<[^>]+>) | # <tag>
+ ({[^}]+}) | # {tag}
+ (%\([\w]+\)\w) | # %(tag)s
(&\w+;) | # &entity;
(&\#\d+;) | # &#1234;
(&\#x[0-9a-f]+;) # &#xABCD;
diff --git a/i18n/dummy.py b/i18n/dummy.py
index e82429dcbd..b192069329 100644
--- a/i18n/dummy.py
+++ b/i18n/dummy.py
@@ -1,56 +1,70 @@
+# -*- coding: utf-8 -*-
+r"""
+Creates new localization properties files in a dummy language.
+
+Each property file is derived from the equivalent en_US file, with these
+transformations applied:
+
+1. Every vowel is replaced with an equivalent with extra accent marks.
+
+2. Every string is padded out to +30% length to simulate verbose languages
+ (such as German) to see if layout and flows work properly.
+
+3. Every string is terminated with a '#' character to make it easier to detect
+ truncation.
+
+Example use::
+
+ >>> from dummy import Dummy
+ >>> c = Dummy()
+ >>> c.convert("My name is Bond, James Bond")
+ u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#'
+ >>> print c.convert("My name is Bond, James Bond")
+ Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ#
+ >>> print c.convert("don't convert <a href='href'>tag ids</a>")
+ døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#
+ >>> print c.convert("don't convert %(name)s tags on %(date)s")
+ døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#
+
+"""
+
from converter import Converter
-# Creates new localization properties files in a dummy language
-# Each property file is derived from the equivalent en_US file, except
-# 1. Every vowel is replaced with an equivalent with extra accent marks
-# 2. Every string is padded out to +30% length to simulate verbose languages (e.g. German)
-# to see if layout and flows work properly
-# 3. Every string is terminated with a '#' character to make it easier to detect truncation
-
-
-# --------------------------------
-# Example use:
-# >>> from dummy import Dummy
-# >>> c = Dummy()
-# >>> c.convert("hello my name is Bond, James Bond")
-# u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#'
-#
-# >>> c.convert('don\'t convert tag ids')
-# u'd\xf6n\'t \xe7\xf6nv\xe9rt t\xe4g \xefds Lorem ipsu#'
-#
-# >>> c.convert('don\'t convert %(name)s tags on %(date)s')
-# u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#"
-
-
# Substitute plain characters with accented lookalikes.
# http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
-TABLE = {'A': u'\xC0',
- 'a': u'\xE4',
- 'b': u'\xDF',
- 'C': u'\xc7',
- 'c': u'\xE7',
- 'E': u'\xC9',
- 'e': u'\xE9',
- 'I': U'\xCC',
- 'i': u'\xEF',
- 'O': u'\xD8',
- 'o': u'\xF8',
- 'U': u'\xDB',
- 'u': u'\xFC',
- 'Y': u'\xDD',
- 'y': u'\xFD',
- }
-
+TABLE = {
+ 'A': u'À',
+ 'a': u'ä',
+ 'b': u'ß',
+ 'C': u'Ç',
+ 'c': u'ç',
+ 'E': u'É',
+ 'e': u'é',
+ 'I': u'Ì',
+ 'i': u'ï',
+ 'O': u'Ø',
+ 'o': u'ø',
+ 'U': u'Û',
+ 'u': u'ü',
+ 'Y': u'Ý',
+ 'y': u'ý',
+}
# The print industry's standard dummy text, in use since the 1500s
-# see http://www.lipsum.com/
-LOREM = ' Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed ' \
- 'do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad ' \
- 'minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ' \
- 'ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate ' \
- 'velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat ' \
- 'cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. '
+# see http://www.lipsum.com/, then fed through a "fancy-text" converter.
+# The string should start with a space.
+LOREM = " " + " ".join( # join and split just make the string easier here.
+ u"""
+ Ⱡσяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂
+ тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм
+ νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα
+ ¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє
+ νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт
+ ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂
+ єѕт łαвσяυм.
+ """.split()
+)
# To simulate more verbose languages (like German), pad the length of a string
# by a multiple of PAD_FACTOR
@@ -85,20 +99,6 @@ class Dummy(Converter):
"""replaces the final char of string with #"""
return string[:-1] + '#'
- def init_msgs(self, msgs):
- """
- Make sure the first msg in msgs has a plural property.
- msgs is list of instances of polib.POEntry
- """
- if not msgs:
- return
- headers = msgs[0].get_property('msgstr')
- has_plural = any(header.startswith('Plural-Forms:') for header in headers)
- if not has_plural:
- # Apply declaration for English pluralization rules
- plural = "Plural-Forms: nplurals=2; plural=(n != 1);\\n"
- headers.append(plural)
-
def convert_msg(self, msg):
"""
Takes one POEntry object and converts it (adds a dummy translation to it)
@@ -114,8 +114,10 @@ class Dummy(Converter):
# translate singular and plural
foreign_single = self.convert(source)
foreign_plural = self.convert(plural)
- plural = {'0': self.final_newline(source, foreign_single),
- '1': self.final_newline(plural, foreign_plural)}
+ plural = {
+ '0': self.final_newline(source, foreign_single),
+ '1': self.final_newline(plural, foreign_plural),
+ }
msg.msgstr_plural = plural
else:
foreign = self.convert(source)
diff --git a/i18n/extract.py b/i18n/extract.py
index 2bb1baf60d..694f1740e4 100755
--- a/i18n/extract.py
+++ b/i18n/extract.py
@@ -45,7 +45,7 @@ def main():
remove_file(source_msgs_dir.joinpath(filename))
# Extract strings from mako templates.
- babel_mako_cmd = 'pybabel extract -F %s -c "TRANSLATORS:" . -o %s' % (BABEL_CONFIG, BABEL_OUT)
+ babel_mako_cmd = 'pybabel extract -F %s -c "Translators:" . -o %s' % (BABEL_CONFIG, BABEL_OUT)
# Extract strings from django source files.
make_django_cmd = (
diff --git a/i18n/generate.py b/i18n/generate.py
index 3d565ba091..8afa93c655 100755
--- a/i18n/generate.py
+++ b/i18n/generate.py
@@ -60,9 +60,12 @@ def merge(locale, target='django.po', fail_if_missing=True):
def clean_metadata(file):
"""
Clean up redundancies in the metadata caused by merging.
- This reads in a PO file and simply saves it back out again.
"""
- pofile(file).save()
+ # Reading in the .po file and saving it again fixes redundancies.
+ pomsgs = pofile(file)
+ # The msgcat tool marks the metadata as fuzzy, but it's ok as it is.
+ pomsgs.metadata_is_fuzzy = False
+ pomsgs.save()
def validate_files(dir, files_to_merge):
diff --git a/i18n/make_dummy.py b/i18n/make_dummy.py
index 1d9be34b10..11021d4036 100755
--- a/i18n/make_dummy.py
+++ b/i18n/make_dummy.py
@@ -38,9 +38,15 @@ def main(file, locale):
raise IOError('File does not exist: %s' % file)
pofile = polib.pofile(file)
converter = Dummy()
- converter.init_msgs(pofile.translated_entries())
for msg in pofile:
converter.convert_msg(msg)
+
+ # If any message has a plural, then the file needs plural information.
+ # Apply declaration for English pluralization rules so that ngettext will
+ # do something reasonable.
+ if any(m.msgid_plural for m in pofile):
+ pofile.metadata['Plural-Forms'] = 'nplurals=2; plural=(n != 1);'
+
new_file = new_filename(file, locale)
create_dir_if_necessary(new_file)
pofile.save(new_file)
diff --git a/i18n/tests/test_converter.py b/i18n/tests/test_converter.py
index b1989ede94..e893f7c258 100644
--- a/i18n/tests/test_converter.py
+++ b/i18n/tests/test_converter.py
@@ -1,5 +1,8 @@
+"""Tests of i18n/converter.py"""
+
import os
from unittest import TestCase
+import ddt
import converter
@@ -11,36 +14,48 @@ class UpcaseConverter(converter.Converter):
return string.upper()
+@ddt.ddt
class TestConverter(TestCase):
"""
Tests functionality of i18n/converter.py
"""
- def test_converter(self):
+    @ddt.data(
+        # no tags
+        ('big bad wolf',
+         'BIG BAD WOLF'),
+        # one html tag
+        ('big <strong>bad</strong> wolf',
+         'BIG <strong>BAD</strong> WOLF'),
+        # two html tags
+        ('big <b>bad</b> gray <i>wolf</i>',
+         'BIG <b>BAD</b> GRAY <i>WOLF</i>'),
+        # html tags with attributes
+        ('<a href="foo">bar</a> baz',
+         '<a href="foo">BAR</a> BAZ'),
+        ("<a href='foo'>bar</a> baz",
+         "<a href='foo'>BAR</a> BAZ"),
+        # one python tag
+        ('big %(adjective)s wolf',
+         'BIG %(adjective)s WOLF'),
+        # two python tags
+        ('big %(adjective)s gray %(noun)s',
+         'BIG %(adjective)s GRAY %(noun)s'),
+        # both kinds of tags
+        ('<strong>big</strong> %(adjective)s %(noun)s',
+         '<strong>BIG</strong> %(adjective)s %(noun)s'),
+        # .format-style tags
+        ('The {0} barn is {1!r}.',
+         'THE {0} BARN IS {1!r}.'),
+        # HTML entities
+        ('<b>&copy; 2013 edX, &#xa0;</b>',
+         '<b>&copy; 2013 EDX, &#xa0;</b>'),
+    )
+ def test_converter(self, data):
"""
Tests with a simple converter (converts strings to uppercase).
Assert that embedded HTML and python tags are not converted.
"""
- c = UpcaseConverter()
- test_cases = [
- # no tags
- ('big bad wolf', 'BIG BAD WOLF'),
- # one html tag
- ('big bad wolf', 'BIG BAD WOLF'),
- # two html tags
- ('big bad wolf', 'BIG BAD WOLF'),
- # one python tag
- ('big %(adjective)s wolf', 'BIG %(adjective)s WOLF'),
- # two python tags
- ('big %(adjective)s %(noun)s', 'BIG %(adjective)s %(noun)s'),
- # both kinds of tags
- ('big %(adjective)s %(noun)s',
- 'BIG %(adjective)s %(noun)s'),
- # .format-style tags
- ('The {0} barn is {1!r}.', 'THE {0} BARN IS {1!r}.'),
- # HTML entities
- ('© 2013 edX, ', '© 2013 EDX, '),
- ]
- for source, expected in test_cases:
- result = c.convert(source)
- self.assertEquals(result, expected)
+ source, expected = data
+ result = UpcaseConverter().convert(source)
+ self.assertEquals(result, expected)
diff --git a/i18n/tests/test_dummy.py b/i18n/tests/test_dummy.py
index 4670fe5635..2d1b1b71c3 100644
--- a/i18n/tests/test_dummy.py
+++ b/i18n/tests/test_dummy.py
@@ -1,10 +1,16 @@
+# -*- coding: utf-8 -*-
+"""Tests of i18n/dummy.py"""
+
import os, string, random
from unittest import TestCase
+
+import ddt
from polib import POEntry
import dummy
+@ddt.ddt
class TestDummy(TestCase):
"""
Tests functionality of i18n/dummy.py
@@ -13,39 +19,52 @@ class TestDummy(TestCase):
def setUp(self):
self.converter = dummy.Dummy()
- def test_dummy(self):
+ def assertUnicodeEquals(self, str1, str2):
+ """Just like assertEquals, but doesn't put Unicode into the fail message.
+
+ Either nose, or rake, or something, deals very badly with unusual
+ Unicode characters in the assertions, so we use repr here to keep
+ things safe.
+
+ """
+ self.assertEquals(
+ str1, str2,
+ "Mismatch: %r != %r" % (str1, str2),
+ )
+
+    @ddt.data(
+        (u"hello my name is Bond, James Bond",
+         u"héllø mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ι#"),
+
+        (u"don't convert <a href='href'>tag ids</a>",
+         u"døn't çønvért <a href='href'>täg ïds</a> Ⱡσяєм ιρѕυ#"),
+
+        (u"don't convert %(name)s tags on %(date)s",
+         u"døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#"),
+    )
+ def test_dummy(self, data):
"""
Tests with a dummy converter (adds spurious accents to strings).
Assert that embedded HTML and python tags are not converted.
"""
- test_cases = [
- ("hello my name is Bond, James Bond",
- u'h\xe9ll\xf8 m\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd Lorem i#'),
-
- ('don\'t convert tag ids',
- u'd\xf8n\'t \xe7\xf8nv\xe9rt t\xe4g \xefds Lorem ipsu#'),
-
- ('don\'t convert %(name)s tags on %(date)s',
- u"d\xf8n't \xe7\xf8nv\xe9rt %(name)s t\xe4gs \xf8n %(date)s Lorem ips#")
- ]
- for source, expected in test_cases:
- result = self.converter.convert(source)
- self.assertEquals(result, expected)
+ source, expected = data
+ result = self.converter.convert(source)
+ self.assertUnicodeEquals(result, expected)
def test_singular(self):
entry = POEntry()
entry.msgid = 'A lovely day for a cup of tea.'
- expected = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
+ expected = u'À løvélý däý før ä çüp øf téä. Ⱡσяєм ι#'
self.converter.convert_msg(entry)
- self.assertEquals(entry.msgstr, expected)
+ self.assertUnicodeEquals(entry.msgstr, expected)
def test_plural(self):
entry = POEntry()
entry.msgid = 'A lovely day for a cup of tea.'
entry.msgid_plural = 'A lovely day for some cups of tea.'
- expected_s = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#'
- expected_p = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r s\xf8m\xe9 \xe7\xfcps \xf8f t\xe9\xe4. Lorem ip#'
+ expected_s = u'À løvélý däý før ä çüp øf téä. Ⱡσяєм ι#'
+ expected_p = u'À løvélý däý før sømé çüps øf téä. Ⱡσяєм ιρ#'
self.converter.convert_msg(entry)
result = entry.msgstr_plural
- self.assertEquals(result['0'], expected_s)
- self.assertEquals(result['1'], expected_p)
+ self.assertUnicodeEquals(result['0'], expected_s)
+ self.assertUnicodeEquals(result['1'], expected_p)
diff --git a/i18n/tests/test_validate.py b/i18n/tests/test_validate.py
index 2876f1c2f8..a7c400da0f 100644
--- a/i18n/tests/test_validate.py
+++ b/i18n/tests/test_validate.py
@@ -1,9 +1,17 @@
-import os, sys, logging
-from unittest import TestCase
-from nose.plugins.skip import SkipTest
+"""Tests that validate .po files."""
+
+import codecs
+import logging
+import os
+import sys
+import textwrap
+
+import polib
from config import LOCALE_DIR
from execute import call
+from converter import Converter
+
def test_po_files(root=LOCALE_DIR):
"""
@@ -12,20 +20,120 @@ def test_po_files(root=LOCALE_DIR):
log = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
- for (dirpath, dirnames, filenames) in os.walk(root):
+ for dirpath, __, filenames in os.walk(root):
for name in filenames:
- (base, ext) = os.path.splitext(name)
+ __, ext = os.path.splitext(name)
if ext.lower() == '.po':
- yield validate_po_file, os.path.join(dirpath, name), log
+ filename = os.path.join(dirpath, name)
+ yield msgfmt_check_po_file, filename, log
+ yield check_messages, filename
-def validate_po_file(filename, log):
+def msgfmt_check_po_file(filename, log):
"""
Call GNU msgfmt -c on each .po file to validate its format.
Any errors caught by msgfmt are logged to log.
"""
# Use relative paths to make output less noisy.
rfile = os.path.relpath(filename, LOCALE_DIR)
- (out, err) = call(['msgfmt','-c', rfile], working_directory=LOCALE_DIR)
+ out, err = call(['msgfmt', '-c', rfile], working_directory=LOCALE_DIR)
if err != '':
- log.warn('\n'+err)
+ log.info('\n' + out)
+ log.warn('\n' + err)
+ assert not err
+
+
+def tags_in_string(msg):
+    """
+    Return the set of tags in a message string.
+
+    Tags include HTML tags, data placeholders, etc.
+
+    Skips tags that might change due to translations: HTML entities, <abbr>,
+    and so on.
+
+    """
+    def is_linguistic_tag(tag):
+        """Is this tag one that can change with the language?"""
+        if tag.startswith("&"):  # HTML entity
+            return True
+        if any(x in tag for x in ["<abbr>", "<abbr ", "</abbr>"]):
+            return True
+        return False
+
+    __, tags = Converter().detag_string(msg)
+    return set(t for t in tags if not is_linguistic_tag(t))
+
+
+def astral(msg):
+    """Does `msg` have characters outside the BMP? (Surrogates count: narrow builds store astral chars as pairs.)"""
+    return any(ord(c) > 0xFFFF or 0xD800 <= ord(c) <= 0xDFFF for c in msg)
+
+
+def check_messages(filename):
+    """
+    Checks the messages in the .po file `filename`, asserting none are bad:
+
+    Translations must have the same slots as the English. The translation
+    must not be empty. Messages can't have astral characters in them.
+    Problems are written to a sibling .prob file before the assert fires.
+    """
+    # Don't check English files.
+    if "/locale/en/" in filename:
+        return
+
+    # problems will be a list of tuples. Each is a description, and a msgid,
+    # and then zero or more translations.
+    problems = []
+    pomsgs = polib.pofile(filename)
+    for msg in pomsgs:
+        # Check for characters Javascript can't support.
+        # https://code.djangoproject.com/ticket/21725
+        if astral(msg.msgstr):
+            problems.append(("Non-BMP char", msg.msgid, msg.msgstr))
+
+        if msg.msgid_plural:
+            # Plurals: two strings in, N strings out.
+            source = msg.msgid + " | " + msg.msgid_plural
+            translation = " | ".join(v for k, v in sorted(msg.msgstr_plural.items()))
+            empty = any(not t.strip() for t in msg.msgstr_plural.values())
+        else:
+            # Singular: just one string in and one string out.
+            source = msg.msgid
+            translation = msg.msgstr
+            empty = not msg.msgstr.strip()
+
+        if empty:
+            problems.append(("Empty translation", source))
+        else:
+            id_tags = tags_in_string(source)
+            tx_tags = tags_in_string(translation)
+            if id_tags != tx_tags:
+                id_has = u", ".join(u'"{}"'.format(t) for t in id_tags - tx_tags)
+                tx_has = u", ".join(u'"{}"'.format(t) for t in tx_tags - id_tags)
+                if id_has and tx_has:
+                    diff = u"{} vs {}".format(id_has, tx_has)
+                elif id_has:
+                    diff = u"{} missing".format(id_has)
+                else:
+                    diff = u"{} added".format(tx_has)
+                problems.append((
+                    "Different tags in source and translation",
+                    source,
+                    translation,
+                    diff
+                ))
+
+    if problems:
+        problem_file = os.path.splitext(filename)[0] + ".prob"  # swap only the extension
+        id_filler = textwrap.TextWrapper(width=79, initial_indent=" msgid: ", subsequent_indent=" " * 9)
+        tx_filler = textwrap.TextWrapper(width=79, initial_indent=" -----> ", subsequent_indent=" " * 9)
+        with codecs.open(problem_file, "w", encoding="utf8") as prob_file:
+            for problem in problems:
+                desc, msgid = problem[:2]
+                prob_file.write(u"{}\n{}\n".format(desc, id_filler.fill(msgid)))
+                for translation in problem[2:]:
+                    prob_file.write(u"{}\n".format(tx_filler.fill(translation)))
+                prob_file.write(u"\n")
+
+    assert not problems, "Found %d problems in %s, details in .prob file" % (len(problems), filename)
diff --git a/i18n/transifex.py b/i18n/transifex.py
index d8fdd2c4bf..8653c901f9 100755
--- a/i18n/transifex.py
+++ b/i18n/transifex.py
@@ -15,6 +15,7 @@ def push():
def pull():
for locale in CONFIGURATION.locales:
if locale != CONFIGURATION.source_locale:
+ print "Pulling %s from transifex..." % locale
execute('tx pull -l %s' % locale)
clean_translated_locales()
diff --git a/lms/templates/login.html b/lms/templates/login.html
index 72d903eed2..877751495b 100644
--- a/lms/templates/login.html
+++ b/lms/templates/login.html
@@ -89,7 +89,7 @@
$submitButton.
addClass('is-disabled').
prop('disabled', true).
- html(gettext('Processing your account information …'));
+ html("${_(u'Processing your account information…')}");
}
}