diff --git a/common/lib/xmodule/xmodule/js/src/capa/display.coffee b/common/lib/xmodule/xmodule/js/src/capa/display.coffee index 91b83e21f3..f65860b7ab 100644 --- a/common/lib/xmodule/xmodule/js/src/capa/display.coffee +++ b/common/lib/xmodule/xmodule/js/src/capa/display.coffee @@ -405,7 +405,7 @@ class @Problem formulaequationinput: (element) -> $(element).find('input').on 'input', -> $p = $(element).find('p.status') - `// Translators: the word Answer here is about answering a problem the student must solve.` + `// Translators: the word unanswered here is about answering a problem the student must solve.` $p.text gettext("unanswered") $p.parent().removeClass().addClass "unanswered" @@ -434,7 +434,7 @@ class @Problem textline: (element) -> $(element).find('input').on 'input', -> $p = $(element).find('p.status') - `// Translators: the word Answer here is about answering a problem the student must solve.` + `// Translators: the word unanswered here is about answering a problem the student must solve.` $p.text gettext("unanswered") $p.parent().removeClass().addClass "unanswered" diff --git a/i18n/converter.py b/i18n/converter.py index d3987bebe2..9a982347ee 100644 --- a/i18n/converter.py +++ b/i18n/converter.py @@ -21,9 +21,9 @@ class Converter(object): # HTML: , ,
, # Python: %(date)s, %(name)s tag_pattern = re.compile(r''' - (<[-\w" .:?=/]*>) | # - ({[^}]*}) | # {tag} - (%\([^)]*\)\w) | # %(tag)s + (<[^>]+>) | # + ({[^}]+}) | # {tag} + (%\([\w]+\)\w) | # %(tag)s (&\w+;) | # &entity; (&\#\d+;) | # Ӓ (&\#x[0-9a-f]+;) # ꯍ diff --git a/i18n/dummy.py b/i18n/dummy.py index e82429dcbd..b192069329 100644 --- a/i18n/dummy.py +++ b/i18n/dummy.py @@ -1,56 +1,70 @@ +# -*- coding: utf-8 -*- +r""" +Creates new localization properties files in a dummy language. + +Each property file is derived from the equivalent en_US file, with these +transformations applied: + +1. Every vowel is replaced with an equivalent with extra accent marks. + +2. Every string is padded out to +30% length to simulate verbose languages + (such as German) to see if layout and flows work properly. + +3. Every string is terminated with a '#' character to make it easier to detect + truncation. + +Example use:: + + >>> from dummy import Dummy + >>> c = Dummy() + >>> c.convert("My name is Bond, James Bond") + u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#' + >>> print c.convert("My name is Bond, James Bond") + Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ# + >>> print c.convert("don't convert tag ids") + døn't çønvért täg ïds Ⱡσяєм ιρѕυ# + >>> print c.convert("don't convert %(name)s tags on %(date)s") + døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ# + +""" + from converter import Converter -# Creates new localization properties files in a dummy language -# Each property file is derived from the equivalent en_US file, except -# 1. Every vowel is replaced with an equivalent with extra accent marks -# 2. Every string is padded out to +30% length to simulate verbose languages (e.g. German) -# to see if layout and flows work properly -# 3. Every string is terminated with a '#' character to make it easier to detect truncation - - -# -------------------------------- -# Example use: -# >>> from dummy import Dummy -# >>> c = Dummy() -# >>> c.convert("hello my name is Bond, James Bond") -# u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#' -# -# >>> c.convert('don\'t convert tag ids') -# u'd\xf6n\'t \xe7\xf6nv\xe9rt t\xe4g \xefds Lorem ipsu#' -# -# >>> c.convert('don\'t convert %(name)s tags on %(date)s') -# u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#" - - # Substitute plain characters with accented lookalikes. # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent -TABLE = {'A': u'\xC0', - 'a': u'\xE4', - 'b': u'\xDF', - 'C': u'\xc7', - 'c': u'\xE7', - 'E': u'\xC9', - 'e': u'\xE9', - 'I': U'\xCC', - 'i': u'\xEF', - 'O': u'\xD8', - 'o': u'\xF8', - 'U': u'\xDB', - 'u': u'\xFC', - 'Y': u'\xDD', - 'y': u'\xFD', - } - +TABLE = { + 'A': u'À', + 'a': u'ä', + 'b': u'ß', + 'C': u'Ç', + 'c': u'ç', + 'E': u'É', + 'e': u'é', + 'I': u'Ì', + 'i': u'ï', + 'O': u'Ø', + 'o': u'ø', + 'U': u'Û', + 'u': u'ü', + 'Y': u'Ý', + 'y': u'ý', +} # The print industry's standard dummy text, in use since the 1500s -# see http://www.lipsum.com/ -LOREM = ' Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed ' \ - 'do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad ' \ - 'minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ' \ - 'ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate ' \ - 'velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat ' \ - 'cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. ' +# see http://www.lipsum.com/, then fed through a "fancy-text" converter. +# The string should start with a space. +LOREM = " " + " ".join( # join and split just make the string easier here. + u""" + Ⱡσяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂ + тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм + νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα + ¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє + νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт + ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂ + єѕт łαвσяυм. + """.split() +) # To simulate more verbose languages (like German), pad the length of a string # by a multiple of PAD_FACTOR @@ -85,20 +99,6 @@ class Dummy(Converter): """replaces the final char of string with #""" return string[:-1] + '#' - def init_msgs(self, msgs): - """ - Make sure the first msg in msgs has a plural property. - msgs is list of instances of polib.POEntry - """ - if not msgs: - return - headers = msgs[0].get_property('msgstr') - has_plural = any(header.startswith('Plural-Forms:') for header in headers) - if not has_plural: - # Apply declaration for English pluralization rules - plural = "Plural-Forms: nplurals=2; plural=(n != 1);\\n" - headers.append(plural) - def convert_msg(self, msg): """ Takes one POEntry object and converts it (adds a dummy translation to it) @@ -114,8 +114,10 @@ class Dummy(Converter): # translate singular and plural foreign_single = self.convert(source) foreign_plural = self.convert(plural) - plural = {'0': self.final_newline(source, foreign_single), - '1': self.final_newline(plural, foreign_plural)} + plural = { + '0': self.final_newline(source, foreign_single), + '1': self.final_newline(plural, foreign_plural), + } msg.msgstr_plural = plural else: foreign = self.convert(source) diff --git a/i18n/extract.py b/i18n/extract.py index 2bb1baf60d..694f1740e4 100755 --- a/i18n/extract.py +++ b/i18n/extract.py @@ -45,7 +45,7 @@ def main(): remove_file(source_msgs_dir.joinpath(filename)) # Extract strings from mako templates. - babel_mako_cmd = 'pybabel extract -F %s -c "TRANSLATORS:" . -o %s' % (BABEL_CONFIG, BABEL_OUT) + babel_mako_cmd = 'pybabel extract -F %s -c "Translators:" . -o %s' % (BABEL_CONFIG, BABEL_OUT) # Extract strings from django source files. make_django_cmd = ( diff --git a/i18n/generate.py b/i18n/generate.py index 3d565ba091..8afa93c655 100755 --- a/i18n/generate.py +++ b/i18n/generate.py @@ -60,9 +60,12 @@ def merge(locale, target='django.po', fail_if_missing=True): def clean_metadata(file): """ Clean up redundancies in the metadata caused by merging. - This reads in a PO file and simply saves it back out again. """ - pofile(file).save() + # Reading in the .po file and saving it again fixes redundancies. + pomsgs = pofile(file) + # The msgcat tool marks the metadata as fuzzy, but it's ok as it is. + pomsgs.metadata_is_fuzzy = False + pomsgs.save() def validate_files(dir, files_to_merge): diff --git a/i18n/make_dummy.py b/i18n/make_dummy.py index 1d9be34b10..11021d4036 100755 --- a/i18n/make_dummy.py +++ b/i18n/make_dummy.py @@ -38,9 +38,15 @@ def main(file, locale): raise IOError('File does not exist: %s' % file) pofile = polib.pofile(file) converter = Dummy() - converter.init_msgs(pofile.translated_entries()) for msg in pofile: converter.convert_msg(msg) + + # If any message has a plural, then the file needs plural information. + # Apply declaration for English pluralization rules so that ngettext will + # do something reasonable. + if any(m.msgid_plural for m in pofile): + pofile.metadata['Plural-Forms'] = 'nplurals=2; plural=(n != 1);' + new_file = new_filename(file, locale) create_dir_if_necessary(new_file) pofile.save(new_file) diff --git a/i18n/tests/test_converter.py b/i18n/tests/test_converter.py index b1989ede94..e893f7c258 100644 --- a/i18n/tests/test_converter.py +++ b/i18n/tests/test_converter.py @@ -1,5 +1,8 @@ +"""Tests of i18n/converter.py""" + import os from unittest import TestCase +import ddt import converter @@ -11,36 +14,48 @@ class UpcaseConverter(converter.Converter): return string.upper() +@ddt.ddt class TestConverter(TestCase): """ Tests functionality of i18n/converter.py """ - def test_converter(self): + @ddt.data( + # no tags + ('big bad wolf', + 'BIG BAD WOLF'), + # one html tag + ('big bad wolf', + 'BIG BAD WOLF'), + # two html tags + ('big bad gray wolf', + 'BIG BAD GRAY WOLF'), + # html tags with attributes + ('bar baz', + 'BAR BAZ'), + ("bar baz", + "BAR BAZ"), + # one python tag + ('big %(adjective)s wolf', + 'BIG %(adjective)s WOLF'), + # two python tags + ('big %(adjective)s gray %(noun)s', + 'BIG %(adjective)s GRAY %(noun)s'), + # both kinds of tags + ('big %(adjective)s %(noun)s', + 'BIG %(adjective)s %(noun)s'), + # .format-style tags + ('The {0} barn is {1!r}.', + 'THE {0} BARN IS {1!r}.'), + # HTML entities + ('© 2013 edX,  ', + '© 2013 EDX,  '), + ) + def test_converter(self, data): """ Tests with a simple converter (converts strings to uppercase). Assert that embedded HTML and python tags are not converted. """ - c = UpcaseConverter() - test_cases = [ - # no tags - ('big bad wolf', 'BIG BAD WOLF'), - # one html tag - ('big bad wolf', 'BIG BAD WOLF'), - # two html tags - ('big bad wolf', 'BIG BAD WOLF'), - # one python tag - ('big %(adjective)s wolf', 'BIG %(adjective)s WOLF'), - # two python tags - ('big %(adjective)s %(noun)s', 'BIG %(adjective)s %(noun)s'), - # both kinds of tags - ('big %(adjective)s %(noun)s', - 'BIG %(adjective)s %(noun)s'), - # .format-style tags - ('The {0} barn is {1!r}.', 'THE {0} BARN IS {1!r}.'), - # HTML entities - ('© 2013 edX,  ', '© 2013 EDX,  '), - ] - for source, expected in test_cases: - result = c.convert(source) - self.assertEquals(result, expected) + source, expected = data + result = UpcaseConverter().convert(source) + self.assertEquals(result, expected) diff --git a/i18n/tests/test_dummy.py b/i18n/tests/test_dummy.py index 4670fe5635..2d1b1b71c3 100644 --- a/i18n/tests/test_dummy.py +++ b/i18n/tests/test_dummy.py @@ -1,10 +1,16 @@ +# -*- coding: utf-8 -*- +"""Tests of i18n/dummy.py""" + import os, string, random from unittest import TestCase + +import ddt from polib import POEntry import dummy +@ddt.ddt class TestDummy(TestCase): """ Tests functionality of i18n/dummy.py @@ -13,39 +19,52 @@ class TestDummy(TestCase): def setUp(self): self.converter = dummy.Dummy() - def test_dummy(self): + def assertUnicodeEquals(self, str1, str2): + """Just like assertEquals, but doesn't put Unicode into the fail message. + + Either nose, or rake, or something, deals very badly with unusual + Unicode characters in the assertions, so we use repr here to keep + things safe. + + """ + self.assertEquals( + str1, str2, + "Mismatch: %r != %r" % (str1, str2), + ) + + @ddt.data( + (u"hello my name is Bond, James Bond", + u"héllø mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ι#"), + + (u"don't convert tag ids", + u"døn't çønvért täg ïds Ⱡσяєм ιρѕυ#"), + + (u"don't convert %(name)s tags on %(date)s", + u"døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ#"), + ) + def test_dummy(self, data): """ Tests with a dummy converter (adds spurious accents to strings). Assert that embedded HTML and python tags are not converted. """ - test_cases = [ - ("hello my name is Bond, James Bond", - u'h\xe9ll\xf8 m\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd Lorem i#'), - - ('don\'t convert tag ids', - u'd\xf8n\'t \xe7\xf8nv\xe9rt t\xe4g \xefds Lorem ipsu#'), - - ('don\'t convert %(name)s tags on %(date)s', - u"d\xf8n't \xe7\xf8nv\xe9rt %(name)s t\xe4gs \xf8n %(date)s Lorem ips#") - ] - for source, expected in test_cases: - result = self.converter.convert(source) - self.assertEquals(result, expected) + source, expected = data + result = self.converter.convert(source) + self.assertUnicodeEquals(result, expected) def test_singular(self): entry = POEntry() entry.msgid = 'A lovely day for a cup of tea.' - expected = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#' + expected = u'À løvélý däý før ä çüp øf téä. Ⱡσяєм ι#' self.converter.convert_msg(entry) - self.assertEquals(entry.msgstr, expected) + self.assertUnicodeEquals(entry.msgstr, expected) def test_plural(self): entry = POEntry() entry.msgid = 'A lovely day for a cup of tea.' entry.msgid_plural = 'A lovely day for some cups of tea.' - expected_s = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r \xe4 \xe7\xfcp \xf8f t\xe9\xe4. Lorem i#' - expected_p = u'\xc0 l\xf8v\xe9l\xfd d\xe4\xfd f\xf8r s\xf8m\xe9 \xe7\xfcps \xf8f t\xe9\xe4. Lorem ip#' + expected_s = u'À løvélý däý før ä çüp øf téä. Ⱡσяєм ι#' + expected_p = u'À løvélý däý før sømé çüps øf téä. Ⱡσяєм ιρ#' self.converter.convert_msg(entry) result = entry.msgstr_plural - self.assertEquals(result['0'], expected_s) - self.assertEquals(result['1'], expected_p) + self.assertUnicodeEquals(result['0'], expected_s) + self.assertUnicodeEquals(result['1'], expected_p) diff --git a/i18n/tests/test_validate.py b/i18n/tests/test_validate.py index 2876f1c2f8..a7c400da0f 100644 --- a/i18n/tests/test_validate.py +++ b/i18n/tests/test_validate.py @@ -1,9 +1,17 @@ -import os, sys, logging -from unittest import TestCase -from nose.plugins.skip import SkipTest +"""Tests that validate .po files.""" + +import codecs +import logging +import os +import sys +import textwrap + +import polib from config import LOCALE_DIR from execute import call +from converter import Converter + def test_po_files(root=LOCALE_DIR): """ @@ -12,20 +20,120 @@ def test_po_files(root=LOCALE_DIR): log = logging.getLogger(__name__) logging.basicConfig(stream=sys.stdout, level=logging.INFO) - for (dirpath, dirnames, filenames) in os.walk(root): + for dirpath, __, filenames in os.walk(root): for name in filenames: - (base, ext) = os.path.splitext(name) + __, ext = os.path.splitext(name) if ext.lower() == '.po': - yield validate_po_file, os.path.join(dirpath, name), log + filename = os.path.join(dirpath, name) + yield msgfmt_check_po_file, filename, log + yield check_messages, filename -def validate_po_file(filename, log): +def msgfmt_check_po_file(filename, log): """ Call GNU msgfmt -c on each .po file to validate its format. Any errors caught by msgfmt are logged to log. """ # Use relative paths to make output less noisy. rfile = os.path.relpath(filename, LOCALE_DIR) - (out, err) = call(['msgfmt','-c', rfile], working_directory=LOCALE_DIR) + out, err = call(['msgfmt', '-c', rfile], working_directory=LOCALE_DIR) if err != '': - log.warn('\n'+err) + log.info('\n' + out) + log.warn('\n' + err) + assert not err + + +def tags_in_string(msg): + """ + Return the set of tags in a message string. + + Tags includes HTML tags, data placeholders, etc. + + Skips tags that might change due to translations: HTML entities, , + and so on. + + """ + def is_linguistic_tag(tag): + """Is this tag one that can change with the language?""" + if tag.startswith("&"): + return True + if any(x in tag for x in ["", ""]): + return True + return False + + __, tags = Converter().detag_string(msg) + return set(t for t in tags if not is_linguistic_tag(t)) + + +def astral(msg): + """Does `msg` have characters outside the Basic Multilingual Plane?""" + return any(ord(c) > 0xFFFF for c in msg) + + +def check_messages(filename): + """ + Checks messages in various ways: + + Translations must have the same slots as the English. The translation + must not be empty. Messages can't have astral characters in them. + + """ + # Don't check English files. + if "/locale/en/" in filename: + return + + # problems will be a list of tuples. Each is a description, and a msgid, + # and then zero or more translations. + problems = [] + pomsgs = polib.pofile(filename) + for msg in pomsgs: + # Check for characters Javascript can't support. + # https://code.djangoproject.com/ticket/21725 + if astral(msg.msgstr): + problems.append(("Non-BMP char", msg.msgid, msg.msgstr)) + + if msg.msgid_plural: + # Plurals: two strings in, N strings out. + source = msg.msgid + " | " + msg.msgid_plural + translation = " | ".join(v for k,v in sorted(msg.msgstr_plural.items())) + empty = any(not t.strip() for t in msg.msgstr_plural.values()) + else: + # Singular: just one string in and one string out. + source = msg.msgid + translation = msg.msgstr + empty = not msg.msgstr.strip() + + if empty: + problems.append(("Empty translation", source)) + else: + id_tags = tags_in_string(source) + tx_tags = tags_in_string(translation) + if id_tags != tx_tags: + id_has = u", ".join(u'"{}"'.format(t) for t in id_tags - tx_tags) + tx_has = u", ".join(u'"{}"'.format(t) for t in tx_tags - id_tags) + if id_has and tx_has: + diff = u"{} vs {}".format(id_has, tx_has) + elif id_has: + diff = u"{} missing".format(id_has) + else: + diff = u"{} added".format(tx_has) + problems.append(( + "Different tags in source and translation", + source, + translation, + diff + )) + + if problems: + problem_file = filename.replace(".po", ".prob") + id_filler = textwrap.TextWrapper(width=79, initial_indent=" msgid: ", subsequent_indent=" " * 9) + tx_filler = textwrap.TextWrapper(width=79, initial_indent=" -----> ", subsequent_indent=" " * 9) + with codecs.open(problem_file, "w", encoding="utf8") as prob_file: + for problem in problems: + desc, msgid = problem[:2] + prob_file.write(u"{}\n{}\n".format(desc, id_filler.fill(msgid))) + for translation in problem[2:]: + prob_file.write(u"{}\n".format(tx_filler.fill(translation))) + prob_file.write(u"\n") + + assert not problems, "Found %d problems in %s, details in .prob file" % (len(problems), filename) diff --git a/i18n/transifex.py b/i18n/transifex.py index d8fdd2c4bf..8653c901f9 100755 --- a/i18n/transifex.py +++ b/i18n/transifex.py @@ -15,6 +15,7 @@ def push(): def pull(): for locale in CONFIGURATION.locales: if locale != CONFIGURATION.source_locale: + print "Pulling %s from transifex..." % locale execute('tx pull -l %s' % locale) clean_translated_locales() diff --git a/lms/templates/login.html b/lms/templates/login.html index 72d903eed2..877751495b 100644 --- a/lms/templates/login.html +++ b/lms/templates/login.html @@ -89,7 +89,7 @@ $submitButton. addClass('is-disabled'). prop('disabled', true). - html(gettext('Processing your account information …')); + html("${_(u'Processing your account information…')}"); } }