Files
edx-platform/i18n/tests/test_validate.py
Ned Batchelder af120fdf53 Better handling of plurals during .po validation
Now any of the plurals being missing will count as an Empty translation,
and for looking at tags, the two English strings and all the translated
strings are mooshed together.
2014-01-07 09:43:46 -05:00

140 lines
4.7 KiB
Python

"""Tests that validate .po files."""
import codecs
import logging
import os
import sys
import textwrap
import polib
from config import LOCALE_DIR
from execute import call
from converter import Converter
def test_po_files(root=LOCALE_DIR):
"""
This is a generator. It yields all of the .po files under root, and tests each one.
"""
log = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
for dirpath, __, filenames in os.walk(root):
for name in filenames:
__, ext = os.path.splitext(name)
if ext.lower() == '.po':
filename = os.path.join(dirpath, name)
yield msgfmt_check_po_file, filename, log
yield check_messages, filename
def msgfmt_check_po_file(filename, log):
"""
Call GNU msgfmt -c on each .po file to validate its format.
Any errors caught by msgfmt are logged to log.
"""
# Use relative paths to make output less noisy.
rfile = os.path.relpath(filename, LOCALE_DIR)
out, err = call(['msgfmt', '-c', rfile], working_directory=LOCALE_DIR)
if err != '':
log.info('\n' + out)
log.warn('\n' + err)
assert not err
def tags_in_string(msg):
"""
Return the set of tags in a message string.
Tags includes HTML tags, data placeholders, etc.
Skips tags that might change due to translations: HTML entities, <abbr>,
and so on.
"""
def is_linguistic_tag(tag):
"""Is this tag one that can change with the language?"""
if tag.startswith("&"):
return True
if any(x in tag for x in ["<abbr>", "<abbr ", "</abbr>"]):
return True
return False
__, tags = Converter().detag_string(msg)
return set(t for t in tags if not is_linguistic_tag(t))
def astral(msg):
"""Does `msg` have characters outside the Basic Multilingual Plane?"""
return any(ord(c) > 0xFFFF for c in msg)
def check_messages(filename):
"""
Checks messages in various ways:
Translations must have the same slots as the English. The translation
must not be empty. Messages can't have astral characters in them.
"""
# Don't check English files.
if "/locale/en/" in filename:
return
# problems will be a list of tuples. Each is a description, and a msgid,
# and then zero or more translations.
problems = []
pomsgs = polib.pofile(filename)
for msg in pomsgs:
# Check for characters Javascript can't support.
# https://code.djangoproject.com/ticket/21725
if astral(msg.msgstr):
problems.append(("Non-BMP char", msg.msgid, msg.msgstr))
if msg.msgid_plural:
# Plurals: two strings in, N strings out.
source = msg.msgid + " | " + msg.msgid_plural
translation = " | ".join(v for k,v in sorted(msg.msgstr_plural.items()))
empty = any(not t.strip() for t in msg.msgstr_plural.values())
else:
# Singular: just one string in and one string out.
source = msg.msgid
translation = msg.msgstr
empty = not msg.msgstr.strip()
if empty:
problems.append(("Empty translation", source))
else:
id_tags = tags_in_string(source)
tx_tags = tags_in_string(translation)
if id_tags != tx_tags:
id_has = u", ".join(u'"{}"'.format(t) for t in id_tags - tx_tags)
tx_has = u", ".join(u'"{}"'.format(t) for t in tx_tags - id_tags)
if id_has and tx_has:
diff = u"{} vs {}".format(id_has, tx_has)
elif id_has:
diff = u"{} missing".format(id_has)
else:
diff = u"{} added".format(tx_has)
problems.append((
"Different tags in source and translation",
source,
translation,
diff
))
if problems:
problem_file = filename.replace(".po", ".prob")
id_filler = textwrap.TextWrapper(width=79, initial_indent=" msgid: ", subsequent_indent=" " * 9)
tx_filler = textwrap.TextWrapper(width=79, initial_indent=" -----> ", subsequent_indent=" " * 9)
with codecs.open(problem_file, "w", encoding="utf8") as prob_file:
for problem in problems:
desc, msgid = problem[:2]
prob_file.write(u"{}\n{}\n".format(desc, id_filler.fill(msgid)))
for translation in problem[2:]:
prob_file.write(u"{}\n".format(tx_filler.fill(translation)))
prob_file.write(u"\n")
assert not problems, "Found %d problems in %s, details in .prob file" % (len(problems), filename)