Now, if any of the plural forms is missing, the message counts as an Empty translation; and when comparing tags, the two English strings are joined together, as are all of the translated strings.
140 lines
4.7 KiB
Python
140 lines
4.7 KiB
Python
"""Tests that validate .po files."""
|
|
|
|
import codecs
|
|
import logging
|
|
import os
|
|
import sys
|
|
import textwrap
|
|
|
|
import polib
|
|
|
|
from config import LOCALE_DIR
|
|
from execute import call
|
|
from converter import Converter
|
|
|
|
|
|
def test_po_files(root=LOCALE_DIR):
    """
    Generate the checks for every .po file found beneath `root`.

    For each file whose extension is ".po" (compared case-insensitively),
    two test tuples are yielded: `msgfmt_check_po_file` with the file path
    and a logger, then `check_messages` with the file path.
    """
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logger = logging.getLogger(__name__)

    for folder, _subdirs, files in os.walk(root):
        for basename in files:
            extension = os.path.splitext(basename)[1]
            if extension.lower() != '.po':
                # Not a message catalog; nothing to check.
                continue
            po_path = os.path.join(folder, basename)
            yield msgfmt_check_po_file, po_path, logger
            yield check_messages, po_path
|
|
|
|
|
|
def msgfmt_check_po_file(filename, log):
    """
    Call GNU msgfmt -c on a .po file to validate its format.

    `filename` is the path of the .po file.  It is handed to msgfmt as a
    path relative to LOCALE_DIR, which is also used as the working
    directory of the command.

    `log` is the logger that receives msgfmt's output: when anything was
    reported on the error stream, the regular output is logged at INFO
    level and the error output at WARNING level.

    Raises AssertionError if msgfmt reported any errors.
    """
    # Use relative paths to make output less noisy.
    rfile = os.path.relpath(filename, LOCALE_DIR)
    out, err = call(['msgfmt', '-c', rfile], working_directory=LOCALE_DIR)
    # Use the same truthiness test as the assertion below, so that logging
    # and failing always agree on what counts as "msgfmt complained".
    if err:
        # Pass the output as logging arguments instead of concatenating, so
        # an unexpected output type cannot raise before the assertion runs.
        log.info('\n%s', out)
        # Logger.warn is a deprecated alias of Logger.warning.
        log.warning('\n%s', err)
    assert not err
|
|
|
|
|
|
def tags_in_string(msg):
    """
    Collect the set of tags found in a message string.

    Tags include HTML tags, data placeholders, etc., as reported by
    `Converter().detag_string`.

    Tags that might legitimately change in a translation are left out:
    anything starting with "&" (HTML entities) and <abbr> tags.

    """
    def varies_with_language(tag):
        """Can this tag differ from one language to another?"""
        return tag.startswith("&") or any(
            marker in tag for marker in ("<abbr>", "<abbr ", "</abbr>")
        )

    # detag_string returns a pair; only the second item (the tags) is used.
    _text, found_tags = Converter().detag_string(msg)
    return {tag for tag in found_tags if not varies_with_language(tag)}
|
|
|
|
|
|
def astral(msg):
    """Report whether `msg` contains a character beyond the Basic Multilingual Plane."""
    for char in msg:
        # Code points above U+FFFF lie outside the BMP.
        if ord(char) > 0xFFFF:
            return True
    return False
|
|
|
|
|
|
def check_messages(filename):
    """
    Check the messages of a .po file in various ways.

    Translations must have the same tags as the English.  The translation
    must not be empty; for a plural message, every plural form must be
    filled in.  Translations can't have astral characters in them.

    `filename` is the path of the .po file.  Files whose path contains
    "/locale/en/" are English and are not checked.

    When problems are found, they are written to a ".prob" file next to the
    .po file (same name, different extension) and an AssertionError is
    raised.

    """
    # Don't check English files.
    if "/locale/en/" in filename:
        return

    # problems will be a list of tuples.  Each is a description, and a msgid,
    # and then zero or more translations.
    problems = []
    pomsgs = polib.pofile(filename)
    for msg in pomsgs:
        if msg.msgid_plural:
            # Plurals: two strings in, N strings out.
            source = msg.msgid + " | " + msg.msgid_plural
            plural_forms = [text for _, text in sorted(msg.msgstr_plural.items())]
            translation = " | ".join(plural_forms)
            empty = any(not text.strip() for text in plural_forms)
        else:
            # Singular: just one string in and one string out.
            source = msg.msgid
            translation = msg.msgstr
            empty = not msg.msgstr.strip()

        # Check for characters Javascript can't support.
        # https://code.djangoproject.com/ticket/21725
        # This looks at the combined translation so that the plural forms
        # are checked too, not only the singular msgstr.
        if astral(translation):
            problems.append(("Non-BMP char", source, translation))

        if empty:
            problems.append(("Empty translation", source))
            continue

        id_tags = tags_in_string(source)
        tx_tags = tags_in_string(translation)
        if id_tags == tx_tags:
            continue

        id_has = u", ".join(u'"{}"'.format(t) for t in id_tags - tx_tags)
        tx_has = u", ".join(u'"{}"'.format(t) for t in tx_tags - id_tags)
        if id_has and tx_has:
            diff = u"{} vs {}".format(id_has, tx_has)
        elif id_has:
            diff = u"{} missing".format(id_has)
        else:
            diff = u"{} added".format(tx_has)
        problems.append((
            "Different tags in source and translation",
            source,
            translation,
            diff,
        ))

    if problems:
        # Swap only the extension.  A plain str.replace would also rewrite
        # any ".po" appearing earlier in the path, and would leave the name
        # untouched for an upper-case ".PO" extension, so the report would
        # overwrite the catalog itself.
        problem_file = os.path.splitext(filename)[0] + ".prob"
        id_filler = textwrap.TextWrapper(width=79, initial_indent="  msgid: ", subsequent_indent=" " * 9)
        tx_filler = textwrap.TextWrapper(width=79, initial_indent="  -----> ", subsequent_indent=" " * 9)
        with codecs.open(problem_file, "w", encoding="utf8") as prob_file:
            for problem in problems:
                desc, msgid = problem[:2]
                prob_file.write(u"{}\n{}\n".format(desc, id_filler.fill(msgid)))
                for translation in problem[2:]:
                    prob_file.write(u"{}\n".format(tx_filler.fill(translation)))
                prob_file.write(u"\n")

    assert not problems, "Found %d problems in %s, details in .prob file" % (len(problems), filename)
|