From a6ffe8c12d366d503b504b341e94058394aeb381 Mon Sep 17 00:00:00 2001 From: "Dave St.Germain" Date: Mon, 2 Jun 2014 16:54:12 -0400 Subject: [PATCH] Moving these tools to i18n-tools repo --- i18n/__init__.py | 0 i18n/branch_cleanup.py | 36 ----- i18n/config.py | 69 -------- i18n/converter.py | 74 --------- i18n/dummy.py | 221 ------------------------- i18n/execute.py | 49 ------ i18n/extract.py | 233 --------------------------- i18n/generate.py | 141 ---------------- i18n/segment.py | 148 ----------------- i18n/tests/__init__.py | 0 i18n/tests/data/django_after.po | 37 ----- i18n/tests/data/django_before.po | 52 ------ i18n/tests/data/studio.po | 29 ---- i18n/tests/test_compiled_messages.py | 58 ------- i18n/tests/test_config.py | 33 ---- i18n/tests/test_converter.py | 61 ------- i18n/tests/test_dummy.py | 69 -------- i18n/tests/test_extract.py | 91 ----------- i18n/tests/test_generate.py | 96 ----------- i18n/tests/test_segment.py | 58 ------- i18n/transifex.py | 87 ---------- i18n/validate.py | 218 ------------------------- requirements/edx/github.txt | 3 +- setup.py | 1 - 24 files changed, 2 insertions(+), 1862 deletions(-) delete mode 100644 i18n/__init__.py delete mode 100644 i18n/branch_cleanup.py delete mode 100644 i18n/config.py delete mode 100644 i18n/converter.py delete mode 100755 i18n/dummy.py delete mode 100644 i18n/execute.py delete mode 100755 i18n/extract.py delete mode 100755 i18n/generate.py delete mode 100755 i18n/segment.py delete mode 100644 i18n/tests/__init__.py delete mode 100644 i18n/tests/data/django_after.po delete mode 100644 i18n/tests/data/django_before.po delete mode 100644 i18n/tests/data/studio.po delete mode 100644 i18n/tests/test_compiled_messages.py delete mode 100644 i18n/tests/test_config.py delete mode 100644 i18n/tests/test_converter.py delete mode 100644 i18n/tests/test_dummy.py delete mode 100644 i18n/tests/test_extract.py delete mode 100644 i18n/tests/test_generate.py delete mode 100644 i18n/tests/test_segment.py delete mode 100755 i18n/transifex.py delete mode 100644 i18n/validate.py diff --git a/i18n/__init__.py b/i18n/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/i18n/branch_cleanup.py b/i18n/branch_cleanup.py deleted file mode 100644 index 9b1c1cc36c..0000000000 --- a/i18n/branch_cleanup.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python -""" -Utility for cleaning up your local directory after switching between -branches with different translation levels (eg master branch, with only -reviewed translations, versus dev branch, with all translations) -""" -from __future__ import print_function -import os - -from i18n.config import CONFIGURATION -from i18n.execute import execute - - -def clean_conf_folder(locale): - """Remove the configuration directory for `locale`""" - dirname = CONFIGURATION.get_messages_dir(locale) - command = "rm -rf {}".format(dirname) - print(command) - try: - execute(command) - except Exception as exc: - print("Encountered error {}; continuing...".format(exc)) - return - - -def clean_configuration_directory(): - """ - Remove the configuration directories for all locales - in CONFIGURATION.translated_locales - """ - for locale in CONFIGURATION.translated_locales: - clean_conf_folder(locale) - - -if __name__ == '__main__': - clean_configuration_directory() diff --git a/i18n/config.py b/i18n/config.py deleted file mode 100644 index dbd5591b3f..0000000000 --- a/i18n/config.py +++ /dev/null @@ -1,69 +0,0 @@ -import os - -import yaml -from path import path - -# BASE_DIR is the working directory to execute django-admin commands from. -# Typically this should be the 'edx-platform' directory. -BASE_DIR = path(__file__).abspath().dirname().dirname() - -# LOCALE_DIR contains the locale files. -# Typically this should be 'edx-platform/conf/locale' -LOCALE_DIR = BASE_DIR.joinpath('conf', 'locale') - - -class Configuration(object): - """ - Reads localization configuration in json format. - """ - DEFAULTS = { - 'dummy_locales': [], - 'generate_merge': {}, - 'ignore_dirs': [], - 'locales': ['en'], - 'segment': {}, - 'source_locale': 'en', - 'third_party': [], - } - - def __init__(self, filename): - self._filename = filename - self._config = self.read_config(filename) - - def read_config(self, filename): - """ - Returns data found in config file (as dict), or raises exception if file not found - """ - if not os.path.exists(filename): - raise Exception("Configuration file cannot be found: %s" % filename) - with open(filename) as stream: - return yaml.safe_load(stream) - - def __getattr__(self, name): - if name in self.DEFAULTS: - return self._config.get(name, self.DEFAULTS[name]) - raise AttributeError("Configuration has no such setting: {!r}".format(name)) - - def get_messages_dir(self, locale): - """ - Returns the name of the directory holding the po files for locale. - Example: edx-platform/conf/locale/fr/LC_MESSAGES - """ - return LOCALE_DIR.joinpath(locale, 'LC_MESSAGES') - - @property - def source_messages_dir(self): - """ - Returns the name of the directory holding the source-language po files (English). - Example: edx-platform/conf/locale/en/LC_MESSAGES - """ - return self.get_messages_dir(self.source_locale) - - @property - def translated_locales(self): - """ - Returns the set of locales to be translated (ignoring the source_locale). - """ - return sorted(set(self.locales) - set([self.source_locale])) - -CONFIGURATION = Configuration(LOCALE_DIR.joinpath('config.yaml').normpath()) diff --git a/i18n/converter.py b/i18n/converter.py deleted file mode 100644 index 9a3a97db0a..0000000000 --- a/i18n/converter.py +++ /dev/null @@ -1,74 +0,0 @@ -import re -import itertools - - -class Converter(object): - """Converter is an abstract class that transforms strings. - It hides embedded tags (HTML or Python sequences) from transformation - - To implement Converter, provide implementation for inner_convert_string() - - Strategy: - 1. extract tags embedded in the string - a. use the index of each extracted tag to re-insert it later - b. replace tags in string with numbers (<0>, <1>, etc.) - c. save extracted tags in a separate list - 2. convert string - 3. re-insert the extracted tags - - """ - - # matches tags like these: - # HTML: , ,
, - # Python: %(date)s, %(name)s - tag_pattern = re.compile( - r''' - (<[^>]+>) | # - ({[^}]+}) | # {tag} - (%\([\w]+\)\w) | # %(tag)s - (&\w+;) | # &entity; - (&\#\d+;) | # Ӓ - (&\#x[0-9a-f]+;) # ꯍ - ''', - re.IGNORECASE | re.VERBOSE - ) - - def convert(self, string): - """Returns: a converted tagged string - param: string (contains html tags) - - Don't replace characters inside tags - """ - (string, tags) = self.detag_string(string) - string = self.inner_convert_string(string) - string = self.retag_string(string, tags) - return string - - def detag_string(self, string): - """Extracts tags from string. - - returns (string, list) where - string: string has tags replaced by indices (
... => <0>, <1>, <2>, etc.) - list: list of the removed tags ('
', '', '') - """ - counter = itertools.count(0) - count = lambda m: '<%s>' % counter.next() - tags = self.tag_pattern.findall(string) - tags = [''.join(tag) for tag in tags] - (new, nfound) = self.tag_pattern.subn(count, string) - if len(tags) != nfound: - raise Exception('tags dont match:' + string) - return (new, tags) - - def retag_string(self, string, tags): - """substitutes each tag back into string, into occurrences of <0>, <1> etc""" - for (i, tag) in enumerate(tags): - p = '<%s>' % i - string = re.sub(p, tag, string, 1) - return string - - # ------------------------------ - # Customize this in subclasses of Converter - - def inner_convert_string(self, string): - return string # do nothing by default diff --git a/i18n/dummy.py b/i18n/dummy.py deleted file mode 100755 index d2f42a5bc9..0000000000 --- a/i18n/dummy.py +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -Generate test translation files from human-readable po files. - -Dummy language is specified in configuration file (see config.py) -two letter language codes reference: -see http://www.loc.gov/standards/iso639-2/php/code_list.php - -Django will not localize in languages that django itself has not been -localized for. So we are using a well-known language (default='eo'). -Django languages are listed in django.conf.global_settings.LANGUAGES - -po files can be generated with this: -django-admin.py makemessages --all --extension html -l en - -Usage: - -$ ./dummy.py - -generates output conf/locale/$DUMMY_LOCALE/LC_MESSAGES, -where $DUMMY_LOCALE is the dummy_locale value set in the i18n config -""" -from __future__ import print_function -import re -import sys -import argparse - -import polib -from path import path - -from i18n.config import CONFIGURATION -from i18n.converter import Converter - - -class BaseDummyConverter(Converter): - """Base class for dummy converters. - - String conversion goes through a character map, then gets padded. - - """ - TABLE = {} - - def inner_convert_string(self, string): - for old, new in self.TABLE.items(): - string = string.replace(old, new) - return self.pad(string) - - def pad(self, string): - return string - - def convert_msg(self, msg): - """ - Takes one POEntry object and converts it (adds a dummy translation to it) - msg is an instance of polib.POEntry - """ - source = msg.msgid - if not source: - # don't translate empty string - return - - plural = msg.msgid_plural - if plural: - # translate singular and plural - foreign_single = self.convert(source) - foreign_plural = self.convert(plural) - plural = { - '0': self.final_newline(source, foreign_single), - '1': self.final_newline(plural, foreign_plural), - } - msg.msgstr_plural = plural - else: - foreign = self.convert(source) - msg.msgstr = self.final_newline(source, foreign) - - def final_newline(self, original, translated): - """ Returns a new translated string. - If last char of original is a newline, make sure translation - has a newline too. - """ - if original: - if original[-1] == '\n' and translated[-1] != '\n': - translated += '\n' - return translated - - -class Dummy(BaseDummyConverter): - r""" - Creates new localization properties files in a dummy language. - - Each property file is derived from the equivalent en_US file, with these - transformations applied: - - 1. Every vowel is replaced with an equivalent with extra accent marks. - - 2. Every string is padded out to +30% length to simulate verbose languages - (such as German) to see if layout and flows work properly. - - 3. Every string is terminated with a '#' character to make it easier to detect - truncation. - - Example use:: - - >>> from dummy import Dummy - >>> c = Dummy() - >>> c.convert("My name is Bond, James Bond") - u'M\xfd n\xe4m\xe9 \xefs B\xf8nd, J\xe4m\xe9s B\xf8nd \u2360\u03c3\u044f\u0454\u043c \u03b9\u03c1#' - >>> print c.convert("My name is Bond, James Bond") - Mý nämé ïs Bønd, Jämés Bønd Ⱡσяєм ιρ# - >>> print c.convert("don't convert tag ids") - døn't çønvért täg ïds Ⱡσяєм ιρѕυ# - >>> print c.convert("don't convert %(name)s tags on %(date)s") - døn't çønvért %(name)s tägs øn %(date)s Ⱡσяєм ιρѕ# - - """ - # Substitute plain characters with accented lookalikes. - # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent - TABLE = dict(zip( - u"AabCcEeIiOoUuYy", - u"ÀäßÇçÉéÌïÖöÛüÝý" - )) - - # The print industry's standard dummy text, in use since the 1500s - # see http://www.lipsum.com/, then fed through a "fancy-text" converter. - # The string should start with a space, so that it joins nicely with the text - # that precedes it. The Lorem contains an apostrophe since French often does, - # and translated strings get put into single-quoted strings, which then break. - LOREM = " " + " ".join( # join and split just make the string easier here. - u""" - Ⱡ'σяєм ιρѕυм ∂σłσя ѕιт αмєт, ¢σηѕє¢тєтυя α∂ιριѕι¢ιηg єłιт, ѕє∂ ∂σ єιυѕмσ∂ - тємρσя ιη¢ι∂ι∂υηт υт łαвσяє єт ∂σłσяє мαgηα αłιqυα. υт єηιм α∂ мιηιм - νєηιαм, qυιѕ ησѕтяυ∂ єχєя¢ιтαтιση υłłαм¢σ łαвσяιѕ ηιѕι υт αłιqυιρ єχ єα - ¢σммσ∂σ ¢σηѕєqυαт. ∂υιѕ αυтє ιяυяє ∂σłσя ιη яєρяєнєη∂єяιт ιη νσłυρтαтє - νєłιт єѕѕє ¢ιłłυм ∂σłσяє єυ ƒυgιαт ηυłłα ραяιαтυя. єχ¢єρтєυя ѕιηт σ¢¢αє¢αт - ¢υρι∂αтαт ηση ρяσι∂єηт, ѕυηт ιη ¢υłρα qυι σƒƒι¢ια ∂єѕєяυηт мσłłιт αηιм ι∂ - єѕт łαвσяυм. - """.split() - ) - - # To simulate more verbose languages (like German), pad the length of a string - # by a multiple of PAD_FACTOR - PAD_FACTOR = 1.33 - - def pad(self, string): - """add some lorem ipsum text to the end of string""" - size = len(string) - if size < 7: - target = size * 3 - else: - target = int(size * self.PAD_FACTOR) - pad_len = target - size - 1 - return string + self.LOREM[:pad_len] + "#" - - -class Dummy2(BaseDummyConverter): - """A second dummy converter. - - Like Dummy, but uses a different obvious but readable automatic conversion: - Strikes-through many letters, and turns lower-case letters upside-down. - - """ - TABLE = dict(zip( - u"ABCDEGHIJKLOPRTUYZabcdefghijklmnopqrstuvwxyz", - u"ȺɃȻĐɆǤĦƗɈꝀŁØⱣɌŦɄɎƵɐqɔpǝɟƃɥᴉɾʞlɯuødbɹsʇnʌʍxʎz" - )) - - -def make_dummy(filename, locale, converter): - """ - Takes a source po file, reads it, and writes out a new po file - in :param locale: containing a dummy translation. - """ - if not path(filename).exists(): - raise IOError('File does not exist: %r' % filename) - pofile = polib.pofile(filename) - for msg in pofile: - # Some strings are actually formatting strings, don't dummy-ify them, - # or dates will look like "DÀTÉ_TÌMÉ_FÖRMÀT Ⱡ'σ# EST" - if re.match(r"^[A-Z_]+_FORMAT$", msg.msgid): - continue - converter.convert_msg(msg) - - # Apply declaration for English pluralization rules so that ngettext will - # do something reasonable. - pofile.metadata['Plural-Forms'] = 'nplurals=2; plural=(n != 1);' - - new_file = new_filename(filename, locale) - new_file.parent.makedirs_p() - pofile.save(new_file) - - -def new_filename(original_filename, new_locale): - """Returns a filename derived from original_filename, using new_locale as the locale""" - f = path(original_filename) - new_file = f.parent.parent.parent / new_locale / f.parent.name / f.name - return new_file.abspath() - - -def main(verbosity=1): - """ - Generate dummy strings for all source po files. - """ - SOURCE_MSGS_DIR = CONFIGURATION.source_messages_dir - for locale, converter in zip(CONFIGURATION.dummy_locales, [Dummy(), Dummy2()]): - if verbosity: - print('Processing source language files into dummy strings, locale "{}"'.format(locale)) - for source_file in CONFIGURATION.source_messages_dir.walkfiles('*.po'): - if verbosity: - print(' ', source_file.relpath()) - make_dummy(SOURCE_MSGS_DIR.joinpath(source_file), locale, converter) - if verbosity: - print() - - -if __name__ == '__main__': - # pylint: disable=invalid-name - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--verbose", "-v", action="count", default=0) - args = parser.parse_args() - main(verbosity=args.verbose) diff --git a/i18n/execute.py b/i18n/execute.py deleted file mode 100644 index bc6f626fae..0000000000 --- a/i18n/execute.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -Utility library file for executing shell commands -""" -import os -import subprocess -import logging - -from i18n.config import BASE_DIR - -LOG = logging.getLogger(__name__) - - -def execute(command, working_directory=BASE_DIR, stderr=subprocess.STDOUT): - """ - Executes shell command in a given working_directory. - Command is a string to pass to the shell. - Output is ignored. - """ - LOG.info("Executing in %s ...", working_directory) - LOG.info(command) - subprocess.check_call(command, cwd=working_directory, stderr=stderr, shell=True) - - -def call(command, working_directory=BASE_DIR): - """ - Executes shell command in a given working_directory. - Command is a list of strings to execute as a command line. - Returns a tuple of two strings: (stdout, stderr) - - """ - LOG.info(command) - p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_directory, shell=True) - out, err = p.communicate() - return (out, err) - - -def remove_file(filename, verbose=True): - """ - Attempt to delete filename. - log is boolean. If true, removal is logged. - Log a warning if file does not exist. - Logging filenames are releative to BASE_DIR to cut down on noise in output. - """ - if verbose: - LOG.info('Deleting file %s' % os.path.relpath(filename, BASE_DIR)) - if not os.path.exists(filename): - LOG.warn("File does not exist: %s" % os.path.relpath(filename, BASE_DIR)) - else: - os.remove(filename) diff --git a/i18n/extract.py b/i18n/extract.py deleted file mode 100755 index 0aeeaadff3..0000000000 --- a/i18n/extract.py +++ /dev/null @@ -1,233 +0,0 @@ -#!/usr/bin/env python - -""" -See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow - -This task extracts all English strings from all source code -and produces three human-readable files: - conf/locale/en/LC_MESSAGES/django-partial.po - conf/locale/en/LC_MESSAGES/djangojs-partial.po - conf/locale/en/LC_MESSAGES/mako.po - -This task will clobber any existing django.po file. -This is because django-admin.py makemessages hardcodes this filename -and it cannot be overridden. - -""" - -from datetime import datetime -import importlib -import os -import os.path -import logging -import sys -import argparse - -from path import path -from polib import pofile - -from i18n.config import BASE_DIR, LOCALE_DIR, CONFIGURATION -from i18n.execute import execute, remove_file -from i18n.segment import segment_pofiles - - -EDX_MARKER = "edX translation file" -LOG = logging.getLogger(__name__) -DEVNULL = open(os.devnull, 'wb') - - -def base(path1, *paths): - """Return a relative path from BASE_DIR to path1 / paths[0] / ... """ - return BASE_DIR.relpathto(path1.joinpath(*paths)) - - -def main(verbosity=1): - """ - Main entry point of script - """ - logging.basicConfig(stream=sys.stdout, level=logging.INFO) - LOCALE_DIR.parent.makedirs_p() - source_msgs_dir = CONFIGURATION.source_messages_dir - remove_file(source_msgs_dir.joinpath('django.po')) - - # Extract strings from mako templates. - verbosity_map = { - 0: "-q", - 1: "", - 2: "-v", - } - babel_verbosity = verbosity_map.get(verbosity, "") - - if verbosity: - stderr = None - else: - stderr = DEVNULL - - babel_cmd_template = 'pybabel {verbosity} extract -F {config} -c "Translators:" . -o {output}' - - babel_mako_cmd = babel_cmd_template.format( - verbosity=babel_verbosity, - config=base(LOCALE_DIR, 'babel_mako.cfg'), - output=base(CONFIGURATION.source_messages_dir, 'mako.po'), - ) - execute(babel_mako_cmd, working_directory=BASE_DIR, stderr=stderr) - - babel_underscore_cmd = babel_cmd_template.format( - verbosity=babel_verbosity, - config=base(LOCALE_DIR, 'babel_underscore.cfg'), - output=base(CONFIGURATION.source_messages_dir, 'underscore.po'), - ) - execute(babel_underscore_cmd, working_directory=BASE_DIR, stderr=stderr) - - makemessages = "django-admin.py makemessages -l en -v{}".format(verbosity) - ignores = " ".join('--ignore="{}/*"'.format(d) for d in CONFIGURATION.ignore_dirs) - if ignores: - makemessages += " " + ignores - - # Extract strings from django source files, including .py files. - make_django_cmd = makemessages + ' --extension html' - execute(make_django_cmd, working_directory=BASE_DIR, stderr=stderr) - - # Extract strings from Javascript source files. - make_djangojs_cmd = makemessages + ' -d djangojs --extension js' - execute(make_djangojs_cmd, working_directory=BASE_DIR, stderr=stderr) - - # makemessages creates 'django.po'. This filename is hardcoded. - # Rename it to django-partial.po to enable merging into django.po later. - os.rename( - source_msgs_dir.joinpath('django.po'), - source_msgs_dir.joinpath('django-partial.po') - ) - - # makemessages creates 'djangojs.po'. This filename is hardcoded. - # Rename it to djangojs-partial.po to enable merging into djangojs.po later. - os.rename( - source_msgs_dir.joinpath('djangojs.po'), - source_msgs_dir.joinpath('djangojs-partial.po') - ) - - files_to_clean = set() - - # Extract strings from third-party applications. - for app_name in CONFIGURATION.third_party: - # Import the app to find out where it is. Then use pybabel to extract - # from that directory. - app_module = importlib.import_module(app_name) - app_dir = path(app_module.__file__).dirname().dirname() - output_file = source_msgs_dir / (app_name + ".po") - files_to_clean.add(output_file) - - babel_cmd = 'pybabel {verbosity} extract -F {config} -c "Translators:" {app} -o {output}' - babel_cmd = babel_cmd.format( - verbosity=babel_verbosity, - config=LOCALE_DIR / 'babel_third_party.cfg', - app=app_name, - output=output_file, - ) - execute(babel_cmd, working_directory=app_dir, stderr=stderr) - - # Segment the generated files. - segmented_files = segment_pofiles("en") - files_to_clean.update(segmented_files) - - # Finish each file. - for filename in files_to_clean: - LOG.info('Cleaning %s' % filename) - po = pofile(source_msgs_dir.joinpath(filename)) - # replace default headers with edX headers - fix_header(po) - # replace default metadata with edX metadata - fix_metadata(po) - # remove key strings which belong in messages.po - strip_key_strings(po) - po.save() - - -def fix_header(po): - """ - Replace default headers with edX headers - """ - - # By default, django-admin.py makemessages creates this header: - # - # SOME DESCRIPTIVE TITLE. - # Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER - # This file is distributed under the same license as the PACKAGE package. - # FIRST AUTHOR , YEAR. - - po.metadata_is_fuzzy = [] # remove [u'fuzzy'] - header = po.header - fixes = ( - ('SOME DESCRIPTIVE TITLE', EDX_MARKER), - ('Translations template for PROJECT.', EDX_MARKER), - ('YEAR', str(datetime.utcnow().year)), - ('ORGANIZATION', 'edX'), - ("THE PACKAGE'S COPYRIGHT HOLDER", "EdX"), - ( - 'This file is distributed under the same license as the PROJECT project.', - 'This file is distributed under the GNU AFFERO GENERAL PUBLIC LICENSE.' - ), - ( - 'This file is distributed under the same license as the PACKAGE package.', - 'This file is distributed under the GNU AFFERO GENERAL PUBLIC LICENSE.' - ), - ('FIRST AUTHOR ', 'EdX Team '), - ) - for src, dest in fixes: - header = header.replace(src, dest) - po.header = header - - -def fix_metadata(po): - """ - Replace default metadata with edX metadata - """ - - # By default, django-admin.py makemessages creates this metadata: - # - # {u'PO-Revision-Date': u'YEAR-MO-DA HO:MI+ZONE', - # u'Language': u'', - # u'Content-Transfer-Encoding': u'8bit', - # u'Project-Id-Version': u'PACKAGE VERSION', - # u'Report-Msgid-Bugs-To': u'', - # u'Last-Translator': u'FULL NAME ', - # u'Language-Team': u'LANGUAGE ', - # u'POT-Creation-Date': u'2013-04-25 14:14-0400', - # u'Content-Type': u'text/plain; charset=UTF-8', - # u'MIME-Version': u'1.0'} - - fixes = { - 'PO-Revision-Date': datetime.utcnow(), - 'Report-Msgid-Bugs-To': 'openedx-translation@googlegroups.com', - 'Project-Id-Version': '0.1a', - 'Language': 'en', - 'Last-Translator': '', - 'Language-Team': 'openedx-translation ', - } - po.metadata.update(fixes) - - -def strip_key_strings(po): - """ - Removes all entries in PO which are key strings. - These entries should appear only in messages.po, not in any other po files. - """ - newlist = [entry for entry in po if not is_key_string(entry.msgid)] - del po[:] - po += newlist - - -def is_key_string(string): - """ - returns True if string is a key string. - Key strings begin with underscore. - """ - return len(string) > 1 and string[0] == '_' - - -if __name__ == '__main__': - # pylint: disable=invalid-name - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('--verbose', '-v', action='count', default=0) - args = parser.parse_args() - main(verbosity=args.verbose) diff --git a/i18n/generate.py b/i18n/generate.py deleted file mode 100755 index 673ca4486a..0000000000 --- a/i18n/generate.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python - -""" -See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow - -This task merges and compiles the human-readable .po files on the -local filesystem into machine-readable .mo files. This is typically -necessary as part of the build process since these .mo files are -needed by Django when serving the web app. - -The configuration file (in edx-platform/conf/locale/config.yaml) specifies which -languages to generate. - -""" - -import argparse -import logging -import os -import sys - -from polib import pofile - -from i18n.config import BASE_DIR, CONFIGURATION -from i18n.execute import execute - -LOG = logging.getLogger(__name__) -DEVNULL = open(os.devnull, "wb") - - -def merge(locale, target='django.po', sources=('django-partial.po',), fail_if_missing=True): - """ - For the given locale, merge the `sources` files to become the `target` - file. Note that the target file might also be one of the sources. - - If fail_if_missing is true, and the files to be merged are missing, - throw an Exception, otherwise return silently. - - If fail_if_missing is false, and the files to be merged are missing, - just return silently. - - """ - LOG.info('Merging {target} for locale {locale}'.format(target=target, locale=locale)) - locale_directory = CONFIGURATION.get_messages_dir(locale) - try: - validate_files(locale_directory, sources) - except Exception, e: - if not fail_if_missing: - return - raise - - # merged file is merged.po - merge_cmd = 'msgcat -o merged.po ' + ' '.join(sources) - execute(merge_cmd, working_directory=locale_directory) - - # clean up redunancies in the metadata - merged_filename = locale_directory.joinpath('merged.po') - clean_pofile(merged_filename) - - # rename merged.po -> django.po (default) - target_filename = locale_directory.joinpath(target) - os.rename(merged_filename, target_filename) - - -def merge_files(locale, fail_if_missing=True): - """ - Merge all the files in `locale`, as specified in config.yaml. - """ - for target, sources in CONFIGURATION.generate_merge.items(): - merge(locale, target, sources, fail_if_missing) - - -def clean_pofile(file): - """ - Clean various aspect of a .po file. - - Fixes: - - - Removes the ,fuzzy flag on metadata. - - - Removes occurrence line numbers so that the generated files don't - generate a lot of line noise when they're committed. - - - Removes any flags ending with "-format". Mac gettext seems to add - these flags, Linux does not, and we don't seem to need them. By - removing them, we reduce the unimportant differences that clutter - diffs as different developers work on the files. - - """ - # Reading in the .po file and saving it again fixes redundancies. - pomsgs = pofile(file) - # The msgcat tool marks the metadata as fuzzy, but it's ok as it is. - pomsgs.metadata_is_fuzzy = False - for entry in pomsgs: - # Remove line numbers - entry.occurrences = [(filename, None) for (filename, lineno) in entry.occurrences] - # Remove -format flags - entry.flags = [f for f in entry.flags if not f.endswith("-format")] - pomsgs.save() - - -def validate_files(dir, files_to_merge): - """ - Asserts that the given files exist. - files_to_merge is a list of file names (no directories). - dir is the directory (a path object from path.py) in which the files should appear. - raises an Exception if any of the files are not in dir. - """ - for path in files_to_merge: - pathname = dir.joinpath(path) - if not pathname.exists(): - raise Exception("I18N: Cannot generate because file not found: {0}".format(pathname)) - - -def main(strict=True, verbosity=1): - """ - Main entry point for script - """ - for locale in CONFIGURATION.translated_locales: - merge_files(locale, fail_if_missing=strict) - # Dummy text is not required. Don't raise exception if files are missing. - for locale in CONFIGURATION.dummy_locales: - merge_files(locale, fail_if_missing=False) - - compile_cmd = 'django-admin.py compilemessages -v{}'.format(verbosity) - if verbosity: - stderr = None - else: - stderr = DEVNULL - execute(compile_cmd, working_directory=BASE_DIR, stderr=stderr) - - -if __name__ == '__main__': - logging.basicConfig(stream=sys.stdout, level=logging.INFO) - - # pylint: disable=invalid-name - parser = argparse.ArgumentParser(description="Generate merged and compiled message files.") - parser.add_argument("--strict", action='store_true', help="Complain about missing files.") - parser.add_argument("--verbose", "-v", action="count", default=0) - args = parser.parse_args() - - main(strict=args.strict, verbosity=args.verbose) diff --git a/i18n/segment.py b/i18n/segment.py deleted file mode 100755 index d8466addd1..0000000000 --- a/i18n/segment.py +++ /dev/null @@ -1,148 +0,0 @@ -#!/usr/bin/env python -""" -Segment a .po file to produce smaller files based on the locations of the -messages. -""" - -import copy -import fnmatch -import logging -import sys -import argparse -import polib -import textwrap - -from i18n.config import CONFIGURATION - -LOG = logging.getLogger(__name__) - - -def segment_pofiles(locale): - """Segment all the pofiles for `locale`. - - Returns a set of filenames, all the segment files written. - - """ - files_written = set() - for filename, segments in CONFIGURATION.segment.items(): - filename = CONFIGURATION.get_messages_dir(locale) / filename - files_written.update(segment_pofile(filename, segments)) - return files_written - - -def segment_pofile(filename, segments): - """Segment a .po file using patterns in `segments`. - - The .po file at `filename` is read, and the occurrence locations of its - messages are examined. `segments` is a dictionary: the keys are segment - .po filenames, the values are lists of patterns:: - - { - 'django-studio.po': [ - 'cms/*', - 'some-other-studio-place/*', - ], - 'django-weird.po': [ - '*/weird_*.*', - ], - } - - If all a message's occurrences match the patterns for a segment, then that - message is written to the new segmented .po file. - - Any message that matches no segments, or more than one, is written back to - the original file. - - Arguments: - filename (path.path): a path object referring to the original .po file. - segments (dict): specification of the segments to create. - - Returns: - a set of path objects, all the segment files written. - - """ - reading_msg = "Reading {num} entries from {file}" - writing_msg = "Writing {num} entries to {file}" - - source_po = polib.pofile(filename) - LOG.info(reading_msg.format(file=filename, num=len(source_po))) - - # A new pofile just like the source, but with no messages. We'll put - # anything not segmented into this file. - remaining_po = copy.deepcopy(source_po) - remaining_po[:] = [] - - # Turn the segments dictionary into two structures: segment_patterns is a - # list of (pattern, segmentfile) pairs. segment_po_files is a dict mapping - # segment file names to pofile objects of their contents. - segment_po_files = {filename: remaining_po} - segment_patterns = [] - for segmentfile, patterns in segments.items(): - segment_po_files[segmentfile] = copy.deepcopy(remaining_po) - segment_patterns.extend((pat, segmentfile) for pat in patterns) - - # Examine each message in the source file. If all of its occurrences match - # a pattern for the same segment, it goes in that segment. Otherwise, it - # goes in remaining. - for msg in source_po: - msg_segments = set() - for occ_file, _ in msg.occurrences: - for pat, segment_file in segment_patterns: - if fnmatch.fnmatch(occ_file, pat): - msg_segments.add(segment_file) - break - else: - msg_segments.add(filename) - - assert msg_segments - if len(msg_segments) == 1: - # This message belongs in this segment. - segment_file = msg_segments.pop() - segment_po_files[segment_file].append(msg) - else: - # It's in more than one segment, so put it back in the main file. - remaining_po.append(msg) - - # Write out the results. - files_written = set() - for segment_file, pofile in segment_po_files.items(): - out_file = filename.dirname() / segment_file - if len(pofile) == 0: - LOG.error("No messages to write to {file}, did you run segment twice?".format(file=out_file)) - else: - LOG.info(writing_msg.format(file=out_file, num=len(pofile))) - pofile.save(out_file) - files_written.add(out_file) - - return files_written - - -def main(locales=None, verbosity=1): # pylint: disable=unused-argument - """ - Main entry point of script - """ - # This is used as a tool only to segment translation files when adding a - # new segment. In the regular workflow, the work is done by the extract - # phase calling the functions above. - locales = locales or [] - for locale in locales: - segment_pofiles(locale) - - -if __name__ == "__main__": - logging.basicConfig(stream=sys.stdout, level=logging.INFO) - - # pylint: disable=invalid-name - description = textwrap.dedent(""" - Segment the .po files in LOCALE(s) based on the segmenting rules in - config.yaml. - - Note that segmenting is *not* idempotent: it modifies the input file, so - be careful that you don't run it twice on the same file. - """.strip()) - - parser = argparse.ArgumentParser(description=description) - parser.add_argument("locale", nargs="+", help="a locale to segment") - parser.add_argument("--verbose", "-v", action="count", default=0) - args = parser.parse_args() - main(locales=args.locale, verbosity=args.verbose) diff --git a/i18n/tests/__init__.py b/i18n/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/i18n/tests/data/django_after.po b/i18n/tests/data/django_after.po deleted file mode 100644 index f1d7ae6425..0000000000 --- a/i18n/tests/data/django_after.po +++ /dev/null @@ -1,37 +0,0 @@ -# This is test data. -# -msgid "" -msgstr "" -"Project-Id-Version: 0.1a\n" -"Report-Msgid-Bugs-To: openedx-translation@googlegroups.com\n" -"POT-Creation-Date: 2014-01-22 15:35-0500\n" -"PO-Revision-Date: 2014-01-22 20:35:52.096456\n" -"Last-Translator: \n" -"Language-Team: openedx-translation \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Language: en\n" - -#: cms/djangoapps/contentstore/views/tabs.py:39 -#: lms/djangoapps/instructor/views/instructor_dashboard.py:111 -msgid "Course Info" -msgstr "stuff about the course" - -#: common/djangoapps/course_modes/models.py:43 -msgid "Honor Code Certificate" -msgstr "your paper" - -#: common/djangoapps/course_modes/views.py:81 -#: common/djangoapps/student/views.py:478 -msgid "Enrollment is closed" -msgstr "no way, dude" - -#: common/static/js/vendor/mathjax-MathJax-c9db6ac/docs/source/mjtheme/layout.html:129 -#: lms/templates/wiki/plugins/attachments/index.html:40 -msgid "Search" -msgstr "find it!" - -#: lms/djangoapps/courseware/features/video.py:111 -msgid "ERROR: No playable video sources found!" -msgstr "try youtube, dude!" diff --git a/i18n/tests/data/django_before.po b/i18n/tests/data/django_before.po deleted file mode 100644 index 9e508547ad..0000000000 --- a/i18n/tests/data/django_before.po +++ /dev/null @@ -1,52 +0,0 @@ -# This is test data. -# -msgid "" -msgstr "" -"Project-Id-Version: 0.1a\n" -"Report-Msgid-Bugs-To: openedx-translation@googlegroups.com\n" -"POT-Creation-Date: 2014-01-22 15:35-0500\n" -"PO-Revision-Date: 2014-01-22 20:35:52.096456\n" -"Last-Translator: \n" -"Language-Team: openedx-translation \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Language: en\n" - -#: cms/djangoapps/contentstore/views/tabs.py:39 -#: lms/djangoapps/instructor/views/instructor_dashboard.py:111 -msgid "Course Info" -msgstr "stuff about the course" - -#: common/djangoapps/course_modes/models.py:43 -msgid "Honor Code Certificate" -msgstr "your paper" - -#: common/djangoapps/course_modes/views.py:81 -#: common/djangoapps/student/views.py:478 -msgid "Enrollment is closed" -msgstr "no way, dude" - -#: cms/djangoapps/contentstore/views/course.py:237 -msgid "" -"There is already a course defined with the same organization, course number," -" and course run. Please change either organization or course number to be " -"unique." -msgstr "org/course/run, wtf??" - -#: cms/djangoapps/contentstore/views/course.py:243 -#: cms/djangoapps/contentstore/views/course.py:247 -#: other_cms/djangoapps/contentstore/views/course.py:269 -#: cms/djangoapps/contentstore/views/course.py:272 -msgid "" -"Please change either the organization or course number so that it is unique." -msgstr "pick again!" - -#: common/static/js/vendor/mathjax-MathJax-c9db6ac/docs/source/mjtheme/layout.html:129 -#: lms/templates/wiki/plugins/attachments/index.html:40 -msgid "Search" -msgstr "find it!" - -#: lms/djangoapps/courseware/features/video.py:111 -msgid "ERROR: No playable video sources found!" -msgstr "try youtube, dude!" diff --git a/i18n/tests/data/studio.po b/i18n/tests/data/studio.po deleted file mode 100644 index 33fabc380c..0000000000 --- a/i18n/tests/data/studio.po +++ /dev/null @@ -1,29 +0,0 @@ -# This is test data. -# -msgid "" -msgstr "" -"Project-Id-Version: 0.1a\n" -"Report-Msgid-Bugs-To: openedx-translation@googlegroups.com\n" -"POT-Creation-Date: 2014-01-22 15:35-0500\n" -"PO-Revision-Date: 2014-01-22 20:35:52.096456\n" -"Last-Translator: \n" -"Language-Team: openedx-translation \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Language: en\n" - -#: cms/djangoapps/contentstore/views/course.py:237 -msgid "" -"There is already a course defined with the same organization, course number," -" and course run. Please change either organization or course number to be " -"unique." -msgstr "org/course/run, wtf??" - -#: cms/djangoapps/contentstore/views/course.py:243 -#: cms/djangoapps/contentstore/views/course.py:247 -#: other_cms/djangoapps/contentstore/views/course.py:269 -#: cms/djangoapps/contentstore/views/course.py:272 -msgid "" -"Please change either the organization or course number so that it is unique." -msgstr "pick again!" diff --git a/i18n/tests/test_compiled_messages.py b/i18n/tests/test_compiled_messages.py deleted file mode 100644 index bac8089a83..0000000000 --- a/i18n/tests/test_compiled_messages.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -Test that the compiled .mo files match the translations in the -uncompiled .po files. - -This is required because we are checking in the .mo files into -the repo, but compiling them is a manual process. We want to make -sure that we find out if someone forgets the compilation step. -""" - -import ddt -import polib -from unittest import TestCase - -from i18n.config import CONFIGURATION, LOCALE_DIR - -@ddt.ddt -class TestCompiledMessages(TestCase): - """ - Test that mo files match their source po files - """ - - PO_FILES = ['django.po', 'djangojs.po'] - - @ddt.data(*CONFIGURATION.translated_locales) - def test_translated_messages(self, locale): - message_dir = LOCALE_DIR / locale / 'LC_MESSAGES' - for pofile_name in self.PO_FILES: - pofile_path = message_dir / pofile_name - pofile = polib.pofile(pofile_path) - mofile = polib.mofile(pofile_path.stripext() + '.mo') - - po_entries = {entry.msgid: entry for entry in pofile.translated_entries()} - mo_entries = {entry.msgid: entry for entry in mofile.translated_entries()} - - # Check that there are no entries in po that aren't in mo, and vice-versa - self.assertEquals(po_entries.viewkeys(), mo_entries.viewkeys()) - - for entry_id, po_entry in po_entries.iteritems(): - mo_entry = mo_entries[entry_id] - for attr in ('msgstr', 'msgid_plural', 'msgstr_plural', 'msgctxt', 'obsolete', 'encoding'): - po_attr = getattr(po_entry, attr) - mo_attr = getattr(mo_entry, attr) - - # The msgstr_plural in the mo_file is keyed on ints, but in the po_file it's - # keyed on strings. This normalizes them. - if attr == 'msgstr_plural': - po_attr = {int(key): val for (key, val) in po_attr.items()} - - self.assertEquals( - po_attr, - mo_attr, - "When comparing {} for entry {!r}, {!r} from the .po file doesn't match {!r} from the .mo file".format( - attr, - entry_id, - po_attr, - mo_attr, - ) - ) diff --git a/i18n/tests/test_config.py b/i18n/tests/test_config.py deleted file mode 100644 index c05694faef..0000000000 --- a/i18n/tests/test_config.py +++ /dev/null @@ -1,33 +0,0 @@ -import os -from unittest import TestCase - -from i18n.config import Configuration, LOCALE_DIR, CONFIGURATION - -class TestConfiguration(TestCase): - """ - Tests functionality of i18n/config.py - """ - - def test_config(self): - config_filename = os.path.normpath(os.path.join(LOCALE_DIR, 'config.yaml')) - config = Configuration(config_filename) - self.assertEqual(config.source_locale, 'en') - - def test_no_config(self): - config_filename = os.path.normpath(os.path.join(LOCALE_DIR, 'no_such_file')) - with self.assertRaises(Exception): - Configuration(config_filename) - - def test_valid_configuration(self): - """ - Make sure we have a valid configuration file, - and that it contains an 'en' locale. - Also check values of dummy_locale and source_locale. - """ - self.assertIsNotNone(CONFIGURATION) - locales = CONFIGURATION.locales - self.assertIsNotNone(locales) - self.assertIsInstance(locales, list) - self.assertIn('en', locales) - self.assertEqual('eo', CONFIGURATION.dummy_locales[0]) - self.assertEqual('en', CONFIGURATION.source_locale) diff --git a/i18n/tests/test_converter.py b/i18n/tests/test_converter.py deleted file mode 100644 index 6b9c83b2ca..0000000000 --- a/i18n/tests/test_converter.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Tests of i18n/converter.py""" - -from unittest import TestCase - -import ddt - -from i18n import converter - -class UpcaseConverter(converter.Converter): - """ - Converts a string to uppercase. Just used for testing. - """ - def inner_convert_string(self, string): - return string.upper() - - -@ddt.ddt -class TestConverter(TestCase): - """ - Tests functionality of i18n/converter.py - """ - - @ddt.data( - # no tags - ('big bad wolf', - 'BIG BAD WOLF'), - # one html tag - ('big bad wolf', - 'BIG BAD WOLF'), - # two html tags - ('big bad gray wolf', - 'BIG BAD GRAY WOLF'), - # html tags with attributes - ('bar baz', - 'BAR BAZ'), - ("bar baz", - "BAR BAZ"), - # one python tag - ('big %(adjective)s wolf', - 'BIG %(adjective)s WOLF'), - # two python tags - ('big %(adjective)s gray %(noun)s', - 'BIG %(adjective)s GRAY %(noun)s'), - # both kinds of tags - ('big %(adjective)s %(noun)s', - 'BIG %(adjective)s %(noun)s'), - # .format-style tags - ('The {0} barn is {1!r}.', - 'THE {0} BARN IS {1!r}.'), - # HTML entities - ('© 2013 edX,  ', - '© 2013 EDX,  '), - ) - def test_converter(self, data): - """ - Tests with a simple converter (converts strings to uppercase). - Assert that embedded HTML and python tags are not converted. - """ - source, expected = data - result = UpcaseConverter().convert(source) - self.assertEquals(result, expected) diff --git a/i18n/tests/test_dummy.py b/i18n/tests/test_dummy.py deleted file mode 100644 index 7f3e8ca07c..0000000000 --- a/i18n/tests/test_dummy.py +++ /dev/null @@ -1,69 +0,0 @@ -# -*- coding: utf-8 -*- -"""Tests of i18n/dummy.py""" - -from unittest import TestCase - -import ddt -from polib import POEntry - -from i18n import dummy - - -@ddt.ddt -class TestDummy(TestCase): - """ - Tests functionality of i18n/dummy.py - """ - - def setUp(self): - self.converter = dummy.Dummy() - - def assertUnicodeEquals(self, str1, str2): - """Just like assertEquals, but doesn't put Unicode into the fail message. - - Either nose, or rake, or something, deals very badly with unusual - Unicode characters in the assertions, so we use repr here to keep - things safe. - - """ - self.assertEquals( - str1, str2, - "Mismatch: %r != %r" % (str1, str2), - ) - - @ddt.data( - (u"hello my name is Bond, James Bond", - u"héllö mý nämé ïs Bönd, Jämés Bönd Ⱡ'σяєм ι#"), - - (u"don't convert tag ids", - u"dön't çönvért täg ïds Ⱡ'σяєм#"), - - (u"don't convert %(name)s tags on %(date)s", - u"dön't çönvért %(name)s tägs ön %(date)s Ⱡ'σяєм #"), - ) - def test_dummy(self, data): - """ - Tests with a dummy converter (adds spurious accents to strings). - Assert that embedded HTML and python tags are not converted. - """ - source, expected = data - result = self.converter.convert(source) - self.assertUnicodeEquals(result, expected) - - def test_singular(self): - entry = POEntry() - entry.msgid = "A lovely day for a cup of tea." - expected = u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #" - self.converter.convert_msg(entry) - self.assertUnicodeEquals(entry.msgstr, expected) - - def test_plural(self): - entry = POEntry() - entry.msgid = "A lovely day for a cup of tea." - entry.msgid_plural = "A lovely day for some cups of tea." - expected_s = u"À lövélý däý för ä çüp öf téä. Ⱡ'σяєм #" - expected_p = u"À lövélý däý för sömé çüps öf téä. Ⱡ'σяєм ιρ#" - self.converter.convert_msg(entry) - result = entry.msgstr_plural - self.assertUnicodeEquals(result['0'], expected_s) - self.assertUnicodeEquals(result['1'], expected_p) diff --git a/i18n/tests/test_extract.py b/i18n/tests/test_extract.py deleted file mode 100644 index 4293fbe661..0000000000 --- a/i18n/tests/test_extract.py +++ /dev/null @@ -1,91 +0,0 @@ -from datetime import datetime, timedelta -import os -from unittest import TestCase - -from nose.plugins.skip import SkipTest -import polib -from pytz import UTC - -from i18n import extract -from i18n.config import CONFIGURATION - -# Make sure setup runs only once -SETUP_HAS_RUN = False - - -class TestExtract(TestCase): - """ - Tests functionality of i18n/extract.py - """ - generated_files = ('django-partial.po', 'djangojs-partial.po', 'mako.po') - - def setUp(self): - # Skip this test because it takes too long (>1 minute) - # TODO: figure out how to declare a "long-running" test suite - # and add this test to it. - raise SkipTest() - - global SETUP_HAS_RUN - - # Subtract 1 second to help comparisons with file-modify time succeed, - # since os.path.getmtime() is not millisecond-accurate - self.start_time = datetime.now(UTC) - timedelta(seconds=1) - super(TestExtract, self).setUp() - if not SETUP_HAS_RUN: - # Run extraction script. Warning, this takes 1 minute or more - extract.main(verbosity=0) - SETUP_HAS_RUN = True - - def get_files(self): - """ - This is a generator. - Returns the fully expanded filenames for all extracted files - Fails assertion if one of the files doesn't exist. - """ - for filename in self.generated_files: - path = os.path.join(CONFIGURATION.source_messages_dir, filename) - exists = os.path.exists(path) - self.assertTrue(exists, msg='Missing file: %s' % filename) - if exists: - yield path - - def test_files(self): - """ - Asserts that each auto-generated file has been modified since 'extract' was launched. - Intended to show that the file has been touched by 'extract'. - """ - - for path in self.get_files(): - self.assertTrue(datetime.fromtimestamp(os.path.getmtime(path)) > self.start_time, - msg='File not recently modified: %s' % os.path.basename(path)) - - def test_is_keystring(self): - """ - Verifies is_keystring predicate - """ - entry1 = polib.POEntry() - entry2 = polib.POEntry() - entry1.msgid = "_.lms.admin.warning.keystring" - entry2.msgid = "This is not a keystring" - self.assertTrue(extract.is_key_string(entry1.msgid)) - self.assertFalse(extract.is_key_string(entry2.msgid)) - - def test_headers(self): - """Verify all headers have been modified""" - for path in self.get_files(): - po = polib.pofile(path) - header = po.header - self.assertEqual( - header.find('edX translation file'), - 0, - msg='Missing header in %s:\n"%s"' % (os.path.basename(path), header) - ) - - def test_metadata(self): - """Verify all metadata has been modified""" - for path in self.get_files(): - po = polib.pofile(path) - metadata = po.metadata - value = metadata['Report-Msgid-Bugs-To'] - expected = 'openedx-translation@googlegroups.com' - self.assertEquals(expected, value) diff --git a/i18n/tests/test_generate.py b/i18n/tests/test_generate.py deleted file mode 100644 index ba01e752c5..0000000000 --- a/i18n/tests/test_generate.py +++ /dev/null @@ -1,96 +0,0 @@ -from datetime import datetime, timedelta -import os -import sys -import string -import random -import re - -from unittest import TestCase -from mock import patch -from polib import pofile -from pytz import UTC - -from i18n import extract -from i18n import generate -from i18n import dummy -from i18n.config import CONFIGURATION - - -class TestGenerate(TestCase): - """ - Tests functionality of i18n/generate.py - """ - generated_files = ('django-partial.po', 'djangojs-partial.po', 'mako.po') - - @classmethod - def setUpClass(cls): - sys.stderr.write( - "\nExtracting i18n strings and generating dummy translations; " - "this may take a few minutes\n" - ) - sys.stderr.flush() - extract.main(verbosity=0) - dummy.main(verbosity=0) - - def setUp(self): - # Subtract 1 second to help comparisons with file-modify time succeed, - # since os.path.getmtime() is not millisecond-accurate - self.start_time = datetime.now(UTC) - timedelta(seconds=1) - - def test_merge(self): - """ - Tests merge script on English source files. - """ - filename = os.path.join(CONFIGURATION.source_messages_dir, random_name()) - generate.merge(CONFIGURATION.source_locale, target=filename) - self.assertTrue(os.path.exists(filename)) - os.remove(filename) - - # Patch dummy_locales to not have esperanto present - @patch.object(CONFIGURATION, 'dummy_locales', ['fake2']) - def test_main(self): - """ - Runs generate.main() which should merge source files, - then compile all sources in all configured languages. - Validates output by checking all .mo files in all configured languages. - .mo files should exist, and be recently created (modified - after start of test suite) - """ - generate.main(verbosity=0, strict=False) - for locale in CONFIGURATION.translated_locales: - for filename in ('django', 'djangojs'): - mofile = filename+'.mo' - path = os.path.join(CONFIGURATION.get_messages_dir(locale), mofile) - exists = os.path.exists(path) - self.assertTrue(exists, msg='Missing file in locale %s: %s' % (locale, mofile)) - self.assertTrue(datetime.fromtimestamp(os.path.getmtime(path), UTC) >= self.start_time, - msg='File not recently modified: %s' % path) - # Segmenting means that the merge headers don't work they way they - # used to, so don't make this check for now. I'm not sure if we'll - # get the merge header back eventually, or delete this code eventually. - # self.assert_merge_headers(locale) - - def assert_merge_headers(self, locale): - """ - This is invoked by test_main to ensure that it runs after - calling generate.main(). - - There should be exactly three merge comment headers - in our merged .po file. This counts them to be sure. - A merge comment looks like this: - # #-#-#-#-# django-partial.po (0.1a) #-#-#-#-# - - """ - path = os.path.join(CONFIGURATION.get_messages_dir(locale), 'django.po') - po = pofile(path) - pattern = re.compile('^#-#-#-#-#', re.M) - match = pattern.findall(po.header) - self.assertEqual(len(match), 3, - msg="Found %s (should be 3) merge comments in the header for %s" % \ - (len(match), path)) - - -def random_name(size=6): - """Returns random filename as string, like test-4BZ81W""" - chars = string.ascii_uppercase + string.digits - return 'test-' + ''.join(random.choice(chars) for x in range(size)) diff --git a/i18n/tests/test_segment.py b/i18n/tests/test_segment.py deleted file mode 100644 index 079a74ff05..0000000000 --- a/i18n/tests/test_segment.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Test i18n/segment.py""" - -import os.path -import shutil -import unittest - -from path import path -import polib - -from i18n.segment import segment_pofile - - -HERE = path(__file__).dirname() -TEST_DATA = HERE / "data" -WORK = HERE / "work" - - -class SegmentTest(unittest.TestCase): - """Test segment_pofile.""" - - def setUp(self): - if not os.path.exists(WORK): - os.mkdir(WORK) - self.addCleanup(shutil.rmtree, WORK) - - def assert_pofile_same(self, pofile1, pofile2): - """The paths `p1` and `p2` should be identical pofiles.""" - po1 = polib.pofile(pofile1) - po2 = polib.pofile(pofile2) - self.assertEqual(po1, po2) - - def test_sample_data(self): - work_file = WORK / "django.po" - shutil.copyfile(TEST_DATA / "django_before.po", work_file) - original_pofile = polib.pofile(work_file) - - written = segment_pofile( - work_file, - { - 'studio.po': [ - 'cms/*', - 'other_cms/*', - ], - } - ) - - self.assertEqual(written, set([WORK / "django.po", WORK / "studio.po"])) - - pofiles = [polib.pofile(f) for f in written] - after_entries = sum(len(pofile) for pofile in pofiles) - self.assertEqual(len(original_pofile), after_entries) - - original_ids = set(m.msgid for m in original_pofile) - after_ids = set(m.msgid for pofile in pofiles for m in pofile) - self.assertEqual(original_ids, after_ids) - - self.assert_pofile_same(WORK / "django.po", TEST_DATA / "django_after.po") - self.assert_pofile_same(WORK / "studio.po", TEST_DATA / "studio.po") diff --git a/i18n/transifex.py b/i18n/transifex.py deleted file mode 100755 index dc30a645bc..0000000000 --- a/i18n/transifex.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -import sys -from polib import pofile -import argparse - -from i18n.config import CONFIGURATION -from i18n.execute import execute -from i18n.extract import EDX_MARKER - -TRANSIFEX_HEADER = u'edX community translations have been downloaded from {}' -TRANSIFEX_URL = 'https://www.transifex.com/projects/p/edx-platform/' - - -def push(): - execute('tx push -s') - - -def pull(): - print("Pulling languages from transifex...") - # Pull translations from all languages where there is - # at least 10% reviewed translations - execute('tx pull --mode=reviewed --all') - clean_translated_locales() - - -def clean_translated_locales(): - """ - Strips out the warning from all translated po files - about being an English source file. - """ - for locale in CONFIGURATION.translated_locales: - clean_locale(locale) - - -def clean_locale(locale): - """ - Strips out the warning from all of a locale's translated po files - about being an English source file. - Iterates over machine-generated files. - """ - dirname = CONFIGURATION.get_messages_dir(locale) - for filename in ('django-partial.po', 'djangojs-partial.po', 'mako.po'): - clean_file(dirname.joinpath(filename)) - - -def clean_file(filename): - """ - Strips out the warning from a translated po file about being an English source file. - Replaces warning with a note about coming from Transifex. - """ - try: - po = pofile(filename) - except Exception as exc: - # An exception can occur when a language is deleted from Transifex. - # Don't totally fail here. - print("Encountered error {} with filename {} - language project may no longer exist on Transifex".format(exc, filename)) - return - if po.header.find(EDX_MARKER) != -1: - new_header = get_new_header(po) - new = po.header.replace(EDX_MARKER, new_header) - po.header = new - po.save() - - -def get_new_header(po): - team = po.metadata.get('Language-Team', None) - if not team: - return TRANSIFEX_HEADER.format(TRANSIFEX_URL) - else: - return TRANSIFEX_HEADER.format(team) - - -if __name__ == '__main__': - # pylint: disable=invalid-name - parser = argparse.ArgumentParser() - parser.add_argument("command", help="push or pull") - parser.add_argument("--verbose", "-v") - args = parser.parse_args() - # pylint: enable=invalid-name - - if args.command == "push": - push() - elif args.command == "pull": - pull() - else: - raise Exception("unknown command ({cmd})".format(cmd=args.command)) diff --git a/i18n/validate.py b/i18n/validate.py deleted file mode 100644 index 48a4015822..0000000000 --- a/i18n/validate.py +++ /dev/null @@ -1,218 +0,0 @@ -"""Tests that validate .po files.""" - -import argparse -import codecs -import logging -import os -import sys -import textwrap - -import polib - -from i18n.config import LOCALE_DIR -from i18n.execute import call -from i18n.converter import Converter - - -log = logging.getLogger(__name__) - - -def validate_po_files(root, report_empty=False): - """ - Validate all of the po files found in the root directory. - """ - - for dirpath, __, filenames in os.walk(root): - for name in filenames: - __, ext = os.path.splitext(name) - if ext.lower() == '.po': - filename = os.path.join(dirpath, name) - # First validate the format of this file - msgfmt_check_po_file(filename) - # Now, check that the translated strings are valid, and optionally check for empty translations - check_messages(filename, report_empty) - - -def msgfmt_check_po_file(filename): - """ - Call GNU msgfmt -c on each .po file to validate its format. - Any errors caught by msgfmt are logged to log. - """ - # Use relative paths to make output less noisy. - rfile = os.path.relpath(filename, LOCALE_DIR) - out, err = call('msgfmt -c {}'.format(rfile), working_directory=LOCALE_DIR) - if err != '': - log.info('\n' + out) - log.warn('\n' + err) - - -def tags_in_string(msg): - """ - Return the set of tags in a message string. - - Tags includes HTML tags, data placeholders, etc. - - Skips tags that might change due to translations: HTML entities, , - and so on. - - """ - def is_linguistic_tag(tag): - """Is this tag one that can change with the language?""" - if tag.startswith("&"): - return True - if any(x in tag for x in ["", ""]): - return True - return False - - __, tags = Converter().detag_string(msg) - return set(t for t in tags if not is_linguistic_tag(t)) - - -def astral(msg): - """Does `msg` have characters outside the Basic Multilingual Plane?""" - return any(ord(c) > 0xFFFF for c in msg) - - -def check_messages(filename, report_empty=False): - """ - Checks messages in various ways: - - Translations must have the same slots as the English. Messages can't have astral - characters in them. - - If report_empty is True, will also report empty translation strings. - - """ - # Don't check English files. - if "/locale/en/" in filename: - return - - # problems will be a list of tuples. Each is a description, and a msgid, - # and then zero or more translations. - problems = [] - pomsgs = polib.pofile(filename) - for msg in pomsgs: - # Check for characters Javascript can't support. - # https://code.djangoproject.com/ticket/21725 - if astral(msg.msgstr): - problems.append(("Non-BMP char", msg.msgid, msg.msgstr)) - - if msg.msgid_plural: - # Plurals: two strings in, N strings out. - source = msg.msgid + " | " + msg.msgid_plural - translation = " | ".join(v for k, v in sorted(msg.msgstr_plural.items())) - empty = any(not t.strip() for t in msg.msgstr_plural.values()) - else: - # Singular: just one string in and one string out. - source = msg.msgid - translation = msg.msgstr - empty = not msg.msgstr.strip() - - if empty: - if report_empty: - problems.append(("Empty translation", source)) - else: - id_tags = tags_in_string(source) - tx_tags = tags_in_string(translation) - - # Check if tags don't match - if id_tags != tx_tags: - id_has = u", ".join(u'"{}"'.format(t) for t in id_tags - tx_tags) - tx_has = u", ".join(u'"{}"'.format(t) for t in tx_tags - id_tags) - if id_has and tx_has: - diff = u"{} vs {}".format(id_has, tx_has) - elif id_has: - diff = u"{} missing".format(id_has) - else: - diff = u"{} added".format(tx_has) - problems.append(( - "Different tags in source and translation", - source, - translation, - diff - )) - - if problems: - problem_file = filename.replace(".po", ".prob") - id_filler = textwrap.TextWrapper(width=79, initial_indent=" msgid: ", subsequent_indent=" " * 9) - tx_filler = textwrap.TextWrapper(width=79, initial_indent=" -----> ", subsequent_indent=" " * 9) - with codecs.open(problem_file, "w", encoding="utf8") as prob_file: - for problem in problems: - desc, msgid = problem[:2] - prob_file.write(u"{}\n{}\n".format(desc, id_filler.fill(msgid))) - for translation in problem[2:]: - prob_file.write(u"{}\n".format(tx_filler.fill(translation))) - prob_file.write(u"\n") - - log.error(" {0} problems in {1}, details in .prob file".format(len(problems), filename)) - else: - log.info(" No problems found in {0}".format(filename)) - - -def get_parser(): - """ - Returns an argument parser for this script. - """ - parser = argparse.ArgumentParser(description=( # pylint: disable=redefined-outer-name - "Automatically finds translation errors in all edx-platform *.po files, " - "for all languages, unless one or more language(s) is specified to check." - )) - - parser.add_argument( - '-l', '--language', - type=str, - nargs='*', - help="Specify one or more specific language code(s) to check (eg 'ko_KR')." - ) - - parser.add_argument( - '-e', '--empty', - action='store_true', - help="Includes empty translation strings in .prob files." - ) - - parser.add_argument( - '-v', '--verbose', - action='count', default=0, - help="Turns on info-level logging." - ) - - return parser - - -def main(languages=None, empty=False, verbosity=1): # pylint: disable=unused-argument - """ - Main entry point for script - """ - languages = languages or [] - - if not languages: - root = LOCALE_DIR - validate_po_files(root, empty) - return - - # languages will be a list of language codes; test each language. - for language in languages: - root = LOCALE_DIR / language - # Assert that a directory for this language code exists on the system - if not root.isdir(): - log.error(" {0} is not a valid directory.\nSkipping language '{1}'".format(root, language)) - continue - # If we found the language code's directory, validate the files. - validate_po_files(root, empty) - - -if __name__ == '__main__': - # pylint: disable=invalid-name - parser = get_parser() - args = parser.parse_args() - if args.verbose: - log_level = logging.INFO - else: - log_level = logging.WARNING - logging.basicConfig(stream=sys.stdout, level=log_level) - # pylint: enable=invalid-name - - print("Validating languages...") - main(languages=args.language, empty=args.empty, verbosity=args.verbose) - print("Finished validating languages") diff --git a/requirements/edx/github.txt b/requirements/edx/github.txt index aba6a1b234..f7e0716a3d 100644 --- a/requirements/edx/github.txt +++ b/requirements/edx/github.txt @@ -28,4 +28,5 @@ -e git+https://github.com/edx/acid-block.git@459aff7b63db8f2c5decd1755706c1a64fb4ebb1#egg=acid-xblock -e git+https://github.com/edx/edx-ora2.git@release-2014-06-13T11.52#egg=edx-ora2 -e git+https://github.com/edx/opaque-keys.git@5929789900b3d0a354ce7274bde74edfd0430f03#egg=opaque-keys -git+https://github.com/edx/ease.git@a990b25ed4238acb1b15ee6f027465db3a10960e#egg=ease +-e git+https://github.com/edx/i18n-tools.git@c186d9d877773734908e49ccc5c01407e6ad8199#egg=i18n-tools +-e git+https://github.com/edx/ease.git@a990b25ed4238acb1b15ee6f027465db3a10960e#egg=ease diff --git a/setup.py b/setup.py index 0fd2f08840..cdd8f74211 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,5 @@ setup( packages=[ "lms", "cms", - "i18n", ], )