import json
import logging
import os
import shlex
import subprocess


def init_module():
    """
    Initializes module parameters.

    Assigns the module globals BASE_DIR, SOURCE_LOCALE, LOCALE_DIR,
    CONFIG_FILENAME, SOURCE_MSGS_DIR and LOG. It calls messages_dir() and
    get_logger(), so it can only run once both of those are defined.
    """
    global BASE_DIR, LOCALE_DIR, CONFIG_FILENAME, SOURCE_MSGS_DIR, SOURCE_LOCALE, LOG

    # BASE_DIR is the working directory to execute django-admin commands from.
    # Typically this should be the 'mitx' directory.
    BASE_DIR = os.path.normpath(os.path.dirname(os.path.abspath(__file__)) + '/..')

    # Source language is English
    SOURCE_LOCALE = 'en'

    # LOCALE_DIR contains the locale files.
    # Typically this should be 'mitx/conf/locale'
    LOCALE_DIR = BASE_DIR + '/conf/locale'

    # CONFIG_FILENAME contains localization configuration in json format
    CONFIG_FILENAME = LOCALE_DIR + '/config'

    # SOURCE_MSGS_DIR contains the English po files.
    SOURCE_MSGS_DIR = messages_dir(SOURCE_LOCALE)

    # Default logger.
    LOG = get_logger()


def messages_dir(locale):
    """
    Returns the name of the directory holding the po files for locale.
    Example: mitx/conf/locale/en/LC_MESSAGES
    """
    return os.path.join(LOCALE_DIR, locale, 'LC_MESSAGES')


def get_logger():
    """
    Returns the default logger for this module, set to INFO level and
    writing through a single StreamHandler.
    """
    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)
    # logging.getLogger() hands back the same logger object on every call, so
    # attach the handler only once; otherwise every extra call would make each
    # message appear one more time in the output.
    if not log.handlers:
        log_handler = logging.StreamHandler()
        log_handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] %(message)s'))
        log.addHandler(log_handler)
    return log


# Run this after defining messages_dir and get_logger, because it depends on these.
# It must also run before execute() and remove_file() are defined, because
# their default arguments read BASE_DIR and LOG at definition time.
init_module()


def execute(command, working_directory=BASE_DIR, log=LOG):
    """
    Executes a command in a given working_directory.
    Command is a single string. It is split into arguments using shell-like
    quoting rules and run directly (no shell is involved), so a quoted
    argument such as "TRANSLATORS:" is passed without its quotes.
    The command string is logged to log before it runs.
    """
    log.info(command)
    subprocess.call(shlex.split(command), cwd=working_directory)


def get_config():
    """Returns data found in config file, or returns None if file not found"""
    config_path = os.path.abspath(CONFIG_FILENAME)
    if not os.path.exists(config_path):
        # Paths are logged relative to BASE_DIR to cut down on noise in output.
        LOG.warning("Configuration file cannot be found: %s",
                    os.path.relpath(config_path, BASE_DIR))
        return None
    with open(config_path) as stream:
        return json.load(stream)


def create_dir_if_necessary(pathname):
    """
    Creates the directory that contains pathname, if it does not exist yet.
    Note that it is the parent of pathname that gets created, not pathname
    itself. A bare filename (no directory part) needs nothing created.
    """
    dirname = os.path.dirname(pathname)
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)


def remove_file(filename, log=LOG, verbose=True):
    """
    Attempt to delete filename.
    Log a warning if file does not exist.
    Logging filenames are relative to BASE_DIR to cut down on noise in output.
    """
    if verbose:
        log.info('Deleting file %s' % os.path.relpath(filename, BASE_DIR))
    if not os.path.exists(filename):
        log.warning("File does not exist: %s" % os.path.relpath(filename, BASE_DIR))
    else:
        os.remove(filename)
"""
See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow

 This task extracts all English strings from all source code
 and produces three human-readable files:
   conf/locale/en/LC_MESSAGES/django-partial.po
   conf/locale/en/LC_MESSAGES/djangojs.po
   conf/locale/en/LC_MESSAGES/mako.po

 This task will clobber any existing django.po file.
 This is because django-admin.py makemessages hardcodes this filename
 and it cannot be overridden.

"""

import os
from datetime import datetime
from polib import pofile
from execute import execute, create_dir_if_necessary, remove_file, \
    BASE_DIR, LOCALE_DIR, SOURCE_MSGS_DIR, LOG


# BABEL_CONFIG contains declarations for Babel to extract strings from mako template files
# Use relpath to reduce noise in logs
BABEL_CONFIG = os.path.relpath(LOCALE_DIR + '/babel.cfg', BASE_DIR)

# Strings from mako template files are written to BABEL_OUT
# Use relpath to reduce noise in logs
BABEL_OUT = os.path.relpath(SOURCE_MSGS_DIR + '/mako.po', BASE_DIR)


def main():
    """
    Regenerates the English source po files.

    Deletes the previously generated files, runs pybabel and
    django-admin.py makemessages from BASE_DIR, renames the hardcoded
    django.po to django-partial.po, then rewrites header, metadata and
    key strings of every generated file.
    """
    # NOTE(review): create_dir_if_necessary creates the directory *containing*
    # the path it is given, so this ensures the parent of LOCALE_DIR exists,
    # not LOCALE_DIR or SOURCE_MSGS_DIR themselves -- confirm that is intended.
    create_dir_if_necessary(LOCALE_DIR)
    generated_files = ('django-partial.po', 'djangojs.po', 'mako.po')

    for stale_name in generated_files:
        remove_file(os.path.join(SOURCE_MSGS_DIR, stale_name))

    # Extract strings from mako templates
    babel_mako_cmd = 'pybabel extract -F %s -c "TRANSLATORS:" . -o %s' % (BABEL_CONFIG, BABEL_OUT)

    # Extract strings from django source files
    make_django_cmd = ('django-admin.py makemessages -l en --ignore=src/* --ignore=i18n/* '
                       '--extension html')

    # Extract strings from javascript source files
    make_djangojs_cmd = ('django-admin.py makemessages -l en -d djangojs --ignore=src/* '
                         '--ignore=i18n/* --extension js')

    execute(babel_mako_cmd, working_directory=BASE_DIR)
    execute(make_django_cmd, working_directory=BASE_DIR)

    # makemessages creates 'django.po'. This filename is hardcoded.
    # Rename it to django-partial.po to enable merging into django.po later.
    # This has to happen before the djangojs extraction below, exactly as ordered here.
    hardcoded_output = os.path.join(SOURCE_MSGS_DIR, 'django.po')
    partial_output = os.path.join(SOURCE_MSGS_DIR, 'django-partial.po')
    os.rename(hardcoded_output, partial_output)

    execute(make_djangojs_cmd, working_directory=BASE_DIR)

    # Applied in this order to every generated file:
    #   fix_header        - replace default headers with edX headers
    #   fix_metadata      - replace default metadata with edX metadata
    #   strip_key_strings - remove key strings which belong in messages.po
    cleaners = (fix_header, fix_metadata, strip_key_strings)
    for generated_name in generated_files:
        LOG.info('Cleaning %s' % generated_name)
        catalog = pofile(os.path.join(SOURCE_MSGS_DIR, generated_name))
        for clean in cleaners:
            clean(catalog)
        catalog.save()


# By default, django-admin.py makemessages creates this header:
#
#   SOME DESCRIPTIVE TITLE.
#   Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
#   This file is distributed under the same license as the PACKAGE package.
#   FIRST AUTHOR , YEAR.

def fix_header(po):
    """
    Replace default headers with edX headers.

    Every placeholder below is substituted in po.header by plain string
    replacement, in the order listed, and the result is stored back on po.
    """
    replacements = (
        ('SOME DESCRIPTIVE TITLE', 'edX translation file'),
        ('Translations template for PROJECT.', 'edX translation file'),
        ('YEAR', '%s' % datetime.utcnow().year),
        ('ORGANIZATION', 'edX'),
        ("THE PACKAGE'S COPYRIGHT HOLDER", "EdX"),
        ('This file is distributed under the same license as the PROJECT project.',
         'This file is distributed under the GNU AFFERO GENERAL PUBLIC LICENSE.'),
        ('This file is distributed under the same license as the PACKAGE package.',
         'This file is distributed under the GNU AFFERO GENERAL PUBLIC LICENSE.'),
        ('FIRST AUTHOR ',
         'EdX Team ')
    )
    text = po.header
    for placeholder, value in replacements:
        text = text.replace(placeholder, value)
    po.header = text
# By default, django-admin.py makemessages creates this metadata:
#
#   {u'PO-Revision-Date': u'YEAR-MO-DA HO:MI+ZONE',
#    u'Language': u'',
#    u'Content-Transfer-Encoding': u'8bit',
#    u'Project-Id-Version': u'PACKAGE VERSION',
#    u'Report-Msgid-Bugs-To': u'',
#    u'Last-Translator': u'FULL NAME ',
#    u'Language-Team': u'LANGUAGE ',
#    u'POT-Creation-Date': u'2013-04-25 14:14-0400',
#    u'Content-Type': u'text/plain; charset=UTF-8',
#    u'MIME-Version': u'1.0'}

def fix_metadata(po):
    """
    Replace default metadata with edX metadata.

    Removes the 'Last-Translator' entry from po.metadata (if there is one)
    and overwrites the entries listed in fixes. PO-Revision-Date is written
    as a 'YEAR-MO-DA HO:MI+ZONE' string in UTC, matching the layout of the
    default metadata shown above.
    """
    fixes = {
        # datetime.utcnow() is UTC, hence the fixed +0000 zone suffix.
        'PO-Revision-Date': datetime.utcnow().strftime('%Y-%m-%d %H:%M+0000'),
        'Report-Msgid-Bugs-To': 'translation_team@edx.org',
        'Project-Id-Version': '0.1a',
        'Language': 'en',
        'Language-Team': 'translation team ',
    }
    # pop() with a default: a file without this entry must not abort the run.
    po.metadata.pop('Last-Translator', None)
    po.metadata.update(fixes)


def strip_key_strings(po):
    """
    Removes all entries in PO which are key strings.
    These entries should appear only in messages.po, not in any other po files.
    The po object is modified in place.
    """
    # Slice assignment swaps the contents while keeping the same po object,
    # so the caller's reference sees the filtered entries.
    po[:] = [entry for entry in po if not is_key_string(entry.msgid)]


def is_key_string(string):
    """
    returns True if string is a key string.
    Key strings begin with underscore and are at least two characters long
    (a lone '_' is not a key string).
    """
    return len(string) > 1 and string.startswith('_')


if __name__ == '__main__':
    main()
"""
 See https://edx-wiki.atlassian.net/wiki/display/ENG/PO+File+workflow


 This task merges and compiles the human-readable .pofiles on the
 local filesystem into machine-readable .mofiles. This is typically
 necessary as part of the build process since these .mofiles are
 needed by Django when serving the web app.

 The configuration file (in mitx/conf/locale/config) specifies which
 languages to generate.
"""

import os
from execute import execute, get_config, messages_dir, remove_file, \
    BASE_DIR, LOG, SOURCE_LOCALE


def merge(locale, target='django.po'):
    """
    For the given locale, merge django-partial.po, messages.po, mako.po -> django.po

    target is the name of the merged output file. It is joined onto the
    locale's messages directory.
    Raises an Exception if any of the three input files is missing.
    """
    LOG.info('Merging locale=%s' % locale)
    locale_directory = messages_dir(locale)
    files_to_merge = ('django-partial.po', 'messages.po', 'mako.po')
    validate_files(locale_directory, files_to_merge)

    # merged file is merged.po
    merge_cmd = 'msgcat -o merged.po ' + ' '.join(files_to_merge)
    execute(merge_cmd, working_directory=locale_directory)

    # rename merged.po -> django.po (default)
    merged_filename = os.path.join(locale_directory, 'merged.po')
    django_filename = os.path.join(locale_directory, target)
    # os.rename can't overwrite an existing file on Windows, so clear the old
    # target first. Only do so when merged.po really was produced, so that a
    # failed merge never destroys the previous output.
    if os.path.exists(merged_filename) and os.path.exists(django_filename):
        os.remove(django_filename)
    os.rename(merged_filename, django_filename)


def validate_files(dir, files_to_merge):
    """
    Asserts that the given files exist.
    files_to_merge is a list of file names (no directories).
    dir is the directory in which the files should appear.
    raises an Exception if any of the files are not in dir.
    """
    for path in files_to_merge:
        pathname = os.path.join(dir, path)
        if not os.path.exists(pathname):
            raise Exception("File not found: %s" % pathname)


def main():
    """
    Merges the po files of every configured locale, then compiles them
    with django-admin.py compilemessages run from BASE_DIR.
    Falls back to the source locale alone when there is no configuration.
    """
    configuration = get_config()
    if configuration is None:
        LOG.warning('Configuration file not found, using only English.')
        locales = (SOURCE_LOCALE,)
    else:
        locales = configuration['locales']
    for locale in locales:
        merge(locale)

    compile_cmd = 'django-admin.py compilemessages'
    execute(compile_cmd, working_directory=BASE_DIR)


if __name__ == '__main__':
    main()
import os
import string
import polib
import random
from unittest import TestCase
from datetime import datetime, timedelta


import generate
from execute import get_config, messages_dir, SOURCE_MSGS_DIR, SOURCE_LOCALE


class TestGenerate(TestCase):
    """
    Tests functionality of i18n/generate.py
    """
    generated_files = ('django-partial.po', 'djangojs.po', 'mako.po')

    def setUp(self):
        self.configuration = get_config()

        # Subtract 1 second to help comparisons with file-modify time succeed,
        # since os.path.getmtime() is not millisecond-accurate
        self.start_time = datetime.now() - timedelta(seconds=1)

    def test_configuration(self):
        """
        Make sure we have a valid configuration file,
        and that it contains an 'en' locale.
        """
        self.assertIsNotNone(self.configuration)
        locales = self.configuration['locales']
        self.assertIsNotNone(locales)
        self.assertIsInstance(locales, list)
        self.assertIn('en', locales)

    def test_merge(self):
        """
        Tests merge script on English source files.
        The merged output goes to a randomly named file, which is removed
        again whether or not the test passes.
        """
        filename = os.path.join(SOURCE_MSGS_DIR, random_name())
        try:
            generate.merge(SOURCE_LOCALE, target=filename)
            self.assertTrue(os.path.exists(filename))
        finally:
            # Never leave the scratch file behind in the source messages dir.
            if os.path.exists(filename):
                os.remove(filename)

    def test_main(self):
        """
        Runs generate.main() which should merge source files,
        then compile all sources in all configured languages.
        Validates output by checking all .mo files in all configured languages.
        .mo files should exist, and be recently created (modified
        after start of test suite)
        """
        generate.main()
        for locale in self.configuration['locales']:
            for filename in ('django.mo', 'djangojs.mo'):
                path = os.path.join(messages_dir(locale), filename)
                exists = os.path.exists(path)
                self.assertTrue(exists, msg='Missing file in locale %s: %s' % (locale, filename))
                self.assertTrue(datetime.fromtimestamp(os.path.getmtime(path)) >= self.start_time,
                                msg='File not recently modified: %s' % path)


def random_name(size=6):
    """Returns random filename as string, like test-4BZ81W"""
    chars = string.ascii_uppercase + string.digits
    return 'test-' + ''.join(random.choice(chars) for _ in range(size))