From 6f103488360dc2e7134849817770f1971efe38d2 Mon Sep 17 00:00:00 2001 From: Steve Strassmann Date: Tue, 23 Apr 2013 10:07:51 -0400 Subject: [PATCH] addressed comments from pull request --- .../contentstore/tests/test_i18n.py | 28 +++- cms/envs/common.py | 2 + cms/static/js/base.js | 11 +- i18n/converter.py | 79 +++++----- i18n/dummy.py | 136 +++++------------ i18n/make_dummy.py | 23 +-- i18n/pofile.py | 143 ------------------ i18n/update.py | 13 +- 8 files changed, 131 insertions(+), 304 deletions(-) delete mode 100644 i18n/pofile.py diff --git a/cms/djangoapps/contentstore/tests/test_i18n.py b/cms/djangoapps/contentstore/tests/test_i18n.py index fba2da10dd..c3c0b25fc3 100644 --- a/cms/djangoapps/contentstore/tests/test_i18n.py +++ b/cms/djangoapps/contentstore/tests/test_i18n.py @@ -1,12 +1,12 @@ -# -*- coding: iso-8859-1 -*- +from unittest import skip -from django.test import TestCase from django.core.urlresolvers import reverse from django.contrib.auth.models import User from django.test.client import Client -from nose.tools import nottest -class InternationalizationTest(TestCase): +from .utils import ModuleStoreTestCase + +class InternationalizationTest(ModuleStoreTestCase): """ Tests to validate Internationalization. """ @@ -52,6 +52,22 @@ class InternationalizationTest(TestCase): status_code=200, html=True) + def test_course_explicit_english(self): + """Test viewing the index page with no courses""" + # Create a course so there is something to view + self.client = Client() + self.client.login(username=self.uname, password=self.password) + + resp = self.client.get(reverse('index'), + {}, + HTTP_ACCEPT_LANGUAGE='en' + ) + + self.assertContains(resp, + '

My Courses

', + status_code=200, + html=True) + # **** # NOTE: @@ -62,7 +78,7 @@ class InternationalizationTest(TestCase): # actual French at that time. # Test temporarily disable since it depends on creation of dummy strings - @nottest + @skip('depends on creation of dummy strings') def test_course_with_accents (self): """Test viewing the index page with no courses""" # Create a course so there is something to view @@ -75,7 +91,7 @@ class InternationalizationTest(TestCase): ) TEST_STRING = u'

' \ - + u'My Çöürsés L#' \ + + u'My \xc7\xf6\xfcrs\xe9s L#' \ + u'

' self.assertContains(resp, diff --git a/cms/envs/common.py b/cms/envs/common.py index 3cf5fe15b3..614491f50d 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -128,6 +128,8 @@ MIDDLEWARE_CLASSES = ( 'django.contrib.messages.middleware.MessageMiddleware', 'track.middleware.TrackMiddleware', 'mitxmako.middleware.MakoMiddleware', + + # Detects user-requested locale from 'accept-language' header in http request 'django.middleware.locale.LocaleMiddleware', 'django.middleware.transaction.TransactionMiddleware' diff --git a/cms/static/js/base.js b/cms/static/js/base.js index fa48b1699e..4112d2bb8e 100644 --- a/cms/static/js/base.js +++ b/cms/static/js/base.js @@ -826,11 +826,14 @@ function saveSetSectionScheduleDate(e) { data: JSON.stringify({ 'id': id, 'metadata': {'start': start}}) }).success(function () { var $thisSection = $('.courseware-section[data-id="' + id + '"]'); + var format = gettext('Will Release: %(date)s at %(time)s UTC'); + var willReleaseAt = interpolate(format, {'date': input_date, 'time': input_time}, true); $thisSection.find('.section-published-date').html( - '' + gettext('Will Release:') + - ' ' + input_date + ' at ' + input_time + - ' UTC' + + '' + willReleaseAt + '' + + '' + gettext('Edit') + ''); $thisSection.find('.section-published-date').animate({ 'background-color': 'rgb(182,37,104)' diff --git a/i18n/converter.py b/i18n/converter.py index fe66ff3e74..63d8f83e00 100644 --- a/i18n/converter.py +++ b/i18n/converter.py @@ -1,53 +1,45 @@ -import re, itertools - -# Converter is an abstract class that transforms strings. -# It hides embedded tags (HTML or Python sequences) from transformation -# -# To implement Converter, provide implementation for inner_convert_string() - +import re +import itertools class Converter: + """Converter is an abstract class that transforms strings. + It hides embedded tags (HTML or Python sequences) from transformation + + To implement Converter, provide implementation for inner_convert_string() + Strategy: + 1. 
extract tags embedded in the string + a. use the index of each extracted tag to re-insert it later + b. replace tags in string with numbers (<0>, <1>, etc.) + c. save extracted tags in a separate list + 2. convert string + 3. re-insert the extracted tags + + """ + # matches tags like these: - # HTML: , ,
, - # Python: %(date)s, %(name)s - # - tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\(.*\)\w)', re.I) + # HTML: , ,
, + # Python: %(date)s, %(name)s + tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\([^)]*\)\w)', re.I) - - def convert (self, string): - if self.tag_pattern.search(string): - result = self.convert_tagged_string(string) - else: - result = self.inner_convert_string(string) - return result - - # convert_tagged_string(string): - # returns: a converted tagged string - # param: string (contains html tags) - # - # Don't replace characters inside tags - # - # Strategy: - # 1. extract tags embedded in the string - # a. use the index of each extracted tag to re-insert it later - # b. replace tags in string with numbers (<0>, <1>, etc.) - # c. save extracted tags in a separate list - # 2. convert string - # 3. re-insert the extracted tags - # - def convert_tagged_string (self, string): + def convert(self, string): + """Returns: a converted tagged string + param: string (contains html tags) + + Don't replace characters inside tags + """ (string, tags) = self.detag_string(string) string = self.inner_convert_string(string) string = self.retag_string(string, tags) return string - # extracts tags from string. - # - # returns (string, list) where - # string: string has tags replaced by indices (
... => <0>, <1>, <2>, etc.) - # list: list of the removed tags ("
", "", "") - def detag_string (self, string): + def detag_string(self, string): + """Extracts tags from string. + + returns (string, list) where + string: string has tags replaced by indices (
... => <0>, <1>, <2>, etc.) + list: list of the removed tags ('
', '', '') + """ counter = itertools.count(0) count = lambda m: '<%s>' % counter.next() tags = self.tag_pattern.findall(string) @@ -57,9 +49,8 @@ class Converter: raise Exception('tags dont match:'+string) return (new, tags) - # substitutes each tag back into string, into occurrences of <0>, <1> etc - # - def retag_string (self, string, tags): + def retag_string(self, string, tags): + """substitutes each tag back into string, into occurrences of <0>, <1> etc""" for (i, tag) in enumerate(tags): p = '<%s>' % i string = re.sub(p, tag, string, 1) @@ -69,6 +60,6 @@ class Converter: # ------------------------------ # Customize this in subclasses of Converter - def inner_convert_string (self, string): + def inner_convert_string(self, string): return string # do nothing by default diff --git a/i18n/dummy.py b/i18n/dummy.py index a94d400ba0..798ee525b5 100644 --- a/i18n/dummy.py +++ b/i18n/dummy.py @@ -1,12 +1,6 @@ -# -*- coding: iso-8859-15 -*- - from converter import Converter -# This file converts string resource files. -# Java: file has name like messages_en.properties -# Flex: file has name like locales/en_US/Labels.properties - -# Creates new localization properties files in a dummy language (saved as 'vr', Vardebedian) +# Creates new localization properties files in a dummy language # Each property file is derived from the equivalent en_US file, except # 1. Every vowel is replaced with an equivalent with extra accent marks # 2. Every string is padded out to +30% length to simulate verbose languages (e.g. 
German) @@ -18,19 +12,18 @@ from converter import Converter # Example use: # >>> from dummy import Dummy # >>> c = Dummy() -# >>> print c.convert("hello my name is Bond, James Bond") -# héllö my nämé ïs Bönd, Jämés Bönd Lorem i# +# >>> c.convert("hello my name is Bond, James Bond") +# u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#' # -# >>> print c.convert('don\'t convert tag ids') -# dön't çönvért täg ïds Lorem ipsu# +# >>> c.convert('don\'t convert tag ids') +# u'd\xf6n\'t \xe7\xf6nv\xe9rt t\xe4g \xefds Lorem ipsu#' # -# >>> print c.convert('don\'t convert %(name)s tags on %(date)s') -# dön't çönvért %(name)s tags on %(date)s Lorem ips# +# >>> c.convert('don\'t convert %(name)s tags on %(date)s') +# u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#" # Substitute plain characters with accented lookalikes. # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent -# print "print u'\\x%x'" % 207 TABLE = {'A': u'\xC0', 'a': u'\xE4', 'b': u'\xDF', @@ -62,23 +55,23 @@ PAD_FACTOR = 1.3 class Dummy (Converter): - ''' + """ A string converter that generates dummy strings with fake accents and lorem ipsum padding. 
- ''' + """ - def convert (self, string): + def convert(self, string): result = Converter.convert(self, string) return self.pad(result) - def inner_convert_string (self, string): + def inner_convert_string(self, string): for (k,v) in TABLE.items(): string = string.replace(k, v) return string - def pad (self, string): - '''add some lorem ipsum text to the end of string''' + def pad(self, string): + """add some lorem ipsum text to the end of string""" size = len(string) if size < 7: target = size*3 @@ -86,15 +79,15 @@ class Dummy (Converter): target = int(size*PAD_FACTOR) return string + self.terminate(LOREM[:(target-size)]) - def terminate (self, string): - '''replaces the final char of string with #''' + def terminate(self, string): + """replaces the final char of string with #""" return string[:-1]+'#' - def init_msgs (self, msgs): - ''' + def init_msgs(self, msgs): + """ Make sure the first msg in msgs has a plural property. msgs is list of instances of pofile.Msg - ''' + """ if len(msgs)==0: return headers = msgs[0].get_property('msgstr') @@ -105,82 +98,35 @@ class Dummy (Converter): headers.append(plural) - def convert_msg (self, msg): - ''' + def convert_msg(self, msg): + """ Takes one Msg object and converts it (adds a dummy translation to it) msg is an instance of pofile.Msg - ''' - source = msg.get_property('msgid') - if len(source)==1 and len(source[0])==0: + """ + source = msg.msgid + if len(source)==0: # don't translate empty string return - plural = msg.get_property('msgid_plural') + + plural = msg.msgid_plural if len(plural)>0: # translate singular and plural - foreign_single = self.convert(merge(source)) - foreign_plural = self.convert(merge(plural)) - msg.set_property('msgstr[0]', split(foreign_single)) - msg.set_property('msgstr[1]', split(foreign_plural)) + foreign_single = self.convert(source) + foreign_plural = self.convert(plural) + plural = {'0': self.final_newline(source, foreign_single), + '1': self.final_newline(plural, foreign_plural)} + 
msg.msgstr_plural = plural return else: - src_merged = merge(source) - foreign = self.convert(src_merged) - if len(source)>1: - # If last char is a newline, make sure translation - # has a newline too. - if src_merged[-2:]=='\\n': - foreign += '\\n' - msg.set_property('msgstr', split(foreign)) - - -# ---------------------------------- -# String splitting utility functions - -SPLIT_SIZE = 70 - -def merge (string_list): - '''returns a single string: concatenates string_list''' - return ''.join(string_list) - -# .po file format requires long strings to be broken -# up into several shorter (<80 char) strings. -# The first string is empty (""), which indicates -# that more are to be read on following lines. - -def split (string): - ''' - Returns string split into fragments of a given size. - If there are multiple fragments, insert "" as the first fragment. - ''' - result = [chunk for chunk in chunks(string, SPLIT_SIZE)] - if len(result)>1: - result = [''] + result - return result - -def chunks(string, size): - ''' - Generate fragments of a given size from string. Avoid breaking - the string in the middle of an escape sequence (e.g. "\n") - ''' - strlen=len(string)-1 - esc = False - last = 0 - for i,char in enumerate(string): - if not esc and char == '\\': - esc = True - continue - if esc: - esc = False - if i>=last+size-1 or i==strlen: - chunk = string[last:i+1] - last = i+1 - yield chunk - -# testing -# >>> a = "abcd\\efghijklmnopqrstuvwxyz" -# >>> SPLIT_SIZE = 5 -# >>> split(a) -# ['abcd\\e', 'fghij', 'klmno', 'pqrst', 'uvwxy', 'z'] -# >>> merge(split(a)) -# 'abcd\\efghijklmnopqrstuvwxyz' + foreign = self.convert(source) + msg.msgstr = self.final_newline(source, foreign) + def final_newline(self, original, translated): + """ Returns a new translated string. + If last char of original is a newline, make sure translation + has a newline too. 
+ """ + if len(original)>1: + if original[-1]=='\n' and translated[-1]!='\n': + return translated + '\n' + return translated diff --git a/i18n/make_dummy.py b/i18n/make_dummy.py index 8bf9711c57..4ccfb0d5f1 100755 --- a/i18n/make_dummy.py +++ b/i18n/make_dummy.py @@ -16,7 +16,7 @@ # mitx/conf/locale/vr/LC_MESSAGES/django.po import os, sys -from pofile import PoFile +import polib from dummy import Dummy # Dummy language @@ -28,23 +28,26 @@ from dummy import Dummy OUT_LANG = 'fr' -def main (file): - ''' +def main(file): + """ Takes a source po file, reads it, and writes out a new po file containing a dummy translation. - ''' - pofile = PoFile(file) + """ + if not os.path.exists(file): + raise IOError('File does not exist: %s' % file) + pofile = polib.pofile(file) converter = Dummy() - converter.init_msgs(pofile.msgs) - for msg in pofile.msgs: + converter.init_msgs(pofile.translated_entries()) + for msg in pofile: converter.convert_msg(msg) new_file = new_filename(file, OUT_LANG) create_dir_if_necessary(new_file) - pofile.write(new_file) + pofile.save(new_file) + -def new_filename (original_filename, new_lang): - '''Returns a filename derived from original_filename, using new_lang as the locale''' +def new_filename(original_filename, new_lang): + """Returns a filename derived from original_filename, using new_lang as the locale""" orig_dir = os.path.dirname(original_filename) msgs_dir = os.path.basename(orig_dir) orig_file = os.path.basename(original_filename) diff --git a/i18n/pofile.py b/i18n/pofile.py deleted file mode 100644 index d91f76a925..0000000000 --- a/i18n/pofile.py +++ /dev/null @@ -1,143 +0,0 @@ -import re, codecs -from operator import itemgetter - -# Django stores externalized strings in .po and .mo files. -# po files are human readable and contain metadata about the strings. -# mo files are machine readable and optimized for runtime performance. 
- -# See https://docs.djangoproject.com/en/1.3/topics/i18n/internationalization/ -# See http://www.gnu.org/software/gettext/manual/html_node/PO-Files.html - -# Usage: -# >>> pofile = PoFile('/path/to/file') - - -class PoFile: - - # Django requires po files to be in UTF8 with no BOM (byte order marker) - # see "Mind your charset" on this page: - # https://docs.djangoproject.com/en/1.3/topics/i18n/localization/ - - ENCODING = 'utf_8' - - def __init__ (self, pathname): - self.pathname = pathname - self.parse() - - def parse (self): - with codecs.open(self.pathname, 'r', self.ENCODING) as stream: - text = stream.read() - msgs = text.split('\n\n') - self.msgs = [Msg.parse(m) for m in msgs] - return msgs - - def write (self, out_pathname=None): - if out_pathname == None: - out_pathname = self.pathname - with codecs.open(out_pathname, 'w', self.ENCODING) as stream: - for msg in self.msgs: - msg.write(stream) - -class Msg: - - # A PoFile is parsed into a list of Msg objects, each of which corresponds - # to an externalized string entry. - - # Each Msg object may contain multiple comment lines, capturing metadata - - # Each Msg has a property list (self.props) with a dict of key-values. - # Each value is a list of strings - kwords = ['msgid', 'msgstr', 'msgctxt', 'msgid_plural'] - - # Line might begin with "msgid ..." or "msgid[2] ..." - pattern = re.compile('^(\w+)(\[(\d+)\])?') - - @classmethod - def parse (cls, string): - ''' - String is a fragment of a pofile (.po) source file. - This returns a Msg object created by parsing string. 
- ''' - lines = string.strip().split('\n') - msg = Msg() - msg.comments = [] - msg.props = {} - last_kword = None - for line in lines: - if line[0]=='#': - msg.comments.append(line) - elif line[0]=='"' and last_kword != None: - msg.add_string(last_kword, line) - else: - match = cls.pattern.search(line) - if match: - kword = match.group(1) - last_kword = kword - if kword in cls.kwords: - if match.group(3): - key = '%s[%s]' % (kword, match.group(3)) - msg.add_string(key, line[len(key):]) - else: - msg.add_string(kword, line[len(kword):]) - return msg - - def get_property (self, kword): - '''returns value for kword. Typically returns a list of strings''' - return self.props.get(kword, []) - - def set_property (self, kword, value): - '''sets value for kword. Typically returns a list of strings''' - self.props[kword] = value - - def add_string (self, kword, line): - '''Append line to the list of values stored for the property kword''' - props = self.props - value = self.get_property(kword) - value.append(self.cleanup_string(line)) - self.set_property(kword, value) - - def cleanup_string(self, string): - string = string.strip() - if len(string)>1 and string[0]=='"' and string[-1]=='"': - return string[1:-1] - else: - return string - - def write (self, stream): - '''Write a Msg to stream''' - for comment in self.comments: - stream.write(comment) - stream.write('\n') - for (key, values) in self.sort(self.props.items()): - stream.write(key + ' ') - for value in values: - stream.write('"'+value+'"') - stream.write('\n') - stream.write('\n') - - # Preferred ordering of key output - # Always print 'msgctxt' first, then 'msgid', etc. 
- KEY_ORDER = ('msgctxt', 'msgid', 'msgid_plural', 'msgstr', 'msgstr[0]', 'msgstr[1]') - - def keyword_compare (self, k1, k2): - for key in self.KEY_ORDER: - if key == k1: - return -1 - if key == k2: - return 1 - return 0 - - def sort (self, plist): - '''sorts a propertylist to bring the high-priority keys to the beginning of the list''' - return sorted(plist, key=itemgetter(0), cmp=self.keyword_compare) - - - -# Testing -# -# >>> file = 'mitx/conf/locale/en/LC_MESSAGES/django.po' -# >>> file1 = 'mitx/conf/locale/en/LC_MESSAGES/django1.po' -# >>> po = PoFile(file) -# >>> po.write(file1) -# $ diff file file1 - diff --git a/i18n/update.py b/i18n/update.py index 8a865c2528..447dcf71d5 100755 --- a/i18n/update.py +++ b/i18n/update.py @@ -42,8 +42,8 @@ BABEL_OUT = MSGS_DIR + '/mako.po' # These are the shell commands invoked by main() COMMANDS = { 'babel_mako': 'pybabel extract -F %s -c "TRANSLATORS:" . -o %s' % (BABEL_CONFIG, BABEL_OUT), - 'make_django': 'django-admin.py makemessages --all --extension html -l en', - 'make_djangojs': 'django-admin.py makemessages --all -d djangojs --extension js -l en', + 'make_django': 'django-admin.py makemessages --all --ignore=src/* --extension html -l en', + 'make_djangojs': 'django-admin.py makemessages --all -d djangojs --ignore=src/* --extension js -l en', 'msgcat' : 'msgcat -o merged.po django.po %s' % BABEL_OUT, 'rename_django' : 'mv django.po django_old.po', 'rename_merged' : 'mv merged.po django.po', @@ -81,6 +81,15 @@ def main (): create_dir_if_necessary(LOCALE_DIR) log.info('Executing all commands from %s' % BASE_DIR) + remove_files = ['django.po', 'djangojs.po', 'nonesuch'] + for filename in remove_files: + path = MSGS_DIR + '/' + filename + log.info('Deleting file %s' % path) + if not os.path.exists(path): + log.warn("File does not exist: %s" % path) + else: + os.remove(path) + # Generate or update human-readable .po files from all source code. execute('babel_mako', log=log) execute('make_django', log=log)