From 6e34c668bbb323516bd93c65851886e4f14c4646 Mon Sep 17 00:00:00 2001 From: Steve Strassmann Date: Wed, 17 Apr 2013 11:30:29 -0400 Subject: [PATCH 1/8] introduce i18n --- .../contentstore/tests/test_i18n.py | 82 ++++++++ cms/envs/common.py | 5 + cms/static/js/base.js | 37 ++-- cms/templates/base.html | 1 + cms/templates/index.html | 20 +- cms/templates/widgets/footer.html | 9 +- cms/urls.py | 13 +- create-dev-env.sh | 6 +- i18n/converter.py | 74 +++++++ i18n/dummy.py | 186 ++++++++++++++++++ i18n/googleTranslate.py | 68 +++++++ i18n/make_dummy.py | 65 ++++++ i18n/pofile.py | 143 ++++++++++++++ i18n/update.py | 101 ++++++++++ lms/templates/discussion/_single_thread.html | 4 +- 15 files changed, 782 insertions(+), 32 deletions(-) create mode 100644 cms/djangoapps/contentstore/tests/test_i18n.py create mode 100644 i18n/converter.py create mode 100644 i18n/dummy.py create mode 100644 i18n/googleTranslate.py create mode 100755 i18n/make_dummy.py create mode 100644 i18n/pofile.py create mode 100755 i18n/update.py diff --git a/cms/djangoapps/contentstore/tests/test_i18n.py b/cms/djangoapps/contentstore/tests/test_i18n.py new file mode 100644 index 0000000000..8b9fb9e16f --- /dev/null +++ b/cms/djangoapps/contentstore/tests/test_i18n.py @@ -0,0 +1,82 @@ +# -*- coding: iso-8859-1 -*- + +from django.test import TestCase +from django.core.urlresolvers import reverse +from django.contrib.auth.models import User +from django.test.client import Client + +class InternationalizationTest(TestCase): + """ + Tests to validate Internationalization. + """ + + def setUp(self): + """ + These tests need a user in the DB so that the django Test Client + can log them in. + They inherit from the ModuleStoreTestCase class so that the mongodb collection + will be cleared out before each test case execution and deleted + afterwards. + """ + self.uname = 'testuser' + self.email = 'test+courses@edx.org' + self.password = 'foo' + + # Create the use so we can log them in. 
+ self.user = User.objects.create_user(self.uname, self.email, self.password) + + # Note that we do not actually need to do anything + # for registration if we directly mark them active. + self.user.is_active = True + # Staff has access to view all courses + self.user.is_staff = True + self.user.save() + + self.course_data = { + 'template': 'i4x://edx/templates/course/Empty', + 'org': 'MITx', + 'number': '999', + 'display_name': 'Robot Super Course', + } + + def test_course_plain_english(self): + """Test viewing the index page with no courses""" + # Create a course so there is something to view + self.client = Client() + self.client.login(username=self.uname, password=self.password) + + resp = self.client.get(reverse('index')) + self.assertContains(resp, + '

My Courses

', + status_code=200, + html=True) + + + # **** + # NOTE: + # **** + # + # This test will break when we replace this fake 'test' language + # with actual French. This test will need to be updated with + # actual French at that time. + + + def test_course_with_accents (self): + """Test viewing the index page with no courses""" + # Create a course so there is something to view + self.client = Client() + self.client.login(username=self.uname, password=self.password) + + resp = self.client.get(reverse('index'), + {}, + HTTP_ACCEPT_LANGUAGE='fr' + ) + + TEST_STRING = u'

' \ + + u'My Çöürsés L#' \ + + u'

' + + self.assertContains(resp, + TEST_STRING, + status_code=200, + html=True) diff --git a/cms/envs/common.py b/cms/envs/common.py index 37cfeea7a1..3cf5fe15b3 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -128,6 +128,7 @@ MIDDLEWARE_CLASSES = ( 'django.contrib.messages.middleware.MessageMiddleware', 'track.middleware.TrackMiddleware', 'mitxmako.middleware.MakoMiddleware', + 'django.middleware.locale.LocaleMiddleware', 'django.middleware.transaction.TransactionMiddleware' ) @@ -173,9 +174,13 @@ STATICFILES_DIRS = [ # Locale/Internationalization TIME_ZONE = 'America/New_York' # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name LANGUAGE_CODE = 'en' # http://www.i18nguy.com/unicode/language-identifiers.html + USE_I18N = True USE_L10N = True +# Localization strings (e.g. django.po) are under this directory +LOCALE_PATHS = (REPO_ROOT + '/conf/locale',) # mitx/conf/locale/ + # Tracking TRACK_MAX_EVENT = 10000 diff --git a/cms/static/js/base.js b/cms/static/js/base.js index 6a582a45a6..fa48b1699e 100644 --- a/cms/static/js/base.js +++ b/cms/static/js/base.js @@ -159,9 +159,9 @@ $(document).ready(function () { function smoothScrollLink(e) { (e).preventDefault(); - $.smoothScroll({ - offset: -200, - easing: 'swing', + $.smoothScroll({ + offset: -200, + easing: 'swing', speed: 1000, scrollElement: null, scrollTarget: $(this).attr('href') @@ -171,9 +171,9 @@ function smoothScrollLink(e) { function smoothScrollTop(e) { (e).preventDefault(); - $.smoothScroll({ - offset: -200, - easing: 'swing', + $.smoothScroll({ + offset: -200, + easing: 'swing', speed: 1000, scrollElement: null, scrollTarget: $('#view-top') @@ -237,7 +237,7 @@ function showImportSubmit(e) { $('.submit-button').show(); $('.progress').show(); } else { - $('.error-block').html('File format not supported. Please upload a file with a tar.gz extension.').show(); + $('.error-block').html(gettext('File format not supported. 
Please upload a file with a tar.gz extension.')).show(); } } @@ -398,7 +398,7 @@ function showFileSelectionMenu(e) { } function startUpload(e) { - $('.upload-modal h1').html('Uploading…'); + $('.upload-modal h1').html(gettext('Uploading…')); $('.upload-modal .file-name').html($('.file-input').val().replace('C:\\fakepath\\', '')); $('.upload-modal .file-chooser').ajaxSubmit({ beforeSend: resetUploadBar, @@ -431,7 +431,7 @@ function displayFinishedUpload(xhr) { $('.upload-modal .embeddable').show(); $('.upload-modal .file-name').hide(); $('.upload-modal .progress-fill').html(resp.msg); - $('.upload-modal .choose-file-button').html('Load Another File').show(); + $('.upload-modal .choose-file-button').html(gettext('Load Another File')).show(); $('.upload-modal .progress-fill').width('100%'); // see if this id already exists, if so, then user must have updated an existing piece of content @@ -483,20 +483,20 @@ function toggleSock(e) { $sock.toggleClass('is-shown'); $sockContent.toggle('fast'); - $.smoothScroll({ - offset: -200, - easing: 'swing', + $.smoothScroll({ + offset: -200, + easing: 'swing', speed: 1000, scrollElement: null, scrollTarget: $sock }); if($sock.hasClass('is-shown')) { - $btnLabel.text('Hide Studio Help'); + $btnLabel.text(gettext('Hide Studio Help')); } else { - $btnLabel.text('Looking for Help with Studio?'); + $btnLabel.text(gettext('Looking for Help with Studio?')); } } @@ -826,7 +826,12 @@ function saveSetSectionScheduleDate(e) { data: JSON.stringify({ 'id': id, 'metadata': {'start': start}}) }).success(function () { var $thisSection = $('.courseware-section[data-id="' + id + '"]'); - $thisSection.find('.section-published-date').html('Will Release: ' + input_date + ' at ' + input_time + ' UTCEdit'); + $thisSection.find('.section-published-date').html( + '' + gettext('Will Release:') + + ' ' + input_date + ' at ' + input_time + + ' UTC' + + gettext('Edit') + ''); $thisSection.find('.section-published-date').animate({ 'background-color': 
'rgb(182,37,104)' }, 300).animate({ @@ -839,4 +844,4 @@ function saveSetSectionScheduleDate(e) { hideModal(); }); -} \ No newline at end of file +} diff --git a/cms/templates/base.html b/cms/templates/base.html index 0809795f70..884a84c459 100644 --- a/cms/templates/base.html +++ b/cms/templates/base.html @@ -30,6 +30,7 @@ <%include file="courseware_vendor_js.html"/> + diff --git a/cms/templates/index.html b/cms/templates/index.html index 916720f4e7..0f6e982b1d 100644 --- a/cms/templates/index.html +++ b/cms/templates/index.html @@ -1,6 +1,8 @@ +<%! from django.utils.translation import ugettext as _ %> + <%inherit file="base.html" /> -<%block name="title">My Courses +<%block name="title">${_("My Courses")} <%block name="bodyclass">is-signedin index dashboard <%block name="header_extras"> @@ -36,18 +38,18 @@
-

My Courses

+

${_("My Courses")}

% if user.is_active:
- \ No newline at end of file + diff --git a/cms/templates/widgets/footer.html b/cms/templates/widgets/footer.html index 7162dad50f..db7d5fb3f8 100644 --- a/cms/templates/widgets/footer.html +++ b/cms/templates/widgets/footer.html @@ -1,8 +1,10 @@ <%! from django.core.urlresolvers import reverse %> +<%! from django.utils.translation import ugettext as _ %> + \ No newline at end of file + + diff --git a/cms/urls.py b/cms/urls.py index e1eae3352a..832879b51e 100644 --- a/cms/urls.py +++ b/cms/urls.py @@ -94,7 +94,7 @@ urlpatterns = ('', # noop to squelch ajax errors url(r'^event$', 'contentstore.views.event', name='event'), - url(r'^heartbeat$', include('heartbeat.urls')), + url(r'^heartbeat$', include('heartbeat.urls')) ) # User creation and updating views @@ -118,6 +118,17 @@ urlpatterns += ( ) +js_info_dict = { + 'domain': 'djangojs', + 'packages': ('cms',), + } + +urlpatterns += ( + # Serve catalog of localized strings to be rendered by Javascript + url(r'^jsi18n/$', 'django.views.i18n.javascript_catalog', js_info_dict), + ) + + if settings.ENABLE_JASMINE: # # Jasmine urlpatterns = urlpatterns + (url(r'^_jasmine/', include('django_jasmine.urls')),) diff --git a/create-dev-env.sh b/create-dev-env.sh index f0ebca3ff7..f87d88401d 100755 --- a/create-dev-env.sh +++ b/create-dev-env.sh @@ -93,7 +93,7 @@ clone_repos() { ### START PROG=${0##*/} -BASE="$HOME/mitx_all" +BASE="$HOME/src/mitx_all" PYTHON_DIR="$BASE/python" RUBY_DIR="$BASE/ruby" RUBY_VER="1.9.3" @@ -290,7 +290,8 @@ source $PYTHON_DIR/bin/activate NUMPY_VER="1.6.2" SCIPY_VER="0.10.1" -if [[ -n $compile ]]; then +if [-z "false"]; then + if [[ -n $compile ]]; then output "Downloading numpy and scipy" curl -sL -o numpy.tar.gz http://downloads.sourceforge.net/project/numpy/NumPy/${NUMPY_VER}/numpy-${NUMPY_VER}.tar.gz curl -sL -o scipy.tar.gz http://downloads.sourceforge.net/project/scipy/scipy/${SCIPY_VER}/scipy-${SCIPY_VER}.tar.gz @@ -305,6 +306,7 @@ if [[ -n $compile ]]; then python setup.py install cd "$BASE" 
rm -rf numpy-${NUMPY_VER} scipy-${SCIPY_VER} + fi fi case `uname -s` in diff --git a/i18n/converter.py b/i18n/converter.py new file mode 100644 index 0000000000..fe66ff3e74 --- /dev/null +++ b/i18n/converter.py @@ -0,0 +1,74 @@ +import re, itertools + +# Converter is an abstract class that transforms strings. +# It hides embedded tags (HTML or Python sequences) from transformation +# +# To implement Converter, provide implementation for inner_convert_string() + + +class Converter: + + # matches tags like these: + # HTML: , ,
, + # Python: %(date)s, %(name)s + # + tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\(.*\)\w)', re.I) + + + def convert (self, string): + if self.tag_pattern.search(string): + result = self.convert_tagged_string(string) + else: + result = self.inner_convert_string(string) + return result + + # convert_tagged_string(string): + # returns: a converted tagged string + # param: string (contains html tags) + # + # Don't replace characters inside tags + # + # Strategy: + # 1. extract tags embedded in the string + # a. use the index of each extracted tag to re-insert it later + # b. replace tags in string with numbers (<0>, <1>, etc.) + # c. save extracted tags in a separate list + # 2. convert string + # 3. re-insert the extracted tags + # + def convert_tagged_string (self, string): + (string, tags) = self.detag_string(string) + string = self.inner_convert_string(string) + string = self.retag_string(string, tags) + return string + + # extracts tags from string. + # + # returns (string, list) where + # string: string has tags replaced by indices (
... => <0>, <1>, <2>, etc.) + # list: list of the removed tags ("
", "", "") + def detag_string (self, string): + counter = itertools.count(0) + count = lambda m: '<%s>' % counter.next() + tags = self.tag_pattern.findall(string) + tags = [''.join(tag) for tag in tags] + (new, nfound) = self.tag_pattern.subn(count, string) + if len(tags) != nfound: + raise Exception('tags dont match:'+string) + return (new, tags) + + # substitutes each tag back into string, into occurrences of <0>, <1> etc + # + def retag_string (self, string, tags): + for (i, tag) in enumerate(tags): + p = '<%s>' % i + string = re.sub(p, tag, string, 1) + return string + + + # ------------------------------ + # Customize this in subclasses of Converter + + def inner_convert_string (self, string): + return string # do nothing by default + diff --git a/i18n/dummy.py b/i18n/dummy.py new file mode 100644 index 0000000000..a94d400ba0 --- /dev/null +++ b/i18n/dummy.py @@ -0,0 +1,186 @@ +# -*- coding: iso-8859-15 -*- + +from converter import Converter + +# This file converts string resource files. +# Java: file has name like messages_en.properties +# Flex: file has name like locales/en_US/Labels.properties + +# Creates new localization properties files in a dummy language (saved as 'vr', Vardebedian) +# Each property file is derived from the equivalent en_US file, except +# 1. Every vowel is replaced with an equivalent with extra accent marks +# 2. Every string is padded out to +30% length to simulate verbose languages (e.g. German) +# to see if layout and flows work properly +# 3. 
Every string is terminated with a '#' character to make it easier to detect truncation + + +# -------------------------------- +# Example use: +# >>> from dummy import Dummy +# >>> c = Dummy() +# >>> print c.convert("hello my name is Bond, James Bond") +# héllö my nämé ïs Bönd, Jämés Bönd Lorem i# +# +# >>> print c.convert('don\'t convert tag ids') +# dön't çönvért täg ïds Lorem ipsu# +# +# >>> print c.convert('don\'t convert %(name)s tags on %(date)s') +# dön't çönvért %(name)s tags on %(date)s Lorem ips# + + +# Substitute plain characters with accented lookalikes. +# http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent +# print "print u'\\x%x'" % 207 +TABLE = {'A': u'\xC0', + 'a': u'\xE4', + 'b': u'\xDF', + 'C': u'\xc7', + 'c': u'\xE7', + 'E': u'\xC9', + 'e': u'\xE9', + 'I': U'\xCC', + 'i': u'\xEF', + 'O': u'\xD8', + 'o': u'\xF6', + 'u': u'\xFC' + } + + + +# The print industry's standard dummy text, in use since the 1500s +# see http://www.lipsum.com/ +LOREM = ' Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed ' \ + 'do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad ' \ + 'minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ' \ + 'ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate ' \ + 'velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat ' \ + 'cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. ' + +# To simulate more verbose languages (like German), pad the length of a string +# by a multiple of PAD_FACTOR +PAD_FACTOR = 1.3 + + +class Dummy (Converter): + ''' + A string converter that generates dummy strings with fake accents + and lorem ipsum padding. 
+ ''' + + def convert (self, string): + result = Converter.convert(self, string) + return self.pad(result) + + def inner_convert_string (self, string): + for (k,v) in TABLE.items(): + string = string.replace(k, v) + return string + + + def pad (self, string): + '''add some lorem ipsum text to the end of string''' + size = len(string) + if size < 7: + target = size*3 + else: + target = int(size*PAD_FACTOR) + return string + self.terminate(LOREM[:(target-size)]) + + def terminate (self, string): + '''replaces the final char of string with #''' + return string[:-1]+'#' + + def init_msgs (self, msgs): + ''' + Make sure the first msg in msgs has a plural property. + msgs is list of instances of pofile.Msg + ''' + if len(msgs)==0: + return + headers = msgs[0].get_property('msgstr') + has_plural = len([header for header in headers if header.find('Plural-Forms:') == 0])>0 + if not has_plural: + # Apply declaration for English pluralization rules + plural = "Plural-Forms: nplurals=2; plural=(n != 1);\\n" + headers.append(plural) + + + def convert_msg (self, msg): + ''' + Takes one Msg object and converts it (adds a dummy translation to it) + msg is an instance of pofile.Msg + ''' + source = msg.get_property('msgid') + if len(source)==1 and len(source[0])==0: + # don't translate empty string + return + plural = msg.get_property('msgid_plural') + if len(plural)>0: + # translate singular and plural + foreign_single = self.convert(merge(source)) + foreign_plural = self.convert(merge(plural)) + msg.set_property('msgstr[0]', split(foreign_single)) + msg.set_property('msgstr[1]', split(foreign_plural)) + return + else: + src_merged = merge(source) + foreign = self.convert(src_merged) + if len(source)>1: + # If last char is a newline, make sure translation + # has a newline too. 
+ if src_merged[-2:]=='\\n': + foreign += '\\n' + msg.set_property('msgstr', split(foreign)) + + +# ---------------------------------- +# String splitting utility functions + +SPLIT_SIZE = 70 + +def merge (string_list): + '''returns a single string: concatenates string_list''' + return ''.join(string_list) + +# .po file format requires long strings to be broken +# up into several shorter (<80 char) strings. +# The first string is empty (""), which indicates +# that more are to be read on following lines. + +def split (string): + ''' + Returns string split into fragments of a given size. + If there are multiple fragments, insert "" as the first fragment. + ''' + result = [chunk for chunk in chunks(string, SPLIT_SIZE)] + if len(result)>1: + result = [''] + result + return result + +def chunks(string, size): + ''' + Generate fragments of a given size from string. Avoid breaking + the string in the middle of an escape sequence (e.g. "\n") + ''' + strlen=len(string)-1 + esc = False + last = 0 + for i,char in enumerate(string): + if not esc and char == '\\': + esc = True + continue + if esc: + esc = False + if i>=last+size-1 or i==strlen: + chunk = string[last:i+1] + last = i+1 + yield chunk + +# testing +# >>> a = "abcd\\efghijklmnopqrstuvwxyz" +# >>> SPLIT_SIZE = 5 +# >>> split(a) +# ['abcd\\e', 'fghij', 'klmno', 'pqrst', 'uvwxy', 'z'] +# >>> merge(split(a)) +# 'abcd\\efghijklmnopqrstuvwxyz' + diff --git a/i18n/googleTranslate.py b/i18n/googleTranslate.py new file mode 100644 index 0000000000..e79dbe00a2 --- /dev/null +++ b/i18n/googleTranslate.py @@ -0,0 +1,68 @@ +import urllib, urllib2, json + +# Google Translate API +# see https://code.google.com/apis/language/translate/v2/getting_started.html +# +# +# usage: translate('flower', 'fr') => 'fleur' + + +# -------------------------------------------- +# Translation limit = 100,000 chars/day (request submitted for more) +# Limit of 5,000 characters per request +# This key is personally registered to Steve Strassmann +# 
+#KEY = 'AIzaSyCDapmXdBtIYw3ofsvgm6gIYDNwiVmSm7g' +KEY = 'AIzaSyDOhTQokSOqqO-8ZJqUNgn12C83g-muIqA' + +URL = 'https://www.googleapis.com/language/translate/v2' + +SOURCE = 'en' # source: English + +TARGETS = ['zh-CN', 'ja', 'fr', 'de', # tier 1: Simplified Chinese, Japanese, French, German + 'es', 'it', # tier 2: Spanish, Italian + 'ru'] # extra credit: Russian + + +def translate (string, target): + return extract(fetch(string, target)) + + +# Ask Google to translate string to target language +# string: English string +# target: lang (e.g. 'fr', 'cn') +# Returns JSON object +def fetch (string, target, url=URL, key=KEY, source=SOURCE): + data = {'key':key, + 'q':string, + 'source': source, + 'target':target} + fullUrl = '%s?%s' % (url, urllib.urlencode(data)) + try: + response = urllib2.urlopen(fullUrl) + return json.loads(response.read()) + except urllib2.HTTPError as err: + if err.code == 403: + print "***" + print "*** Possible daily limit exceeded for Google Translate:" + print "***" + print "***", json.loads("".join(err.readlines())) + print "***" + raise + + + +# Extracts a translated result from a json object returned from Google +def extract (response): + data = response['data'] + translations = data['translations'] + first = translations[0] + result = first.get('translated_text', None) + if result != None: + return result + else: + result = first.get('translatedText', None) + if result != None: + return result + else: + raise Exception("Could not read translation from: %s" % translations) diff --git a/i18n/make_dummy.py b/i18n/make_dummy.py new file mode 100755 index 0000000000..8bf9711c57 --- /dev/null +++ b/i18n/make_dummy.py @@ -0,0 +1,65 @@ +#!/usr/bin/python + +# Generate test translation files from human-readable po files. 
+# +# +# po files can be generated with this: +# django-admin.py makemessages --all --extension html -l en + +# Usage: +# +# $ ./make_dummy.py +# +# $ ./make_dummy.py mitx/conf/locale/en/LC_MESSAGES/django.po +# +# generates output to +# mitx/conf/locale/vr/LC_MESSAGES/django.po + +import os, sys +from pofile import PoFile +from dummy import Dummy + +# Dummy language +# two letter language codes reference: +# see http://www.loc.gov/standards/iso639-2/php/code_list.php +# +# Django will not localize in languages that django itself has not been +# localized for. So we are using a well-known language: 'fr'. + +OUT_LANG = 'fr' + +def main (file): + ''' + Takes a source po file, reads it, and writes out a new po file + containing a dummy translation. + ''' + pofile = PoFile(file) + converter = Dummy() + converter.init_msgs(pofile.msgs) + for msg in pofile.msgs: + converter.convert_msg(msg) + new_file = new_filename(file, OUT_LANG) + create_dir_if_necessary(new_file) + pofile.write(new_file) + + +def new_filename (original_filename, new_lang): + '''Returns a filename derived from original_filename, using new_lang as the locale''' + orig_dir = os.path.dirname(original_filename) + msgs_dir = os.path.basename(orig_dir) + orig_file = os.path.basename(original_filename) + return '%s/%s/%s/%s' % (os.path.abspath(orig_dir + '/../..'), + new_lang, + msgs_dir, + orig_file) + + +def create_dir_if_necessary(pathname): + dirname = os.path.dirname(pathname) + if not os.path.exists(dirname): + os.makedirs(dirname) + +if __name__ == '__main__': + if len(sys.argv)<2: + raise Exception("missing file argument") + main(sys.argv[1]) diff --git a/i18n/pofile.py b/i18n/pofile.py new file mode 100644 index 0000000000..d91f76a925 --- /dev/null +++ b/i18n/pofile.py @@ -0,0 +1,143 @@ +import re, codecs +from operator import itemgetter + +# Django stores externalized strings in .po and .mo files. +# po files are human readable and contain metadata about the strings. 
+# mo files are machine readable and optimized for runtime performance. + +# See https://docs.djangoproject.com/en/1.3/topics/i18n/internationalization/ +# See http://www.gnu.org/software/gettext/manual/html_node/PO-Files.html + +# Usage: +# >>> pofile = PoFile('/path/to/file') + + +class PoFile: + + # Django requires po files to be in UTF8 with no BOM (byte order marker) + # see "Mind your charset" on this page: + # https://docs.djangoproject.com/en/1.3/topics/i18n/localization/ + + ENCODING = 'utf_8' + + def __init__ (self, pathname): + self.pathname = pathname + self.parse() + + def parse (self): + with codecs.open(self.pathname, 'r', self.ENCODING) as stream: + text = stream.read() + msgs = text.split('\n\n') + self.msgs = [Msg.parse(m) for m in msgs] + return msgs + + def write (self, out_pathname=None): + if out_pathname == None: + out_pathname = self.pathname + with codecs.open(out_pathname, 'w', self.ENCODING) as stream: + for msg in self.msgs: + msg.write(stream) + +class Msg: + + # A PoFile is parsed into a list of Msg objects, each of which corresponds + # to an externalized string entry. + + # Each Msg object may contain multiple comment lines, capturing metadata + + # Each Msg has a property list (self.props) with a dict of key-values. + # Each value is a list of strings + kwords = ['msgid', 'msgstr', 'msgctxt', 'msgid_plural'] + + # Line might begin with "msgid ..." or "msgid[2] ..." + pattern = re.compile('^(\w+)(\[(\d+)\])?') + + @classmethod + def parse (cls, string): + ''' + String is a fragment of a pofile (.po) source file. + This returns a Msg object created by parsing string. 
+ ''' + lines = string.strip().split('\n') + msg = Msg() + msg.comments = [] + msg.props = {} + last_kword = None + for line in lines: + if line[0]=='#': + msg.comments.append(line) + elif line[0]=='"' and last_kword != None: + msg.add_string(last_kword, line) + else: + match = cls.pattern.search(line) + if match: + kword = match.group(1) + last_kword = kword + if kword in cls.kwords: + if match.group(3): + key = '%s[%s]' % (kword, match.group(3)) + msg.add_string(key, line[len(key):]) + else: + msg.add_string(kword, line[len(kword):]) + return msg + + def get_property (self, kword): + '''returns value for kword. Typically returns a list of strings''' + return self.props.get(kword, []) + + def set_property (self, kword, value): + '''sets value for kword. Typically returns a list of strings''' + self.props[kword] = value + + def add_string (self, kword, line): + '''Append line to the list of values stored for the property kword''' + props = self.props + value = self.get_property(kword) + value.append(self.cleanup_string(line)) + self.set_property(kword, value) + + def cleanup_string(self, string): + string = string.strip() + if len(string)>1 and string[0]=='"' and string[-1]=='"': + return string[1:-1] + else: + return string + + def write (self, stream): + '''Write a Msg to stream''' + for comment in self.comments: + stream.write(comment) + stream.write('\n') + for (key, values) in self.sort(self.props.items()): + stream.write(key + ' ') + for value in values: + stream.write('"'+value+'"') + stream.write('\n') + stream.write('\n') + + # Preferred ordering of key output + # Always print 'msgctxt' first, then 'msgid', etc. 
+ KEY_ORDER = ('msgctxt', 'msgid', 'msgid_plural', 'msgstr', 'msgstr[0]', 'msgstr[1]') + + def keyword_compare (self, k1, k2): + for key in self.KEY_ORDER: + if key == k1: + return -1 + if key == k2: + return 1 + return 0 + + def sort (self, plist): + '''sorts a propertylist to bring the high-priority keys to the beginning of the list''' + return sorted(plist, key=itemgetter(0), cmp=self.keyword_compare) + + + +# Testing +# +# >>> file = 'mitx/conf/locale/en/LC_MESSAGES/django.po' +# >>> file1 = 'mitx/conf/locale/en/LC_MESSAGES/django1.po' +# >>> po = PoFile(file) +# >>> po.write(file1) +# $ diff file file1 + diff --git a/i18n/update.py b/i18n/update.py new file mode 100755 index 0000000000..8a865c2528 --- /dev/null +++ b/i18n/update.py @@ -0,0 +1,101 @@ +#!/usr/bin/python + +import os, subprocess, logging, json +from make_dummy import create_dir_if_necessary, main as dummy_main + +''' +Generate or update all translation files + Usage: + $ update.py + + + 1. extracts files from mako templates + 2. extracts files from django templates and python source files + 3. extracts files from django javascript files + 4. generates dummy text translations + 5. compiles po files to mo files + + Configuration (e.g. known languages) declared in mitx/conf/locale/config +''' + +# ----------------------------------- +# BASE_DIR is the working directory to execute django-admin commands from. +# Typically this should be the 'mitx' directory. +BASE_DIR = os.path.abspath(os.path.dirname(os.path.abspath(__file__))+'/..') + +# LOCALE_DIR contains the locale files. 
+# Typically this should be 'mitx/conf/locale' +LOCALE_DIR = BASE_DIR + '/conf/locale' + +# MSGS_DIR contains the English po files +MSGS_DIR = LOCALE_DIR + '/en/LC_MESSAGES' + +# CONFIG_FILENAME contains localization configuration in json format +CONFIG_FILENAME = LOCALE_DIR + '/config' + +# BABEL_CONFIG contains declarations for Babel to extract strings from mako template files +BABEL_CONFIG = LOCALE_DIR + '/babel.cfg' + +# Strings from mako template files are written to BABEL_OUT +BABEL_OUT = MSGS_DIR + '/mako.po' + +# These are the shell commands invoked by main() +COMMANDS = { + 'babel_mako': 'pybabel extract -F %s -c "TRANSLATORS:" . -o %s' % (BABEL_CONFIG, BABEL_OUT), + 'make_django': 'django-admin.py makemessages --all --extension html -l en', + 'make_djangojs': 'django-admin.py makemessages --all -d djangojs --extension js -l en', + 'msgcat' : 'msgcat -o merged.po django.po %s' % BABEL_OUT, + 'rename_django' : 'mv django.po django_old.po', + 'rename_merged' : 'mv merged.po django.po', + 'compile': 'django-admin.py compilemessages' + + } + +def execute (command_kwd, log, working_directory=BASE_DIR): + ''' + Executes command_kwd, which references a shell command in COMMANDS. 
+ ''' + full_cmd = COMMANDS[command_kwd] + log.info('%s' % full_cmd) + subprocess.call(full_cmd.split(' '), cwd=working_directory) + +def make_log (): + '''returns a logger''' + log = logging.getLogger(__name__) + log.setLevel(logging.INFO) + log_handler = logging.StreamHandler() + log_handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] %(message)s')) + log.addHandler(log_handler) + return log + +def get_config (): + '''Returns data found in config file, or returns None if file not found''' + config_path = os.path.abspath(CONFIG_FILENAME) + if not os.path.exists(config_path): + return None + with open(config_path) as stream: + return json.load(stream) + +def main (): + log = make_log() + create_dir_if_necessary(LOCALE_DIR) + log.info('Executing all commands from %s' % BASE_DIR) + + # Generate or update human-readable .po files from all source code. + execute('babel_mako', log=log) + execute('make_django', log=log) + execute('make_djangojs', log=log) + execute('msgcat', log=log, working_directory=MSGS_DIR) + execute('rename_django', log=log, working_directory=MSGS_DIR) + execute('rename_merged', log=log, working_directory=MSGS_DIR) + + # Generate dummy text files from the English .po files + log.info('Generating dummy text.') + dummy_main(LOCALE_DIR + '/en/LC_MESSAGES/django.po') + dummy_main(LOCALE_DIR + '/en/LC_MESSAGES/djangojs.po') + + # Generate machine-readable .mo files + execute('compile', log) + +if __name__ == '__main__': + main() diff --git a/lms/templates/discussion/_single_thread.html b/lms/templates/discussion/_single_thread.html index 0dec32ad47..e291bc955c 100644 --- a/lms/templates/discussion/_single_thread.html +++ b/lms/templates/discussion/_single_thread.html @@ -6,7 +6,7 @@
- %if thread['group_id'] + %if thread['group_id']:
This post visible only to group ${cohort_dictionary[thread['group_id']]}.
%endif @@ -35,4 +35,4 @@ -<%include file="_js_data.html" /> \ No newline at end of file +<%include file="_js_data.html" /> From e76419093d976bb2f2e91fade4926e826d3e2183 Mon Sep 17 00:00:00 2001 From: Steve Strassmann Date: Wed, 17 Apr 2013 12:25:23 -0400 Subject: [PATCH 2/8] uncommit unneeded files --- cms/urls.py | 2 +- create-dev-env.sh | 6 ++-- i18n/googleTranslate.py | 68 ----------------------------------------- 3 files changed, 3 insertions(+), 73 deletions(-) delete mode 100644 i18n/googleTranslate.py diff --git a/cms/urls.py b/cms/urls.py index 832879b51e..598d91b075 100644 --- a/cms/urls.py +++ b/cms/urls.py @@ -94,7 +94,7 @@ urlpatterns = ('', # noop to squelch ajax errors url(r'^event$', 'contentstore.views.event', name='event'), - url(r'^heartbeat$', include('heartbeat.urls')) + url(r'^heartbeat$', include('heartbeat.urls')), ) # User creation and updating views diff --git a/create-dev-env.sh b/create-dev-env.sh index f87d88401d..f0ebca3ff7 100755 --- a/create-dev-env.sh +++ b/create-dev-env.sh @@ -93,7 +93,7 @@ clone_repos() { ### START PROG=${0##*/} -BASE="$HOME/src/mitx_all" +BASE="$HOME/mitx_all" PYTHON_DIR="$BASE/python" RUBY_DIR="$BASE/ruby" RUBY_VER="1.9.3" @@ -290,8 +290,7 @@ source $PYTHON_DIR/bin/activate NUMPY_VER="1.6.2" SCIPY_VER="0.10.1" -if [-z "false"]; then - if [[ -n $compile ]]; then +if [[ -n $compile ]]; then output "Downloading numpy and scipy" curl -sL -o numpy.tar.gz http://downloads.sourceforge.net/project/numpy/NumPy/${NUMPY_VER}/numpy-${NUMPY_VER}.tar.gz curl -sL -o scipy.tar.gz http://downloads.sourceforge.net/project/scipy/scipy/${SCIPY_VER}/scipy-${SCIPY_VER}.tar.gz @@ -306,7 +305,6 @@ if [-z "false"]; then python setup.py install cd "$BASE" rm -rf numpy-${NUMPY_VER} scipy-${SCIPY_VER} - fi fi case `uname -s` in diff --git a/i18n/googleTranslate.py b/i18n/googleTranslate.py deleted file mode 100644 index e79dbe00a2..0000000000 --- a/i18n/googleTranslate.py +++ /dev/null @@ -1,68 +0,0 @@ -import urllib, urllib2, json - -# 
Google Translate API -# see https://code.google.com/apis/language/translate/v2/getting_started.html -# -# -# usage: translate('flower', 'fr') => 'fleur' - - -# -------------------------------------------- -# Translation limit = 100,000 chars/day (request submitted for more) -# Limit of 5,000 characters per request -# This key is personally registered to Steve Strassmann -# -#KEY = 'AIzaSyCDapmXdBtIYw3ofsvgm6gIYDNwiVmSm7g' -KEY = 'AIzaSyDOhTQokSOqqO-8ZJqUNgn12C83g-muIqA' - -URL = 'https://www.googleapis.com/language/translate/v2' - -SOURCE = 'en' # source: English - -TARGETS = ['zh-CN', 'ja', 'fr', 'de', # tier 1: Simplified Chinese, Japanese, French, German - 'es', 'it', # tier 2: Spanish, Italian - 'ru'] # extra credit: Russian - - -def translate (string, target): - return extract(fetch(string, target)) - - -# Ask Google to translate string to target language -# string: English string -# target: lang (e.g. 'fr', 'cn') -# Returns JSON object -def fetch (string, target, url=URL, key=KEY, source=SOURCE): - data = {'key':key, - 'q':string, - 'source': source, - 'target':target} - fullUrl = '%s?%s' % (url, urllib.urlencode(data)) - try: - response = urllib2.urlopen(fullUrl) - return json.loads(response.read()) - except urllib2.HTTPError as err: - if err.code == 403: - print "***" - print "*** Possible daily limit exceeded for Google Translate:" - print "***" - print "***", json.loads("".join(err.readlines())) - print "***" - raise - - - -# Extracts a translated result from a json object returned from Google -def extract (response): - data = response['data'] - translations = data['translations'] - first = translations[0] - result = first.get('translated_text', None) - if result != None: - return result - else: - result = first.get('translatedText', None) - if result != None: - return result - else: - raise Exception("Could not read translation from: %s" % translations) From 2f84b230479bbd5f544a7d4dff2beeb0bf12c292 Mon Sep 17 00:00:00 2001 From: Steve Strassmann Date: 
Wed, 17 Apr 2013 13:20:20 -0400 Subject: [PATCH 3/8] disable i18n unit test - it won't run without creating dummy strings first --- cms/djangoapps/contentstore/tests/test_i18n.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cms/djangoapps/contentstore/tests/test_i18n.py b/cms/djangoapps/contentstore/tests/test_i18n.py index 8b9fb9e16f..fba2da10dd 100644 --- a/cms/djangoapps/contentstore/tests/test_i18n.py +++ b/cms/djangoapps/contentstore/tests/test_i18n.py @@ -4,6 +4,7 @@ from django.test import TestCase from django.core.urlresolvers import reverse from django.contrib.auth.models import User from django.test.client import Client +from nose.tools import nottest class InternationalizationTest(TestCase): """ @@ -60,7 +61,8 @@ class InternationalizationTest(TestCase): # with actual French. This test will need to be updated with # actual French at that time. - + # Test temporarily disable since it depends on creation of dummy strings + @nottest def test_course_with_accents (self): """Test viewing the index page with no courses""" # Create a course so there is something to view From 91bee1a96606bef29e1f30d188cf5adefa83130f Mon Sep 17 00:00:00 2001 From: Steve Strassmann Date: Thu, 18 Apr 2013 09:27:44 -0400 Subject: [PATCH 4/8] add Babel to requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 1a383e6cc0..9242724ed5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -r repo-requirements.txt +Babel==0.9.6 beautifulsoup4==4.1.3 beautifulsoup==3.2.1 boto==2.6.0 From 6f103488360dc2e7134849817770f1971efe38d2 Mon Sep 17 00:00:00 2001 From: Steve Strassmann Date: Tue, 23 Apr 2013 10:07:51 -0400 Subject: [PATCH 5/8] addressed comments from pull request --- .../contentstore/tests/test_i18n.py | 28 +++- cms/envs/common.py | 2 + cms/static/js/base.js | 11 +- i18n/converter.py | 79 +++++----- i18n/dummy.py | 136 +++++------------ i18n/make_dummy.py | 23 +-- 
i18n/pofile.py | 143 ------------------ i18n/update.py | 13 +- 8 files changed, 131 insertions(+), 304 deletions(-) delete mode 100644 i18n/pofile.py diff --git a/cms/djangoapps/contentstore/tests/test_i18n.py b/cms/djangoapps/contentstore/tests/test_i18n.py index fba2da10dd..c3c0b25fc3 100644 --- a/cms/djangoapps/contentstore/tests/test_i18n.py +++ b/cms/djangoapps/contentstore/tests/test_i18n.py @@ -1,12 +1,12 @@ -# -*- coding: iso-8859-1 -*- +from unittest import skip -from django.test import TestCase from django.core.urlresolvers import reverse from django.contrib.auth.models import User from django.test.client import Client -from nose.tools import nottest -class InternationalizationTest(TestCase): +from .utils import ModuleStoreTestCase + +class InternationalizationTest(ModuleStoreTestCase): """ Tests to validate Internationalization. """ @@ -52,6 +52,22 @@ class InternationalizationTest(TestCase): status_code=200, html=True) + def test_course_explicit_english(self): + """Test viewing the index page with no courses""" + # Create a course so there is something to view + self.client = Client() + self.client.login(username=self.uname, password=self.password) + + resp = self.client.get(reverse('index'), + {}, + HTTP_ACCEPT_LANGUAGE='en' + ) + + self.assertContains(resp, + '

My Courses

', + status_code=200, + html=True) + # **** # NOTE: @@ -62,7 +78,7 @@ class InternationalizationTest(TestCase): # actual French at that time. # Test temporarily disable since it depends on creation of dummy strings - @nottest + @skip def test_course_with_accents (self): """Test viewing the index page with no courses""" # Create a course so there is something to view @@ -75,7 +91,7 @@ class InternationalizationTest(TestCase): ) TEST_STRING = u'

' \ - + u'My Çöürsés L#' \ + + u'My \xc7\xf6\xfcrs\xe9s L#' \ + u'

' self.assertContains(resp, diff --git a/cms/envs/common.py b/cms/envs/common.py index 3cf5fe15b3..614491f50d 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -128,6 +128,8 @@ MIDDLEWARE_CLASSES = ( 'django.contrib.messages.middleware.MessageMiddleware', 'track.middleware.TrackMiddleware', 'mitxmako.middleware.MakoMiddleware', + + # Detects user-requested locale from 'accept-language' header in http request 'django.middleware.locale.LocaleMiddleware', 'django.middleware.transaction.TransactionMiddleware' diff --git a/cms/static/js/base.js b/cms/static/js/base.js index fa48b1699e..4112d2bb8e 100644 --- a/cms/static/js/base.js +++ b/cms/static/js/base.js @@ -826,11 +826,14 @@ function saveSetSectionScheduleDate(e) { data: JSON.stringify({ 'id': id, 'metadata': {'start': start}}) }).success(function () { var $thisSection = $('.courseware-section[data-id="' + id + '"]'); + var format = gettext('Will Release: %(date)s at $(time)s UTC'); + var willReleaseAt = interpolate(format, [input_date, input_time], true); $thisSection.find('.section-published-date').html( - '' + gettext('Will Release:') + - ' ' + input_date + ' at ' + input_time + - ' UTC' + + '' + willReleaseAt + '' + + '' + gettext('Edit') + ''); $thisSection.find('.section-published-date').animate({ 'background-color': 'rgb(182,37,104)' diff --git a/i18n/converter.py b/i18n/converter.py index fe66ff3e74..63d8f83e00 100644 --- a/i18n/converter.py +++ b/i18n/converter.py @@ -1,53 +1,45 @@ -import re, itertools - -# Converter is an abstract class that transforms strings. -# It hides embedded tags (HTML or Python sequences) from transformation -# -# To implement Converter, provide implementation for inner_convert_string() - +import re +import itertools class Converter: + """Converter is an abstract class that transforms strings. + It hides embedded tags (HTML or Python sequences) from transformation + + To implement Converter, provide implementation for inner_convert_string() + Strategy: + 1. 
extract tags embedded in the string + a. use the index of each extracted tag to re-insert it later + b. replace tags in string with numbers (<0>, <1>, etc.) + c. save extracted tags in a separate list + 2. convert string + 3. re-insert the extracted tags + + """ + # matches tags like these: - # HTML: , ,
, - # Python: %(date)s, %(name)s - # - tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\(.*\)\w)', re.I) + # HTML: , ,
, + # Python: %(date)s, %(name)s + tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\([^)]*\)\w)', re.I) - - def convert (self, string): - if self.tag_pattern.search(string): - result = self.convert_tagged_string(string) - else: - result = self.inner_convert_string(string) - return result - - # convert_tagged_string(string): - # returns: a converted tagged string - # param: string (contains html tags) - # - # Don't replace characters inside tags - # - # Strategy: - # 1. extract tags embedded in the string - # a. use the index of each extracted tag to re-insert it later - # b. replace tags in string with numbers (<0>, <1>, etc.) - # c. save extracted tags in a separate list - # 2. convert string - # 3. re-insert the extracted tags - # - def convert_tagged_string (self, string): + def convert(self, string): + """Returns: a converted tagged string + param: string (contains html tags) + + Don't replace characters inside tags + """ (string, tags) = self.detag_string(string) string = self.inner_convert_string(string) string = self.retag_string(string, tags) return string - # extracts tags from string. - # - # returns (string, list) where - # string: string has tags replaced by indices (
... => <0>, <1>, <2>, etc.) - # list: list of the removed tags ("
", "", "") - def detag_string (self, string): + def detag_string(self, string): + """Extracts tags from string. + + returns (string, list) where + string: string has tags replaced by indices (
... => <0>, <1>, <2>, etc.) + list: list of the removed tags ('
', '', '') + """ counter = itertools.count(0) count = lambda m: '<%s>' % counter.next() tags = self.tag_pattern.findall(string) @@ -57,9 +49,8 @@ class Converter: raise Exception('tags dont match:'+string) return (new, tags) - # substitutes each tag back into string, into occurrences of <0>, <1> etc - # - def retag_string (self, string, tags): + def retag_string(self, string, tags): + """substitutes each tag back into string, into occurrences of <0>, <1> etc""" for (i, tag) in enumerate(tags): p = '<%s>' % i string = re.sub(p, tag, string, 1) @@ -69,6 +60,6 @@ class Converter: # ------------------------------ # Customize this in subclasses of Converter - def inner_convert_string (self, string): + def inner_convert_string(self, string): return string # do nothing by default diff --git a/i18n/dummy.py b/i18n/dummy.py index a94d400ba0..798ee525b5 100644 --- a/i18n/dummy.py +++ b/i18n/dummy.py @@ -1,12 +1,6 @@ -# -*- coding: iso-8859-15 -*- - from converter import Converter -# This file converts string resource files. -# Java: file has name like messages_en.properties -# Flex: file has name like locales/en_US/Labels.properties - -# Creates new localization properties files in a dummy language (saved as 'vr', Vardebedian) +# Creates new localization properties files in a dummy language # Each property file is derived from the equivalent en_US file, except # 1. Every vowel is replaced with an equivalent with extra accent marks # 2. Every string is padded out to +30% length to simulate verbose languages (e.g. 
German) @@ -18,19 +12,18 @@ from converter import Converter # Example use: # >>> from dummy import Dummy # >>> c = Dummy() -# >>> print c.convert("hello my name is Bond, James Bond") -# héllö my nämé ïs Bönd, Jämés Bönd Lorem i# +# >>> c.convert("hello my name is Bond, James Bond") +# u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#' # -# >>> print c.convert('don\'t convert tag ids') -# dön't çönvért täg ïds Lorem ipsu# +# >>> c.convert('don\'t convert tag ids') +# u'd\xf6n\'t \xe7\xf6nv\xe9rt t\xe4g \xefds Lorem ipsu#' # -# >>> print c.convert('don\'t convert %(name)s tags on %(date)s') -# dön't çönvért %(name)s tags on %(date)s Lorem ips# +# >>> c.convert('don\'t convert %(name)s tags on %(date)s') +# u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#" # Substitute plain characters with accented lookalikes. # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent -# print "print u'\\x%x'" % 207 TABLE = {'A': u'\xC0', 'a': u'\xE4', 'b': u'\xDF', @@ -62,23 +55,23 @@ PAD_FACTOR = 1.3 class Dummy (Converter): - ''' + """ A string converter that generates dummy strings with fake accents and lorem ipsum padding. 
- ''' + """ - def convert (self, string): + def convert(self, string): result = Converter.convert(self, string) return self.pad(result) - def inner_convert_string (self, string): + def inner_convert_string(self, string): for (k,v) in TABLE.items(): string = string.replace(k, v) return string - def pad (self, string): - '''add some lorem ipsum text to the end of string''' + def pad(self, string): + """add some lorem ipsum text to the end of string""" size = len(string) if size < 7: target = size*3 @@ -86,15 +79,15 @@ class Dummy (Converter): target = int(size*PAD_FACTOR) return string + self.terminate(LOREM[:(target-size)]) - def terminate (self, string): - '''replaces the final char of string with #''' + def terminate(self, string): + """replaces the final char of string with #""" return string[:-1]+'#' - def init_msgs (self, msgs): - ''' + def init_msgs(self, msgs): + """ Make sure the first msg in msgs has a plural property. msgs is list of instances of pofile.Msg - ''' + """ if len(msgs)==0: return headers = msgs[0].get_property('msgstr') @@ -105,82 +98,35 @@ class Dummy (Converter): headers.append(plural) - def convert_msg (self, msg): - ''' + def convert_msg(self, msg): + """ Takes one Msg object and converts it (adds a dummy translation to it) msg is an instance of pofile.Msg - ''' - source = msg.get_property('msgid') - if len(source)==1 and len(source[0])==0: + """ + source = msg.msgid + if len(source)==0: # don't translate empty string return - plural = msg.get_property('msgid_plural') + + plural = msg.msgid_plural if len(plural)>0: # translate singular and plural - foreign_single = self.convert(merge(source)) - foreign_plural = self.convert(merge(plural)) - msg.set_property('msgstr[0]', split(foreign_single)) - msg.set_property('msgstr[1]', split(foreign_plural)) + foreign_single = self.convert(source) + foreign_plural = self.convert(plural) + plural = {'0': self.final_newline(source, foreign_single), + '1': self.final_newline(plural, foreign_plural)} + 
msg.msgstr_plural = plural return else: - src_merged = merge(source) - foreign = self.convert(src_merged) - if len(source)>1: - # If last char is a newline, make sure translation - # has a newline too. - if src_merged[-2:]=='\\n': - foreign += '\\n' - msg.set_property('msgstr', split(foreign)) - - -# ---------------------------------- -# String splitting utility functions - -SPLIT_SIZE = 70 - -def merge (string_list): - '''returns a single string: concatenates string_list''' - return ''.join(string_list) - -# .po file format requires long strings to be broken -# up into several shorter (<80 char) strings. -# The first string is empty (""), which indicates -# that more are to be read on following lines. - -def split (string): - ''' - Returns string split into fragments of a given size. - If there are multiple fragments, insert "" as the first fragment. - ''' - result = [chunk for chunk in chunks(string, SPLIT_SIZE)] - if len(result)>1: - result = [''] + result - return result - -def chunks(string, size): - ''' - Generate fragments of a given size from string. Avoid breaking - the string in the middle of an escape sequence (e.g. "\n") - ''' - strlen=len(string)-1 - esc = False - last = 0 - for i,char in enumerate(string): - if not esc and char == '\\': - esc = True - continue - if esc: - esc = False - if i>=last+size-1 or i==strlen: - chunk = string[last:i+1] - last = i+1 - yield chunk - -# testing -# >>> a = "abcd\\efghijklmnopqrstuvwxyz" -# >>> SPLIT_SIZE = 5 -# >>> split(a) -# ['abcd\\e', 'fghij', 'klmno', 'pqrst', 'uvwxy', 'z'] -# >>> merge(split(a)) -# 'abcd\\efghijklmnopqrstuvwxyz' + foreign = self.convert(source) + msg.msgstr = self.final_newline(source, foreign) + def final_newline(self, original, translated): + """ Returns a new translated string. + If last char of original is a newline, make sure translation + has a newline too. 
+ """ + if len(original)>1: + if original[-1]=='\n' and translated[-1]!='\n': + return translated + '\n' + return translated diff --git a/i18n/make_dummy.py b/i18n/make_dummy.py index 8bf9711c57..4ccfb0d5f1 100755 --- a/i18n/make_dummy.py +++ b/i18n/make_dummy.py @@ -16,7 +16,7 @@ # mitx/conf/locale/vr/LC_MESSAGES/django.po import os, sys -from pofile import PoFile +import polib from dummy import Dummy # Dummy language @@ -28,23 +28,26 @@ from dummy import Dummy OUT_LANG = 'fr' -def main (file): - ''' +def main(file): + """ Takes a source po file, reads it, and writes out a new po file containing a dummy translation. - ''' - pofile = PoFile(file) + """ + if not os.path.exists(file): + raise IOError('File does not exist: %s' % file) + pofile = polib.pofile(file) converter = Dummy() - converter.init_msgs(pofile.msgs) - for msg in pofile.msgs: + converter.init_msgs(pofile.translated_entries()) + for msg in pofile: converter.convert_msg(msg) new_file = new_filename(file, OUT_LANG) create_dir_if_necessary(new_file) - pofile.write(new_file) + pofile.save(new_file) + -def new_filename (original_filename, new_lang): - '''Returns a filename derived from original_filename, using new_lang as the locale''' +def new_filename(original_filename, new_lang): + """Returns a filename derived from original_filename, using new_lang as the locale""" orig_dir = os.path.dirname(original_filename) msgs_dir = os.path.basename(orig_dir) orig_file = os.path.basename(original_filename) diff --git a/i18n/pofile.py b/i18n/pofile.py deleted file mode 100644 index d91f76a925..0000000000 --- a/i18n/pofile.py +++ /dev/null @@ -1,143 +0,0 @@ -import re, codecs -from operator import itemgetter - -# Django stores externalized strings in .po and .mo files. -# po files are human readable and contain metadata about the strings. -# mo files are machine readable and optimized for runtime performance. 
- -# See https://docs.djangoproject.com/en/1.3/topics/i18n/internationalization/ -# See http://www.gnu.org/software/gettext/manual/html_node/PO-Files.html - -# Usage: -# >>> pofile = PoFile('/path/to/file') - - -class PoFile: - - # Django requires po files to be in UTF8 with no BOM (byte order marker) - # see "Mind your charset" on this page: - # https://docs.djangoproject.com/en/1.3/topics/i18n/localization/ - - ENCODING = 'utf_8' - - def __init__ (self, pathname): - self.pathname = pathname - self.parse() - - def parse (self): - with codecs.open(self.pathname, 'r', self.ENCODING) as stream: - text = stream.read() - msgs = text.split('\n\n') - self.msgs = [Msg.parse(m) for m in msgs] - return msgs - - def write (self, out_pathname=None): - if out_pathname == None: - out_pathname = self.pathname - with codecs.open(out_pathname, 'w', self.ENCODING) as stream: - for msg in self.msgs: - msg.write(stream) - -class Msg: - - # A PoFile is parsed into a list of Msg objects, each of which corresponds - # to an externalized string entry. - - # Each Msg object may contain multiple comment lines, capturing metadata - - # Each Msg has a property list (self.props) with a dict of key-values. - # Each value is a list of strings - kwords = ['msgid', 'msgstr', 'msgctxt', 'msgid_plural'] - - # Line might begin with "msgid ..." or "msgid[2] ..." - pattern = re.compile('^(\w+)(\[(\d+)\])?') - - @classmethod - def parse (cls, string): - ''' - String is a fragment of a pofile (.po) source file. - This returns a Msg object created by parsing string. 
- ''' - lines = string.strip().split('\n') - msg = Msg() - msg.comments = [] - msg.props = {} - last_kword = None - for line in lines: - if line[0]=='#': - msg.comments.append(line) - elif line[0]=='"' and last_kword != None: - msg.add_string(last_kword, line) - else: - match = cls.pattern.search(line) - if match: - kword = match.group(1) - last_kword = kword - if kword in cls.kwords: - if match.group(3): - key = '%s[%s]' % (kword, match.group(3)) - msg.add_string(key, line[len(key):]) - else: - msg.add_string(kword, line[len(kword):]) - return msg - - def get_property (self, kword): - '''returns value for kword. Typically returns a list of strings''' - return self.props.get(kword, []) - - def set_property (self, kword, value): - '''sets value for kword. Typically returns a list of strings''' - self.props[kword] = value - - def add_string (self, kword, line): - '''Append line to the list of values stored for the property kword''' - props = self.props - value = self.get_property(kword) - value.append(self.cleanup_string(line)) - self.set_property(kword, value) - - def cleanup_string(self, string): - string = string.strip() - if len(string)>1 and string[0]=='"' and string[-1]=='"': - return string[1:-1] - else: - return string - - def write (self, stream): - '''Write a Msg to stream''' - for comment in self.comments: - stream.write(comment) - stream.write('\n') - for (key, values) in self.sort(self.props.items()): - stream.write(key + ' ') - for value in values: - stream.write('"'+value+'"') - stream.write('\n') - stream.write('\n') - - # Preferred ordering of key output - # Always print 'msgctxt' first, then 'msgid', etc. 
- KEY_ORDER = ('msgctxt', 'msgid', 'msgid_plural', 'msgstr', 'msgstr[0]', 'msgstr[1]') - - def keyword_compare (self, k1, k2): - for key in self.KEY_ORDER: - if key == k1: - return -1 - if key == k2: - return 1 - return 0 - - def sort (self, plist): - '''sorts a propertylist to bring the high-priority keys to the beginning of the list''' - return sorted(plist, key=itemgetter(0), cmp=self.keyword_compare) - - - -# Testing -# -# >>> file = 'mitx/conf/locale/en/LC_MESSAGES/django.po' -# >>> file1 = 'mitx/conf/locale/en/LC_MESSAGES/django1.po' -# >>> po = PoFile(file) -# >>> po.write(file1) -# $ diff file file1 - diff --git a/i18n/update.py b/i18n/update.py index 8a865c2528..447dcf71d5 100755 --- a/i18n/update.py +++ b/i18n/update.py @@ -42,8 +42,8 @@ BABEL_OUT = MSGS_DIR + '/mako.po' # These are the shell commands invoked by main() COMMANDS = { 'babel_mako': 'pybabel extract -F %s -c "TRANSLATORS:" . -o %s' % (BABEL_CONFIG, BABEL_OUT), - 'make_django': 'django-admin.py makemessages --all --extension html -l en', - 'make_djangojs': 'django-admin.py makemessages --all -d djangojs --extension js -l en', + 'make_django': 'django-admin.py makemessages --all --ignore=src/* --extension html -l en', + 'make_djangojs': 'django-admin.py makemessages --all -d djangojs --ignore=src/* --extension js -l en', 'msgcat' : 'msgcat -o merged.po django.po %s' % BABEL_OUT, 'rename_django' : 'mv django.po django_old.po', 'rename_merged' : 'mv merged.po django.po', @@ -81,6 +81,15 @@ def main (): create_dir_if_necessary(LOCALE_DIR) log.info('Executing all commands from %s' % BASE_DIR) + remove_files = ['django.po', 'djangojs.po', 'nonesuch'] + for filename in remove_files: + path = MSGS_DIR + '/' + filename + log.info('Deleting file %s' % path) + if not os.path.exists(path): + log.warn("File does not exist: %s" % path) + else: + os.remove(path) + # Generate or update human-readable .po files from all source code. 
execute('babel_mako', log=log) execute('make_django', log=log) From 111ec62bb62f17cd3f2548ceb89926505d3c3c27 Mon Sep 17 00:00:00 2001 From: Steve Strassmann Date: Wed, 24 Apr 2013 10:42:35 -0400 Subject: [PATCH 6/8] merged from master --- cms/djangoapps/contentstore/tests/test_i18n.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cms/djangoapps/contentstore/tests/test_i18n.py b/cms/djangoapps/contentstore/tests/test_i18n.py index c3c0b25fc3..cbfa1c6bef 100644 --- a/cms/djangoapps/contentstore/tests/test_i18n.py +++ b/cms/djangoapps/contentstore/tests/test_i18n.py @@ -42,7 +42,6 @@ class InternationalizationTest(ModuleStoreTestCase): def test_course_plain_english(self): """Test viewing the index page with no courses""" - # Create a course so there is something to view self.client = Client() self.client.login(username=self.uname, password=self.password) @@ -54,7 +53,6 @@ class InternationalizationTest(ModuleStoreTestCase): def test_course_explicit_english(self): """Test viewing the index page with no courses""" - # Create a course so there is something to view self.client = Client() self.client.login(username=self.uname, password=self.password) @@ -81,7 +79,6 @@ class InternationalizationTest(ModuleStoreTestCase): @skip def test_course_with_accents (self): """Test viewing the index page with no courses""" - # Create a course so there is something to view self.client = Client() self.client.login(username=self.uname, password=self.password) From 8ab467a9fc1098385d08b04274759263e7f802eb Mon Sep 17 00:00:00 2001 From: Steve Strassmann Date: Wed, 24 Apr 2013 11:17:02 -0400 Subject: [PATCH 7/8] add config files for PyBabel and update.py --- .gitignore | 10 ++++++++++ conf/locale/babel.cfg | 19 +++++++++++++++++++ conf/locale/config | 1 + 3 files changed, 30 insertions(+) create mode 100644 conf/locale/babel.cfg create mode 100644 conf/locale/config diff --git a/.gitignore b/.gitignore index 8fb170c30f..e7b0b16be8 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 
+31,13 @@ cover_html/ chromedriver.log /nbproject ghostdriver.log +/cms/doc/en/getting_started/ +/conf/locale/en +/conf/locale/fr +create-dev-env.hack.sh +distribute-0.6.36.tar.gz +i18n/googleTranslate.hack.py +i18n/mitx/conf/locale/fr/LC_MESSAGES/django.po +i18n/split.py +.gitignore + diff --git a/conf/locale/babel.cfg b/conf/locale/babel.cfg new file mode 100644 index 0000000000..5b8333cf1e --- /dev/null +++ b/conf/locale/babel.cfg @@ -0,0 +1,19 @@ +# Extraction from Python source files +#[python: cms/**.py] +#[python: lms/**.py] +#[python: common/**.py] + +# Extraction from Javscript source files +#[javascript: cms/**.js] +#[javascript: lms/**.js] +#[javascript: common/static/js/capa/**.js] +#[javascript: common/static/js/course_groups/**.js] +# do not extract from common/static/js/vendor/** + +# Extraction from Mako templates +[mako: cms/templates/**.html] +input_encoding = utf-8 +[mako: lms/templates/**.html] +input_encoding = utf-8 +[mako: common/templates/**.html] +input_encoding = utf-8 diff --git a/conf/locale/config b/conf/locale/config new file mode 100644 index 0000000000..fe811ee02e --- /dev/null +++ b/conf/locale/config @@ -0,0 +1 @@ +{"locales" : ["en", "fr", "de"]} From dd71ae818a6bbc83992b94cccb7ed9adf9d1632f Mon Sep 17 00:00:00 2001 From: Steve Strassmann Date: Wed, 24 Apr 2013 11:33:47 -0400 Subject: [PATCH 8/8] ModuleStoreTestCase moved to new location --- cms/djangoapps/contentstore/tests/test_i18n.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cms/djangoapps/contentstore/tests/test_i18n.py b/cms/djangoapps/contentstore/tests/test_i18n.py index cbfa1c6bef..e6d68ba004 100644 --- a/cms/djangoapps/contentstore/tests/test_i18n.py +++ b/cms/djangoapps/contentstore/tests/test_i18n.py @@ -4,7 +4,7 @@ from django.core.urlresolvers import reverse from django.contrib.auth.models import User from django.test.client import Client -from .utils import ModuleStoreTestCase +from xmodule.modulestore.tests.django_utils import 
ModuleStoreTestCase class InternationalizationTest(ModuleStoreTestCase): """