From 6f103488360dc2e7134849817770f1971efe38d2 Mon Sep 17 00:00:00 2001
From: Steve Strassmann <sstrassmann@vmware.com>
Date: Tue, 23 Apr 2013 10:07:51 -0400
Subject: [PATCH] addressed comments from pull request

---
 .../contentstore/tests/test_i18n.py           |  28 +++-
 cms/envs/common.py                            |   2 +
 cms/static/js/base.js                         |  11 +-
 i18n/converter.py                             |  79 +++++-----
 i18n/dummy.py                                 | 136 +++++------------
 i18n/make_dummy.py                            |  23 +--
 i18n/pofile.py                                | 143 ------------------
 i18n/update.py                                |  13 +-
 8 files changed, 131 insertions(+), 304 deletions(-)
 delete mode 100644 i18n/pofile.py
diff --git a/cms/djangoapps/contentstore/tests/test_i18n.py b/cms/djangoapps/contentstore/tests/test_i18n.py
index fba2da10dd..c3c0b25fc3 100644
--- a/cms/djangoapps/contentstore/tests/test_i18n.py
+++ b/cms/djangoapps/contentstore/tests/test_i18n.py
@@ -1,12 +1,12 @@
-# -*- coding: iso-8859-1 -*-
+from unittest import skip
 
-from django.test import TestCase
 from django.core.urlresolvers import reverse
 from django.contrib.auth.models import User
 from django.test.client import Client
-from nose.tools import nottest
 
-class InternationalizationTest(TestCase):
+from .utils import ModuleStoreTestCase
+
+class InternationalizationTest(ModuleStoreTestCase):
     """
     Tests to validate Internationalization.
     """
@@ -52,6 +52,22 @@ class InternationalizationTest(TestCase):
             status_code=200,
             html=True)
 
+    def test_course_explicit_english(self):
+        """Test viewing the index page when the request explicitly asks for English ('en')"""
+        # Log in as the test user before requesting the index page
+        self.client = Client()
+        self.client.login(username=self.uname, password=self.password)
+        
+        resp = self.client.get(reverse('index'),
+                               {},
+                               HTTP_ACCEPT_LANGUAGE='en'
+                               )
+
+        self.assertContains(resp,
+            '<h1 class="title-1">My Courses</h1>',
+            status_code=200,
+            html=True)        
+
 
     # ****
     # NOTE:
@@ -62,7 +78,7 @@ class InternationalizationTest(TestCase):
     # actual French at that time.
     
     # Test temporarily disable since it depends on creation of dummy strings
-    @nottest
+    @skip
     def test_course_with_accents (self):
         """Test viewing the index page with no courses"""
         # Create a course so there is something to view
@@ -75,7 +91,7 @@ class InternationalizationTest(TestCase):
                                )
 
         TEST_STRING = u'<h1 class="title-1">' \
-                      + u'My Çöürsés L#' \
+                      + u'My \xc7\xf6\xfcrs\xe9s L#' \
                       + u'</h1>'
         
         self.assertContains(resp,
diff --git a/cms/envs/common.py b/cms/envs/common.py
index 3cf5fe15b3..614491f50d 100644
--- a/cms/envs/common.py
+++ b/cms/envs/common.py
@@ -128,6 +128,8 @@ MIDDLEWARE_CLASSES = (
     'django.contrib.messages.middleware.MessageMiddleware',
     'track.middleware.TrackMiddleware',
     'mitxmako.middleware.MakoMiddleware',
+
+    # Detects user-requested locale from 'accept-language' header in http request
     'django.middleware.locale.LocaleMiddleware',
 
     'django.middleware.transaction.TransactionMiddleware'
diff --git a/cms/static/js/base.js b/cms/static/js/base.js
index fa48b1699e..4112d2bb8e 100644
--- a/cms/static/js/base.js
+++ b/cms/static/js/base.js
@@ -826,11 +826,14 @@ function saveSetSectionScheduleDate(e) {
         data: JSON.stringify({ 'id': id, 'metadata': {'start': start}})
     }).success(function () {
             var $thisSection = $('.courseware-section[data-id="' + id + '"]');
+            var format = gettext('<strong>Will Release:</strong> %(date)s at %(time)s UTC');
+            var willReleaseAt = interpolate(format, {date: input_date, time: input_time}, true);  // named mode looks values up by key
             $thisSection.find('.section-published-date').html(
-	      '<span class="published-status"><strong>' + gettext('Will Release:') +
-		'</strong> ' + input_date + ' at ' + input_time +
-		' UTC</span><a href="#" class="edit-button" data-date="' + input_date +
-		'" data-time="' + input_time + '" data-id="' + id + '">' +
+	      '<span class="published-status">' + willReleaseAt + '</span>' +
+              '<a href="#" class="edit-button' +  // class attribute is closed by the quote that starts the next fragment
+		'" data-date="' + input_date +
+		'" data-time="' + input_time +
+		'" data-id="' + id + '">' +
 		gettext('Edit') + '</a>');
             $thisSection.find('.section-published-date').animate({
                 'background-color': 'rgb(182,37,104)'
diff --git a/i18n/converter.py b/i18n/converter.py
index fe66ff3e74..63d8f83e00 100644
--- a/i18n/converter.py
+++ b/i18n/converter.py
@@ -1,53 +1,45 @@
-import re, itertools
-
-# Converter is an abstract class that transforms strings.
-# It hides embedded tags (HTML or Python sequences) from transformation
-#
-# To implement Converter, provide implementation for inner_convert_string()
-
+import re
+import itertools
 
 class Converter:
+    """Converter is an abstract class that transforms strings.
+       It hides embedded tags (HTML or Python sequences) from transformation
+  
+       To implement Converter, provide implementation for inner_convert_string()
 
+       Strategy:
+         1. extract tags embedded in the string
+           a. use the index of each extracted tag to re-insert it later
+           b. replace tags in string with numbers (<0>, <1>, etc.)
+           c. save extracted tags in a separate list
+         2. convert string
+         3. re-insert the extracted tags
+
+    """
+    
     # matches tags like these:
-    #     HTML:   <B>, </B>, <BR/>, <textformat leading="10">
-    #     Python: %(date)s, %(name)s
-    #
-    tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\(.*\)\w)', re.I)
+    #   HTML:   <B>, </B>, <BR/>, <textformat leading="10">
+    #   Python: %(date)s, %(name)s
+    tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\([^)]*\)\w)', re.I)
 
-
-    def convert (self, string):
-        if self.tag_pattern.search(string):
-            result = self.convert_tagged_string(string)
-        else:
-            result = self.inner_convert_string(string)
-        return result
-
-    # convert_tagged_string(string):
-    #    returns: a converted tagged string
-    #    param: string (contains html tags)
-    #
-    #    Don't replace characters inside tags
-    #
-    # Strategy:
-    #    1. extract tags embedded in the string
-    #      a. use the index of each extracted tag to re-insert it later
-    #      b. replace tags in string with numbers (<0>, <1>, etc.)
-    #      c. save extracted tags in a separate list
-    #    2. convert string
-    #    3. re-insert the extracted tags
-    #
-    def convert_tagged_string (self, string):
+    def convert(self, string):
+        """Returns: a converted tagged string
+           param: string (contains html tags)
+    
+           Don't replace characters inside tags
+        """
         (string, tags) = self.detag_string(string)
         string = self.inner_convert_string(string)
         string = self.retag_string(string, tags)
         return string
 
-    # extracts tags from string.
-    #
-    # returns (string, list) where
-    #   string: string has tags replaced by indices (<BR>... => <0>, <1>, <2>, etc.)
-    #   list: list of the removed tags ("<BR>", "<I>", "</I>")
-    def detag_string (self, string):
+    def detag_string(self, string):
+        """Extracts tags from string.
+        
+           returns (string, list) where
+           string: string has tags replaced by indices (<BR>... => <0>, <1>, <2>, etc.)
+           list: list of the removed tags ('<BR>', '<I>', '</I>')
+        """
         counter = itertools.count(0)
         count = lambda m: '<%s>' % counter.next()
         tags = self.tag_pattern.findall(string)
@@ -57,9 +49,8 @@ class Converter:
             raise Exception('tags dont match:'+string)
         return (new, tags)
 
-    # substitutes each tag back into string, into occurrences of <0>, <1> etc
-    #
-    def retag_string (self, string, tags):
+    def retag_string(self, string, tags):
+        """substitutes each tag back into string, into occurrences of <0>, <1> etc"""
         for (i, tag) in enumerate(tags):
             p = '<%s>' % i
             string = re.sub(p, tag, string, 1)
@@ -69,6 +60,6 @@ class Converter:
     # ------------------------------
     # Customize this in subclasses of Converter
 
-    def inner_convert_string (self, string):
+    def inner_convert_string(self, string):
         return string  # do nothing by default
 
diff --git a/i18n/dummy.py b/i18n/dummy.py
index a94d400ba0..798ee525b5 100644
--- a/i18n/dummy.py
+++ b/i18n/dummy.py
@@ -1,12 +1,6 @@
-# -*- coding: iso-8859-15 -*-
-
 from converter import Converter
 
-# This file converts string resource files.
-#   Java: file has name like messages_en.properties
-#   Flex: file has name like locales/en_US/Labels.properties
-
-# Creates new localization properties files in a dummy language (saved as 'vr', Vardebedian)
+# Creates new localization properties files in a dummy language
 # Each property file is derived from the equivalent en_US file, except
 # 1. Every vowel is replaced with an equivalent with extra accent marks
 # 2. Every string is padded out to +30% length to simulate verbose languages (e.g. German)
@@ -18,19 +12,18 @@ from converter import Converter
 # Example use:
 # >>> from dummy import Dummy
 # >>> c = Dummy()
-# >>> print c.convert("hello my name is Bond, James Bond")
-# héllö my nämé ïs Bönd, Jämés Bönd Lorem i#
+# >>> c.convert("hello my name is Bond, James Bond")
+# u'h\xe9ll\xf6 my n\xe4m\xe9 \xefs B\xf6nd, J\xe4m\xe9s B\xf6nd Lorem i#'
 #
-# >>> print c.convert('don\'t convert <a href="href">tag ids</a>')
-# dön't çönvért <a href="href">täg ïds</a> Lorem ipsu#
+# >>> c.convert('don\'t convert <a href="href">tag ids</a>')
+# u'd\xf6n\'t \xe7\xf6nv\xe9rt <a href="href">t\xe4g \xefds</a> Lorem ipsu#'
 #
-# >>> print c.convert('don\'t convert %(name)s tags on %(date)s')
-# dön't çönvért %(name)s tags on %(date)s Lorem ips#
+# >>> c.convert('don\'t convert %(name)s tags on %(date)s')
+# u"d\xf6n't \xe7\xf6nv\xe9rt %(name)s t\xe4gs \xf6n %(date)s Lorem ips#"
 
 
 # Substitute plain characters with accented lookalikes.
 # http://tlt.its.psu.edu/suggestions/international/web/codehtml.html#accent
-# print "print u'\\x%x'" % 207
 TABLE = {'A': u'\xC0',
          'a': u'\xE4',
          'b': u'\xDF',
@@ -62,23 +55,23 @@ PAD_FACTOR = 1.3
 
 
 class Dummy (Converter):
-    '''
+    """
     A string converter that generates dummy strings with fake accents
     and lorem ipsum padding.
-    '''
+    """
 
-    def convert (self, string):
+    def convert(self, string):
         result = Converter.convert(self, string)
         return self.pad(result)
 
-    def inner_convert_string (self, string):
+    def inner_convert_string(self, string):
         for (k,v) in TABLE.items():
             string = string.replace(k, v)
         return string
 
 
-    def pad (self, string):
-        '''add some lorem ipsum text to the end of string'''
+    def pad(self, string):
+        """add some lorem ipsum text to the end of string"""
         size = len(string)
         if size < 7:
             target = size*3
@@ -86,15 +79,15 @@ class Dummy (Converter):
             target = int(size*PAD_FACTOR)
         return string + self.terminate(LOREM[:(target-size)])
 
-    def terminate (self, string):
-        '''replaces the final char of string with #'''
+    def terminate(self, string):
+        """replaces the final char of string with #"""
         return string[:-1]+'#'
 
-    def init_msgs (self, msgs):
-        '''
+    def init_msgs(self, msgs):
+        """
         Make sure the first msg in msgs has a plural property.
         msgs is list of instances of pofile.Msg
-        '''
+        """
         if len(msgs)==0:
             return
         headers = msgs[0].get_property('msgstr')
@@ -105,82 +98,35 @@ class Dummy (Converter):
             headers.append(plural)
         
 
-    def convert_msg (self, msg):
-        '''
+    def convert_msg(self, msg):
+        """
         Takes one Msg object and converts it (adds a dummy translation to it)
         msg is an instance of pofile.Msg
-        '''
-        source = msg.get_property('msgid')
-        if len(source)==1 and len(source[0])==0:
+        """
+        source = msg.msgid
+        if len(source)==0:
             # don't translate empty string
             return
-        plural = msg.get_property('msgid_plural')
+
+        plural = msg.msgid_plural
         if len(plural)>0:
             # translate singular and plural
-            foreign_single = self.convert(merge(source))
-            foreign_plural = self.convert(merge(plural))
-            msg.set_property('msgstr[0]', split(foreign_single))
-            msg.set_property('msgstr[1]', split(foreign_plural))
+            foreign_single = self.convert(source)
+            foreign_plural = self.convert(plural)
+            plural = {'0': self.final_newline(source, foreign_single),
+                      '1': self.final_newline(plural, foreign_plural)}
+            msg.msgstr_plural = plural
             return
         else:
-            src_merged = merge(source)
-            foreign = self.convert(src_merged)
-            if len(source)>1:
-                # If last char is a newline, make sure translation
-                # has a newline too.
-                if src_merged[-2:]=='\\n':
-                    foreign += '\\n'
-            msg.set_property('msgstr', split(foreign))
-
-
-# ----------------------------------
-# String splitting utility functions
-
-SPLIT_SIZE = 70
-
-def merge (string_list):
-    '''returns a single string: concatenates string_list'''
-    return ''.join(string_list)
-
-# .po file format requires long strings to be broken
-# up into several shorter (<80 char) strings.
-# The first string is empty (""), which indicates
-# that more are to be read on following lines.
-
-def split (string):
-    '''
-    Returns string split into fragments of a given size.
-    If there are multiple fragments, insert "" as the first fragment.
-    '''
-    result = [chunk for chunk in chunks(string, SPLIT_SIZE)]
-    if len(result)>1:
-        result = [''] + result
-    return result
-
-def chunks(string, size):
-    '''
-    Generate fragments of a given size from string. Avoid breaking
-    the string in the middle of an escape sequence (e.g. "\n")
-    '''
-    strlen=len(string)-1
-    esc = False
-    last = 0
-    for i,char in enumerate(string):
-        if not esc and char == '\\':
-            esc = True
-            continue
-        if esc:
-            esc = False
-        if i>=last+size-1 or i==strlen:
-            chunk = string[last:i+1]
-            last = i+1
-            yield chunk
-
-# testing
-# >>> a = "abcd\\efghijklmnopqrstuvwxyz"
-# >>> SPLIT_SIZE = 5
-# >>> split(a)
-# ['abcd\\e', 'fghij', 'klmno', 'pqrst', 'uvwxy', 'z']
-# >>> merge(split(a))
-# 'abcd\\efghijklmnopqrstuvwxyz'
+            foreign = self.convert(source)
+            msg.msgstr = self.final_newline(source, foreign)
 
+    def final_newline(self, original, translated):
+        """Returns translated, with a final newline added when original has one.
+
+        Works for strings of any length: a bare newline msgid is handled
+        and an empty translated string does not raise.
+        """
+        if original.endswith('\n') and not translated.endswith('\n'):
+            return translated + '\n'
+        return translated
diff --git a/i18n/make_dummy.py b/i18n/make_dummy.py
index 8bf9711c57..4ccfb0d5f1 100755
--- a/i18n/make_dummy.py
+++ b/i18n/make_dummy.py
@@ -16,7 +16,7 @@
 #    mitx/conf/locale/vr/LC_MESSAGES/django.po
 
 import os, sys
-from pofile import PoFile
+import polib
 from dummy import Dummy
 
 # Dummy language 
@@ -28,23 +28,26 @@ from dummy import Dummy
 
 OUT_LANG = 'fr'
 
-def main (file):
-    '''
+def main(file):
+    """
     Takes a source po file, reads it, and writes out a new po file
     containing a dummy translation.
-    '''
-    pofile = PoFile(file)
+    """
+    if not os.path.exists(file):
+        raise IOError('File does not exist: %s' % file)
+    pofile = polib.pofile(file)
     converter = Dummy()
-    converter.init_msgs(pofile.msgs)
-    for msg in pofile.msgs:
+    converter.init_msgs(pofile.translated_entries())
+    for msg in pofile:
         converter.convert_msg(msg)
     new_file = new_filename(file, OUT_LANG)
     create_dir_if_necessary(new_file)
-    pofile.write(new_file)
+    pofile.save(new_file)
+    
 
 
-def new_filename (original_filename, new_lang):
-    '''Returns a filename derived from original_filename, using new_lang as the locale'''    
+def new_filename(original_filename, new_lang):
+    """Returns a filename derived from original_filename, using new_lang as the locale"""
     orig_dir = os.path.dirname(original_filename)
     msgs_dir = os.path.basename(orig_dir)
     orig_file = os.path.basename(original_filename)
diff --git a/i18n/pofile.py b/i18n/pofile.py
deleted file mode 100644
index d91f76a925..0000000000
--- a/i18n/pofile.py
+++ /dev/null
@@ -1,143 +0,0 @@
-import re, codecs
-from operator import itemgetter
-
-# Django stores externalized strings in .po and .mo files.
-#  po files are human readable and contain metadata about the strings.
-#  mo files are machine readable and optimized for runtime performance.
-
-# See https://docs.djangoproject.com/en/1.3/topics/i18n/internationalization/
-# See http://www.gnu.org/software/gettext/manual/html_node/PO-Files.html
-
-# Usage:
-#   >>> pofile = PoFile('/path/to/file')
-
-
-class PoFile:
-
-    # Django requires po files to be in UTF8 with no BOM (byte order marker)
-    # see "Mind your charset" on this page:
-    #     https://docs.djangoproject.com/en/1.3/topics/i18n/localization/
-
-    ENCODING = 'utf_8'
-
-    def __init__ (self, pathname):
-        self.pathname = pathname
-        self.parse()
-
-    def parse (self):
-        with codecs.open(self.pathname, 'r', self.ENCODING) as stream:
-            text = stream.read()
-        msgs = text.split('\n\n')
-        self.msgs = [Msg.parse(m) for m in msgs]
-        return msgs
-
-    def write (self, out_pathname=None):
-        if out_pathname == None:
-            out_pathname = self.pathname
-        with codecs.open(out_pathname, 'w', self.ENCODING) as stream:
-            for msg in self.msgs:
-                msg.write(stream)
-
-class Msg:
-
-    # A PoFile is parsed into a list of Msg objects, each of which corresponds
-    # to an externalized string entry.
-
-    # Each Msg object may contain multiple comment lines, capturing metadata
-    
-    # Each Msg has a property list (self.props) with a dict of key-values.
-    # Each value is a list of strings
-    kwords = ['msgid', 'msgstr', 'msgctxt', 'msgid_plural']
-
-    # Line might begin with "msgid ..." or "msgid[2] ..."
-    pattern = re.compile('^(\w+)(\[(\d+)\])?')
-    
-    @classmethod
-    def parse (cls, string):
-        '''
-        String is a fragment of a pofile (.po) source file.
-        This returns a Msg object created by parsing string.
-        '''
-        lines = string.strip().split('\n')
-        msg = Msg()
-        msg.comments = []
-        msg.props = {}
-        last_kword = None
-        for line in lines:
-            if line[0]=='#':
-                msg.comments.append(line)
-            elif line[0]=='"' and last_kword != None:
-                msg.add_string(last_kword, line)
-            else:
-                match = cls.pattern.search(line)
-                if match:
-                    kword = match.group(1)
-                    last_kword = kword
-                    if kword in cls.kwords:
-                        if match.group(3):
-                            key = '%s[%s]' % (kword, match.group(3))
-                            msg.add_string(key, line[len(key):])
-                        else:
-                            msg.add_string(kword, line[len(kword):])
-        return msg
-    
-    def get_property (self, kword):
-        '''returns value for kword. Typically returns a list of strings'''
-        return self.props.get(kword, [])
-
-    def set_property (self, kword, value):
-        '''sets value for kword. Typically returns a list of strings'''
-        self.props[kword] = value
-
-    def add_string (self, kword, line):
-        '''Append line to the list of values stored for the property kword'''
-        props = self.props
-        value = self.get_property(kword)
-        value.append(self.cleanup_string(line))
-        self.set_property(kword, value)
-
-    def cleanup_string(self, string):
-        string = string.strip()
-        if len(string)>1 and string[0]=='"' and string[-1]=='"':
-            return string[1:-1]
-        else:
-            return string
-
-    def write (self, stream):
-        '''Write a Msg to stream'''
-        for comment in self.comments:
-            stream.write(comment)
-            stream.write('\n')
-        for (key, values) in self.sort(self.props.items()):
-            stream.write(key + ' ')
-            for value in values:
-                stream.write('"'+value+'"')
-                stream.write('\n')
-        stream.write('\n')
-
-    # Preferred ordering of key output
-    # Always print 'msgctxt' first, then 'msgid', etc.
-    KEY_ORDER = ('msgctxt', 'msgid', 'msgid_plural', 'msgstr', 'msgstr[0]', 'msgstr[1]')
-
-    def keyword_compare (self, k1, k2):
-        for key in self.KEY_ORDER:
-            if key == k1:
-                return -1
-            if key == k2:
-                return 1
-        return 0
-
-    def sort (self, plist):
-        '''sorts a propertylist to bring the high-priority keys to the beginning of the list'''
-        return sorted(plist, key=itemgetter(0), cmp=self.keyword_compare)
-
-
-
-# Testing
-#
-# >>> file  = 'mitx/conf/locale/en/LC_MESSAGES/django.po'
-# >>> file1 = 'mitx/conf/locale/en/LC_MESSAGES/django1.po'
-# >>> po = PoFile(file)
-# >>> po.write(file1)
-# $ diff file file1
-
diff --git a/i18n/update.py b/i18n/update.py
index 8a865c2528..447dcf71d5 100755
--- a/i18n/update.py
+++ b/i18n/update.py
@@ -42,8 +42,8 @@ BABEL_OUT = MSGS_DIR + '/mako.po'
 # These are the shell commands invoked by main()
 COMMANDS = {
     'babel_mako': 'pybabel extract -F %s -c "TRANSLATORS:" . -o %s' % (BABEL_CONFIG, BABEL_OUT),
-    'make_django': 'django-admin.py makemessages --all --extension html -l en',
-    'make_djangojs': 'django-admin.py makemessages --all -d djangojs --extension js -l en',
+    'make_django': 'django-admin.py makemessages --all --ignore=src/* --extension html -l en',
+    'make_djangojs': 'django-admin.py makemessages --all -d djangojs --ignore=src/* --extension js -l en',
     'msgcat' : 'msgcat -o merged.po django.po %s' % BABEL_OUT,
     'rename_django' : 'mv django.po django_old.po',
     'rename_merged' : 'mv merged.po django.po',
@@ -81,6 +81,15 @@ def main ():
     create_dir_if_necessary(LOCALE_DIR)
     log.info('Executing all commands from %s' % BASE_DIR)
 
+    remove_files = ['django.po', 'djangojs.po']
+    for filename in remove_files:
+        path = MSGS_DIR + '/' + filename
+        if os.path.exists(path):
+            log.info('Deleting file %s' % path)
+            os.remove(path)
+        else:
+            log.warn("File does not exist: %s" % path)
+
     # Generate or update human-readable .po files from all source code.
     execute('babel_mako', log=log)
     execute('make_django', log=log)