diff --git a/.tx/config b/.tx/config index 9288418924..fd12506f17 100644 --- a/.tx/config +++ b/.tx/config @@ -7,18 +7,36 @@ source_file = conf/locale/en/LC_MESSAGES/django-partial.po source_lang = en type = PO +[edx-platform.django-studio] +file_filter = conf/locale//LC_MESSAGES/django-studio.po +source_file = conf/locale/en/LC_MESSAGES/django-studio.po +source_lang = en +type = PO + [edx-platform.djangojs] file_filter = conf/locale//LC_MESSAGES/djangojs.po source_file = conf/locale/en/LC_MESSAGES/djangojs.po source_lang = en type = PO +[edx-platform.djangojs-studio] +file_filter = conf/locale//LC_MESSAGES/djangojs-studio.po +source_file = conf/locale/en/LC_MESSAGES/djangojs-studio.po +source_lang = en +type = PO + [edx-platform.mako] file_filter = conf/locale//LC_MESSAGES/mako.po source_file = conf/locale/en/LC_MESSAGES/mako.po source_lang = en type = PO +[edx-platform.mako-studio] +file_filter = conf/locale//LC_MESSAGES/mako-studio.po +source_file = conf/locale/en/LC_MESSAGES/mako-studio.po +source_lang = en +type = PO + [edx-platform.messages] file_filter = conf/locale//LC_MESSAGES/messages.po source_file = conf/locale/en/LC_MESSAGES/messages.po diff --git a/conf/locale/config.yaml b/conf/locale/config.yaml index a80bbca297..954966ef3e 100644 --- a/conf/locale/config.yaml +++ b/conf/locale/config.yaml @@ -31,3 +31,22 @@ locales: # The locale used for fake-accented English, for testing. dummy-locale: eo + +# How should .po files be segmented? See i18n/segment.py for details. Strings +# that are only found in a particular segment are segregated into that .po file +# so that translators can focus on separate parts of the product. +# +# We segregate Studio so we can provide new languages for LMS without having to +# also translate the Studio strings. LMS needs the strings from lms/* and +# common/*, so those will stay in the main .po file. +segment: + django-partial.po: # This .po file.. + django-studio.po: # produces this .po file.. + - cms/* # by segregating strings from these files. + # Anything that doesn't match a pattern stays in the original file. + djangojs.po: + djangojs-studio.po: + - cms/* + mako.po: + mako-studio.po: + - cms/* diff --git a/i18n/extract.py b/i18n/extract.py index b3c59ec554..1d7fa7ea5f 100755 --- a/i18n/extract.py +++ b/i18n/extract.py @@ -31,7 +31,7 @@ BABEL_CONFIG = BASE_DIR.relpathto(LOCALE_DIR.joinpath('babel.cfg')) # Use relpath to reduce noise in logs BABEL_OUT = BASE_DIR.relpathto(CONFIGURATION.source_messages_dir.joinpath('mako.po')) -SOURCE_WARN = 'This English source file is machine-generated. Do not check it into github' +SOURCE_WARN = 'This English source file is machine-generated. Do not check it into git.' LOG = logging.getLogger(__name__) diff --git a/i18n/segment.py b/i18n/segment.py new file mode 100755 index 0000000000..8420694eae --- /dev/null +++ b/i18n/segment.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python +""" +Segment a .po file to produce smaller files based on the locations of the +messages. +""" + +import copy +import fnmatch +import logging +import sys + +import polib + +from i18n.config import CONFIGURATION + +LOG = logging.getLogger(__name__) + + +def segment_pofiles(locale): + """Segment all the pofiles for `locale`. + + Returns a set of filenames, all the segment files written. + + """ + files_written = set() + for filename, segments in CONFIGURATION.segment.items(): + filename = CONFIGURATION.get_messages_dir(locale) / filename + files_written.update(segment_pofile(filename, segments)) + return files_written + + +def segment_pofile(filename, segments): + """Segment a .po file using patterns in `segments`. + + The .po file at `filename` is read, and the occurrence locations of its + messages are examined. `segments` is a dictionary: the keys are segment + .po filenames, the values are lists of patterns:: + + { + 'django-studio.po': [ + 'cms/*', + 'some-other-studio-place/*', + ], + 'django-weird.po': [ + '*/weird_*.*', + ], + } + + If all a message's occurrences match the patterns for a segment, then that + message is written to the new segmented .po file. + + Any message that matches no segments, or more than one, is written back to + the original file. + + Arguments: + filename (path.path): a path object referring to the original .po file. + segments (dict): specification of the segments to create. + + Returns: + a set of path objects, all the segment files written. + + """ + reading_msg = "Reading {num} entries from {file}" + writing_msg = "Writing {num} entries to {file}" + + source_po = polib.pofile(filename) + LOG.info(reading_msg.format(file=filename, num=len(source_po))) + + # A new pofile just like the source, but with no messages. We'll put + # anything not segmented into this file. + remaining_po = copy.deepcopy(source_po) + remaining_po[:] = [] + + # Turn the segments dictionary into two structures: segment_patterns is a + # list of (pattern, segmentfile) pairs. segment_po_files is a dict mapping + # segment file names to pofile objects of their contents. + segment_po_files = {filename: remaining_po} + segment_patterns = [] + for segmentfile, patterns in segments.items(): + segment_po_files[segmentfile] = copy.deepcopy(remaining_po) + segment_patterns.extend((pat, segmentfile) for pat in patterns) + + # Examine each message in the source file. If all of its occurrences match + # a pattern for the same segment, it goes in that segment. Otherwise, it + # goes in remaining. + for msg in source_po: + msg_segments = set() + for occ_file, _ in msg.occurrences: + for pat, segment_file in segment_patterns: + if fnmatch.fnmatch(occ_file, pat): + msg_segments.add(segment_file) + break + else: + msg_segments.add(filename) + + if len(msg_segments) == 1: + # This message belongs in this segment. + segment_file = msg_segments.pop() + segment_po_files[segment_file].append(msg) + else: + # Either it's in more than one segment, or none, so put it back in + # the main file. + remaining_po.append(msg) + + # Write out the results. + files_written = set() + for segment_file, pofile in segment_po_files.items(): + out_file = filename.dirname() / segment_file + if len(pofile) == 0: + LOG.error("No messages to write to {file}, did you run segment twice?".format(file=out_file)) + else: + LOG.info(writing_msg.format(file=out_file, num=len(pofile))) + pofile.save(out_file) + files_written.add(out_file) + + LOG.info(writing_msg.format(file=filename, num=len(remaining_po))) + remaining_po.save(filename) + + return files_written + + +def main(argv): + """ + $ segment.py LOCALE [...] + + Segment the .po files in LOCALE(s) based on the segmenting rules in + config.yaml. + + Note that segmenting is *not* idempotent: it modifies the input file, so + be careful that you don't run it twice on the same file. + + """ + # This is used as a tool only to segment translation files when adding a + # new segment. In the regular workflow, the work is done by the extract + # phase calling the functions above. + + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + if len(argv) < 2: + sys.exit("Need a locale to segment") + for locale in argv[1:]: + segment_pofiles(locale) + + +if __name__ == "__main__": + main(sys.argv) diff --git a/i18n/tests/data/django_after.po b/i18n/tests/data/django_after.po new file mode 100644 index 0000000000..f1d7ae6425 --- /dev/null +++ b/i18n/tests/data/django_after.po @@ -0,0 +1,37 @@ +# This is test data. +# +msgid "" +msgstr "" +"Project-Id-Version: 0.1a\n" +"Report-Msgid-Bugs-To: openedx-translation@googlegroups.com\n" +"POT-Creation-Date: 2014-01-22 15:35-0500\n" +"PO-Revision-Date: 2014-01-22 20:35:52.096456\n" +"Last-Translator: \n" +"Language-Team: openedx-translation \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Language: en\n" + +#: cms/djangoapps/contentstore/views/tabs.py:39 +#: lms/djangoapps/instructor/views/instructor_dashboard.py:111 +msgid "Course Info" +msgstr "stuff about the course" + +#: common/djangoapps/course_modes/models.py:43 +msgid "Honor Code Certificate" +msgstr "your paper" + +#: common/djangoapps/course_modes/views.py:81 +#: common/djangoapps/student/views.py:478 +msgid "Enrollment is closed" +msgstr "no way, dude" + +#: common/static/js/vendor/mathjax-MathJax-c9db6ac/docs/source/mjtheme/layout.html:129 +#: lms/templates/wiki/plugins/attachments/index.html:40 +msgid "Search" +msgstr "find it!" + +#: lms/djangoapps/courseware/features/video.py:111 +msgid "ERROR: No playable video sources found!" +msgstr "try youtube, dude!" diff --git a/i18n/tests/data/django_before.po b/i18n/tests/data/django_before.po new file mode 100644 index 0000000000..9e508547ad --- /dev/null +++ b/i18n/tests/data/django_before.po @@ -0,0 +1,52 @@ +# This is test data. +# +msgid "" +msgstr "" +"Project-Id-Version: 0.1a\n" +"Report-Msgid-Bugs-To: openedx-translation@googlegroups.com\n" +"POT-Creation-Date: 2014-01-22 15:35-0500\n" +"PO-Revision-Date: 2014-01-22 20:35:52.096456\n" +"Last-Translator: \n" +"Language-Team: openedx-translation \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Language: en\n" + +#: cms/djangoapps/contentstore/views/tabs.py:39 +#: lms/djangoapps/instructor/views/instructor_dashboard.py:111 +msgid "Course Info" +msgstr "stuff about the course" + +#: common/djangoapps/course_modes/models.py:43 +msgid "Honor Code Certificate" +msgstr "your paper" + +#: common/djangoapps/course_modes/views.py:81 +#: common/djangoapps/student/views.py:478 +msgid "Enrollment is closed" +msgstr "no way, dude" + +#: cms/djangoapps/contentstore/views/course.py:237 +msgid "" +"There is already a course defined with the same organization, course number," +" and course run. Please change either organization or course number to be " +"unique." +msgstr "org/course/run, wtf??" + +#: cms/djangoapps/contentstore/views/course.py:243 +#: cms/djangoapps/contentstore/views/course.py:247 +#: other_cms/djangoapps/contentstore/views/course.py:269 +#: cms/djangoapps/contentstore/views/course.py:272 +msgid "" +"Please change either the organization or course number so that it is unique." +msgstr "pick again!" + +#: common/static/js/vendor/mathjax-MathJax-c9db6ac/docs/source/mjtheme/layout.html:129 +#: lms/templates/wiki/plugins/attachments/index.html:40 +msgid "Search" +msgstr "find it!" + +#: lms/djangoapps/courseware/features/video.py:111 +msgid "ERROR: No playable video sources found!" +msgstr "try youtube, dude!" diff --git a/i18n/tests/data/studio.po b/i18n/tests/data/studio.po new file mode 100644 index 0000000000..33fabc380c --- /dev/null +++ b/i18n/tests/data/studio.po @@ -0,0 +1,29 @@ +# This is test data. +# +msgid "" +msgstr "" +"Project-Id-Version: 0.1a\n" +"Report-Msgid-Bugs-To: openedx-translation@googlegroups.com\n" +"POT-Creation-Date: 2014-01-22 15:35-0500\n" +"PO-Revision-Date: 2014-01-22 20:35:52.096456\n" +"Last-Translator: \n" +"Language-Team: openedx-translation \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Language: en\n" + +#: cms/djangoapps/contentstore/views/course.py:237 +msgid "" +"There is already a course defined with the same organization, course number," +" and course run. Please change either organization or course number to be " +"unique." +msgstr "org/course/run, wtf??" + +#: cms/djangoapps/contentstore/views/course.py:243 +#: cms/djangoapps/contentstore/views/course.py:247 +#: other_cms/djangoapps/contentstore/views/course.py:269 +#: cms/djangoapps/contentstore/views/course.py:272 +msgid "" +"Please change either the organization or course number so that it is unique." +msgstr "pick again!" diff --git a/i18n/tests/test_segment.py b/i18n/tests/test_segment.py new file mode 100644 index 0000000000..079a74ff05 --- /dev/null +++ b/i18n/tests/test_segment.py @@ -0,0 +1,58 @@ +"""Test i18n/segment.py""" + +import os.path +import shutil +import unittest + +from path import path +import polib + +from i18n.segment import segment_pofile + + +HERE = path(__file__).dirname() +TEST_DATA = HERE / "data" +WORK = HERE / "work" + + +class SegmentTest(unittest.TestCase): + """Test segment_pofile.""" + + def setUp(self): + if not os.path.exists(WORK): + os.mkdir(WORK) + self.addCleanup(shutil.rmtree, WORK) + + def assert_pofile_same(self, pofile1, pofile2): + """The paths `p1` and `p2` should be identical pofiles.""" + po1 = polib.pofile(pofile1) + po2 = polib.pofile(pofile2) + self.assertEqual(po1, po2) + + def test_sample_data(self): + work_file = WORK / "django.po" + shutil.copyfile(TEST_DATA / "django_before.po", work_file) + original_pofile = polib.pofile(work_file) + + written = segment_pofile( + work_file, + { + 'studio.po': [ + 'cms/*', + 'other_cms/*', + ], + } + ) + + self.assertEqual(written, set([WORK / "django.po", WORK / "studio.po"])) + + pofiles = [polib.pofile(f) for f in written] + after_entries = sum(len(pofile) for pofile in pofiles) + self.assertEqual(len(original_pofile), after_entries) + + original_ids = set(m.msgid for m in original_pofile) + after_ids = set(m.msgid for pofile in pofiles for m in pofile) + self.assertEqual(original_ids, after_ids) + + self.assert_pofile_same(WORK / "django.po", TEST_DATA / "django_after.po") + self.assert_pofile_same(WORK / "studio.po", TEST_DATA / "studio.po")