149 lines
4.9 KiB
Python
Executable File
149 lines
4.9 KiB
Python
Executable File
#!/usr/bin/env python
|
|
"""
|
|
Segment a .po file to produce smaller files based on the locations of the
|
|
messages.
|
|
"""
|
|
|
|
import copy
|
|
import fnmatch
|
|
import logging
|
|
import sys
|
|
import argparse
|
|
import polib
|
|
import textwrap
|
|
|
|
from i18n.config import CONFIGURATION
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
def segment_pofiles(locale):
    """Segment every configured .po file for `locale`.

    Each entry of ``CONFIGURATION.segment`` maps a .po file name (relative to
    the locale's messages directory) to the segment specification that is
    handed to `segment_pofile`.

    Returns a set of filenames, all the segment files written.

    """
    # The generator is consumed in configuration order, so the files are
    # segmented one after another exactly as a plain loop would do; the
    # per-file results are then merged into one fresh set.
    per_file_results = (
        segment_pofile(CONFIGURATION.get_messages_dir(locale) / po_name, spec)
        for po_name, spec in CONFIGURATION.segment.items()
    )
    return set().union(*per_file_results)
|
|
|
|
|
|
def segment_pofile(filename, segments):
    """Segment a .po file using patterns in `segments`.

    The .po file at `filename` is read, and the occurrence locations of its
    messages are examined.  `segments` is a dictionary: the keys are segment
    .po filenames, the values are lists of patterns::

        {
            'django-studio.po': [
                'cms/*',
                'some-other-studio-place/*',
            ],
            'django-weird.po': [
                '*/weird_*.*',
            ],
        }

    If all a message's occurrences match the patterns for a segment, then that
    message is written to the new segmented .po file.

    Any message that matches no segments, or more than one, is written back to
    the original file.  A message that records no occurrences at all matches
    no segment, so it also stays in the original file.

    Segment files are written next to the original file.  A catalogue that
    ends up with no messages is not written; an error is logged instead.

    Arguments:
        filename (path.path): a path object referring to the original .po file.
        segments (dict): specification of the segments to create.

    Returns:
        a set of path objects, all the segment files written.

    """
    reading_msg = "Reading {num} entries from {file}"
    writing_msg = "Writing {num} entries to {file}"

    source_po = polib.pofile(filename)
    LOG.info(reading_msg.format(file=filename, num=len(source_po)))

    # A new pofile just like the source, but with no messages.  We'll put
    # anything not segmented into this file.
    remaining_po = copy.deepcopy(source_po)
    remaining_po[:] = []

    # Turn the segments dictionary into two structures: segment_patterns is a
    # list of (pattern, segmentfile) pairs.  segment_po_files is a dict mapping
    # segment file names to pofile objects of their contents.  The original
    # file name is used as the key for the "not segmented" catalogue.
    segment_po_files = {filename: remaining_po}
    segment_patterns = []
    for segmentfile, patterns in segments.items():
        segment_po_files[segmentfile] = copy.deepcopy(remaining_po)
        segment_patterns.extend((pat, segmentfile) for pat in patterns)

    # Examine each message in the source file.  If all of its occurrences match
    # a pattern for the same segment, it goes in that segment.  Otherwise, it
    # goes in remaining.
    for msg in source_po:
        msg_segments = set()
        for occ_file, _ in msg.occurrences:
            # The first matching pattern decides the segment for this
            # occurrence; an occurrence matching nothing counts as belonging
            # to the original file.
            for pat, segment_file in segment_patterns:
                if fnmatch.fnmatch(occ_file, pat):
                    msg_segments.add(segment_file)
                    break
            else:
                msg_segments.add(filename)

        if len(msg_segments) == 1:
            # This message belongs in this segment.
            segment_po_files[msg_segments.pop()].append(msg)
        else:
            # It's in more than one segment, or it has no occurrences at all
            # (so it matches no segment): put it back in the main file rather
            # than aborting the whole run.
            remaining_po.append(msg)

    # Write out the results.
    files_written = set()
    for segment_file, pofile in segment_po_files.items():
        if pofile is remaining_po:
            # `filename` already is the full path of the original file.
            # Joining it onto its own directory again would repeat the
            # directory part when `filename` is a relative path.
            out_file = filename
        else:
            out_file = filename.dirname() / segment_file

        if len(pofile) == 0:
            LOG.error("No messages to write to {file}, did you run segment twice?".format(file=out_file))
        else:
            LOG.info(writing_msg.format(file=out_file, num=len(pofile)))
            pofile.save(out_file)
            files_written.add(out_file)

    return files_written
|
|
|
|
|
|
def main(locales=None, verbosity=1):  # pylint: disable=unused-argument
    """
    Main entry point of script: segment the .po files of each given locale.

    `locales` is an iterable of locale names; when it is None or empty,
    nothing is done.  `verbosity` is accepted but not used.
    """
    # This is used as a tool only to segment translation files when adding a
    # new segment.  In the regular workflow, the work is done by the extract
    # phase calling the functions above.
    if not locales:
        return
    for one_locale in locales:
        segment_pofiles(one_locale)
|
|
|
|
|
|
if __name__ == "__main__":
    # Log to stdout at INFO level so the read/write messages emitted while
    # segmenting are shown when this module is run as a script.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # pylint: disable=invalid-name
    # Dedent first and strip afterwards: stripping first removes the first
    # line's indentation, which leaves dedent() with no common margin to
    # remove from the remaining lines.
    description = textwrap.dedent("""
        Segment the .po files in LOCALE(s) based on the segmenting rules in
        config.yaml.

        Note that segmenting is *not* idempotent: it modifies the input file, so
        be careful that you don't run it twice on the same file.
    """).strip()

    parser = argparse.ArgumentParser(description=description)
    # One or more locale names are required on the command line.
    parser.add_argument("locale", nargs="+", help="a locale to segment")
    # Each -v increments the count that is passed to main() as `verbosity`.
    parser.add_argument("--verbose", "-v", action="count", default=0)
    args = parser.parse_args()
    main(locales=args.locale, verbosity=args.verbose)
|