"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "roundup/msgfmt.py" between
roundup-1.6.1.tar.gz and roundup-2.0.0.tar.gz

About: Roundup is a highly customisable issue-tracking system with command-line, web and e-mail interfaces (written in Python).

msgfmt.py  (roundup-1.6.1):msgfmt.py  (roundup-2.0.0)
#! /usr/bin/env python #! /usr/bin/env python
# -*- coding: iso-8859-1 -*- # -*- coding: iso-8859-1 -*-
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de> # Written by Martin v. Loewis <loewis@informatik.hu-berlin.de>
# Plural forms support added by alexander smishlajev <alex@tycobka.lv> #
# Changed by Christian 'Tiran' Heimes <tiran@cheimes.de> for the placeless
# translation service (PTS) of Zope
#
# Fixed some bugs and updated to support msgctxt
# by Hanno Schlichting <hanno@hannosch.eu>
"""Generate binary message catalog from textual translation description. """Generate binary message catalog from textual translation description.
This program converts a textual Uniforum-style message catalog (.po file) into This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file). This is essentially the same function as the a binary GNU catalog (.mo file). This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation. GNU msgfmt program, however, it is a simpler implementation.
Usage: msgfmt.py [OPTIONS] filename.po This file was taken from Python-2.3.2/Tools/i18n and altered in several ways.
Now you can simply use it from another python module:
Options: from msgfmt import Msgfmt
-o file mo = Msgfmt(po).get()
--output-file=file
Specify the output file to write to. If omitted, output will go to a where po is path to a po file as string, an opened po file ready for reading or
file named filename.mo (based off the input file name). a list of strings (readlines of a po file) and mo is the compiled mo file as
binary string.
-h
--help Exceptions:
Print this message and exit.
* IOError if the file couldn't be read
-V
--version * msgfmt.PoSyntaxError if the po file has syntax errors
Display version information and exit.
""" """
import sys
import os
import getopt
import struct
import array import array
from ast import literal_eval
import codecs
from email.parser import HeaderParser
import struct
import sys
__version__ = "1.1" PY3 = sys.version_info[0] == 3
if PY3:
MESSAGES = {} def header_charset(s):
def usage(code, msg=''): p = HeaderParser()
print >> sys.stderr, __doc__ return p.parsestr(s).get_content_charset()
if msg:
print >> sys.stderr, msg import io
sys.exit(code) BytesIO = io.BytesIO
def add(id, str, fuzzy): FILE_TYPE = io.IOBase
"Add a non-fuzzy translation to the dictionary." else:
global MESSAGES def header_charset(s):
if not fuzzy and str and not str.startswith('\0'): p = HeaderParser()
MESSAGES[id] = str return p.parsestr(s.encode('utf-8', 'ignore')).get_content_charset()
def generate():
"Return the generated output." from cStringIO import StringIO as BytesIO
global MESSAGES # file is a type defined only under python 2.
keys = MESSAGES.keys() # Flake8 when run in py3 flags this.
# the keys are sorted in the .mo file FILE_TYPE = file # noqa: 821
keys.sort()
offsets = [] class PoSyntaxError(Exception):
ids = strs = '' """ Syntax error in a po file """
for id in keys:
# For each string, we need size and file offset. Each string is NUL def __init__(self, msg):
# terminated; the NUL does not count into the size. self.msg = msg
offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
ids += id + '\0' def __str__(self):
strs += MESSAGES[id] + '\0' return 'Po file syntax error: %s' % self.msg
output = ''
# The header is 7 32-bit unsigned integers. We don't use hash tables, so class Msgfmt:
# the keys start right after the index tables.
# translated string. def __init__(self, po, name='unknown'):
keystart = 7*4+16*len(keys) self.po = po
# and the values start after the keys self.name = name
valuestart = keystart + len(ids) self.messages = {}
koffsets = [] self.openfile = False
voffsets = [] # Start off assuming latin-1, so everything decodes without failure,
# The string table first has the list of keys, then the list of values. # until we know the exact encoding
# Each entry has first the size of the string, then the file offset. self.encoding = 'latin-1'
for o1, l1, o2, l2 in offsets:
koffsets += [l1, o1+keystart] def readPoData(self):
voffsets += [l2, o2+valuestart] """ read po data from self.po and return an iterator """
offsets = koffsets + voffsets output = []
output = struct.pack("Iiiiiii", if isinstance(self.po, str):
0x950412deL, # Magic output = open(self.po, 'rb')
0, # Version elif isinstance(self.po, FILE_TYPE):
len(keys), # # of entries self.po.seek(0)
7*4, # start of key index self.openfile = True
7*4+len(keys)*8, # start of value index output = self.po
0, 0) # size and offset of hash table elif isinstance(self.po, list):
output += array.array("i", offsets).tostring() output = self.po
output += ids if not output:
output += strs raise ValueError("self.po is invalid! %s" % type(self.po))
return output if isinstance(output, FILE_TYPE):
def make(filename, outfile): # remove BOM from the start of the parsed input
ID = 1 first = output.readline()
STR = 2 if len(first) == 0:
global MESSAGES return output.readlines()
MESSAGES = {} if first.startswith(codecs.BOM_UTF8):
first = first.lstrip(codecs.BOM_UTF8)
msgid = None return [first] + output.readlines()
msgstr = None return output
# Compute .mo name from .po name and arguments def add(self, context, id, string, fuzzy):
if filename.endswith('.po'): "Add a non-empty and non-fuzzy translation to the dictionary."
infile = filename if string and not fuzzy:
else: # The context is put before the id and separated by a EOT char.
infile = filename + '.po' if context:
if outfile is None: id = context + u'\x04' + id
outfile = os.path.splitext(infile)[0] + '.mo' if not id:
# See whether there is an encoding declaration
try: charset = header_charset(string)
lines = open(infile).readlines() if charset:
except IOError as msg: # decode header in proper encoding
print >> sys.stderr, msg string = string.encode(self.encoding).decode(charset)
sys.exit(1) if not PY3:
# undo damage done by literal_eval in Python 2.x
# remove UTF-8 Byte Order Mark, if any. string = string.encode(self.encoding).decode(charset)
# (UCS2 BOMs are not handled because messages in UCS2 cannot be handled) self.encoding = charset
if lines[0].startswith('\xEF\xBB\xBF'): self.messages[id] = string
lines[0] = lines[0][3:]
def generate(self):
section = None "Return the generated output."
fuzzy = 0 # the keys are sorted in the .mo file
keys = sorted(self.messages.keys())
# Parse the catalog offsets = []
lno = 0 ids = strs = b''
for l in lines: for id in keys:
lno += 1 msg = self.messages[id].encode(self.encoding)
# If we get a comment line after a msgstr, this is a new entry id = id.encode(self.encoding)
if l[0] == '#' and section == STR: # For each string, we need size and file offset. Each string is
add(msgid, msgstr, fuzzy) # NUL terminated; the NUL does not count into the size.
section = None offsets.append((len(ids), len(id), len(strs),
fuzzy = 0 len(msg)))
# Record a fuzzy mark ids += id + b'\0'
if l[:2] == '#,' and (l.find('fuzzy') >= 0): strs += msg + b'\0'
fuzzy = 1 output = b''
# Skip comments # The header is 7 32-bit unsigned integers. We don't use hash tables,
if l[0] == '#': # so the keys start right after the index tables.
continue keystart = 7 * 4 + 16 * len(keys)
# Start of msgid_plural section, separate from singular form with \0 # and the values start after the keys
if l.startswith('msgid_plural'): valuestart = keystart + len(ids)
msgid += '\0' koffsets = []
l = l[12:] voffsets = []
# Now we are in a msgid section, output previous section # The string table first has the list of keys, then the list of values.
elif l.startswith('msgid'): # Each entry has first the size of the string, then the file offset.
if section == STR: for o1, l1, o2, l2 in offsets:
add(msgid, msgstr, fuzzy) koffsets += [l1, o1 + keystart]
section = ID voffsets += [l2, o2 + valuestart]
l = l[5:] offsets = koffsets + voffsets
msgid = msgstr = '' # Even though we don't use a hashtable, we still set its offset to be
# Now we are in a msgstr section # binary compatible with the gnu gettext format produced by:
elif l.startswith('msgstr'): # msgfmt file.po --no-hash
section = STR output = struct.pack("Iiiiiii",
l = l[6:] 0x950412de, # Magic
# Check for plural forms 0, # Version
if l.startswith('['): len(keys), # # of entries
# Separate plural forms with \0 7 * 4, # start of key index
if not l.startswith('[0]'): 7 * 4 + len(keys) * 8, # start of value index
msgstr += '\0' 0, keystart) # size and offset of hash table
# Ignore the index - must come in sequence if PY3:
l = l[l.index(']') + 1:] output += array.array("i", offsets).tobytes()
# Skip empty lines
l = l.strip()
if not l:
continue
# XXX: Does this always follow Python escape semantics?
l = eval(l)
if section == ID:
msgid += l
elif section == STR:
msgstr += l
else: else:
print >> sys.stderr, 'Syntax error on %s:%d' % (infile, lno), \ output += array.array("i", offsets).tostring()
'before:' output += ids
print >> sys.stderr, l output += strs
sys.exit(1) return output
# Add last entry
if section == STR: def get(self):
add(msgid, msgstr, fuzzy) """ """
self.read()
# Compute output # Compute output
output = generate() return self.generate()
try: def read(self, header_only=False):
open(outfile,"wb").write(output) """ """
except IOError as msg: ID = 1
print >> sys.stderr, msg STR = 2
def main(): CTXT = 3
try:
opts, args = getopt.getopt(sys.argv[1:], 'hVo:', section = None
['help', 'version', 'output-file=']) fuzzy = 0
except getopt.error as msg: msgid = msgstr = msgctxt = u''
usage(1, msg)
# Parse the catalog
outfile = None lno = 0
# parse options for l in self.readPoData():
for opt, arg in opts: l = l.decode(self.encoding)
if opt in ('-h', '--help'): lno += 1
usage(0) # If we get a comment line after a msgstr or a line starting with
elif opt in ('-V', '--version'): # msgid or msgctxt, this is a new entry
print >> sys.stderr, "msgfmt.py", __version__ if section == STR and (l[0] == '#' or (l[0] == 'm' and
sys.exit(0) (l.startswith('msgctxt') or l.startswith('msgid')))):
elif opt in ('-o', '--output-file'): self.add(msgctxt, msgid, msgstr, fuzzy)
outfile = arg section = None
# do it fuzzy = 0
if not args: # If we only want the header we stop after the first message
print >> sys.stderr, 'No input file given' if header_only:
print >> sys.stderr, "Try `msgfmt --help' for more information." break
return # Record a fuzzy mark
if l[:2] == '#,' and 'fuzzy' in l:
for filename in args: fuzzy = 1
make(filename, outfile) # Skip comments
if l[0] == '#':
continue
# Now we are in a msgctxt section
if l.startswith('msgctxt'):
section = CTXT
l = l[7:]
msgctxt = u''
# Now we are in a msgid section, output previous section
elif (l.startswith('msgid') and
not l.startswith('msgid_plural')):
if section == STR:
self.add(msgid, msgstr, fuzzy)
section = ID
l = l[5:]
msgid = msgstr = u''
is_plural = False
# This is a message with plural forms
elif l.startswith('msgid_plural'):
if section != ID:
raise PoSyntaxError(
'msgid_plural not preceeded by '
'msgid on line %d of po file %s' %
(lno, repr(self.name)))
l = l[12:]
msgid += u'\0' # separator of singular and plural
is_plural = True
# Now we are in a msgstr section
elif l.startswith('msgstr'):
section = STR
if l.startswith('msgstr['):
if not is_plural:
raise PoSyntaxError(
'plural without msgid_plural '
'on line %d of po file %s' %
(lno, repr(self.name)))
l = l.split(']', 1)[1]
if msgstr:
# Separator of the various plural forms
msgstr += u'\0'
else:
if is_plural:
raise PoSyntaxError(
'indexed msgstr required for '
'plural on line %d of po file %s' %
(lno, repr(self.name)))
l = l[6:]
# Skip empty lines
l = l.strip()
if not l:
continue
# TODO: Does this always follow Python escape semantics?
try:
l = literal_eval(l)
except Exception as msg:
raise PoSyntaxError(
'%s (line %d of po file %s): \n%s' %
(msg, lno, repr(self.name), l))
if isinstance(l, bytes):
l = l.decode(self.encoding)
if section == CTXT:
msgctxt += l
elif section == ID:
msgid += l
elif section == STR:
msgstr += l
else:
raise PoSyntaxError(
'error on line %d of po file %s' %
(lno, repr(self.name)))
# Add last entry
if section == STR:
self.add(msgctxt, msgid, msgstr, fuzzy)
if __name__ == '__main__': if self.openfile:
main() self.po.close()
# vim: set et sts=4 sw=4 : def getAsFile(self):
return BytesIO(self.get())
 End of changes. 10 change blocks. 
194 lines changed or deleted 263 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)