1 #! /usr/bin/env python3
2 # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
3
4 """Generate binary message catalog from textual translation description.
5
6 This program converts a textual Uniforum-style message catalog (.po file) into
7 a binary GNU catalog (.mo file). This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation. It handles
message contexts (msgctxt) and plural forms (msgid_plural / msgstr[N]).
10
11 Usage: msgfmt.py [OPTIONS] filename.po
12
13 Options:
14 -o file
15 --output-file=file
16 Specify the output file to write to. If omitted, output will go to a
17 file named filename.mo (based off the input file name).
18
19 -h
20 --help
21 Print this message and exit.
22
23 -V
24 --version
25 Display version information and exit.
26 """
27
28 import os
29 import sys
30 import ast
31 import getopt
32 import struct
33 import array
34 from email.parser import HeaderParser
35
# Version string printed by the -V / --version option.
__version__ = "1.2"

# Catalog under construction: maps a message id (bytes; prefixed with
# b"<context>\x04" when the entry has a msgctxt) to its translation (bytes).
# Filled by add() and serialised by generate().
MESSAGES = {}
39
40
def usage(code, msg=''):
    """Write the module help text to stderr and terminate the process.

    The module docstring is printed first; *msg*, when truthy, is printed
    on the line after it.  The process then exits with status *code*.
    """
    texts = [__doc__]
    if msg:
        texts.append(msg)
    for text in texts:
        print(text, file=sys.stderr)
    raise SystemExit(code)
46
47
def add(ctxt, id, str, fuzzy):
    """Store one translation in MESSAGES, skipping fuzzy or empty ones.

    ctxt  -- message context as bytes, or None when the entry has none
    id    -- message id as bytes
    str   -- translated text as bytes
    fuzzy -- true when the entry carries the fuzzy flag

    Entries with a context are stored under the key ctxt + b"\\x04" + id;
    all others are stored under the bare id.
    """
    global MESSAGES
    # Fuzzy entries and entries without a translation are left out.
    if fuzzy or not str:
        return
    key = id if ctxt is None else b"%b\x04%b" % (ctxt, id)
    MESSAGES[key] = str
56
57
def generate(messages=None):
    """Return the binary .mo catalog for *messages* as a bytes object.

    messages -- mapping of message id (bytes) to translation (bytes).  When
                omitted or None, the module-level MESSAGES dictionary is
                used, which is what make() relies on.

    The output consists of a seven-integer header, the key index, the value
    index, all keys and finally all values.  Every index entry is a
    (length, file offset) pair and every string is NUL terminated; the NUL
    is not counted in the length.  No hash table is written.  Integers are
    packed with the native struct/array codes "I"/"i", i.e. in the byte
    order of the running platform; the offset arithmetic assumes they are
    4 bytes wide.
    """
    if messages is None:
        messages = MESSAGES
    # the keys are sorted in the .mo file
    keys = sorted(messages)
    ids = b''.join(key + b'\0' for key in keys)
    strs = b''.join(messages[key] + b'\0' for key in keys)
    # The header is 7 integers of 4 bytes, followed by the two index tables
    # with 8 bytes per message each.  We don't use hash tables, so the keys
    # start right after the index tables...
    keystart = 7*4 + 16*len(keys)
    # ...and the values start after the keys.
    valuestart = keystart + len(ids)
    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    kpos, vpos = keystart, valuestart
    for key in keys:
        value = messages[key]
        koffsets += [len(key), kpos]
        voffsets += [len(value), vpos]
        # Step over the string and its terminating NUL.
        kpos += len(key) + 1
        vpos += len(value) + 1
    output = struct.pack("Iiiiiii",
                         0x950412de,       # Magic
                         0,                # Version
                         len(keys),        # # of entries
                         7*4,              # start of key index
                         7*4+len(keys)*8,  # start of value index
                         0, 0)             # size and offset of hash table
    output += array.array("i", koffsets + voffsets).tobytes()
    output += ids
    output += strs
    return output
97
98
def make(filename, outfile):
    """Compile the .po catalog *filename* into a binary .mo file.

    filename -- path of the input catalog; '.po' is appended when the name
                does not already end with it.
    outfile  -- path of the .mo file to write, or None to use the input
                path with its extension replaced by '.mo'.

    Each parsed entry is handed to add(), which stores it in the
    module-level MESSAGES dictionary; the file written is whatever
    generate() produces afterwards.  MESSAGES is not cleared here, so
    entries collected by earlier calls are part of the output as well.

    The process exits with status 1 when the input cannot be read, when
    plural entries are inconsistent (msgid_plural without msgid, indexed
    msgstr without msgid_plural or the reverse) or when a string appears
    outside of any msgctxt/msgid/msgstr section.  A failure to write the
    output is reported on stderr only.
    """
    ID = 1
    STR = 2
    CTXT = 3

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = msgctxt = None
    # Give the per-entry state defined values up front, so that a catalog
    # whose first keyword is msgstr does not fail with UnboundLocalError.
    msgid = msgstr = b''
    is_plural = False
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, l in enumerate(lines, 1):
        l = l.decode(encoding)
        # If we get a comment line after a msgstr, this is a new entry
        if l[0] == '#' and section == STR:
            add(msgctxt, msgid, msgstr, fuzzy)
            section = msgctxt = None
            fuzzy = 0
        # Record a fuzzy mark
        if l[:2] == '#,' and 'fuzzy' in l:
            fuzzy = 1
        # Skip comments
        if l[0] == '#':
            continue
        # Now we are in a msgid or msgctxt section, output previous section
        if l.startswith('msgctxt'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                # No comment separated the two entries, so the new entry
                # cannot carry a fuzzy flag of its own.
                fuzzy = 0
            section = CTXT
            l = l[7:]
            msgctxt = b''
        elif l.startswith('msgid') and not l.startswith('msgid_plural'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                if not msgid:
                    # See whether there is an encoding declaration.  This
                    # is only reached when no comment line separated the
                    # header entry from this msgid; a comment would already
                    # have flushed the header and reset `section`.
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
                # The previous entry ended without an intervening comment
                # or msgctxt line: neither its context nor its fuzzy flag
                # belongs to the entry that starts here.
                msgctxt = None
                fuzzy = 0
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith('msgstr'):
            section = STR
            if l.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        # What is left of the line is a quoted string; evaluate it as a
        # Python literal and append it to the section being collected.
        l = ast.literal_eval(l)
        if section == CTXT:
            msgctxt += l.encode(encoding)
        elif section == ID:
            msgid += l.encode(encoding)
        elif section == STR:
            msgstr += l.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(l, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgctxt, msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    # A write failure is reported but does not change the exit status.
    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        print(msg, file=sys.stderr)
216
217
def main():
    """Command-line entry point: parse sys.argv and compile each .po file.

    Recognised options are -h/--help, -V/--version and -o/--output-file.
    Invalid options print the usage text and exit with status 1; a missing
    input file is reported on stderr and also exits with status 1.

    Without -o every input file is compiled into its own .mo file.  With
    -o all input files are written to the same output file, so the result
    holds the messages of all of them.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        # This is an error, so do not report success to the caller.
        sys.exit(1)

    for filename in args:
        if outfile is None:
            # Each input gets its own .mo file: start from an empty catalog
            # so it does not also contain the messages of earlier inputs.
            MESSAGES.clear()
        make(filename, outfile)
243
244
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()