1 /* Writing binary .mo files.
2 Copyright (C) 1995-1998, 2000-2007, 2016, 2020, 2023 Free Software Foundation, Inc.
3 Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21 #include <alloca.h>
22
23 /* Specification. */
24 #include "write-mo.h"
25
26 #include <errno.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 #if HAVE_SYS_PARAM_H
33 # include <sys/param.h>
34 #endif
35
36 /* These two include files describe the binary .mo format. */
37 #include "gmo.h"
38 #include "hash-string.h"
39
40 #include "byteswap.h"
41 #include "error.h"
42 #include "mem-hash-map.h"
43 #include "message.h"
44 #include "format.h"
45 #include "xsize.h"
46 #include "xalloc.h"
47 #include "xmalloca.h"
48 #include "po-charset.h"
49 #include "msgl-iconv.h"
50 #include "msgl-header.h"
51 #include "binary-io.h"
52 #include "supersede.h"
53 #include "fwriteerror.h"
54 #include "gettext.h"
55
56 #define _(str) gettext (str)
57
/* Make freea() expand to nothing in this file.
   write_table() below calls freea() on a buffer obtained from xmalloca();
   with this definition that call has no effect.
   NOTE(review): "xmalloca.h" is included above and presumably supplies its
   own freea(); if xmalloca() can fall back to heap allocation, this override
   means that buffer is never released -- confirm this is intended.  */
58 #define freea(p) /* nothing */
59
/* Round X up to the next multiple of Y.
   Usually defined in <sys/param.h>.  */
#ifndef roundup
# if defined __GNUC__ && __GNUC__ >= 2
/* Evaluates each argument exactly once.  Spelled with __typeof__ and
   __extension__ so that it is also accepted in strict ISO C modes
   (-std=c99, -std=c11), where plain 'typeof' is not a keyword.  */
#  define roundup(x, y) \
     __extension__ ({ __typeof__ (x) roundup_x_ = (x); \
                      __typeof__ (y) roundup_y_ = (y); \
                      ((roundup_x_ + roundup_y_ - 1) / roundup_y_) \
                      * roundup_y_; })
# else
/* Portable fallback.  Evaluates X once and Y three times, so Y must not
   have side effects.  */
#  define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
# endif /* GNU CC2 */
#endif /* roundup */
69
/* Number of elements of the array A.  A must be a real array (not a pointer).
   The argument is parenthesized so that array-valued expressions such as
   '*ptr_to_array' are indexed as a whole.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
71
72
/* Options controlling how the .mo file is written.  */

/* Set when conversion of the catalog to UTF-8 is not desired.  */
bool no_convert_to_utf8;

/* Set when instantiations of system-dependent strings shall not be stored
   redundantly among the static string pairs.  */
bool no_redundancy;

/* Alignment applied to the strings in the resulting .mo file.  */
size_t alignment;

/* Set when the .mo file is written in the endianness opposite to the
   host's.  */
bool byteswap;

/* Set when the .mo file shall not contain a hash table.  */
bool no_hash_table;
88
89
/* Destructively changes the byte order of a 32-bit value in memory.
   X must be a modifiable lvalue; it is evaluated twice, so it must not have
   side effects.  The expansion is fully parenthesized, so that it behaves as
   a single expression (yielding the swapped value) wherever it is used.  */
#define BSWAP32(x) ((x) = bswap_32 (x))
92
93
/* Positions of the two strings of a message inside the 'str' arrays of
   'struct pre_message' and 'struct pre_sysdep_message'.  */
enum
{
  M_ID,         /* 0: msgid - the original string */
  M_STR         /* 1: msgstr - the translated string */
};
101
/* Intermediate form of a 'struct string_desc': a string given by its start
   and its size in bytes.  write_table() stores sizes that include the
   terminating NUL here.  */
struct pre_string
{
  size_t length;        /* number of bytes at POINTER */
  const char *pointer;  /* start of the string */
};
108
109 /* An intermediate data structure representing a message. */
110 struct pre_message
111 {
112 struct pre_string str[2];
113 const char *id_plural;
114 size_t id_plural_len;
115 };
116
117 static int
118 compare_id (const void *pval1, const void *pval2)
119 {
120 return strcmp (((const struct pre_message *) pval1)->str[M_ID].pointer,
121 ((const struct pre_message *) pval2)->str[M_ID].pointer);
122 }
123
124
/* Intermediate form of a 'struct sysdep_segment': the name of a system
   dependent segment (for example "PRId64").  The name is referenced in place
   inside a message string, so it is delimited by LENGTH rather than by a
   terminating NUL.  */
struct pre_sysdep_segment
{
  size_t length;        /* number of bytes of the name */
  const char *pointer;  /* first byte of the name */
};
131
/* Intermediate form of a 'struct segment_pair': a run of static text,
   followed by a reference to a system dependent segment.  */
struct pre_segment_pair
{
  size_t segsize;       /* number of bytes of static text at SEGPTR */
  const char *segptr;   /* start of the static text */
  size_t sysdepref;     /* index of the system dependent segment that
                           follows, or SEGMENTS_END in the last pair */
};
139
140 /* An intermediate data structure representing a 'struct sysdep_string'. */
141 struct pre_sysdep_string
142 {
143 unsigned int segmentcount;
144 struct pre_segment_pair segments[1];
145 };
146
/* Intermediate form of a message that contains system dependent
   strings.  */
struct pre_sysdep_message
{
  /* The segmented original and translated string, indexed by M_ID and
     M_STR.  */
  struct pre_sysdep_string *str[2];
  /* The plural form of the original string, or NULL if there is none.  */
  const char *id_plural;
  /* Size of ID_PLURAL including its terminating NUL; 0 if ID_PLURAL is
     NULL.  */
  size_t id_plural_len;
};
155
156
157 /* Instantiating system dependent strings.
158 This is a technique to make messages with system dependent strings work with
159 musl libc's gettext() implementation, even though this implementation does
160 not process the system dependent strings. Namely, we store the actual
161 runtime expansion of the string for this platform — we call this an
162 "instantiation" of the string — in the table of static string pairs.
163 This is redundant, but allows the same MO files to be used on musl libc
164 (without GNU libintl) as on other platforms (with GNU libc or with GNU
165 libintl).
166
167 A survey of the PO files on translationproject.org shows that
168 * Less than 9% of the messages of any PO file are system dependent strings.
169 Therefore the increase of the size of the MO file is small.
170 * None of these PO files uses the 'I' format string flag.
171
172 There are few possible <inttypes.h> flavours. Each such flavour gives rise
173 to an instantiation rule. We ran this test program on various platforms:
174 =============================================================================
175 #include <inttypes.h>
176 #include <stdio.h>
177 #include <string.h>
178 int main ()
179 {
180 printf ("%s\n", PRIuMAX);
181 printf ("%s\n", PRIdMAX);
182 printf ("%s %s %s %s\n", PRIu8, PRIu16, PRIu32, PRIu64);
183 printf ("%s %s %s %s\n", PRId8, PRId16, PRId32, PRId64);
184 printf ("%s %s %s %s\n", PRIuLEAST8, PRIuLEAST16, PRIuLEAST32, PRIuLEAST64);
185 printf ("%s %s %s %s\n", PRIdLEAST8, PRIdLEAST16, PRIdLEAST32, PRIdLEAST64);
186 printf ("%s %s %s %s\n", PRIuFAST8, PRIuFAST16, PRIuFAST32, PRIuFAST64);
187 printf ("%s %s %s %s\n", PRIdFAST8, PRIdFAST16, PRIdFAST32, PRIdFAST64);
188 printf ("%s\n", PRIuPTR);
189 printf ("%s\n", PRIdPTR);
190 printf ("Summary:\n");
191 printf (" MAX 8 LEAST8 FAST8 PTR\n");
192 printf (" | | | | | | | | | | | | | |\n");
193 printf ("| %-3.*s %-3.*s %-3.*s %-3.*s %-3.*s %-3.*s %-3.*s %-3.*s %-3.*s %-3.*s %-3.*s %-3.*s %-3.*s %-3.*s |\n",
194 (int) strlen (PRIuMAX) - 1, PRIuMAX,
195 (int) strlen (PRIu8) - 1, PRIu8,
196 (int) strlen (PRIu16) - 1, PRIu16,
197 (int) strlen (PRIu32) - 1, PRIu32,
198 (int) strlen (PRIu64) - 1, PRIu64,
199 (int) strlen (PRIuLEAST8) - 1, PRIuLEAST8,
200 (int) strlen (PRIuLEAST16) - 1, PRIuLEAST16,
201 (int) strlen (PRIuLEAST32) - 1, PRIuLEAST32,
202 (int) strlen (PRIuLEAST64) - 1, PRIuLEAST64,
203 (int) strlen (PRIuFAST8) - 1, PRIuFAST8,
204 (int) strlen (PRIuFAST16) - 1, PRIuFAST16,
205 (int) strlen (PRIuFAST32) - 1, PRIuFAST32,
206 (int) strlen (PRIuFAST64) - 1, PRIuFAST64,
207 (int) strlen (PRIuPTR) - 1, PRIuPTR);
208 return 0;
209 }
210 =============================================================================
211 and found the following table.
212
213 <inttypes.h> MAX 8 LEAST8 FAST8 PTR
214 flavour | | | | | | | | | | | | | |
215 ------------ -----------------------------------------------------------
216 0 | ll ll ll ll |
217 1 | l l l l l |
218 2 | ll ll ll ll l |
219 3 | ll hh h ll hh h ll hh h ll l |
220 4 | ll hh h ll hh h ll hh ll |
221 5 | ll hh h ll hh h ll hh ll ll |
222 6 | l l l l l l l |
223 7 | l hh h l hh h l hh h l l |
224 8 | l hh h l hh h l hh l l |
225 9 | l hh h l hh h l hh l l l l |
226 10 | j hh h ll hh h ll hh h ll l |
227 11 | j ll ll ll |
228 12 | j l l l l |
229 13 | j ll ll ll l |
230 14 | I64 I64 I64 I64 |
231 15 | I64 I64 I64 I64 I64 |
232
233 Which <inttypes.h> flavour for which platforms?
234
235 <inttypes.h>
236 flavour Platforms
237 ------------ ---------------------------------------------------------------
238 0 glibc 32-bit, musl 32-bit, NetBSD 32-bit
239 1 musl 64-bit, NetBSD 64-bit, Haiku 64-bit
240 2 Haiku 32-bit
241 3 AIX 32-bit
242 4 Solaris 32-bit, Cygwin 32-bit, MSVC 32-bit
243 5 MSVC 64-bit
244 6 glibc 64-bit
245 7 AIX 64-bit
246 8 Solaris 64-bit
247 9 Cygwin 64-bit
248 10 macOS 32-bit and 64-bit
249 11 FreeBSD 32-bit, Android 32-bit
250 12 FreeBSD 64-bit
251 13 OpenBSD 32-bit and 64-bit
252 14 mingw 32-bit
253 15 mingw 64-bit
254 */
/* The length modifiers ("", "l", "ll", ...) that one <inttypes.h> flavour
   puts in front of the conversion character of each PRI* macro.
   There is no member for the 32-bit widths: get_sysdep_segment_value() uses
   the empty modifier for PRI*32 and PRI*LEAST32.  */
struct sysdep_instantiation_rule
{
  /* PRI*MAX */
  const char *prefix_for_MAX;
  /* PRI*8, PRI*LEAST8, PRI*FAST8 */
  const char *prefix_for_8;
  /* PRI*16, PRI*LEAST16 */
  const char *prefix_for_16;
  /* PRI*64, PRI*LEAST64, PRI*FAST64 */
  const char *prefix_for_64;
  /* PRI*FAST16 */
  const char *prefix_for_FAST16;
  /* PRI*FAST32 */
  const char *prefix_for_FAST32;
  /* PRI*PTR */
  const char *prefix_for_PTR;
};
265 const struct sysdep_instantiation_rule useful_instantiation_rules[] =
266 {
267 /* 0 */ { "ll", "", "", "ll", "", "", "" },
268 /* 1 */ { "l", "", "", "l", "", "", "l" },
269 #if 0 /* These instantiation rules are not useful. They would just be bloat. */
270 /* 2 */ { "ll", "", "", "ll", "", "", "l" },
271 /* 3 */ { "ll", "hh", "h", "ll", "h", "", "l" },
272 /* 4 */ { "ll", "hh", "h", "ll", "", "", "" },
273 /* 5 */ { "ll", "hh", "h", "ll", "", "", "ll" },
274 /* 6 */ { "l", "", "", "l", "l", "l", "l" },
275 /* 7 */ { "l", "hh", "h", "l", "h", "", "l" },
276 /* 8 */ { "l", "hh", "h", "l", "", "", "l" },
277 /* 9 */ { "l", "hh", "h", "l", "l", "l", "l" },
278 /* 10 */ { "j", "hh", "h", "ll", "h", "", "l" },
279 /* 11 */ { "j", "", "", "ll", "", "", "" },
280 /* 12 */ { "j", "", "", "l", "", "", "l" },
281 /* 13 */ { "j", "", "", "ll", "", "", "l" },
282 /* 14 */ { "I64", "", "", "I64", "", "", "" },
283 /* 15 */ { "I64", "", "", "I64", "", "", "I64" },
284 #endif
285 };
286
287 /* Concatenate a prefix and a conversion specifier. */
288 static const char *
289 concat_prefix_cs (const char *prefix, char conversion)
290 {
291 char *result = XNMALLOC (strlen (prefix) + 2, char);
292 {
293 char *p = result;
294 p = stpcpy (p, prefix);
295 *p++ = conversion;
296 *p = '\0';
297 }
298 return result;
299 }
300
301 /* Expand a system dependent string segment for a specific instantation.
302 Return NULL if unsupported. */
303 static const char *
304 get_sysdep_segment_value (struct pre_sysdep_segment segment,
305 const struct sysdep_instantiation_rule *instrule)
306 {
307 const char *name = segment.pointer;
308 size_t len = segment.length;
309
310 /* Test for an ISO C 99 section 7.8.1 format string directive.
311 Syntax:
312 P R I { d | i | o | u | x | X }
313 { { | LEAST | FAST } { 8 | 16 | 32 | 64 } | MAX | PTR } */
314 if (len >= 3 && name[0] == 'P' && name[1] == 'R' && name[2] == 'I')
315 {
316 if (len >= 4
317 && (name[3] == 'd' || name[3] == 'i' || name[3] == 'o'
318 || name[3] == 'u' || name[3] == 'x' || name[3] == 'X'))
319 {
320 if (len == 5 && name[4] == '8')
321 return concat_prefix_cs (instrule->prefix_for_8, name[3]);
322 if (len == 6 && name[4] == '1' && name[5] == '6')
323 return concat_prefix_cs (instrule->prefix_for_16, name[3]);
324 if (len == 6 && name[4] == '3' && name[5] == '2')
325 return concat_prefix_cs ("", name[3]);
326 if (len == 6 && name[4] == '6' && name[5] == '4')
327 return concat_prefix_cs (instrule->prefix_for_64, name[3]);
328 if (len >= 9 && name[4] == 'L' && name[5] == 'E' && name[6] == 'A'
329 && name[7] == 'S' && name[8] == 'T')
330 {
331 if (len == 10 && name[9] == '8')
332 return concat_prefix_cs (instrule->prefix_for_8, name[3]);
333 if (len == 11 && name[9] == '1' && name[10] == '6')
334 return concat_prefix_cs (instrule->prefix_for_16, name[3]);
335 if (len == 11 && name[9] == '3' && name[10] == '2')
336 return concat_prefix_cs ("", name[3]);
337 if (len == 11 && name[9] == '6' && name[10] == '4')
338 return concat_prefix_cs (instrule->prefix_for_64, name[3]);
339 }
340 if (len >= 8 && name[4] == 'F' && name[5] == 'A' && name[6] == 'S'
341 && name[7] == 'T')
342 {
343 if (len == 9 && name[8] == '8')
344 return concat_prefix_cs (instrule->prefix_for_8, name[3]);
345 if (len == 10 && name[8] == '1' && name[9] == '6')
346 return concat_prefix_cs (instrule->prefix_for_FAST16, name[3]);
347 if (len == 10 && name[8] == '3' && name[9] == '2')
348 return concat_prefix_cs (instrule->prefix_for_FAST32, name[3]);
349 if (len == 10 && name[8] == '6' && name[9] == '4')
350 return concat_prefix_cs (instrule->prefix_for_64, name[3]);
351 }
352 if (len == 7 && name[4] == 'M' && name[5] == 'A' && name[6] == 'X')
353 return concat_prefix_cs (instrule->prefix_for_MAX, name[3]);
354 if (len == 7 && name[4] == 'P' && name[5] == 'T' && name[6] == 'R')
355 return concat_prefix_cs (instrule->prefix_for_PTR, name[3]);
356 }
357 }
358 /* Note: We cannot support the 'I' format directive flag here. Because
359 - If we expand the 'I' to "I", the expansion will not work on non-glibc
360 systems (whose *printf() functions don't understand this flag).
361 - If we expand the 'I' to "", the expansion will override the expansion
362 produced at run time (see loadmsgcat.c) and will not produce the
363 locale-specific outdigits as expected. */
364 return NULL;
365 }
366
367
368 /* Write the message list to the given open file. */
369 static void
370 write_table (FILE *output_file, message_list_ty *mlp)
371 {
372 char **msgctid_arr;
373 size_t nstrings;
374 size_t msg_arr_allocated;
375 struct pre_message *msg_arr;
376 size_t n_sysdep_strings;
377 struct pre_sysdep_message *sysdep_msg_arr;
378 size_t n_sysdep_segments;
379 struct pre_sysdep_segment *sysdep_segments;
380 bool have_outdigits;
381 int major_revision;
382 int minor_revision;
383 bool omit_hash_table;
384 nls_uint32 hash_tab_size;
385 struct mo_file_header header; /* Header of the .mo file to be written. */
386 size_t header_size;
387 size_t offset;
388 struct string_desc *orig_tab;
389 struct string_desc *trans_tab;
390 size_t sysdep_tab_offset = 0;
391 size_t end_offset;
392 char *null;
393
394 /* First pass: Move the static string pairs into an array, for sorting,
395 and at the same time, compute the segments of the system dependent
396 strings. */
397 msgctid_arr = XNMALLOC (mlp->nitems, char *);
398 nstrings = 0;
399 msg_arr_allocated = mlp->nitems;
400 msg_arr = XNMALLOC (msg_arr_allocated, struct pre_message);
401 n_sysdep_strings = 0;
402 sysdep_msg_arr = XNMALLOC (mlp->nitems, struct pre_sysdep_message);
403 n_sysdep_segments = 0;
404 sysdep_segments = NULL;
405 have_outdigits = false;
406 {
407 size_t j;
408
409 for (j = 0; j < mlp->nitems; j++)
410 {
411 message_ty *mp = mlp->item[j];
412 size_t msgctlen;
413 char *msgctid;
414 struct interval *intervals[2];
415 size_t nintervals[2];
416
417 /* Concatenate mp->msgctxt and mp->msgid into msgctid. */
418 msgctlen = (mp->msgctxt != NULL ? strlen (mp->msgctxt) + 1 : 0);
419 msgctid = XNMALLOC (msgctlen + strlen (mp->msgid) + 1, char);
420 if (mp->msgctxt != NULL)
421 {
422 memcpy (msgctid, mp->msgctxt, msgctlen - 1);
423 msgctid[msgctlen - 1] = MSGCTXT_SEPARATOR;
424 }
425 strcpy (msgctid + msgctlen, mp->msgid);
426 msgctid_arr[j] = msgctid;
427
428 intervals[M_ID] = NULL;
429 nintervals[M_ID] = 0;
430 intervals[M_STR] = NULL;
431 nintervals[M_STR] = 0;
432
433 /* Test if mp contains system dependent strings and thus
434 requires the use of the .mo file minor revision 1. */
435 if (possible_format_p (mp->is_format[format_c])
436 || possible_format_p (mp->is_format[format_objc]))
437 {
438 /* Check whether msgid or msgstr contain ISO C 99 <inttypes.h>
439 format string directives. No need to check msgid_plural, because
440 it is not accessed by the [n]gettext() function family. */
441 const char *p_end;
442 const char *p;
443
444 get_sysdep_c_format_directives (mp->msgid, false,
445 &intervals[M_ID], &nintervals[M_ID]);
446 if (msgctlen > 0)
447 {
448 struct interval *id_intervals = intervals[M_ID];
449 size_t id_nintervals = nintervals[M_ID];
450
451 if (id_nintervals > 0)
452 {
453 unsigned int i;
454
455 for (i = 0; i < id_nintervals; i++)
456 {
457 id_intervals[i].startpos += msgctlen;
458 id_intervals[i].endpos += msgctlen;
459 }
460 }
461 }
462
463 p_end = mp->msgstr + mp->msgstr_len;
464 for (p = mp->msgstr; p < p_end; p += strlen (p) + 1)
465 {
466 struct interval *part_intervals;
467 size_t part_nintervals;
468
469 get_sysdep_c_format_directives (p, true,
470 &part_intervals,
471 &part_nintervals);
472 if (part_nintervals > 0)
473 {
474 size_t d = p - mp->msgstr;
475 unsigned int i;
476
477 intervals[M_STR] =
478 (struct interval *)
479 xrealloc (intervals[M_STR],
480 (nintervals[M_STR] + part_nintervals)
481 * sizeof (struct interval));
482 for (i = 0; i < part_nintervals; i++)
483 {
484 intervals[M_STR][nintervals[M_STR] + i].startpos =
485 d + part_intervals[i].startpos;
486 intervals[M_STR][nintervals[M_STR] + i].endpos =
487 d + part_intervals[i].endpos;
488 }
489 nintervals[M_STR] += part_nintervals;
490 }
491 }
492 }
493
494 if (nintervals[M_ID] > 0 || nintervals[M_STR] > 0)
495 {
496 /* System dependent string pair. */
497 size_t m;
498
499 for (m = 0; m < 2; m++)
500 {
501 struct pre_sysdep_string *pre =
502 (struct pre_sysdep_string *)
503 xmalloc (xsum (sizeof (struct pre_sysdep_string),
504 xtimes (nintervals[m],
505 sizeof (struct pre_segment_pair))));
506 const char *str;
507 size_t str_len;
508 size_t lastpos;
509 unsigned int i;
510
511 if (m == M_ID)
512 {
513 str = msgctid; /* concatenation of mp->msgctxt + mp->msgid */
514 str_len = strlen (msgctid) + 1;
515 }
516 else
517 {
518 str = mp->msgstr;
519 str_len = mp->msgstr_len;
520 }
521
522 lastpos = 0;
523 pre->segmentcount = nintervals[m];
524 for (i = 0; i < nintervals[m]; i++)
525 {
526 size_t length;
527 const char *pointer;
528 size_t r;
529
530 pre->segments[i].segptr = str + lastpos;
531 pre->segments[i].segsize = intervals[m][i].startpos - lastpos;
532
533 length = intervals[m][i].endpos - intervals[m][i].startpos;
534 pointer = str + intervals[m][i].startpos;
535 if (length >= 2
536 && pointer[0] == '<' && pointer[length - 1] == '>')
537 {
538 /* Skip the '<' and '>' markers. */
539 length -= 2;
540 pointer += 1;
541 }
542
543 for (r = 0; r < n_sysdep_segments; r++)
544 if (sysdep_segments[r].length == length
545 && memcmp (sysdep_segments[r].pointer, pointer, length)
546 == 0)
547 break;
548 if (r == n_sysdep_segments)
549 {
550 n_sysdep_segments++;
551 sysdep_segments =
552 (struct pre_sysdep_segment *)
553 xrealloc (sysdep_segments,
554 n_sysdep_segments
555 * sizeof (struct pre_sysdep_segment));
556 sysdep_segments[r].length = length;
557 sysdep_segments[r].pointer = pointer;
558 }
559
560 pre->segments[i].sysdepref = r;
561
562 if (length == 1 && *pointer == 'I')
563 have_outdigits = true;
564
565 lastpos = intervals[m][i].endpos;
566 }
567 pre->segments[i].segptr = str + lastpos;
568 pre->segments[i].segsize = str_len - lastpos;
569 pre->segments[i].sysdepref = SEGMENTS_END;
570
571 sysdep_msg_arr[n_sysdep_strings].str[m] = pre;
572 }
573
574 sysdep_msg_arr[n_sysdep_strings].id_plural = mp->msgid_plural;
575 sysdep_msg_arr[n_sysdep_strings].id_plural_len =
576 (mp->msgid_plural != NULL ? strlen (mp->msgid_plural) + 1 : 0);
577 n_sysdep_strings++;
578 }
579 else
580 {
581 /* Static string pair. */
582 msg_arr[nstrings].str[M_ID].pointer = msgctid;
583 msg_arr[nstrings].str[M_ID].length = strlen (msgctid) + 1;
584 msg_arr[nstrings].str[M_STR].pointer = mp->msgstr;
585 msg_arr[nstrings].str[M_STR].length = mp->msgstr_len;
586 msg_arr[nstrings].id_plural = mp->msgid_plural;
587 msg_arr[nstrings].id_plural_len =
588 (mp->msgid_plural != NULL ? strlen (mp->msgid_plural) + 1 : 0);
589 nstrings++;
590 }
591
592 {
593 size_t m;
594
595 for (m = 0; m < 2; m++)
596 if (intervals[m] != NULL)
597 free (intervals[m]);
598 }
599 }
600 }
601
602 /* Second pass: Instantiate the system dependent string pairs and add them to
603 the table of static string pairs. */
604 if (!no_redundancy && n_sysdep_strings > 0)
605 {
606 /* Create a temporary hash table of msg_arr[*].str[M_ID], to guarantee
607 fast lookups. */
608 hash_table static_msgids;
609
610 hash_init (&static_msgids, 10);
611 {
612 size_t i;
613
614 for (i = 0; i < nstrings; i++)
615 hash_insert_entry (&static_msgids,
616 msg_arr[i].str[M_ID].pointer,
617 msg_arr[i].str[M_ID].length,
618 NULL);
619 }
620
621 size_t ss;
622
623 for (ss = 0; ss < n_sysdep_strings; ss++)
624 {
625 size_t u;
626
627 for (u = 0; u < SIZEOF (useful_instantiation_rules); u++)
628 {
629 const struct sysdep_instantiation_rule *instrule =
630 &useful_instantiation_rules[u];
631 bool supported = true;
632 struct pre_string expansion[2];
633 size_t m;
634
635 for (m = 0; m < 2; m++)
636 {
637 struct pre_sysdep_string *pre = sysdep_msg_arr[ss].str[m];
638 unsigned int segmentcount = pre->segmentcount;
639 size_t expansion_length;
640 char *expansion_pointer;
641 unsigned int i;
642
643 /* Compute the length of the expansion. */
644 expansion_length = 0;
645 i = 0;
646 do
647 {
648 expansion_length += pre->segments[i].segsize;
649
650 size_t r = pre->segments[i].sysdepref;
651 if (r == SEGMENTS_END)
652 break;
653 const char *segment_expansion =
654 get_sysdep_segment_value (sysdep_segments[r], instrule);
655 if (segment_expansion == NULL)
656 {
657 supported = false;
658 break;
659 }
660 expansion_length += strlen (segment_expansion);
661 }
662 while (i++ < segmentcount);
663 if (!supported)
664 break;
665
666 /* Compute the expansion. */
667 expansion_pointer = (char *) xmalloc (expansion_length);
668 {
669 char *p = expansion_pointer;
670
671 i = 0;
672 do
673 {
674 memcpy (p, pre->segments[i].segptr, pre->segments[i].segsize);
675 p += pre->segments[i].segsize;
676
677 size_t r = pre->segments[i].sysdepref;
678 if (r == SEGMENTS_END)
679 break;
680 const char *segment_expansion =
681 get_sysdep_segment_value (sysdep_segments[r], instrule);
682 if (segment_expansion == NULL)
683 /* Should already have set supported = false above. */
684 abort ();
685 memcpy (p, segment_expansion, strlen (segment_expansion));
686 p += strlen (segment_expansion);
687 }
688 while (i++ < segmentcount);
689 if (p != expansion_pointer + expansion_length)
690 /* The two loops are not in sync. */
691 abort ();
692 }
693
694 expansion[m].length = expansion_length;
695 expansion[m].pointer = expansion_pointer;
696 }
697
698 if (supported)
699 {
700 /* Don't overwrite existing static string pairs. */
701 if (hash_insert_entry (&static_msgids,
702 expansion[M_ID].pointer,
703 expansion[M_ID].length,
704 NULL)
705 != NULL)
706 {
707 if (nstrings == msg_arr_allocated)
708 {
709 msg_arr_allocated = 2 * msg_arr_allocated + 1;
710 msg_arr =
711 (struct pre_message *)
712 xreallocarray (msg_arr, msg_arr_allocated,
713 sizeof (struct pre_message));
714 }
715 msg_arr[nstrings].str[M_ID] = expansion[M_ID];
716 msg_arr[nstrings].str[M_STR] = expansion[M_STR];
717 msg_arr[nstrings].id_plural = sysdep_msg_arr[ss].id_plural;
718 msg_arr[nstrings].id_plural_len = sysdep_msg_arr[ss].id_plural_len;
719 nstrings++;
720 }
721 }
722 }
723 }
724
725 hash_destroy (&static_msgids);
726 }
727
728 /* Sort the table according to original string. */
729 if (nstrings > 0)
730 qsort (msg_arr, nstrings, sizeof (struct pre_message), compare_id);
731
732 /* We need major revision 1 if there are system dependent strings that use
733 "I" because older versions of gettext() crash when this occurs in a .mo
734 file. Otherwise use major revision 0. */
735 major_revision =
736 (have_outdigits ? MO_REVISION_NUMBER_WITH_SYSDEP_I : MO_REVISION_NUMBER);
737
738 /* We need minor revision 1 if there are system dependent strings.
739 Otherwise we choose minor revision 0 because it's supported by older
740 versions of libintl and revision 1 isn't. */
741 minor_revision = (n_sysdep_strings > 0 ? 1 : 0);
742
743 /* In minor revision >= 1, the hash table is obligatory. */
744 omit_hash_table = (no_hash_table && minor_revision == 0);
745
746 /* This should be explained:
747 Each string has an associate hashing value V, computed by a fixed
748 function. To locate the string we use open addressing with double
749 hashing. The first index will be V % M, where M is the size of the
750 hashing table. If no entry is found, iterating with a second,
751 independent hashing function takes place. This second value will
752 be 1 + V % (M - 2).
753 The approximate number of probes will be
754
755 for unsuccessful search: (1 - N / M) ^ -1
756 for successful search: - (N / M) ^ -1 * ln (1 - N / M)
757
758 where N is the number of keys.
759
760 If we now choose M to be the next prime bigger than 4 / 3 * N,
761 we get the values
762 4 and 1.85 resp.
763 Because unsuccessful searches are unlikely this is a good value.
764 Formulas: [Knuth, The Art of Computer Programming, Volume 3,
765 Sorting and Searching, 1973, Addison Wesley] */
766 if (!omit_hash_table)
767 {
768 /* N is the number of static string pairs (filled in here, below)
769 plus the number of system dependent string pairs (filled at runtime,
770 in loadmsgcat.c). */
771 hash_tab_size = next_prime (((nstrings + n_sysdep_strings) * 4) / 3);
772 /* Ensure M > 2. */
773 if (hash_tab_size <= 2)
774 hash_tab_size = 3;
775 }
776 else
777 hash_tab_size = 0;
778
779 /* Third pass: Fill the structure describing the header. At the same time,
780 compute the sizes and offsets of the non-string parts of the file. */
781
782 /* Magic number. */
783 header.magic = _MAGIC;
784 /* Revision number of file format. */
785 header.revision = (major_revision << 16) + minor_revision;
786
787 header_size =
788 (minor_revision == 0
789 ? offsetof (struct mo_file_header, n_sysdep_segments)
790 : sizeof (struct mo_file_header));
791 offset = header_size;
792
793 /* Number of static string pairs. */
794 header.nstrings = nstrings;
795
796 /* Offset of table for original string offsets. */
797 header.orig_tab_offset = offset;
798 offset += nstrings * sizeof (struct string_desc);
799 orig_tab = XNMALLOC (nstrings, struct string_desc);
800
801 /* Offset of table for translated string offsets. */
802 header.trans_tab_offset = offset;
803 offset += nstrings * sizeof (struct string_desc);
804 trans_tab = XNMALLOC (nstrings, struct string_desc);
805
806 /* Size of hash table. */
807 header.hash_tab_size = hash_tab_size;
808 /* Offset of hash table. */
809 header.hash_tab_offset = offset;
810 offset += hash_tab_size * sizeof (nls_uint32);
811
812 if (minor_revision >= 1)
813 {
814 /* Size of table describing system dependent segments. */
815 header.n_sysdep_segments = n_sysdep_segments;
816 /* Offset of table describing system dependent segments. */
817 header.sysdep_segments_offset = offset;
818 offset += n_sysdep_segments * sizeof (struct sysdep_segment);
819
820 /* Number of system dependent string pairs. */
821 header.n_sysdep_strings = n_sysdep_strings;
822
823 /* Offset of table for original sysdep string offsets. */
824 header.orig_sysdep_tab_offset = offset;
825 offset += n_sysdep_strings * sizeof (nls_uint32);
826
827 /* Offset of table for translated sysdep string offsets. */
828 header.trans_sysdep_tab_offset = offset;
829 offset += n_sysdep_strings * sizeof (nls_uint32);
830
831 /* System dependent string descriptors. */
832 sysdep_tab_offset = offset;
833 {
834 size_t m;
835 size_t j;
836
837 for (m = 0; m < 2; m++)
838 for (j = 0; j < n_sysdep_strings; j++)
839 offset += sizeof (struct sysdep_string)
840 + sysdep_msg_arr[j].str[m]->segmentcount
841 * sizeof (struct segment_pair);
842 }
843 }
844
845 end_offset = offset;
846
847
848 /* Fourth pass: Write the non-string parts of the file. At the same time,
849 compute the offsets of each string, including the proper alignment. */
850
851 /* Write the header out. */
852 if (byteswap)
853 {
854 BSWAP32 (header.magic);
855 BSWAP32 (header.revision);
856 BSWAP32 (header.nstrings);
857 BSWAP32 (header.orig_tab_offset);
858 BSWAP32 (header.trans_tab_offset);
859 BSWAP32 (header.hash_tab_size);
860 BSWAP32 (header.hash_tab_offset);
861 if (minor_revision >= 1)
862 {
863 BSWAP32 (header.n_sysdep_segments);
864 BSWAP32 (header.sysdep_segments_offset);
865 BSWAP32 (header.n_sysdep_strings);
866 BSWAP32 (header.orig_sysdep_tab_offset);
867 BSWAP32 (header.trans_sysdep_tab_offset);
868 }
869 }
870 fwrite (&header, header_size, 1, output_file);
871
872 /* Table for original string offsets. */
873 /* Here output_file is at position header.orig_tab_offset. */
874
875 {
876 size_t j;
877
878 for (j = 0; j < nstrings; j++)
879 {
880 offset = roundup (offset, alignment);
881 orig_tab[j].length =
882 msg_arr[j].str[M_ID].length + msg_arr[j].id_plural_len;
883 orig_tab[j].offset = offset;
884 offset += orig_tab[j].length;
885 /* Subtract 1 because of the terminating NUL. */
886 orig_tab[j].length--;
887 }
888 if (byteswap)
889 for (j = 0; j < nstrings; j++)
890 {
891 BSWAP32 (orig_tab[j].length);
892 BSWAP32 (orig_tab[j].offset);
893 }
894 fwrite (orig_tab, nstrings * sizeof (struct string_desc), 1, output_file);
895 }
896
897 /* Table for translated string offsets. */
898 /* Here output_file is at position header.trans_tab_offset. */
899
900 {
901 size_t j;
902
903 for (j = 0; j < nstrings; j++)
904 {
905 offset = roundup (offset, alignment);
906 trans_tab[j].length = msg_arr[j].str[M_STR].length;
907 trans_tab[j].offset = offset;
908 offset += trans_tab[j].length;
909 /* Subtract 1 because of the terminating NUL. */
910 trans_tab[j].length--;
911 }
912 if (byteswap)
913 for (j = 0; j < nstrings; j++)
914 {
915 BSWAP32 (trans_tab[j].length);
916 BSWAP32 (trans_tab[j].offset);
917 }
918 fwrite (trans_tab, nstrings * sizeof (struct string_desc), 1, output_file);
919 }
920
921 /* Skip this part when no hash table is needed. */
922 if (!omit_hash_table)
923 {
924 nls_uint32 *hash_tab;
925 size_t j;
926
927 /* Here output_file is at position header.hash_tab_offset. */
928
929 /* Allocate room for the hashing table to be written out. */
930 hash_tab = XNMALLOC (hash_tab_size, nls_uint32);
931 memset (hash_tab, '\0', hash_tab_size * sizeof (nls_uint32));
932
933 /* Insert all values in the hash table, following the algorithm described
934 above. */
935 for (j = 0; j < nstrings; j++)
936 {
937 nls_uint32 hash_val = hash_string (msg_arr[j].str[M_ID].pointer);
938 nls_uint32 idx = hash_val % hash_tab_size;
939
940 if (hash_tab[idx] != 0)
941 {
942 /* We need the second hashing function. */
943 nls_uint32 incr = 1 + (hash_val % (hash_tab_size - 2));
944
945 do
946 if (idx >= hash_tab_size - incr)
947 idx -= hash_tab_size - incr;
948 else
949 idx += incr;
950 while (hash_tab[idx] != 0);
951 }
952
953 hash_tab[idx] = j + 1;
954 }
955
956 /* Write the hash table out. */
957 if (byteswap)
958 for (j = 0; j < hash_tab_size; j++)
959 BSWAP32 (hash_tab[j]);
960 fwrite (hash_tab, hash_tab_size * sizeof (nls_uint32), 1, output_file);
961
962 free (hash_tab);
963 }
964
965 if (minor_revision >= 1)
966 {
967 /* Here output_file is at position header.sysdep_segments_offset. */
968
969 {
970 struct sysdep_segment *sysdep_segments_tab;
971 unsigned int i;
972
973 sysdep_segments_tab =
974 XNMALLOC (n_sysdep_segments, struct sysdep_segment);
975 for (i = 0; i < n_sysdep_segments; i++)
976 {
977 offset = roundup (offset, alignment);
978 /* The "+ 1" accounts for the trailing NUL byte. */
979 sysdep_segments_tab[i].length = sysdep_segments[i].length + 1;
980 sysdep_segments_tab[i].offset = offset;
981 offset += sysdep_segments_tab[i].length;
982 }
983
984 if (byteswap)
985 for (i = 0; i < n_sysdep_segments; i++)
986 {
987 BSWAP32 (sysdep_segments_tab[i].length);
988 BSWAP32 (sysdep_segments_tab[i].offset);
989 }
990 fwrite (sysdep_segments_tab,
991 n_sysdep_segments * sizeof (struct sysdep_segment), 1,
992 output_file);
993
994 free (sysdep_segments_tab);
995 }
996
997 {
998 nls_uint32 *sysdep_tab;
999 size_t stoffset;
1000 size_t m;
1001 size_t j;
1002
1003 sysdep_tab = XNMALLOC (n_sysdep_strings, nls_uint32);
1004 stoffset = sysdep_tab_offset;
1005
1006 for (m = 0; m < 2; m++)
1007 {
1008 /* Here output_file is at position
1009 m == M_ID -> header.orig_sysdep_tab_offset,
1010 m == M_STR -> header.trans_sysdep_tab_offset. */
1011
1012 for (j = 0; j < n_sysdep_strings; j++)
1013 {
1014 sysdep_tab[j] = stoffset;
1015 stoffset += sizeof (struct sysdep_string)
1016 + sysdep_msg_arr[j].str[m]->segmentcount
1017 * sizeof (struct segment_pair);
1018 }
1019 /* Write the table for original/translated sysdep string offsets. */
1020 if (byteswap)
1021 for (j = 0; j < n_sysdep_strings; j++)
1022 BSWAP32 (sysdep_tab[j]);
1023 fwrite (sysdep_tab, n_sysdep_strings * sizeof (nls_uint32), 1,
1024 output_file);
1025 }
1026
1027 free (sysdep_tab);
1028 }
1029
1030 /* Here output_file is at position sysdep_tab_offset. */
1031
1032 {
1033 size_t m;
1034 size_t j;
1035
1036 for (m = 0; m < 2; m++)
1037 for (j = 0; j < n_sysdep_strings; j++)
1038 {
1039 struct pre_sysdep_message *msg = &sysdep_msg_arr[j];
1040 struct pre_sysdep_string *pre = msg->str[m];
1041 struct sysdep_string *str =
1042 (struct sysdep_string *)
1043 xmalloca (sizeof (struct sysdep_string)
1044 + pre->segmentcount * sizeof (struct segment_pair));
1045 unsigned int i;
1046
1047 offset = roundup (offset, alignment);
1048 str->offset = offset;
1049 for (i = 0; i <= pre->segmentcount; i++)
1050 {
1051 str->segments[i].segsize = pre->segments[i].segsize;
1052 str->segments[i].sysdepref = pre->segments[i].sysdepref;
1053 offset += str->segments[i].segsize;
1054 }
1055 if (m == M_ID && msg->id_plural_len > 0)
1056 {
1057 str->segments[pre->segmentcount].segsize += msg->id_plural_len;
1058 offset += msg->id_plural_len;
1059 }
1060 if (byteswap)
1061 {
1062 BSWAP32 (str->offset);
1063 for (i = 0; i <= pre->segmentcount; i++)
1064 {
1065 BSWAP32 (str->segments[i].segsize);
1066 BSWAP32 (str->segments[i].sysdepref);
1067 }
1068 }
1069 fwrite (str,
1070 sizeof (struct sysdep_string)
1071 + pre->segmentcount * sizeof (struct segment_pair),
1072 1, output_file);
1073
1074 freea (str);
1075 }
1076 }
1077 }
1078
1079 /* Here output_file is at position end_offset. */
1080
1081 free (trans_tab);
1082 free (orig_tab);
1083
1084
1085 /* Fifth pass: Write the strings. */
1086
1087 offset = end_offset;
1088
1089 /* A few zero bytes for padding. */
1090 null = (char *) alloca (alignment);
1091 memset (null, '\0', alignment);
1092
1093 /* Now write the original strings. */
1094 {
1095 size_t j;
1096
1097 for (j = 0; j < nstrings; j++)
1098 {
1099 fwrite (null, roundup (offset, alignment) - offset, 1, output_file);
1100 offset = roundup (offset, alignment);
1101
1102 fwrite (msg_arr[j].str[M_ID].pointer, msg_arr[j].str[M_ID].length, 1,
1103 output_file);
1104 if (msg_arr[j].id_plural_len > 0)
1105 fwrite (msg_arr[j].id_plural, msg_arr[j].id_plural_len, 1,
1106 output_file);
1107 offset += msg_arr[j].str[M_ID].length + msg_arr[j].id_plural_len;
1108 }
1109 }
1110
1111 /* Now write the translated strings. */
1112 {
1113 size_t j;
1114
1115 for (j = 0; j < nstrings; j++)
1116 {
1117 fwrite (null, roundup (offset, alignment) - offset, 1, output_file);
1118 offset = roundup (offset, alignment);
1119
1120 fwrite (msg_arr[j].str[M_STR].pointer, msg_arr[j].str[M_STR].length, 1,
1121 output_file);
1122 offset += msg_arr[j].str[M_STR].length;
1123 }
1124 }
1125
1126 if (minor_revision >= 1)
1127 {
1128 unsigned int i;
1129 size_t m;
1130 size_t j;
1131
1132 for (i = 0; i < n_sysdep_segments; i++)
1133 {
1134 fwrite (null, roundup (offset, alignment) - offset, 1, output_file);
1135 offset = roundup (offset, alignment);
1136
1137 fwrite (sysdep_segments[i].pointer, sysdep_segments[i].length, 1,
1138 output_file);
1139 fwrite (null, 1, 1, output_file);
1140 offset += sysdep_segments[i].length + 1;
1141 }
1142
1143 for (m = 0; m < 2; m++)
1144 for (j = 0; j < n_sysdep_strings; j++)
1145 {
1146 struct pre_sysdep_message *msg = &sysdep_msg_arr[j];
1147 struct pre_sysdep_string *pre = msg->str[m];
1148
1149 fwrite (null, roundup (offset, alignment) - offset, 1,
1150 output_file);
1151 offset = roundup (offset, alignment);
1152
1153 for (i = 0; i <= pre->segmentcount; i++)
1154 {
1155 fwrite (pre->segments[i].segptr, pre->segments[i].segsize, 1,
1156 output_file);
1157 offset += pre->segments[i].segsize;
1158 }
1159 if (m == M_ID && msg->id_plural_len > 0)
1160 {
1161 fwrite (msg->id_plural, msg->id_plural_len, 1, output_file);
1162 offset += msg->id_plural_len;
1163 }
1164
1165 free (pre);
1166 }
1167 }
1168
1169 freea (null);
1170 {
1171 size_t j;
1172 for (j = 0; j < mlp->nitems; j++)
1173 free (msgctid_arr[j]);
1174 }
1175 free (sysdep_msg_arr);
1176 free (msg_arr);
1177 free (msgctid_arr);
1178 }
1179
1180
1181 int
1182 msgdomain_write_mo (message_list_ty *mlp,
1183 const char *domain_name,
1184 const char *file_name,
1185 const char *input_file)
1186 {
1187 /* If no entry for this domain don't even create the file. */
1188 if (mlp->nitems != 0)
1189 {
1190 if (!no_convert_to_utf8)
1191 {
1192 /* Convert the messages to UTF-8.
1193 This is necessary because the *gettext functions in musl libc
1194 assume that both the locale encoding and the .mo encoding is UTF-8.
1195 It is also helpful for performance on glibc systems, since most
1196 locales nowadays have UTF-8 as locale encoding, whereas some PO
1197 files still are encoded in EUC-JP or so. */
1198 iconv_message_list (mlp, NULL, po_charset_utf8, input_file);
1199 }
1200
1201 /* Support for "reproducible builds": Delete information that may vary
1202 between builds in the same conditions. */
1203 message_list_delete_header_field (mlp, "POT-Creation-Date:");
1204
1205 if (strcmp (domain_name, "-") == 0)
1206 {
1207 FILE *output_file = stdout;
1208 SET_BINARY (fileno (output_file));
1209
1210 write_table (output_file, mlp);
1211
1212 /* Make sure nothing went wrong. */
1213 if (fwriteerror (output_file))
1214 error (EXIT_FAILURE, errno, _("error while writing \"%s\" file"),
1215 file_name);
1216 }
1217 else
1218 {
1219 /* Supersede, don't overwrite, the output file. Otherwise, processes
1220 that are currently using (via mmap!) the output file could crash
1221 (through SIGSEGV or SIGBUS). */
1222 struct supersede_final_action action;
1223 FILE *output_file =
1224 fopen_supersede (file_name, "wb", true, true, &action);
1225 if (output_file == NULL)
1226 {
1227 error (0, errno, _("error while opening \"%s\" for writing"),
1228 file_name);
1229 return 1;
1230 }
1231
1232 write_table (output_file, mlp);
1233
1234 /* Make sure nothing went wrong. */
1235 if (fwriteerror_supersede (output_file, &action))
1236 error (EXIT_FAILURE, errno, _("error while writing \"%s\" file"),
1237 file_name);
1238 }
1239 }
1240
1241 return 0;
1242 }