1 /* Recode Serbian text from Cyrillic to Latin script.
2 Copyright (C) 2006-2007, 2010, 2012, 2018-2023 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2006.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include "config.h"
20 #endif
21
#include <errno.h>
#include <getopt.h>
#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
28
29 #if HAVE_ICONV
30 #include <iconv.h>
31 #endif
32
33 #include "noreturn.h"
34 #include "closeout.h"
35 #include "error.h"
36 #include "progname.h"
37 #include "relocatable.h"
38 #include "basename-lgpl.h"
39 #include "xalloc.h"
40 #include "localcharset.h"
41 #include "c-strcase.h"
42 #include "xstriconv.h"
43 #include "filters.h"
44 #include "propername.h"
45 #include "gettext.h"
46
47 #define _(str) gettext (str)
48
49
/* Table of long options understood by getopt_long.  Both entries map to the
   value of their short-option equivalent; an all-zero entry ends the table.  */
static const struct option long_options[] =
{
  { .name = "help",    .has_arg = no_argument, .flag = NULL, .val = 'h' },
  { .name = "version", .has_arg = no_argument, .flag = NULL, .val = 'V' },
  { .name = NULL,      .has_arg = 0,           .flag = NULL, .val = 0 }
};
57
58 /* Forward declaration of local functions. */
59 _GL_NORETURN_FUNC static void usage (int status);
60 static void process (FILE *stream);
61
62 int
63 main (int argc, char *argv[])
64 {
65 /* Default values for command line options. */
66 bool do_help = false;
67 bool do_version = false;
68
69 int opt;
70
71 /* Set program name for message texts. */
72 set_program_name (argv[0]);
73
74 /* Set locale via LC_ALL. */
75 setlocale (LC_ALL, "");
76
77 /* Set the text message domain. */
78 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
79 textdomain (PACKAGE);
80
81 /* Ensure that write errors on stdout are detected. */
82 atexit (close_stdout);
83
84 /* Parse command line options. */
85 while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
86 switch (opt)
87 {
88 case '\0': /* Long option. */
89 break;
90 case 'h':
91 do_help = true;
92 break;
93 case 'V':
94 do_version = true;
95 break;
96 default:
97 usage (EXIT_FAILURE);
98 }
99
100 /* Version information is requested. */
101 if (do_version)
102 {
103 printf ("%s (GNU %s) %s\n", last_component (program_name),
104 PACKAGE, VERSION);
105 /* xgettext: no-wrap */
106 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
107 License GPLv3+: GNU GPL version 3 or later <%s>\n\
108 This is free software: you are free to change and redistribute it.\n\
109 There is NO WARRANTY, to the extent permitted by law.\n\
110 "),
111 "2006-2023", "https://gnu.org/licenses/gpl.html");
112 printf (_("Written by %s and %s.\n"),
113 /* TRANSLATORS: This is a proper name. The last name is
114 (with Unicode escapes) "\u0160egan" or (with HTML entities)
115 "Šegan". */
116 proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
117 proper_name ("Bruno Haible"));
118 exit (EXIT_SUCCESS);
119 }
120
121 /* Help is requested. */
122 if (do_help)
123 usage (EXIT_SUCCESS);
124
125 if (argc - optind > 0)
126 error (EXIT_FAILURE, 0, _("too many arguments"));
127
128 process (stdin);
129
130 exit (EXIT_SUCCESS);
131 }
132
133
134 /* Display usage information and exit. */
135 static void
136 usage (int status)
137 {
138 if (status != EXIT_SUCCESS)
139 fprintf (stderr, _("Try '%s --help' for more information.\n"),
140 program_name);
141 else
142 {
143 /* xgettext: no-wrap */
144 printf (_("\
145 Usage: %s [OPTION]\n\
146 "), program_name);
147 printf ("\n");
148 /* xgettext: no-wrap */
149 printf (_("\
150 Recode Serbian text from Cyrillic to Latin script.\n"));
151 /* xgettext: no-wrap */
152 printf (_("\
153 The input text is read from standard input. The converted text is output to\n\
154 standard output.\n"));
155 printf ("\n");
156 /* xgettext: no-wrap */
157 printf (_("\
158 Informative output:\n"));
159 /* xgettext: no-wrap */
160 printf (_("\
161 -h, --help display this help and exit\n"));
162 /* xgettext: no-wrap */
163 printf (_("\
164 -V, --version output version information and exit\n"));
165 printf ("\n");
166 /* TRANSLATORS: The first placeholder is the web address of the Savannah
167 project of this package. The second placeholder is the bug-reporting
168 email address for this package. Please add _another line_ saying
169 "Report translation bugs to <...>\n" with the address for translation
170 bugs (typically your translation team's web or email address). */
171 printf(_("\
172 Report bugs in the bug tracker at <%s>\n\
173 or by email to <%s>.\n"),
174 "https://savannah.gnu.org/projects/gettext",
175 "bug-gettext@gnu.org");
176 }
177
178 exit (status);
179 }
180
181
182 /* Routines for reading a line.
183 Don't use routines that drop NUL bytes. Don't use getline(), because it
184 doesn't provide a good error message in case of memory allocation failure.
185 The gnulib module 'linebuffer' is nearly the right thing, except that we
186 don't want an extra newline at the end of file. */
187
/* Growable byte buffer holding one line of text.  */

struct linebuffer
{
  /* Number of bytes allocated at BUFFER.  */
  size_t size;
  /* Number of bytes of BUFFER that are in use.  */
  size_t length;
  /* Storage for the line, or NULL when nothing is allocated yet.  */
  char *buffer;
};
196
197 /* Initialize linebuffer LINEBUFFER for use. */
198 static inline void
199 init_linebuffer (struct linebuffer *lb)
200 {
201 lb->size = 0;
202 lb->length = 0;
203 lb->buffer = NULL;
204 }
205
206 /* Read an arbitrarily long line of text from STREAM into linebuffer LB.
207 Keep the newline. Do not NUL terminate.
208 Return LINEBUFFER, except at end of file return NULL. */
209 static struct linebuffer *
210 read_linebuffer (struct linebuffer *lb, FILE *stream)
211 {
212 if (feof (stream))
213 return NULL;
214 else
215 {
216 char *p = lb->buffer;
217 char *end = lb->buffer + lb->size;
218
219 for (;;)
220 {
221 int c = getc (stream);
222 if (c == EOF)
223 {
224 if (p == lb->buffer || ferror (stream))
225 return NULL;
226 break;
227 }
228 if (p == end)
229 {
230 size_t oldsize = lb->size; /* = p - lb->buffer */
231 size_t newsize = 2 * oldsize + 40;
232 lb->buffer = (char *) xrealloc (lb->buffer, newsize);
233 lb->size = newsize;
234 p = lb->buffer + oldsize;
235 end = lb->buffer + newsize;
236 }
237 *p++ = c;
238 if (c == '\n')
239 break;
240 }
241
242 lb->length = p - lb->buffer;
243 return lb;
244 }
245 }
246
247 /* Free linebuffer LB and its data, all allocated with malloc. */
248 static inline void
249 destroy_linebuffer (struct linebuffer *lb)
250 {
251 if (lb->buffer != NULL)
252 free (lb->buffer);
253 }
254
255
256 /* Process the input and produce the output. */
257 static void
258 process (FILE *stream)
259 {
260 struct linebuffer lb;
261 const char *locale_code = locale_charset ();
262 bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
263 #if HAVE_ICONV
264 iconv_t conv_to_utf8 = (iconv_t)(-1);
265 iconv_t conv_from_utf8 = (iconv_t)(-1);
266 char *last_utf8_line;
267 size_t last_utf8_line_len;
268 char *last_backconv_line;
269 size_t last_backconv_line_len;
270 #endif
271
272 init_linebuffer (&lb);
273
274 /* Initialize the conversion descriptors. */
275 if (need_code_conversion)
276 {
277 #if HAVE_ICONV
278 /* Avoid glibc-2.1 bug with EUC-KR. */
279 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
280 && !defined _LIBICONV_VERSION
281 if (strcmp (locale_code, "EUC-KR") != 0)
282 # endif
283 {
284 conv_to_utf8 = iconv_open ("UTF-8", locale_code);
285 /* TODO: Maybe append //TRANSLIT here? */
286 conv_from_utf8 = iconv_open (locale_code, "UTF-8");
287 }
288 if (conv_to_utf8 == (iconv_t)(-1))
289 error (EXIT_FAILURE, 0,
290 _("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), and iconv() does not support this conversion."),
291 locale_code, "UTF-8", last_component (program_name));
292 if (conv_from_utf8 == (iconv_t)(-1))
293 error (EXIT_FAILURE, 0,
294 _("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), and iconv() does not support this conversion."),
295 "UTF-8", locale_code, last_component (program_name));
296 last_utf8_line = NULL;
297 last_utf8_line_len = 0;
298 last_backconv_line = NULL;
299 last_backconv_line_len = 0;
300 #else
301 error (EXIT_FAILURE, 0,
302 _("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). This version was built without iconv()."),
303 locale_code, "UTF-8", last_component (program_name));
304 #endif
305 }
306
307 /* Read the input line by line.
308 Processing it character by character is not possible, because some
309 filters need to look at adjacent characters. Processing the entire file
310 in a whole chunk would take an excessive amount of memory. */
311 for (;;)
312 {
313 char *line;
314 size_t line_len;
315 char *filtered_line;
316 size_t filtered_line_len;
317
318 /* Read a line. */
319 if (read_linebuffer (&lb, stream) == NULL)
320 break;
321 line = lb.buffer;
322 line_len = lb.length;
323 /* read_linebuffer always returns a non-void result. */
324 if (line_len == 0)
325 abort ();
326
327 #if HAVE_ICONV
328 /* Convert it to UTF-8. */
329 if (need_code_conversion)
330 {
331 char *utf8_line = last_utf8_line;
332 size_t utf8_line_len = last_utf8_line_len;
333
334 if (xmem_cd_iconv (line, line_len, conv_to_utf8,
335 &utf8_line, &utf8_line_len) != 0)
336 error (EXIT_FAILURE, errno,
337 _("input is not valid in \"%s\" encoding"),
338 locale_code);
339 if (utf8_line != last_utf8_line)
340 {
341 if (last_utf8_line != NULL)
342 free (last_utf8_line);
343 last_utf8_line = utf8_line;
344 last_utf8_line_len = utf8_line_len;
345 }
346
347 line = utf8_line;
348 line_len = utf8_line_len;
349 }
350 #endif
351
352 /* Apply the filter. */
353 serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);
354
355 #if HAVE_ICONV
356 /* Convert it back to the original encoding. */
357 if (need_code_conversion)
358 {
359 char *backconv_line = last_backconv_line;
360 size_t backconv_line_len = last_backconv_line_len;
361
362 if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
363 &backconv_line, &backconv_line_len) != 0)
364 error (EXIT_FAILURE, errno,
365 _("error while converting from \"%s\" encoding to \"%s\" encoding"),
366 "UTF-8", locale_code);
367 if (backconv_line != last_backconv_line)
368 {
369 if (last_backconv_line != NULL)
370 free (last_backconv_line);
371 last_backconv_line = backconv_line;
372 last_backconv_line_len = backconv_line_len;
373 }
374
375 fwrite (backconv_line, 1, backconv_line_len, stdout);
376 }
377 else
378 #endif
379 fwrite (filtered_line, 1, filtered_line_len, stdout);
380
381 free (filtered_line);
382 }
383
384 #if HAVE_ICONV
385 if (need_code_conversion)
386 {
387 iconv_close (conv_from_utf8);
388 iconv_close (conv_to_utf8);
389 }
390 #endif
391
392 destroy_linebuffer (&lb);
393 }