/*
 * manconv_main.c: convert manual page from one encoding to another
 *
 * Copyright (C) 2007, 2008 Colin Watson.
 *
 * This file is part of man-db.
 *
 * man-db is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * man-db is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with man-db; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
22
23 #ifdef HAVE_CONFIG_H
24 # include "config.h"
25 #endif /* HAVE_CONFIG_H */
26
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
32
33 #include "argp.h"
34 #include "error.h"
35 #include "gl_array_list.h"
36 #include "gl_xlist.h"
37 #include "progname.h"
38 #include "xalloc.h"
39
40 #include "gettext.h"
41 #define _(String) gettext (String)
42 #define N_(String) gettext_noop (String)
43
44 #include "manconfig.h"
45
46 #include "appendstr.h"
47 #include "cleanup.h"
48 #include "debug.h"
49 #include "encodings.h"
50 #include "pipeline.h"
51 #include "glcontainers.h"
52 #include "sandbox.h"
53 #include "util.h"
54
55 #include "decompress.h"
56 #include "manconv.h"
57
58 int quiet = 0;
59 man_sandbox *sandbox;
60
61 static const char *from_codes;
62 static char *to_code;
63 static gl_list_t from_code;
64 static const char *filename;
65
66 static gl_list_t split_codes (const char *codestr)
67 {
68 char *codestrtok, *codestrtok_ptr;
69 char *tok;
70 gl_list_t codelist = new_string_list (GL_ARRAY_LIST, true);
71
72 if (!codestr)
73 return codelist;
74
75 codestrtok = xstrdup (codestr);
76 codestrtok_ptr = codestrtok;
77
78 for (tok = strsep (&codestrtok_ptr, ":"); tok;
79 tok = strsep (&codestrtok_ptr, ":")) {
80 if (!*tok)
81 continue; /* ignore empty fields */
82 gl_list_add_last (codelist, xstrdup (tok));
83 }
84
85 free (codestrtok);
86
87 return codelist;
88 }
89
90 const char *argp_program_version = "manconv " PACKAGE_VERSION;
91 const char *argp_program_bug_address = PACKAGE_BUGREPORT;
92 error_t argp_err_exit_status = FAIL;
93
94 static const char args_doc[] = N_("[-f CODE[:...]] -t CODE [FILENAME]");
95
96 static struct argp_option options[] = {
97 OPT ("from-code", 'f', N_("CODE[:...]"),
98 N_("possible encodings of original text")),
99 OPT ("to-code", 't', N_("CODE"), N_("encoding for output")),
100 OPT ("debug", 'd', 0, N_("emit debugging messages")),
101 OPT ("quiet", 'q', 0, N_("produce fewer warnings")),
102 OPT_HELP_COMPAT,
103 { 0 }
104 };
105
106 static error_t parse_opt (int key, char *arg, struct argp_state *state)
107 {
108 switch (key) {
109 case 'f':
110 from_codes = arg;
111 return 0;
112 case 't':
113 to_code = xstrdup (arg);
114 if (!strstr (to_code, "//"))
115 to_code = appendstr (to_code, "//TRANSLIT",
116 (void *) 0);
117 return 0;
118 case 'd':
119 debug_level = true;
120 return 0;
121 case 'q':
122 quiet = 1;
123 return 0;
124 case 'h':
125 argp_state_help (state, state->out_stream,
126 ARGP_HELP_STD_HELP);
127 break;
128 case ARGP_KEY_ARG:
129 if (filename)
130 argp_usage (state);
131 filename = arg;
132 return 0;
133 case ARGP_KEY_SUCCESS:
134 if (!to_code)
135 argp_error (state,
136 _("must specify an output "
137 "encoding"));
138 from_code = split_codes (from_codes);
139 return 0;
140 }
141 return ARGP_ERR_UNKNOWN;
142 }
143
144 static struct argp argp = { options, parse_opt, args_doc };
145
146 int main (int argc, char *argv[])
147 {
148 decompress *decomp;
149
150 set_program_name (argv[0]);
151
152 init_debug ();
153 pipeline_install_post_fork (pop_all_cleanups);
154 sandbox = sandbox_init ();
155 init_locale ();
156
157 if (argp_parse (&argp, argc, argv, 0, 0, 0))
158 exit (FAIL);
159 assert (from_code);
160
161 if (filename) {
162 decomp = decompress_open (filename, 0);
163 if (!decomp)
164 error (FAIL, 0, _("can't open %s"), filename);
165 } else
166 decomp = decompress_fdopen (dup (STDIN_FILENO));
167 decompress_start (decomp);
168
169 if (!gl_list_size (from_code)) {
170 char *lang, *page_encoding;
171
172 /* Note that we don't need to explicitly check the page's
173 * preprocessor encoding here, as the manconv function will
174 * do that itself and override the requested input encoding
175 * with it if it finds one.
176 */
177 lang = lang_dir (filename);
178 page_encoding = get_page_encoding (lang);
179 if (STREQ (page_encoding, "UTF-8")) {
180 /* Steal memory. */
181 gl_list_add_last (from_code, page_encoding);
182 debug ("guessed input encoding %s for %s\n",
183 page_encoding, filename);
184 } else {
185 gl_list_add_last (from_code, xstrdup ("UTF-8"));
186 /* Steal memory. */
187 gl_list_add_last (from_code, page_encoding);
188 debug ("guessed input encodings UTF-8:%s for %s\n",
189 page_encoding, filename);
190 }
191
192 free (lang);
193 }
194
195 if (manconv (decomp, from_code, to_code, NULL) != 0)
196 /* manconv already wrote an error message to stderr. Just
197 * exit non-zero.
198 */
199 exit (FATAL);
200
201 free (to_code);
202 gl_list_free (from_code);
203
204 decompress_wait (decomp);
205
206 sandbox_free (sandbox);
207
208 return 0;
209 }