1 /*
2 * manconv_client.c: use manconv in a pipeline
3 *
4 * Copyright (C) 2007, 2008, 2010 Colin Watson.
5 *
6 * This file is part of man-db.
7 *
8 * man-db is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * man-db is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with man-db; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #ifdef HAVE_CONFIG_H
24 # include "config.h"
25 #endif /* HAVE_CONFIG_H */
26
27 #include <string.h>
28 #include <stdlib.h>
29 #include <unistd.h>
30
31 #include "gl_array_list.h"
32 #include "gl_xlist.h"
33 #include "xalloc.h"
34 #include "xvasprintf.h"
35
36 #include "manconfig.h"
37
38 #include "pipeline.h"
39
40 #include "appendstr.h"
41 #include "glcontainers.h"
42 #include "sandbox.h"
43 #include "security.h"
44
45 #include "decompress.h"
46 #include "manconv.h"
47 #include "manconv_client.h"
48 #include "utf8.h"
49
/* Sandbox object defined outside this file.  In this file it is only
 * passed as the data argument of the pre-exec hooks registered on the
 * manconv pipeline command in add_manconv.
 */
extern man_sandbox *sandbox;
51
/* Encoding parameters handed to manconv_stdin as its data argument.
 * Instances are built by add_manconv and released by free_manconv_codes.
 */
struct manconv_codes {
	/* List of source encoding names passed to manconv; add_manconv
	 * fills it with "UTF-8" and, if different, the declared source
	 * encoding.
	 */
	gl_list_t from;
	/* Target encoding string as built by add_manconv (the target
	 * encoding with "//IGNORE" appended); freed by
	 * free_manconv_codes.
	 */
	char *to;
};
56
57 static void manconv_stdin (void *data)
58 {
59 struct manconv_codes *codes = data;
60 decompress *decomp;
61
62 decomp = decompress_fdopen (dup (STDIN_FILENO));
63 decompress_start (decomp);
64 if (manconv (decomp, codes->from, codes->to, NULL) != 0)
65 /* manconv already wrote an error message to stderr. Just
66 * exit non-zero.
67 */
68 exit (FATAL);
69 decompress_wait (decomp);
70 decompress_free (decomp);
71 }
72
/* Pre-exec hook for the external manconv command.
 *
 * DATA is forwarded unchanged to sandbox_load; add_manconv registers this
 * hook with the global sandbox object as DATA.
 */
static void manconv_pre_exec (void *data)
{
	/* The order of these two calls matters: the sandbox's seccomp
	 * filter does not permit setresuid and related calls, so
	 * privileges have to be given up before the sandbox is loaded.
	 */
	drop_privs (NULL);

	sandbox_load (data);
}
81
82 static void free_manconv_codes (void *data)
83 {
84 struct manconv_codes *codes = data;
85
86 gl_list_free (codes->from);
87 free (codes->to);
88 free (codes);
89 }
90
91 void add_manconv (pipeline *p,
92 const char *source_encoding, const char *target_encoding)
93 {
94 struct manconv_codes *codes;
95 char *name;
96 pipecmd *cmd;
97
98 if (STREQ (source_encoding, target_encoding))
99 return;
100
101 codes = xmalloc (sizeof *codes);
102 /* informational only; no shell quoting concerns */
103 name = xasprintf ("%s -f ", MANCONV);
104 codes->from = new_string_list (GL_ARRAY_LIST, true);
105 if (STREQ (source_encoding, "UTF-8")) {
106 gl_list_add_last (codes->from, xstrdup (source_encoding));
107 name = appendstr (name, source_encoding, (void *) 0);
108 } else {
109 gl_list_add_last (codes->from, xstrdup ("UTF-8"));
110 gl_list_add_last (codes->from, xstrdup (source_encoding));
111 name = appendstr (name, "UTF-8:", source_encoding, (void *) 0);
112 }
113 codes->to = xasprintf ("%s//IGNORE", target_encoding);
114 /* informational only; no shell quoting concerns */
115 name = appendstr (name, " -t ", codes->to, (void *) 0);
116 if (quiet >= 2)
117 name = appendstr (name, " -q", (void *) 0);
118
119 /* iconv_open may not work correctly in setuid processes; in GNU
120 * libc, gconv modules may be linked against other gconv modules and
121 * rely on RPATH $ORIGIN to load those modules from the correct
122 * path, but $ORIGIN is disabled in setuid processes. It is
123 * impossible to reset libc's idea of setuidness without creating a
124 * whole new process image. Therefore, if the calling process is
125 * setuid, we must drop privileges and execute manconv.
126 */
127 if (running_setuid ()) {
128 gl_list_t from = codes->from;
129 const char *from_code;
130 char *sources = NULL;
131
132 cmd = pipecmd_new_args (MANCONV, "-f", (void *) 0);
133 GL_LIST_FOREACH (from, from_code) {
134 sources = appendstr (sources, from_code, (void *) 0);
135 if (gl_list_next_node (from, from_node))
136 sources = appendstr (sources, ":", (void *) 0);
137 }
138 pipecmd_arg (cmd, sources);
139 free (sources);
140 pipecmd_args (cmd, "-t", codes->to, (void *) 0);
141 if (quiet >= 2)
142 pipecmd_arg (cmd, "-q");
143 pipecmd_pre_exec (cmd, manconv_pre_exec, sandbox_free,
144 sandbox);
145 free_manconv_codes (codes);
146 } else {
147 cmd = pipecmd_new_function (name, &manconv_stdin,
148 &free_manconv_codes, codes);
149 pipecmd_pre_exec (cmd, sandbox_load, sandbox_free, sandbox);
150 }
151 free (name);
152 pipeline_command (p, cmd);
153 }
154
155 /* Convert the result of in-process decompression to a target encoding.
156 *
157 * This converts the buffered result of decompression to a new buffer, then
158 * replaces the decompress object's buffer with the converted one for use by
159 * later stages of processing.
160 *
161 * Returns zero on success or non-zero on failure.
162 */
163 int manconv_inprocess (decompress *d,
164 const char *source_encoding,
165 const char *target_encoding)
166 {
167 gl_list_t from;
168 char *to;
169 struct manconv_outbuf outbuf;
170 int ret = 0;
171
172 if (STREQ (source_encoding, target_encoding))
173 return 0;
174
175 from = new_string_list (GL_ARRAY_LIST, true);
176 if (STREQ (source_encoding, "UTF-8"))
177 gl_list_add_last (from, xstrdup (source_encoding));
178 else {
179 if (STREQ (target_encoding, "UTF-8")) {
180 /* If the target encoding is UTF-8, then instead of
181 * starting with trial conversion from UTF-8 to
182 * UTF-8, we can start by simply performing UTF-8
183 * validation, avoiding a copy. (The source
184 * encoding cannot be UTF-8 in this case, since we
185 * already checked that the source and target
186 * encodings are different.)
187 */
188 if (utf8_validate_len (decompress_inprocess_buf (d),
189 decompress_inprocess_len (d)))
190 goto out;
191 } else
192 gl_list_add_last (from, xstrdup ("UTF-8"));
193 gl_list_add_last (from, xstrdup (source_encoding));
194 }
195 to = xasprintf ("%s//IGNORE", target_encoding);
196
197 outbuf.len = 0;
198 /* UTF-8 uses at most four bytes per Unicode code point. We assume
199 * that this conversion will be no worse than 1:4.
200 */
201 outbuf.max = decompress_inprocess_len (d) * 4;
202 outbuf.buf = xmalloc (outbuf.max);
203
204 if (manconv (d, from, to, &outbuf) == 0)
205 decompress_inprocess_replace (d, outbuf.buf, outbuf.len);
206 else {
207 /* manconv already wrote an error message to stderr. Just
208 * return non-zero.
209 */
210 free (outbuf.buf);
211 ret = -1;
212 }
213
214 free (to);
215 out:
216 gl_list_free (from);
217 return ret;
218 }