1 /* GIO - GLib Input, Output and Streaming Library
2 *
3 * Copyright (C) 2009 Red Hat, Inc.
4 *
5 * SPDX-License-Identifier: LGPL-2.1-or-later
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General
18 * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 *
20 * Author: Alexander Larsson <alexl@redhat.com>
21 */
22
23 #include "config.h"
24
25 #include "gcharsetconverter.h"
26
27 #include <errno.h>
28
29 #include "ginitable.h"
30 #include "gioerror.h"
31 #include "glibintl.h"
32
33
/* Property identifiers. They are passed to
 * g_object_class_install_property() in class_init and dispatched on in the
 * get_property/set_property implementations. */
enum {
  PROP_0,            /* first enumerator (value 0); not installed as a property */
  PROP_FROM_CHARSET, /* "from-charset" */
  PROP_TO_CHARSET,   /* "to-charset" */
  PROP_USE_FALLBACK  /* "use-fallback" */
};
40
41 /**
42 * GCharsetConverter:
43 *
44 * `GCharsetConverter` is an implementation of [iface@Gio.Converter] based on
45 * [struct@GLib.IConv].
46 */
47
/* Forward declarations of the interface initialisers: they are referenced by
 * the G_DEFINE_TYPE_WITH_CODE() invocation below but defined further down. */
static void g_charset_converter_iface_init (GConverterIface *iface);
static void g_charset_converter_initable_iface_init (GInitableIface *iface);
50
/* Instance state of a GCharsetConverter. */
struct _GCharsetConverter
{
  GObject parent_instance;

  /* Source charset name ("from-charset" property); freed in finalize. */
  char *from;
  /* Destination charset name ("to-charset" property); freed in finalize. */
  char *to;
  /* Conversion descriptor opened by the GInitable init function.  The rest
   * of the file compares it against NULL to detect an object that has not
   * been initialized. */
  GIConv iconv;
  /* Whether an invalid input byte is replaced by an escape sequence instead
   * of failing the conversion ("use-fallback" property). */
  gboolean use_fallback;
  /* Number of fallback escapes emitted so far; set back to 0 by reset. */
  guint n_fallback_errors;
};
61
/* Defines the GCharsetConverter type as a GObject subclass that implements
 * the GConverter and GInitable interfaces, wiring each interface to its
 * initialiser function. */
G_DEFINE_TYPE_WITH_CODE (GCharsetConverter, g_charset_converter, G_TYPE_OBJECT,
                         G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER,
                                                g_charset_converter_iface_init);
                         G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE,
                                                g_charset_converter_initable_iface_init))
67
68 static void
69 g_charset_converter_finalize (GObject *object)
70 {
71 GCharsetConverter *conv;
72
73 conv = G_CHARSET_CONVERTER (object);
74
75 g_free (conv->from);
76 g_free (conv->to);
77 if (conv->iconv)
78 g_iconv_close (conv->iconv);
79
80 G_OBJECT_CLASS (g_charset_converter_parent_class)->finalize (object);
81 }
82
83 static void
84 g_charset_converter_set_property (GObject *object,
85 guint prop_id,
86 const GValue *value,
87 GParamSpec *pspec)
88 {
89 GCharsetConverter *conv;
90
91 conv = G_CHARSET_CONVERTER (object);
92
93 switch (prop_id)
94 {
95 case PROP_TO_CHARSET:
96 g_free (conv->to);
97 conv->to = g_value_dup_string (value);
98 break;
99
100 case PROP_FROM_CHARSET:
101 g_free (conv->from);
102 conv->from = g_value_dup_string (value);
103 break;
104
105 case PROP_USE_FALLBACK:
106 conv->use_fallback = g_value_get_boolean (value);
107 break;
108
109 default:
110 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
111 break;
112 }
113
114 }
115
116 static void
117 g_charset_converter_get_property (GObject *object,
118 guint prop_id,
119 GValue *value,
120 GParamSpec *pspec)
121 {
122 GCharsetConverter *conv;
123
124 conv = G_CHARSET_CONVERTER (object);
125
126 switch (prop_id)
127 {
128 case PROP_TO_CHARSET:
129 g_value_set_string (value, conv->to);
130 break;
131
132 case PROP_FROM_CHARSET:
133 g_value_set_string (value, conv->from);
134 break;
135
136 case PROP_USE_FALLBACK:
137 g_value_set_boolean (value, conv->use_fallback);
138 break;
139
140 default:
141 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
142 break;
143 }
144 }
145
146 static void
147 g_charset_converter_class_init (GCharsetConverterClass *klass)
148 {
149 GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
150
151 gobject_class->finalize = g_charset_converter_finalize;
152 gobject_class->get_property = g_charset_converter_get_property;
153 gobject_class->set_property = g_charset_converter_set_property;
154
155 /**
156 * GCharsetConverter:to-charset:
157 *
158 * The character encoding to convert to.
159 *
160 * Since: 2.24
161 */
162 g_object_class_install_property (gobject_class,
163 PROP_TO_CHARSET,
164 g_param_spec_string ("to-charset", NULL, NULL,
165 NULL,
166 G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
167 G_PARAM_STATIC_STRINGS));
168
169 /**
170 * GCharsetConverter:from-charset:
171 *
172 * The character encoding to convert from.
173 *
174 * Since: 2.24
175 */
176 g_object_class_install_property (gobject_class,
177 PROP_FROM_CHARSET,
178 g_param_spec_string ("from-charset", NULL, NULL,
179 NULL,
180 G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
181 G_PARAM_STATIC_STRINGS));
182
183 /**
184 * GCharsetConverter:use-fallback:
185 *
186 * Use fallback (of form `\<hexval>`) for invalid bytes.
187 *
188 * Since: 2.24
189 */
190 g_object_class_install_property (gobject_class,
191 PROP_USE_FALLBACK,
192 g_param_spec_boolean ("use-fallback", NULL, NULL,
193 FALSE,
194 G_PARAM_READWRITE |
195 G_PARAM_CONSTRUCT |
196 G_PARAM_STATIC_STRINGS));
197 }
198
/* Instance initialiser.  Intentionally empty: the conversion descriptor is
 * opened later, in g_charset_converter_initable_init(). */
static void
g_charset_converter_init (GCharsetConverter *local)
{
}
203
204
205 /**
206 * g_charset_converter_new:
207 * @to_charset: destination charset
208 * @from_charset: source charset
209 * @error: #GError for error reporting, or %NULL to ignore.
210 *
211 * Creates a new #GCharsetConverter.
212 *
213 * Returns: a new #GCharsetConverter or %NULL on error.
214 *
215 * Since: 2.24
216 **/
217 GCharsetConverter *
218 g_charset_converter_new (const gchar *to_charset,
219 const gchar *from_charset,
220 GError **error)
221 {
222 GCharsetConverter *conv;
223
224 conv = g_initable_new (G_TYPE_CHARSET_CONVERTER,
225 NULL, error,
226 "to-charset", to_charset,
227 "from-charset", from_charset,
228 NULL);
229
230 return conv;
231 }
232
233 static void
234 g_charset_converter_reset (GConverter *converter)
235 {
236 GCharsetConverter *conv = G_CHARSET_CONVERTER (converter);
237
238 if (conv->iconv == NULL)
239 {
240 g_warning ("Invalid object, not initialized");
241 return;
242 }
243
244 g_iconv (conv->iconv, NULL, NULL, NULL, NULL);
245 conv->n_fallback_errors = 0;
246 }
247
/* GConverter convert handler.
 *
 * Runs one g_iconv() step from @inbuf (of @inbuf_size bytes) into @outbuf
 * (of @outbuf_size bytes) and stores the number of bytes consumed and
 * produced in @bytes_read and @bytes_written.
 *
 * With empty input the call is only valid when @flags contains
 * G_CONVERTER_INPUT_AT_END or G_CONVERTER_FLUSH; g_iconv() is then called
 * with a NULL input buffer.  Empty input without either flag fails with
 * G_IO_ERROR_PARTIAL_INPUT.
 *
 * A g_iconv() failure is only reported when no input at all was consumed;
 * otherwise the partial progress is returned as success.  errno is mapped
 * as follows:
 *   EINVAL -> G_IO_ERROR_PARTIAL_INPUT
 *   E2BIG  -> G_IO_ERROR_NO_SPACE
 *   EILSEQ -> G_IO_ERROR_INVALID_DATA, or, when use_fallback is set, the
 *             offending byte is emitted as a 3-byte escape (backslash plus
 *             two upper-case hex digits) and the call succeeds
 *   other  -> G_IO_ERROR_FAILED
 *
 * Returns G_CONVERTER_ERROR on failure (with @error set), otherwise
 * G_CONVERTER_FINISHED / G_CONVERTER_FLUSHED for an empty-input call with
 * the respective flag, or G_CONVERTER_CONVERTED.
 */
static GConverterResult
g_charset_converter_convert (GConverter *converter,
                             const void *inbuf,
                             gsize inbuf_size,
                             void *outbuf,
                             gsize outbuf_size,
                             GConverterFlags flags,
                             gsize *bytes_read,
                             gsize *bytes_written,
                             GError **error)
{
  GCharsetConverter *conv;
  gsize res;
  GConverterResult ret;
  gchar *inbufp, *outbufp;
  gsize in_left, out_left;
  int errsv;
  gboolean reset;

  conv = G_CHARSET_CONVERTER (converter);

  /* Refuse to run on an object without a conversion descriptor. */
  if (conv->iconv == NULL)
    {
      g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_INITIALIZED,
                           _("Invalid object, not initialized"));
      return G_CONVERTER_ERROR;
    }

  /* g_iconv() advances these cursors/counters; the original pointers are
   * kept so that the amount of progress can be computed afterwards. */
  inbufp = (char *)inbuf;
  outbufp = (char *)outbuf;
  in_left = inbuf_size;
  out_left = outbuf_size;
  reset = FALSE;

  /* If there is no input, try to flush the data; this is only legal when
   * the caller signalled end-of-input or requested a flush. */
  if (inbuf_size == 0)
    {
      if (flags & G_CONVERTER_INPUT_AT_END ||
          flags & G_CONVERTER_FLUSH)
        {
          reset = TRUE;
        }
      else
        {
          g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
                               _("Incomplete multibyte sequence in input"));
          return G_CONVERTER_ERROR;
        }
    }

  if (reset)
    /* call g_iconv with NULL inbuf to cleanup shift state */
    res = g_iconv (conv->iconv,
                   NULL, &in_left,
                   &outbufp, &out_left);
  else
    res = g_iconv (conv->iconv,
                   &inbufp, &in_left,
                   &outbufp, &out_left);

  /* Progress is the distance each cursor moved from its starting point. */
  *bytes_read = inbufp - (char *)inbuf;
  *bytes_written = outbufp - (char *)outbuf;

  /* Don't report error if we converted anything */
  if (res == (gsize) -1 && *bytes_read == 0)
    {
      /* Nothing between the g_iconv() call and this point calls a function,
       * so errno still belongs to g_iconv(). */
      errsv = errno;

      switch (errsv)
        {
        case EINVAL:
          /* Incomplete input text */
          g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
                               _("Incomplete multibyte sequence in input"));
          break;

        case E2BIG:
          /* Not enough destination space */
          g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
                               _("Not enough space in destination"));
          break;

        case EILSEQ:
          /* Invalid code sequence */
          if (conv->use_fallback)
            {
              /* The escape needs 3 output bytes. */
              if (outbuf_size < 3)
                g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
                                     _("Not enough space in destination"));
              else
                {
                  /* Emit the first input byte as backslash + two hex digits
                   * at the start of outbuf, consume exactly that byte and
                   * report success.
                   * NOTE(review): this writes from outbuf[0] and sets
                   * *bytes_written to 3, so it presumably assumes g_iconv()
                   * produced no output in this call — confirm. */
                  const char hex[] = "0123456789ABCDEF";
                  guint8 v = *(guint8 *)inbuf;
                  guint8 *out = (guint8 *)outbuf;
                  out[0] = '\\';
                  out[1] = hex[(v & 0xf0) >> 4];
                  out[2] = hex[(v & 0x0f) >> 0];
                  *bytes_read = 1;
                  *bytes_written = 3;
                  /* in_left is not read again after this point. */
                  in_left--;
                  conv->n_fallback_errors++;
                  /* Jump into the success branch below. */
                  goto ok;
                }
            }
          else
            g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
                                 _("Invalid byte sequence in conversion input"));
          break;

        default:
          g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
                       _("Error during conversion: %s"),
                       g_strerror (errsv));
          break;
        }
      ret = G_CONVERTER_ERROR;
    }
  else
    {
    ok:
      ret = G_CONVERTER_CONVERTED;

      /* An empty-input call reports which of the two flags it served;
       * INPUT_AT_END takes precedence over FLUSH when both are set. */
      if (reset &&
          (flags & G_CONVERTER_INPUT_AT_END))
        ret = G_CONVERTER_FINISHED;
      else if (reset &&
               (flags & G_CONVERTER_FLUSH))
        ret = G_CONVERTER_FLUSHED;
    }

  return ret;
}
380
381 /**
382 * g_charset_converter_set_use_fallback:
383 * @converter: a #GCharsetConverter
384 * @use_fallback: %TRUE to use fallbacks
385 *
386 * Sets the #GCharsetConverter:use-fallback property.
387 *
388 * Since: 2.24
389 */
390 void
391 g_charset_converter_set_use_fallback (GCharsetConverter *converter,
392 gboolean use_fallback)
393 {
394 use_fallback = !!use_fallback;
395
396 if (converter->use_fallback != use_fallback)
397 {
398 converter->use_fallback = use_fallback;
399 g_object_notify (G_OBJECT (converter), "use-fallback");
400 }
401 }
402
403 /**
404 * g_charset_converter_get_use_fallback:
405 * @converter: a #GCharsetConverter
406 *
407 * Gets the #GCharsetConverter:use-fallback property.
408 *
409 * Returns: %TRUE if fallbacks are used by @converter
410 *
411 * Since: 2.24
412 */
413 gboolean
414 g_charset_converter_get_use_fallback (GCharsetConverter *converter)
415 {
416 return converter->use_fallback;
417 }
418
419 /**
420 * g_charset_converter_get_num_fallbacks:
421 * @converter: a #GCharsetConverter
422 *
423 * Gets the number of fallbacks that @converter has applied so far.
424 *
425 * Returns: the number of fallbacks that @converter has applied
426 *
427 * Since: 2.24
428 */
429 guint
430 g_charset_converter_get_num_fallbacks (GCharsetConverter *converter)
431 {
432 return converter->n_fallback_errors;
433 }
434
435 static void
436 g_charset_converter_iface_init (GConverterIface *iface)
437 {
438 iface->convert = g_charset_converter_convert;
439 iface->reset = g_charset_converter_reset;
440 }
441
442 static gboolean
443 g_charset_converter_initable_init (GInitable *initable,
444 GCancellable *cancellable,
445 GError **error)
446 {
447 GCharsetConverter *conv;
448 int errsv;
449
450 g_return_val_if_fail (G_IS_CHARSET_CONVERTER (initable), FALSE);
451
452 conv = G_CHARSET_CONVERTER (initable);
453
454 if (cancellable != NULL)
455 {
456 g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
457 _("Cancellable initialization not supported"));
458 return FALSE;
459 }
460
461 conv->iconv = g_iconv_open (conv->to, conv->from);
462 errsv = errno;
463
464 if (conv->iconv == (GIConv)-1)
465 {
466 if (errsv == EINVAL)
467 g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
468 _("Conversion from character set “%s” to “%s” is not supported"),
469 conv->from, conv->to);
470 else
471 g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
472 _("Could not open converter from “%s” to “%s”"),
473 conv->from, conv->to);
474 return FALSE;
475 }
476
477 return TRUE;
478 }
479
/* GInitable interface initialiser: installs this file's init implementation
 * into the interface vtable. */
static void
g_charset_converter_initable_iface_init (GInitableIface *iface)
{
  iface->init = g_charset_converter_initable_init;
}