(root)/
glib-2.79.0/
gio/
gcharsetconverter.c
       1  /* GIO - GLib Input, Output and Streaming Library
       2   *
       3   * Copyright (C) 2009 Red Hat, Inc.
       4   *
       5   * SPDX-License-Identifier: LGPL-2.1-or-later
       6   *
       7   * This library is free software; you can redistribute it and/or
       8   * modify it under the terms of the GNU Lesser General Public
       9   * License as published by the Free Software Foundation; either
      10   * version 2.1 of the License, or (at your option) any later version.
      11   *
      12   * This library is distributed in the hope that it will be useful,
      13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15   * Lesser General Public License for more details.
      16   *
      17   * You should have received a copy of the GNU Lesser General
      18   * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
      19   *
      20   * Author: Alexander Larsson <alexl@redhat.com>
      21   */
      22  
      23  #include "config.h"
      24  
      25  #include "gcharsetconverter.h"
      26  
      27  #include <errno.h>
      28  
      29  #include "ginitable.h"
      30  #include "gioerror.h"
      31  #include "glibintl.h"
      32  
      33  
/* Property identifiers used by the get/set property handlers and when
 * installing properties in class_init. PROP_0 is a placeholder that is
 * never installed as a property. */
enum {
  PROP_0,
  PROP_FROM_CHARSET,
  PROP_TO_CHARSET,
  PROP_USE_FALLBACK
};
      40  
      41  /**
      42   * GCharsetConverter:
      43   *
      44   * `GCharsetConverter` is an implementation of [iface@Gio.Converter] based on
      45   * [struct@GLib.IConv].
      46   */
      47  
/* Forward declarations of the interface initializers, needed because the
 * type-definition macro references them before they are defined. */
static void g_charset_converter_iface_init          (GConverterIface *iface);
static void g_charset_converter_initable_iface_init (GInitableIface  *iface);
      50  
/* Instance data for GCharsetConverter. */
struct _GCharsetConverter
{
  GObject parent_instance;

  /* Source charset name; owned copy of the "from-charset" property. */
  char *from;
  /* Destination charset name; owned copy of the "to-charset" property. */
  char *to;
  /* Conversion descriptor obtained from g_iconv_open() in the GInitable
   * init handler; NULL until then. */
  GIConv iconv;
  /* Whether an invalid input byte is emitted as a backslash plus two hex
   * digits instead of producing an error. */
  gboolean use_fallback;
  /* Count of fallbacks emitted; zeroed by the GConverter reset handler. */
  guint n_fallback_errors;
};
      61  
/* Defines the GCharsetConverter type as a GObject subclass and attaches the
 * GConverter and GInitable interface initializers declared above. */
G_DEFINE_TYPE_WITH_CODE (GCharsetConverter, g_charset_converter, G_TYPE_OBJECT,
			 G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER,
						g_charset_converter_iface_init);
			 G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE,
						g_charset_converter_initable_iface_init))
      67  
      68  static void
      69  g_charset_converter_finalize (GObject *object)
      70  {
      71    GCharsetConverter *conv;
      72  
      73    conv = G_CHARSET_CONVERTER (object);
      74  
      75    g_free (conv->from);
      76    g_free (conv->to);
      77    if (conv->iconv)
      78      g_iconv_close (conv->iconv);
      79  
      80    G_OBJECT_CLASS (g_charset_converter_parent_class)->finalize (object);
      81  }
      82  
      83  static void
      84  g_charset_converter_set_property (GObject      *object,
      85  				  guint         prop_id,
      86  				  const GValue *value,
      87  				  GParamSpec   *pspec)
      88  {
      89    GCharsetConverter *conv;
      90  
      91    conv = G_CHARSET_CONVERTER (object);
      92  
      93    switch (prop_id)
      94      {
      95      case PROP_TO_CHARSET:
      96        g_free (conv->to);
      97        conv->to = g_value_dup_string (value);
      98        break;
      99  
     100      case PROP_FROM_CHARSET:
     101        g_free (conv->from);
     102        conv->from = g_value_dup_string (value);
     103        break;
     104  
     105      case PROP_USE_FALLBACK:
     106        conv->use_fallback = g_value_get_boolean (value);
     107        break;
     108  
     109      default:
     110        G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
     111        break;
     112      }
     113  
     114  }
     115  
     116  static void
     117  g_charset_converter_get_property (GObject    *object,
     118  				  guint       prop_id,
     119  				  GValue     *value,
     120  				  GParamSpec *pspec)
     121  {
     122    GCharsetConverter *conv;
     123  
     124    conv = G_CHARSET_CONVERTER (object);
     125  
     126    switch (prop_id)
     127      {
     128      case PROP_TO_CHARSET:
     129        g_value_set_string (value, conv->to);
     130        break;
     131  
     132      case PROP_FROM_CHARSET:
     133        g_value_set_string (value, conv->from);
     134        break;
     135  
     136      case PROP_USE_FALLBACK:
     137        g_value_set_boolean (value, conv->use_fallback);
     138        break;
     139  
     140      default:
     141        G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
     142        break;
     143      }
     144  }
     145  
     146  static void
     147  g_charset_converter_class_init (GCharsetConverterClass *klass)
     148  {
     149    GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
     150  
     151    gobject_class->finalize = g_charset_converter_finalize;
     152    gobject_class->get_property = g_charset_converter_get_property;
     153    gobject_class->set_property = g_charset_converter_set_property;
     154  
     155    /**
     156     * GCharsetConverter:to-charset:
     157     *
     158     * The character encoding to convert to.
     159     *
     160     * Since: 2.24
     161     */
     162    g_object_class_install_property (gobject_class,
     163  				   PROP_TO_CHARSET,
     164  				   g_param_spec_string ("to-charset", NULL, NULL,
     165  							NULL,
     166  							G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
     167  							G_PARAM_STATIC_STRINGS));
     168  
     169    /**
     170     * GCharsetConverter:from-charset:
     171     *
     172     * The character encoding to convert from.
     173     *
     174     * Since: 2.24
     175     */
     176    g_object_class_install_property (gobject_class,
     177  				   PROP_FROM_CHARSET,
     178  				   g_param_spec_string ("from-charset", NULL, NULL,
     179  							NULL,
     180  							G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
     181  							G_PARAM_STATIC_STRINGS));
     182  
     183    /**
     184     * GCharsetConverter:use-fallback:
     185     *
     186     * Use fallback (of form `\<hexval>`) for invalid bytes.
     187     *
     188     * Since: 2.24
     189     */
     190    g_object_class_install_property (gobject_class,
     191  				   PROP_USE_FALLBACK,
     192  				   g_param_spec_boolean ("use-fallback", NULL, NULL,
     193  							 FALSE,
     194  							 G_PARAM_READWRITE |
     195  							 G_PARAM_CONSTRUCT |
     196  							 G_PARAM_STATIC_STRINGS));
     197  }
     198  
/* Instance initializer; the body is empty, so no fields are assigned here. */
static void
g_charset_converter_init (GCharsetConverter *local)
{
}
     203  
     204  
     205  /**
     206   * g_charset_converter_new:
     207   * @to_charset: destination charset
     208   * @from_charset: source charset
     209   * @error: #GError for error reporting, or %NULL to ignore.
     210   *
     211   * Creates a new #GCharsetConverter.
     212   *
     213   * Returns: a new #GCharsetConverter or %NULL on error.
     214   *
     215   * Since: 2.24
     216   **/
     217  GCharsetConverter *
     218  g_charset_converter_new (const gchar *to_charset,
     219  			 const gchar *from_charset,
     220  			 GError      **error)
     221  {
     222    GCharsetConverter *conv;
     223  
     224    conv = g_initable_new (G_TYPE_CHARSET_CONVERTER,
     225  			 NULL, error,
     226  			 "to-charset", to_charset,
     227  			 "from-charset", from_charset,
     228  			 NULL);
     229  
     230    return conv;
     231  }
     232  
     233  static void
     234  g_charset_converter_reset (GConverter *converter)
     235  {
     236    GCharsetConverter *conv = G_CHARSET_CONVERTER (converter);
     237  
     238    if (conv->iconv == NULL)
     239      {
     240        g_warning ("Invalid object, not initialized");
     241        return;
     242      }
     243  
     244    g_iconv (conv->iconv, NULL, NULL, NULL, NULL);
     245    conv->n_fallback_errors = 0;
     246  }
     247  
/* GConverter convert handler: runs one g_iconv() call over @inbuf and
 * writes the converted bytes to @outbuf.
 *
 * @bytes_read and @bytes_written receive how far the input and output
 * pointers advanced. An error is reported only when the call failed and no
 * input was consumed; if anything was consumed the result is
 * G_CONVERTER_CONVERTED. With empty input, G_CONVERTER_INPUT_AT_END or
 * G_CONVERTER_FLUSH must be set, otherwise G_IO_ERROR_PARTIAL_INPUT is
 * returned.
 *
 * Returns G_CONVERTER_ERROR (with @error set) on failure, otherwise
 * G_CONVERTER_CONVERTED, G_CONVERTER_FINISHED or G_CONVERTER_FLUSHED. */
static GConverterResult
g_charset_converter_convert (GConverter       *converter,
			     const void       *inbuf,
			     gsize             inbuf_size,
			     void             *outbuf,
			     gsize             outbuf_size,
			     GConverterFlags   flags,
			     gsize            *bytes_read,
			     gsize            *bytes_written,
			     GError          **error)
{
  GCharsetConverter  *conv;
  gsize res;
  GConverterResult ret;
  gchar *inbufp, *outbufp;
  gsize in_left, out_left;
  int errsv;
  gboolean reset;

  conv = G_CHARSET_CONVERTER (converter);

  /* No descriptor means the GInitable init step never set one up. */
  if (conv->iconv == NULL)
    {
      g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_INITIALIZED,
			   _("Invalid object, not initialized"));
      return G_CONVERTER_ERROR;
    }

  /* g_iconv() advances these cursors and decrements the remaining sizes. */
  inbufp = (char *)inbuf;
  outbufp = (char *)outbuf;
  in_left = inbuf_size;
  out_left = outbuf_size;
  reset = FALSE;

  /* if there is no input try to flush the data */
  if (inbuf_size == 0)
    {
      if (flags & G_CONVERTER_INPUT_AT_END ||
          flags & G_CONVERTER_FLUSH)
        {
          reset = TRUE;
        }
      else
        {
          g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
                               _("Incomplete multibyte sequence in input"));
          return G_CONVERTER_ERROR;
        }
    }

  if (reset)
    /* call g_iconv with NULL inbuf to cleanup shift state */
    res = g_iconv (conv->iconv,
                   NULL, &in_left,
                   &outbufp, &out_left);
  else
    res = g_iconv (conv->iconv,
                   &inbufp, &in_left,
                   &outbufp, &out_left);

  /* Progress is measured by how far each cursor moved from its buffer start. */
  *bytes_read = inbufp - (char *)inbuf;
  *bytes_written = outbufp - (char *)outbuf;

  /* Don't report error if we converted anything */
  if (res == (gsize) -1 && *bytes_read == 0)
    {
      /* Only pointer arithmetic has run since g_iconv(), so errno is still
       * the value it set. */
      errsv = errno;

      switch (errsv)
	{
	case EINVAL:
	  /* Incomplete input text */
	  g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
			       _("Incomplete multibyte sequence in input"));
	  break;

	case E2BIG:
	  /* Not enough destination space */
	  g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
			       _("Not enough space in destination"));
	  break;

	case EILSEQ:
	  /* Invalid code sequence */
	  if (conv->use_fallback)
	    {
	      /* The fallback escape needs three output bytes. */
	      if (outbuf_size < 3)
		g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
				     _("Not enough space in destination"));
	      else
		{
		  /* Replace the first input byte with a backslash followed
		   * by its two uppercase hex digits, consuming one byte. */
		  const char hex[] = "0123456789ABCDEF";
		  guint8 v = *(guint8 *)inbuf;
		  guint8 *out = (guint8 *)outbuf;
		  out[0] = '\\';
		  out[1] = hex[(v & 0xf0) >> 4];
		  out[2] = hex[(v & 0x0f) >> 0];
		  *bytes_read = 1;
		  *bytes_written = 3;
		  /* NOTE(review): in_left is not read again after this
		   * point, so this decrement has no observable effect. */
		  in_left--;
		  conv->n_fallback_errors++;
		  /* Treat the substitution as a successful conversion. */
		  goto ok;
		}
	    }
	  else
	    g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
				 _("Invalid byte sequence in conversion input"));
	  break;

	default:
	  g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
		       _("Error during conversion: %s"),
		       g_strerror (errsv));
	  break;
	}
      ret = G_CONVERTER_ERROR;
    }
  else
    {
      /* Also reached via goto from the EILSEQ fallback path above. */
    ok:
      ret = G_CONVERTER_CONVERTED;

      /* After a flush of empty input, report completion; INPUT_AT_END takes
       * precedence over FLUSH when both flags are set. */
      if (reset &&
	  (flags & G_CONVERTER_INPUT_AT_END))
        ret = G_CONVERTER_FINISHED;
      else if (reset &&
	       (flags & G_CONVERTER_FLUSH))
        ret = G_CONVERTER_FLUSHED;
    }

  return ret;
}
     380  
     381  /**
     382   * g_charset_converter_set_use_fallback:
     383   * @converter: a #GCharsetConverter
     384   * @use_fallback: %TRUE to use fallbacks
     385   *
     386   * Sets the #GCharsetConverter:use-fallback property.
     387   *
     388   * Since: 2.24
     389   */
     390  void
     391  g_charset_converter_set_use_fallback (GCharsetConverter *converter,
     392  				      gboolean           use_fallback)
     393  {
     394    use_fallback = !!use_fallback;
     395  
     396    if (converter->use_fallback != use_fallback)
     397      {
     398        converter->use_fallback = use_fallback;
     399        g_object_notify (G_OBJECT (converter), "use-fallback");
     400      }
     401  }
     402  
     403  /**
     404   * g_charset_converter_get_use_fallback:
     405   * @converter: a #GCharsetConverter
     406   *
     407   * Gets the #GCharsetConverter:use-fallback property.
     408   *
     409   * Returns: %TRUE if fallbacks are used by @converter
     410   *
     411   * Since: 2.24
     412   */
     413  gboolean
     414  g_charset_converter_get_use_fallback (GCharsetConverter *converter)
     415  {
     416    return converter->use_fallback;
     417  }
     418  
     419  /**
     420   * g_charset_converter_get_num_fallbacks:
     421   * @converter: a #GCharsetConverter
     422   *
     423   * Gets the number of fallbacks that @converter has applied so far.
     424   *
     425   * Returns: the number of fallbacks that @converter has applied
     426   *
     427   * Since: 2.24
     428   */
     429  guint
     430  g_charset_converter_get_num_fallbacks (GCharsetConverter *converter)
     431  {
     432    return converter->n_fallback_errors;
     433  }
     434  
     435  static void
     436  g_charset_converter_iface_init (GConverterIface *iface)
     437  {
     438    iface->convert = g_charset_converter_convert;
     439    iface->reset = g_charset_converter_reset;
     440  }
     441  
     442  static gboolean
     443  g_charset_converter_initable_init (GInitable     *initable,
     444  				   GCancellable  *cancellable,
     445  				   GError       **error)
     446  {
     447    GCharsetConverter  *conv;
     448    int errsv;
     449  
     450    g_return_val_if_fail (G_IS_CHARSET_CONVERTER (initable), FALSE);
     451  
     452    conv = G_CHARSET_CONVERTER (initable);
     453  
     454    if (cancellable != NULL)
     455      {
     456        g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
     457  			   _("Cancellable initialization not supported"));
     458        return FALSE;
     459      }
     460  
     461    conv->iconv = g_iconv_open (conv->to, conv->from);
     462    errsv = errno;
     463  
     464    if (conv->iconv == (GIConv)-1)
     465      {
     466        if (errsv == EINVAL)
     467  	g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
     468  		     _("Conversion from character set “%s” to “%s” is not supported"),
     469  		     conv->from, conv->to);
     470        else
     471  	g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
     472  		     _("Could not open converter from “%s” to “%s"),
     473  		     conv->from, conv->to);
     474        return FALSE;
     475      }
     476  
     477    return TRUE;
     478  }
     479  
     480  static void
     481  g_charset_converter_initable_iface_init (GInitableIface *iface)
     482  {
     483    iface->init = g_charset_converter_initable_init;
     484  }