(root)/
gcc-13.2.0/
libstdc++-v3/
include/
ext/
codecvt_specializations.h
       1  // Locale support (codecvt) -*- C++ -*-
       2  
       3  // Copyright (C) 2000-2023 Free Software Foundation, Inc.
       4  //
       5  // This file is part of the GNU ISO C++ Library.  This library is free
       6  // software; you can redistribute it and/or modify it under the
       7  // terms of the GNU General Public License as published by the
       8  // Free Software Foundation; either version 3, or (at your option)
       9  // any later version.
      10  
      11  // This library is distributed in the hope that it will be useful,
      12  // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14  // GNU General Public License for more details.
      15  
      16  // Under Section 7 of GPL version 3, you are granted additional
      17  // permissions described in the GCC Runtime Library Exception, version
      18  // 3.1, as published by the Free Software Foundation.
      19  
      20  // You should have received a copy of the GNU General Public License and
      21  // a copy of the GCC Runtime Library Exception along with this program;
      22  // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      23  // <http://www.gnu.org/licenses/>.
      24  
      25  //
      26  // ISO C++ 14882: 22.2.1.5 Template class codecvt
      27  //
      28  
      29  // Written by Benjamin Kosnik <bkoz@redhat.com>
      30  
      31  /** @file ext/codecvt_specializations.h
      32   *  This file is a GNU extension to the Standard C++ Library.
      33   */
      34  
      35  #ifndef _EXT_CODECVT_SPECIALIZATIONS_H
      36  #define _EXT_CODECVT_SPECIALIZATIONS_H 1
      37  
#include <bits/requires_hosted.h> // GNU extensions are currently omitted

#include <bits/c++config.h>
#include <cerrno>
#include <locale>
#include <iconv.h>
      43  
      44  namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
      45  {
      46  _GLIBCXX_BEGIN_NAMESPACE_VERSION
      47  _GLIBCXX_BEGIN_NAMESPACE_CXX11
      48  
      49    /// Extension to use iconv for dealing with character encodings.
      50    // This includes conversions and comparisons between various character
      51    // sets.  This object encapsulates data that may need to be shared between
      52    // char_traits, codecvt and ctype.
      53    class encoding_state
      54    {
      55    public:
      56      // Types: 
      57      // NB: A conversion descriptor subsumes and enhances the
      58      // functionality of a simple state type such as mbstate_t.
      59      typedef iconv_t	descriptor_type;
      60      
      61    protected:
      62      // Name of internal character set encoding.
      63      std::string	       	_M_int_enc;
      64  
      65      // Name of external character set encoding.
      66      std::string  	_M_ext_enc;
      67  
      68      // Conversion descriptor between external encoding to internal encoding.
      69      descriptor_type	_M_in_desc;
      70  
      71      // Conversion descriptor between internal encoding to external encoding.
      72      descriptor_type	_M_out_desc;
      73  
      74      // The byte-order marker for the external encoding, if necessary.
      75      int			_M_ext_bom;
      76  
      77      // The byte-order marker for the internal encoding, if necessary.
      78      int			_M_int_bom;
      79  
      80      // Number of external bytes needed to construct one complete
      81      // character in the internal encoding.
      82      // NB: -1 indicates variable, or stateful, encodings.
      83      int 		_M_bytes;
      84  
      85    public:
      86      explicit 
      87      encoding_state() 
      88      : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
      89      { }
      90  
      91      explicit 
      92      encoding_state(const char* __int, const char* __ext, 
      93  		   int __ibom = 0, int __ebom = 0, int __bytes = 1)
      94      : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0), 
      95        _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
      96      { init(); }
      97  
      98      // 21.1.2 traits typedefs
      99      // p4
     100      // typedef STATE_T state_type
     101      // requires: state_type shall meet the requirements of
     102      // CopyConstructible types (20.1.3)
     103      // NB: This does not preserve the actual state of the conversion
     104      // descriptor member, but it does duplicate the encoding
     105      // information.
     106      encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
     107      { construct(__obj); }
     108  
     109      // Need assignment operator as well.
     110      encoding_state&
     111      operator=(const encoding_state& __obj)
     112      {
     113        construct(__obj);
     114        return *this;
     115      }
     116  
     117      ~encoding_state()
     118      { destroy(); } 
     119  
     120      bool
     121      good() const throw()
     122      { 
     123        const descriptor_type __err = (iconv_t)(-1);
     124        bool __test = _M_in_desc && _M_in_desc != __err; 
     125        __test &=  _M_out_desc && _M_out_desc != __err;
     126        return __test;
     127      }
     128      
     129      int
     130      character_ratio() const
     131      { return _M_bytes; }
     132  
     133      const std::string
     134      internal_encoding() const
     135      { return _M_int_enc; }
     136  
     137      int 
     138      internal_bom() const
     139      { return _M_int_bom; }
     140  
     141      const std::string
     142      external_encoding() const
     143      { return _M_ext_enc; }
     144  
     145      int 
     146      external_bom() const
     147      { return _M_ext_bom; }
     148  
     149      const descriptor_type&
     150      in_descriptor() const
     151      { return _M_in_desc; }
     152  
     153      const descriptor_type&
     154      out_descriptor() const
     155      { return _M_out_desc; }
     156  
     157    protected:
     158      void
     159      init()
     160      {
     161        const descriptor_type __err = (iconv_t)(-1);
     162        const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
     163        if (!_M_in_desc && __have_encodings)
     164  	{
     165  	  _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
     166  	  if (_M_in_desc == __err)
     167  	    std::__throw_runtime_error(__N("encoding_state::_M_init "
     168  				    "creating iconv input descriptor failed"));
     169  	}
     170        if (!_M_out_desc && __have_encodings)
     171  	{
     172  	  _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
     173  	  if (_M_out_desc == __err)
     174  	    std::__throw_runtime_error(__N("encoding_state::_M_init "
     175  				  "creating iconv output descriptor failed"));
     176  	}
     177      }
     178  
     179      void
     180      construct(const encoding_state& __obj)
     181      {
     182        destroy();
     183        _M_int_enc = __obj._M_int_enc;
     184        _M_ext_enc = __obj._M_ext_enc;
     185        _M_ext_bom = __obj._M_ext_bom;
     186        _M_int_bom = __obj._M_int_bom;
     187        _M_bytes = __obj._M_bytes;
     188        init();
     189      }
     190  
     191      void
     192      destroy() throw()
     193      {
     194        const descriptor_type __err = (iconv_t)(-1);
     195        if (_M_in_desc && _M_in_desc != __err) 
     196  	{
     197  	  iconv_close(_M_in_desc);
     198  	  _M_in_desc = 0;
     199  	}
     200        if (_M_out_desc && _M_out_desc != __err) 
     201  	{
     202  	  iconv_close(_M_out_desc);
     203  	  _M_out_desc = 0;
     204  	}
     205      }
     206    };
     207  
     208    /// encoding_char_traits
     209    // Custom traits type with encoding_state for the state type, and the
     210    // associated fpos<encoding_state> for the position type, all other
     211    // bits equivalent to the required char_traits instantiations.
     212    template<typename _CharT>
     213      struct encoding_char_traits
     214      : public std::char_traits<_CharT>
     215      {
     216        typedef encoding_state				state_type;
     217        typedef typename std::fpos<state_type>		pos_type;
     218      };
     219  
     220  _GLIBCXX_END_NAMESPACE_CXX11
     221  _GLIBCXX_END_NAMESPACE_VERSION
     222  } // namespace
     223  
     224  
     225  namespace std _GLIBCXX_VISIBILITY(default)
     226  {
     227  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     228  
     229    using __gnu_cxx::encoding_state;
     230  
     231    /// codecvt<InternT, _ExternT, encoding_state> specialization.
     232    // This partial specialization takes advantage of iconv to provide
     233    // code conversions between a large number of character encodings.
     234    template<typename _InternT, typename _ExternT>
     235      class codecvt<_InternT, _ExternT, encoding_state>
     236      : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
     237      {
     238      public:      
     239        // Types:
     240        typedef codecvt_base::result			result;
     241        typedef _InternT 					intern_type;
     242        typedef _ExternT 					extern_type;
     243        typedef __gnu_cxx::encoding_state 		state_type;
     244        typedef state_type::descriptor_type 		descriptor_type;
     245  
     246        // Data Members:
     247        static locale::id 		id;
     248  
     249        explicit 
     250        codecvt(size_t __refs = 0)
     251        : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
     252        { }
     253  
     254        explicit 
     255        codecvt(state_type& __enc, size_t __refs = 0)
     256        : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
     257        { }
     258  
     259       protected:
     260        virtual 
     261        ~codecvt() { }
     262  
     263        virtual result
     264        do_out(state_type& __state, const intern_type* __from, 
     265  	     const intern_type* __from_end, const intern_type*& __from_next,
     266  	     extern_type* __to, extern_type* __to_end,
     267  	     extern_type*& __to_next) const;
     268  
     269        virtual result
     270        do_unshift(state_type& __state, extern_type* __to, 
     271  		 extern_type* __to_end, extern_type*& __to_next) const;
     272  
     273        virtual result
     274        do_in(state_type& __state, const extern_type* __from, 
     275  	    const extern_type* __from_end, const extern_type*& __from_next,
     276  	    intern_type* __to, intern_type* __to_end, 
     277  	    intern_type*& __to_next) const;
     278  
     279        virtual int 
     280        do_encoding() const throw();
     281  
     282        virtual bool 
     283        do_always_noconv() const throw();
     284  
     285        virtual int 
     286        do_length(state_type&, const extern_type* __from, 
     287  		const extern_type* __end, size_t __max) const;
     288  
     289        virtual int 
     290        do_max_length() const throw();
     291      };
     292  
     293    template<typename _InternT, typename _ExternT>
     294      locale::id 
     295      codecvt<_InternT, _ExternT, encoding_state>::id;
     296  
     297    // This adaptor works around the signature problems of the second
     298    // argument to iconv():  SUSv2 and others use 'const char**', but glibc 2.2
     299    // uses 'char**', which matches the POSIX 1003.1-2001 standard.
     300    // Using this adaptor, g++ will do the work for us.
     301    template<typename _Tp>
     302      inline size_t
     303      __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
     304                      iconv_t __cd, char** __inbuf, size_t* __inbytes,
     305                      char** __outbuf, size_t* __outbytes)
     306      { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); }
     307  
     308    template<typename _InternT, typename _ExternT>
     309      codecvt_base::result
     310      codecvt<_InternT, _ExternT, encoding_state>::
     311      do_out(state_type& __state, const intern_type* __from, 
     312  	   const intern_type* __from_end, const intern_type*& __from_next,
     313  	   extern_type* __to, extern_type* __to_end,
     314  	   extern_type*& __to_next) const
     315      {
     316        result __ret = codecvt_base::error;
     317        if (__state.good())
     318  	{
     319  	  const descriptor_type& __desc = __state.out_descriptor();
     320  	  const size_t __fmultiple = sizeof(intern_type);
     321  	  size_t __fbytes = __fmultiple * (__from_end - __from);
     322  	  const size_t __tmultiple = sizeof(extern_type);
     323  	  size_t __tbytes = __tmultiple * (__to_end - __to); 
     324  	  
     325  	  // Argument list for iconv specifies a byte sequence. Thus,
     326  	  // all to/from arrays must be brutally casted to char*.
     327  	  char* __cto = reinterpret_cast<char*>(__to);
     328  	  char* __cfrom;
     329  	  size_t __conv;
     330  
     331  	  // Some encodings need a byte order marker as the first item
     332  	  // in the byte stream, to designate endian-ness. The default
     333  	  // value for the byte order marker is NULL, so if this is
     334  	  // the case, it's not necessary and we can just go on our
     335  	  // merry way.
     336  	  int __int_bom = __state.internal_bom();
     337  	  if (__int_bom)
     338  	    {	  
     339  	      size_t __size = __from_end - __from;
     340  	      intern_type* __cfixed = static_cast<intern_type*>
     341  		(__builtin_alloca(sizeof(intern_type) * (__size + 1)));
     342  	      __cfixed[0] = static_cast<intern_type>(__int_bom);
     343  	      char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
     344  	      __cfrom = reinterpret_cast<char*>(__cfixed);
     345  	      __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
     346                                          &__fbytes, &__cto, &__tbytes); 
     347  	    }
     348  	  else
     349  	    {
     350  	      intern_type* __cfixed = const_cast<intern_type*>(__from);
     351  	      __cfrom = reinterpret_cast<char*>(__cfixed);
     352  	      __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes, 
     353  				       &__cto, &__tbytes); 
     354  	    }
     355  
     356  	  if (__conv != size_t(-1))
     357  	    {
     358  	      __from_next = reinterpret_cast<const intern_type*>(__cfrom);
     359  	      __to_next = reinterpret_cast<extern_type*>(__cto);
     360  	      __ret = codecvt_base::ok;
     361  	    }
     362  	  else 
     363  	    {
     364  	      if (__fbytes < __fmultiple * (__from_end - __from))
     365  		{
     366  		  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
     367  		  __to_next = reinterpret_cast<extern_type*>(__cto);
     368  		  __ret = codecvt_base::partial;
     369  		}
     370  	      else
     371  		__ret = codecvt_base::error;
     372  	    }
     373  	}
     374        return __ret; 
     375      }
     376  
     377    template<typename _InternT, typename _ExternT>
     378      codecvt_base::result
     379      codecvt<_InternT, _ExternT, encoding_state>::
     380      do_unshift(state_type& __state, extern_type* __to, 
     381  	       extern_type* __to_end, extern_type*& __to_next) const
     382      {
     383        result __ret = codecvt_base::error;
     384        if (__state.good())
     385  	{
     386  	  const descriptor_type& __desc = __state.in_descriptor();
     387  	  const size_t __tmultiple = sizeof(intern_type);
     388  	  size_t __tlen = __tmultiple * (__to_end - __to); 
     389  	  
     390  	  // Argument list for iconv specifies a byte sequence. Thus,
     391  	  // all to/from arrays must be brutally casted to char*.
     392  	  char* __cto = reinterpret_cast<char*>(__to);
     393  	  size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0,
     394                                            &__cto, &__tlen); 
     395  	  
     396  	  if (__conv != size_t(-1))
     397  	    {
     398  	      __to_next = reinterpret_cast<extern_type*>(__cto);
     399  	      if (__tlen == __tmultiple * (__to_end - __to))
     400  		__ret = codecvt_base::noconv;
     401  	      else if (__tlen == 0)
     402  		__ret = codecvt_base::ok;
     403  	      else
     404  		__ret = codecvt_base::partial;
     405  	    }
     406  	  else 
     407  	    __ret = codecvt_base::error;
     408  	}
     409        return __ret; 
     410      }
     411     
     412    template<typename _InternT, typename _ExternT>
     413      codecvt_base::result
     414      codecvt<_InternT, _ExternT, encoding_state>::
     415      do_in(state_type& __state, const extern_type* __from, 
     416  	  const extern_type* __from_end, const extern_type*& __from_next,
     417  	  intern_type* __to, intern_type* __to_end, 
     418  	  intern_type*& __to_next) const
     419      { 
     420        result __ret = codecvt_base::error;
     421        if (__state.good())
     422  	{
     423  	  const descriptor_type& __desc = __state.in_descriptor();
     424  	  const size_t __fmultiple = sizeof(extern_type);
     425  	  size_t __flen = __fmultiple * (__from_end - __from);
     426  	  const size_t __tmultiple = sizeof(intern_type);
     427  	  size_t __tlen = __tmultiple * (__to_end - __to); 
     428  	  
     429  	  // Argument list for iconv specifies a byte sequence. Thus,
     430  	  // all to/from arrays must be brutally casted to char*.
     431  	  char* __cto = reinterpret_cast<char*>(__to);
     432  	  char* __cfrom;
     433  	  size_t __conv;
     434  
     435  	  // Some encodings need a byte order marker as the first item
     436  	  // in the byte stream, to designate endian-ness. The default
     437  	  // value for the byte order marker is NULL, so if this is
     438  	  // the case, it's not necessary and we can just go on our
     439  	  // merry way.
     440  	  int __ext_bom = __state.external_bom();
     441  	  if (__ext_bom)
     442  	    {	  
     443  	      size_t __size = __from_end - __from;
     444  	      extern_type* __cfixed =  static_cast<extern_type*>
     445  		(__builtin_alloca(sizeof(extern_type) * (__size + 1)));
     446  	      __cfixed[0] = static_cast<extern_type>(__ext_bom);
     447  	      char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
     448  	      __cfrom = reinterpret_cast<char*>(__cfixed);
     449  	      __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
     450                                         &__flen, &__cto, &__tlen); 
     451  	    }
     452  	  else
     453  	    {
     454  	      extern_type* __cfixed = const_cast<extern_type*>(__from);
     455  	      __cfrom = reinterpret_cast<char*>(__cfixed);
     456  	      __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
     457                                         &__flen, &__cto, &__tlen); 
     458  	    }
     459  
     460  	  
     461  	  if (__conv != size_t(-1))
     462  	    {
     463  	      __from_next = reinterpret_cast<const extern_type*>(__cfrom);
     464  	      __to_next = reinterpret_cast<intern_type*>(__cto);
     465  	      __ret = codecvt_base::ok;
     466  	    }
     467  	  else 
     468  	    {
     469  	      if (__flen < static_cast<size_t>(__from_end - __from))
     470  		{
     471  		  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
     472  		  __to_next = reinterpret_cast<intern_type*>(__cto);
     473  		  __ret = codecvt_base::partial;
     474  		}
     475  	      else
     476  		__ret = codecvt_base::error;
     477  	    }
     478  	}
     479        return __ret; 
     480      }
     481    
     482    template<typename _InternT, typename _ExternT>
     483      int 
     484      codecvt<_InternT, _ExternT, encoding_state>::
     485      do_encoding() const throw()
     486      {
     487        int __ret = 0;
     488        if (sizeof(_ExternT) <= sizeof(_InternT))
     489  	__ret = sizeof(_InternT) / sizeof(_ExternT);
     490        return __ret; 
     491      }
     492    
     493    template<typename _InternT, typename _ExternT>
     494      bool 
     495      codecvt<_InternT, _ExternT, encoding_state>::
     496      do_always_noconv() const throw()
     497      { return false; }
     498    
     499    template<typename _InternT, typename _ExternT>
     500      int 
     501      codecvt<_InternT, _ExternT, encoding_state>::
     502      do_length(state_type&, const extern_type* __from, 
     503  	      const extern_type* __end, size_t __max) const
     504      { return std::min(__max, static_cast<size_t>(__end - __from)); }
     505  
     506    // _GLIBCXX_RESOLVE_LIB_DEFECTS
     507    // 74.  Garbled text for codecvt::do_max_length
     508    template<typename _InternT, typename _ExternT>
     509      int 
     510      codecvt<_InternT, _ExternT, encoding_state>::
     511      do_max_length() const throw()
     512      { return 1; }
     513  
     514  _GLIBCXX_END_NAMESPACE_VERSION
     515  } // namespace
     516  
     517  #endif