(root)/
gettext-0.22.4/
gettext-tools/
gnulib-tests/
c32rtomb.c
       1  /* Convert 32-bit wide character to multibyte character.
       2     Copyright (C) 2020-2023 Free Software Foundation, Inc.
       3  
       4     This file is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU Lesser General Public License as
       6     published by the Free Software Foundation; either version 2.1 of the
       7     License, or (at your option) any later version.
       8  
       9     This file is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU Lesser General Public License for more details.
      13  
      14     You should have received a copy of the GNU Lesser General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Bruno Haible <bruno@clisp.org>, 2020.  */
      18  
      19  #include <config.h>
      20  
      21  /* Specification.  */
      22  #include <uchar.h>
      23  
      24  #include <errno.h>
      25  #include <wchar.h>
      26  
      27  #include "attribute.h" /* FALLTHROUGH */
      28  #include "localcharset.h"
      29  #include "streq.h"
      30  
      31  #if GL_CHAR32_T_IS_UNICODE
      32  # include "lc-charset-unicode.h"
      33  #endif
      34  
      35  size_t
      36  c32rtomb (char *s, char32_t wc, mbstate_t *ps)
      37  #undef c32rtomb
      38  {
      39  #if HAVE_WORKING_MBRTOC32
      40  
      41  # if C32RTOMB_RETVAL_BUG
      42    if (s == NULL)
      43      /* We know the NUL wide character corresponds to the NUL character.  */
      44      return 1;
      45  # endif
      46  
      47    return c32rtomb (s, wc, ps);
      48  
      49  #elif _GL_SMALL_WCHAR_T
      50  
      51    if (s == NULL)
      52      return wcrtomb (NULL, 0, ps);
      53    else
      54      {
      55        /* Special-case all encodings that may produce wide character values
      56           > WCHAR_MAX.  */
      57        const char *encoding = locale_charset ();
      58        if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
      59          {
      60            /* Special-case the UTF-8 encoding.  Assume that the wide-character
      61               encoding in a UTF-8 locale is UCS-2 or, equivalently, UTF-16.  */
      62            if (wc < 0x80)
      63              {
      64                s[0] = (unsigned char) wc;
      65                return 1;
      66              }
      67            else
      68              {
      69                int count;
      70  
      71                if (wc < 0x800)
      72                  count = 2;
      73                else if (wc < 0x10000)
      74                  {
      75                    if (wc < 0xd800 || wc >= 0xe000)
      76                      count = 3;
      77                    else
      78                      {
      79                        errno = EILSEQ;
      80                        return (size_t)(-1);
      81                      }
      82                  }
      83                else if (wc < 0x110000)
      84                  count = 4;
      85                else
      86                  {
      87                    errno = EILSEQ;
      88                    return (size_t)(-1);
      89                  }
      90  
      91                switch (count) /* note: code falls through cases! */
      92                  {
      93                  case 4: s[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
      94                    FALLTHROUGH;
      95                  case 3: s[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
      96                    FALLTHROUGH;
      97                  case 2: s[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
      98                /*case 1:*/ s[0] = wc;
      99                  }
     100                return count;
     101              }
     102          }
     103        else
     104          {
     105            if ((wchar_t) wc == wc)
     106              return wcrtomb (s, (wchar_t) wc, ps);
     107            else
     108              {
     109                errno = EILSEQ;
     110                return (size_t)(-1);
     111              }
     112          }
     113      }
     114  
     115  #else
     116  
     117    /* char32_t and wchar_t are equivalent.  */
     118  # if GL_CHAR32_T_IS_UNICODE && GL_CHAR32_T_VS_WCHAR_T_NEEDS_CONVERSION
     119    if (wc != 0)
     120      {
     121        wc = unicode_to_locale_encoding (wc);
     122        if (wc == 0)
     123          {
     124            errno = EILSEQ;
     125            return (size_t)(-1);
     126          }
     127      }
     128  # endif
     129    return wcrtomb (s, (wchar_t) wc, ps);
     130  
     131  #endif
     132  }