(root)/
glibc-2.38/
wcsmbs/
mbrtoc8.c
       1  /* Multibyte to UTF-8 conversion.
       2     Copyright (C) 2022-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <assert.h>
      20  #include <dlfcn.h>
      21  #include <errno.h>
      22  #include <gconv.h>
      23  #include <uchar.h>
      24  #include <wcsmbsload.h>
      25  
      26  #ifndef EILSEQ
      27  # define EILSEQ EINVAL
      28  #endif
      29  
      30  
      31  /* This is the private state used if PS is NULL.  */
      32  static mbstate_t state;
      33  
      34  size_t
      35  mbrtoc8 (char8_t *pc8, const char *s, size_t n, mbstate_t *ps)
      36  {
      37    /* This implementation depends on the converter invoked by mbrtowc not
      38       needing to retain state in either the top most bit of ps->__count or
      39       in ps->__value between invocations.  This implementation uses the
      40       top most bit of ps->__count to indicate that trailing code units are
      41       yet to be written and uses ps->__value to store those code units.  */
      42  
      43    if (ps == NULL)
      44      ps = &state;
      45  
      46    /* If state indicates that trailing code units are yet to be written, write
      47       those first regardless of whether 's' is a null pointer.  */
      48    if (ps->__count & 0x80000000)
      49      {
      50        /* ps->__value.__wchb[3] stores the index of the next code unit to
      51           write.  Code units are stored in reverse order.  */
      52        size_t i = ps->__value.__wchb[3];
      53        if (pc8 != NULL)
      54  	{
      55  	  *pc8 = ps->__value.__wchb[i];
      56  	}
      57        if (i == 0)
      58  	{
      59  	  ps->__count &= 0x7fffffff;
      60  	  ps->__value.__wch = 0;
      61  	}
      62        else
      63  	--ps->__value.__wchb[3];
      64        return -3;
      65      }
      66  
      67    if (s == NULL)
      68      {
      69        /* if 's' is a null pointer, behave as if a null pointer was passed for
      70           'pc8', an empty string was passed for 's', and 1 passed for 'n'.  */
      71        pc8 = NULL;
      72        s = "";
      73        n = 1;
      74      }
      75  
      76    wchar_t wc;
      77    size_t result;
      78  
      79    result = mbrtowc (&wc, s, n, ps);
      80    if (result <= n)
      81      {
      82        if (wc <= 0x7F)
      83  	{
      84  	  if (pc8 != NULL)
      85  	    *pc8 = wc;
      86  	}
      87        else if (wc <= 0x7FF)
      88  	{
      89  	  if (pc8 != NULL)
      90  	    *pc8 = 0xC0 + ((wc >> 6) & 0x1F);
      91  	  ps->__value.__wchb[0] = 0x80 + (wc & 0x3F);
      92  	  ps->__value.__wchb[3] = 0;
      93  	  ps->__count |= 0x80000000;
      94  	}
      95        else if (wc <= 0xFFFF)
      96  	{
      97  	  if (pc8 != NULL)
      98  	    *pc8 = 0xE0 + ((wc >> 12) & 0x0F);
      99  	  ps->__value.__wchb[1] = 0x80 + ((wc >> 6) & 0x3F);
     100  	  ps->__value.__wchb[0] = 0x80 + (wc & 0x3F);
     101  	  ps->__value.__wchb[3] = 1;
     102  	  ps->__count |= 0x80000000;
     103  	}
     104        else if (wc <= 0x10FFFF)
     105  	{
     106  	  if (pc8 != NULL)
     107  	    *pc8 = 0xF0 + ((wc >> 18) & 0x07);
     108  	  ps->__value.__wchb[2] = 0x80 + ((wc >> 12) & 0x3F);
     109  	  ps->__value.__wchb[1] = 0x80 + ((wc >> 6) & 0x3F);
     110  	  ps->__value.__wchb[0] = 0x80 + (wc & 0x3F);
     111  	  ps->__value.__wchb[3] = 2;
     112  	  ps->__count |= 0x80000000;
     113  	}
     114      }
     115    if (result == 0 && wc != 0)
     116      {
     117        /* mbrtowc() never returns -3.  When a MB sequence converts to multiple
     118           WCs, no input is consumed when writing the subsequent WCs resulting
     119           in a result of 0 even if a null character wasn't written.  */
     120        result = -3;
     121      }
     122  
     123    return result;
     124  }