(root)/
glibc-2.38/
wcsmbs/
c8rtomb.c
       1  /* UTF-8 to multibyte conversion.
       2     Copyright (C) 2022-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <errno.h>
      20  #include <uchar.h>
      21  #include <wchar.h>
      22  
      23  
/* Private conversion state that c8rtomb falls back to when its PS
   argument is NULL.  As a file-scope static object it starts out zeroed,
   which c8rtomb treats as "no partially seen code unit sequence".  */
static mbstate_t state;
      26  
      27  size_t
      28  c8rtomb (char *s, char8_t c8, mbstate_t *ps)
      29  {
      30    /* This implementation depends on the converter invoked by wcrtomb not
      31       needing to retain state in either the top most bit of ps->__count or
      32       in ps->__value between invocations.  This implementation uses the
      33       top most bit of ps->__count to indicate that trailing code units are
      34       expected and uses ps->__value to store previously seen code units.  */
      35  
      36    wchar_t wc;
      37  
      38    if (ps == NULL)
      39      ps = &state;
      40  
      41    if (s == NULL)
      42      {
      43        /* if 's' is a null pointer, behave as if u8'\0' was passed as 'c8'.  If
      44           this occurs for an incomplete code unit sequence, then an error will
      45           be reported below.  */
      46        c8 = u8""[0];
      47      }
      48  
      49    if (! (ps->__count & 0x80000000))
      50      {
      51        /* Initial state.  */
      52        if ((c8 >= 0x80 && c8 <= 0xC1) || c8 >= 0xF5)
      53  	{
      54  	  /* An invalid lead code unit.  */
      55  	  __set_errno (EILSEQ);
      56  	  return -1;
      57  	}
      58        if (c8 >= 0xC2)
      59  	{
      60  	  /* A valid lead code unit.  */
      61  	  ps->__count |= 0x80000000;
      62  	  ps->__value.__wchb[0] = c8;
      63  	  ps->__value.__wchb[3] = 1;
      64  	  return 0;
      65  	}
      66        /* A single byte (ASCII) code unit.  */
      67        wc = c8;
      68      }
      69    else
      70      {
      71        char8_t cu1 = ps->__value.__wchb[0];
      72        if (ps->__value.__wchb[3] == 1)
      73  	{
      74  	  /* A single lead code unit was previously seen.  */
      75  	  if ((c8 < 0x80 || c8 > 0xBF)
      76                || (cu1 == 0xE0 && c8 < 0xA0)
      77                || (cu1 == 0xED && c8 > 0x9F)
      78                || (cu1 == 0xF0 && c8 < 0x90)
      79                || (cu1 == 0xF4 && c8 > 0x8F))
      80  	    {
      81  	      /* An invalid second code unit.  */
      82  	      __set_errno (EILSEQ);
      83  	      return -1;
      84  	    }
      85  	  if (cu1 >= 0xE0)
      86  	    {
      87  	      /* A three or four code unit sequence.  */
      88  	      ps->__value.__wchb[1] = c8;
      89  	      ++ps->__value.__wchb[3];
      90  	      return 0;
      91  	    }
      92  	  wc = ((cu1 & 0x1F) << 6)
      93  	       + (c8 & 0x3F);
      94  	}
      95        else
      96  	{
      97  	  char8_t cu2 = ps->__value.__wchb[1];
      98  	  /* A three or four byte code unit sequence.  */
      99  	  if (c8 < 0x80 || c8 > 0xBF)
     100  	    {
     101  	      /* An invalid third or fourth code unit.  */
     102  	      __set_errno (EILSEQ);
     103  	      return -1;
     104  	    }
     105  	  if (ps->__value.__wchb[3] == 2 && cu1 >= 0xF0)
     106  	    {
     107  	      /* A four code unit sequence.  */
     108  	      ps->__value.__wchb[2] = c8;
     109  	      ++ps->__value.__wchb[3];
     110  	      return 0;
     111  	    }
     112  	  if (cu1 < 0xF0)
     113  	    {
     114  	      wc = ((cu1 & 0x0F) << 12)
     115  		   + ((cu2 & 0x3F) << 6)
     116  		   + (c8 & 0x3F);
     117  	    }
     118  	  else
     119  	    {
     120  	      char8_t cu3 = ps->__value.__wchb[2];
     121  	      wc = ((cu1 & 0x07) << 18)
     122  		   + ((cu2 & 0x3F) << 12)
     123  		   + ((cu3 & 0x3F) << 6)
     124  		   + (c8 & 0x3F);
     125  	    }
     126  	}
     127        ps->__count &= 0x7fffffff;
     128        ps->__value.__wch = 0;
     129      }
     130  
     131    return wcrtomb (s, wc, ps);
     132  }