(root)/
gettext-0.22.4/
gettext-tools/
gnulib-lib/
unistr/
u8-mbtouc-unsafe-aux.c
       1  /* Conversion UTF-8 to UCS-4.
       2     Copyright (C) 2001-2002, 2006-2007, 2009-2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <bruno@clisp.org>, 2001.
       4  
       5     This file is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU Lesser General Public License as
       7     published by the Free Software Foundation; either version 2.1 of the
       8     License, or (at your option) any later version.
       9  
      10     This file is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #include <config.h>
      19  
      20  /* Specification.  */
      21  #include "unistr.h"
      22  
      23  #if defined IN_LIBUNISTRING || HAVE_INLINE
      24  
      25  int
      26  u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
      27  {
      28    uint8_t c = *s;
      29  
      30    if (c >= 0xc2)
      31      {
      32        if (c < 0xe0)
      33          {
      34            if (n >= 2)
      35              {
      36                if ((s[1] ^ 0x80) < 0x40)
      37                  {
      38                    *puc = ((unsigned int) (c & 0x1f) << 6)
      39                           | (unsigned int) (s[1] ^ 0x80);
      40                    return 2;
      41                  }
      42                /* invalid multibyte character */
      43              }
      44            else
      45              {
      46                /* incomplete multibyte character */
      47                *puc = 0xfffd;
      48                return 1;
      49              }
      50          }
      51        else if (c < 0xf0)
      52          {
      53            if (n >= 3)
      54              {
      55                if ((s[1] ^ 0x80) < 0x40)
      56                  {
      57                    if ((s[2] ^ 0x80) < 0x40)
      58                      {
      59                        if ((c >= 0xe1 || s[1] >= 0xa0)
      60                            && (c != 0xed || s[1] < 0xa0))
      61                          {
      62                            *puc = ((unsigned int) (c & 0x0f) << 12)
      63                                   | ((unsigned int) (s[1] ^ 0x80) << 6)
      64                                   | (unsigned int) (s[2] ^ 0x80);
      65                            return 3;
      66                          }
      67                        /* invalid multibyte character */
      68                        *puc = 0xfffd;
      69                        return 3;
      70                      }
      71                    /* invalid multibyte character */
      72                    *puc = 0xfffd;
      73                    return 2;
      74                  }
      75                /* invalid multibyte character */
      76              }
      77            else
      78              {
      79                /* incomplete multibyte character */
      80                *puc = 0xfffd;
      81                if (n == 1 || (s[1] ^ 0x80) >= 0x40)
      82                  return 1;
      83                else
      84                  return 2;
      85              }
      86          }
      87        else if (c < 0xf8)
      88          {
      89            if (n >= 4)
      90              {
      91                if ((s[1] ^ 0x80) < 0x40)
      92                  {
      93                    if ((s[2] ^ 0x80) < 0x40)
      94                      {
      95                        if ((s[3] ^ 0x80) < 0x40)
      96                          {
      97                            if ((c >= 0xf1 || s[1] >= 0x90)
      98                                && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
      99                               )
     100                              {
     101                                *puc = ((unsigned int) (c & 0x07) << 18)
     102                                       | ((unsigned int) (s[1] ^ 0x80) << 12)
     103                                       | ((unsigned int) (s[2] ^ 0x80) << 6)
     104                                       | (unsigned int) (s[3] ^ 0x80);
     105                                return 4;
     106                              }
     107                            /* invalid multibyte character */
     108                            *puc = 0xfffd;
     109                            return 4;
     110                          }
     111                        /* invalid multibyte character */
     112                        *puc = 0xfffd;
     113                        return 3;
     114                      }
     115                    /* invalid multibyte character */
     116                    *puc = 0xfffd;
     117                    return 2;
     118                  }
     119                /* invalid multibyte character */
     120              }
     121            else
     122              {
     123                /* incomplete multibyte character */
     124                *puc = 0xfffd;
     125                if (n == 1 || (s[1] ^ 0x80) >= 0x40)
     126                  return 1;
     127                else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
     128                  return 2;
     129                else
     130                  return 3;
     131              }
     132          }
     133      }
     134    /* invalid multibyte character */
     135    *puc = 0xfffd;
     136    return 1;
     137  }
     138  
     139  #endif