(root)/
gettext-0.22.4/
gettext-tools/
libgettextpo/
unistr/
u8-mbtouc-unsafe.c
       1  /* Look at first character in UTF-8 string.
       2     Copyright (C) 1999-2002, 2006-2007, 2009-2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <bruno@clisp.org>, 2001.
       4  
       5     This file is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU Lesser General Public License as
       7     published by the Free Software Foundation; either version 2.1 of the
       8     License, or (at your option) any later version.
       9  
      10     This file is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #include <config.h>
      19  
      20  #if defined IN_LIBUNISTRING
      21  /* Tell unistr.h to declare u8_mbtouc_unsafe as 'extern', not
      22     'static inline'.  */
      23  # include "unistring-notinline.h"
      24  #endif
      25  
      26  /* Specification.  */
      27  #include "unistr.h"
      28  
      29  #if !HAVE_INLINE
      30  
      31  int
      32  u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
      33  {
      34    uint8_t c = *s;
      35  
      36    if (c < 0x80)
      37      {
      38        *puc = c;
      39        return 1;
      40      }
      41    else if (c >= 0xc2)
      42      {
      43        if (c < 0xe0)
      44          {
      45            if (n >= 2)
      46              {
      47                if ((s[1] ^ 0x80) < 0x40)
      48                  {
      49                    *puc = ((unsigned int) (c & 0x1f) << 6)
      50                           | (unsigned int) (s[1] ^ 0x80);
      51                    return 2;
      52                  }
      53                /* invalid multibyte character */
      54              }
      55            else
      56              {
      57                /* incomplete multibyte character */
      58                *puc = 0xfffd;
      59                return 1;
      60              }
      61          }
      62        else if (c < 0xf0)
      63          {
      64            if (n >= 3)
      65              {
      66                if ((s[1] ^ 0x80) < 0x40)
      67                  {
      68                    if ((s[2] ^ 0x80) < 0x40)
      69                      {
      70                        if ((c >= 0xe1 || s[1] >= 0xa0)
      71                            && (c != 0xed || s[1] < 0xa0))
      72                          {
      73                            *puc = ((unsigned int) (c & 0x0f) << 12)
      74                                   | ((unsigned int) (s[1] ^ 0x80) << 6)
      75                                   | (unsigned int) (s[2] ^ 0x80);
      76                            return 3;
      77                          }
      78                        /* invalid multibyte character */
      79                        *puc = 0xfffd;
      80                        return 3;
      81                      }
      82                    /* invalid multibyte character */
      83                    *puc = 0xfffd;
      84                    return 2;
      85                  }
      86                /* invalid multibyte character */
      87              }
      88            else
      89              {
      90                /* incomplete multibyte character */
      91                *puc = 0xfffd;
      92                if (n == 1 || (s[1] ^ 0x80) >= 0x40)
      93                  return 1;
      94                else
      95                  return 2;
      96              }
      97          }
      98        else if (c < 0xf8)
      99          {
     100            if (n >= 4)
     101              {
     102                if ((s[1] ^ 0x80) < 0x40)
     103                  {
     104                    if ((s[2] ^ 0x80) < 0x40)
     105                      {
     106                        if ((s[3] ^ 0x80) < 0x40)
     107                          {
     108                            if ((c >= 0xf1 || s[1] >= 0x90)
     109                                && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
     110                               )
     111                              {
     112                                *puc = ((unsigned int) (c & 0x07) << 18)
     113                                       | ((unsigned int) (s[1] ^ 0x80) << 12)
     114                                       | ((unsigned int) (s[2] ^ 0x80) << 6)
     115                                       | (unsigned int) (s[3] ^ 0x80);
     116                                return 4;
     117                              }
     118                            /* invalid multibyte character */
     119                            *puc = 0xfffd;
     120                            return 4;
     121                          }
     122                        /* invalid multibyte character */
     123                        *puc = 0xfffd;
     124                        return 3;
     125                      }
     126                    /* invalid multibyte character */
     127                    *puc = 0xfffd;
     128                    return 2;
     129                  }
     130                /* invalid multibyte character */
     131              }
     132            else
     133              {
     134                /* incomplete multibyte character */
     135                *puc = 0xfffd;
     136                if (n == 1 || (s[1] ^ 0x80) >= 0x40)
     137                  return 1;
     138                else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
     139                  return 2;
     140                else
     141                  return 3;
     142              }
     143          }
     144      }
     145    /* invalid multibyte character */
     146    *puc = 0xfffd;
     147    return 1;
     148  }
     149  
     150  #endif