(root)/
gettext-0.22.4/
libtextstyle/
lib/
unistr/
u8-mbtouc.c
       1  /* Look at first character in UTF-8 string.
       2     Copyright (C) 1999-2002, 2006-2007, 2009-2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <bruno@clisp.org>, 2001.
       4  
       5     This file is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU Lesser General Public License as
       7     published by the Free Software Foundation; either version 2.1 of the
       8     License, or (at your option) any later version.
       9  
      10     This file is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #include <config.h>
      19  
      20  #if defined IN_LIBUNISTRING
      21  /* Tell unistr.h to declare u8_mbtouc as 'extern', not 'static inline'.  */
      22  # include "unistring-notinline.h"
      23  #endif
      24  
      25  /* Specification.  */
      26  #include "unistr.h"
      27  
      28  #if !HAVE_INLINE
      29  
      30  int
      31  u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
      32  {
      33    uint8_t c = *s;
      34  
      35    if (c < 0x80)
      36      {
      37        *puc = c;
      38        return 1;
      39      }
      40    else if (c >= 0xc2)
      41      {
      42        if (c < 0xe0)
      43          {
      44            if (n >= 2)
      45              {
      46                if ((s[1] ^ 0x80) < 0x40)
      47                  {
      48                    *puc = ((unsigned int) (c & 0x1f) << 6)
      49                           | (unsigned int) (s[1] ^ 0x80);
      50                    return 2;
      51                  }
      52                /* invalid multibyte character */
      53              }
      54            else
      55              {
      56                /* incomplete multibyte character */
      57                *puc = 0xfffd;
      58                return 1;
      59              }
      60          }
      61        else if (c < 0xf0)
      62          {
      63            if (n >= 3)
      64              {
      65                if ((s[1] ^ 0x80) < 0x40
      66                    && (c >= 0xe1 || s[1] >= 0xa0)
      67                    && (c != 0xed || s[1] < 0xa0))
      68                  {
      69                    if ((s[2] ^ 0x80) < 0x40)
      70                      {
      71                        *puc = ((unsigned int) (c & 0x0f) << 12)
      72                               | ((unsigned int) (s[1] ^ 0x80) << 6)
      73                               | (unsigned int) (s[2] ^ 0x80);
      74                        return 3;
      75                      }
      76                    /* invalid multibyte character */
      77                    *puc = 0xfffd;
      78                    return 2;
      79                  }
      80                /* invalid multibyte character */
      81                *puc = 0xfffd;
      82                return 1;
      83              }
      84            else
      85              {
      86                *puc = 0xfffd;
      87                if (n == 1)
      88                  {
      89                    /* incomplete multibyte character */
      90                    return 1;
      91                  }
      92                else
      93                  {
      94                    if ((s[1] ^ 0x80) < 0x40
      95                        && (c >= 0xe1 || s[1] >= 0xa0)
      96                        && (c != 0xed || s[1] < 0xa0))
      97                      {
      98                        /* incomplete multibyte character */
      99                        return 2;
     100                      }
     101                    else
     102                      {
     103                        /* invalid multibyte character */
     104                        return 1;
     105                      }
     106                  }
     107              }
     108          }
     109        else if (c <= 0xf4)
     110          {
     111            if (n >= 4)
     112              {
     113                if ((s[1] ^ 0x80) < 0x40
     114                    && (c >= 0xf1 || s[1] >= 0x90)
     115                    && (c < 0xf4 || (/* c == 0xf4 && */ s[1] < 0x90)))
     116                  {
     117                    if ((s[2] ^ 0x80) < 0x40)
     118                      {
     119                        if ((s[3] ^ 0x80) < 0x40)
     120                          {
     121                            *puc = ((unsigned int) (c & 0x07) << 18)
     122                                   | ((unsigned int) (s[1] ^ 0x80) << 12)
     123                                   | ((unsigned int) (s[2] ^ 0x80) << 6)
     124                                   | (unsigned int) (s[3] ^ 0x80);
     125                            return 4;
     126                          }
     127                        /* invalid multibyte character */
     128                        *puc = 0xfffd;
     129                        return 3;
     130                      }
     131                    /* invalid multibyte character */
     132                    *puc = 0xfffd;
     133                    return 2;
     134                  }
     135                /* invalid multibyte character */
     136                *puc = 0xfffd;
     137                return 1;
     138              }
     139            else
     140              {
     141                *puc = 0xfffd;
     142                if (n == 1)
     143                  {
     144                    /* incomplete multibyte character */
     145                    return 1;
     146                  }
     147                else
     148                  {
     149                    if ((s[1] ^ 0x80) < 0x40
     150                        && (c >= 0xf1 || s[1] >= 0x90)
     151                        && (c < 0xf4 || (/* c == 0xf4 && */ s[1] < 0x90)))
     152                      {
     153                        if (n == 2)
     154                          {
     155                            /* incomplete multibyte character */
     156                            return 2;
     157                          }
     158                        else
     159                          {
     160                            if ((s[2] ^ 0x80) < 0x40)
     161                              {
     162                                /* incomplete multibyte character */
     163                                return 3;
     164                              }
     165                            else
     166                              {
     167                                /* invalid multibyte character */
     168                                return 2;
     169                              }
     170                          }
     171                      }
     172                    else
     173                      {
     174                        /* invalid multibyte character */
     175                        return 1;
     176                      }
     177                  }
     178              }
     179          }
     180      }
     181    /* invalid multibyte character */
     182    *puc = 0xfffd;
     183    return 1;
     184  }
     185  
     186  #endif