(root)/
gettext-0.22.4/
gettext-runtime/
gnulib-lib/
unistr/
u8-mbtoucr.c
       1  /* Look at first character in UTF-8 string, returning an error code.
       2     Copyright (C) 1999-2002, 2006-2007, 2009-2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <bruno@clisp.org>, 2001.
       4  
       5     This file is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU Lesser General Public License as
       7     published by the Free Software Foundation; either version 2.1 of the
       8     License, or (at your option) any later version.
       9  
      10     This file is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #include <config.h>
      19  
      20  /* Specification.  */
      21  #include "unistr.h"
      22  
      23  int
      24  u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
      25  {
      26    uint8_t c = *s;
      27  
      28    if (c < 0x80)
      29      {
      30        *puc = c;
      31        return 1;
      32      }
      33    else if (c >= 0xc2)
      34      {
      35        if (c < 0xe0)
      36          {
      37            if (n >= 2)
      38              {
      39                if ((s[1] ^ 0x80) < 0x40)
      40                  {
      41                    *puc = ((unsigned int) (c & 0x1f) << 6)
      42                           | (unsigned int) (s[1] ^ 0x80);
      43                    return 2;
      44                  }
      45                /* invalid multibyte character */
      46              }
      47            else
      48              {
      49                /* incomplete multibyte character */
      50                *puc = 0xfffd;
      51                return -2;
      52              }
      53          }
      54        else if (c < 0xf0)
      55          {
      56            if (n >= 2)
      57              {
      58                if ((s[1] ^ 0x80) < 0x40
      59                    && (c >= 0xe1 || s[1] >= 0xa0)
      60                    && (c != 0xed || s[1] < 0xa0))
      61                  {
      62                    if (n >= 3)
      63                      {
      64                        if ((s[2] ^ 0x80) < 0x40)
      65                          {
      66                            *puc = ((unsigned int) (c & 0x0f) << 12)
      67                                   | ((unsigned int) (s[1] ^ 0x80) << 6)
      68                                   | (unsigned int) (s[2] ^ 0x80);
      69                            return 3;
      70                          }
      71                        /* invalid multibyte character */
      72                      }
      73                    else
      74                      {
      75                        /* incomplete multibyte character */
      76                        *puc = 0xfffd;
      77                        return -2;
      78                      }
      79                  }
      80                /* invalid multibyte character */
      81              }
      82            else
      83              {
      84                /* incomplete multibyte character */
      85                *puc = 0xfffd;
      86                return -2;
      87              }
      88          }
      89        else if (c <= 0xf4)
      90          {
      91            if (n >= 2)
      92              {
      93                if ((s[1] ^ 0x80) < 0x40
      94                    && (c >= 0xf1 || s[1] >= 0x90)
      95                    && (c < 0xf4 || (/* c == 0xf4 && */ s[1] < 0x90)))
      96                  {
      97                    if (n >= 3)
      98                      {
      99                        if ((s[2] ^ 0x80) < 0x40)
     100                          {
     101                            if (n >= 4)
     102                              {
     103                                if ((s[3] ^ 0x80) < 0x40)
     104                                  {
     105                                    *puc = ((unsigned int) (c & 0x07) << 18)
     106                                           | ((unsigned int) (s[1] ^ 0x80) << 12)
     107                                           | ((unsigned int) (s[2] ^ 0x80) << 6)
     108                                           | (unsigned int) (s[3] ^ 0x80);
     109                                    return 4;
     110                                  }
     111                                /* invalid multibyte character */
     112                              }
     113                            else
     114                              {
     115                                /* incomplete multibyte character */
     116                                *puc = 0xfffd;
     117                                return -2;
     118                              }
     119                          }
     120                        /* invalid multibyte character */
     121                      }
     122                    else
     123                      {
     124                        /* incomplete multibyte character */
     125                        *puc = 0xfffd;
     126                        return -2;
     127                      }
     128                  }
     129                /* invalid multibyte character */
     130              }
     131            else
     132              {
     133                /* incomplete multibyte character */
     134                *puc = 0xfffd;
     135                return -2;
     136              }
     137          }
     138      }
     139    /* invalid multibyte character */
     140    *puc = 0xfffd;
     141    return -1;
     142  }