(root)/
gettext-0.22.4/
gettext-tools/
libgettextpo/
unistr/
u8-mbtouc-aux.c
       1  /* Conversion UTF-8 to UCS-4.
       2     Copyright (C) 2001-2002, 2006-2007, 2009-2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <bruno@clisp.org>, 2001.
       4  
       5     This file is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU Lesser General Public License as
       7     published by the Free Software Foundation; either version 2.1 of the
       8     License, or (at your option) any later version.
       9  
      10     This file is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #include <config.h>
      19  
      20  /* Specification.  */
      21  #include "unistr.h"
      22  
      23  #if defined IN_LIBUNISTRING || HAVE_INLINE
      24  
      25  int
      26  u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
      27  {
      28    uint8_t c = *s;
      29  
      30    if (c >= 0xc2)
      31      {
      32        if (c < 0xe0)
      33          {
      34            if (n >= 2)
      35              {
      36                if ((s[1] ^ 0x80) < 0x40)
      37                  {
      38                    *puc = ((unsigned int) (c & 0x1f) << 6)
      39                           | (unsigned int) (s[1] ^ 0x80);
      40                    return 2;
      41                  }
      42                /* invalid multibyte character */
      43              }
      44            else
      45              {
      46                /* incomplete multibyte character */
      47                *puc = 0xfffd;
      48                return 1;
      49              }
      50          }
      51        else if (c < 0xf0)
      52          {
      53            if (n >= 3)
      54              {
      55                if ((s[1] ^ 0x80) < 0x40
      56                    && (c >= 0xe1 || s[1] >= 0xa0)
      57                    && (c != 0xed || s[1] < 0xa0))
      58                  {
      59                    if ((s[2] ^ 0x80) < 0x40)
      60                      {
      61                        *puc = ((unsigned int) (c & 0x0f) << 12)
      62                               | ((unsigned int) (s[1] ^ 0x80) << 6)
      63                               | (unsigned int) (s[2] ^ 0x80);
      64                        return 3;
      65                      }
      66                    /* invalid multibyte character */
      67                    *puc = 0xfffd;
      68                    return 2;
      69                  }
      70                /* invalid multibyte character */
      71                *puc = 0xfffd;
      72                return 1;
      73              }
      74            else
      75              {
      76                *puc = 0xfffd;
      77                if (n == 1)
      78                  {
      79                    /* incomplete multibyte character */
      80                    return 1;
      81                  }
      82                else
      83                  {
      84                    if ((s[1] ^ 0x80) < 0x40
      85                        && (c >= 0xe1 || s[1] >= 0xa0)
      86                        && (c != 0xed || s[1] < 0xa0))
      87                      {
      88                        /* incomplete multibyte character */
      89                        return 2;
      90                      }
      91                    else
      92                      {
      93                        /* invalid multibyte character */
      94                        return 1;
      95                      }
      96                  }
      97              }
      98          }
      99        else if (c <= 0xf4)
     100          {
     101            if (n >= 4)
     102              {
     103                if ((s[1] ^ 0x80) < 0x40
     104                    && (c >= 0xf1 || s[1] >= 0x90)
     105                    && (c < 0xf4 || (/* c == 0xf4 && */ s[1] < 0x90)))
     106                  {
     107                    if ((s[2] ^ 0x80) < 0x40)
     108                      {
     109                        if ((s[3] ^ 0x80) < 0x40)
     110                          {
     111                            *puc = ((unsigned int) (c & 0x07) << 18)
     112                                   | ((unsigned int) (s[1] ^ 0x80) << 12)
     113                                   | ((unsigned int) (s[2] ^ 0x80) << 6)
     114                                   | (unsigned int) (s[3] ^ 0x80);
     115                            return 4;
     116                          }
     117                        /* invalid multibyte character */
     118                        *puc = 0xfffd;
     119                        return 3;
     120                      }
     121                    /* invalid multibyte character */
     122                    *puc = 0xfffd;
     123                    return 2;
     124                  }
     125                /* invalid multibyte character */
     126                *puc = 0xfffd;
     127                return 1;
     128              }
     129            else
     130              {
     131                *puc = 0xfffd;
     132                if (n == 1)
     133                  {
     134                    /* incomplete multibyte character */
     135                    return 1;
     136                  }
     137                else
     138                  {
     139                    if ((s[1] ^ 0x80) < 0x40
     140                        && (c >= 0xf1 || s[1] >= 0x90)
     141                        && (c < 0xf4 || (/* c == 0xf4 && */ s[1] < 0x90)))
     142                      {
     143                        if (n == 2)
     144                          {
     145                            /* incomplete multibyte character */
     146                            return 2;
     147                          }
     148                        else
     149                          {
     150                            if ((s[2] ^ 0x80) < 0x40)
     151                              {
     152                                /* incomplete multibyte character */
     153                                return 3;
     154                              }
     155                            else
     156                              {
     157                                /* invalid multibyte character */
     158                                return 2;
     159                              }
     160                          }
     161                      }
     162                    else
     163                      {
     164                        /* invalid multibyte character */
     165                        return 1;
     166                      }
     167                  }
     168              }
     169          }
     170      }
     171    /* invalid multibyte character */
     172    *puc = 0xfffd;
     173    return 1;
     174  }
     175  
     176  #endif