(root)/
gettext-0.22.4/
gettext-tools/
libgettextpo/
unistr/
u8-strmbtouc.c
       1  /* Look at first character in UTF-8 string.
       2     Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2023 Free Software
       3     Foundation, Inc.
       4     Written by Bruno Haible <bruno@clisp.org>, 2002.
       5  
       6     This file is free software.
       7     It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
       8     You can redistribute it and/or modify it under either
       9       - the terms of the GNU Lesser General Public License as published
      10         by the Free Software Foundation, either version 3, or (at your
      11         option) any later version, or
      12       - the terms of the GNU General Public License as published by the
      13         Free Software Foundation; either version 2, or (at your option)
      14         any later version, or
      15       - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
      16  
      17     This file is distributed in the hope that it will be useful,
      18     but WITHOUT ANY WARRANTY; without even the implied warranty of
      19     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      20     Lesser General Public License and the GNU General Public License
      21     for more details.
      22  
      23     You should have received a copy of the GNU Lesser General Public
      24     License and of the GNU General Public License along with this
      25     program.  If not, see <https://www.gnu.org/licenses/>.  */
      26  
      27  #include <config.h>
      28  
      29  /* Specification.  */
      30  #include "unistr.h"
      31  
      32  int
      33  u8_strmbtouc (ucs4_t *puc, const uint8_t *s)
      34  {
      35    /* Keep in sync with unistr.h and u8-mbtouc-aux.c.  */
      36    uint8_t c = *s;
      37  
      38    if (c < 0x80)
      39      {
      40        *puc = c;
      41        return (c != 0 ? 1 : 0);
      42      }
      43    if (c >= 0xc2)
      44      {
      45        if (c < 0xe0)
      46          {
      47            if ((s[1] ^ 0x80) < 0x40)
      48              {
      49                *puc = ((unsigned int) (c & 0x1f) << 6)
      50                       | (unsigned int) (s[1] ^ 0x80);
      51                return 2;
      52              }
      53          }
      54        else if (c < 0xf0)
      55          {
      56            if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
      57                && (c >= 0xe1 || s[1] >= 0xa0)
      58                && (c != 0xed || s[1] < 0xa0))
      59              {
      60                *puc = ((unsigned int) (c & 0x0f) << 12)
      61                       | ((unsigned int) (s[1] ^ 0x80) << 6)
      62                       | (unsigned int) (s[2] ^ 0x80);
      63                return 3;
      64              }
      65          }
      66        else if (c <= 0xf4)
      67          {
      68            if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
      69                && (s[3] ^ 0x80) < 0x40
      70                && (c >= 0xf1 || s[1] >= 0x90)
      71                && (c < 0xf4 || (/* c == 0xf4 && */ s[1] < 0x90)))
      72              {
      73                *puc = ((unsigned int) (c & 0x07) << 18)
      74                       | ((unsigned int) (s[1] ^ 0x80) << 12)
      75                       | ((unsigned int) (s[2] ^ 0x80) << 6)
      76                       | (unsigned int) (s[3] ^ 0x80);
      77                return 4;
      78              }
      79          }
      80      }
      81    /* invalid or incomplete multibyte character */
      82    return -1;
      83  }