(root)/
gettext-0.22.4/
gettext-tools/
gnulib-lib/
unistr/
u8-mblen.c
       1  /* Look at first character in UTF-8 string.
       2     Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2023 Free Software
       3     Foundation, Inc.
       4     Written by Bruno Haible <bruno@clisp.org>, 2002.
       5  
       6     This file is free software: you can redistribute it and/or modify
       7     it under the terms of the GNU Lesser General Public License as
       8     published by the Free Software Foundation; either version 2.1 of the
       9     License, or (at your option) any later version.
      10  
      11     This file is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14     GNU Lesser General Public License for more details.
      15  
      16     You should have received a copy of the GNU Lesser General Public License
      17     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <config.h>
      20  
      21  /* Specification.  */
      22  #include "unistr.h"
      23  
      24  int
      25  u8_mblen (const uint8_t *s, size_t n)
      26  {
      27    if (n > 0)
      28      {
      29        /* Keep in sync with unistr.h and u8-mbtouc-aux.c.  */
      30        uint8_t c = *s;
      31  
      32        if (c < 0x80)
      33          return (c != 0 ? 1 : 0);
      34        if (c >= 0xc2)
      35          {
      36            if (c < 0xe0)
      37              {
      38                if (n >= 2
      39                    && (s[1] ^ 0x80) < 0x40)
      40                  return 2;
      41              }
      42            else if (c < 0xf0)
      43              {
      44                if (n >= 3
      45                    && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
      46                    && (c >= 0xe1 || s[1] >= 0xa0)
      47                    && (c != 0xed || s[1] < 0xa0))
      48                  return 3;
      49              }
      50            else if (c <= 0xf4)
      51              {
      52                if (n >= 4
      53                    && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
      54                    && (s[3] ^ 0x80) < 0x40
      55                    && (c >= 0xf1 || s[1] >= 0x90)
      56                    && (c < 0xf4 || (/* c == 0xf4 && */ s[1] < 0x90)))
      57                  return 4;
      58              }
      59          }
      60      }
      61    /* invalid or incomplete multibyte character */
      62    return -1;
      63  }