(root)/
gettext-0.22.4/
gettext-tools/
libgettextpo/
unistr/
u8-check.c
       1  /* Check UTF-8 string.
       2     Copyright (C) 2002, 2006-2007, 2009-2023 Free Software Foundation, Inc.
       3     Written by Bruno Haible <bruno@clisp.org>, 2002.
       4  
       5     This file is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU Lesser General Public License as
       7     published by the Free Software Foundation; either version 2.1 of the
       8     License, or (at your option) any later version.
       9  
      10     This file is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #include <config.h>
      19  
      20  /* Specification.  */
      21  #include "unistr.h"
      22  
      23  const uint8_t *
      24  u8_check (const uint8_t *s, size_t n)
      25  {
      26    const uint8_t *s_end = s + n;
      27  
      28    while (s < s_end)
      29      {
      30        /* Keep in sync with unistr.h and u8-mbtouc-aux.c.  */
      31        uint8_t c = *s;
      32  
      33        if (c < 0x80)
      34          {
      35            s++;
      36            continue;
      37          }
      38        if (c >= 0xc2)
      39          {
      40            if (c < 0xe0)
      41              {
      42                if (s + 2 <= s_end
      43                    && (s[1] ^ 0x80) < 0x40)
      44                  {
      45                    s += 2;
      46                    continue;
      47                  }
      48              }
      49            else if (c < 0xf0)
      50              {
      51                if (s + 3 <= s_end
      52                    && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
      53                    && (c >= 0xe1 || s[1] >= 0xa0)
      54                    && (c != 0xed || s[1] < 0xa0))
      55                  {
      56                    s += 3;
      57                    continue;
      58                  }
      59              }
      60            else if (c <= 0xf4)
      61              {
      62                if (s + 4 <= s_end
      63                    && (s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
      64                    && (s[3] ^ 0x80) < 0x40
      65                    && (c >= 0xf1 || s[1] >= 0x90)
      66                    && (c < 0xf4 || (/* c == 0xf4 && */ s[1] < 0x90)))
      67                  {
      68                    s += 4;
      69                    continue;
      70                  }
      71              }
      72          }
      73        /* invalid or incomplete multibyte character */
      74        return s;
      75      }
      76    return NULL;
      77  }