(root)/
gettext-0.22.4/
gettext-tools/
gnulib-tests/
unistr/
test-u8-check.c
       1  /* Test of u8_check() function.
       2     Copyright (C) 2010-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Bruno Haible <bruno@clisp.org>, 2010.  */
      18  
      19  #include <config.h>
      20  
      21  #include "unistr.h"
      22  
      23  #include "macros.h"
      24  
      25  int
      26  main ()
      27  {
      28    /* Test empty string.  */
      29    {
      30      static const uint8_t input[] = "";
      31      ASSERT (u8_check (input, 0) == NULL);
      32    }
      33  
      34    /* Test valid non-empty string.  */
      35    {
      36      static const uint8_t input[] = /* "Данило Шеган" */
      37        "\320\224\320\260\320\275\320\270\320\273\320\276 \320\250\320\265\320\263\320\260\320\275";
      38      ASSERT (u8_check (input, sizeof (input) - 1) == NULL);
      39    }
      40  
      41    /* Test out-of-range character with 4 bytes: U+110000.  */
      42    {
      43      static const uint8_t input[] = "\320\224\320\260\364\220\200\200";
      44      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
      45    }
      46  
      47    /* Test out-of-range character with 5 bytes: U+200000.  */
      48    {
      49      static const uint8_t input[] = "\320\224\320\260\370\210\200\200\200";
      50      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
      51    }
      52  
      53    /* Test out-of-range character with 6 bytes: U+4000000.  */
      54    {
      55      static const uint8_t input[] = "\320\224\320\260\374\204\200\200\200\200";
      56      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
      57    }
      58  
      59    /* Test invalid lead byte.  */
      60    {
      61      static const uint8_t input[] = "\320\224\320\260\376\200\200\200\200\200";
      62      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
      63    }
      64    {
      65      static const uint8_t input[] = "\320\224\320\260\377\200\200\200\200\200";
      66      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
      67    }
      68  
      69    /* Test overlong 2-byte character.  */
      70    {
      71      static const uint8_t input[] = "\320\224\320\260\301\200";
      72      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
      73    }
      74  
      75    /* Test overlong 3-byte character.  */
      76    {
      77      static const uint8_t input[] = "\320\224\320\260\340\200\277";
      78      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
      79    }
      80  
      81    /* Test overlong 4-byte character.  */
      82    {
      83      static const uint8_t input[] = "\320\224\320\260\360\200\277\277";
      84      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
      85    }
      86  
      87    /* Test invalid bytes in 2-byte character.  */
      88    {
      89      static const uint8_t input[] = "\320\224\320\260\302\200";
      90      ASSERT (u8_check (input, sizeof (input) - 1) == NULL);
      91    }
      92    {
      93      static const uint8_t input[] = "\320\224\320\260\302\100";
      94      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
      95    }
      96    {
      97      static const uint8_t input[] = "\320\224\320\260\302\300";
      98      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
      99    }
     100  
     101    /* Test invalid bytes in 3-byte character.  */
     102    {
     103      static const uint8_t input[] = "\320\224\320\260\342\200\200";
     104      ASSERT (u8_check (input, sizeof (input) - 1) == NULL);
     105    }
     106    {
     107      static const uint8_t input[] = "\320\224\320\260\342\100\200";
     108      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     109    }
     110    {
     111      static const uint8_t input[] = "\320\224\320\260\342\300\200";
     112      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     113    }
     114    {
     115      static const uint8_t input[] = "\320\224\320\260\342\200\100";
     116      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     117    }
     118    {
     119      static const uint8_t input[] = "\320\224\320\260\342\200\300";
     120      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     121    }
     122  
     123    /* Test invalid bytes in 4-byte character.  */
     124    {
     125      static const uint8_t input[] = "\320\224\320\260\362\200\200\200";
     126      ASSERT (u8_check (input, sizeof (input) - 1) == NULL);
     127    }
     128    {
     129      static const uint8_t input[] = "\320\224\320\260\362\100\200\200";
     130      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     131    }
     132    {
     133      static const uint8_t input[] = "\320\224\320\260\362\300\200\200";
     134      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     135    }
     136    {
     137      static const uint8_t input[] = "\320\224\320\260\362\200\100\200";
     138      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     139    }
     140    {
     141      static const uint8_t input[] = "\320\224\320\260\362\200\300\200";
     142      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     143    }
     144    {
     145      static const uint8_t input[] = "\320\224\320\260\362\200\200\100";
     146      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     147    }
     148    {
     149      static const uint8_t input[] = "\320\224\320\260\362\200\200\300";
     150      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     151    }
     152  
     153    /* Test truncated/incomplete 2-byte character.  */
     154    {
     155      static const uint8_t input[] = "\320\224\320\260\302";
     156      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     157    }
     158  
     159    /* Test truncated/incomplete 3-byte character.  */
     160    {
     161      static const uint8_t input[] = "\320\224\320\260\342\200";
     162      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     163    }
     164  
     165    /* Test truncated/incomplete 4-byte character.  */
     166    {
     167      static const uint8_t input[] = "\320\224\320\260\362\200\200";
     168      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     169    }
     170  
     171    /* Test missing lead byte.  */
     172    {
     173      static const uint8_t input[] = "\320\224\320\260\200\200\200\200\200";
     174      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     175    }
     176  
     177    /* Test surrogate codepoints.  */
     178    {
     179      static const uint8_t input[] = "\320\224\320\260\355\240\200\355\260\200";
     180      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     181    }
     182    {
     183      static const uint8_t input[] = "\320\224\320\260\355\260\200";
     184      ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
     185    }
     186  
     187    return 0;
     188  }