1  /* Test of wcwidth() function.
       2     Copyright (C) 2007-2023 Free Software Foundation, Inc.
       3  
       4     This program is free software: you can redistribute it and/or modify
       5     it under the terms of the GNU General Public License as published by
       6     the Free Software Foundation, either version 3 of the License, or
       7     (at your option) any later version.
       8  
       9     This program is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12     GNU General Public License for more details.
      13  
      14     You should have received a copy of the GNU General Public License
      15     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      16  
      17  /* Written by Bruno Haible <bruno@clisp.org>, 2007.  */
      18  
      19  #include <config.h>
      20  
      21  #include <wchar.h>
      22  
      23  #include "signature.h"
      24  SIGNATURE_CHECK (wcwidth, int, (wchar_t));
      25  
      26  #include <locale.h>
      27  #include <string.h>
      28  
      29  #include "c-ctype.h"
      30  #include "localcharset.h"
      31  #include "macros.h"
      32  
      33  int
      34  main ()
      35  {
      36    wchar_t wc;
      37  
      38  #if !GNULIB_WCHAR_SINGLE_LOCALE
      39  # ifdef C_CTYPE_ASCII
      40    /* Test width of ASCII characters.  */
      41    for (wc = 0x20; wc < 0x7F; wc++)
      42      ASSERT (wcwidth (wc) == 1);
      43  # endif
      44  #endif
      45  
      46    /* Switch to an UTF-8 locale.  */
      47    if (setlocale (LC_ALL, "fr_FR.UTF-8") != NULL
      48        /* Check whether it's really an UTF-8 locale.
      49           On OpenBSD 4.0, the setlocale call succeeds only for the LC_CTYPE
      50           category and therefore returns "C/fr_FR.UTF-8/C/C/C/C", but the
      51           LC_CTYPE category is effectively set to an ASCII LC_CTYPE category;
      52           in particular, locale_charset() returns "ASCII".  */
      53        && strcmp (locale_charset (), "UTF-8") == 0)
      54      {
      55        /* Test width of ASCII characters.  */
      56        for (wc = 0x20; wc < 0x7F; wc++)
      57          ASSERT (wcwidth (wc) == 1);
      58  
      59        /* Test width of some non-spacing characters.  */
      60        ASSERT (wcwidth (0x0301) == 0);
      61        ASSERT (wcwidth (0x05B0) == 0);
      62  
      63        /* Test width of some format control characters.  */
      64        ASSERT (wcwidth (0x200E) <= 0);
      65        ASSERT (wcwidth (0x2060) <= 0);
      66  #if 0  /* wchar_t may be only 16 bits.  */
      67        ASSERT (wcwidth (0xE0001) <= 0);
      68        ASSERT (wcwidth (0xE0044) <= 0);
      69  #endif
      70  
      71        /* Test width of some zero width characters.  */
      72        /* While it is desirable that U+200B, U+200C, U+200D have width 0,
      73           because this makes wcswidth work better on strings that contain these
      74           characters, it is acceptable if an implementation treats these
      75           characters like control characters.  */
      76        ASSERT (wcwidth (0x200B) <= 0);
      77        ASSERT (wcwidth (0xFEFF) <= 0);
      78  
      79        /* Test width of some math symbols.
      80           U+2202 is marked as having ambiguous width (A) in EastAsianWidth.txt
      81           (see <https://www.unicode.org/Public/12.0.0/ucd/EastAsianWidth.txt>).
      82           The Unicode Standard Annex 11
      83           <https://www.unicode.org/reports/tr11/tr11-36.html>
      84           says
      85             "Ambiguous characters behave like wide or narrow characters
      86              depending on the context (language tag, script identification,
      87              associated font, source of data, or explicit markup; all can
      88              provide the context). If the context cannot be established
      89              reliably, they should be treated as narrow characters by default."
      90           For wcwidth(), the only available context information is the locale.
      91           "fr_FR.UTF-8" is a Western locale, not an East Asian locale, therefore
      92           U+2202 should be treated like a narrow character.  */
      93        ASSERT (wcwidth (0x2202) == 1);
      94  
      95        /* Test width of some CJK characters.  */
      96        ASSERT (wcwidth (0x3000) == 2);
      97        ASSERT (wcwidth (0xB250) == 2);
      98        ASSERT (wcwidth (0xFF1A) == 2);
      99  #if 0  /* wchar_t may be only 16 bits.  */
     100        ASSERT (wcwidth (0x20369) == 2);
     101        ASSERT (wcwidth (0x2F876) == 2);
     102  #endif
     103      }
     104  
     105    return 0;
     106  }