(root)/
gettext-0.22.4/
gettext-tools/
gnulib-tests/
uniname/
test-uninames.c
       1  /* Test the Unicode character name functions.
       2     Copyright (C) 2000-2003, 2005, 2007, 2009-2023 Free Software Foundation,
       3     Inc.
       4  
       5     This program is free software: you can redistribute it and/or modify
       6     it under the terms of the GNU General Public License as published by
       7     the Free Software Foundation, either version 3 of the License, or
       8     (at your option) any later version.
       9  
      10     This program is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13     GNU General Public License for more details.
      14  
      15     You should have received a copy of the GNU General Public License
      16     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
      17  
      18  #include <config.h>
      19  
      20  #include <stdio.h>
      21  #include <stdlib.h>
      22  #include <string.h>
      23  
      24  #include "xalloc.h"
      25  #include "uniname.h"
      26  
/* The names according to the UnicodeData.txt file, modified to contain the
   Hangul syllable names, as described in the Unicode 3.0 book.
   Indexed by code point.  An entry stays NULL when fill_names() stored no
   name for that code point.  */
static const char * unicode_names [0x110000];

/* Maximum entries in unicode_aliases.  fill_aliases() terminates the
   program when it would have to store more entries than this.  */
#define ALIASLEN 0x200

/* The aliases according to the NameAliases.txt file.
   NAME is the alias string (second ';'-separated field of a line) and UC
   is the code point it designates (first field, parsed as hexadecimal).  */
struct unicode_alias
{
  const char *name;
  unsigned int uc;
};

/* Table of aliases, filled in order by fill_aliases().  */
static struct unicode_alias unicode_aliases [ALIASLEN];
/* Number of entries of unicode_aliases[] that fill_aliases() has stored.  */
static int aliases_count;
      43  
      44  /* Stores in unicode_names[] the relevant contents of the UnicodeData.txt
      45     file.  */
      46  static void
      47  fill_names (const char *unicodedata_filename)
      48  {
      49    FILE *stream;
      50    char *field0;
      51    char *field1;
      52    char line[1024];
      53    int lineno = 0;
      54  
      55    stream = fopen (unicodedata_filename, "r");
      56    if (stream == NULL)
      57      {
      58        fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
      59        exit (EXIT_FAILURE);
      60      }
      61  
      62    while (fgets (line, sizeof line, stream))
      63      {
      64        char *p;
      65        char *comment;
      66        unsigned long i;
      67  
      68        lineno++;
      69  
      70        comment = strchr (line, '#');
      71        if (comment != NULL)
      72          *comment = '\0';
      73        if (line[strspn (line, " \t\r\n")] == '\0')
      74          continue;
      75  
      76        field0 = p = line;
      77        p = strchr (p, ';');
      78        if (!p)
      79          {
      80            fprintf (stderr, "short line in '%s':%d\n",
      81                     unicodedata_filename, lineno);
      82            exit (EXIT_FAILURE);
      83          }
      84        *p++ = '\0';
      85  
      86        field1 = p;
      87        if (*field1 == '<')
      88          continue;
      89        p = strchr (p, ';');
      90        if (!p)
      91          {
      92            fprintf (stderr, "short line in '%s':%d\n",
      93                     unicodedata_filename, lineno);
      94            exit (EXIT_FAILURE);
      95          }
      96        *p = '\0';
      97        i = strtoul (field0, NULL, 16);
      98        if (i >= 0x110000)
      99          {
     100            fprintf (stderr, "index too large\n");
     101            exit (EXIT_FAILURE);
     102          }
     103        unicode_names[i] = xstrdup (field1);
     104      }
     105    if (ferror (stream) || fclose (stream))
     106      {
     107        fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
     108        exit (1);
     109      }
     110  }
     111  
     112  /* Stores in unicode_aliases[] the relevant contents of the NameAliases.txt
     113     file.  */
     114  static void
     115  fill_aliases (const char *namealiases_filename)
     116  {
     117    FILE *stream;
     118    char *field0;
     119    char *field1;
     120    char line[1024];
     121    int lineno = 0;
     122  
     123    stream = fopen (namealiases_filename, "r");
     124    if (stream == NULL)
     125      {
     126        fprintf (stderr, "error during fopen of '%s'\n", namealiases_filename);
     127        exit (EXIT_FAILURE);
     128      }
     129  
     130    while (fgets (line, sizeof line, stream))
     131      {
     132        char *p;
     133        char *comment;
     134        unsigned long uc;
     135  
     136        comment = strchr (line, '#');
     137        if (comment != NULL)
     138          *comment = '\0';
     139        if (line[strspn (line, " \t\r\n")] == '\0')
     140          continue;
     141  
     142        lineno++;
     143  
     144        field0 = p = line;
     145        p = strchr (p, ';');
     146        if (!p)
     147          {
     148            fprintf (stderr, "short line in '%s':%d\n",
     149                     namealiases_filename, lineno);
     150            exit (EXIT_FAILURE);
     151          }
     152        *p++ = '\0';
     153  
     154        field1 = p;
     155        p = strchr (p, ';');
     156        if (!p)
     157          {
     158            fprintf (stderr, "short line in '%s':%d\n",
     159                     namealiases_filename, lineno);
     160            exit (EXIT_FAILURE);
     161          }
     162        *p = '\0';
     163  
     164        uc = strtoul (field0, NULL, 16);
     165        if (uc >= 0x110000)
     166          {
     167            fprintf (stderr, "index too large\n");
     168            exit (EXIT_FAILURE);
     169          }
     170  
     171        if (aliases_count == ALIASLEN)
     172          {
     173            fprintf (stderr, "too many aliases\n");
     174            exit (EXIT_FAILURE);
     175          }
     176        unicode_aliases[aliases_count].name = xstrdup (field1);
     177        unicode_aliases[aliases_count].uc = uc;
     178        aliases_count++;
     179      }
     180    if (ferror (stream) || fclose (stream))
     181      {
     182        fprintf (stderr, "error reading from '%s'\n", namealiases_filename);
     183        exit (1);
     184      }
     185  }
     186  
     187  static int
     188  name_has_alias (unsigned int uc)
     189  {
     190    int i;
     191    for (i = 0; i < ALIASLEN; i++)
     192      if (unicode_aliases[i].uc == uc)
     193        return 1;
     194    return 0;
     195  }
     196  
     197  /* Perform an exhaustive test of the unicode_character_name function.  */
     198  static int
     199  test_name_lookup ()
     200  {
     201    int error = 0;
     202    unsigned int i;
     203    char buf[UNINAME_MAX];
     204  
     205    for (i = 0; i < 0x11000; i++)
     206      {
     207        char *result = unicode_character_name (i, buf);
     208  
     209        if (unicode_names[i] != NULL)
     210          {
     211            if (result == NULL)
     212              {
     213                fprintf (stderr, "\\u%04X name lookup failed!\n", i);
     214                error = 1;
     215              }
     216            else if (strcmp (result, unicode_names[i]) != 0)
     217              {
     218                fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
     219                                 i, result);
     220                error = 1;
     221              }
     222          }
     223        else
     224          {
     225            if (result != NULL)
     226              {
     227                fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
     228                                 i, result);
     229                error = 1;
     230              }
     231          }
     232      }
     233  
     234    for (i = 0x110000; i < 0x1000000; i++)
     235      {
     236        char *result = unicode_character_name (i, buf);
     237  
     238        if (result != NULL)
     239          {
     240            fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
     241                             i, result);
     242            error = 1;
     243          }
     244      }
     245  
     246    return error;
     247  }
     248  
     249  /* Perform a test of the unicode_name_character function.  */
     250  static int
     251  test_inverse_lookup ()
     252  {
     253    int error = 0;
     254    unsigned int i;
     255  
     256    /* First, verify all valid character names are recognized.  */
     257    for (i = 0; i < 0x110000; i++)
     258      if (unicode_names[i] != NULL)
     259        {
     260          unsigned int result = unicode_name_character (unicode_names[i]);
     261          if (result != i)
     262            {
     263              if (result == UNINAME_INVALID)
     264                fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
     265                         unicode_names[i]);
     266              else
     267                fprintf (stderr,
     268                         "inverse name lookup of \"%s\" returned 0x%04X\n",
     269                         unicode_names[i], result);
     270              error = 1;
     271            }
     272        }
     273  
     274    /* Second, generate random but likely names and verify they are not
     275       recognized unless really valid.  */
     276    for (i = 0; i < 10000; i++)
     277      {
     278        unsigned int i1, i2;
     279        const char *s1;
     280        const char *s2;
     281        unsigned int l1, l2, j1, j2;
     282        char buf[2*UNINAME_MAX];
     283        unsigned int result;
     284  
     285        do i1 = ((rand () % 0x11) << 16)
     286                + ((rand () & 0xff) << 8)
     287                + (rand () & 0xff);
     288        while (unicode_names[i1] == NULL);
     289  
     290        do i2 = ((rand () % 0x11) << 16)
     291                + ((rand () & 0xff) << 8)
     292                + (rand () & 0xff);
     293        while (unicode_names[i2] == NULL);
     294  
     295        s1 = unicode_names[i1];
     296        l1 = strlen (s1);
     297        s2 = unicode_names[i2];
     298        l2 = strlen (s2);
     299  
     300        /* Concatenate a starting piece of s1 with an ending piece of s2.  */
     301        for (j1 = 1; j1 <= l1; j1++)
     302          if (j1 == l1 || s1[j1] == ' ')
     303            for (j2 = 0; j2 < l2; j2++)
     304              if (j2 == 0 || s2[j2-1] == ' ')
     305                {
     306                  memcpy (buf, s1, j1);
     307                  buf[j1] = ' ';
     308                  memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
     309  
     310                  result = unicode_name_character (buf);
     311                  if (result != UNINAME_INVALID
     312                      && !name_has_alias (result)
     313                      && !(unicode_names[result] != NULL
     314                           && strcmp (unicode_names[result], buf) == 0))
     315                    {
     316                      fprintf (stderr,
     317                               "inverse name lookup of \"%s\" returned 0x%04X\n",
     318                               unicode_names[i], result);
     319                      error = 1;
     320                    }
     321                }
     322      }
     323  
     324    /* Third, some extreme case that used to loop.  */
     325    if (unicode_name_character ("A A") != UNINAME_INVALID)
     326      error = 1;
     327  
     328    return error;
     329  }
     330  
     331  /* Perform a test of the unicode_name_character function for aliases.  */
     332  static int
     333  test_alias_lookup ()
     334  {
     335    int error = 0;
     336    unsigned int i;
     337    char buf[UNINAME_MAX];
     338  
     339    /* Verify all valid character names are recognized.  */
     340    for (i = 0; i < ALIASLEN; i++)
     341      if (unicode_aliases[i].uc != UNINAME_INVALID
     342          /* Skip if the character has no canonical name (e.g. control
     343             characters).  */
     344          && unicode_character_name (unicode_aliases[i].uc, buf))
     345        {
     346          unsigned int result = unicode_name_character (unicode_aliases[i].name);
     347          if (result != unicode_aliases[i].uc)
     348            {
     349              if (result == UNINAME_INVALID)
     350                fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
     351                         unicode_aliases[i].name);
     352              else
     353                fprintf (stderr,
     354                         "inverse name lookup of \"%s\" returned 0x%04X\n",
     355                         unicode_aliases[i].name, result);
     356              error = 1;
     357            }
     358        }
     359  
     360    return error;
     361  }
     362  
     363  int
     364  main (int argc, char *argv[])
     365  {
     366    int error = 0;
     367    int i;
     368  
     369    for (i = 1; i < argc && strcmp (argv[i], "--") != 0; i++)
     370      fill_names (argv[i]);
     371  
     372    if (i < argc)
     373      {
     374        int j;
     375        for (j = 0; j < ALIASLEN; j++)
     376          unicode_aliases[j].uc = UNINAME_INVALID;
     377  
     378        i++;
     379        for (; i < argc; i++)
     380          fill_aliases (argv[i]);
     381      }
     382  
     383    error |= test_name_lookup ();
     384    error |= test_inverse_lookup ();
     385  
     386    if (aliases_count > 0)
     387      error |= test_alias_lookup ();
     388  
     389    return error;
     390  }