(root)/
glibc-2.38/
iconvdata/
tst-table-from.c
       1  /* Copyright (C) 2000-2023 Free Software Foundation, Inc.
       2     This file is part of the GNU C Library.
       3  
       4     The GNU C Library is free software; you can redistribute it and/or
       5     modify it under the terms of the GNU Lesser General Public
       6     License as published by the Free Software Foundation; either
       7     version 2.1 of the License, or (at your option) any later version.
       8  
       9     The GNU C Library is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12     Lesser General Public License for more details.
      13  
      14     You should have received a copy of the GNU Lesser General Public
      15     License along with the GNU C Library; if not, see
      16     <https://www.gnu.org/licenses/>.  */
      17  
      18  /* Create a table from CHARSET to Unicode.
      19     This is a good test for CHARSET's iconv() module, in particular the
      20     FROM_LOOP BODY macro.  */
      21  
      22  #include <stddef.h>
      23  #include <stdio.h>
      24  #include <stdlib.h>
      25  #include <string.h>
      26  #include <iconv.h>
      27  #include <errno.h>
      28  
/* If nonzero, ignore conversions outside Unicode plane 0.  main() sets
   this when the charset under test is UTF-8 or GB18030; utf8_decode()
   consults it and returns NULL as soon as a code point is printed with
   more than four hex digits.  */
static int bmp_only;
      31  
      32  /* Converts a byte buffer to a hexadecimal string.  */
      33  static const char*
      34  hexbuf (unsigned char buf[], unsigned int buflen)
      35  {
      36    static char msg[50];
      37  
      38    switch (buflen)
      39      {
      40      case 1:
      41        sprintf (msg, "0x%02X", buf[0]);
      42        break;
      43      case 2:
      44        sprintf (msg, "0x%02X%02X", buf[0], buf[1]);
      45        break;
      46      case 3:
      47        sprintf (msg, "0x%02X%02X%02X", buf[0], buf[1], buf[2]);
      48        break;
      49      case 4:
      50        sprintf (msg, "0x%02X%02X%02X%02X", buf[0], buf[1], buf[2], buf[3]);
      51        break;
      52      default:
      53        abort ();
      54      }
      55    return msg;
      56  }
      57  
      58  /* Attempts to convert a byte buffer BUF (BUFLEN bytes) to OUT (12 bytes)
      59     using the conversion descriptor CD.  Returns the number of written bytes,
      60     or 0 if ambiguous, or -1 if invalid.  */
      61  static int
      62  try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned char *out)
      63  {
      64    const char *inbuf = (const char *) buf;
      65    size_t inbytesleft = buflen;
      66    char *outbuf = (char *) out;
      67    size_t outbytesleft = 12;
      68    size_t result;
      69  
      70    iconv (cd, NULL, NULL, NULL, NULL);
      71    result = iconv (cd, (char **) &inbuf, &inbytesleft, &outbuf, &outbytesleft);
      72    if (result != (size_t)(-1))
      73      result = iconv (cd, NULL, NULL, &outbuf, &outbytesleft);
      74  
      75    if (result == (size_t)(-1))
      76      {
      77        if (errno == EILSEQ)
      78  	{
      79  	  return -1;
      80  	}
      81        else if (errno == EINVAL)
      82  	{
      83  	  return 0;
      84  	}
      85        else
      86  	{
      87  	  int saved_errno = errno;
      88  	  fprintf (stderr, "%s: iconv error: ", hexbuf (buf, buflen));
      89  	  errno = saved_errno;
      90  	  perror ("");
      91  	  exit (1);
      92  	}
      93      }
      94    else
      95      {
      96        if (inbytesleft != 0)
      97  	{
      98  	  fprintf (stderr, "%s: inbytes = %ld, outbytes = %ld\n",
      99  		   hexbuf (buf, buflen),
     100  		   (long) (buflen - inbytesleft),
     101  		   (long) (12 - outbytesleft));
     102  	  exit (1);
     103  	}
     104        return 12 - outbytesleft;
     105      }
     106  }
     107  
     108  /* Returns the out[] buffer as a Unicode value, formatted as 0x%04X.  */
     109  static const char *
     110  utf8_decode (const unsigned char *out, unsigned int outlen)
     111  {
     112    static char hexbuf[84];
     113    char *p = hexbuf;
     114  
     115    while (outlen > 0)
     116      {
     117        if (p > hexbuf)
     118  	*p++ = ' ';
     119  
     120        if (out[0] < 0x80)
     121  	{
     122  	  sprintf (p, "0x%04X", out[0]);
     123  	  out += 1; outlen -= 1;
     124  	}
     125        else if (out[0] >= 0xc0 && out[0] < 0xe0 && outlen >= 2)
     126  	{
     127  	  sprintf (p, "0x%04X", ((out[0] & 0x1f) << 6) + (out[1] & 0x3f));
     128  	  out += 2; outlen -= 2;
     129  	}
     130        else if (out[0] >= 0xe0 && out[0] < 0xf0 && outlen >= 3)
     131  	{
     132  	  sprintf (p, "0x%04X", ((out[0] & 0x0f) << 12)
     133  				+ ((out[1] & 0x3f) << 6) + (out[2] & 0x3f));
     134  	  out += 3; outlen -= 3;
     135  	}
     136        else if (out[0] >= 0xf0 && out[0] < 0xf8 && outlen >= 4)
     137  	{
     138  	  sprintf (p, "0x%04X", ((out[0] & 0x07) << 18)
     139  				+ ((out[1] & 0x3f) << 12)
     140  				+ ((out[2] & 0x3f) << 6) + (out[3] & 0x3f));
     141  	  out += 4; outlen -= 4;
     142  	}
     143        else if (out[0] >= 0xf8 && out[0] < 0xfc && outlen >= 5)
     144  	{
     145  	  sprintf (p, "0x%04X", ((out[0] & 0x03) << 24)
     146  				+ ((out[1] & 0x3f) << 18)
     147  				+ ((out[2] & 0x3f) << 12)
     148  				+ ((out[3] & 0x3f) << 6) + (out[4] & 0x3f));
     149  	  out += 5; outlen -= 5;
     150  	}
     151        else if (out[0] >= 0xfc && out[0] < 0xfe && outlen >= 6)
     152  	{
     153  	  sprintf (p, "0x%04X", ((out[0] & 0x01) << 30)
     154  				+ ((out[1] & 0x3f) << 24)
     155  				+ ((out[2] & 0x3f) << 18)
     156  				+ ((out[3] & 0x3f) << 12)
     157  				+ ((out[4] & 0x3f) << 6) + (out[5] & 0x3f));
     158  	  out += 6; outlen -= 6;
     159  	}
     160        else
     161  	{
     162  	  sprintf (p, "0x????");
     163  	  out += 1; outlen -= 1;
     164  	}
     165  
     166        if (bmp_only && strlen (p) > 6)
     167  	/* Ignore conversions outside Unicode plane 0.  */
     168  	return NULL;
     169  
     170        p += strlen (p);
     171      }
     172  
     173    return hexbuf;
     174  }
     175  
     176  int
     177  main (int argc, char *argv[])
     178  {
     179    const char *charset;
     180    iconv_t cd;
     181    int search_depth;
     182  
     183    if (argc != 2)
     184      {
     185        fprintf (stderr, "Usage: tst-table-from charset\n");
     186        exit (1);
     187      }
     188    charset = argv[1];
     189  
     190    cd = iconv_open ("UTF-8", charset);
     191    if (cd == (iconv_t)(-1))
     192      {
     193        perror ("iconv_open");
     194        exit (1);
     195      }
     196  
     197    /* When testing UTF-8 or GB18030, stop at 0x10000, otherwise the output
     198       file gets too big.  */
     199    bmp_only = (strcmp (charset, "UTF-8") == 0
     200  	      || strcmp (charset, "GB18030") == 0);
     201    search_depth = (strcmp (charset, "UTF-8") == 0 ? 3 : 4);
     202  
     203    {
     204      unsigned char out[12];
     205      unsigned char buf[4];
     206      unsigned int i0, i1, i2, i3;
     207      int result;
     208  
     209      for (i0 = 0; i0 < 0x100; i0++)
     210        {
     211  	buf[0] = i0;
     212  	result = try (cd, buf, 1, out);
     213  	if (result < 0)
     214  	  {
     215  	  }
     216  	else if (result > 0)
     217  	  {
     218  	    const char *unicode = utf8_decode (out, result);
     219  	    if (unicode != NULL)
     220  	      printf ("0x%02X\t%s\n", i0, unicode);
     221  	  }
     222  	else
     223  	  {
     224  	    for (i1 = 0; i1 < 0x100; i1++)
     225  	      {
     226  		buf[1] = i1;
     227  		result = try (cd, buf, 2, out);
     228  		if (result < 0)
     229  		  {
     230  		  }
     231  		else if (result > 0)
     232  		  {
     233  		    const char *unicode = utf8_decode (out, result);
     234  		    if (unicode != NULL)
     235  		      printf ("0x%02X%02X\t%s\n", i0, i1, unicode);
     236  		  }
     237  		else
     238  		  {
     239  		    for (i2 = 0; i2 < 0x100; i2++)
     240  		      {
     241  			buf[2] = i2;
     242  			result = try (cd, buf, 3, out);
     243  			if (result < 0)
     244  			  {
     245  			  }
     246  			else if (result > 0)
     247  			  {
     248  			    const char *unicode = utf8_decode (out, result);
     249  			    if (unicode != NULL)
     250  			      printf ("0x%02X%02X%02X\t%s\n",
     251  				      i0, i1, i2, unicode);
     252  			  }
     253  			else if (search_depth > 3)
     254  			  {
     255  			    for (i3 = 0; i3 < 0x100; i3++)
     256  			      {
     257  				buf[3] = i3;
     258  				result = try (cd, buf, 4, out);
     259  				if (result < 0)
     260  				  {
     261  				  }
     262  				else if (result > 0)
     263  				  {
     264  				    const char *unicode =
     265  				      utf8_decode (out, result);
     266  				    if (unicode != NULL)
     267  				      printf ("0x%02X%02X%02X%02X\t%s\n",
     268  					      i0, i1, i2, i3, unicode);
     269  				  }
     270  				else
     271  				  {
     272  				    fprintf (stderr,
     273  					     "%s: incomplete byte sequence\n",
     274  					     hexbuf (buf, 4));
     275  				    exit (1);
     276  				  }
     277  			      }
     278  			  }
     279  		      }
     280  		  }
     281  	      }
     282  	  }
     283        }
     284    }
     285  
     286    if (iconv_close (cd) < 0)
     287      {
     288        perror ("iconv_close");
     289        exit (1);
     290      }
     291  
     292    if (ferror (stdin) || fflush (stdout) || ferror (stdout))
     293      {
     294        fprintf (stderr, "I/O error\n");
     295        exit (1);
     296      }
     297  
     298    return 0;
     299  }