(root)/
glibc-2.38/
iconvdata/
utf-32.c
       1  /* Conversion module for UTF-32.
       2     Copyright (C) 1999-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <byteswap.h>
      20  #include <dlfcn.h>
      21  #include <gconv.h>
      22  #include <stddef.h>
      23  #include <stdint.h>
      24  #include <stdlib.h>
      25  #include <string.h>
      26  
      27  /* This is the Byte Order Mark character (BOM).  */
      28  #define BOM	0x0000feffu
      29  /* And in the other byte order: the same four bytes in reverse.  PREPARE_LOOP
             treats a leading word equal to this value as a BOM and turns on
             byte swapping.  */
      30  #define BOM_OE	0xfffe0000u
      31  
      32  
      33  /* Definitions used in the body of the `gconv' function.  */
      34  #define FROM_LOOP		from_utf32_loop
      35  #define TO_LOOP			to_utf32_loop
          /* gconv_init and gconv_end are written out by hand further down in this
             file.  NOTE(review): presumably the zeros keep iconv/skeleton.c from
             supplying its own versions -- confirm there.  */
      36  #define DEFINE_INIT		0
      37  #define DEFINE_FINI		0
          /* A character occupies four bytes on both sides of the conversion.  */
      38  #define MIN_NEEDED_FROM		4
      39  #define MIN_NEEDED_TO		4
      40  #define ONE_DIRECTION		0
      41  #define FROM_DIRECTION		(dir == from_utf32)
          /* PREPARE_LOOP sets up the byte-order handling before the conversion
             loop runs.  It reads the direction and variant stored in
             step->__data and then takes the first matching case:

             - Converting from generic UTF-32: only on the first invocation
               (data->__invocation_counter == 0) the start of the input is
               inspected.  At least four bytes must be available, otherwise
               __GCONV_EMPTY_INPUT (no input at all) or __GCONV_INCOMPLETE_INPUT
               is returned.  A leading BOM is skipped; a leading BOM_OE is
               skipped as well and sets __GCONV_SWAP in data->__flags.  Any
               other first word is left in place and no flag is changed.

             - Converting to generic UTF-32, on the first invocation and with
               data->__internal_use not set: a BOM is stored at outbuf, or
               __GCONV_FULL_OUTPUT is returned when fewer than four bytes of
               room remain.

             - Anything else, on the first invocation: __GCONV_SWAP is set when
               the variant is UTF_32LE on a big-endian host or UTF_32BE on a
               little-endian host.

             Finally `swap' is loaded from the __GCONV_SWAP bit of data->__flags,
             so the decision made on the first invocation carries over to later
             ones for as long as that flag stays set.

             The names step, data, inptr, inend, inptrp, outbuf and outend are
             not defined in this file; presumably they belong to the scope in
             iconv/skeleton.c where this macro is expanded -- confirm there.  */
      42  #define PREPARE_LOOP \
      43    enum direction dir = ((struct utf32_data *) step->__data)->dir;	      \
      44    enum variant var = ((struct utf32_data *) step->__data)->var;		      \
      45    int swap;								      \
      46    if (FROM_DIRECTION && var == UTF_32)					      \
      47      {									      \
      48        if (__glibc_unlikely (data->__invocation_counter == 0))		      \
      49  	{								      \
      50  	  /* We have to find out which byte order the file is encoded in.  */ \
      51  	  if (inptr + 4 > inend)					      \
      52  	    return (inptr == inend					      \
      53  		    ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);	      \
      54  									      \
      55  	  if (get32 (inptr) == BOM)					      \
      56  	    /* Simply ignore the BOM character.  */			      \
      57  	    *inptrp = inptr += 4;					      \
      58  	  else if (get32 (inptr) == BOM_OE)				      \
      59  	    {								      \
      60  	      data->__flags |= __GCONV_SWAP;				      \
      61  	      *inptrp = inptr += 4;					      \
      62  	    }								      \
      63  	}								      \
      64      }									      \
      65    else if (!FROM_DIRECTION && var == UTF_32 && !data->__internal_use	      \
      66  	   && data->__invocation_counter == 0)				      \
      67      {									      \
      68        /* Emit the Byte Order Mark.  */					      \
      69        if (__glibc_unlikely (outbuf + 4 > outend))			      \
      70  	return __GCONV_FULL_OUTPUT;					      \
      71  									      \
      72        put32 (outbuf, BOM);						      \
      73        outbuf += 4;							      \
      74      }									      \
      75    else if (__builtin_expect (data->__invocation_counter == 0, 0)	      \
      76  	   && ((var == UTF_32LE && BYTE_ORDER == BIG_ENDIAN)		      \
      77  	       || (var == UTF_32BE && BYTE_ORDER == LITTLE_ENDIAN)))	      \
      78      data->__flags |= __GCONV_SWAP;					      \
      79    swap = data->__flags & __GCONV_SWAP;
          /* The two locals set up by PREPARE_LOOP, written as a trailing argument
             list.  They line up with the parameters named by EXTRA_LOOP_DECLS
             further down in this file; presumably iconv/skeleton.c appends them
             to each call of the loop functions.  */
      80  #define EXTRA_LOOP_ARGS		, var, swap
      81  
      82  
      83  /* Direction of the transformation.  */
      84  enum direction
      85  {
      86    illegal_dir,
      87    to_utf32,
      88    from_utf32
      89  };
      90  
      91  enum variant
      92  {
      93    illegal_var,
      94    UTF_32,
      95    UTF_32LE,
      96    UTF_32BE
      97  };
      98  
      99  struct utf32_data
     100  {
     101    enum direction dir;
     102    enum variant var;
     103  };
     104  
     105  
     106  extern int gconv_init (struct __gconv_step *step);
     107  int
     108  gconv_init (struct __gconv_step *step)
     109  {
     110    /* Determine which direction.  */
     111    struct utf32_data *new_data;
     112    enum direction dir = illegal_dir;
     113    enum variant var = illegal_var;
     114    int result;
     115  
     116    if (__strcasecmp (step->__from_name, "UTF-32//") == 0)
     117      {
     118        dir = from_utf32;
     119        var = UTF_32;
     120      }
     121    else if (__strcasecmp (step->__to_name, "UTF-32//") == 0)
     122      {
     123        dir = to_utf32;
     124        var = UTF_32;
     125      }
     126    else if (__strcasecmp (step->__from_name, "UTF-32BE//") == 0)
     127      {
     128        dir = from_utf32;
     129        var = UTF_32BE;
     130      }
     131    else if (__strcasecmp (step->__to_name, "UTF-32BE//") == 0)
     132      {
     133        dir = to_utf32;
     134        var = UTF_32BE;
     135      }
     136    else if (__strcasecmp (step->__from_name, "UTF-32LE//") == 0)
     137      {
     138        dir = from_utf32;
     139        var = UTF_32LE;
     140      }
     141    else if (__strcasecmp (step->__to_name, "UTF-32LE//") == 0)
     142      {
     143        dir = to_utf32;
     144        var = UTF_32LE;
     145      }
     146  
     147    result = __GCONV_NOCONV;
     148    if (__builtin_expect (dir, to_utf32) != illegal_dir)
     149      {
     150        new_data = (struct utf32_data *) malloc (sizeof (struct utf32_data));
     151  
     152        result = __GCONV_NOMEM;
     153        if (new_data != NULL)
     154  	{
     155  	  new_data->dir = dir;
     156  	  new_data->var = var;
     157  	  step->__data = new_data;
     158  
     159  	  if (dir == from_utf32)
     160  	    {
     161  	      step->__min_needed_from = MIN_NEEDED_FROM;
     162  	      step->__max_needed_from = MIN_NEEDED_FROM;
     163  	      step->__min_needed_to = MIN_NEEDED_TO;
     164  	      step->__max_needed_to = MIN_NEEDED_TO;
     165  	    }
     166  	  else
     167  	    {
     168  	      step->__min_needed_from = MIN_NEEDED_TO;
     169  	      step->__max_needed_from = MIN_NEEDED_TO;
     170  	      step->__min_needed_to = MIN_NEEDED_FROM;
     171  	      step->__max_needed_to = MIN_NEEDED_FROM;
     172  	    }
     173  
     174  	  step->__stateful = 0;
     175  
     176  	  result = __GCONV_OK;
     177  	}
     178      }
     179  
     180    return result;
     181  }
     182  
     183  
     184  extern void gconv_end (struct __gconv_step *data);
     185  void
     186  gconv_end (struct __gconv_step *data)
     187  {
     188    free (data->__data);
     189  }
     190  
     191  
     192  /* Convert from the internal (UCS4-like) format to UTF-32.  */
          /* BODY below handles one character per iteration of the loop that
             iconv/loop.c builds around it:

             - The 32-bit value at inptr is read with get32.
             - A value of 0x110000 or above is handed to
               STANDARD_TO_LOOP_ERR_HANDLER with a length of 4.
             - A surrogate (0xd800 up to 0xdfff) sets result to
               __GCONV_ILLEGAL_INPUT.  Unless ignore_errors_p () is true the
               body then leaves via `break'; otherwise the four input bytes
               are skipped, *irreversible is incremented and the body
               `continue's.
             - Any other value is byte-swapped when `swap' is nonzero and
               stored at outptr with put32, after which both pointers advance
               by four bytes.

             NOTE(review): inptr, outptr, result, irreversible and the error
             handler macro are not defined in this file; presumably they come
             from iconv/loop.c -- confirm there.  */
     193  #define MIN_NEEDED_INPUT	MIN_NEEDED_TO
     194  #define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
     195  #define LOOPFCT			TO_LOOP
     196  #define BODY \
     197    {									      \
     198      uint32_t c = get32 (inptr);						      \
     199  									      \
     200      if (__glibc_unlikely (c >= 0x110000))				      \
     201        {									      \
     202  	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
     203        }									      \
     204      else if (__glibc_unlikely (c >= 0xd800 && c < 0xe000))		      \
     205        {									      \
     206  	/* Surrogate characters in UCS-4 input are not valid.		      \
     207  	   We must catch this.  If we let surrogates pass through,	      \
     208  	   attackers could make a security hole exploit by		      \
     209  	   generating "irregular UTF-32" sequences.  */			      \
     210  	result = __GCONV_ILLEGAL_INPUT;					      \
     211  	if (! ignore_errors_p ())					      \
     212  	  break;							      \
     213  	inptr += 4;							      \
     214  	++*irreversible;						      \
     215  	continue;							      \
     216        }									      \
     217  									      \
     218      if (swap)								      \
     219        c = bswap_32 (c);							      \
     220      put32 (outptr, c);							      \
     221  									      \
     222      outptr += 4;							      \
     223      inptr += 4;								      \
     224    }
          /* Extra parameters for the generated loop function.  Their names and
             order line up with EXTRA_LOOP_ARGS (", var, swap") defined near the
             top of this file.  */
     225  #define LOOP_NEED_FLAGS
     226  #define EXTRA_LOOP_DECLS \
     227  	, enum variant var, int swap
     228  #include <iconv/loop.c>
     229  
     230  
     231  /* Convert from UTF-32 to the internal (UCS4-like) format.  */
          /* BODY below handles one character per iteration of the loop that
             iconv/loop.c builds around it:

             - The 32-bit value at inptr is read with get32 and byte-swapped
               when `swap' is nonzero, so the range checks see the swapped
               value.
             - A value of 0x110000 or above, or a surrogate (0xd800 up to
               0xdfff), is handed to STANDARD_FROM_LOOP_ERR_HANDLER with a
               length of 4.
             - Otherwise the value is stored at outptr with put32 and both
               pointers advance by four bytes.

             NOTE(review): the error handler macro is defined elsewhere;
             presumably it leaves the current iteration, so the put32 after it
             is not reached for an invalid value -- confirm in iconv/loop.c.  */
     232  #define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
     233  #define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
     234  #define LOOPFCT			FROM_LOOP
     235  #define BODY \
     236    {									      \
     237      uint32_t u1 = get32 (inptr);					      \
     238  									      \
     239      if (swap)								      \
     240        u1 = bswap_32 (u1);						      \
     241  									      \
     242      if (__glibc_unlikely (u1 >= 0x110000 || (u1 >= 0xd800 && u1 < 0xe000)))   \
     243        {									      \
     244  	/* This is illegal.  */						      \
     245  	STANDARD_FROM_LOOP_ERR_HANDLER (4);				      \
     246        }									      \
     247  									      \
     248      put32 (outptr, u1);							      \
     249      inptr += 4;								      \
     250      outptr += 4;							      \
     251    }
          /* Extra parameters for the generated loop function.  Their names and
             order line up with EXTRA_LOOP_ARGS (", var, swap") defined near the
             top of this file.  */
     252  #define LOOP_NEED_FLAGS
     253  #define EXTRA_LOOP_DECLS \
     254  	, enum variant var, int swap
     255  #include <iconv/loop.c>
     256  
     257  
     258  /* Now define the toplevel functions.  */
     259  #include <iconv/skeleton.c>