(root)/
glibc-2.38/
iconvdata/
unicode.c
       1  /* Conversion module for Unicode
       2     Copyright (C) 1999-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <byteswap.h>
      20  #include <dlfcn.h>
      21  #include <gconv.h>
      22  #include <stddef.h>
      23  #include <stdint.h>
      24  #include <stdlib.h>
      25  #include <string.h>
      26  
      27  /* This is the Byte Order Mark character (BOM).  */
      28  #define BOM	0xfeff
      29  /* And in the other endian format.  */
      30  #define BOM_OE	0xfffe
      31  
      32  
      33  /* Definitions used in the body of the `gconv' function.  */
      34  #define FROM_LOOP		from_unicode_loop
      35  #define TO_LOOP			to_unicode_loop
      36  #define DEFINE_INIT		0
      37  #define DEFINE_FINI		0
      38  #define MIN_NEEDED_FROM		2
      39  #define MIN_NEEDED_TO		4
      40  #define ONE_DIRECTION		0
      41  #define FROM_DIRECTION		(dir == from_unicode)
      42  #define PREPARE_LOOP \
      43    enum direction dir = ((struct unicode_data *) step->__data)->dir;	      \
      44    int swap;								      \
      45    if (FROM_DIRECTION)							      \
      46      {									      \
      47        if (data->__invocation_counter == 0)				      \
      48  	{								      \
      49  	  /* We have to find out which byte order the file is encoded in.  */ \
      50  	  if (inptr + 2 > inend)					      \
      51  	    return (inptr == inend					      \
      52  		    ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);	      \
      53  									      \
      54  	  if (get16 (inptr) == BOM)					      \
      55  	    /* Simply ignore the BOM character.  */			      \
      56  	    *inptrp = inptr += 2;					      \
      57  	  else if (get16 (inptr) == BOM_OE)				      \
      58  	    {								      \
      59  	      data->__flags |= __GCONV_SWAP;				      \
      60  	      *inptrp = inptr += 2;					      \
      61  	    }								      \
      62  	}								      \
      63      }									      \
      64    else if (!data->__internal_use && data->__invocation_counter == 0)	      \
      65      {									      \
      66        /* Emit the Byte Order Mark.  */					      \
      67        if (__glibc_unlikely (outbuf + 2 > outend))			      \
      68  	return __GCONV_FULL_OUTPUT;					      \
      69  									      \
      70        put16 (outbuf, BOM);						      \
      71        outbuf += 2;							      \
      72      }									      \
      73    swap = data->__flags & __GCONV_SWAP;
      74  #define EXTRA_LOOP_ARGS		, swap
      75  
      76  
      77  /* Direction of the transformation.  */
      78  enum direction
      79  {
      80    illegal_dir,
      81    to_unicode,
      82    from_unicode
      83  };
      84  
      85  struct unicode_data
      86  {
      87    enum direction dir;
      88  };
      89  
      90  
      91  extern int gconv_init (struct __gconv_step *step);
      92  int
      93  gconv_init (struct __gconv_step *step)
      94  {
      95    /* Determine which direction.  */
      96    struct unicode_data *new_data;
      97    enum direction dir = illegal_dir;
      98    int result;
      99  
     100    if (strcmp (step->__from_name, "UNICODE//") == 0)
     101      dir = from_unicode;
     102    else
     103      dir = to_unicode;
     104  
     105    new_data = (struct unicode_data *) malloc (sizeof (struct unicode_data));
     106  
     107    result = __GCONV_NOMEM;
     108    if (new_data != NULL)
     109      {
     110        new_data->dir = dir;
     111        step->__data = new_data;
     112  
     113        if (dir == from_unicode)
     114  	{
     115  	  step->__min_needed_from = MIN_NEEDED_FROM;
     116  	  step->__max_needed_from = MIN_NEEDED_FROM;
     117  	  step->__min_needed_to = MIN_NEEDED_TO;
     118  	  step->__max_needed_to = MIN_NEEDED_TO;
     119  	}
     120        else
     121  	{
     122  	  step->__min_needed_from = MIN_NEEDED_TO;
     123  	  step->__max_needed_from = MIN_NEEDED_TO;
     124  	  step->__min_needed_to = MIN_NEEDED_FROM;
     125  	  step->__max_needed_to = MIN_NEEDED_FROM;
     126  	}
     127  
     128        step->__stateful = 0;
     129  
     130        result = __GCONV_OK;
     131      }
     132  
     133    return result;
     134  }
     135  
     136  
     137  extern void gconv_end (struct __gconv_step *data);
     138  void
     139  gconv_end (struct __gconv_step *data)
     140  {
     141    free (data->__data);
     142  }
     143  
     144  
     145  /* Convert from the internal (UCS4-like) format to UCS2.  */
     146  #define MIN_NEEDED_INPUT	MIN_NEEDED_TO
     147  #define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
     148  #define LOOPFCT			TO_LOOP
     149  #define BODY \
     150    {									      \
     151      uint32_t c = get32 (inptr);						      \
     152  									      \
     153      if (__glibc_unlikely (c >= 0x10000))				      \
     154        {									      \
     155  	UNICODE_TAG_HANDLER (c, 4);					      \
     156  	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
     157        }									      \
     158      else if (__glibc_unlikely (c >= 0xd800 && c < 0xe000))		      \
     159        {									      \
     160  	/* Surrogate characters in UCS-4 input are not valid.		      \
     161  	   We must catch this, because the UCS-2 output might be	      \
     162  	   interpreted as UTF-16 by other programs.  If we let		      \
     163  	   surrogates pass through, attackers could make a security	      \
     164  	   hole exploit by synthesizing any desired plane 1-16		      \
     165  	   character.  */						      \
     166  	result = __GCONV_ILLEGAL_INPUT;					      \
     167  	if (! ignore_errors_p ())					      \
     168  	  break;							      \
     169  	inptr += 4;							      \
     170  	++*irreversible;						      \
     171  	continue;							      \
     172        }									      \
     173      else								      \
     174        {									      \
     175  	put16 (outptr, c);						      \
     176  	outptr += 2;							      \
     177        }									      \
     178  									      \
     179      inptr += 4;								      \
     180    }
     181  #define LOOP_NEED_FLAGS
     182  #define EXTRA_LOOP_DECLS \
     183  	, int swap
     184  #include <iconv/loop.c>
     185  
     186  
     187  /* Convert from UCS2 to the internal (UCS4-like) format.  */
     188  #define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
     189  #define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
     190  #define LOOPFCT			FROM_LOOP
     191  #define BODY \
     192    {									      \
     193      uint16_t u1 = get16 (inptr);					      \
     194  									      \
     195      if (swap)								      \
     196        u1 = bswap_16 (u1);						      \
     197  									      \
     198      if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000))			      \
     199        {									      \
     200  	/* Surrogate characters in UCS-2 input are not valid.  Reject	      \
     201  	   them.  (Catching this here is not security relevant.)  */	      \
     202  	STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \
     203        }									      \
     204  									      \
     205      put32 (outptr, u1);							      \
     206  									      \
     207      inptr += 2;								      \
     208      outptr += 4;							      \
     209    }
     210  #define LOOP_NEED_FLAGS
     211  #define EXTRA_LOOP_DECLS \
     212  	, int swap
     213  #include <iconv/loop.c>
     214  
     215  
     216  /* Now define the toplevel functions.  */
     217  #include <iconv/skeleton.c>