(root)/
glibc-2.38/
iconvdata/
iso-2022-kr.c
       1  /* Conversion module for ISO-2022-KR.
       2     Copyright (C) 1998-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <dlfcn.h>
      20  #include <gconv.h>
      21  #include <stdint.h>
      22  #include <string.h>
      23  #include "ksc5601.h"
      24  
      25  #include <assert.h>
      26  
      27  /* This makes obvious what everybody knows: 0x1b is the Esc character.
          In this file it is the first byte of the `Esc $ ) C' designator
          sequence that is written by PREPARE_LOOP and skipped on input.  */
      28  #define ESC	0x1b
      29  
      30  /* The shift bytes for this charset: SO selects KS C 5601 and SI
          selects ASCII again.  Unlike the designator, they do not use ESC.  */
      31  #define SI	0x0f
      32  #define SO	0x0e
      33  
      34  /* Definitions used in the body of the `gconv' function.  The FROM
          side is ISO-2022-KR: one step consumes between 1 and 4 bytes, the
          longest unit being the four-byte designator.  The TO side uses
          four bytes per character (see put32/get32 in the loop bodies).  */
      35  #define CHARSET_NAME		"ISO-2022-KR//"
      36  #define DEFINE_INIT		1
      37  #define DEFINE_FINI		1
      38  #define FROM_LOOP		from_iso2022kr_loop
      39  #define TO_LOOP			to_iso2022kr_loop
      40  #define MIN_NEEDED_FROM		1
      41  #define MAX_NEEDED_FROM		4
      42  #define MIN_NEEDED_TO		4
      43  #define MAX_NEEDED_TO		4
      44  #define ONE_DIRECTION		0
          /* PREPARE_LOOP declares SAVE_SET and SETP, which SAVE_RESET_STATE
             and EXTRA_LOOP_ARGS rely on; SETP points at the COUNT member of
             the conversion state.  When the conversion is not in the FROM
             direction, is not for internal use and this is the first
             invocation, it also writes the four-byte designator
             `Esc $ ) C'.  If fewer than four bytes of output space are
             left it returns __GCONV_FULL_OUTPUT without writing anything.  */
      45  #define PREPARE_LOOP \
      46    int save_set;								      \
      47    int *setp = &data->__statep->__count;					      \
      48    if (!FROM_DIRECTION && !data->__internal_use				      \
      49        && data->__invocation_counter == 0)				      \
      50      {									      \
      51        /* Emit the designator sequence.  */				      \
      52        if (outbuf + 4 > outend)						      \
      53  	return __GCONV_FULL_OUTPUT;					      \
      54  									      \
      55        *outbuf++ = ESC;							      \
      56        *outbuf++ = '$';							      \
      57        *outbuf++ = ')';							      \
      58        *outbuf++ = 'C';							      \
      59      }
          /* Extra argument for the loop functions; its parameter declaration
             is EXTRA_LOOP_DECLS.  */
      60  #define EXTRA_LOOP_ARGS		, setp
      61  
      62  
      63  /* The COUNT element of the state keeps track of the currently selected
      64     character set.  Both values leave the low three bits of COUNT clear;
          EMIT_SHIFT_TO_INIT masks with 7 to keep those bits when it resets
          the input state.  ASCII_set is also the initial state that
          flushing returns to.  The possible values are:  */
      65  enum
      66  {
      67    ASCII_set = 0,
      68    KSC5601_set = 8
      69  };
      70  
      71  
      72  /* Since this is a stateful encoding we have to provide code which resets
      73     the output state to the initial state.  This has to be done during the
      74     flushing.  Nothing happens if COUNT already equals ASCII_set.  In the
          FROM direction only the stored state is changed.  Otherwise one SI
          byte is written and COUNT becomes ASCII_set; if the output buffer
          has no room, STATUS is set to __GCONV_FULL_OUTPUT and the state is
          left as it was.  */
      75  #define EMIT_SHIFT_TO_INIT \
      76    if (data->__statep->__count != ASCII_set)				      \
      77      {									      \
      78        if (FROM_DIRECTION)						      \
      79  	{								      \
      80  	  /* It's easy, we don't have to emit anything, we just reset the     \
      81  	     state for the input, keeping the low three bits of COUNT.  */    \
      82  	  data->__statep->__count &= 7;					      \
      83  	  data->__statep->__count |= ASCII_set;				      \
      84  	}								      \
      85        else								      \
      86  	{								      \
      87  	  /* We are not in the initial state.  To switch back we have	      \
      88  	     to emit `SI'.  */						      \
      89  	  if (__glibc_unlikely (outbuf == outend))			      \
      90  	    /* We don't have enough room in the output buffer.  */	      \
      91  	    status = __GCONV_FULL_OUTPUT;				      \
      92  	  else								      \
      93  	    {								      \
      94  	      /* Write out the shift sequence.  */			      \
      95  	      *outbuf++ = SI;						      \
      96  	      data->__statep->__count = ASCII_set;			      \
      97  	    }								      \
      98  	}								      \
      99      }
     100  
     101  
     102  /* Since we might have to reset input pointer we must be able to save
     103     and restore the state.  A nonzero SAVE copies *SETP into SAVE_SET;
          zero copies SAVE_SET back into *SETP.  Both variables are declared
          by PREPARE_LOOP.  The expansion ends without a semicolon, so the
          use site has to supply one.  */
     104  #define SAVE_RESET_STATE(Save) \
     105    if (Save)								      \
     106      save_set = *setp;							      \
     107    else									      \
     108      *setp = save_set
     109  
     110  
     111  /* First define the conversion function from ISO-2022-KR to UCS4.
          Each pass through BODY looks at the byte at INPTR:
          - a byte above 0x7f is handed to STANDARD_FROM_LOOP_ERR_HANDLER;
          - `Esc $ ) C' is skipped without producing output.  If the input
            ends inside what could still be the designator, the loop stops
            with __GCONV_INCOMPLETE_INPUT.  Any other ESC is treated like an
            ordinary byte;
          - SO and SI are consumed and only change SET;
          - any other byte is stored unchanged while SET is ASCII_set.
            Otherwise it is decoded by ksc5601_to_ucs4, which advances INPTR
            itself: a result of 0 is reported as incomplete input and
            __UNKNOWN_10646_CHAR goes to the error handler.
          Results are stored with put32, four bytes each.  SET is loaded
          from *SETP by INIT_PARAMS and written back by UPDATE_PARAMS.
          The ESC/SO/SI tests put the branch hint on the comparison itself
          (__glibc_unlikely), not on the value of CH.  */
     112  #define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
     113  #define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
     114  #define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
     115  #define LOOPFCT			FROM_LOOP
     116  #define BODY \
     117    {									      \
     118      uint32_t ch = *inptr;						      \
     119  									      \
     120      /* This is a 7bit character set, disallow all 8bit characters.  */	      \
     121      if (__glibc_unlikely (ch > 0x7f))					      \
     122        STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
     123  									      \
     124      /* Recognize escape sequences.  */					      \
     125      if (__glibc_unlikely (ch == ESC))					      \
     126        {									      \
     127  	/* We don't really have to handle escape sequences since all the      \
     128  	   switching is done using the SI and SO bytes.  But we have to	      \
     129  	   recognize `Esc $ ) C' since this is a kind of flag for this	      \
     130  	   encoding.  We simply ignore it.  */				      \
     131  	if (__builtin_expect (inptr + 2 > inend, 0)			      \
     132  	    || (inptr[1] == '$'						      \
     133  		&& (__builtin_expect (inptr + 3 > inend, 0)		      \
     134  		    || (inptr[2] == ')'					      \
     135  			&& __builtin_expect (inptr + 4 > inend, 0)))))	      \
     136  	  {								      \
     137  	    result = __GCONV_INCOMPLETE_INPUT;				      \
     138  	    break;							      \
     139  	  }								      \
     140  	if (inptr[1] == '$' && inptr[2] == ')' && inptr[3] == 'C')	      \
     141  	  {								      \
     142  	    /* Yeah, yeah, we know this is ISO 2022-KR.  */		      \
     143  	    inptr += 4;							      \
     144  	    continue;							      \
     145  	  }								      \
     146        }									      \
     147      else if (__glibc_unlikely (ch == SO))				      \
     148        {									      \
     149  	/* Switch to use KSC.  */					      \
     150  	++inptr;							      \
     151  	set = KSC5601_set;						      \
     152  	continue;							      \
     153        }									      \
     154      else if (__glibc_unlikely (ch == SI))				      \
     155        {									      \
     156  	/* Switch to use ASCII.  */					      \
     157  	++inptr;							      \
     158  	set = ASCII_set;						      \
     159  	continue;							      \
     160        }									      \
     161  									      \
     162      if (set == ASCII_set)						      \
     163        {									      \
     164  	/* Almost done, just advance the input pointer.  */		      \
     165  	++inptr;							      \
     166        }									      \
     167      else								      \
     168        {									      \
     169  	assert (set == KSC5601_set);					      \
     170  									      \
     171  	/* Use the KSC 5601 table.  */					      \
     172  	ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0);		      \
     173  									      \
     174  	if (__glibc_unlikely (ch == 0))					      \
     175  	  {								      \
     176  	    result = __GCONV_INCOMPLETE_INPUT;				      \
     177  	    break;							      \
     178  	  }								      \
     179  	else if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))		      \
     180  	  {								      \
     181  	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
     182  	  }								      \
     183        }									      \
     184  									      \
     185      put32 (outptr, ch);							      \
     186      outptr += 4;							      \
     187    }
     188  #define LOOP_NEED_FLAGS
     189  #define EXTRA_LOOP_DECLS	, int *setp
     190  #define INIT_PARAMS		int set = *setp
     191  #define UPDATE_PARAMS		*setp = set
     192  #include <iconv/loop.c>
     193  
     194  
     195  /* Next, define the other direction.  Each pass through BODY reads one
          32-bit value with get32 and consumes four input bytes once the
          output has been written:
          - values below 0x80 are written as one byte, preceded by SI when
            SET is not ASCII_set;
          - other values are converted by ucs4_to_ksc5601 and written as two
            bytes, preceded by SO when SET is not KSC5601_set.  A result of
            __UNKNOWN_10646_CHAR goes to UNICODE_TAG_HANDLER and then
            STANDARD_TO_LOOP_ERR_HANDLER.
          A shift byte is written and SET updated before the room for the
          character itself is checked.  If the character then does not fit,
          the loop stops with __GCONV_FULL_OUTPUT but the shift byte stays
          in the output and INPTR is not advanced.
          NOTE(review): writing the shift byte unconditionally assumes the
          including loop has already ensured MIN_NEEDED_OUTPUT (1) byte of
          space before BODY -- confirm against <iconv/loop.c>.
          The test for an unconvertible character puts the branch hint on
          the comparison itself, not on the value of WRITTEN.  */
     196  #define MIN_NEEDED_INPUT	MIN_NEEDED_TO
     197  #define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
     198  #define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
     199  #define LOOPFCT			TO_LOOP
     200  #define BODY \
     201    {									      \
     202      uint32_t ch = get32 (inptr);					      \
     203  									      \
     204      /* First see whether we can write the character using the currently	      \
     205         selected character set.  */					      \
     206      if (ch < 0x80)							      \
     207        {									      \
     208  	if (set != ASCII_set)						      \
     209  	  {								      \
     210  	    *outptr++ = SI;						      \
     211  	    set = ASCII_set;						      \
     212  	    if (__glibc_unlikely (outptr == outend))			      \
     213  	      {								      \
     214  		result = __GCONV_FULL_OUTPUT;				      \
     215  		break;							      \
     216  	      }								      \
     217  	  }								      \
     218   									      \
     219  	*outptr++ = ch;							      \
     220        }									      \
     221      else								      \
     222        {									      \
     223  	unsigned char buf[2];						      \
     224  	/* Fake initialization to keep gcc quiet.  */			      \
     225  	asm ("" : "=m" (buf));						      \
     226  									      \
     227  	size_t written = ucs4_to_ksc5601 (ch, buf, 2);			      \
     228  	if (__glibc_unlikely (written == __UNKNOWN_10646_CHAR))		      \
     229  	  {								      \
     230  	    UNICODE_TAG_HANDLER (ch, 4);				      \
     231  									      \
     232  	    /* Illegal character.  */					      \
     233  	    STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
     234  	  }								      \
     235  	else								      \
     236  	  {								      \
     237  	    assert (written == 2);					      \
     238  									      \
     239  	    /* We use KSC 5601.  */					      \
     240  	    if (set != KSC5601_set)					      \
     241  	      {								      \
     242  		*outptr++ = SO;						      \
     243  		set = KSC5601_set;					      \
     244  	      }								      \
     245  									      \
     246  	    if (__glibc_unlikely (outptr + 2 > outend))			      \
     247  	      {								      \
     248  		result = __GCONV_FULL_OUTPUT;				      \
     249  		break;							      \
     250  	      }								      \
     251  									      \
     252  	    *outptr++ = buf[0];						      \
     253  	    *outptr++ = buf[1];						      \
     254  	  }								      \
     255        }									      \
     256  									      \
     257      /* Now that we wrote the output increment the input pointer.  */	      \
     258      inptr += 4;								      \
     259    }
     260  #define LOOP_NEED_FLAGS
     261  #define EXTRA_LOOP_DECLS	, int *setp
     262  #define INIT_PARAMS		int set = *setp
     263  #define REINIT_PARAMS		set = *setp
     264  #define UPDATE_PARAMS		*setp = set
     265  #include <iconv/loop.c>
     266  
     267  
     268  /* Now define the toplevel functions.  */
     269  #include <iconv/skeleton.c>