(root)/
glibc-2.38/
iconvdata/
ibm1364.c
       1  /* Conversion from and to IBM1364.
       2     Copyright (C) 2005-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <dlfcn.h>
      20  #include <stdint.h>
      21  #include <wchar.h>
      22  #include <byteswap.h>
      23  
      24  #ifndef CHARSET_NAME
      25  /* This is really the IBM1364 converter, not another module sharing
      26     the code.  CHARSET_NAME has not been predefined, so select here the
             IBM1364 data header, the names of the two loop functions, the
             conversion tables referenced by the loop bodies below, and
             UCS_LIMIT, the bound at and above which the to-direction loop
             treats a UCS4 value as not convertible.
             NOTE(review): presumably a module sharing this code defines all
             of these itself before including this file -- confirm.  */
      27  # define DATA_HEADER	"ibm1364.h"
      28  # define CHARSET_NAME	"IBM1364//"
      29  # define FROM_LOOP	from_ibm1364
      30  # define TO_LOOP	to_ibm1364
      31  # define SB_TO_UCS4	__ibm1364sb_to_ucs4
      32  # define DB_TO_UCS4_IDX	__ibm1364db_to_ucs4_idx
      33  # define DB_TO_UCS4	__ibm1364db_to_ucs4
      34  # define UCS4_TO_SB_IDX	__ucs4_to_ibm1364sb_idx
      35  # define UCS4_TO_SB	__ucs4_to_ibm1364sb
      36  # define UCS4_TO_DB_IDX	__ucs4_to_ibm1364db_idx
      37  # define UCS4_TO_DB	__ucs4_to_ibm1364db
      38  # define UCS_LIMIT	0xffff
      39  #endif
      40  
      41  
      42  #include DATA_HEADER
      43  
      44  /* The shift sequences for this charset (it does not use ESC).  In the
             from-direction loop SO selects the double-byte table and SI the
             single-byte table; the to-direction loop writes SO or SI whenever
             the table needed for the next character differs from the current
             one.  */
      45  #define SI 		0x0F  /* Shift In, host code to turn DBCS off.  */
      46  #define SO 		0x0E  /* Shift Out, host code to turn DBCS on.  */
      47  
      48  /* Definitions used in the body of the `gconv' function.  */
          /* A character on the charset side occupies one byte (single-byte
             mode) or two bytes (double-byte mode).  */
      49  #define MIN_NEEDED_FROM	1
      50  #define MAX_NEEDED_FROM	2
          /* The other side is processed in 4-byte units (get32/put32).  With
             HAS_COMBINED one charset character may correspond to two such
             units, hence 8.  */
      51  #define MIN_NEEDED_TO	4
      52  #ifdef HAS_COMBINED
      53  # define MAX_NEEDED_TO	8
      54  #else
      55  # define MAX_NEEDED_TO	4
      56  #endif
          /* NOTE(review): presumably tells the skeleton that both directions
             are provided, which matches the two loops defined below --
             confirm in iconv/skeleton.c.  */
      57  #define ONE_DIRECTION	0
          /* Declares the slot used by SAVE_RESET_STATE and a pointer to the
             state word (__count) through which the loops read and write the
             current shift mode.  */
      58  #define PREPARE_LOOP \
      59    int save_curcs;							      \
      60    int *curcsp = &data->__statep->__count;
          /* Hands that pointer to the loops; it corresponds to the
             `int *curcsp' parameter added by EXTRA_LOOP_DECLS below.  */
      61  #define EXTRA_LOOP_ARGS		, curcsp
      62  
      63  /* Definitions of initialization and destructor function.  */
      64  #define DEFINE_INIT	1
      65  #define DEFINE_FINI	1
      66  
      67  
      68  /* Since this is a stateful encoding we have to provide code which resets
      69     the output state to the initial state.  This has to be done during the
      70     flushing.  The shift mode is held in the bits of __count above the low
             three.  If that mode is not `sb': in the from direction only the
             stored mode is cleared (nothing is written); in the to direction a
             single SI byte is written and the mode cleared, or, when outbuf has
             no room left, status is set to __GCONV_FULL_OUTPUT and the state is
             left as it was.  */
      71  #define EMIT_SHIFT_TO_INIT \
      72    if ((data->__statep->__count & ~7) != sb)				      \
      73      {									      \
      74        if (FROM_DIRECTION)						      \
      75  	data->__statep->__count &= 7;					      \
      76        else								      \
      77  	{								      \
      78  	  /* We are not in the initial state.  To switch back we have	      \
      79  	     to emit `SI'.  */						      \
      80  	  if (__glibc_unlikely (outbuf >= outend))			      \
      81  	    /* We don't have enough room in the output buffer.  */	      \
      82  	    status = __GCONV_FULL_OUTPUT;				      \
      83  	  else								      \
      84  	    {								      \
      85  	      /* Write out the shift sequence.  */			      \
      86  	      *outbuf++ = SI;						      \
      87  	      data->__statep->__count &= 7;				      \
      88  	    }								      \
      89  	}								      \
      90      }
      91  
      92  
      93  /* Since we might have to reset input pointer we must be able to save
      94     and restore the state.  A nonzero Save copies the current state word
             (*curcsp) into save_curcs; a zero Save writes save_curcs back to
             *curcsp.  The expansion ends without a semicolon, so the use site
             has to supply it.  */
      95  #define SAVE_RESET_STATE(Save) \
      96    if (Save)								      \
      97      save_curcs = *curcsp;						      \
      98    else									      \
      99      *curcsp = save_curcs
     100  
     101  
     102  /* Current codeset type: `sb' is single-byte mode, `db' double-byte mode.
             These values are compared against the state word with its low three
             bits masked off (`& ~7'), so `db' uses a bit above those three.  */
     103  enum
     104  {
     105    sb = 0,
     106    db = 64
     107  };
     108  
     109  
     110  /* Subroutine to write out converted UCS4 from IBM-13XX.
             Expands inside the from-direction BODY and uses its `ch' (the
             two-byte input code), `res' (the table result), `outptr', `outend'
             and `result'.  Without HAS_COMBINED it simply stores RES as one
             4-byte unit.  With HAS_COMBINED, a RES equal to UCS_LIMIT for a CH
             within [__TO_UCS4_COMBINED_MIN, __TO_UCS4_COMBINED_MAX] marks a
             combined character: the two code points found in DB_TO_UCS4_COMB
             are stored instead, after checking that 8 bytes of room remain.  If
             they do not, RESULT becomes __GCONV_FULL_OUTPUT and the `break'
             leaves the enclosing loop before the input pointer is advanced.  */
     111  #ifdef HAS_COMBINED
     112  # define SUB_COMBINED_UCS_FROM_IBM13XX \
     113    {									      \
     114      if (res != UCS_LIMIT || ch < __TO_UCS4_COMBINED_MIN			      \
     115  	|| ch > __TO_UCS4_COMBINED_MAX)					      \
     116        {									      \
     117  	put32 (outptr, res);						      \
     118  	outptr += 4;							      \
     119        }									      \
     120      else								      \
     121        {									      \
     122  	/* This is a combined character.  Make sure we have room.  */	      \
     123  	if (__glibc_unlikely (outptr + 8 > outend))			      \
     124  	  {								      \
     125  	    result = __GCONV_FULL_OUTPUT;				      \
     126  	    break;							      \
     127  	  }								      \
     128  									      \
     129  	const struct divide *cmbp					      \
     130  	  = &DB_TO_UCS4_COMB[ch - __TO_UCS4_COMBINED_MIN];		      \
     131  	assert (cmbp->res1 != 0 && cmbp->res2 != 0);			      \
     132  									      \
     133  	put32 (outptr, cmbp->res1);					      \
     134  	outptr += 4;							      \
     135  	put32 (outptr, cmbp->res2);					      \
     136  	outptr += 4;							      \
     137        }									      \
     138    }
     139  #else
     140  # define SUB_COMBINED_UCS_FROM_IBM13XX \
     141    {									      \
     142      put32 (outptr, res);						      \
     143      outptr += 4;							      \
     144    }
     145  #endif /* HAS_COMBINED */
     146  
     147  
     148  /* First, define the conversion function from IBM-13XX to UCS4.
             Each pass of BODY looks at one input byte.  SO and SI only switch
             `curcs' to `db' or `sb' and are consumed; repeating the shift that
             is already in effect is accepted.  In `sb' mode the byte indexes
             SB_TO_UCS4 and is consumed.  In `db' mode two bytes are combined
             into one 16-bit code (first byte high), the gap list DB_TO_UCS4_IDX
             is walked to the first entry whose `end' is not below the code, and
             DB_TO_UCS4 is indexed with the code plus that entry's `idx'; the
             output is written by SUB_COMBINED_UCS_FROM_IBM13XX and both bytes
             are consumed.
             A zero table result for a nonzero code, a code below the entry's
             `start', or an entry whose `start' is 0xffff is illegal input:
             RESULT becomes __GCONV_ILLEGAL_INPUT and the loop is left, unless
             ignore_errors_p () holds, in which case *irreversible is
             incremented and the input is skipped.  In `db' mode a missing
             second byte yields __GCONV_INCOMPLETE_INPUT.  */
     149  #define MIN_NEEDED_INPUT  	MIN_NEEDED_FROM
     150  #define MAX_NEEDED_INPUT  	MAX_NEEDED_FROM
     151  #define MIN_NEEDED_OUTPUT 	MIN_NEEDED_TO
     152  #define MAX_NEEDED_OUTPUT 	MAX_NEEDED_TO
     153  #define LOOPFCT 		FROM_LOOP
     154  #define BODY \
     155    {									      \
     156      uint32_t ch = *inptr;						      \
     157  									      \
     158      if (__builtin_expect (ch, 0) == SO)					      \
     159        {									      \
     160  	/* Shift OUT, change to DBCS converter (redundant escape okay).  */   \
     161  	curcs = db;							      \
     162  	++inptr;							      \
     163  	continue;							      \
     164        }									      \
     165      if (__builtin_expect (ch, 0) == SI)					      \
     166        {									      \
     167  	/* Shift IN, change to SBCS converter (redundant escape okay).  */    \
     168  	curcs = sb;							      \
     169  	++inptr;							      \
     170  	continue;							      \
     171        }									      \
     172  									      \
     173      if (curcs == sb)							      \
     174        {									      \
     175  	/* Use the IBM13XX table for single byte.  */			      \
     176  	uint32_t res = SB_TO_UCS4[ch];				      \
     177  	if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0')	      \
     178  	  {								      \
     179  	    /* This is an illegal character.  */			      \
     180  	    if (! ignore_errors_p ())					      \
     181  	      {								      \
     182  		result = __GCONV_ILLEGAL_INPUT;				      \
     183  		break;							      \
     184  	      }								      \
     185  	    ++*irreversible;						      \
     186  	  }								      \
     187  	else								      \
     188  	  {								      \
     189  	    put32 (outptr, res);					      \
     190  	    outptr += 4;						      \
     191  	  }								      \
     192  	++inptr;							      \
     193        }									      \
     194      else								      \
     195        {									      \
     196  	assert (curcs == db);						      \
     197  									      \
     198  	if (__glibc_unlikely (inptr + 1 >= inend))			      \
     199  	  {								      \
     200  	    /* The second byte of the pair is not available yet; stop	      \
     201  	       and report incomplete input.  */				      \
     202  	    result = __GCONV_INCOMPLETE_INPUT;				      \
     203  	    break;							      \
     204  	  }								      \
     205  									      \
     206  	ch = (ch * 0x100) + inptr[1];					      \
     207  									      \
     208  	/* Use the IBM1364 table for double byte.  */			      \
     209  	const struct gap *rp2 = DB_TO_UCS4_IDX;				      \
     210  	while (ch > rp2->end)						      \
     211  	  ++rp2;							      \
     212  									      \
     213  	uint32_t res;							      \
     214  	if (__builtin_expect (rp2->start == 0xffff, 0)			      \
     215  	    || __builtin_expect (ch < rp2->start, 0)			      \
     216  	    || (res = DB_TO_UCS4[ch + rp2->idx],			      \
     217  		__builtin_expect (res, L'\1') == L'\0' && ch != '\0'))	      \
     218  	  {								      \
     219  	    /* This is an illegal character.  */			      \
     220  	    if (! ignore_errors_p ())					      \
     221  	      {								      \
     222  		result = __GCONV_ILLEGAL_INPUT;				      \
     223  		break;							      \
     224  	      }								      \
     225  	    ++*irreversible;						      \
     226  	  }								      \
     227  	else								      \
     228  	  {								      \
     229  	    SUB_COMBINED_UCS_FROM_IBM13XX;				      \
     230  	  }								      \
     231  	inptr += 2;							      \
     232        }									      \
     233    }
     234  #define LOOP_NEED_FLAGS
          /* The loop receives the pointer to the state word, starts from the
             mode stored there (low three bits masked off) and writes `curcs'
             back through the same pointer.  */
     235  #define EXTRA_LOOP_DECLS	, int *curcsp
     236  #define INIT_PARAMS		int curcs = *curcsp & ~7
     237  #define UPDATE_PARAMS		*curcsp = curcs
     238  #include <iconv/loop.c>
     239  
     240  
     241  /* Subroutine to convert two UCS4 codes to IBM-13XX.
             Expands inside the to-direction BODY and uses its `ch', `inptr',
             `inend', `outptr', `outend', `curcs' and `result'.  The table
             UCS4_COMB_TO_DB is walked to the first entry whose `res1' is not
             below CH.  If that entry matches CH and a complete following
             UCS4 unit is available, entries with the same `res1' are searched
             for one whose `res2' equals that following unit.  On a match the
             pair is written as one double-byte character (preceded by SO when
             in single-byte mode), eight input bytes are consumed and the
             enclosing loop continues.  Otherwise control falls through so the
             caller converts CH on its own.  Without HAS_COMBINED the macro
             expands to nothing.  */
     242  #ifdef HAS_COMBINED
     243  # define SUB_COMBINED_UCS_TO_IBM13XX \
     244    {									      \
     245      const struct combine *cmbp = UCS4_COMB_TO_DB;			      \
     246      while (cmbp->res1 < ch)						      \
     247        ++cmbp;								      \
     248      /* XXX if last char is beginning of combining store in state */	      \
              /* Look ahead only when all four bytes of the next unit lie	      \
                 before inend; a partial trailing unit must neither be read	      \
                 nor consumed.  */						      \
     249      if (cmbp->res1 == ch && inptr + 8 <= inend)				      \
     250        {									      \
     251  	/* See if input is part of a combined character.  */		      \
     252  	uint32_t ch_next = get32 (inptr + 4);				      \
     253  	while (cmbp->res2 != ch_next)					      \
     254  	  {								      \
     255  	    ++cmbp;							      \
     256  	    if (cmbp->res1 != ch)					      \
     257  	      goto not_combined;					      \
     258  	  }								      \
     259  									      \
     260  	/* It is a combined character.  First make sure we are in	      \
     261  	   double byte mode.  */					      \
     262  	if (curcs == sb)						      \
     263  	  {								      \
     264  	    /* We know there is room for at least one byte.  */		      \
     265  	    *outptr++ = SO;						      \
     266  	    curcs = db;							      \
     267  	  }								      \
     268  									      \
     269  	if (__glibc_unlikely (outptr + 2 > outend))			      \
     270  	  {								      \
     271  	    result = __GCONV_FULL_OUTPUT;				      \
     272  	    break;							      \
     273  	  }								      \
     274  	*outptr++ = cmbp->ch[0];					      \
     275  	*outptr++ = cmbp->ch[1];					      \
     276  	inptr += 8;							      \
     277  	continue;							      \
     278  									      \
     279        not_combined:;							      \
     280        }									      \
     281    }
     282  #else
     283  # define SUB_COMBINED_UCS_TO_IBM13XX
     284  #endif /* HAS_COMBINED */
     285  
     286  
     287  /* Next, define the other direction.
             Each pass of BODY reads one UCS4 value with get32.  A value at or
             above UCS_LIMIT is first offered to UNICODE_TAG_HANDLER and is
             otherwise illegal input.  SUB_COMBINED_UCS_TO_IBM13XX then gets the
             chance to convert the value together with the following one.
             After that the single-byte tables (UCS4_TO_SB_IDX / UCS4_TO_SB) are
             consulted and, if they yield nothing, the double-byte tables
             (UCS4_TO_DB_IDX / UCS4_TO_DB); if neither has an entry the value is
             illegal input.
             Before a double-byte character is written in `sb' mode an SO byte
             is written and `curcs' becomes `db'; before a single-byte character
             in `db' mode an SI byte is written and `curcs' becomes `sb'.  Room
             for the character itself is checked only after the shift byte, so
             on __GCONV_FULL_OUTPUT the shift byte has already been written and
             `curcs' updated while the input pointer has not been advanced.
             Illegal input sets RESULT to __GCONV_ILLEGAL_INPUT and leaves the
             loop unless ignore_errors_p () holds, in which case *irreversible
             is incremented and the four input bytes are skipped.  */
     288  #define MIN_NEEDED_INPUT	MIN_NEEDED_TO
     289  #define MAX_NEEDED_INPUT  	MAX_NEEDED_TO
     290  #define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
     291  #define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
     292  #define LOOPFCT			TO_LOOP
     293  #define BODY \
     294    {									      \
     295      uint32_t ch = get32 (inptr);					      \
     296  									      \
     297      if (__glibc_unlikely (ch >= UCS_LIMIT))				      \
     298        {									      \
     299  	UNICODE_TAG_HANDLER (ch, 4);					      \
     300  									      \
     301  	if (! ignore_errors_p ())					      \
     302  	  {								      \
     303  	    result = __GCONV_ILLEGAL_INPUT;				      \
     304  	    break;							      \
     305  	  }								      \
     306  	++*irreversible;						      \
     307  	inptr += 4;							      \
     308  	continue;							      \
     309        }									      \
     310  									      \
     311      SUB_COMBINED_UCS_TO_IBM13XX;					      \
     312  									      \
     313      const struct gap *rp1 = UCS4_TO_SB_IDX;				      \
     314      while (ch > rp1->end)						      \
     315        ++rp1;								      \
     316  									      \
     317      /* Use the UCS4 table for single byte.  */				      \
     318      const char *cp;							      \
     319      if (__builtin_expect (ch < rp1->start, 0)				      \
     320  	|| (cp = UCS4_TO_SB[ch + rp1->idx],				      \
     321  	    __builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0'))	      \
     322        {									      \
     323  	/* Use the UCS4 table for double byte.  */			      \
     324  	const struct gap *rp2 = UCS4_TO_DB_IDX;				      \
     325  	while (ch > rp2->end)						      \
     326  	  ++rp2;							      \
     327  									      \
     328  	if (__builtin_expect (ch < rp2->start, 0)			      \
     329  	    || (cp = UCS4_TO_DB[ch + rp2->idx],				      \
     330  		__builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0'))      \
     331  	  {								      \
     332  	    /* This is an illegal character.  */			      \
     333  	    if (! ignore_errors_p ())					      \
     334  	      {								      \
     335  		result = __GCONV_ILLEGAL_INPUT;				      \
     336  		break;							      \
     337  	      }								      \
     338  	    ++*irreversible;						      \
     339  	  }								      \
     340  	else								      \
     341  	  {								      \
     342  	    if (curcs == sb)						      \
     343  	      {								      \
     344  		/* We know there is room for at least one byte.  */	      \
     345  		*outptr++ = SO;						      \
     346  		curcs = db;						      \
     347  	      }								      \
     348  									      \
     349  	    if (__glibc_unlikely (outptr + 2 > outend))			      \
     350  	      {								      \
     351  		result = __GCONV_FULL_OUTPUT;				      \
     352  		break;							      \
     353  	      }								      \
     354  	    *outptr++ = cp[0];						      \
     355  	    *outptr++ = cp[1];						      \
     356  	  }								      \
     357        }									      \
     358      else								      \
     359        {									      \
     360  	if (__glibc_unlikely (curcs == db))				      \
     361  	  {								      \
     362  	    /* We know there is room for at least one byte.  */		      \
     363  	    *outptr++ = SI;						      \
     364  	    curcs = sb;							      \
     365  									      \
     366  	    if (__glibc_unlikely (outptr >= outend))			      \
     367  	      {								      \
     368  		result = __GCONV_FULL_OUTPUT;				      \
     369  		break;							      \
     370  	      }								      \
     371  	  }								      \
     372  									      \
     373  	*outptr++ = cp[0];						      \
     374        }									      \
     375  									      \
     376      /* Now that we wrote the output increment the input pointer.  */	      \
     377      inptr += 4;								      \
     378    }
     379  #define LOOP_NEED_FLAGS
          /* The loop receives the pointer to the state word, starts from the
             mode stored there (low three bits masked off) and writes `curcs'
             back through the same pointer.  REINIT_PARAMS reloads `curcs' from
             the state word; NOTE(review): presumably used by iconv/loop.c after
             the state has been reset -- confirm there.  */
     380  #define EXTRA_LOOP_DECLS	, int *curcsp
     381  #define INIT_PARAMS		int curcs = *curcsp & ~7
     382  #define REINIT_PARAMS		curcs = *curcsp & ~7
     383  #define UPDATE_PARAMS		*curcsp = curcs
     384  #include <iconv/loop.c>
     385  
     386  /* Now define the toplevel functions.  */
     387  #include <iconv/skeleton.c>