(root)/
glibc-2.38/
iconvdata/
euc-jisx0213.c
       1  /* Conversion from and to EUC-JISX0213.
       2     Copyright (C) 2002-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <dlfcn.h>
      20  #include <stdint.h>
      21  #include <gconv.h>
      22  
      23  /* The structure of EUC-JISX0213 is as follows:
      24  
      25     0x00..0x7F: ASCII
      26  
      27     0x8E{A1..FE}: JISX0201 Katakana, with prefix 0x8E, offset by +0x80.
             Only second bytes A1..DF are accepted by the converter below.
      28  
      29     0x8F{A1..FE}{A1..FE}: JISX0213 plane 2, with prefix 0x8F, offset by +0x8080.
      30  
      31     0x{A1..FE}{A1..FE}: JISX0213 plane 1, offset by +0x8080.
      32  
      33     Note that some JISX0213 characters are not contained in Unicode 3.2
      34     and are therefore best represented as sequences of Unicode characters.
      35  */
      36  
      37  #include "jisx0213.h"
      38  
      39  /* Definitions used in the body of the `gconv' function.  Apart from the
             loop names and size bounds, which are used again further down, they
             are presumably read by the generic code included from <iconv/loop.c>
             and <iconv/skeleton.c>.  */
      40  #define CHARSET_NAME		"EUC-JISX0213//"
      41  #define FROM_LOOP		from_euc_jisx0213
      42  #define TO_LOOP			to_euc_jisx0213
      43  #define DEFINE_INIT		1
      44  #define DEFINE_FINI		1
      45  #define ONE_DIRECTION		0
          /* EUC-JISX0213 -> UCS-4: one input character occupies 1 byte (ASCII)
             up to 3 bytes (0x8F prefix plus two bytes); the output is one 4-byte
             UCS-4 value, or two of them (8 bytes) for a combining sequence.  */
      46  #define FROM_LOOP_MIN_NEEDED_FROM	1
      47  #define FROM_LOOP_MAX_NEEDED_FROM	3
      48  #define FROM_LOOP_MIN_NEEDED_TO		4
      49  #define FROM_LOOP_MAX_NEEDED_TO		8
          /* UCS-4 -> EUC-JISX0213: every input character is 4 bytes; the output
             is 1 byte (ASCII) up to 3 bytes (0x8F prefix plus two bytes).  */
      50  #define TO_LOOP_MIN_NEEDED_FROM		4
      51  #define TO_LOOP_MAX_NEEDED_FROM		4
      52  #define TO_LOOP_MIN_NEEDED_TO		1
      53  #define TO_LOOP_MAX_NEEDED_TO		3
          /* Extra locals: STATEP points at the COUNT member of the conversion
             state, SAVED_STATE is the slot used by SAVE_RESET_STATE.  */
      54  #define PREPARE_LOOP \
      55    int saved_state;							      \
      56    int *statep = &data->__statep->__count;
          /* Extra argument for the loop functions; it matches the `int *statep'
             parameter declared through EXTRA_LOOP_DECLS.  */
      57  #define EXTRA_LOOP_ARGS		, statep
      58  
      59  
      60  /* Since we might have to reset the input pointer we must be able to save
      61     and restore the state.  With a nonzero SAVE the current *STATEP is
             copied into SAVED_STATE; with a zero SAVE that copy is written back
             to *STATEP.  The last statement is left without a semicolon, so the
             use site has to supply it.  */
      62  #define SAVE_RESET_STATE(Save) \
      63    if (Save)								      \
      64      saved_state = *statep;						      \
      65    else									      \
      66      *statep = saved_state
      67  
      68  
      69  /* During EUC-JISX0213 to UCS-4 conversion, the COUNT element of the state
      70     contains a pending (not yet written) UCS-4 character, shifted by 3 bits.
      71     During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state
      72     contains the buffered two output bytes, shifted by 3 bits.  */
      73  
      74  /* Since this is a stateful encoding we have to provide code which resets
      75     the output state to the initial state.  This has to be done during the
      76     flushing.

             A nonzero COUNT means one character is still buffered in it.  In the
             FROM direction COUNT >> 3 is written out as one 4-byte value;
             otherwise the low 16 bits of COUNT >> 3 are written as two bytes,
             high byte first.  COUNT is cleared only when the output fits; if it
             does not, STATUS is set to __GCONV_FULL_OUTPUT and COUNT is left
             untouched.

             NOTE(review): the FROM_DIRECTION branch stores through a `uint32_t *'
             cast of OUTBUF, which assumes OUTBUF is suitably aligned at this
             point -- confirm against the skeleton.  */
      77  #define EMIT_SHIFT_TO_INIT \
      78    if (data->__statep->__count != 0)					      \
      79      {									      \
      80        if (FROM_DIRECTION)						      \
      81  	{								      \
      82  	  if (__glibc_likely (outbuf + 4 <= outend))			      \
      83  	    {								      \
      84  	      /* Write out the pending UCS-4 character.  */		      \
      85  	      *((uint32_t *) outbuf) = data->__statep->__count >> 3;	      \
      86  	      outbuf += sizeof (uint32_t);				      \
      87  	      data->__statep->__count = 0;				      \
      88  	    }								      \
      89  	  else								      \
      90  	    /* We don't have enough room in the output buffer.  */	      \
      91  	    status = __GCONV_FULL_OUTPUT;				      \
      92  	}								      \
      93        else								      \
      94  	{								      \
      95  	  if (__glibc_likely (outbuf + 2 <= outend))			      \
      96  	    {								      \
      97  	      /* Write out the buffered two bytes, high byte first.  */	      \
      98  	      uint32_t lasttwo = data->__statep->__count >> 3;		      \
      99  	      *outbuf++ = (lasttwo >> 8) & 0xff;			      \
     100  	      *outbuf++ = lasttwo & 0xff;				      \
     101  	      data->__statep->__count = 0;				      \
     102  	    }								      \
     103  	  else								      \
     104  	    /* We don't have enough room in the output buffer.  */	      \
     105  	    status = __GCONV_FULL_OUTPUT;				      \
     106  	}								      \
     107      }
     108  
     109  
     110  /* First define the conversion function from EUC-JISX0213 to UCS-4.

             BODY handles one step of the conversion:
             - a character left pending in *STATEP (the second half of a combining
               pair that did not fit into the output earlier) is removed from the
               state and written without looking at the input;
             - otherwise one input character is decoded: ASCII (1 byte),
               half-width katakana (0x8E plus 1 byte), JISX0213 plane 2 (0x8F plus
               2 bytes) or JISX0213 plane 1 (2 bytes);
             - truncated input ends the loop with __GCONV_INCOMPLETE_INPUT, invalid
               input goes through STANDARD_FROM_LOOP_ERR_HANDLER;
             - a table result below 0x80 selects a two-character sequence from
               __jisx0213_to_ucs_combining; if only the first character fits, the
               second one is stored in *STATEP and the loop ends with
               __GCONV_FULL_OUTPUT.  */
     111  #define MIN_NEEDED_INPUT	FROM_LOOP_MIN_NEEDED_FROM
     112  #define MAX_NEEDED_INPUT	FROM_LOOP_MAX_NEEDED_FROM
     113  #define MIN_NEEDED_OUTPUT	FROM_LOOP_MIN_NEEDED_TO
     114  #define MAX_NEEDED_OUTPUT	FROM_LOOP_MAX_NEEDED_TO
     115  #define LOOPFCT			FROM_LOOP
     116  #define BODY \
     117    {									      \
     118      uint32_t ch;							      \
     119  									      \
     120      /* Determine whether there is a buffered character pending.  */	      \
     121      ch = *statep >> 3;							      \
     122      if (__glibc_likely (ch == 0))					      \
     123        {									      \
     124  	/* No - so look at the next input byte.  */			      \
     125  	ch = *inptr;							      \
     126  									      \
     127  	if (ch < 0x80)							      \
     128  	  /* Plain ASCII character.  */					      \
     129  	  ++inptr;							      \
     130  	else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f)      \
     131  	  {								      \
     132  	    /* Two or three byte character.  */				      \
     133  	    uint32_t ch2;						      \
     134  									      \
     135  	    if (__glibc_unlikely (inptr + 1 >= inend))			      \
     136  	      {								      \
     137  		/* The second byte is not available.  */		      \
     138  		result = __GCONV_INCOMPLETE_INPUT;			      \
     139  		break;							      \
     140  	      }								      \
     141  									      \
     142  	    ch2 = inptr[1];						      \
     143  									      \
     144  	    /* The second byte must be >= 0xa1 and <= 0xfe.  */		      \
     145  	    if (__glibc_unlikely (ch2 < 0xa1 || ch2 > 0xfe))		      \
     146  	      {								      \
     147  		/* This is an illegal character.  */			      \
     148  		STANDARD_FROM_LOOP_ERR_HANDLER (1);			      \
     149  	      }								      \
     150  									      \
     151  	    if (ch == 0x8e)						      \
     152  	      {								      \
     153  		/* Half-width katakana: A1..DF map to U+FF61..U+FF9F.  */     \
     154  		if (__glibc_unlikely (ch2 > 0xdf))			      \
     155  		  STANDARD_FROM_LOOP_ERR_HANDLER (1);			      \
     156  									      \
     157  		ch = ch2 + 0xfec0;					      \
     158  		inptr += 2;						      \
     159  	      }								      \
     160  	    else							      \
     161  	      {								      \
     162  		const unsigned char *endp;				      \
     163  									      \
     164  		if (ch == 0x8f)						      \
     165  		  {							      \
     166  		    /* JISX 0213 plane 2.  */				      \
     167  		    uint32_t ch3;					      \
     168  									      \
     169  		    if (__glibc_unlikely (inptr + 2 >= inend))		      \
     170  		      {							      \
     171  			/* The third byte is not available.  */		      \
     172  			result = __GCONV_INCOMPLETE_INPUT;		      \
     173  			break;						      \
     174  		      }							      \
     175  									      \
     176  		    ch3 = inptr[2];					      \
     177  		    endp = inptr + 3;					      \
     178  									      \
     179  		    ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80);   \
     180  		  }							      \
     181  		else							      \
     182  		  {							      \
     183  		    /* JISX 0213 plane 1.  */				      \
     184  		    endp = inptr + 2;					      \
     185  									      \
     186  		    ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80);    \
     187  		  }							      \
     188  									      \
     189  		if (ch == 0)						      \
     190  		  /* This is an illegal character.  */			      \
     191  		  STANDARD_FROM_LOOP_ERR_HANDLER (1);			      \
     192  									      \
     193  		inptr = endp;						      \
     194  									      \
     195  		if (ch < 0x80)						      \
     196  		  {							      \
     197  		    /* It's a combining character: CH - 1 indexes the table   \
          		       of two-character sequences.  */			      \
     198  		    uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];     \
     199  		    uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];     \
     200  									      \
     201  		    put32 (outptr, u1);					      \
     202  		    outptr += 4;					      \
     203  									      \
     204  		    /* See whether we have room for two characters.  */	      \
     205  		    if (outptr + 4 <= outend)				      \
     206  		      {							      \
     207  			put32 (outptr, u2);				      \
     208  			outptr += 4;					      \
     209  			continue;					      \
     210  		      }							      \
     211  									      \
     212  		    /* Otherwise store only the first character now, and      \
     213  		       put the second one into the queue.  */		      \
     214  		    *statep = u2 << 3;					      \
     215  		    /* Tell the caller why we terminate the loop.  */	      \
     216  		    result = __GCONV_FULL_OUTPUT;			      \
     217  		    break;						      \
     218  		  }							      \
     219  	      }								      \
     220  	  }								      \
     221  	else								      \
     222  	  {								      \
     223  	    /* This is illegal.  */					      \
     224  	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
     225  	  }								      \
     226        }									      \
              else								      \
                /* Yes - it is written below.  Remove it from the queue first,    \
          	 otherwise every following iteration would find it again and    \
          	 emit it once more without ever consuming input.  */	      \
                *statep = 0;							      \
     227  									      \
     228      put32 (outptr, ch);							      \
     229      outptr += 4;							      \
     230    }
     231  #define LOOP_NEED_FLAGS
          /* Extra parameter declaration; it matches the `, statep' argument
             supplied through EXTRA_LOOP_ARGS.  */
     232  #define EXTRA_LOOP_DECLS	, int *statep
          /* Conversion of a lone byte C: a value below 0x80 is ASCII and is
             returned unchanged, anything else yields WEOF (in the multibyte BODY
             such a byte is either a lead byte that needs more input or is
             rejected).  */
     233  #define ONEBYTE_BODY \
     234    {									      \
     235      if (c < 0x80)							      \
     236        return c;								      \
     237      else								      \
     238        return WEOF;							      \
     239    }
     240  #include <iconv/loop.c>
     241  
     242  
     243  /* Next, define the other direction, from UCS-4 to EUC-JISX0213.  */
     244  
     245  /* Composition tables for each of the relevant combining characters.

             Every entry pairs the two EUC bytes of a base character (BASE) with
             the two EUC bytes of the character obtained by combining it with a
             given Unicode combining character (COMPOSED).  Entries are grouped
             by combining character: COMP_TABLE_IDX_xxxx is the index of the first
             entry of the group for U+xxxx and COMP_TABLE_LEN_xxxx the number of
             entries in that group.  The trailing comments read
             "composed code = base code, combining character"; the codes shown
             there are the table values without the 0x8080 offset and with a
             leading 1 (presumably the JISX0213 plane).  */
     246  static const struct
     247  {
     248    uint16_t base;
     249    uint16_t composed;
     250  } comp_table_data[] =
     251  {
     252  #define COMP_TABLE_IDX_02E5 0
     253  #define COMP_TABLE_LEN_02E5 1
     254    { 0xabe4, 0xabe5 }, /* 0x12B65 = 0x12B64 U+02E5 */
     255  #define COMP_TABLE_IDX_02E9 (COMP_TABLE_IDX_02E5 + COMP_TABLE_LEN_02E5)
     256  #define COMP_TABLE_LEN_02E9 1
     257    { 0xabe0, 0xabe6 }, /* 0x12B66 = 0x12B60 U+02E9 */
     258  #define COMP_TABLE_IDX_0300 (COMP_TABLE_IDX_02E9 + COMP_TABLE_LEN_02E9)
     259  #define COMP_TABLE_LEN_0300 5
     260    { 0xa9dc, 0xabc4 }, /* 0x12B44 = 0x1295C U+0300 */
     261    { 0xabb8, 0xabc8 }, /* 0x12B48 = 0x12B38 U+0300 */
     262    { 0xabb7, 0xabca }, /* 0x12B4A = 0x12B37 U+0300 */
     263    { 0xabb0, 0xabcc }, /* 0x12B4C = 0x12B30 U+0300 */
     264    { 0xabc3, 0xabce }, /* 0x12B4E = 0x12B43 U+0300 */
     265  #define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
     266  #define COMP_TABLE_LEN_0301 4
     267    { 0xabb8, 0xabc9 }, /* 0x12B49 = 0x12B38 U+0301 */
     268    { 0xabb7, 0xabcb }, /* 0x12B4B = 0x12B37 U+0301 */
     269    { 0xabb0, 0xabcd }, /* 0x12B4D = 0x12B30 U+0301 */
     270    { 0xabc3, 0xabcf }, /* 0x12B4F = 0x12B43 U+0301 */
     271  #define COMP_TABLE_IDX_309A (COMP_TABLE_IDX_0301 + COMP_TABLE_LEN_0301)
     272  #define COMP_TABLE_LEN_309A 14
     273    { 0xa4ab, 0xa4f7 }, /* 0x12477 = 0x1242B U+309A */
     274    { 0xa4ad, 0xa4f8 }, /* 0x12478 = 0x1242D U+309A */
     275    { 0xa4af, 0xa4f9 }, /* 0x12479 = 0x1242F U+309A */
     276    { 0xa4b1, 0xa4fa }, /* 0x1247A = 0x12431 U+309A */
     277    { 0xa4b3, 0xa4fb }, /* 0x1247B = 0x12433 U+309A */
     278    { 0xa5ab, 0xa5f7 }, /* 0x12577 = 0x1252B U+309A */
     279    { 0xa5ad, 0xa5f8 }, /* 0x12578 = 0x1252D U+309A */
     280    { 0xa5af, 0xa5f9 }, /* 0x12579 = 0x1252F U+309A */
     281    { 0xa5b1, 0xa5fa }, /* 0x1257A = 0x12531 U+309A */
     282    { 0xa5b3, 0xa5fb }, /* 0x1257B = 0x12533 U+309A */
     283    { 0xa5bb, 0xa5fc }, /* 0x1257C = 0x1253B U+309A */
     284    { 0xa5c4, 0xa5fd }, /* 0x1257D = 0x12544 U+309A */
     285    { 0xa5c8, 0xa5fe }, /* 0x1257E = 0x12548 U+309A */
     286    { 0xa6f5, 0xa6f8 }, /* 0x12678 = 0x12675 U+309A */
     287  };
     288  
     289  #define MIN_NEEDED_INPUT	TO_LOOP_MIN_NEEDED_FROM
     290  #define MAX_NEEDED_INPUT	TO_LOOP_MAX_NEEDED_FROM
     291  #define MIN_NEEDED_OUTPUT	TO_LOOP_MIN_NEEDED_TO
     292  #define MAX_NEEDED_OUTPUT	TO_LOOP_MAX_NEEDED_TO
     293  #define LOOPFCT			TO_LOOP
          /* BODY looks at one UCS-4 input character per iteration:
             - if two output bytes are buffered in *STATEP, try to merge them with
               the current character via comp_table_data; on success the composed
               two bytes are written and the input is consumed, otherwise the
               buffered bytes are written and the current character is looked at
               again on the next iteration;
             - ASCII is copied as one byte, U+FF61..U+FF9F becomes 0x8E plus one
               byte;
             - anything else goes through ucs4_to_jisx0213: a zero result is
               handed to UNICODE_TAG_HANDLER and STANDARD_TO_LOOP_ERR_HANDLER, a
               result with bit 0x0080 set is buffered in *STATEP as a possible
               base character, a result with bit 0x8000 set is written in the
               0x8F-prefixed three-byte form, and the rest is written as two
               bytes with the high bits set.
             Whenever the output does not fit, the loop ends with
             __GCONV_FULL_OUTPUT before anything is written or consumed.  */
     294  #define BODY \
     295    {									      \
     296      uint32_t ch = get32 (inptr);					      \
     297  									      \
     298      if ((*statep >> 3) != 0)						      \
     299        {									      \
     300  	/* Attempt to combine the last character with this one.  */	      \
     301  	uint16_t lasttwo = *statep >> 3;				      \
     302  	unsigned int idx;						      \
     303  	unsigned int len;						      \
     304  									      \
     305  	if (ch == 0x02e5)						      \
     306  	  idx = COMP_TABLE_IDX_02E5, len = COMP_TABLE_LEN_02E5;		      \
     307  	else if (ch == 0x02e9)						      \
     308  	  idx = COMP_TABLE_IDX_02E9, len = COMP_TABLE_LEN_02E9;		      \
     309  	else if (ch == 0x0300)						      \
     310  	  idx = COMP_TABLE_IDX_0300, len = COMP_TABLE_LEN_0300;		      \
     311  	else if (ch == 0x0301)						      \
     312  	  idx = COMP_TABLE_IDX_0301, len = COMP_TABLE_LEN_0301;		      \
     313  	else if (ch == 0x309a)						      \
     314  	  idx = COMP_TABLE_IDX_309A, len = COMP_TABLE_LEN_309A;		      \
     315  	else								      \
     316  	  goto not_combining;						      \
     317  									      \
          	/* Search the group for the buffered base character.  LEN is	      \
          	   still nonzero afterwards only if an entry was found.  */	      \
     318  	do								      \
     319  	  if (comp_table_data[idx].base == lasttwo)			      \
     320  	    break;							      \
     321  	while (++idx, --len > 0);					      \
     322  									      \
     323  	if (len > 0)							      \
     324  	  {								      \
     325  	    /* Output the combined character.  */			      \
     326  	    if (__glibc_unlikely (outptr + 1 >= outend))		      \
     327  	      {								      \
     328  		result = __GCONV_FULL_OUTPUT;				      \
     329  		break;							      \
     330  	      }								      \
     331  	    lasttwo = comp_table_data[idx].composed;			      \
     332  	    *outptr++ = (lasttwo >> 8) & 0xff;				      \
     333  	    *outptr++ = lasttwo & 0xff;					      \
     334  	    *statep = 0;						      \
     335  	    inptr += 4;							      \
     336  	    continue;							      \
     337  	  }								      \
     338  									      \
     339        not_combining:							      \
     340  	/* Output the buffered character.  The current input character	      \
          	   is not consumed here; the next iteration handles it with an     \
          	   empty buffer.  */						      \
     341  	if (__glibc_unlikely (outptr + 1 >= outend))			      \
     342  	  {								      \
     343  	    result = __GCONV_FULL_OUTPUT;				      \
     344  	    break;							      \
     345  	  }								      \
     346  	*outptr++ = (lasttwo >> 8) & 0xff;				      \
     347  	*outptr++ = lasttwo & 0xff;					      \
     348  	*statep = 0;							      \
     349  	continue;							      \
     350        }									      \
     351  									      \
     352      if (ch < 0x80)							      \
     353        /* Plain ASCII character.  */					      \
     354        *outptr++ = ch;							      \
     355      else if (ch >= 0xff61 && ch <= 0xff9f)				      \
     356        {									      \
     357  	/* Half-width katakana.  */					      \
     358  	if (__glibc_unlikely (outptr + 1 >= outend))			      \
     359  	  {								      \
     360  	    result = __GCONV_FULL_OUTPUT;				      \
     361  	    break;							      \
     362  	  }								      \
     363  	*outptr++ = 0x8e;						      \
     364  	*outptr++ = ch - 0xfec0;					      \
     365        }									      \
     366      else								      \
     367        {									      \
     368  	uint32_t jch = ucs4_to_jisx0213 (ch);				      \
     369  	if (jch == 0)							      \
     370  	  {								      \
     371  	    UNICODE_TAG_HANDLER (ch, 4);				      \
     372  									      \
     373  	    /* Illegal character.  */					      \
     374  	    STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
     375  	  }								      \
     376  									      \
     377  	if (jch & 0x0080)						      \
     378  	  {								      \
     379  	    /* A possible match in comp_table_data.  We have to buffer it.  */\
     380  									      \
     381  	    /* We know it's a JISX 0213 plane 1 character.  */		      \
     382  	    assert ((jch & 0x8000) == 0);				      \
     383  									      \
     384  	    *statep = (jch | 0x8080) << 3;				      \
     385  	    inptr += 4;							      \
     386  	    continue;							      \
     387  	  }								      \
     388  									      \
     389  	if (jch & 0x8000)						      \
     390  	  {								      \
     391  	    /* JISX 0213 plane 2.  */					      \
     392  	    if (__glibc_unlikely (outptr + 2 >= outend))		      \
     393  	      {								      \
     394  		result = __GCONV_FULL_OUTPUT;				      \
     395  		break;							      \
     396  	      }								      \
     397  	    *outptr++ = 0x8f;						      \
     398  	  }								      \
     399  	else								      \
     400  	  {								      \
     401  	    /* JISX 0213 plane 1.  */					      \
     402  	    if (__glibc_unlikely (outptr + 1 >= outend))		      \
     403  	      {								      \
     404  		result = __GCONV_FULL_OUTPUT;				      \
     405  		break;							      \
     406  	      }								      \
     407  	  }								      \
     408  	*outptr++ = (jch >> 8) | 0x80;					      \
     409  	*outptr++ = (jch & 0xff) | 0x80;				      \
     410        }									      \
     411  									      \
     412      inptr += 4;								      \
     413    }
     414  #define LOOP_NEED_FLAGS
     415  #define EXTRA_LOOP_DECLS	, int *statep
     416  #include <iconv/loop.c>
     417  
     418  
     419  /* Now define the toplevel functions.  */
     420  #include <iconv/skeleton.c>