(root)/
glibc-2.38/
iconvdata/
iso-2022-jp-3.c
       1  /* Conversion module for ISO-2022-JP-3.
       2     Copyright (C) 1998-2023 Free Software Foundation, Inc.
       3     Copyright The GNU Toolchain Authors.
       4     This file is part of the GNU C Library.
       5  
       6     The GNU C Library is free software; you can redistribute it and/or
       7     modify it under the terms of the GNU Lesser General Public
       8     License as published by the Free Software Foundation; either
       9     version 2.1 of the License, or (at your option) any later version.
      10  
      11     The GNU C Library is distributed in the hope that it will be useful,
      12     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14     Lesser General Public License for more details.
      15  
      16     You should have received a copy of the GNU Lesser General Public
      17     License along with the GNU C Library; if not, see
      18     <https://www.gnu.org/licenses/>.  */
      19  
      20  #include <assert.h>
      21  #include <dlfcn.h>
      22  #include <gconv.h>
      23  #include <stdint.h>
      24  #include <string.h>
      25  
      26  #include "jis0201.h"
      27  #include "jis0208.h"
      28  #include "jisx0213.h"
      29  
      30  /* This makes obvious what everybody knows: 0x1b is the Esc character.  */
      31  #define ESC 0x1b
      32  
      33  /* Definitions used in the body of the `gconv' function.  */
      34  #define CHARSET_NAME		"ISO-2022-JP-3//"
          /* Names of the two conversion loops defined in this file; each one is
             passed on as LOOPFCT before iconv/loop.c is included.  */
      35  #define FROM_LOOP		from_iso2022jp3_loop
      36  #define TO_LOOP			to_iso2022jp3_loop
          /* NOTE(review): the next three settings are not read anywhere in the
             visible part of this file; presumably the gconv skeleton consumes
             them -- confirm there.  */
      37  #define DEFINE_INIT		1
      38  #define DEFINE_FINI		1
      39  #define ONE_DIRECTION		0
          /* Bytes handled per conversion step.  Decoding (FROM_LOOP) reads from
             a single byte up to a four-byte escape sequence such as `Esc $ ( O'
             and writes one UCS-4 value, or two for a combining pair.  Encoding
             (TO_LOOP) reads one UCS-4 value and writes from a single byte up to
             a four-byte escape sequence followed by a two-byte character.  */
      40  #define FROM_LOOP_MIN_NEEDED_FROM	1
      41  #define FROM_LOOP_MAX_NEEDED_FROM	4
      42  #define FROM_LOOP_MIN_NEEDED_TO		4
      43  #define FROM_LOOP_MAX_NEEDED_TO		8
      44  #define TO_LOOP_MIN_NEEDED_FROM		4
      45  #define TO_LOOP_MAX_NEEDED_FROM		4
      46  #define TO_LOOP_MIN_NEEDED_TO		1
      47  #define TO_LOOP_MAX_NEEDED_TO		6
          /* PREPARE_LOOP declares two locals and needs `data' in scope where it
             is expanded.  SAVED_STATE is the scratch copy used by
             SAVE_RESET_STATE.  STATEP points at the COUNT word of the conversion
             state; EXTRA_LOOP_ARGS appends it to an argument list, matching the
             `int *statep' parameter added through EXTRA_LOOP_DECLS.  */
      48  #define PREPARE_LOOP \
      49    int saved_state;							      \
      50    int *statep = &data->__statep->__count;
      51  #define EXTRA_LOOP_ARGS		, statep
      52  
      53  
      54  /* The COUNT element of the state keeps track of the currently selected
      55     character set.  The possible values are listed below, each with the
             escape sequence that selects it.

             Every value is a small number shifted left by three, so the selection
             occupies bits 3 to 5 of COUNT and CURRENT_SEL_MASK isolates it.  The
             low three bits of COUNT are never changed by the code in this file
             (it always masks with 7 to keep them).  ASCII_set is zero and is the
             state the flush code returns to.  Note that JISX0213_1_2004_set has
             the same numeric value as CURRENT_SEL_MASK.  */
      56  enum
      57  {
      58    ASCII_set = 0,		/* Esc ( B */
      59    JISX0208_1978_set = 1 << 3,	/* Esc $ @ */
      60    JISX0208_1983_set = 2 << 3,	/* Esc $ B */
      61    JISX0201_Roman_set = 3 << 3,	/* Esc ( J */
      62    JISX0201_Kana_set = 4 << 3,	/* Esc ( I */
      63    JISX0213_1_2000_set = 5 << 3,	/* Esc $ ( O */
      64    JISX0213_2_set = 6 << 3,	/* Esc $ ( P */
      65    JISX0213_1_2004_set = 7 << 3,	/* Esc $ ( Q */
      66    CURRENT_SEL_MASK = 7 << 3	/* Mask for the selection bits (3 to 5).  */
      67  };
      68  
      69  /* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
      70     state also contains the last two bytes to be output, shifted by 6
      71     bits, and a one-bit indicator whether they must be preceded by the
      72     shift sequence, in bit 22.  During ISO-2022-JP-3 to UCS-4
      73     conversion, COUNT may also contain a non-zero pending wide
      74     character, shifted by six bits.  This happens for certain inputs in
      75     JISX0213_1_2004_set and JISX0213_2_set if the second wide character
      76     in a combining sequence cannot be written because the buffer is
      77     full.  */
      78  
      79  /* Since this is a stateful encoding we have to provide code which resets
      80     the output state to the initial state.  This has to be done during the
      81     flushing.

             The macro does nothing unless COUNT holds something besides its low
             three bits, i.e. a non-ASCII selection or a buffered character.

             Decoding (FROM_DIRECTION): COUNT >> 6 is a pending wide character.  It
             is written as four bytes if the output buffer has room and only then
             is the state reset; otherwise STATUS becomes __GCONV_FULL_OUTPUT and
             the state is left as it is.  Without a pending character the state is
             simply reset.

             Encoding: COUNT >> 6 holds the two buffered output bytes, and bit 16
             of that value (bit 22 of COUNT) is set when `Esc $ B' must precede
             them.  The buffered character and the closing `Esc ( B' are written
             only if all of it fits; otherwise STATUS becomes __GCONV_FULL_OUTPUT
             and nothing is written.

             In both directions the reset keeps the low three bits of COUNT and
             selects ASCII.  The macro uses DATA, OUTBUF, OUTEND, STATUS and
             FROM_DIRECTION, none of which are defined in this file; they have to
             come from the code that expands it.  */
      82  #define EMIT_SHIFT_TO_INIT \
      83    if ((data->__statep->__count & ~7) != ASCII_set)			      \
      84      {									      \
      85        if (FROM_DIRECTION)						      \
      86  	{								      \
      87  	  uint32_t ch = data->__statep->__count >> 6;			      \
      88  									      \
      89  	  if (__glibc_unlikely (ch != 0))				      \
      90  	    {								      \
      91  	      if (__glibc_likely (outbuf + 4 <= outend))		      \
      92  		{							      \
      93  		  /* Write out the last character.  */			      \
      94  		  put32 (outbuf, ch);					      \
      95  		  outbuf += 4;						      \
          		  /* Keep the low three bits, select ASCII.  */		      \
      96  		  data->__statep->__count &= 7;				      \
      97  		  data->__statep->__count |= ASCII_set;			      \
      98  		}							      \
      99  	      else							      \
     100  		/* We don't have enough room in the output buffer.  */	      \
     101  		status = __GCONV_FULL_OUTPUT;				      \
     102  	    }								      \
     103  	  else								      \
     104  	    {								      \
     105  	      data->__statep->__count &= 7;				      \
     106  	      data->__statep->__count |= ASCII_set;			      \
     107  	    }								      \
     108  	}								      \
     109        else								      \
     110  	{								      \
     111  	  /* We are not in the initial state.  To switch back we have	      \
     112  	     to write out the buffered character and/or emit the sequence     \
     113  	     `Esc ( B'.  */						      \
          	  /* NEED: two bytes for a buffered character, three more if	      \
          	     its escape sequence is still owed, and three for the	      \
          	     closing `Esc ( B' when the selection is not ASCII.  */	      \
     114  	  size_t need =							      \
     115  	    (data->__statep->__count >> 6				      \
     116  	     ? (data->__statep->__count >> 22 ? 3 : 0) + 2		      \
     117  	     : 0)							      \
     118  	    + ((data->__statep->__count & CURRENT_SEL_MASK) != ASCII_set      \
     119  	       ? 3 : 0);						      \
     120  									      \
     121  	  if (__glibc_unlikely (outbuf + need > outend))		      \
     122  	    /* We don't have enough room in the output buffer.  */	      \
     123  	    status = __GCONV_FULL_OUTPUT;				      \
     124  	  else								      \
     125  	    {								      \
     126  	      if (data->__statep->__count >> 6)				      \
     127  		{							      \
     128  		  uint32_t lasttwo = data->__statep->__count >> 6;	      \
     129  									      \
     130  		  if (lasttwo >> 16)					      \
     131  		    {							      \
     132  		      /* Write out the shift sequence before the last	      \
     133  			 character.  */					      \
     134  		      assert ((data->__statep->__count & CURRENT_SEL_MASK)    \
     135  			      == JISX0208_1983_set);			      \
     136  		      *outbuf++ = ESC;					      \
     137  		      *outbuf++ = '$';					      \
     138  		      *outbuf++ = 'B';					      \
     139  		    }							      \
     140  		  /* Write out the last character.  */			      \
     141  		  *outbuf++ = (lasttwo >> 8) & 0xff;			      \
     142  		  *outbuf++ = lasttwo & 0xff;				      \
     143  		}							      \
     144  	      if ((data->__statep->__count & CURRENT_SEL_MASK) != ASCII_set)  \
     145  		{							      \
     146  		  /* Write out the shift sequence.  */			      \
     147  		  *outbuf++ = ESC;					      \
     148  		  *outbuf++ = '(';					      \
     149  		  *outbuf++ = 'B';					      \
     150  		}							      \
     151  	      data->__statep->__count &= 7;				      \
     152  	      data->__statep->__count |= ASCII_set;			      \
     153  	    }								      \
     154  	}								      \
     155      }
     156  
     157  
     158  /* Since we might have to reset input pointer we must be able to save
     159     and restore the state.  A non-zero SAVE copies *STATEP into
             SAVED_STATE; a zero SAVE copies SAVED_STATE back into *STATEP.  Both
             variables are declared by PREPARE_LOOP.  The expansion is an
             `if'/`else' statement without its final semicolon, which the user
             of the macro supplies.  */
     160  #define SAVE_RESET_STATE(Save) \
     161    if (Save)								      \
     162      saved_state = *statep;						      \
     163    else									      \
     164      *statep = saved_state
     165  
     166  
     167  /* First define the conversion function from ISO-2022-JP-3 to UCS-4.

             BODY performs one conversion step.  SET is the working copy of the
             state word; INIT_PARAMS loads it from *STATEP and UPDATE_PARAMS
             stores it back.  In order:
             - A wide character parked in SET >> 6 is written first, without
               consuming any input.
             - When the next byte is ESC, at least three bytes (four for
               `Esc $ (') must be available, otherwise the step stops with
               __GCONV_INCOMPLETE_INPUT.  A recognized sequence only replaces SET
               and is skipped.  `Esc $ ( O' and `Esc $ ( Q' both select
               JISX0213_1_2004_set.  An unrecognized sequence is not consumed; its
               ESC byte is handled like an ordinary byte.
             - Bytes >= 0x80 go to the error handler.  With ASCII selected, and
               for bytes below 0x21 or equal to 0x7f in any set, the byte value
               itself is the result.  Other bytes are looked up in the selected
               set: one byte for JIS X 0201, two bytes for JIS X 0208 and
               JIS X 0213.
             - A JIS X 0213 lookup result below 0x80 is one more than an index
               into __jisx0213_to_ucs_combining and stands for two wide
               characters.  If only the first one fits, the second is parked in
               SET >> 6 and the step stops with __GCONV_FULL_OUTPUT.
             `continue', `break', RESULT, INPTR, INEND, OUTPTR and OUTEND belong
             to the loop around BODY, which comes from iconv/loop.c and is not
             visible here.  */
     168  #define MIN_NEEDED_INPUT	FROM_LOOP_MIN_NEEDED_FROM
     169  #define MAX_NEEDED_INPUT	FROM_LOOP_MAX_NEEDED_FROM
     170  #define MIN_NEEDED_OUTPUT	FROM_LOOP_MIN_NEEDED_TO
     171  #define MAX_NEEDED_OUTPUT	FROM_LOOP_MAX_NEEDED_TO
     172  #define LOOPFCT			FROM_LOOP
     173  #define BODY \
     174    {									      \
     175      uint32_t ch;							      \
     176  									      \
     177      /* Output any pending character.  */				      \
     178      ch = set >> 6;							      \
     179      if (__glibc_unlikely (ch != 0))					      \
     180        {									      \
     181  	put32 (outptr, ch);						      \
     182  	outptr += 4;							      \
     183  	/* Remove the pending character, but preserve state bits.  */	      \
     184  	set &= (1 << 6) - 1;						      \
     185  	continue;							      \
     186        }									      \
     187  									      \
     188      /* Otherwise read the next input byte.  */				      \
     189      ch = *inptr;							      \
     190  									      \
     191      /* Recognize escape sequences.  */					      \
     192      if (__glibc_unlikely (ch == ESC))					      \
     193        {									      \
     194  	/* We now must be prepared to read two to three more bytes.	      \
     195  	   If we have a match in the first byte but then the input buffer     \
     196  	   ends we terminate with an error since we must not risk missing     \
     197  	   an escape sequence just because it is not entirely in the	      \
     198  	   current input buffer.  */					      \
     199  	if (__builtin_expect (inptr + 2 >= inend, 0)			      \
     200  	    || (inptr[1] == '$' && inptr[2] == '('			      \
     201  		&& __builtin_expect (inptr + 3 >= inend, 0)))		      \
     202  	  {								      \
     203  	    /* Not enough input available.  */				      \
     204  	    result = __GCONV_INCOMPLETE_INPUT;				      \
     205  	    break;							      \
     206  	  }								      \
     207  									      \
     208  	if (inptr[1] == '(')						      \
     209  	  {								      \
     210  	    if (inptr[2] == 'B')					      \
     211  	      {								      \
     212  		/* ASCII selected.  */					      \
     213  		set = ASCII_set;					      \
     214  		inptr += 3;						      \
     215  		continue;						      \
     216  	      }								      \
     217  	    else if (inptr[2] == 'J')					      \
     218  	      {								      \
     219  		/* JIS X 0201 Roman selected.  */			      \
     220  		set = JISX0201_Roman_set;				      \
     221  		inptr += 3;						      \
     222  		continue;						      \
     223  	      }								      \
     224  	    else if (inptr[2] == 'I')					      \
     225  	      {								      \
     226  		/* JIS X 0201 Kana selected.  */			      \
     227  		set = JISX0201_Kana_set;				      \
     228  		inptr += 3;						      \
     229  		continue;						      \
     230  	      }								      \
     231  	  }								      \
     232  	else if (inptr[1] == '$')					      \
     233  	  {								      \
     234  	    if (inptr[2] == '@')					      \
     235  	      {								      \
     236  		/* JIS X 0208-1978 selected.  */			      \
     237  		set = JISX0208_1978_set;				      \
     238  		inptr += 3;						      \
     239  		continue;						      \
     240  	      }								      \
     241  	    else if (inptr[2] == 'B')					      \
     242  	      {								      \
     243  		/* JIS X 0208-1983 selected.  */			      \
     244  		set = JISX0208_1983_set;				      \
     245  		inptr += 3;						      \
     246  		continue;						      \
     247  	      }								      \
     248  	    else if (inptr[2] == '(')					      \
     249  	      {								      \
     250  		if (inptr[3] == 'O' || inptr[3] == 'Q')			      \
     251  		  {							      \
     252  		    /* JIS X 0213 plane 1 selected.  */			      \
     253  		    /* In this direction we don't need to distinguish the     \
     254  		       versions from 2000 and 2004. */			      \
     255  		    set = JISX0213_1_2004_set;				      \
     256  		    inptr += 4;						      \
     257  		    continue;						      \
     258  		  }							      \
     259  		else if (inptr[3] == 'P')				      \
     260  		  {							      \
     261  		    /* JIS X 0213 plane 2 selected.  */			      \
     262  		    set = JISX0213_2_set;				      \
     263  		    inptr += 4;						      \
     264  		    continue;						      \
     265  		  }							      \
     266  	      }								      \
     267  	  }								      \
     268        }									      \
     269  									      \
              /* CH is still ESC here if no escape sequence was recognized; it	      \
                 is then handled like any other byte below 0x21.  */		      \
     270      if (ch >= 0x80)							      \
     271        {									      \
     272  	STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
     273        }									      \
     274      else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f))		      \
     275        /* Almost done, just advance the input pointer.  */		      \
     276        ++inptr;								      \
     277      else if (set == JISX0201_Roman_set)					      \
     278        {									      \
     279  	/* Use the JIS X 0201 table.  */				      \
     280  	ch = jisx0201_to_ucs4 (ch);					      \
     281  	if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))		      \
     282  	  {								      \
     283  	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
     284  	  }								      \
     285  	++inptr;							      \
     286        }									      \
     287      else if (set == JISX0201_Kana_set)					      \
     288        {									      \
     289  	/* Use the JIS X 0201 table.  */				      \
     290  	ch = jisx0201_to_ucs4 (ch + 0x80);				      \
     291  	if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))		      \
     292  	  {								      \
     293  	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
     294  	  }								      \
     295  	++inptr;							      \
     296        }									      \
     297      else if (set == JISX0208_1978_set || set == JISX0208_1983_set)	      \
     298        {									      \
     299  	/* XXX I don't have the tables for these two old variants of	      \
     300  	   JIS X 0208.  Therefore I'm using the tables for JIS X	      \
     301  	   0208-1990.  If somebody has problems with this please	      \
     302  	   provide the appropriate tables.  */				      \
          	/* NOTE(review): this branch never advances INPTR itself;	      \
          	   presumably jisx0208_to_ucs4 does so through &inptr.  A	      \
          	   result of 0 is treated as incomplete input.  */		      \
     303  	ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0);		      \
     304  									      \
     305  	if (__glibc_unlikely (ch == 0))					      \
     306  	  {								      \
     307  	    result = __GCONV_INCOMPLETE_INPUT;				      \
     308  	    break;							      \
     309  	  }								      \
     310  	else if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))		      \
     311  	  {								      \
     312  	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
     313  	  }								      \
     314        }									      \
     315      else /* (set == JISX0213_1_2004_set || set == JISX0213_2_set) */	      \
     316        {									      \
     317  	if (__glibc_unlikely (inptr + 1 >= inend))			      \
     318  	  {								      \
     319  	    result = __GCONV_INCOMPLETE_INPUT;				      \
     320  	    break;							      \
     321  	  }								      \
     322  									      \
          	/* The first argument works out to 0x100 + CH for plane 1	      \
          	   (JISX0213_1_2004_set) and 0x200 + CH for plane 2		      \
          	   (JISX0213_2_set).  */					      \
     323  	ch = jisx0213_to_ucs4 (						      \
     324  	       ((JISX0213_1_2004_set - set + (1 << 3)) << 5) + ch,	      \
     325  	       inptr[1]);						      \
     326  	if (ch == 0)							      \
     327  	  STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
     328  									      \
     329  	if (ch < 0x80)							      \
     330  	  {								      \
     331  	    /* It's a combining character.  */				      \
     332  	    uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];	      \
     333  	    uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];	      \
     334  									      \
     335  	    inptr += 2;							      \
     336  									      \
     337  	    put32 (outptr, u1);						      \
     338  	    outptr += 4;						      \
     339  									      \
     340  	    /* See whether we have room for two characters.  */		      \
     341  	    if (outptr + 4 <= outend)					      \
     342  	      {								      \
     343  		put32 (outptr, u2);					      \
     344  		outptr += 4;						      \
     345  		continue;						      \
     346  	      }								      \
     347  									      \
     348  	    /* Otherwise store only the first character now, and	      \
     349  	       put the second one into the queue.  */			      \
     350  	    set |= u2 << 6;						      \
     351  	    /* Tell the caller why we terminate the loop.  */		      \
     352  	    result = __GCONV_FULL_OUTPUT;				      \
     353  	    break;							      \
     354  	  }								      \
     355  									      \
     356  	inptr += 2;							      \
     357        }									      \
     358  									      \
     359      put32 (outptr, ch);							      \
     360      outptr += 4;							      \
     361    }
          /* Remaining parameters for iconv/loop.c.  EXTRA_LOOP_DECLS adds the
             STATEP parameter; INIT_PARAMS and UPDATE_PARAMS copy the state word
             into and out of the local SET used by BODY.  NOTE(review): the
             effect of LOOP_NEED_FLAGS is defined by loop.c -- confirm there.  */
     362  #define LOOP_NEED_FLAGS
     363  #define EXTRA_LOOP_DECLS	, int *statep
     364  #define INIT_PARAMS		int set = *statep
     365  #define UPDATE_PARAMS		*statep = set
     366  #include <iconv/loop.c>
     367  
     368  
     369  /* Next, define the other direction, from UCS-4 to ISO-2022-JP-3.  */
     370  
     371  /* Composition tables for each of the relevant combining characters.

             Each entry pairs a BASE code with the COMPOSED code that replaces it
             when the base character is directly followed by the combining
             character of the entry's group.  The groups are stored back to back:
             COMP_TABLE_IDX_xxxx is the index of the first entry for the combining
             character U+xxxx and COMP_TABLE_LEN_xxxx is the number of entries in
             that group, so every IDX is the previous IDX plus the previous LEN.
             Entries must therefore stay in this order.

             The encoder compares BASE with the low 16 bits of the buffered
             character and writes COMPOSED as two bytes, high byte first.

             The comment on each entry reads `composed = base combining-char'.
             NOTE(review): the leading `1' in values such as 0x12B65 presumably
             denotes JIS X 0213 plane 1 -- confirm against jisx0213.h.  */
     372  static const struct
     373  {
     374    uint16_t base;
     375    uint16_t composed;
     376  } comp_table_data[] =
     377  {
     378  #define COMP_TABLE_IDX_02E5 0
     379  #define COMP_TABLE_LEN_02E5 1
     380    { 0x2b64, 0x2b65 }, /* 0x12B65 = 0x12B64 U+02E5 */
     381  #define COMP_TABLE_IDX_02E9 (COMP_TABLE_IDX_02E5 + COMP_TABLE_LEN_02E5)
     382  #define COMP_TABLE_LEN_02E9 1
     383    { 0x2b60, 0x2b66 }, /* 0x12B66 = 0x12B60 U+02E9 */
     384  #define COMP_TABLE_IDX_0300 (COMP_TABLE_IDX_02E9 + COMP_TABLE_LEN_02E9)
     385  #define COMP_TABLE_LEN_0300 5
     386    { 0x295c, 0x2b44 }, /* 0x12B44 = 0x1295C U+0300 */
     387    { 0x2b38, 0x2b48 }, /* 0x12B48 = 0x12B38 U+0300 */
     388    { 0x2b37, 0x2b4a }, /* 0x12B4A = 0x12B37 U+0300 */
     389    { 0x2b30, 0x2b4c }, /* 0x12B4C = 0x12B30 U+0300 */
     390    { 0x2b43, 0x2b4e }, /* 0x12B4E = 0x12B43 U+0300 */
     391  #define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
     392  #define COMP_TABLE_LEN_0301 4
     393    { 0x2b38, 0x2b49 }, /* 0x12B49 = 0x12B38 U+0301 */
     394    { 0x2b37, 0x2b4b }, /* 0x12B4B = 0x12B37 U+0301 */
     395    { 0x2b30, 0x2b4d }, /* 0x12B4D = 0x12B30 U+0301 */
     396    { 0x2b43, 0x2b4f }, /* 0x12B4F = 0x12B43 U+0301 */
     397  #define COMP_TABLE_IDX_309A (COMP_TABLE_IDX_0301 + COMP_TABLE_LEN_0301)
     398  #define COMP_TABLE_LEN_309A 14
     399    { 0x242b, 0x2477 }, /* 0x12477 = 0x1242B U+309A */
     400    { 0x242d, 0x2478 }, /* 0x12478 = 0x1242D U+309A */
     401    { 0x242f, 0x2479 }, /* 0x12479 = 0x1242F U+309A */
     402    { 0x2431, 0x247a }, /* 0x1247A = 0x12431 U+309A */
     403    { 0x2433, 0x247b }, /* 0x1247B = 0x12433 U+309A */
     404    { 0x252b, 0x2577 }, /* 0x12577 = 0x1252B U+309A */
     405    { 0x252d, 0x2578 }, /* 0x12578 = 0x1252D U+309A */
     406    { 0x252f, 0x2579 }, /* 0x12579 = 0x1252F U+309A */
     407    { 0x2531, 0x257a }, /* 0x1257A = 0x12531 U+309A */
     408    { 0x2533, 0x257b }, /* 0x1257B = 0x12533 U+309A */
     409    { 0x253b, 0x257c }, /* 0x1257C = 0x1253B U+309A */
     410    { 0x2544, 0x257d }, /* 0x1257D = 0x12544 U+309A */
     411    { 0x2548, 0x257e }, /* 0x1257E = 0x12548 U+309A */
     412    { 0x2675, 0x2678 }, /* 0x12678 = 0x12675 U+309A */
     413  };
     414  
     415  #define MIN_NEEDED_INPUT	TO_LOOP_MIN_NEEDED_FROM
     416  #define MAX_NEEDED_INPUT	TO_LOOP_MAX_NEEDED_FROM
     417  #define MIN_NEEDED_OUTPUT	TO_LOOP_MIN_NEEDED_TO
     418  #define MAX_NEEDED_OUTPUT	TO_LOOP_MAX_NEEDED_TO
     419  #define LOOPFCT			TO_LOOP
     420  #define BODY \
     421    {									      \
     422      uint32_t ch = get32 (inptr);					      \
     423  									      \
     424      if (lasttwo != 0)							      \
     425        {									      \
     426  	/* Attempt to combine the last character with this one.  */	      \
     427  	unsigned int idx;						      \
     428  	unsigned int len;						      \
     429  									      \
     430  	if (ch == 0x02e5)						      \
     431  	  idx = COMP_TABLE_IDX_02E5, len = COMP_TABLE_LEN_02E5;		      \
     432  	else if (ch == 0x02e9)						      \
     433  	  idx = COMP_TABLE_IDX_02E9, len = COMP_TABLE_LEN_02E9;		      \
     434  	else if (ch == 0x0300)						      \
     435  	  idx = COMP_TABLE_IDX_0300, len = COMP_TABLE_LEN_0300;		      \
     436  	else if (ch == 0x0301)						      \
     437  	  idx = COMP_TABLE_IDX_0301, len = COMP_TABLE_LEN_0301;		      \
     438  	else if (ch == 0x309a)						      \
     439  	  idx = COMP_TABLE_IDX_309A, len = COMP_TABLE_LEN_309A;		      \
     440  	else								      \
     441  	  goto not_combining;						      \
     442  									      \
     443  	do								      \
     444  	  if (comp_table_data[idx].base == (uint16_t) lasttwo)		      \
     445  	    break;							      \
     446  	while (++idx, --len > 0);					      \
     447  									      \
     448  	if (len > 0)							      \
     449  	  {								      \
     450  	    /* Output the combined character.  */			      \
     451  	    /* We know the combined character is in JISX0213 plane 1,	      \
     452  	       but the buffered character may have been in JISX0208 or in     \
     453  	       JISX0213 plane 1.  */					      \
     454  	    size_t need =						      \
     455  	      (lasttwo >> 16						      \
     456  	       || (set != JISX0213_1_2000_set && set != JISX0213_1_2004_set)  \
     457  	       ? 4 : 0);						      \
     458  									      \
     459  	    if (__glibc_unlikely (outptr + need + 2 > outend))		      \
     460  	      {								      \
     461  		result = __GCONV_FULL_OUTPUT;				      \
     462  		break;							      \
     463  	      }								      \
     464  	    if (need)							      \
     465  	      {								      \
     466  		/* But first, output the escape sequence.  */		      \
     467  		*outptr++ = ESC;					      \
     468  		*outptr++ = '$';					      \
     469  		*outptr++ = '(';					      \
     470  		*outptr++ = 'O';					      \
     471  		set = JISX0213_1_2000_set;				      \
     472  	      }								      \
     473  	    lasttwo = comp_table_data[idx].composed;			      \
     474  	    *outptr++ = (lasttwo >> 8) & 0xff;				      \
     475  	    *outptr++ = lasttwo & 0xff;					      \
     476  	    lasttwo = 0;						      \
     477  	    inptr += 4;							      \
     478  	    continue;							      \
     479  	  }								      \
     480  									      \
     481        not_combining:							      \
     482  	/* Output the buffered character.  */				      \
     483  	/* We know it is in JISX0208 or in JISX0213 plane 1.  */	      \
     484  	{								      \
     485  	  size_t need = (lasttwo >> 16 ? 3 : 0);			      \
     486  									      \
     487  	  if (__glibc_unlikely (outptr + need + 2 > outend))		      \
     488  	    {								      \
     489  	      result = __GCONV_FULL_OUTPUT;				      \
     490  	      break;							      \
     491  	    }								      \
     492  	  if (need)							      \
     493  	    {								      \
     494  	      /* But first, output the escape sequence.  */		      \
     495  	      assert (set == JISX0208_1983_set);			      \
     496  	      *outptr++ = ESC;						      \
     497  	      *outptr++ = '$';						      \
     498  	      *outptr++ = 'B';						      \
     499  	    }								      \
     500  	  *outptr++ = (lasttwo >> 8) & 0xff;				      \
     501  	  *outptr++ = lasttwo & 0xff;					      \
     502  	  lasttwo = 0;							      \
     503  	  continue;							      \
     504  	}								      \
     505        }									      \
     506  									      \
     507      /* First see whether we can write the character using the currently	      \
     508         selected character set.  */					      \
     509      if (set == ASCII_set)						      \
     510        {									      \
     511  	/* Please note that the NUL byte is *not* matched if we are not	      \
     512  	   currently using the ASCII charset.  This is because we must	      \
     513  	   switch to the initial state whenever a NUL byte is written.  */    \
     514  	if (ch <= 0x7f)							      \
     515  	  {								      \
     516  	    *outptr++ = ch;						      \
     517  	    inptr += 4;							      \
     518  	    continue;							      \
     519  	  }								      \
     520        }									      \
     521      /* ISO-2022-JP recommends to encode the newline character always in	      \
     522         ASCII since this allows a context-free interpretation of the	      \
     523         characters at the beginning of the next line.  Otherwise it would      \
     524         have to be known whether the last line ended using ASCII or	      \
     525         JIS X 0201.  */							      \
     526      else if (set == JISX0201_Roman_set)					      \
     527        {									      \
     528  	unsigned char buf[1];						      \
     529  	if (ucs4_to_jisx0201 (ch, buf) != __UNKNOWN_10646_CHAR		      \
     530  	    && buf[0] > 0x20 && buf[0] < 0x80)				      \
     531  	  {								      \
     532  	    *outptr++ = buf[0];						      \
     533  	    inptr += 4;							      \
     534  	    continue;							      \
     535  	  }								      \
     536        }									      \
     537      else if (set == JISX0201_Kana_set)					      \
     538        {									      \
     539  	unsigned char buf[1];						      \
     540  	if (ucs4_to_jisx0201 (ch, buf) != __UNKNOWN_10646_CHAR		      \
     541  	    && buf[0] >= 0x80)						      \
     542  	  {								      \
     543  	    *outptr++ = buf[0] - 0x80;					      \
     544  	    inptr += 4;							      \
     545  	    continue;							      \
     546  	  }								      \
     547        }									      \
     548      else if (/*set == JISX0208_1978_set || */ set == JISX0208_1983_set)	      \
     549        {									      \
     550  	size_t written = ucs4_to_jisx0208 (ch, outptr, outend - outptr);      \
     551  									      \
     552  	if (written != __UNKNOWN_10646_CHAR)				      \
     553  	  {								      \
     554  	    uint32_t jch = ucs4_to_jisx0213 (ch);			      \
     555  									      \
     556  	    if (jch & 0x0080)						      \
     557  	      {								      \
     558  		/* A possible match in comp_table_data.  Buffer it.  */	      \
     559  		lasttwo = jch & 0x7f7f;					      \
     560  		inptr += 4;						      \
     561  		continue;						      \
     562  	      }								      \
     563  	    if (__glibc_unlikely (written == 0))			      \
     564  	      {								      \
     565  		result = __GCONV_FULL_OUTPUT;				      \
     566  		break;							      \
     567  	      }								      \
     568  	    else							      \
     569  	      {								      \
     570  	 	outptr += written;					      \
     571  		inptr += 4;						      \
     572  		continue;						      \
     573  	     }								      \
     574  	  }								      \
     575        }									      \
     576      else								      \
     577        {									      \
     578  	/* (set == JISX0213_1_2000_set || set == JISX0213_1_2004_set	      \
     579  	    || set == JISX0213_2_set) */				      \
     580  	uint32_t jch = ucs4_to_jisx0213 (ch);				      \
     581  									      \
     582  	if (jch != 0							      \
     583  	    && (jch & 0x8000						      \
     584  		? set == JISX0213_2_set					      \
     585  		: (set == JISX0213_1_2004_set				      \
     586  		   || (set == JISX0213_1_2000_set			      \
     587  		       && !jisx0213_added_in_2004_p (jch)))))		      \
     588  	  {								      \
     589  	    if (jch & 0x0080)						      \
     590  	      {								      \
     591  		/* A possible match in comp_table_data.  Buffer it.  */	      \
     592  									      \
     593  		/* We know it's a JISX 0213 plane 1 character.  */	      \
     594  		assert ((jch & 0x8000) == 0);				      \
     595  									      \
     596  		lasttwo = jch & 0x7f7f;					      \
     597  		inptr += 4;						      \
     598  		continue;						      \
     599  	      }								      \
     600  									      \
     601  	    if (__glibc_unlikely (outptr + 1 >= outend))		      \
     602  	      {								      \
     603  		result = __GCONV_FULL_OUTPUT;				      \
     604  		break;							      \
     605  	      }								      \
     606  	    *outptr++ = (jch >> 8) & 0x7f;				      \
     607  	    *outptr++ = jch & 0x7f;					      \
     608  	    inptr += 4;							      \
     609  	    continue;							      \
     610  	  }								      \
     611        }									      \
     612  									      \
     613      /* The attempts to use the currently selected character set failed,	      \
     614         either because the character requires a different character set,	      \
     615         or because the character is unknown.  */				      \
     616  									      \
     617      if (ch <= 0x7f)							      \
     618        {									      \
     619  	/* We must encode using ASCII.  First write out the escape	      \
     620  	   sequence.  */						      \
     621  	if (__glibc_unlikely (outptr + 3 > outend))			      \
     622  	  {								      \
     623  	    result = __GCONV_FULL_OUTPUT;				      \
     624  	    break;							      \
     625  	  }								      \
     626  									      \
     627  	*outptr++ = ESC;						      \
     628  	*outptr++ = '(';						      \
     629  	*outptr++ = 'B';						      \
     630  	set = ASCII_set;						      \
     631  									      \
     632  	if (__glibc_unlikely (outptr >= outend))			      \
     633  	  {								      \
     634  	    result = __GCONV_FULL_OUTPUT;				      \
     635  	    break;							      \
     636  	  }								      \
     637  	*outptr++ = ch;							      \
     638        }									      \
     639      else								      \
     640        {									      \
     641  	unsigned char buf[2];						      \
     642  									      \
     643  	/* Try JIS X 0201 Roman.  */					      \
     644  	if (ucs4_to_jisx0201 (ch, buf) != __UNKNOWN_10646_CHAR		      \
     645  	    && buf[0] > 0x20 && buf[0] < 0x80)				      \
     646  	  {								      \
     647  	    if (set != JISX0201_Roman_set)				      \
     648  	      {								      \
     649  		if (__glibc_unlikely (outptr + 3 > outend))		      \
     650  		  {							      \
     651  		    result = __GCONV_FULL_OUTPUT;			      \
     652  		    break;						      \
     653  		  }							      \
     654  		*outptr++ = ESC;					      \
     655  		*outptr++ = '(';					      \
     656  		*outptr++ = 'J';					      \
     657  		set = JISX0201_Roman_set;				      \
     658  	      }								      \
     659  									      \
     660  	    if (__glibc_unlikely (outptr >= outend))			      \
     661  	      {								      \
     662  		result = __GCONV_FULL_OUTPUT;				      \
     663  		break;							      \
     664  	      }								      \
     665  	    *outptr++ = buf[0];						      \
     666  	  }								      \
     667  	else								      \
     668  	  {								      \
     669  	    uint32_t jch = ucs4_to_jisx0213 (ch);			      \
     670  									      \
     671  	    /* Try JIS X 0208.  */					      \
     672  	    size_t written = ucs4_to_jisx0208 (ch, buf, 2);		      \
     673  	    if (written != __UNKNOWN_10646_CHAR)			      \
     674  	      {								      \
     675  		if (jch & 0x0080)					      \
     676  		  {							      \
     677  		    /* A possible match in comp_table_data.  Buffer it.  */   \
     678  		    lasttwo = ((set != JISX0208_1983_set ? 1 : 0) << 16)      \
     679  			      | (jch & 0x7f7f);				      \
     680  		    set = JISX0208_1983_set;				      \
     681  		    inptr += 4;						      \
     682  		    continue;						      \
     683  		  }							      \
     684  									      \
     685  		if (set != JISX0208_1983_set)				      \
     686  		  {							      \
     687  		    if (__glibc_unlikely (outptr + 3 > outend))		      \
     688  		      {							      \
     689  			result = __GCONV_FULL_OUTPUT;			      \
     690  			break;						      \
     691  		      }							      \
     692  		    *outptr++ = ESC;					      \
     693  		    *outptr++ = '$';					      \
     694  		    *outptr++ = 'B';					      \
     695  		    set = JISX0208_1983_set;				      \
     696  		  }							      \
     697  									      \
     698  		if (__glibc_unlikely (outptr + 2 > outend))		      \
     699  		  {							      \
     700  		    result = __GCONV_FULL_OUTPUT;			      \
     701  		    break;						      \
     702  		  }							      \
     703  		*outptr++ = buf[0];					      \
     704  		*outptr++ = buf[1];					      \
     705  	      }								      \
     706  	    else							      \
     707  	      {								      \
     708  		/* Try JIS X 0213.  */					      \
     709  		if (jch != 0)						      \
     710  		  {							      \
     711  		    int new_set =					      \
     712  		      (jch & 0x8000					      \
     713  		       ? JISX0213_2_set					      \
     714  		       : jisx0213_added_in_2004_p (jch)			      \
     715  			 ? JISX0213_1_2004_set				      \
     716  			 : JISX0213_1_2000_set);			      \
     717  									      \
     718  		    if (set != new_set)					      \
     719  		      {							      \
     720  			if (__glibc_unlikely (outptr + 4 > outend))	      \
     721  			  {						      \
     722  			    result = __GCONV_FULL_OUTPUT;		      \
     723  			    break;					      \
     724  			  }						      \
     725  			*outptr++ = ESC;				      \
     726  			*outptr++ = '$';				      \
     727  			*outptr++ = '(';				      \
     728  			*outptr++ =					      \
     729  			  ((new_set - JISX0213_1_2000_set) >> 3) + 'O';	      \
     730  			set = new_set;					      \
     731  		      }							      \
     732  									      \
     733  		    if (jch & 0x0080)					      \
     734  		      {							      \
     735  			/* A possible match in comp_table_data.		      \
     736  			   Buffer it.  */				      \
     737  									      \
     738  			/* We know it's a JIS X 0213 plane 1 character.  */   \
     739  			assert ((jch & 0x8000) == 0);			      \
     740  									      \
     741  			lasttwo = jch & 0x7f7f;				      \
     742  			inptr += 4;					      \
     743  			continue;					      \
     744  		      }							      \
     745  									      \
     746  		    if (__glibc_unlikely (outptr + 1 >= outend))	      \
     747  		      {							      \
     748  			result = __GCONV_FULL_OUTPUT;			      \
     749  			break;						      \
     750  		      }							      \
     751  		    *outptr++ = (jch >> 8) & 0x7f;			      \
     752  		    *outptr++ = jch & 0x7f;				      \
     753  		  }							      \
     754  		else							      \
     755  		  {							      \
     756  		    /* Try JIS X 0201 Katakana.  This is officially not part  \
     757  		       of ISO-2022-JP-3.  Therefore we try it after all other \
     758  		       attempts.  */					      \
     759  		    if (ucs4_to_jisx0201 (ch, buf) != __UNKNOWN_10646_CHAR    \
     760  			&& buf[0] >= 0x80)				      \
     761  		      {							      \
     762  			if (set != JISX0201_Kana_set)			      \
     763  			  {						      \
     764  			    if (__builtin_expect (outptr + 3 > outend, 0))    \
     765  			      {						      \
     766  				result = __GCONV_FULL_OUTPUT;		      \
     767  				break;					      \
     768  			      }						      \
     769  			    *outptr++ = ESC;				      \
     770  			    *outptr++ = '(';				      \
     771  			    *outptr++ = 'I';				      \
     772  			    set = JISX0201_Kana_set;			      \
     773  			  }						      \
     774  									      \
     775  			if (__glibc_unlikely (outptr >= outend))	      \
     776  			  {						      \
     777  			    result = __GCONV_FULL_OUTPUT;		      \
     778  			    break;					      \
     779  			  }						      \
     780  			*outptr++ = buf[0] - 0x80;			      \
     781  		      }							      \
     782  		    else						      \
     783  		      {							      \
     784  			UNICODE_TAG_HANDLER (ch, 4);			      \
     785  									      \
     786  			/* Illegal character.  */			      \
     787  			STANDARD_TO_LOOP_ERR_HANDLER (4);		      \
     788  		      }							      \
     789  		  }							      \
     790  	      }								      \
     791  	  }								      \
     792        }									      \
     793  									      \
     794      /* Now that we wrote the output increment the input pointer.  */	      \
     795      inptr += 4;								      \
     796    }
     /* Parameterization of the <iconv/loop.c> inclusion at the end of this
        group.  The conversion state is carried in the int that STATEP points
        to: the bits selected by CURRENT_SEL_MASK hold `set', the character
        set most recently selected by an escape sequence, and the bits from
        bit 6 upwards hold `lasttwo', the character that BODY buffers while a
        match in comp_table_data is still possible.
        NOTE(review): CURRENT_SEL_MASK is defined outside this view;
        presumably it lies entirely below bit 6 so that the two fields do not
        overlap -- confirm against its definition.  */

     /* NOTE(review): defined without a value; presumably a feature switch
        tested by loop.c (the error handling macros used in BODY likely rely
        on it) -- confirm in iconv/loop.c.  */
     797  #define LOOP_NEED_FLAGS
     /* Extra declaration text starting with a comma; presumably appended to
        the loop function's parameter list by loop.c -- confirm there.  It
        provides the STATEP pointer that the three macros below
        dereference.  */
     798  #define EXTRA_LOOP_DECLS	, int *statep
     /* Declare the working copies `set' and `lasttwo' and load them from
        *STATEP.  */
     799  #define INIT_PARAMS		int set = *statep & CURRENT_SEL_MASK;	      \
     800  				uint32_t lasttwo = *statep >> 6
     /* Reload both working copies from *STATEP, overwriting whatever values
        they currently hold.  NOTE(review): presumably used by loop.c when a
        conversion step has to be restarted -- confirm in iconv/loop.c.  */
     801  #define REINIT_PARAMS		do					      \
     802  				  {					      \
     803  				    set = *statep & CURRENT_SEL_MASK;	      \
     804  				    lasttwo = *statep >> 6;		      \
     805  				  }					      \
     806  				while (0)
     /* Pack the working copies back into *STATEP, using the same layout that
        INIT_PARAMS and REINIT_PARAMS read.  */
     807  #define UPDATE_PARAMS		*statep = set | (lasttwo << 6)
     /* Expand the conversion loop using the definitions above.  */
     808  #include <iconv/loop.c>
     809  
     810  
     811  /* Now define the toplevel functions.  */
     812  #include <iconv/skeleton.c>