(root)/
glibc-2.38/
iconvdata/
euc-jp.c
       1  /* Mapping tables for EUC-JP handling.
       2     Copyright (C) 1998-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <dlfcn.h>
      20  #include <stdint.h>
      21  #include <gconv.h>
      22  #include <jis0201.h>
      23  #include <jis0208.h>
      24  #include <jis0212.h>
      25  
      26  /* Definitions used in the body of the `gconv' function.

             FROM_LOOP and TO_LOOP name the two conversion loops defined below;
             they are used as LOOPFCT for the EUC-JP -> UCS4 and the
             UCS4 -> EUC-JP direction respectively.  MIN_NEEDED_FROM and
             MAX_NEEDED_FROM bound the length in bytes of one EUC-JP character
             as handled below: one byte for the single-byte range, three bytes
             for a 0x8f-prefixed JIS X 0212 character.  MIN_NEEDED_TO is the
             size of one UCS4 value as read by get32 and written by put32.

             NOTE(review): CHARSET_NAME, DEFINE_INIT, DEFINE_FINI and
             ONE_DIRECTION are not referenced anywhere else in this file;
             presumably they are interpreted by <iconv/skeleton.c>, which is
             included at the end -- confirm there.  */
      27  #define CHARSET_NAME		"EUC-JP//"
      28  #define FROM_LOOP		from_euc_jp
      29  #define TO_LOOP			to_euc_jp
      30  #define DEFINE_INIT		1
      31  #define DEFINE_FINI		1
      32  #define MIN_NEEDED_FROM		1
      33  #define MAX_NEEDED_FROM		3
      34  #define MIN_NEEDED_TO		4
      35  #define ONE_DIRECTION		0
      36  
      37  
      38  /* First define the conversion function from EUC-JP to UCS4.

             BODY handles one input character.  It reads from INPTR (bounded by
             INEND), stores one 4-byte UCS4 value at OUTPTR with put32 and
             advances both pointers.  The lead byte selects the handling:

               < 0x8e, 0x90..0x9f   single byte, its value is stored unchanged
               0x8e                 code set 2, second byte through
                                    jisx0201_to_ucs4
               0x8f                 code set 3, the bytes after the prefix
                                    through jisx0212_to_ucs4
               0xff                 illegal
               anything else        code set 1, through jisx0208_to_ucs4

             When the input ends in the middle of a character RESULT is set to
             __GCONV_INCOMPLETE_INPUT and BODY is left with `break' before
             INPTR or OUTPTR have been changed.  Invalid input is handed to
             STANDARD_FROM_LOOP_ERR_HANDLER, which is defined outside this
             file.  */
      39  #define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
      40  #define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
      41  #define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
      42  #define LOOPFCT			FROM_LOOP
      43  #define BODY \
      44    {									      \
      45      uint32_t ch = *inptr;						      \
      46  									      \
      47      if (ch < 0x8e || (ch >= 0x90 && ch <= 0x9f))			      \
      48        ++inptr;	/* One byte; its value is stored as is below.  */	      \
      49      else if (ch == 0xff)						      \
      50        {									      \
      51  	/* 0xff is not accepted as a lead byte.  */			      \
      52  	STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
      53        }									      \
      54      else								      \
      55        {									      \
      56  	/* Two or more byte character.  First test whether the next	      \
      57  	   byte is also available.  */					      \
      58  	int ch2;							      \
      59  									      \
      60  	if (__glibc_unlikely (inptr + 1 >= inend))			      \
      61  	  {								      \
      62  	    /* The second byte is not available.  Report incomplete	      \
      63  	       input; INPTR still points at the lead byte.  */		      \
      64  	    result = __GCONV_INCOMPLETE_INPUT;				      \
      65  	    break;							      \
      66  	  }								      \
      67  									      \
      68  	ch2 = inptr[1];							      \
      69  									      \
      70  	/* All second bytes of a multibyte character must be >= 0xa1. */      \
      71  	if (__glibc_unlikely (ch2 < 0xa1))				      \
      72  	  STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
      73  									      \
      74  	if (ch == 0x8e)							      \
      75  	  {								      \
      76  	    /* This is code set 2: half-width katakana.  */		      \
      77  	    ch = jisx0201_to_ucs4 (ch2);				      \
      78  	    if (__builtin_expect (ch, 0) == __UNKNOWN_10646_CHAR)	      \
      79  	      STANDARD_FROM_LOOP_ERR_HANDLER (1);			      \
      80  									      \
      81  	    inptr += 2;							      \
      82  	  }								      \
      83  	else								      \
      84  	  {								      \
      85  	    const unsigned char *endp;					      \
      86  									      \
      87  	    if (ch == 0x8f)						      \
      88  	      {								      \
      89  		/* This is code set 3: JIS X 0212-1990.  */		      \
      90  		endp = inptr + 1;	/* Skip the 0x8f prefix.  */	      \
      91  									      \
      92  		ch = jisx0212_to_ucs4 (&endp, inend - endp, 0x80);	      \
      93  	      }								      \
      94  	    else							      \
      95  	      {								      \
      96  		/* This is code set 1: JIS X 0208.  */			      \
      97  		endp = inptr;						      \
      98  									      \
      99  		ch = jisx0208_to_ucs4 (&endp, inend - inptr, 0x80);	      \
     100  	      }								      \
     101  									      \
     102  	    if (__builtin_expect (ch, 1) == 0)				      \
     103  	      {								      \
     104  		/* A zero result means not enough input available.  */	      \
     105  		result = __GCONV_INCOMPLETE_INPUT;			      \
     106  		break;							      \
     107  	      }								      \
     108  	    if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))		      \
     109  	      /* Illegal character.  */					      \
     110  	      STANDARD_FROM_LOOP_ERR_HANDLER (1);			      \
     111  									      \
     112  	    inptr = endp;	/* Continue at the position left in ENDP.  */ \
     113  	  }								      \
     114        }									      \
     115  									      \
     116      put32 (outptr, ch);							      \
     117      outptr += 4;							      \
     118    }
     119  #define ONEBYTE_BODY \
     120    { /* Map the single byte C: its own value, or WEOF.  */		      \
     121      if (c < 0x8e || (c >= 0x90 && c <= 0x9f))				      \
     122        return c;	/* Same pass-through range as in BODY above.  */     \
     123      else								      \
     124        return WEOF;	/* Starts a longer sequence or is illegal.  */	      \
     125    }
     126  #define LOOP_NEED_FLAGS	/* Not referenced in this file; presumably read by loop.c -- confirm.  */
     127  #include <iconv/loop.c>
     128  
     129  
     130  /* Next, define the other direction: UCS4 to EUC-JP.

             BODY reads one 4-byte UCS4 value from INPTR with get32 and stores
             one to three bytes at OUTPTR (bounded by OUTEND):

               < 0x8e, 0x90..0x9f   the value itself as a single byte
               0xa5                 0x5c
               0x203e               0x7e
               ucs4_to_jisx0201     0x8e followed by the byte the helper stored
               ucs4_to_jisx0208     the two stored bytes, each with 0x80 added
               ucs4_to_jisx0212     0x8f followed by the two stored bytes, each
                                    with 0x80 added

             The helpers are tried in that order.  When the output buffer is
             too small RESULT is set to __GCONV_FULL_OUTPUT and BODY is left
             with `break' before INPTR or OUTPTR have been advanced.  A value
             that none of the helpers accepts is handed to UNICODE_TAG_HANDLER
             and then STANDARD_TO_LOOP_ERR_HANDLER, both defined outside this
             file.  */
     131  #define MIN_NEEDED_INPUT	MIN_NEEDED_TO
     132  #define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
     133  #define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
     134  #define LOOPFCT			TO_LOOP
     135  #define BODY \
     136    {									      \
     137      uint32_t ch = get32 (inptr);					      \
     138  									      \
     139      if (ch < 0x8e || (ch >= 0x90 && ch <= 0x9f))			      \
     140        /* It's plain ASCII or C1.  */					      \
     141        *outptr++ = ch;							      \
     142      else if (ch == 0xa5)						      \
     143        /* YEN sign => backslash  */					      \
     144        *outptr++ = 0x5c;							      \
     145      else if (ch == 0x203e)						      \
     146        /* overscore => asciitilde */					      \
     147        *outptr++ = 0x7e;							      \
     148      else								      \
     149        {									      \
     150  	/* Try the JIS character sets.  */				      \
     151  	size_t found;							      \
     152  									      \
     153  	/* See whether we have room for at least two bytes.  */		      \
     154  	if (__glibc_unlikely (outptr + 1 >= outend))			      \
     155  	  {								      \
     156  	    result = __GCONV_FULL_OUTPUT;				      \
     157  	    break;							      \
     158  	  }								      \
     159  									      \
     160  	found = ucs4_to_jisx0201 (ch, outptr + 1);			      \
     161  	if (found != __UNKNOWN_10646_CHAR)				      \
     162  	  {								      \
     163  	    /* Yes, it's a JIS 0201 character.  Store the shift byte.  */     \
     164  	    *outptr = 0x8e;						      \
     165  	    outptr += 2;						      \
     166  	  }								      \
     167  	else								      \
     168  	  {								      \
     169  	    /* No JIS 0201 character.  */				      \
     170  	    found = ucs4_to_jisx0208 (ch, outptr, 2);			      \
     171  	    /* The check above guarantees room for these two bytes.  */	      \
     172  	    if (found != __UNKNOWN_10646_CHAR)				      \
     173  	      {								      \
     174  		/* It's a JIS 0208 character, adjust it for EUC-JP.  */	      \
     175  		*outptr++ += 0x80;					      \
     176  		*outptr++ += 0x80;					      \
     177  	      }								      \
     178  	    else							      \
     179  	      {								      \
     180  		/* No JIS 0208 character.  */				      \
     181  		found = ucs4_to_jisx0212 (ch, outptr + 1,		      \
     182  					  outend - outptr - 1);		      \
     183  		  							      \
     184  		if (__builtin_expect (found, 1) == 0)			      \
     185  		  {							      \
     186  		    /* We ran out of space.  */				      \
     187  		    result = __GCONV_FULL_OUTPUT;			      \
     188  		    break;						      \
     189  		  }							      \
     190  		else if (__builtin_expect (found, 0) != __UNKNOWN_10646_CHAR) \
     191  		  {							      \
     192  		    /* It's a JIS 0212 character, adjust it for EUC-JP.  */   \
     193  		    *outptr++ = 0x8f;					      \
     194  		    *outptr++ += 0x80;					      \
     195  		    *outptr++ += 0x80;					      \
     196  		  }							      \
     197  		else							      \
     198  		  {							      \
     199  		    UNICODE_TAG_HANDLER (ch, 4);			      \
     200  									      \
     201  		    /* Not found in any of the three JIS sets.  */	      \
     202  		    STANDARD_TO_LOOP_ERR_HANDLER (4);			      \
     203  		  }							      \
     204  	      }								      \
     205  	  }								      \
     206        }									      \
     207  									      \
     208      inptr += 4;								      \
     209    }
     210  #define LOOP_NEED_FLAGS	/* Not referenced in this file; presumably read by loop.c -- confirm.  */
     211  #include <iconv/loop.c>
     212  
     213  
     214  /* Now define the toplevel functions.  */
     215  #include <iconv/skeleton.c>