(root)/
glibc-2.38/
iconvdata/
gbgbk.c
       1  /* Mapping tables from GBK to GB2312 and vice versa.
       2     Copyright (C) 1999-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  #include <dlfcn.h>
      20  #include <gconv.h>
      21  #include <stdint.h>
      22  
      23  
      24  /* Definitions used in the body of the `gconv' function.  */
      25  #define CHARSET_NAME		"GBK//"
      26  #define FROM_LOOP		from_gbk_to_gb
      27  #define TO_LOOP			from_gb_to_gbk
      28  #define DEFINE_INIT		1
      29  #define DEFINE_FINI		1
      30  #define MIN_NEEDED_FROM		1
      31  #define MAX_NEEDED_FROM		2
      32  #define MIN_NEEDED_TO		1
      33  #define MAX_NEEDED_TO		2
      34  #define ONE_DIRECTION		0
      35  
      36  
      37  /* First define the conversion function from GBK to GB2312.  */
      38  #define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
      39  #define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
      40  #define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
      41  #define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
      42  #define LOOPFCT			FROM_LOOP
      43  #define BODY \
      44    {									      \
      45      uint32_t ch = *inptr;						      \
      46  									      \
      47      if (ch <= 0x7f)							      \
      48        *outptr++ = *inptr++;						      \
      49      else								      \
      50        {									      \
      51  	/* It's a two-byte sequence.  We have to mask out all the sequences   \
      52  	   which are not in GB2312.  Besides all of them in the range	      \
      53  	   0x8140 to 0xA0FE this also includes in the remaining range the     \
      54  	   sequences which the second byte being in the range from 0x40 to    \
      55  	   0xA0 and the following exceptions:				      \
      56  									      \
      57  	     0xA2A1 to 0xA2A9,						      \
      58  	     0xA2AA,							      \
      59  	     0xA6E0 to 0xA6EB,						      \
      60  	     0xA6EE to 0xA6F2,						      \
      61  	     0xA6F4, 0xA6F5,						      \
      62  	     0xA8BB to 0xA8C0						      \
      63  									      \
      64  	   All these characters are not defined in GB2312.  Besides this      \
      65  	   there is an incomatibility in the mapping.  The Unicode tables     \
      66  	   say that 0xA1A4 maps in GB2312 to U30FB while in GBK it maps to    \
      67  	   U00B7.  Similarly, 0xA1AA maps in GB2312 to U2015 while in GBK     \
      68  	   it maps to U2014.  Since we are free to do whatever we want if     \
      69  	   a mapping is not available we will not flag this as an error	      \
      70  	   but instead map the two positions.  But this means that the	      \
      71  	   mapping							      \
      72  									      \
      73  		UCS4 -> GB2312 -> GBK -> UCS4				      \
      74  									      \
      75  	   might not produce identical text.  */			      \
      76  	if (__glibc_unlikely (inptr + 1 >= inend))			      \
      77  	  {								      \
      78  	    /* The second character is not available.  Store		      \
      79  	       the intermediate result.  */				      \
      80  	    result = __GCONV_INCOMPLETE_INPUT;				      \
      81  	    break;							      \
      82  	  }								      \
      83  									      \
      84  	if (__glibc_unlikely (outend - outptr < 2))			      \
      85  	  {								      \
      86  	    /* We ran out of space.  */					      \
      87  	    result = __GCONV_FULL_OUTPUT;				      \
      88  	    break;							      \
      89  	  }								      \
      90  									      \
      91  	ch = (ch << 8) | inptr[1];					      \
      92  									      \
      93  	/* Map 0xA844 (U2015 in GBK) to 0xA1AA (U2015 in GB2312).  */	      \
      94  	if (__glibc_unlikely (ch == 0xa844))				      \
      95  	  ch = 0xa1aa;							      \
      96  									      \
      97  	/* Now determine whether the character is valid.  */		      \
      98  	if (__builtin_expect (ch < 0xa1a1, 0)				      \
      99  	    || __builtin_expect (ch > 0xf7fe, 0)			      \
     100  	    || __builtin_expect (inptr[1] < 0xa1, 0)			      \
     101  	    /* Now test the exceptions.  */				      \
     102  	    || (__builtin_expect (ch >= 0xa2a1, 0)			      \
     103  		&& __builtin_expect (ch <= 0xa2aa, 0))			      \
     104  	    || (__builtin_expect (ch >= 0xa6e0, 0)			      \
     105  		&& __builtin_expect (ch <= 0xa6f5, 0))			      \
     106  	    || (__builtin_expect (ch >= 0xa8bb, 0)			      \
     107  		&& __builtin_expect (ch <= 0xa8c0, 0)))			      \
     108  	  {								      \
     109  	    /* One of the characters we cannot map.  */			      \
     110  	    STANDARD_TO_LOOP_ERR_HANDLER (2);				      \
     111  	  }								      \
     112  									      \
     113  	/* Copy the two bytes.  */					      \
     114  	*outptr++ = *inptr++;						      \
     115  	*outptr++ = *inptr++;						      \
     116        }									      \
     117    }
/* NOTE(review): LOOP_NEED_FLAGS presumably asks <iconv/loop.c> to make the
   conversion flags available to the body, which the body above needs for
   STANDARD_TO_LOOP_ERR_HANDLER -- confirm against iconv/loop.c.  */
#define LOOP_NEED_FLAGS
/* Expand the loop template for the LOOPFCT/BODY pair defined above.  */
#include <iconv/loop.c>


/* Next, define the other direction.  */
/* GB2312 is the input now, so the input limits come from the TO side and
   the output limits from the FROM side.  NOTE(review): redefining these
   without #undef assumes <iconv/loop.c> undefines them -- confirm.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
#define MAX_NEEDED_INPUT	MAX_NEEDED_TO
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
#define LOOPFCT			TO_LOOP
     128  #define BODY \
     129    {									      \
     130      /* We don't have to care about characters we cannot map.  The only	      \
     131         problem are the mapping of 0xA1A4 and 0xA1AA but as explained above    \
     132         we do not do anything special here.  */				      \
     133      unsigned char ch = *inptr++;					      \
     134  									      \
     135      if (ch > 0x7f)							      \
     136        {									      \
     137  	if (__glibc_unlikely (inptr + 1 >= inend))			      \
     138  	  {								      \
     139  	    /* The second character is not available.  Store		      \
     140  		 the intermediate result.  */				      \
     141  	    result = __GCONV_INCOMPLETE_INPUT;				      \
     142  	    break;							      \
     143  	  }								      \
     144  									      \
     145  	if (__glibc_unlikely (outend - outptr < 2))			      \
     146  	  {								      \
     147  	    /* We ran out of space.  */					      \
     148  	    result = __GCONV_FULL_OUTPUT;				      \
     149  	    break;							      \
     150  	  }								      \
     151  									      \
     152  	*outptr++ = ch;							      \
     153  	ch = *inptr++;							      \
     154        }									      \
     155      *outptr++ = ch;							      \
     156    }
/* Expand the loop template a second time, now for the TO_LOOP function and
   the BODY defined just above.  */
#include <iconv/loop.c>


/* Now define the toplevel functions.  */
/* NOTE(review): the skeleton presumably picks up CHARSET_NAME, FROM_LOOP,
   TO_LOOP and the other definitions from the top of this file -- confirm
   against iconv/skeleton.c.  */
#include <iconv/skeleton.c>