(root)/
Python-3.12.0/
Modules/
cjkcodecs/
_codecs_hk.c
       1  /*
       2   * _codecs_hk.c: Codecs collection for encodings from Hong Kong
       3   *
       4   * Written by Hye-Shik Chang <perky@FreeBSD.org>
       5   */
       6  
/*
 * Defined before "cjkcodecs.h" is included below.  This module borrows the
 * Big5 tables from another codec module at init time (see the IMPORT_MAP
 * call in CODEC_INIT(big5hkscs)).
 * NOTE(review): presumably cjkcodecs.h checks this macro to enable the
 * map-importing helpers -- confirm in that header.
 */
#define USING_IMPORTED_MAPS

/*
 * Extra per-module state for this codec module: pointers to the imported
 * Big5 encode and decode maps.  They are filled in by CODEC_INIT(big5hkscs)
 * through a cjkcodecs_module_state pointer (st->big5_encmap / st->big5_decmap).
 * NOTE(review): presumably cjkcodecs.h expands this macro inside the
 * definition of cjkcodecs_module_state -- confirm in that header.
 */
#define CJK_MOD_SPECIFIC_STATE      \
    const encode_map *big5_encmap;  \
    const decode_map *big5_decmap;
      12  
      13  #include "cjkcodecs.h"
      14  #include "mappings_hk.h"
      15  
      16  /*
      17   * BIG5HKSCS codec
      18   */
      19  
      20  CODEC_INIT(big5hkscs)
      21  {
      22      cjkcodecs_module_state *st = codec->modstate;
      23      if (IMPORT_MAP(tw, big5, &st->big5_encmap, &st->big5_decmap)) {
      24          return -1;
      25      }
      26      return 0;
      27  }
      28  
      29  /*
      30   * There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004:
      31   *  U+00CA U+0304 -> 8862  (U+00CA alone is mapped to 8866)
      32   *  U+00CA U+030C -> 8864
      33   *  U+00EA U+0304 -> 88a3  (U+00EA alone is mapped to 88a7)
      34   *  U+00EA U+030C -> 88a5
      35   * These are handled by not mapping tables but a hand-written code.
      36   */
      37  static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
      38  
      39  ENCODER(big5hkscs)
      40  {
      41      while (*inpos < inlen) {
      42          Py_UCS4 c = INCHAR1;
      43          DBCHAR code;
      44          Py_ssize_t insize;
      45  
      46          if (c < 0x80) {
      47              REQUIRE_OUTBUF(1);
      48              **outbuf = (unsigned char)c;
      49              NEXT(1, 1);
      50              continue;
      51          }
      52  
      53          insize = 1;
      54          REQUIRE_OUTBUF(2);
      55  
      56          if (c < 0x10000) {
      57              if (TRYMAP_ENC(big5hkscs_bmp, code, c)) {
      58                  if (code == MULTIC) {
      59                      Py_UCS4 c2;
      60                      if (inlen - *inpos >= 2)
      61                          c2 = INCHAR2;
      62                      else
      63                          c2 = 0;
      64  
      65                      if (inlen - *inpos >= 2 &&
      66                          ((c & 0xffdf) == 0x00ca) &&
      67                          ((c2 & 0xfff7) == 0x0304)) {
      68                          code = big5hkscs_pairenc_table[
      69                              ((c >> 4) |
      70                               (c2 >> 3)) & 3];
      71                          insize = 2;
      72                      }
      73                      else if (inlen - *inpos < 2 &&
      74                               !(flags & MBENC_FLUSH))
      75                          return MBERR_TOOFEW;
      76                      else {
      77                          if (c == 0xca)
      78                              code = 0x8866;
      79                          else /* c == 0xea */
      80                              code = 0x88a7;
      81                      }
      82                  }
      83              }
      84              else if (TRYMAP_ENC_ST(big5, code, c))
      85                  ;
      86              else
      87                  return 1;
      88          }
      89          else if (c < 0x20000)
      90              return insize;
      91          else if (c < 0x30000) {
      92              if (TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff))
      93                  ;
      94              else
      95                  return insize;
      96          }
      97          else
      98              return insize;
      99  
     100          OUTBYTE1(code >> 8);
     101          OUTBYTE2(code & 0xFF);
     102          NEXT(insize, 2);
     103      }
     104  
     105      return 0;
     106  }
     107  
     108  #define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
     109  
     110  DECODER(big5hkscs)
     111  {
     112      while (inleft > 0) {
     113          unsigned char c = INBYTE1;
     114          Py_UCS4 decoded;
     115  
     116          if (c < 0x80) {
     117              OUTCHAR(c);
     118              NEXT_IN(1);
     119              continue;
     120          }
     121  
     122          REQUIRE_INBUF(2);
     123  
     124          if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
     125              if (TRYMAP_DEC_ST(big5, decoded, c, INBYTE2)) {
     126                  OUTCHAR(decoded);
     127                  NEXT_IN(2);
     128                  continue;
     129              }
     130          }
     131  
     132          if (TRYMAP_DEC(big5hkscs, decoded, c, INBYTE2))
     133          {
     134              int s = BH2S(c, INBYTE2);
     135              const unsigned char *hintbase;
     136  
     137              assert(0x87 <= c && c <= 0xfe);
     138              assert(0x40 <= INBYTE2 && INBYTE2 <= 0xfe);
     139  
     140              if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
     141                      hintbase = big5hkscs_phint_0;
     142                      s -= BH2S(0x87, 0x40);
     143              }
     144              else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
     145                      hintbase = big5hkscs_phint_12130;
     146                      s -= BH2S(0xc6, 0xa1);
     147              }
     148              else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
     149                      hintbase = big5hkscs_phint_21924;
     150                      s -= BH2S(0xf9, 0xd6);
     151              }
     152              else
     153                      return MBERR_INTERNAL;
     154  
     155              if (hintbase[s >> 3] & (1 << (s & 7))) {
     156                      OUTCHAR(decoded | 0x20000);
     157                      NEXT_IN(2);
     158              }
     159              else {
     160                      OUTCHAR(decoded);
     161                      NEXT_IN(2);
     162              }
     163              continue;
     164          }
     165  
     166          switch ((c << 8) | INBYTE2) {
     167          case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
     168          case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
     169          case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
     170          case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
     171          default: return 1;
     172          }
     173  
     174          NEXT_IN(2); /* all decoded code points are pairs, above. */
     175      }
     176  
     177      return 0;
     178  }
     179  
/*
 * Mapping tables published by this module; the count given to
 * BEGIN_MAPPINGS_LIST matches the three entries below.
 *   big5hkscs        - used only by the decoder (TRYMAP_DEC)
 *   big5hkscs_bmp    - used only by the encoder, for characters < U+10000
 *   big5hkscs_nonbmp - used only by the encoder, for U+20000..U+2FFFF
 */
BEGIN_MAPPINGS_LIST(3)
  MAPPING_DECONLY(big5hkscs)
  MAPPING_ENCONLY(big5hkscs_bmp)
  MAPPING_ENCONLY(big5hkscs_nonbmp)
END_MAPPINGS_LIST
     185  
/*
 * Codecs provided by this module: a single entry, big5hkscs.
 * NOTE(review): CODEC_STATELESS_WINIT presumably registers a codec without
 * per-stream state that has an init hook (CODEC_INIT(big5hkscs) above) --
 * confirm against the macro definition in cjkcodecs.h.
 */
BEGIN_CODECS_LIST(1)
  CODEC_STATELESS_WINIT(big5hkscs)
END_CODECS_LIST

/* NOTE(review): presumably expands to the module definition/init code for
   the "hk" codec module -- confirm in cjkcodecs.h. */
I_AM_A_MODULE_FOR(hk)