(root)/
Python-3.11.7/
Modules/
cjkcodecs/
cjkcodecs.h
       1  /*
       2   * cjkcodecs.h: common header for cjkcodecs
       3   *
       4   * Written by Hye-Shik Chang <perky@FreeBSD.org>
       5   */
       6  
       7  #ifndef _CJKCODECS_H_
       8  #define _CJKCODECS_H_
       9  
      10  #define PY_SSIZE_T_CLEAN
      11  #include "Python.h"
      12  #include "multibytecodec.h"
      13  
      14  
/* A Unicode "undefined" code point.  Decode-table entries equal to UNIINV
 * are treated as "no mapping" by _TRYMAP_DEC. */
#define UNIINV  0xFFFE

/* Internal-use DBCS code points which aren't used by any charsets.
 *   NOCHAR - an encode-table entry with this value is treated as
 *            "no mapping" by _TRYMAP_ENC.
 *   MULTIC - same numeric value as UNIINV, but it is a DBCS-side value.
 *            NOTE(review): presumably flags a multi-code-point sequence;
 *            confirm against the codecs that use it.
 *   DBCINV - returned by find_pairencmap() when no pair matches. */
#define NOCHAR  0xFFFF
#define MULTIC  0xFFFE
#define DBCINV  0xFFFD

/* Shorter macros to save source size of mapping tables.  These are
 * single-letter names, so they can collide with identifiers in any file
 * that includes this header. */
#define U UNIINV
#define N NOCHAR
#define M MULTIC
#define D DBCINV
      28  
/*
 * One row of a decode table.  TRYMAP_DEC picks the row with the first byte
 * (charset##_decmap[c1]) and then reads map[c2 - bottom] for a second byte
 * c2 inside the inclusive range [bottom, top].  A NULL map means the row has
 * no entries; an entry equal to UNIINV means "no mapping".
 */
struct dbcs_index {
    const ucs2_t *map;
    unsigned char bottom, top;
};
typedef struct dbcs_index decode_map;
      34  
/*
 * Same shape as struct dbcs_index, but the table entries are Py_UCS4 values.
 * NOTE(review): presumably used for charsets that decode to code points
 * which do not fit in ucs2_t -- confirm against the codecs that use it.
 */
struct widedbcs_index {
    const Py_UCS4 *map;
    unsigned char bottom, top;
};
typedef struct widedbcs_index widedecode_map;
      40  
/*
 * One row of an encode table.  TRYMAP_ENC picks the row with the high part
 * of the code point (charset##_encmap[uni >> 8]) and then reads
 * map[(uni & 0xff) - bottom] when the low byte lies inside the inclusive
 * range [bottom, top].  A NULL map means the row has no entries; an entry
 * equal to NOCHAR means "no mapping".
 */
struct unim_index {
    const DBCHAR *map;
    unsigned char bottom, top;
};
typedef struct unim_index encode_map;
      46  
/*
 * Same shape as struct unim_index, but the table is an array of single
 * bytes.  Nothing in this header uses it.
 * NOTE(review): presumably for encoders whose table entries are stored as
 * raw bytes -- confirm against the codecs that use it.
 */
struct unim_index_bytebased {
    const unsigned char *map;
    unsigned char bottom, top;
};
      51  
/*
 * A named pair of encode/decode tables for one charset.  register_maps()
 * publishes each entry of mapping_list as a capsule named
 * "__map_" + charset, and importmap() reads encmap/decmap back out of such
 * a capsule.  Either table pointer may be NULL (see MAPPING_ENCONLY and
 * MAPPING_DECONLY); an entry whose charset is "" ends the list.
 */
struct dbcs_map {
    const char *charset;
    const struct unim_index *encmap;
    const struct dbcs_index *decmap;
};
      57  
/*
 * One entry of a table searched by find_pairencmap().
 *   uniseq - key; find_pairencmap() compares it with (body << 16 | modifier).
 *   code   - value returned when the key matches.
 * find_pairencmap() uses a binary search, so a table of these needs to be
 * sorted by ascending uniseq.
 */
struct pair_encodemap {
    Py_UCS4 uniseq;
    DBCHAR code;
};
      62  
/* Tentative definitions so that getcodec() and register_maps() below can
 * refer to the tables.  The END_CODECS_LIST and END_MAPPINGS_LIST macros
 * emit the initialized definitions in the including source file. */
static const MultibyteCodec *codec_list;
static const struct dbcs_map *mapping_list;
      65  
/* Emits the signature of <encoding>_codec_init(); follow it with the
 * function body.  CODEC_STATELESS_WINIT refers to the resulting function by
 * that name. */
#define CODEC_INIT(encoding)                                            \
    static int encoding##_codec_init(const void *config)
      68  
/* Signature macros for the encoder half of a codec; each is followed by the
 * function body.  The parameter names matter: the helper macros in this
 * header (INCHAR1, NEXT_INCHAR, NEXT_OUT, REQUIRE_OUTBUF, OUTBYTEn, ...)
 * refer to kind, data, inpos, outbuf and outleft by name. */

/* <encoding>_encode_init(state, config) */
#define ENCODER_INIT(encoding)                                          \
    static int encoding##_encode_init(                                  \
        MultibyteCodec_State *state, const void *config)
/* <encoding>_encode(state, config, kind, data, inpos, inlen,
 *                   outbuf, outleft, flags) */
#define ENCODER(encoding)                                               \
    static Py_ssize_t encoding##_encode(                                \
        MultibyteCodec_State *state, const void *config,                \
        int kind, const void *data,                                     \
        Py_ssize_t *inpos, Py_ssize_t inlen,                            \
        unsigned char **outbuf, Py_ssize_t outleft, int flags)
/* <encoding>_encode_reset(state, config, outbuf, outleft) */
#define ENCODER_RESET(encoding)                                         \
    static Py_ssize_t encoding##_encode_reset(                          \
        MultibyteCodec_State *state, const void *config,                \
        unsigned char **outbuf, Py_ssize_t outleft)
      82  
/* Signature macros for the decoder half of a codec; each is followed by the
 * function body.  The parameter names matter: the helper macros in this
 * header (INBYTEn, NEXT_IN, REQUIRE_INBUF, OUTCHAR, OUTCHAR2) refer to
 * inbuf, inleft and writer by name. */

/* <encoding>_decode_init(state, config) */
#define DECODER_INIT(encoding)                                          \
    static int encoding##_decode_init(                                  \
        MultibyteCodec_State *state, const void *config)
/* <encoding>_decode(state, config, inbuf, inleft, writer) */
#define DECODER(encoding)                                               \
    static Py_ssize_t encoding##_decode(                                \
        MultibyteCodec_State *state, const void *config,                \
        const unsigned char **inbuf, Py_ssize_t inleft,                 \
        _PyUnicodeWriter *writer)
/* <encoding>_decode_reset(state, config) */
#define DECODER_RESET(encoding)                                         \
    static Py_ssize_t encoding##_decode_reset(                          \
        MultibyteCodec_State *state, const void *config)
      94  
/* Cursor-advancing helpers.  They operate on variables of the enclosing
 * function, which the ENCODER/DECODER signature macros provide. */

/* Decoder side: move *inbuf forward by i bytes and reduce inleft by i. */
#define NEXT_IN(i)                              \
    do {                                        \
        (*inbuf) += (i);                        \
        (inleft) -= (i);                        \
    } while (0)
/* Encoder side: move the input position *inpos forward by i. */
#define NEXT_INCHAR(i)                          \
    do {                                        \
        (*inpos) += (i);                        \
    } while (0)
/* Encoder side: move *outbuf forward by o bytes and reduce outleft by o. */
#define NEXT_OUT(o)                             \
    do {                                        \
        (*outbuf) += (o);                       \
        (outleft) -= (o);                       \
    } while (0)
/* Encoder side: NEXT_INCHAR(i) followed by NEXT_OUT(o). */
#define NEXT(i, o)                              \
    do {                                        \
        NEXT_INCHAR(i);                         \
        NEXT_OUT(o);                            \
    } while (0)
     114  
/* Return MBERR_TOOFEW from the enclosing function when fewer than n input
 * bytes remain (inleft < n).  Note the hidden return statement. */
#define REQUIRE_INBUF(n)                        \
    do {                                        \
        if (inleft < (n))                       \
            return MBERR_TOOFEW;                \
    } while (0)

/* Return MBERR_TOOSMALL from the enclosing function when fewer than n output
 * bytes remain (outleft < n).  Note the hidden return statement. */
#define REQUIRE_OUTBUF(n)                       \
    do {                                        \
        if (outleft < (n))                      \
            return MBERR_TOOSMALL;              \
    } while (0)
     126  
/* Decoder side: the input bytes at offsets 0..3 from the current *inbuf.
 * These do no bounds checking themselves; REQUIRE_INBUF is the macro in this
 * header that checks how many bytes remain. */
#define INBYTE1 ((*inbuf)[0])
#define INBYTE2 ((*inbuf)[1])
#define INBYTE3 ((*inbuf)[2])
#define INBYTE4 ((*inbuf)[3])

/* Encoder side: the input character at position *inpos and the one after
 * it, read with PyUnicode_READ(kind, data, ...).  No bounds checking is
 * done here. */
#define INCHAR1 (PyUnicode_READ(kind, data, *inpos))
#define INCHAR2 (PyUnicode_READ(kind, data, *inpos + 1))
     134  
/* Decoder side: append one character to `writer` with
 * _PyUnicodeWriter_WriteChar().  If that call reports failure (< 0), return
 * MBERR_EXCEPTION from the enclosing function. */
#define OUTCHAR(c)                                                         \
    do {                                                                   \
        if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0)                   \
            return MBERR_EXCEPTION;                                         \
    } while (0)
     140  
     141  #define OUTCHAR2(c1, c2)                                                   \
     142      do {                                                                   \
     143          Py_UCS4 _c1 = (c1);                                                \
     144          Py_UCS4 _c2 = (c2);                                                \
     145          if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, c2)) < 0)      \
     146              return MBERR_EXCEPTION;                                        \
     147          PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1);     \
     148          PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2); \
     149          writer->pos += 2;                                                  \
     150      } while (0)
     151  
/* Encoder side: store byte c at offset i from the current *outbuf.
 * The assert checks, when assertions are enabled, that c fits in an
 * unsigned char.  c is expanded more than once, so it should not have side
 * effects.  No space check is done here and *outbuf is not advanced; see
 * REQUIRE_OUTBUF and NEXT_OUT. */
#define OUTBYTEI(c, i)                     \
    do {                                   \
        assert((unsigned char)(c) == (c)); \
        ((*outbuf)[i]) = (c);              \
    } while (0)

/* Store a byte at output offset 0, 1, 2 or 3 respectively. */
#define OUTBYTE1(c) OUTBYTEI(c, 0)
#define OUTBYTE2(c) OUTBYTEI(c, 1)
#define OUTBYTE3(c) OUTBYTEI(c, 2)
#define OUTBYTE4(c) OUTBYTEI(c, 3)
     162  
/* Encoder side: WRITEBYTEn checks that n output bytes are available
 * (REQUIRE_OUTBUF returns MBERR_TOOSMALL from the enclosing function
 * otherwise) and then stores the n bytes at offsets 0..n-1 of *outbuf.
 * *outbuf and outleft are left unchanged; advancing them is a separate step
 * (NEXT_OUT / NEXT). */
#define WRITEBYTE1(c1)              \
    do {                            \
        REQUIRE_OUTBUF(1);          \
        OUTBYTE1(c1);               \
    } while (0)
#define WRITEBYTE2(c1, c2)          \
    do {                            \
        REQUIRE_OUTBUF(2);          \
        OUTBYTE1(c1);               \
        OUTBYTE2(c2);               \
    } while (0)
#define WRITEBYTE3(c1, c2, c3)      \
    do {                            \
        REQUIRE_OUTBUF(3);          \
        OUTBYTE1(c1);               \
        OUTBYTE2(c2);               \
        OUTBYTE3(c3);               \
    } while (0)
#define WRITEBYTE4(c1, c2, c3, c4)  \
    do {                            \
        REQUIRE_OUTBUF(4);          \
        OUTBYTE1(c1);               \
        OUTBYTE2(c2);               \
        OUTBYTE3(c3);               \
        OUTBYTE4(c4);               \
    } while (0)
     189  
/* Table lookups, written as expressions so they can be used in conditions.
 *
 * _TRYMAP_ENC(m, assi, val) is true when row m has a table, val lies in
 * [m->bottom, m->top], and the entry for val is not NOCHAR.  The entry is
 * assigned to assi as soon as the range checks pass, so assi is also
 * overwritten (with NOCHAR) when the final comparison fails.  val is
 * expanded several times and should not have side effects. */
#define _TRYMAP_ENC(m, assi, val)                               \
    ((m)->map != NULL && (val) >= (m)->bottom &&                \
        (val)<= (m)->top && ((assi) = (m)->map[(val) -          \
        (m)->bottom]) != NOCHAR)
/* Encode lookup in <charset>_encmap: the row is selected by uni >> 8 and
 * the entry by uni & 0xff.  The row index is not range-checked here. */
#define TRYMAP_ENC(charset, assi, uni)                     \
    _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)

/* _TRYMAP_DEC is the decode-side counterpart of _TRYMAP_ENC; the "no
 * mapping" sentinel is UNIINV. */
#define _TRYMAP_DEC(m, assi, val)                             \
    ((m)->map != NULL &&                                        \
     (val) >= (m)->bottom &&                                    \
     (val)<= (m)->top &&                                        \
     ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)
/* Decode lookup in <charset>_decmap: the row is selected by c1 and the
 * entry by c2.  The row index is not range-checked here. */
#define TRYMAP_DEC(charset, assi, c1, c2)                     \
    _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
     204  
/* Builders for the table of charset maps exported by a codec module.
 * Usage: BEGIN_MAPPINGS_LIST, then any number of MAPPING_* entries, then
 * END_MAPPINGS_LIST. */

/* Opens the static array _mapping_list. */
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
/* One entry each; the charset name is the stringified argument, and the
 * tables are <enc>_encmap / <enc>_decmap (NULL for the missing direction). */
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
/* Appends the terminating entry (charset "") that register_maps() stops at,
 * closes the array, and defines mapping_list to point at it. */
#define END_MAPPINGS_LIST                               \
    {"", NULL, NULL} };                                 \
    static const struct dbcs_map *mapping_list =        \
        (const struct dbcs_map *)_mapping_list;
     213  
/* Builders for the table of codecs provided by a codec module.
 * Usage: BEGIN_CODECS_LIST, then any number of CODEC_* entries, then
 * END_CODECS_LIST.  The initializers are positional, so their order has to
 * match the field order of MultibyteCodec (declared in multibytecodec.h). */

/* Opens the static array _codec_list. */
#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
/* The six method slots of an entry: encode, encode_init, encode_reset,
 * decode, decode_init, decode_reset. */
#define _STATEFUL_METHODS(enc)          \
    enc##_encode,                       \
    enc##_encode_init,                  \
    enc##_encode_reset,                 \
    enc##_decode,                       \
    enc##_decode_init,                  \
    enc##_decode_reset,
/* Same six slots with only encode and decode filled in. */
#define _STATELESS_METHODS(enc)         \
    enc##_encode, NULL, NULL,           \
    enc##_decode, NULL, NULL,
/* Entry named by the stringified argument, followed by two NULL fields and
 * all six methods.  NOTE(review): the second field is presumably the
 * `config` pointer handed to the codec functions; the third is the slot
 * that CODEC_STATELESS_WINIT fills with <enc>_codec_init -- confirm against
 * multibytecodec.h. */
#define CODEC_STATEFUL(enc) {           \
    #enc, NULL, NULL,                   \
    _STATEFUL_METHODS(enc)              \
},
/* As CODEC_STATEFUL, but with only the encode and decode methods. */
#define CODEC_STATELESS(enc) {          \
    #enc, NULL, NULL,                   \
    _STATELESS_METHODS(enc)             \
},
/* As CODEC_STATELESS, plus <enc>_codec_init (see CODEC_INIT) in the third
 * field. */
#define CODEC_STATELESS_WINIT(enc) {    \
    #enc, NULL,                         \
    enc##_codec_init,                   \
    _STATELESS_METHODS(enc)             \
},
/* Appends the terminating entry (encoding "") that getcodec() stops at,
 * closes the array, and defines codec_list to point at it. */
#define END_CODECS_LIST                                 \
    {"", NULL,} };                                      \
    static const MultibyteCodec *codec_list =           \
        (const MultibyteCodec *)_codec_list;
     242  
     243  
     244  
     245  static PyObject *
     246  getmultibytecodec(void)
     247  {
     248      PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec");
     249      if (mod == NULL) {
     250          return NULL;
     251      }
     252  
     253      PyObject *cofunc = PyObject_GetAttrString(mod, "__create_codec");
     254      Py_DECREF(mod);
     255      return cofunc;
     256  }
     257  
     258  static PyObject *
     259  getcodec(PyObject *self, PyObject *encoding)
     260  {
     261      PyObject *codecobj, *r, *cofunc;
     262      const MultibyteCodec *codec;
     263      const char *enc;
     264  
     265      if (!PyUnicode_Check(encoding)) {
     266          PyErr_SetString(PyExc_TypeError,
     267                          "encoding name must be a string.");
     268          return NULL;
     269      }
     270      enc = PyUnicode_AsUTF8(encoding);
     271      if (enc == NULL)
     272          return NULL;
     273  
     274      cofunc = getmultibytecodec();
     275      if (cofunc == NULL)
     276          return NULL;
     277  
     278      for (codec = codec_list; codec->encoding[0]; codec++)
     279          if (strcmp(codec->encoding, enc) == 0)
     280              break;
     281  
     282      if (codec->encoding[0] == '\0') {
     283          PyErr_SetString(PyExc_LookupError,
     284                          "no such codec is supported.");
     285          return NULL;
     286      }
     287  
     288      codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL);
     289      if (codecobj == NULL)
     290          return NULL;
     291  
     292      r = PyObject_CallOneArg(cofunc, codecobj);
     293      Py_DECREF(codecobj);
     294      Py_DECREF(cofunc);
     295  
     296      return r;
     297  }
     298  
     299  
     300  static int
     301  register_maps(PyObject *module)
     302  {
     303      const struct dbcs_map *h;
     304  
     305      for (h = mapping_list; h->charset[0] != '\0'; h++) {
     306          char mhname[256] = "__map_";
     307          strcpy(mhname + sizeof("__map_") - 1, h->charset);
     308  
     309          PyObject *capsule = PyCapsule_New((void *)h,
     310                                            PyMultibyteCodec_CAPSULE_NAME, NULL);
     311          if (capsule == NULL) {
     312              return -1;
     313          }
     314          if (PyModule_AddObject(module, mhname, capsule) < 0) {
     315              Py_DECREF(capsule);
     316              return -1;
     317          }
     318      }
     319      return 0;
     320  }
     321  
     322  #ifdef USING_BINARY_PAIR_SEARCH
     323  static DBCHAR
     324  find_pairencmap(ucs2_t body, ucs2_t modifier,
     325                  const struct pair_encodemap *haystack, int haystacksize)
     326  {
     327      int pos, min, max;
     328      Py_UCS4 value = body << 16 | modifier;
     329  
     330      min = 0;
     331      max = haystacksize;
     332  
     333      for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) {
     334          if (value < haystack[pos].uniseq) {
     335              if (max != pos) {
     336                  max = pos;
     337                  continue;
     338              }
     339          }
     340          else if (value > haystack[pos].uniseq) {
     341              if (min != pos) {
     342                  min = pos;
     343                  continue;
     344              }
     345          }
     346          break;
     347      }
     348  
     349      if (value == haystack[pos].uniseq) {
     350          return haystack[pos].code;
     351      }
     352      return DBCINV;
     353  }
     354  #endif
     355  
     356  #ifdef USING_IMPORTED_MAPS
/* Convenience wrapper around importmap(): looks up the attribute
 * "__map_<charset>" in the module "_codecs_<locale>" and stores the tables
 * through encmap / decmap (either may be NULL to skip that table).
 * Evaluates to importmap()'s result: 0 on success, -1 on failure. */
#define IMPORT_MAP(locale, charset, encmap, decmap) \
    importmap("_codecs_" #locale, "__map_" #charset, \
              (const void**)encmap, (const void**)decmap)
     360  
     361  static int
     362  importmap(const char *modname, const char *symbol,
     363            const void **encmap, const void **decmap)
     364  {
     365      PyObject *o, *mod;
     366  
     367      mod = PyImport_ImportModule(modname);
     368      if (mod == NULL)
     369          return -1;
     370  
     371      o = PyObject_GetAttrString(mod, symbol);
     372      if (o == NULL)
     373          goto errorexit;
     374      else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) {
     375          PyErr_SetString(PyExc_ValueError,
     376                          "map data must be a Capsule.");
     377          goto errorexit;
     378      }
     379      else {
     380          struct dbcs_map *map;
     381          map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME);
     382          if (encmap != NULL)
     383              *encmap = map->encmap;
     384          if (decmap != NULL)
     385              *decmap = map->decmap;
     386          Py_DECREF(o);
     387      }
     388  
     389      Py_DECREF(mod);
     390      return 0;
     391  
     392  errorexit:
     393      Py_DECREF(mod);
     394      return -1;
     395  }
     396  #endif
     397  
     398  static int
     399  _cjk_exec(PyObject *module)
     400  {
     401      return register_maps(module);
     402  }
     403  
     404  
/* Method table shared by every module built from this header: a single
 * function, getcodec(encoding), taking one argument (METH_O) and carrying
 * an empty docstring.  The {NULL, NULL} entry ends the table. */
static struct PyMethodDef _cjk_methods[] = {
    {"getcodec", (PyCFunction)getcodec, METH_O, ""},
    {NULL, NULL},
};
     409  
/* Module slots: _cjk_exec is registered as the Py_mod_exec handler.
 * The {0, NULL} entry ends the table. */
static PyModuleDef_Slot _cjk_slots[] = {
    {Py_mod_exec, _cjk_exec},
    {0, NULL}
};
     414  
/* Emits the module boilerplate for the extension module "_codecs_<loc>":
 * a static PyModuleDef named _cjk_module (no per-module state, m_size 0)
 * that uses _cjk_methods and _cjk_slots, and the exported init function
 * PyInit__codecs_<loc>(), which returns PyModuleDef_Init(&_cjk_module).
 * The macro defines fixed-name objects, so use it once per source file. */
#define I_AM_A_MODULE_FOR(loc)                                          \
    static struct PyModuleDef _cjk_module = {                           \
        PyModuleDef_HEAD_INIT,                                          \
        .m_name = "_codecs_"#loc,                                       \
        .m_size = 0,                                                    \
        .m_methods = _cjk_methods,                                      \
        .m_slots = _cjk_slots,                                          \
    };                                                                  \
                                                                        \
    PyMODINIT_FUNC                                                      \
    PyInit__codecs_##loc(void)                                          \
    {                                                                   \
        return PyModuleDef_Init(&_cjk_module);                          \
    }
     429  
     430  #endif