(root)/
Python-3.12.0/
Modules/
cjkcodecs/
cjkcodecs.h
       1  /*
       2   * cjkcodecs.h: common header for cjkcodecs
       3   *
       4   * Written by Hye-Shik Chang <perky@FreeBSD.org>
       5   */
       6  
       7  #ifndef _CJKCODECS_H_
       8  #define _CJKCODECS_H_
       9  
      10  #define PY_SSIZE_T_CLEAN
      11  #include "Python.h"
      12  #include "multibytecodec.h"
      13  
      14  
      15  /* a unicode "undefined" code point */
      16  #define UNIINV  0xFFFE
      17  
      18  /* internal-use DBCS code points which aren't used by any charsets */
      19  #define NOCHAR  0xFFFF
      20  #define MULTIC  0xFFFE
      21  #define DBCINV  0xFFFD
      22  
      23  /* shorter macros to save source size of mapping tables */
      24  #define U UNIINV
      25  #define N NOCHAR
      26  #define M MULTIC
      27  #define D DBCINV
      28  
      29  struct dbcs_index {
      30      const ucs2_t *map;
      31      unsigned char bottom, top;
      32  };
      33  typedef struct dbcs_index decode_map;
      34  
      35  struct widedbcs_index {
      36      const Py_UCS4 *map;
      37      unsigned char bottom, top;
      38  };
      39  typedef struct widedbcs_index widedecode_map;
      40  
      41  struct unim_index {
      42      const DBCHAR *map;
      43      unsigned char bottom, top;
      44  };
      45  typedef struct unim_index encode_map;
      46  
      47  struct unim_index_bytebased {
      48      const unsigned char *map;
      49      unsigned char bottom, top;
      50  };
      51  
      52  struct dbcs_map {
      53      const char *charset;
      54      const struct unim_index *encmap;
      55      const struct dbcs_index *decmap;
      56  };
      57  
      58  struct pair_encodemap {
      59      Py_UCS4 uniseq;
      60      DBCHAR code;
      61  };
      62  
      63  #ifndef CJK_MOD_SPECIFIC_STATE
      64  #define CJK_MOD_SPECIFIC_STATE
      65  #endif
      66  
      67  typedef struct _cjk_mod_state {
      68      int num_mappings;
      69      int num_codecs;
      70      struct dbcs_map *mapping_list;
      71      MultibyteCodec *codec_list;
      72  
      73      CJK_MOD_SPECIFIC_STATE
      74  } cjkcodecs_module_state;
      75  
      76  static inline cjkcodecs_module_state *
      77  get_module_state(PyObject *mod)
      78  {
      79      void *state = PyModule_GetState(mod);
      80      assert(state != NULL);
      81      return (cjkcodecs_module_state *)state;
      82  }
      83  
      84  #define CODEC_INIT(encoding)                                            \
      85      static int encoding##_codec_init(const MultibyteCodec *codec)
      86  
      87  #define ENCODER_INIT(encoding)                                          \
      88      static int encoding##_encode_init(                                  \
      89          MultibyteCodec_State *state, const MultibyteCodec *codec)
      90  #define ENCODER(encoding)                                               \
      91      static Py_ssize_t encoding##_encode(                                \
      92          MultibyteCodec_State *state, const MultibyteCodec *codec,       \
      93          int kind, const void *data,                                     \
      94          Py_ssize_t *inpos, Py_ssize_t inlen,                            \
      95          unsigned char **outbuf, Py_ssize_t outleft, int flags)
      96  #define ENCODER_RESET(encoding)                                         \
      97      static Py_ssize_t encoding##_encode_reset(                          \
      98          MultibyteCodec_State *state, const MultibyteCodec *codec,       \
      99          unsigned char **outbuf, Py_ssize_t outleft)
     100  
     101  #define DECODER_INIT(encoding)                                          \
     102      static int encoding##_decode_init(                                  \
     103          MultibyteCodec_State *state, const MultibyteCodec *codec)
     104  #define DECODER(encoding)                                               \
     105      static Py_ssize_t encoding##_decode(                                \
     106          MultibyteCodec_State *state, const MultibyteCodec *codec,       \
     107          const unsigned char **inbuf, Py_ssize_t inleft,                 \
     108          _PyUnicodeWriter *writer)
     109  #define DECODER_RESET(encoding)                                         \
     110      static Py_ssize_t encoding##_decode_reset(                          \
     111          MultibyteCodec_State *state, const MultibyteCodec *codec)
     112  
     113  #define NEXT_IN(i)                              \
     114      do {                                        \
     115          (*inbuf) += (i);                        \
     116          (inleft) -= (i);                        \
     117      } while (0)
     118  #define NEXT_INCHAR(i)                          \
     119      do {                                        \
     120          (*inpos) += (i);                        \
     121      } while (0)
     122  #define NEXT_OUT(o)                             \
     123      do {                                        \
     124          (*outbuf) += (o);                       \
     125          (outleft) -= (o);                       \
     126      } while (0)
     127  #define NEXT(i, o)                              \
     128      do {                                        \
     129          NEXT_INCHAR(i);                         \
     130          NEXT_OUT(o);                            \
     131      } while (0)
     132  
     133  #define REQUIRE_INBUF(n)                        \
     134      do {                                        \
     135          if (inleft < (n))                       \
     136              return MBERR_TOOFEW;                \
     137      } while (0)
     138  
     139  #define REQUIRE_OUTBUF(n)                       \
     140      do {                                        \
     141          if (outleft < (n))                      \
     142              return MBERR_TOOSMALL;              \
     143      } while (0)
     144  
     145  #define INBYTE1 ((*inbuf)[0])
     146  #define INBYTE2 ((*inbuf)[1])
     147  #define INBYTE3 ((*inbuf)[2])
     148  #define INBYTE4 ((*inbuf)[3])
     149  
     150  #define INCHAR1 (PyUnicode_READ(kind, data, *inpos))
     151  #define INCHAR2 (PyUnicode_READ(kind, data, *inpos + 1))
     152  
     153  #define OUTCHAR(c)                                                         \
     154      do {                                                                   \
     155          if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0)                   \
     156              return MBERR_EXCEPTION;                                         \
     157      } while (0)
     158  
     159  #define OUTCHAR2(c1, c2)                                                   \
     160      do {                                                                   \
     161          Py_UCS4 _c1 = (c1);                                                \
     162          Py_UCS4 _c2 = (c2);                                                \
     163          if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, c2)) < 0)      \
     164              return MBERR_EXCEPTION;                                        \
     165          PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1);     \
     166          PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2); \
     167          writer->pos += 2;                                                  \
     168      } while (0)
     169  
     170  #define OUTBYTEI(c, i)                     \
     171      do {                                   \
     172          assert((unsigned char)(c) == (c)); \
     173          ((*outbuf)[i]) = (c);              \
     174      } while (0)
     175  
     176  #define OUTBYTE1(c) OUTBYTEI(c, 0)
     177  #define OUTBYTE2(c) OUTBYTEI(c, 1)
     178  #define OUTBYTE3(c) OUTBYTEI(c, 2)
     179  #define OUTBYTE4(c) OUTBYTEI(c, 3)
     180  
     181  #define WRITEBYTE1(c1)              \
     182      do {                            \
     183          REQUIRE_OUTBUF(1);          \
     184          OUTBYTE1(c1);               \
     185      } while (0)
     186  #define WRITEBYTE2(c1, c2)          \
     187      do {                            \
     188          REQUIRE_OUTBUF(2);          \
     189          OUTBYTE1(c1);               \
     190          OUTBYTE2(c2);               \
     191      } while (0)
     192  #define WRITEBYTE3(c1, c2, c3)      \
     193      do {                            \
     194          REQUIRE_OUTBUF(3);          \
     195          OUTBYTE1(c1);               \
     196          OUTBYTE2(c2);               \
     197          OUTBYTE3(c3);               \
     198      } while (0)
     199  #define WRITEBYTE4(c1, c2, c3, c4)  \
     200      do {                            \
     201          REQUIRE_OUTBUF(4);          \
     202          OUTBYTE1(c1);               \
     203          OUTBYTE2(c2);               \
     204          OUTBYTE3(c3);               \
     205          OUTBYTE4(c4);               \
     206      } while (0)
     207  
     208  #define _TRYMAP_ENC(m, assi, val)                               \
     209      ((m)->map != NULL && (val) >= (m)->bottom &&                \
     210          (val)<= (m)->top && ((assi) = (m)->map[(val) -          \
     211          (m)->bottom]) != NOCHAR)
     212  #define TRYMAP_ENC(charset, assi, uni)                     \
     213      _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
     214  #define TRYMAP_ENC_ST(charset, assi, uni) \
     215      _TRYMAP_ENC(&(codec->modstate->charset##_encmap)[(uni) >> 8], \
     216                  assi, (uni) & 0xff)
     217  
     218  #define _TRYMAP_DEC(m, assi, val)                             \
     219      ((m)->map != NULL &&                                        \
     220       (val) >= (m)->bottom &&                                    \
     221       (val)<= (m)->top &&                                        \
     222       ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)
     223  #define TRYMAP_DEC(charset, assi, c1, c2)                     \
     224      _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
     225  #define TRYMAP_DEC_ST(charset, assi, c1, c2) \
     226      _TRYMAP_DEC(&(codec->modstate->charset##_decmap)[c1], assi, c2)
     227  
     228  #define BEGIN_MAPPINGS_LIST(NUM)                                    \
     229  static int                                                          \
     230  add_mappings(cjkcodecs_module_state *st)                            \
     231  {                                                                   \
     232      int idx = 0;                                                    \
     233      (void)idx;                                                      \
     234      st->num_mappings = NUM;                                         \
     235      st->mapping_list = PyMem_Calloc(NUM, sizeof(struct dbcs_map));  \
     236      if (st->mapping_list == NULL) {                                 \
     237          return -1;                                                  \
     238      }
     239  
     240  #define MAPPING_ENCONLY(enc) \
     241      st->mapping_list[idx++] = (struct dbcs_map){#enc, (void*)enc##_encmap, NULL};
     242  #define MAPPING_DECONLY(enc) \
     243      st->mapping_list[idx++] = (struct dbcs_map){#enc, NULL, (void*)enc##_decmap};
     244  #define MAPPING_ENCDEC(enc) \
     245      st->mapping_list[idx++] = (struct dbcs_map){#enc, (void*)enc##_encmap, (void*)enc##_decmap};
     246  
     247  #define END_MAPPINGS_LIST               \
     248      assert(st->num_mappings == idx);    \
     249      return 0;                           \
     250  }
     251  
     252  #define BEGIN_CODECS_LIST(NUM)                                  \
     253  static int                                                      \
     254  add_codecs(cjkcodecs_module_state *st)                          \
     255  {                                                               \
     256      int idx = 0;                                                \
     257      (void)idx;                                                  \
     258      st->num_codecs = NUM;                                       \
     259      st->codec_list = PyMem_Calloc(NUM, sizeof(MultibyteCodec)); \
     260      if (st->codec_list == NULL) {                               \
     261          return -1;                                              \
     262      }
     263  
     264  #define _STATEFUL_METHODS(enc)          \
     265      enc##_encode,                       \
     266      enc##_encode_init,                  \
     267      enc##_encode_reset,                 \
     268      enc##_decode,                       \
     269      enc##_decode_init,                  \
     270      enc##_decode_reset,
     271  #define _STATELESS_METHODS(enc)         \
     272      enc##_encode, NULL, NULL,           \
     273      enc##_decode, NULL, NULL,
     274  
     275  #define NEXT_CODEC \
     276      st->codec_list[idx++]
     277  
     278  #define CODEC_STATEFUL(enc) \
     279      NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATEFUL_METHODS(enc)};
     280  #define CODEC_STATELESS(enc) \
     281      NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATELESS_METHODS(enc)};
     282  #define CODEC_STATELESS_WINIT(enc) \
     283      NEXT_CODEC = (MultibyteCodec){#enc, NULL, enc##_codec_init, _STATELESS_METHODS(enc)};
     284  
     285  #define END_CODECS_LIST                         \
     286      assert(st->num_codecs == idx);              \
     287      for (int i = 0; i < st->num_codecs; i++) {  \
     288          st->codec_list[i].modstate = st;        \
     289      }                                           \
     290      return 0;                                   \
     291  }
     292  
     293  
     294  
     295  static PyObject *
     296  getmultibytecodec(void)
     297  {
     298      return _PyImport_GetModuleAttrString("_multibytecodec", "__create_codec");
     299  }
     300  
     301  static void
     302  destroy_codec_capsule(PyObject *capsule)
     303  {
     304      void *ptr = PyCapsule_GetPointer(capsule, CODEC_CAPSULE);
     305      codec_capsule *data = (codec_capsule *)ptr;
     306      Py_DECREF(data->cjk_module);
     307      PyMem_Free(ptr);
     308  }
     309  
     310  static codec_capsule *
     311  capsulate_codec(PyObject *mod, const MultibyteCodec *codec)
     312  {
     313      codec_capsule *data = PyMem_Malloc(sizeof(codec_capsule));
     314      if (data == NULL) {
     315          PyErr_NoMemory();
     316          return NULL;
     317      }
     318      data->codec = codec;
     319      data->cjk_module = Py_NewRef(mod);
     320      return data;
     321  }
     322  
     323  static PyObject *
     324  _getcodec(PyObject *self, const MultibyteCodec *codec)
     325  {
     326      PyObject *cofunc = getmultibytecodec();
     327      if (cofunc == NULL) {
     328          return NULL;
     329      }
     330  
     331      codec_capsule *data = capsulate_codec(self, codec);
     332      if (data == NULL) {
     333          Py_DECREF(cofunc);
     334          return NULL;
     335      }
     336      PyObject *codecobj = PyCapsule_New(data, CODEC_CAPSULE,
     337                                         destroy_codec_capsule);
     338      if (codecobj == NULL) {
     339          PyMem_Free(data);
     340          Py_DECREF(cofunc);
     341          return NULL;
     342      }
     343  
     344      PyObject *res = PyObject_CallOneArg(cofunc, codecobj);
     345      Py_DECREF(codecobj);
     346      Py_DECREF(cofunc);
     347      return res;
     348  }
     349  
     350  static PyObject *
     351  getcodec(PyObject *self, PyObject *encoding)
     352  {
     353      if (!PyUnicode_Check(encoding)) {
     354          PyErr_SetString(PyExc_TypeError,
     355                          "encoding name must be a string.");
     356          return NULL;
     357      }
     358      const char *enc = PyUnicode_AsUTF8(encoding);
     359      if (enc == NULL) {
     360          return NULL;
     361      }
     362  
     363      cjkcodecs_module_state *st = get_module_state(self);
     364      for (int i = 0; i < st->num_codecs; i++) {
     365          const MultibyteCodec *codec = &st->codec_list[i];
     366          if (strcmp(codec->encoding, enc) == 0) {
     367              return _getcodec(self, codec);
     368          }
     369      }
     370  
     371      PyErr_SetString(PyExc_LookupError,
     372                      "no such codec is supported.");
     373      return NULL;
     374  }
     375  
     376  static int add_mappings(cjkcodecs_module_state *);
     377  static int add_codecs(cjkcodecs_module_state *);
     378  
     379  static int
     380  register_maps(PyObject *module)
     381  {
     382      // Init module state.
     383      cjkcodecs_module_state *st = get_module_state(module);
     384      if (add_mappings(st) < 0) {
     385          return -1;
     386      }
     387      if (add_codecs(st) < 0) {
     388          return -1;
     389      }
     390  
     391      for (int i = 0; i < st->num_mappings; i++) {
     392          const struct dbcs_map *h = &st->mapping_list[i];
     393          char mhname[256] = "__map_";
     394          strcpy(mhname + sizeof("__map_") - 1, h->charset);
     395  
     396          PyObject *capsule = PyCapsule_New((void *)h, MAP_CAPSULE, NULL);
     397          if (capsule == NULL) {
     398              return -1;
     399          }
     400          if (PyModule_AddObject(module, mhname, capsule) < 0) {
     401              Py_DECREF(capsule);
     402              return -1;
     403          }
     404      }
     405      return 0;
     406  }
     407  
     408  #ifdef USING_BINARY_PAIR_SEARCH
     409  static DBCHAR
     410  find_pairencmap(ucs2_t body, ucs2_t modifier,
     411                  const struct pair_encodemap *haystack, int haystacksize)
     412  {
     413      int pos, min, max;
     414      Py_UCS4 value = body << 16 | modifier;
     415  
     416      min = 0;
     417      max = haystacksize;
     418  
     419      for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) {
     420          if (value < haystack[pos].uniseq) {
     421              if (max != pos) {
     422                  max = pos;
     423                  continue;
     424              }
     425          }
     426          else if (value > haystack[pos].uniseq) {
     427              if (min != pos) {
     428                  min = pos;
     429                  continue;
     430              }
     431          }
     432          break;
     433      }
     434  
     435      if (value == haystack[pos].uniseq) {
     436          return haystack[pos].code;
     437      }
     438      return DBCINV;
     439  }
     440  #endif
     441  
     442  #ifdef USING_IMPORTED_MAPS
     443  #define IMPORT_MAP(locale, charset, encmap, decmap) \
     444      importmap("_codecs_" #locale, "__map_" #charset, \
     445                (const void**)encmap, (const void**)decmap)
     446  
     447  static int
     448  importmap(const char *modname, const char *symbol,
     449            const void **encmap, const void **decmap)
     450  {
     451      PyObject *o, *mod;
     452  
     453      mod = PyImport_ImportModule(modname);
     454      if (mod == NULL)
     455          return -1;
     456  
     457      o = PyObject_GetAttrString(mod, symbol);
     458      if (o == NULL)
     459          goto errorexit;
     460      else if (!PyCapsule_IsValid(o, MAP_CAPSULE)) {
     461          PyErr_SetString(PyExc_ValueError,
     462                          "map data must be a Capsule.");
     463          goto errorexit;
     464      }
     465      else {
     466          struct dbcs_map *map;
     467          map = PyCapsule_GetPointer(o, MAP_CAPSULE);
     468          if (encmap != NULL)
     469              *encmap = map->encmap;
     470          if (decmap != NULL)
     471              *decmap = map->decmap;
     472          Py_DECREF(o);
     473      }
     474  
     475      Py_DECREF(mod);
     476      return 0;
     477  
     478  errorexit:
     479      Py_DECREF(mod);
     480      return -1;
     481  }
     482  #endif
     483  
     484  static int
     485  _cjk_exec(PyObject *module)
     486  {
     487      return register_maps(module);
     488  }
     489  
     490  static void
     491  _cjk_free(void *mod)
     492  {
     493      cjkcodecs_module_state *st = get_module_state((PyObject *)mod);
     494      PyMem_Free(st->mapping_list);
     495      PyMem_Free(st->codec_list);
     496  }
     497  
     498  static struct PyMethodDef _cjk_methods[] = {
     499      {"getcodec", (PyCFunction)getcodec, METH_O, ""},
     500      {NULL, NULL},
     501  };
     502  
     503  static PyModuleDef_Slot _cjk_slots[] = {
     504      {Py_mod_exec, _cjk_exec},
     505      {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
     506      {0, NULL}
     507  };
     508  
     509  #define I_AM_A_MODULE_FOR(loc)                                          \
     510      static struct PyModuleDef _cjk_module = {                           \
     511          PyModuleDef_HEAD_INIT,                                          \
     512          .m_name = "_codecs_"#loc,                                       \
     513          .m_size = sizeof(cjkcodecs_module_state),                       \
     514          .m_methods = _cjk_methods,                                      \
     515          .m_slots = _cjk_slots,                                          \
     516          .m_free = _cjk_free,                                            \
     517      };                                                                  \
     518                                                                          \
     519      PyMODINIT_FUNC                                                      \
     520      PyInit__codecs_##loc(void)                                          \
     521      {                                                                   \
     522          return PyModuleDef_Init(&_cjk_module);                          \
     523      }
     524  
     525  #endif