(root)/
Python-3.11.7/
Python/
Python-tokenize.c
       1  #include "Python.h"
       2  #include "../Parser/tokenizer.h"
       3  
       4  static struct PyModuleDef _tokenizemodule;
       5  
/* Per-module state (PEP 573): holds the heap type created in
   tokenizemodule_exec(); traversed/cleared by the module GC hooks below. */
typedef struct {
    PyTypeObject *TokenizerIter;  /* strong reference to the TokenizerIter heap type */
} tokenize_state;
       9  
      10  static tokenize_state *
      11  get_tokenize_state(PyObject *module) {
      12      return (tokenize_state *)PyModule_GetState(module);
      13  }
      14  
      15  #define _tokenize_get_state_by_type(type) \
      16      get_tokenize_state(PyType_GetModuleByDef(type, &_tokenizemodule))
      17  
      18  #include "clinic/Python-tokenize.c.h"
      19  
      20  /*[clinic input]
      21  module _tokenizer
      22  class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter"
      23  [clinic start generated code]*/
      24  /*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/
      25  
/* Instance layout for _tokenize.TokenizerIter: a plain object wrapping the
   C tokenizer state. `tok` owns the tok_state and is freed in dealloc. */
typedef struct
{
    PyObject_HEAD struct tok_state *tok;
} tokenizeriterobject;
      30  
      31  /*[clinic input]
      32  @classmethod
      33  _tokenizer.tokenizeriter.__new__ as tokenizeriter_new
      34  
      35      source: str
      36  [clinic start generated code]*/
      37  
      38  static PyObject *
      39  tokenizeriter_new_impl(PyTypeObject *type, const char *source)
      40  /*[clinic end generated code: output=7fd9f46cf9263cbb input=4384b368407375c6]*/
      41  {
      42      tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0);
      43      if (self == NULL) {
      44          return NULL;
      45      }
      46      PyObject *filename = PyUnicode_FromString("<string>");
      47      if (filename == NULL) {
      48          return NULL;
      49      }
      50      self->tok = _PyTokenizer_FromUTF8(source, 1);
      51      if (self->tok == NULL) {
      52          Py_DECREF(filename);
      53          return NULL;
      54      }
      55      self->tok->filename = filename;
      56      return (PyObject *)self;
      57  }
      58  
/* tp_iternext: return the next token as a 7-tuple
   (string, type, lineno, end_lineno, col_offset, end_col_offset, line),
   or NULL with StopIteration set at end of input / on error. */
static PyObject *
tokenizeriter_next(tokenizeriterobject *it)
{
    const char *start;
    const char *end;
    /* start/end point into the tokenizer's internal buffer (may be NULL). */
    int type = _PyTokenizer_Get(it->tok, &start, &end);
    if (type == ERRORTOKEN && PyErr_Occurred()) {
        /* Tokenizer already set a real exception — propagate it. */
        return NULL;
    }
    if (type == ERRORTOKEN || type == ENDMARKER) {
        PyErr_SetString(PyExc_StopIteration, "EOF");
        return NULL;
    }
    PyObject *str = NULL;
    if (start == NULL || end == NULL) {
        /* Some token types carry no text span. */
        str = PyUnicode_FromString("");
    }
    else {
        str = PyUnicode_FromStringAndSize(start, end - start);
    }
    if (str == NULL) {
        return NULL;
    }

    /* The full buffered input so far, decoded leniently for the `line` slot. */
    Py_ssize_t size = it->tok->inp - it->tok->buf;
    PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
    if (line == NULL) {
        Py_DECREF(str);
        return NULL;
    }
    /* Multi-line strings start on an earlier line than the tokenizer's
       current one, so use the recorded multi-line start/first lineno. */
    const char *line_start = type == STRING ? it->tok->multi_line_start : it->tok->line_start;
    int lineno = type == STRING ? it->tok->first_lineno : it->tok->lineno;
    int end_lineno = it->tok->lineno;
    int col_offset = -1;   /* -1 signals "no column information" */
    int end_col_offset = -1;
    if (start != NULL && start >= line_start) {
        col_offset = (int)(start - line_start);
    }
    if (end != NULL && end >= it->tok->line_start) {
        end_col_offset = (int)(end - it->tok->line_start);
    }

    /* "N" steals the str/line references; "i" copies the ints. */
    return Py_BuildValue("(NiiiiiN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line);
}
     103  
     104  static void
     105  tokenizeriter_dealloc(tokenizeriterobject *it)
     106  {
     107      PyTypeObject *tp = Py_TYPE(it);
     108      _PyTokenizer_Free(it->tok);
     109      tp->tp_free(it);
     110      Py_DECREF(tp);
     111  }
     112  
/* Slot table for the TokenizerIter heap type: a self-iterating iterator
   (tp_iter returns self, tp_iternext produces token tuples). */
static PyType_Slot tokenizeriter_slots[] = {
    {Py_tp_new, tokenizeriter_new},
    {Py_tp_dealloc, tokenizeriter_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, tokenizeriter_next},
    {0, NULL},
};
     121  
/* Spec used by tokenizemodule_exec to create the heap type.
   IMMUTABLETYPE: no GC flag — instances hold no Python references that
   could participate in cycles (tok_state is a C struct). */
static PyType_Spec tokenizeriter_spec = {
    .name = "_tokenize.TokenizerIter",
    .basicsize = sizeof(tokenizeriterobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = tokenizeriter_slots,
};
     128  
     129  static int
     130  tokenizemodule_exec(PyObject *m)
     131  {
     132      tokenize_state *state = get_tokenize_state(m);
     133      if (state == NULL) {
     134          return -1;
     135      }
     136  
     137      state->TokenizerIter = (PyTypeObject *)PyType_FromModuleAndSpec(m, &tokenizeriter_spec, NULL);
     138      if (state->TokenizerIter == NULL) {
     139          return -1;
     140      }
     141      if (PyModule_AddType(m, state->TokenizerIter) < 0) {
     142          return -1;
     143      }
     144  
     145      return 0;
     146  }
     147  
/* The module exposes no functions of its own — only the TokenizerIter type. */
static PyMethodDef tokenize_methods[] = {
    {NULL, NULL, 0, NULL} /* Sentinel */
};
     151  
/* Multi-phase initialization (PEP 489) slots. */
static PyModuleDef_Slot tokenizemodule_slots[] = {
    {Py_mod_exec, tokenizemodule_exec},
    {0, NULL}
};
     156  
/* GC traversal: visit the one PyObject held in module state.
   NOTE: Py_VISIT expands in terms of the `visit` and `arg` parameter
   names, so those names must not change. */
static int
tokenizemodule_traverse(PyObject *m, visitproc visit, void *arg)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_VISIT(state->TokenizerIter);
    return 0;
}
     164  
/* GC clear: drop the module state's reference to the heap type
   (Py_CLEAR sets the field to NULL before decref'ing). */
static int
tokenizemodule_clear(PyObject *m)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_CLEAR(state->TokenizerIter);
    return 0;
}
     172  
     173  static void
     174  tokenizemodule_free(void *m)
     175  {
     176      tokenizemodule_clear((PyObject *)m);
     177  }
     178  
/* Module definition. m_size > 0 gives every module instance its own
   tokenize_state, enabling subinterpreter-safe multi-phase init. */
static struct PyModuleDef _tokenizemodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_tokenize",
    .m_size = sizeof(tokenize_state),
    .m_slots = tokenizemodule_slots,
    .m_methods = tokenize_methods,
    .m_traverse = tokenizemodule_traverse,
    .m_clear = tokenizemodule_clear,
    .m_free = tokenizemodule_free,
};
     189  
/* Multi-phase init entry point: returns the module def; the interpreter
   runs the Py_mod_exec slot(s) to finish initialization. */
PyMODINIT_FUNC
PyInit__tokenize(void)
{
    return PyModuleDef_Init(&_tokenizemodule);
}