1 #include <Python.h>
2 #include "pycore_ast.h" // _PyAST_Validate(),
3 #include "pycore_pystate.h" // _PyThreadState_GET()
4 #include <errcode.h>
5
6 #include "tokenizer.h"
7 #include "pegen.h"
8
9 // Internal parser functions
10
11 asdl_stmt_seq*
12 _PyPegen_interactive_exit(Parser *p)
13 {
14 if (p->errcode) {
15 *(p->errcode) = E_EOF;
16 }
17 return NULL;
18 }
19
20 Py_ssize_t
21 _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
22 {
23 const char *str = PyUnicode_AsUTF8(line);
24 if (!str) {
25 return -1;
26 }
27 Py_ssize_t len = strlen(str);
28 if (col_offset > len + 1) {
29 col_offset = len + 1;
30 }
31 assert(col_offset >= 0);
32 PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
33 if (!text) {
34 return -1;
35 }
36 Py_ssize_t size = PyUnicode_GET_LENGTH(text);
37 Py_DECREF(text);
38 return size;
39 }
40
41 // Here, mark is the start of the node, while p->mark is the end.
42 // If node==NULL, they should be the same.
43 int
44 _PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
45 {
46 // Insert in front
47 Memo *m = _PyArena_Malloc(p->arena, sizeof(Memo));
48 if (m == NULL) {
49 return -1;
50 }
51 m->type = type;
52 m->node = node;
53 m->mark = p->mark;
54 m->next = p->tokens[mark]->memo;
55 p->tokens[mark]->memo = m;
56 return 0;
57 }
58
59 // Like _PyPegen_insert_memo(), but updates an existing node if found.
60 int
61 _PyPegen_update_memo(Parser *p, int mark, int type, void *node)
62 {
63 for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
64 if (m->type == type) {
65 // Update existing node.
66 m->node = node;
67 m->mark = p->mark;
68 return 0;
69 }
70 }
71 // Insert new node.
72 return _PyPegen_insert_memo(p, mark, type, node);
73 }
74
75 static int
76 init_normalization(Parser *p)
77 {
78 if (p->normalize) {
79 return 1;
80 }
81 p->normalize = _PyImport_GetModuleAttrString("unicodedata", "normalize");
82 if (!p->normalize)
83 {
84 return 0;
85 }
86 return 1;
87 }
88
89 static int
90 growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
91 assert(initial_size > 0);
92 arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items));
93 arr->size = initial_size;
94 arr->num_items = 0;
95
96 return arr->items != NULL;
97 }
98
99 static int
100 growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
101 if (arr->num_items >= arr->size) {
102 size_t new_size = arr->size * 2;
103 void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items));
104 if (!new_items_array) {
105 return 0;
106 }
107 arr->items = new_items_array;
108 arr->size = new_size;
109 }
110
111 arr->items[arr->num_items].lineno = lineno;
112 arr->items[arr->num_items].comment = comment; // Take ownership
113 arr->num_items++;
114 return 1;
115 }
116
117 static void
118 growable_comment_array_deallocate(growable_comment_array *arr) {
119 for (unsigned i = 0; i < arr->num_items; i++) {
120 PyMem_Free(arr->items[i].comment);
121 }
122 PyMem_Free(arr->items);
123 }
124
125 static int
126 _get_keyword_or_name_type(Parser *p, struct token *new_token)
127 {
128 int name_len = new_token->end_col_offset - new_token->col_offset;
129 assert(name_len > 0);
130
131 if (name_len >= p->n_keyword_lists ||
132 p->keywords[name_len] == NULL ||
133 p->keywords[name_len]->type == -1) {
134 return NAME;
135 }
136 for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) {
137 if (strncmp(k->str, new_token->start, name_len) == 0) {
138 return k->type;
139 }
140 }
141 return NAME;
142 }
143
144 static int
145 initialize_token(Parser *p, Token *parser_token, struct token *new_token, int token_type) {
146 assert(parser_token != NULL);
147
148 parser_token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, new_token) : token_type;
149 parser_token->bytes = PyBytes_FromStringAndSize(new_token->start, new_token->end - new_token->start);
150 if (parser_token->bytes == NULL) {
151 return -1;
152 }
153 if (_PyArena_AddPyObject(p->arena, parser_token->bytes) < 0) {
154 Py_DECREF(parser_token->bytes);
155 return -1;
156 }
157
158 parser_token->metadata = NULL;
159 if (new_token->metadata != NULL) {
160 if (_PyArena_AddPyObject(p->arena, new_token->metadata) < 0) {
161 Py_DECREF(parser_token->metadata);
162 return -1;
163 }
164 parser_token->metadata = new_token->metadata;
165 new_token->metadata = NULL;
166 }
167
168 parser_token->level = new_token->level;
169 parser_token->lineno = new_token->lineno;
170 parser_token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->col_offset
171 : new_token->col_offset;
172 parser_token->end_lineno = new_token->end_lineno;
173 parser_token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->end_col_offset
174 : new_token->end_col_offset;
175
176 p->fill += 1;
177
178 if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
179 return _Pypegen_raise_decode_error(p);
180 }
181
182 return (token_type == ERRORTOKEN ? _Pypegen_tokenizer_error(p) : 0);
183 }
184
185 static int
186 _resize_tokens_array(Parser *p) {
187 int newsize = p->size * 2;
188 Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
189 if (new_tokens == NULL) {
190 PyErr_NoMemory();
191 return -1;
192 }
193 p->tokens = new_tokens;
194
195 for (int i = p->size; i < newsize; i++) {
196 p->tokens[i] = PyMem_Calloc(1, sizeof(Token));
197 if (p->tokens[i] == NULL) {
198 p->size = i; // Needed, in order to cleanup correctly after parser fails
199 PyErr_NoMemory();
200 return -1;
201 }
202 }
203 p->size = newsize;
204 return 0;
205 }
206
207 int
208 _PyPegen_fill_token(Parser *p)
209 {
210 struct token new_token;
211 _PyToken_Init(&new_token);
212 int type = _PyTokenizer_Get(p->tok, &new_token);
213
214 // Record and skip '# type: ignore' comments
215 while (type == TYPE_IGNORE) {
216 Py_ssize_t len = new_token.end_col_offset - new_token.col_offset;
217 char *tag = PyMem_Malloc(len + 1);
218 if (tag == NULL) {
219 PyErr_NoMemory();
220 goto error;
221 }
222 strncpy(tag, new_token.start, len);
223 tag[len] = '\0';
224 // Ownership of tag passes to the growable array
225 if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
226 PyErr_NoMemory();
227 goto error;
228 }
229 type = _PyTokenizer_Get(p->tok, &new_token);
230 }
231
232 // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
233 if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
234 type = NEWLINE; /* Add an extra newline */
235 p->parsing_started = 0;
236
237 if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
238 p->tok->pendin = -p->tok->indent;
239 p->tok->indent = 0;
240 }
241 }
242 else {
243 p->parsing_started = 1;
244 }
245
246 // Check if we are at the limit of the token array capacity and resize if needed
247 if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
248 goto error;
249 }
250
251 Token *t = p->tokens[p->fill];
252 return initialize_token(p, t, &new_token, type);
253 error:
254 _PyToken_Free(&new_token);
255 return -1;
256 }
257
258 #if defined(Py_DEBUG)
259 // Instrumentation to count the effectiveness of memoization.
260 // The array counts the number of tokens skipped by memoization,
261 // indexed by type.
262
263 #define NSTATISTICS _PYPEGEN_NSTATISTICS
264 #define memo_statistics _PyRuntime.parser.memo_statistics
265
266 void
267 _PyPegen_clear_memo_statistics(void)
268 {
269 for (int i = 0; i < NSTATISTICS; i++) {
270 memo_statistics[i] = 0;
271 }
272 }
273
274 PyObject *
275 _PyPegen_get_memo_statistics(void)
276 {
277 PyObject *ret = PyList_New(NSTATISTICS);
278 if (ret == NULL) {
279 return NULL;
280 }
281 for (int i = 0; i < NSTATISTICS; i++) {
282 PyObject *value = PyLong_FromLong(memo_statistics[i]);
283 if (value == NULL) {
284 Py_DECREF(ret);
285 return NULL;
286 }
287 // PyList_SetItem borrows a reference to value.
288 if (PyList_SetItem(ret, i, value) < 0) {
289 Py_DECREF(ret);
290 return NULL;
291 }
292 }
293 return ret;
294 }
295 #endif
296
297 int // bool
298 _PyPegen_is_memoized(Parser *p, int type, void *pres)
299 {
300 if (p->mark == p->fill) {
301 if (_PyPegen_fill_token(p) < 0) {
302 p->error_indicator = 1;
303 return -1;
304 }
305 }
306
307 Token *t = p->tokens[p->mark];
308
309 for (Memo *m = t->memo; m != NULL; m = m->next) {
310 if (m->type == type) {
311 #if defined(PY_DEBUG)
312 if (0 <= type && type < NSTATISTICS) {
313 long count = m->mark - p->mark;
314 // A memoized negative result counts for one.
315 if (count <= 0) {
316 count = 1;
317 }
318 memo_statistics[type] += count;
319 }
320 #endif
321 p->mark = m->mark;
322 *(void **)(pres) = m->node;
323 return 1;
324 }
325 }
326 return 0;
327 }
328
329 int
330 _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
331 {
332 int mark = p->mark;
333 void *res = func(p);
334 p->mark = mark;
335 return (res != NULL) == positive;
336 }
337
338 int
339 _PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
340 {
341 int mark = p->mark;
342 void *res = func(p, arg);
343 p->mark = mark;
344 return (res != NULL) == positive;
345 }
346
347 int
348 _PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
349 {
350 int mark = p->mark;
351 void *res = func(p, arg);
352 p->mark = mark;
353 return (res != NULL) == positive;
354 }
355
356 int
357 _PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
358 {
359 int mark = p->mark;
360 void *res = (void*)func(p);
361 p->mark = mark;
362 return (res != NULL) == positive;
363 }
364
365 Token *
366 _PyPegen_expect_token(Parser *p, int type)
367 {
368 if (p->mark == p->fill) {
369 if (_PyPegen_fill_token(p) < 0) {
370 p->error_indicator = 1;
371 return NULL;
372 }
373 }
374 Token *t = p->tokens[p->mark];
375 if (t->type != type) {
376 return NULL;
377 }
378 p->mark += 1;
379 return t;
380 }
381
382 void*
383 _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) {
384
385 if (p->error_indicator == 1) {
386 return NULL;
387 }
388 if (result == NULL) {
389 RAISE_SYNTAX_ERROR("expected (%s)", expected);
390 return NULL;
391 }
392 return result;
393 }
394
395 Token *
396 _PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {
397
398 if (p->error_indicator == 1) {
399 return NULL;
400 }
401
402 if (p->mark == p->fill) {
403 if (_PyPegen_fill_token(p) < 0) {
404 p->error_indicator = 1;
405 return NULL;
406 }
407 }
408 Token *t = p->tokens[p->mark];
409 if (t->type != type) {
410 RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'", expected);
411 return NULL;
412 }
413 p->mark += 1;
414 return t;
415 }
416
417 expr_ty
418 _PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
419 {
420 if (p->mark == p->fill) {
421 if (_PyPegen_fill_token(p) < 0) {
422 p->error_indicator = 1;
423 return NULL;
424 }
425 }
426 Token *t = p->tokens[p->mark];
427 if (t->type != NAME) {
428 return NULL;
429 }
430 const char *s = PyBytes_AsString(t->bytes);
431 if (!s) {
432 p->error_indicator = 1;
433 return NULL;
434 }
435 if (strcmp(s, keyword) != 0) {
436 return NULL;
437 }
438 return _PyPegen_name_token(p);
439 }
440
441 Token *
442 _PyPegen_get_last_nonnwhitespace_token(Parser *p)
443 {
444 assert(p->mark >= 0);
445 Token *token = NULL;
446 for (int m = p->mark - 1; m >= 0; m--) {
447 token = p->tokens[m];
448 if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
449 break;
450 }
451 }
452 return token;
453 }
454
PyObject *
_PyPegen_new_identifier(Parser *p, const char *n)
{
    // Turn the NUL-terminated identifier `n` into an interned unicode object
    // owned by the parser's arena. Non-ASCII identifiers are normalized to
    // NFKC first via unicodedata.normalize. Returns NULL and sets
    // p->error_indicator on failure; the returned object must NOT be
    // decref'd by the caller (the arena holds the reference).
    PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
    if (!id) {
        goto error;
    }
    /* PyUnicode_DecodeUTF8 should always return a ready string. */
    assert(PyUnicode_IS_READY(id));
    /* Check whether there are non-ASCII characters in the
       identifier; if so, normalize to NFKC. */
    if (!PyUnicode_IS_ASCII(id))
    {
        PyObject *id2;
        // Make sure p->normalize holds unicodedata.normalize.
        if (!init_normalization(p))
        {
            Py_DECREF(id);
            goto error;
        }
        PyObject *form = PyUnicode_InternFromString("NFKC");
        if (form == NULL)
        {
            Py_DECREF(id);
            goto error;
        }
        // Equivalent to: unicodedata.normalize("NFKC", id)
        PyObject *args[2] = {form, id};
        id2 = _PyObject_FastCall(p->normalize, args, 2);
        Py_DECREF(id);
        Py_DECREF(form);
        if (!id2) {
            goto error;
        }
        // Guard against a broken/monkeypatched normalize returning non-str.
        if (!PyUnicode_Check(id2))
        {
            PyErr_Format(PyExc_TypeError,
                         "unicodedata.normalize() must return a string, not "
                         "%.200s",
                         _PyType_Name(Py_TYPE(id2)));
            Py_DECREF(id2);
            goto error;
        }
        id = id2;
    }
    PyUnicode_InternInPlace(&id);
    // From here on the arena owns the (sole) strong reference to id.
    if (_PyArena_AddPyObject(p->arena, id) < 0)
    {
        Py_DECREF(id);
        goto error;
    }
    return id;

error:
    p->error_indicator = 1;
    return NULL;
}
510
511 static expr_ty
512 _PyPegen_name_from_token(Parser *p, Token* t)
513 {
514 if (t == NULL) {
515 return NULL;
516 }
517 const char *s = PyBytes_AsString(t->bytes);
518 if (!s) {
519 p->error_indicator = 1;
520 return NULL;
521 }
522 PyObject *id = _PyPegen_new_identifier(p, s);
523 if (id == NULL) {
524 p->error_indicator = 1;
525 return NULL;
526 }
527 return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno,
528 t->end_col_offset, p->arena);
529 }
530
531 expr_ty
532 _PyPegen_name_token(Parser *p)
533 {
534 Token *t = _PyPegen_expect_token(p, NAME);
535 return _PyPegen_name_from_token(p, t);
536 }
537
538 void *
539 _PyPegen_string_token(Parser *p)
540 {
541 return _PyPegen_expect_token(p, STRING);
542 }
543
544 expr_ty _PyPegen_soft_keyword_token(Parser *p) {
545 Token *t = _PyPegen_expect_token(p, NAME);
546 if (t == NULL) {
547 return NULL;
548 }
549 char *the_token;
550 Py_ssize_t size;
551 PyBytes_AsStringAndSize(t->bytes, &the_token, &size);
552 for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) {
553 if (strncmp(*keyword, the_token, size) == 0) {
554 return _PyPegen_name_from_token(p, t);
555 }
556 }
557 return NULL;
558 }
559
static PyObject *
parsenumber_raw(const char *s)
{
    // Convert the text of a numeric literal (underscores already removed)
    // into a Python int, float or complex object. Returns a new reference,
    // or NULL with an exception set.
    const char *end;
    long x;
    double dx;
    Py_complex compl;
    int imflag;

    assert(s != NULL);
    errno = 0;
    end = s + strlen(s) - 1;
    // A trailing 'j'/'J' marks an imaginary (complex) literal.
    imflag = *end == 'j' || *end == 'J';
    if (s[0] == '0') {
        // Leading zero: binary/octal/hex prefix (or plain zero) — parse as
        // unsigned so e.g. 0xFFFFFFFF on 32-bit long doesn't reject.
        x = (long)PyOS_strtoul(s, (char **)&end, 0);
        if (x < 0 && errno == 0) {
            // Fit in unsigned long but not in long: fall back to
            // arbitrary-precision int.
            return PyLong_FromString(s, (char **)0, 0);
        }
    }
    else {
        x = PyOS_strtol(s, (char **)&end, 0);
    }
    if (*end == '\0') {
        // The whole string parsed as an integer.
        if (errno != 0) {
            // Overflowed a C long: use arbitrary precision.
            return PyLong_FromString(s, (char **)0, 0);
        }
        return PyLong_FromLong(x);
    }
    // Not a pure integer: parse as float or complex.
    /* XXX Huge floats may silently fail */
    if (imflag) {
        compl.real = 0.;
        compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
        if (compl.imag == -1.0 && PyErr_Occurred()) {
            return NULL;
        }
        return PyComplex_FromCComplex(compl);
    }
    dx = PyOS_string_to_double(s, NULL, NULL);
    if (dx == -1.0 && PyErr_Occurred()) {
        return NULL;
    }
    return PyFloat_FromDouble(dx);
}
603
604 static PyObject *
605 parsenumber(const char *s)
606 {
607 char *dup;
608 char *end;
609 PyObject *res = NULL;
610
611 assert(s != NULL);
612
613 if (strchr(s, '_') == NULL) {
614 return parsenumber_raw(s);
615 }
616 /* Create a duplicate without underscores. */
617 dup = PyMem_Malloc(strlen(s) + 1);
618 if (dup == NULL) {
619 return PyErr_NoMemory();
620 }
621 end = dup;
622 for (; *s; s++) {
623 if (*s != '_') {
624 *end++ = *s;
625 }
626 }
627 *end = '\0';
628 res = parsenumber_raw(dup);
629 PyMem_Free(dup);
630 return res;
631 }
632
expr_ty
_PyPegen_number_token(Parser *p)
{
    // Consume a NUMBER token and build a Constant AST node holding its
    // parsed int/float/complex value. Returns NULL with p->error_indicator
    // set on failure.
    Token *t = _PyPegen_expect_token(p, NUMBER);
    if (t == NULL) {
        return NULL;
    }

    const char *num_raw = PyBytes_AsString(t->bytes);
    if (num_raw == NULL) {
        p->error_indicator = 1;
        return NULL;
    }

    // Underscore separators require feature_version >= 6 (Python 3.6+).
    if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
        p->error_indicator = 1;
        return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
                                  "in Python 3.6 and greater");
    }

    PyObject *c = parsenumber(num_raw);

    if (c == NULL) {
        p->error_indicator = 1;
        PyThreadState *tstate = _PyThreadState_GET();
        // The only way a ValueError should happen in _this_ code is via
        // PyLong_FromString hitting a length limit.
        if (tstate->current_exception != NULL &&
            Py_TYPE(tstate->current_exception) == (PyTypeObject *)PyExc_ValueError
        ) {
            // Re-raise the int-conversion ValueError as a SyntaxError that
            // points at the offending literal's line.
            PyObject *exc = PyErr_GetRaisedException();
            /* Intentionally omitting columns to avoid a wall of 1000s of '^'s
             * on the error message. Nobody is going to overlook their huge
             * numeric literal once given the line. */
            RAISE_ERROR_KNOWN_LOCATION(
                p, PyExc_SyntaxError,
                t->lineno, -1 /* col_offset */,
                t->end_lineno, -1 /* end_col_offset */,
                "%S - Consider hexadecimal for huge integer literals "
                "to avoid decimal conversion limits.",
                exc);
            Py_DECREF(exc);
        }
        return NULL;
    }

    // The arena owns the constant object from here on.
    if (_PyArena_AddPyObject(p->arena, c) < 0) {
        Py_DECREF(c);
        p->error_indicator = 1;
        return NULL;
    }

    return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno,
                           t->end_col_offset, p->arena);
}
688
689 /* Check that the source for a single input statement really is a single
690 statement by looking at what is left in the buffer after parsing.
691 Trailing whitespace and comments are OK. */
692 static int // bool
693 bad_single_statement(Parser *p)
694 {
695 char *cur = p->tok->cur;
696 char c = *cur;
697
698 for (;;) {
699 while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
700 c = *++cur;
701 }
702
703 if (!c) {
704 return 0;
705 }
706
707 if (c != '#') {
708 return 1;
709 }
710
711 /* Suck up comment. */
712 while (c && c != '\n') {
713 c = *++cur;
714 }
715 }
716 }
717
718 static int
719 compute_parser_flags(PyCompilerFlags *flags)
720 {
721 int parser_flags = 0;
722 if (!flags) {
723 return 0;
724 }
725 if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) {
726 parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
727 }
728 if (flags->cf_flags & PyCF_IGNORE_COOKIE) {
729 parser_flags |= PyPARSE_IGNORE_COOKIE;
730 }
731 if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) {
732 parser_flags |= PyPARSE_BARRY_AS_BDFL;
733 }
734 if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
735 parser_flags |= PyPARSE_TYPE_COMMENTS;
736 }
737 if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
738 parser_flags |= PyPARSE_ASYNC_HACKS;
739 }
740 if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) {
741 parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT;
742 }
743 return parser_flags;
744 }
745
746 // Parser API
747
748 Parser *
749 _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
750 int feature_version, int *errcode, PyArena *arena)
751 {
752 Parser *p = PyMem_Malloc(sizeof(Parser));
753 if (p == NULL) {
754 return (Parser *) PyErr_NoMemory();
755 }
756 assert(tok != NULL);
757 tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0;
758 tok->async_hacks = (flags & PyPARSE_ASYNC_HACKS) > 0;
759 p->tok = tok;
760 p->keywords = NULL;
761 p->n_keyword_lists = -1;
762 p->soft_keywords = NULL;
763 p->tokens = PyMem_Malloc(sizeof(Token *));
764 if (!p->tokens) {
765 PyMem_Free(p);
766 return (Parser *) PyErr_NoMemory();
767 }
768 p->tokens[0] = PyMem_Calloc(1, sizeof(Token));
769 if (!p->tokens[0]) {
770 PyMem_Free(p->tokens);
771 PyMem_Free(p);
772 return (Parser *) PyErr_NoMemory();
773 }
774 if (!growable_comment_array_init(&p->type_ignore_comments, 10)) {
775 PyMem_Free(p->tokens[0]);
776 PyMem_Free(p->tokens);
777 PyMem_Free(p);
778 return (Parser *) PyErr_NoMemory();
779 }
780
781 p->mark = 0;
782 p->fill = 0;
783 p->size = 1;
784
785 p->errcode = errcode;
786 p->arena = arena;
787 p->start_rule = start_rule;
788 p->parsing_started = 0;
789 p->normalize = NULL;
790 p->error_indicator = 0;
791
792 p->starting_lineno = 0;
793 p->starting_col_offset = 0;
794 p->flags = flags;
795 p->feature_version = feature_version;
796 p->known_err_token = NULL;
797 p->level = 0;
798 p->call_invalid_rules = 0;
799 #ifdef Py_DEBUG
800 p->debug = _Py_GetConfig()->parser_debug;
801 #endif
802 return p;
803 }
804
805 void
806 _PyPegen_Parser_Free(Parser *p)
807 {
808 Py_XDECREF(p->normalize);
809 for (int i = 0; i < p->size; i++) {
810 PyMem_Free(p->tokens[i]);
811 }
812 PyMem_Free(p->tokens);
813 growable_comment_array_deallocate(&p->type_ignore_comments);
814 PyMem_Free(p);
815 }
816
817 static void
818 reset_parser_state_for_error_pass(Parser *p)
819 {
820 for (int i = 0; i < p->fill; i++) {
821 p->tokens[i]->memo = NULL;
822 }
823 p->mark = 0;
824 p->call_invalid_rules = 1;
825 // Don't try to get extra tokens in interactive mode when trying to
826 // raise specialized errors in the second pass.
827 p->tok->interactive_underflow = IUNDERFLOW_STOP;
828 }
829
830 static inline int
831 _is_end_of_source(Parser *p) {
832 int err = p->tok->done;
833 return err == E_EOF || err == E_EOFS || err == E_EOLS;
834 }
835
void *
_PyPegen_run_parser(Parser *p)
{
    // Top-level driver: run the generated parser once; if that fails, run a
    // second, slower pass with the invalid_* rules enabled to produce a
    // precise SyntaxError. Returns the parse result, or NULL with an
    // exception set.
    void *res = _PyPegen_parse(p);
    assert(p->level == 0);
    if (res == NULL) {
        // Incomplete input (e.g. REPL continuation) is reported specially.
        if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) {
            PyErr_Clear();
            return RAISE_SYNTAX_ERROR("incomplete input");
        }
        // Non-syntax errors (e.g. MemoryError) propagate unchanged.
        if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
            return NULL;
        }
        // Make a second parser pass. In this pass we activate heavier and slower checks
        // to produce better error messages and more complete diagnostics. Extra "invalid_*"
        // rules will be active during parsing.
        Token *last_token = p->tokens[p->fill - 1];
        reset_parser_state_for_error_pass(p);
        _PyPegen_parse(p);

        // Set SyntaxErrors accordingly depending on the parser/tokenizer status at the failure
        // point.
        _Pypegen_set_syntax_error(p, last_token);
        return NULL;
    }

    // "single" input must really be a single statement.
    if (p->start_rule == Py_single_input && bad_single_statement(p)) {
        p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
        return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
    }

    // test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate()
#if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN)
    if (p->start_rule == Py_single_input ||
        p->start_rule == Py_file_input ||
        p->start_rule == Py_eval_input)
    {
        if (!_PyAST_Validate(res)) {
            return NULL;
        }
    }
#endif
    return res;
}
880
mod_ty
_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
                                      const char *enc, const char *ps1, const char *ps2,
                                      PyCompilerFlags *flags, int *errcode, PyArena *arena)
{
    // Parse source read from a FILE*. Returns the module AST (arena-owned)
    // or NULL with an exception set. The tokenizer is always freed before
    // returning; filename_ob's reference is transferred to the tokenizer.
    struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2);
    if (tok == NULL) {
        if (PyErr_Occurred()) {
            _PyPegen_raise_tokenizer_init_error(filename_ob);
            return NULL;
        }
        return NULL;
    }
    // Treat prompts or stdin as interactive input (affects error reporting
    // and underflow behavior).
    if (!tok->fp || ps1 != NULL || ps2 != NULL ||
        PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) {
        tok->fp_interactive = 1;
    }
    // This transfers the ownership to the tokenizer
    tok->filename = Py_NewRef(filename_ob);

    // From here on we need to clean up even if there's an error
    mod_ty result = NULL;

    int parser_flags = compute_parser_flags(flags);
    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION,
                                    errcode, arena);
    if (p == NULL) {
        goto error;
    }

    result = _PyPegen_run_parser(p);
    _PyPegen_Parser_Free(p);

 error:
    _PyTokenizer_Free(tok);
    return result;
}
918
mod_ty
_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
                                PyCompilerFlags *flags, PyArena *arena)
{
    // Parse source held in a NUL-terminated string. Returns the module AST
    // (arena-owned) or NULL with an exception set. The tokenizer is always
    // freed before returning.
    int exec_input = start_rule == Py_file_input;

    struct tok_state *tok;
    // PyCF_IGNORE_COOKIE: treat the input as UTF-8 and skip coding-cookie
    // detection.
    if (flags != NULL && flags->cf_flags & PyCF_IGNORE_COOKIE) {
        tok = _PyTokenizer_FromUTF8(str, exec_input, 0);
    } else {
        tok = _PyTokenizer_FromString(str, exec_input, 0);
    }
    if (tok == NULL) {
        if (PyErr_Occurred()) {
            _PyPegen_raise_tokenizer_init_error(filename_ob);
        }
        return NULL;
    }
    // This transfers the ownership to the tokenizer
    tok->filename = Py_NewRef(filename_ob);

    // We need to clear up from here on
    mod_ty result = NULL;

    int parser_flags = compute_parser_flags(flags);
    // When compiling to AST only, honor the requested feature version.
    int feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
        flags->cf_feature_version : PY_MINOR_VERSION;
    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version,
                                    NULL, arena);
    if (p == NULL) {
        goto error;
    }

    result = _PyPegen_run_parser(p);
    _PyPegen_Parser_Free(p);

 error:
    _PyTokenizer_Free(tok);
    return result;
}