(root)/
Python-3.11.7/
Parser/
tokenizer.h
#ifndef Py_TOKENIZER_H
#define Py_TOKENIZER_H
#ifdef __cplusplus
extern "C" {
#endif

#include "object.h"

/* Tokenizer interface */

#include "token.h"      /* For token types */

#define MAXINDENT 100   /* Max indentation level */
#define MAXLEVEL 200    /* Max parentheses level */

enum decoding_state {
    STATE_INIT,
    STATE_SEEK_CODING,
    STATE_NORMAL
};

enum interactive_underflow_t {
    /* Normal mode of operation: return a new token when asked in interactive mode */
    IUNDERFLOW_NORMAL,
    /* Forcefully return ENDMARKER when asked for a new token in interactive mode. This
     * can be used to prevent the tokenizer from prompting the user for new tokens */
    IUNDERFLOW_STOP,
};
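
/* Illustrative note (an editorial addition, not part of the original header):
 * a caller opts into the IUNDERFLOW_STOP behavior simply by assigning the
 * field on its tok_state before the next request for a token, e.g.
 *
 *     tok->interactive_underflow = IUNDERFLOW_STOP;
 *
 * where `tok` is a hypothetical `struct tok_state *`; subsequent interactive
 * reads then return ENDMARKER instead of prompting the user. */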

/* Tokenizer state */
struct tok_state {
    /* Input state; buf <= cur <= inp <= end */
    /* NB an entire line is held in the buffer */
    char *buf;          /* Input buffer, or NULL; malloc'ed if fp != NULL */
    char *cur;          /* Next character in buffer */
    char *inp;          /* End of data in buffer */
    int fp_interactive; /* Whether the file descriptor is interactive */
    char *interactive_src_start; /* The start of the source parsed so far in interactive mode */
    char *interactive_src_end; /* The end of the source parsed so far in interactive mode */
    const char *end;    /* End of input buffer if buf != NULL */
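    /* Editorial reading of the invariant buf <= cur <= inp <= end noted
     * above (an illustrative addition, not part of the original header):
     *
     *     buf ... cur : input already consumed by the tokenizer
     *     cur ... inp : input read into the buffer but not yet tokenized
     *     inp ... end : unused buffer capacity
     */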
    const char *start;  /* Start of current token if not NULL */
    int done;           /* E_OK normally, E_EOF at EOF, otherwise error code */
    /* NB If done != E_OK, cur must be == inp!!! */
    FILE *fp;           /* Rest of input; NULL if tokenizing a string */
    int tabsize;        /* Tab spacing */
    int indent;         /* Current indentation index */
    int indstack[MAXINDENT];            /* Stack of indents */
    int atbol;          /* Nonzero if at the beginning of a new line */
    int pendin;         /* Pending indents (if > 0) or dedents (if < 0) */
    const char *prompt, *nextprompt;          /* For interactive prompting */
    int lineno;         /* Current line number */
    int first_lineno;   /* First line of a single-line or multi-line string
                           expression (cf. issue 16806) */
    int level;          /* () [] {} Parentheses nesting level */
            /* Used to allow free continuations inside them */
    char parenstack[MAXLEVEL];
    int parenlinenostack[MAXLEVEL];
    int parencolstack[MAXLEVEL];
    PyObject *filename;
    /* Stuff for checking on different tab sizes */
    int altindstack[MAXINDENT];         /* Stack of alternate indents */
    /* Stuff for PEP 0263 */
    enum decoding_state decoding_state;
    int decoding_erred;         /* whether an error occurred while decoding */
    char *encoding;         /* Source encoding. */
    int cont_line;          /* whether we are in a continuation line. */
    const char* line_start;     /* pointer to start of current line */
    const char* multi_line_start; /* pointer to start of first line of
                                     a single-line or multi-line string
                                     expression (cf. issue 16806) */
    PyObject *decoding_readline; /* open(...).readline */
    PyObject *decoding_buffer;
    const char* enc;        /* Encoding for the current str. */
    char* str;          /* Source string being tokenized (if tokenizing from a string) */
    char* input;       /* Tokenizer's newline-translated copy of the string. */

    int type_comments;      /* Whether to look for type comments */

    /* async/await related fields (still needed depending on feature_version) */
    int async_hacks;     /* =1 if async/await aren't always keywords */
    int async_def;        /* =1 if tokens are inside an 'async def' body. */
    int async_def_indent; /* Indentation level of the outermost 'async def'. */
    int async_def_nl;     /* =1 if the outermost 'async def' had at least one
                             NEWLINE token after it. */
    /* How to proceed when asked for a new token in interactive mode */
    enum interactive_underflow_t interactive_underflow;
    int report_warnings;
};

extern struct tok_state *_PyTokenizer_FromString(const char *, int);
extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int);
extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char*,
                                               const char *, const char *);
extern void _PyTokenizer_Free(struct tok_state *);
extern int _PyTokenizer_Get(struct tok_state *, const char **, const char **);
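
/* Example usage (an illustrative editorial sketch, not code taken from
 * CPython): tokenize a source string and walk the tokens until ENDMARKER.
 * ENDMARKER and ERRORTOKEN come from token.h; the int argument to
 * _PyTokenizer_FromString is assumed here to be the exec_input flag.
 *
 *     struct tok_state *tok = _PyTokenizer_FromString("x = 1\n", 1);
 *     if (tok != NULL) {
 *         const char *start, *end;
 *         int type;
 *         while ((type = _PyTokenizer_Get(tok, &start, &end)) != ENDMARKER) {
 *             if (type == ERRORTOKEN) {
 *                 break;              // tok->done holds the error code
 *             }
 *             // start..end delimit the token text inside tok's buffer
 *         }
 *         _PyTokenizer_Free(tok);
 *     }
 */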

#define tok_dump _Py_tok_dump

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKENIZER_H */