(root)/
gcc-13.2.0/
gcc/
rust/
lex/
rust-token.h
       1  // Copyright (C) 2020-2023 Free Software Foundation, Inc.
       2  
       3  // This file is part of GCC.
       4  
       5  // GCC is free software; you can redistribute it and/or modify it under
       6  // the terms of the GNU General Public License as published by the Free
       7  // Software Foundation; either version 3, or (at your option) any later
       8  // version.
       9  
      10  // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      11  // WARRANTY; without even the implied warranty of MERCHANTABILITY or
      12  // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      13  // for more details.
      14  
      15  // You should have received a copy of the GNU General Public License
      16  // along with GCC; see the file COPYING3.  If not see
      17  // <http://www.gnu.org/licenses/>.
      18  
      19  #ifndef RUST_TOKEN_H
      20  #define RUST_TOKEN_H
      21  
      22  #include "rust-system.h"
      23  #include "rust-linemap.h"
      24  #include "rust-codepoint.h"
      25  
      26  namespace Rust {
      27  // "Primitive core types" in Rust - the different int and float types, as well
      28  // as some others
      29  enum PrimitiveCoreType
      30  {
      31    CORETYPE_UNKNOWN,
      32    // named primitives
      33    CORETYPE_BOOL,
      34    CORETYPE_CHAR,
      35    CORETYPE_STR,
      36    // okay technically int and uint are arch-dependent (pointer size)
      37    CORETYPE_INT,
      38    CORETYPE_UINT,
      39    // numbered number primitives
      40    CORETYPE_F32,
      41    CORETYPE_F64,
      42    CORETYPE_I8,
      43    CORETYPE_I16,
      44    CORETYPE_I32,
      45    CORETYPE_I64,
      46    CORETYPE_I128,
      47    CORETYPE_U8,
      48    CORETYPE_U16,
      49    CORETYPE_U32,
      50    CORETYPE_U64,
      51    CORETYPE_U128,
      52    // Pure decimals are used for tuple index.
      53    // Also means there is no type hint.
      54    CORETYPE_PURE_DECIMAL,
      55    // arch-dependent pointer sizes
      56    CORETYPE_ISIZE = CORETYPE_INT,
      57    CORETYPE_USIZE = CORETYPE_UINT
      58  };
      59  
// X-macro table listing every token kind.
//
// RS_TOKEN_LIST expands to one invocation per token of either
//   RS_TOKEN(name, description)
//   RS_TOKEN_KEYWORD(name, identifier)
// A user defines both macros, expands RS_TOKEN_LIST, and undefines them
// again; the TokenId enum below does exactly that, turning each entry into an
// enumerator in list order. FIRST_TOKEN and LAST_TOKEN are marker entries at
// the two ends of the list.
//
// Keep RS_TOKEN_KEYWORD sorted

/* note that abstract, async, become, box, do, final, macro, override, priv,
 * try, typeof, unsized, virtual, and yield are unused */
#define RS_TOKEN_LIST                                                          \
  RS_TOKEN (FIRST_TOKEN, "<first-token-marker>")                               \
  RS_TOKEN (END_OF_FILE, "end of file")                                        \
  RS_TOKEN (EXCLAM, "!")                                                       \
  RS_TOKEN (NOT_EQUAL, "!=")                                                   \
  RS_TOKEN (PERCENT, "%")                                                      \
  RS_TOKEN (PERCENT_EQ, "%=")                                                  \
  RS_TOKEN (AMP, "&")                                                          \
  RS_TOKEN (AMP_EQ, "&=")                                                      \
  RS_TOKEN (LOGICAL_AND, "&&")                                                 \
  RS_TOKEN (ASTERISK, "*")                                                     \
  RS_TOKEN (ASTERISK_EQ, "*=")                                                 \
  RS_TOKEN (PLUS, "+")                                                         \
  RS_TOKEN (PLUS_EQ, "+=")                                                     \
  RS_TOKEN (COMMA, ",")                                                        \
  RS_TOKEN (MINUS, "-")                                                        \
  RS_TOKEN (MINUS_EQ, "-=")                                                    \
  RS_TOKEN (RETURN_TYPE, "->")                                                 \
  RS_TOKEN (DOT, ".")                                                          \
  RS_TOKEN (DOT_DOT, "..")                                                     \
  RS_TOKEN (DOT_DOT_EQ, "..=")                                                 \
  RS_TOKEN (ELLIPSIS, "...")                                                   \
  RS_TOKEN (DIV, "/")                                                          \
  RS_TOKEN (DIV_EQ, "/=")                                                      \
  RS_TOKEN (COLON, ":")                                                        \
  RS_TOKEN (SEMICOLON, ";")                                                    \
  RS_TOKEN (LEFT_SHIFT, "<<")                                                  \
  RS_TOKEN (LEFT_SHIFT_EQ, "<<=")                                              \
  RS_TOKEN (LEFT_ANGLE, "<")                                                   \
  RS_TOKEN (LESS_OR_EQUAL, "<=")                                               \
  RS_TOKEN (EQUAL, "=")                                                        \
  RS_TOKEN (EQUAL_EQUAL, "==")                                                 \
  RS_TOKEN (MATCH_ARROW, "=>")                                                 \
  RS_TOKEN (RIGHT_ANGLE, ">")                                                  \
  RS_TOKEN (GREATER_OR_EQUAL, ">=")                                            \
  RS_TOKEN (RIGHT_SHIFT, ">>")                                                 \
  RS_TOKEN (RIGHT_SHIFT_EQ, ">>=")                                             \
  RS_TOKEN (PATTERN_BIND, "@")                                                 \
  RS_TOKEN (TILDE, "~")                                                        \
  RS_TOKEN (BACKSLASH, "\\")                                                   \
  RS_TOKEN (BACKTICK, "`")                                                     \
  RS_TOKEN (CARET, "^")                                                        \
  RS_TOKEN (CARET_EQ, "^=")                                                    \
  RS_TOKEN (PIPE, "|")                                                         \
  RS_TOKEN (PIPE_EQ, "|=")                                                     \
  RS_TOKEN (OR, "||")                                                          \
  RS_TOKEN (QUESTION_MARK, "?")                                                \
  RS_TOKEN (HASH, "#")                                                         \
  /* from here on, dodgy and may not be correct. not operators and may be      \
   * symbols */                                                                \
  /* RS_TOKEN(SPACE, " ") probably too dodgy */                                \
  /* RS_TOKEN(NEWLINE, "\n")*/                                                 \
  RS_TOKEN (SCOPE_RESOLUTION, "::") /* dodgy */                                \
  RS_TOKEN (SINGLE_QUOTE, "'") /* should i differentiate from lifetime? */     \
  RS_TOKEN (DOUBLE_QUOTE, "\"")                                                \
  RS_TOKEN (UNDERSCORE,                                                        \
	    "_") /* TODO: treat as reserved word like mrustc instead? */       \
  RS_TOKEN (IDENTIFIER, "identifier")                                          \
  RS_TOKEN (INT_LITERAL,                                                       \
	    "integer literal") /* do different int and float types need        \
				  different literal types? */                  \
  RS_TOKEN (FLOAT_LITERAL, "float literal")                                    \
  RS_TOKEN (STRING_LITERAL, "string literal")                                  \
  RS_TOKEN (CHAR_LITERAL, "character literal")                                 \
  RS_TOKEN (BYTE_STRING_LITERAL, "byte string literal")                        \
  RS_TOKEN (BYTE_CHAR_LITERAL, "byte character literal")                       \
  RS_TOKEN (LIFETIME, "lifetime") /* TODO: improve token type */               \
  /* Have "interpolated" tokens (whatever that means)? identifier, path, type, \
   * pattern, */                                                               \
  /* expression, statement, block, meta, item in mrustc (but not directly in   \
   * lexer). */                                                                \
  RS_TOKEN (LEFT_PAREN, "(")                                                   \
  RS_TOKEN (RIGHT_PAREN, ")")                                                  \
  RS_TOKEN (LEFT_CURLY, "{")                                                   \
  RS_TOKEN (RIGHT_CURLY, "}")                                                  \
  RS_TOKEN (LEFT_SQUARE, "[")                                                  \
  RS_TOKEN (RIGHT_SQUARE, "]")                                                 \
  /* Macros */                                                                 \
  RS_TOKEN (DOLLAR_SIGN, "$")                                                  \
  /* Doc Comments */                                                           \
  RS_TOKEN (INNER_DOC_COMMENT, "#![doc]")                                      \
  RS_TOKEN (OUTER_DOC_COMMENT, "#[doc]")                                       \
  /* have "weak" union and 'static keywords? */                                \
  RS_TOKEN_KEYWORD (ABSTRACT, "abstract") /* unused */                         \
  RS_TOKEN_KEYWORD (AS, "as")                                                  \
  RS_TOKEN_KEYWORD (ASYNC, "async")   /* unused */                             \
  RS_TOKEN_KEYWORD (BECOME, "become") /* unused */                             \
  RS_TOKEN_KEYWORD (BOX, "box")	      /* unused */                             \
  RS_TOKEN_KEYWORD (BREAK, "break")                                            \
  RS_TOKEN_KEYWORD (CONST, "const")                                            \
  RS_TOKEN_KEYWORD (CONTINUE, "continue")                                      \
  RS_TOKEN_KEYWORD (CRATE, "crate")                                            \
  /* FIXME: Do we need to add $crate (DOLLAR_CRATE) as a reserved kw? */       \
  RS_TOKEN_KEYWORD (DO, "do") /* unused */                                     \
  RS_TOKEN_KEYWORD (DYN, "dyn")                                                \
  RS_TOKEN_KEYWORD (ELSE, "else")                                              \
  RS_TOKEN_KEYWORD (ENUM_TOK, "enum")                                          \
  RS_TOKEN_KEYWORD (EXTERN_TOK, "extern")                                      \
  RS_TOKEN_KEYWORD (FALSE_LITERAL, "false")                                    \
  RS_TOKEN_KEYWORD (FINAL_TOK, "final") /* unused */                           \
  RS_TOKEN_KEYWORD (FN_TOK, "fn")                                              \
  RS_TOKEN_KEYWORD (FOR, "for")                                                \
  RS_TOKEN_KEYWORD (IF, "if")                                                  \
  RS_TOKEN_KEYWORD (IMPL, "impl")                                              \
  RS_TOKEN_KEYWORD (IN, "in")                                                  \
  RS_TOKEN_KEYWORD (LET, "let")                                                \
  RS_TOKEN_KEYWORD (LOOP, "loop")                                              \
  RS_TOKEN_KEYWORD (MACRO, "macro")                                            \
  RS_TOKEN_KEYWORD (MATCH_TOK, "match")                                        \
  RS_TOKEN_KEYWORD (MOD, "mod")                                                \
  RS_TOKEN_KEYWORD (MOVE, "move")                                              \
  RS_TOKEN_KEYWORD (MUT, "mut")                                                \
  RS_TOKEN_KEYWORD (OVERRIDE_TOK, "override") /* unused */                     \
  RS_TOKEN_KEYWORD (PRIV, "priv")	      /* unused */                     \
  RS_TOKEN_KEYWORD (PUB, "pub")                                                \
  RS_TOKEN_KEYWORD (REF, "ref")                                                \
  RS_TOKEN_KEYWORD (RETURN_TOK, "return")                                      \
  RS_TOKEN_KEYWORD (SELF_ALIAS,                                                \
		    "Self") /* mrustc does not treat this as a reserved word*/ \
  RS_TOKEN_KEYWORD (SELF, "self")                                              \
  RS_TOKEN_KEYWORD (STATIC_TOK, "static")                                      \
  RS_TOKEN_KEYWORD (STRUCT_TOK, "struct")                                      \
  RS_TOKEN_KEYWORD (SUPER, "super")                                            \
  RS_TOKEN_KEYWORD (TRAIT, "trait")                                            \
  RS_TOKEN_KEYWORD (TRUE_LITERAL, "true")                                      \
  RS_TOKEN_KEYWORD (TRY, "try") /* unused */                                   \
  RS_TOKEN_KEYWORD (TYPE, "type")                                              \
  RS_TOKEN_KEYWORD (TYPEOF, "typeof") /* unused */                             \
  RS_TOKEN_KEYWORD (UNSAFE, "unsafe")                                          \
  RS_TOKEN_KEYWORD (UNSIZED, "unsized") /* unused */                           \
  RS_TOKEN_KEYWORD (USE, "use")                                                \
  RS_TOKEN_KEYWORD (VIRTUAL, "virtual") /* unused */                           \
  RS_TOKEN_KEYWORD (WHERE, "where")                                            \
  RS_TOKEN_KEYWORD (WHILE, "while")                                            \
  RS_TOKEN_KEYWORD (YIELD, "yield") /* unused */                               \
  RS_TOKEN (LAST_TOKEN, "<last-token-marker>")
     203  
     204  // Contains all token types. Crappy implementation via x-macros.
     205  enum TokenId
     206  {
     207  #define RS_TOKEN(name, _) name,
     208  #define RS_TOKEN_KEYWORD(x, y) RS_TOKEN (x, y)
     209    RS_TOKEN_LIST
     210  #undef RS_TOKEN_KEYWORD
     211  #undef RS_TOKEN
     212  };
     213  
     214  // dodgy "TokenPtr" declaration with Token forward declaration
     215  class Token;
     216  // A smart pointer (shared_ptr) to Token.
     217  typedef std::shared_ptr<Token> TokenPtr;
     218  // A smart pointer (shared_ptr) to a constant Token.
     219  typedef std::shared_ptr<const Token> const_TokenPtr;
     220  
// Returns the description for token kind ID; the Token class uses it for
// diagnostics (error reporting). Defined out of line via x-macros --
// presumably it yields the second argument of the matching RS_TOKEN_LIST
// entry; confirm against the definition.
const char *
get_token_description (TokenId id);
/* Returns a string for token kind ID; the Token class uses it for debugging.
 * Defined out of line via x-macros -- how the result differs from
 * get_token_description is not visible from this header. */
const char *
token_id_to_str (TokenId id);
// Returns a string describing the type hint TYPE. Defined out of line.
const char *
get_type_hint_string (PrimitiveCoreType type);
     231  
     232  // Represents a single token. Create using factory static methods.
     233  class Token
     234  {
     235  private:
     236    // Token kind.
     237    TokenId token_id;
     238    // Token location.
     239    Location locus;
     240    // Associated text (if any) of token.
     241    std::unique_ptr<std::string> str;
     242    // TODO: maybe remove issues and just store std::string as value?
     243    /* Type hint for token based on lexer data (e.g. type suffix). Does not exist
     244     * for most tokens. */
     245    PrimitiveCoreType type_hint;
     246  
     247    // Token constructor from token id and location. Has a null string.
     248    Token (TokenId token_id, Location location)
     249      : token_id (token_id), locus (location), str (nullptr),
     250        type_hint (CORETYPE_UNKNOWN)
     251    {}
     252  
     253    // Token constructor from token id, location, and a string.
     254    Token (TokenId token_id, Location location, std::string &&paramStr)
     255      : token_id (token_id), locus (location),
     256        str (new std::string (std::move (paramStr))), type_hint (CORETYPE_UNKNOWN)
     257    {}
     258  
     259    // Token constructor from token id, location, and a char.
     260    Token (TokenId token_id, Location location, char paramChar)
     261      : token_id (token_id), locus (location),
     262        str (new std::string (1, paramChar)), type_hint (CORETYPE_UNKNOWN)
     263    {}
     264  
     265    // Token constructor from token id, location, and a "codepoint".
     266    Token (TokenId token_id, Location location, Codepoint paramCodepoint)
     267      : token_id (token_id), locus (location),
     268        str (new std::string (paramCodepoint.as_string ())),
     269        type_hint (CORETYPE_UNKNOWN)
     270    {}
     271  
     272    // Token constructor from token id, location, a string, and type hint.
     273    Token (TokenId token_id, Location location, std::string &&paramStr,
     274  	 PrimitiveCoreType parType)
     275      : token_id (token_id), locus (location),
     276        str (new std::string (std::move (paramStr))), type_hint (parType)
     277    {}
     278  
     279  public:
     280    // No default constructor.
     281    Token () = delete;
     282    // Do not copy/assign tokens.
     283    Token (const Token &) = delete;
     284    Token &operator= (const Token &) = delete;
     285  
     286    // Allow moving tokens.
     287    Token (Token &&other) = default;
     288    Token &operator= (Token &&other) = default;
     289  
     290    ~Token () = default;
     291  
     292    /* TODO: make_shared (which saves a heap allocation) does not work with the
     293     * private constructor */
     294  
     295    // Makes and returns a new TokenPtr (with null string).
     296    static TokenPtr make (TokenId token_id, Location locus)
     297    {
     298      // return std::make_shared<Token> (token_id, locus);
     299      return TokenPtr (new Token (token_id, locus));
     300    }
     301  
     302    // Makes and returns a new TokenPtr of type IDENTIFIER.
     303    static TokenPtr make_identifier (Location locus, std::string &&str)
     304    {
     305      // return std::make_shared<Token> (IDENTIFIER, locus, str);
     306      return TokenPtr (new Token (IDENTIFIER, locus, std::move (str)));
     307    }
     308  
     309    // Makes and returns a new TokenPtr of type INT_LITERAL.
     310    static TokenPtr make_int (Location locus, std::string &&str,
     311  			    PrimitiveCoreType type_hint = CORETYPE_UNKNOWN)
     312    {
     313      // return std::make_shared<Token> (INT_LITERAL, locus, str, type_hint);
     314      return TokenPtr (
     315        new Token (INT_LITERAL, locus, std::move (str), type_hint));
     316    }
     317  
     318    // Makes and returns a new TokenPtr of type FLOAT_LITERAL.
     319    static TokenPtr make_float (Location locus, std::string &&str,
     320  			      PrimitiveCoreType type_hint = CORETYPE_UNKNOWN)
     321    {
     322      // return std::make_shared<Token> (FLOAT_LITERAL, locus, str, type_hint);
     323      return TokenPtr (
     324        new Token (FLOAT_LITERAL, locus, std::move (str), type_hint));
     325    }
     326  
     327    // Makes and returns a new TokenPtr of type STRING_LITERAL.
     328    static TokenPtr make_string (Location locus, std::string &&str)
     329    {
     330      // return std::make_shared<Token> (STRING_LITERAL, locus, str,
     331      // CORETYPE_STR);
     332      return TokenPtr (
     333        new Token (STRING_LITERAL, locus, std::move (str), CORETYPE_STR));
     334    }
     335  
     336    // Makes and returns a new TokenPtr of type CHAR_LITERAL.
     337    static TokenPtr make_char (Location locus, Codepoint char_lit)
     338    {
     339      // return std::make_shared<Token> (CHAR_LITERAL, locus, char_lit);
     340      return TokenPtr (new Token (CHAR_LITERAL, locus, char_lit));
     341    }
     342  
     343    // Makes and returns a new TokenPtr of type BYTE_CHAR_LITERAL.
     344    static TokenPtr make_byte_char (Location locus, char byte_char)
     345    {
     346      // return std::make_shared<Token> (BYTE_CHAR_LITERAL, locus, byte_char);
     347      return TokenPtr (new Token (BYTE_CHAR_LITERAL, locus, byte_char));
     348    }
     349  
     350    // Makes and returns a new TokenPtr of type BYTE_STRING_LITERAL (fix).
     351    static TokenPtr make_byte_string (Location locus, std::string &&str)
     352    {
     353      // return std::make_shared<Token> (BYTE_STRING_LITERAL, locus, str);
     354      return TokenPtr (new Token (BYTE_STRING_LITERAL, locus, std::move (str)));
     355    }
     356  
     357    // Makes and returns a new TokenPtr of type INNER_DOC_COMMENT.
     358    static TokenPtr make_inner_doc_comment (Location locus, std::string &&str)
     359    {
     360      return TokenPtr (new Token (INNER_DOC_COMMENT, locus, std::move (str)));
     361    }
     362  
     363    // Makes and returns a new TokenPtr of type OUTER_DOC_COMMENT.
     364    static TokenPtr make_outer_doc_comment (Location locus, std::string &&str)
     365    {
     366      return TokenPtr (new Token (OUTER_DOC_COMMENT, locus, std::move (str)));
     367    }
     368  
     369    // Makes and returns a new TokenPtr of type LIFETIME.
     370    static TokenPtr make_lifetime (Location locus, std::string &&str)
     371    {
     372      // return std::make_shared<Token> (LIFETIME, locus, str);
     373      return TokenPtr (new Token (LIFETIME, locus, std::move (str)));
     374    }
     375  
     376    // Gets id of the token.
     377    TokenId get_id () const { return token_id; }
     378  
     379    // Gets location of the token.
     380    Location get_locus () const { return locus; }
     381  
     382    // Gets string description of the token.
     383    const std::string &
     384    get_str () const; /*{
     385  // FIXME: put in header again when fix null problem
     386  //gcc_assert(str != nullptr);
     387  if (str == nullptr) {
     388  error_at(get_locus(), "attempted to get string for '%s', which has no string.
     389  returning empty string instead.", get_token_description()); return "";
     390  }
     391  return *str;
     392  }*/
     393  
     394    // Gets token's type hint info.
     395    PrimitiveCoreType get_type_hint () const
     396    {
     397      return type_hint == CORETYPE_PURE_DECIMAL ? CORETYPE_UNKNOWN : type_hint;
     398    }
     399  
     400    // diagnostics (error reporting)
     401    const char *get_token_description () const
     402    {
     403      return Rust::get_token_description (token_id);
     404    }
     405  
     406    // debugging
     407    const char *token_id_to_str () const
     408    {
     409      return Rust::token_id_to_str (token_id);
     410    }
     411  
     412    // debugging
     413    const char *get_type_hint_str () const;
     414  
     415    /* Returns whether the token is a literal of any type (int, float, char,
     416     * string, byte char, byte string). */
     417    bool is_literal () const
     418    {
     419      switch (token_id)
     420        {
     421        case INT_LITERAL:
     422        case FLOAT_LITERAL:
     423        case CHAR_LITERAL:
     424        case STRING_LITERAL:
     425        case BYTE_CHAR_LITERAL:
     426        case BYTE_STRING_LITERAL:
     427  	return true;
     428        default:
     429  	return false;
     430        }
     431    }
     432  
     433    /* Returns whether the token actually has a string (regardless of whether it
     434     * should or not). */
     435    bool has_str () const { return str != nullptr; }
     436  
     437    // Returns whether the token should have a string.
     438    bool should_have_str () const
     439    {
     440      return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME;
     441    }
     442  
     443    // Returns whether the token is a pure decimal int literal
     444    bool is_pure_decimal () const { return type_hint == CORETYPE_PURE_DECIMAL; }
     445  };
     446  } // namespace Rust
     447  
     448  #endif