(root)/
gcc-13.2.0/
gcc/
go/
gofrontend/
lex.h
       1  // lex.h -- Go frontend lexer.     -*- C++ -*-
       2  
       3  // Copyright 2009 The Go Authors. All rights reserved.
       4  // Use of this source code is governed by a BSD-style
       5  // license that can be found in the LICENSE file.
       6  
       7  #ifndef GO_LEX_H
       8  #define GO_LEX_H
       9  
      10  #include <mpfr.h>
      11  
      12  #include "operator.h"
      13  #include "go-linemap.h"
      14  
      15  struct Unicode_range;
      16  
      17  // The keywords.  These must be in sorted order, other than
      18  // KEYWORD_INVALID.  They must match the Keywords::mapping_ array in
      19  // lex.cc.
      20  
      21  enum Keyword
      22  {
      23    KEYWORD_INVALID,	// Not a keyword.
      24    KEYWORD_ASM,
      25    KEYWORD_BREAK,
      26    KEYWORD_CASE,
      27    KEYWORD_CHAN,
      28    KEYWORD_CONST,
      29    KEYWORD_CONTINUE,
      30    KEYWORD_DEFAULT,
      31    KEYWORD_DEFER,
      32    KEYWORD_ELSE,
      33    KEYWORD_FALLTHROUGH,
      34    KEYWORD_FOR,
      35    KEYWORD_FUNC,
      36    KEYWORD_GO,
      37    KEYWORD_GOTO,
      38    KEYWORD_IF,
      39    KEYWORD_IMPORT,
      40    KEYWORD_INTERFACE,
      41    KEYWORD_MAP,
      42    KEYWORD_PACKAGE,
      43    KEYWORD_RANGE,
      44    KEYWORD_RETURN,
      45    KEYWORD_SELECT,
      46    KEYWORD_STRUCT,
      47    KEYWORD_SWITCH,
      48    KEYWORD_TYPE,
      49    KEYWORD_VAR
      50  };
      51  
      52  // Pragmas built from magic comments and recorded for functions.
      53  // These are used as bits in a bitmask.
      54  // The set of values is intended to be the same as the gc compiler.
      55  
      56  enum GoPragma
      57  {
      58    GOPRAGMA_NOINTERFACE = 1 << 0,	// Method not in type descriptor.
      59    GOPRAGMA_NOESCAPE = 1 << 1,		// Args do not escape.
      60    GOPRAGMA_NORACE = 1 << 2,		// No race detector.
      61    GOPRAGMA_NOSPLIT = 1 << 3,		// Do not split stack.
      62    GOPRAGMA_NOINLINE = 1 << 4,		// Do not inline.
      63    GOPRAGMA_SYSTEMSTACK = 1 << 5,	// Must run on system stack.
      64    GOPRAGMA_NOWRITEBARRIER = 1 << 6,	// No write barriers.
      65    GOPRAGMA_NOWRITEBARRIERREC = 1 << 7,	// No write barriers here or callees.
      66    GOPRAGMA_YESWRITEBARRIERREC = 1 << 8,	// Stops nowritebarrierrec.
      67    GOPRAGMA_MARK = 1 << 9,		// Marker for nowritebarrierrec.
      68    GOPRAGMA_CGOUNSAFEARGS = 1 << 10,	// Pointer to arg is pointer to all.
      69    GOPRAGMA_UINTPTRESCAPES = 1 << 11,	// uintptr(p) escapes.
      70    GOPRAGMA_NOTINHEAP = 1 << 12		// type is not in heap.
      71  };
      72  
      73  // A token returned from the lexer.
      74  
      75  class Token
      76  {
      77   public:
      78    // Token classification.
      79    enum Classification
      80    {
      81      // Token is invalid.
      82      TOKEN_INVALID,
      83      // Token indicates end of input.
      84      TOKEN_EOF,
      85      // Token is a keyword.
      86      TOKEN_KEYWORD,
      87      // Token is an identifier.
      88      TOKEN_IDENTIFIER,
      89      // Token is a string of characters.
      90      TOKEN_STRING,
      91      // Token is an operator.
      92      TOKEN_OPERATOR,
      93      // Token is a character constant.
      94      TOKEN_CHARACTER,
      95      // Token is an integer.
      96      TOKEN_INTEGER,
      97      // Token is a floating point number.
      98      TOKEN_FLOAT,
      99      // Token is an imaginary number.
     100      TOKEN_IMAGINARY
     101    };
     102  
     103    ~Token();
     104    Token(const Token&);
     105    Token& operator=(const Token&);
     106  
     107    // Get token classification.
     108    Classification
     109    classification() const
     110    { return this->classification_; }
     111  
     112    // Make a token for an invalid value.
     113    static Token
     114    make_invalid_token(Location location)
     115    { return Token(TOKEN_INVALID, location); }
     116  
     117    // Make a token representing end of file.
     118    static Token
     119    make_eof_token(Location location)
     120    { return Token(TOKEN_EOF, location); }
     121  
     122    // Make a keyword token.
     123    static Token
     124    make_keyword_token(Keyword keyword, Location location)
     125    {
     126      Token tok(TOKEN_KEYWORD, location);
     127      tok.u_.keyword = keyword;
     128      return tok;
     129    }
     130  
     131    // Make an identifier token.
     132    static Token
     133    make_identifier_token(const std::string& value, bool is_exported,
     134  			Location location)
     135    {
     136      Token tok(TOKEN_IDENTIFIER, location);
     137      tok.u_.identifier_value.name = new std::string(value);
     138      tok.u_.identifier_value.is_exported = is_exported;
     139      return tok;
     140    }
     141  
     142    // Make a quoted string token.
     143    static Token
     144    make_string_token(const std::string& value, Location location)
     145    {
     146      Token tok(TOKEN_STRING, location);
     147      tok.u_.string_value = new std::string(value);
     148      return tok;
     149    }
     150  
     151    // Make an operator token.
     152    static Token
     153    make_operator_token(Operator op, Location location)
     154    {
     155      Token tok(TOKEN_OPERATOR, location);
     156      tok.u_.op = op;
     157      return tok;
     158    }
     159  
     160    // Make a character constant token.
     161    static Token
     162    make_character_token(mpz_t val, Location location)
     163    {
     164      Token tok(TOKEN_CHARACTER, location);
     165      mpz_init(tok.u_.integer_value);
     166      mpz_swap(tok.u_.integer_value, val);
     167      return tok;
     168    }
     169  
     170    // Make an integer token.
     171    static Token
     172    make_integer_token(mpz_t val, Location location)
     173    {
     174      Token tok(TOKEN_INTEGER, location);
     175      mpz_init(tok.u_.integer_value);
     176      mpz_swap(tok.u_.integer_value, val);
     177      return tok;
     178    }
     179  
     180    // Make a float token.
     181    static Token
     182    make_float_token(mpfr_t val, Location location)
     183    {
     184      Token tok(TOKEN_FLOAT, location);
     185      mpfr_init(tok.u_.float_value);
     186      mpfr_swap(tok.u_.float_value, val);
     187      return tok;
     188    }
     189  
     190    // Make a token for an imaginary number.
     191    static Token
     192    make_imaginary_token(mpfr_t val, Location location)
     193    {
     194      Token tok(TOKEN_IMAGINARY, location);
     195      mpfr_init(tok.u_.float_value);
     196      mpfr_swap(tok.u_.float_value, val);
     197      return tok;
     198    }
     199  
     200    // Get the location of the token.
     201    Location
     202    location() const
     203    { return this->location_; }
     204  
     205    // Return whether this is an invalid token.
     206    bool
     207    is_invalid() const
     208    { return this->classification_ == TOKEN_INVALID; }
     209  
     210    // Return whether this is the EOF token.
     211    bool
     212    is_eof() const
     213    { return this->classification_ == TOKEN_EOF; }
     214  
     215    // Return the keyword value for a keyword token.
     216    Keyword
     217    keyword() const
     218    {
     219      go_assert(this->classification_ == TOKEN_KEYWORD);
     220      return this->u_.keyword;
     221    }
     222  
     223    // Return whether this is an identifier.
     224    bool
     225    is_identifier() const
     226    { return this->classification_ == TOKEN_IDENTIFIER; }
     227  
     228    // Return the identifier.
     229    const std::string&
     230    identifier() const
     231    {
     232      go_assert(this->classification_ == TOKEN_IDENTIFIER);
     233      return *this->u_.identifier_value.name;
     234    }
     235  
     236    // Return whether the identifier is exported.
     237    bool
     238    is_identifier_exported() const
     239    {
     240      go_assert(this->classification_ == TOKEN_IDENTIFIER);
     241      return this->u_.identifier_value.is_exported;
     242    }
     243  
     244    // Return whether this is a string.
     245    bool
     246    is_string() const
     247    {
     248      return this->classification_ == TOKEN_STRING;
     249    }
     250  
     251    // Return the value of a string.  The returned value is a string of
     252    // UTF-8 characters.
     253    std::string
     254    string_value() const
     255    {
     256      go_assert(this->classification_ == TOKEN_STRING);
     257      return *this->u_.string_value;
     258    }
     259  
     260    // Return the value of a character constant.
     261    const mpz_t*
     262    character_value() const
     263    {
     264      go_assert(this->classification_ == TOKEN_CHARACTER);
     265      return &this->u_.integer_value;
     266    }
     267  
     268    // Return the value of an integer.
     269    const mpz_t*
     270    integer_value() const
     271    {
     272      go_assert(this->classification_ == TOKEN_INTEGER);
     273      return &this->u_.integer_value;
     274    }
     275  
     276    // Return the value of a float.
     277    const mpfr_t*
     278    float_value() const
     279    {
     280      go_assert(this->classification_ == TOKEN_FLOAT);
     281      return &this->u_.float_value;
     282    }
     283  
     284    // Return the value of an imaginary number.
     285    const mpfr_t*
     286    imaginary_value() const
     287    {
     288      go_assert(this->classification_ == TOKEN_IMAGINARY);
     289      return &this->u_.float_value;
     290    }
     291  
     292    // Return the operator value for an operator token.
     293    Operator
     294    op() const
     295    {
     296      go_assert(this->classification_ == TOKEN_OPERATOR);
     297      return this->u_.op;
     298    }
     299  
     300    // Return whether this token is KEYWORD.
     301    bool
     302    is_keyword(Keyword keyword) const
     303    {
     304      return (this->classification_ == TOKEN_KEYWORD
     305  	    && this->u_.keyword == keyword);
     306    }
     307  
     308    // Return whether this token is OP.
     309    bool
     310    is_op(Operator op) const
     311    { return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; }
     312  
     313    // Print the token for debugging.
     314    void
     315    print(FILE*) const;
     316  
     317   private:
     318    // Private constructor used by make_..._token functions above.
     319    Token(Classification, Location);
     320  
     321    // Clear the token.
     322    void
     323    clear();
     324  
     325    // The token classification.
     326    Classification classification_;
     327    union
     328    {
     329      // The keyword value for TOKEN_KEYWORD.
     330      Keyword keyword;
     331      // The token value for TOKEN_IDENTIFIER.
     332      struct
     333      {
     334        // The name of the identifier.  This has been mangled to only
     335        // include ASCII characters.
     336        std::string* name;
     337        // Whether this name should be exported.  This is true if the
     338        // first letter in the name is upper case.
     339        bool is_exported;
     340      } identifier_value;
     341      // The string value for TOKEN_STRING.
     342      std::string* string_value;
     343      // The token value for TOKEN_CHARACTER or TOKEN_INTEGER.
     344      mpz_t integer_value;
     345      // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
     346      mpfr_t float_value;
     347      // The token value for TOKEN_OPERATOR or the keyword value
     348      Operator op;
     349    } u_;
     350    // The source location.
     351    Location location_;
     352  };
     353  
     354  // The lexer itself.
     355  
     356  class Lex
     357  {
     358   public:
     359    Lex(const char* input_file_name, FILE* input_file, Linemap *linemap);
     360  
     361    ~Lex();
     362  
     363    // Return the next token.
     364    Token
     365    next_token();
     366  
     367    // Return the contents of any current //extern comment.
     368    const std::string&
     369    extern_name() const
     370    { return this->extern_; }
     371  
     372    // Return the current set of pragmas, and clear them.
     373    unsigned int
     374    get_and_clear_pragmas()
     375    {
     376      unsigned int ret = this->pragmas_;
     377      this->pragmas_ = 0;
     378      return ret;
     379    }
     380  
     381    struct Linkname
     382    {
     383      std::string ext_name;	// External name; empty to just export.
     384      bool is_exported;		// Whether the internal name is exported.
     385      Location loc;		// Location of go:linkname directive.
     386  
     387      Linkname()
     388        : ext_name(), is_exported(false), loc()
     389      { }
     390  
     391      Linkname(const std::string& ext_name_a, bool is_exported_a, Location loc_a)
     392        : ext_name(ext_name_a), is_exported(is_exported_a), loc(loc_a)
     393      { }
     394    };
     395  
     396    typedef std::map<std::string, Linkname> Linknames;
     397  
     398    // Return the linknames seen so far, or NULL if none, and clear the
     399    // set.  These are from go:linkname compiler directives.
     400    Linknames*
     401    get_and_clear_linknames()
     402    {
     403      Linknames* ret = this->linknames_;
     404      this->linknames_ = NULL;
     405      return ret;
     406    }
     407  
     408    // Return whether there are any current go:embed patterns.
     409    bool
     410    has_embeds() const
     411    { return !this->embeds_.empty(); }
     412  
     413    // If there are any go:embed patterns seen so far, store them in
     414    // *EMBEDS and clear the saved set.  *EMBEDS must be an empty
     415    // vector.
     416    void
     417    get_and_clear_embeds(std::vector<std::string>* embeds)
     418    {
     419      go_assert(embeds->empty());
     420      std::swap(*embeds, this->embeds_);
     421    }
     422  
     423    // Clear any go:embed patterns seen so far.  This is used for
     424    // erroneous cases.
     425    void
     426    clear_embeds()
     427    { this->embeds_.clear(); }
     428  
     429    // Return whether the identifier NAME should be exported.  NAME is a
     430    // mangled name which includes only ASCII characters.
     431    static bool
     432    is_exported_mangled_name(const std::string& name);
     433  
     434    // Return whether the identifier NAME should be exported.  NAME is
     435    // an unmangled utf-8 string and may contain non-ASCII characters.
     436    static bool
     437    is_exported_name(const std::string& name);
     438  
     439    // Return whether the identifier NAME is invalid.  When we see an
     440    // invalid character we still build an identifier, but we use a
     441    // magic string to indicate that the identifier is invalid.  We then
     442    // use this to avoid knockon errors.
     443    static bool
     444    is_invalid_identifier(const std::string& name);
     445  
     446    // A helper function.  Append V to STR.  IS_CHARACTER is true if V
     447    // is a Unicode character which should be converted into UTF-8,
     448    // false if it is a byte value to be appended directly.  The
     449    // location is used to warn about an out of range character.
     450    static void
     451    append_char(unsigned int v, bool is_charater, std::string* str,
     452  	      Location);
     453  
     454    // A helper function.  Fetch a UTF-8 character from STR and store it
     455    // in *VALUE.  Return the number of bytes read from STR.  Return 0
     456    // if STR does not point to a valid UTF-8 character.
     457    static int
     458    fetch_char(const char* str, unsigned int *value);
     459  
     460    // Return whether C is a Unicode or "C" locale space character.
     461    static bool
     462    is_unicode_space(unsigned int c);
     463  
     464    // Convert the specified hex char into an unsigned integer value.
     465    static unsigned
     466    hex_val(char c);
     467  
     468   private:
     469    ssize_t
     470    get_line();
     471  
     472    bool
     473    require_line();
     474  
     475    // The current location.
     476    Location
     477    location() const;
     478  
     479    // A position CHARS column positions before the current location.
     480    Location
     481    earlier_location(int chars) const;
     482  
     483    static bool
     484    is_hex_digit(char);
     485  
     486    static bool
     487    is_base_digit(int base, char);
     488  
     489    static unsigned char
     490    octal_value(char c)
     491    { return c - '0'; }
     492  
     493    Token
     494    make_invalid_token()
     495    { return Token::make_invalid_token(this->location()); }
     496  
     497    Token
     498    make_eof_token()
     499    { return Token::make_eof_token(this->location()); }
     500  
     501    Token
     502    make_operator(Operator op, int chars)
     503    { return Token::make_operator_token(op, this->earlier_location(chars)); }
     504  
     505    Token
     506    gather_identifier();
     507  
     508    static bool
     509    could_be_exponent(int base, const char*, const char*);
     510  
     511    Token
     512    gather_number();
     513  
     514    void
     515    skip_exponent();
     516  
     517    Token
     518    gather_character();
     519  
     520    Token
     521    gather_string();
     522  
     523    Token
     524    gather_raw_string();
     525  
     526    const char*
     527    advance_one_utf8_char(const char*, unsigned int*, bool*);
     528  
     529    const char*
     530    advance_one_char(const char*, bool, unsigned int*, bool*);
     531  
     532    static bool
     533    is_unicode_digit(unsigned int c);
     534  
     535    static bool
     536    is_unicode_letter(unsigned int c);
     537  
     538    static bool
     539    is_unicode_uppercase(unsigned int c);
     540  
     541    static bool
     542    is_in_unicode_range(unsigned int C, const Unicode_range* ranges,
     543  		      size_t range_size);
     544  
     545    Operator
     546    three_character_operator(char, char, char);
     547  
     548    Operator
     549    two_character_operator(char, char);
     550  
     551    Operator
     552    one_character_operator(char);
     553  
     554    bool
     555    skip_c_comment(bool* found_newline);
     556  
     557    void
     558    skip_cpp_comment();
     559  
     560    void
     561    gather_embed(const char*, const char*);
     562  
     563    // The input file name.
     564    const char* input_file_name_ ATTRIBUTE_UNUSED;
     565    // The input file.
     566    FILE* input_file_;
     567    // The object used to keep track of file names and line numbers.
     568    Linemap* linemap_;
     569    // The line buffer.  This holds the current line.
     570    char* linebuf_;
     571    // The size of the line buffer.
     572    size_t linebufsize_;
     573    // The nmber of characters in the current line.
     574    size_t linesize_;
     575    // The current offset in linebuf_.
     576    size_t lineoff_;
     577    // The current line number.
     578    size_t lineno_;
     579    // Whether to add a semicolon if we see a newline now.
     580    bool add_semi_at_eol_;
     581    // Pragmas for the next function, from magic comments.
     582    unsigned int pragmas_;
     583    // The external name to use for a function declaration, from a magic
     584    // //extern comment.
     585    std::string extern_;
     586    // The list of //go:linkname comments, if any.
     587    Linknames* linknames_;
     588    // The list of //go:embed patterns, if any.
     589    std::vector<std::string> embeds_;
     590  };
     591  
     592  #endif // !defined(GO_LEX_H)