// lex.h -- Go frontend lexer.     -*- C++ -*-

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#ifndef GO_LEX_H
#define GO_LEX_H

#include <mpfr.h>

#include "operator.h"
#include "go-linemap.h"

struct Unicode_range;
// The keywords.  These must be in sorted order, other than
// KEYWORD_INVALID.  They must match the Keywords::mapping_ array in
// lex.cc.
//
// Each enumerator is kept on its own line so that the trailing comment
// on KEYWORD_INVALID cannot swallow the enumerators that follow it.

enum Keyword
{
  KEYWORD_INVALID,	// Not a keyword.
  KEYWORD_ASM,
  KEYWORD_BREAK,
  KEYWORD_CASE,
  KEYWORD_CHAN,
  KEYWORD_CONST,
  KEYWORD_CONTINUE,
  KEYWORD_DEFAULT,
  KEYWORD_DEFER,
  KEYWORD_ELSE,
  KEYWORD_FALLTHROUGH,
  KEYWORD_FOR,
  KEYWORD_FUNC,
  KEYWORD_GO,
  KEYWORD_GOTO,
  KEYWORD_IF,
  KEYWORD_IMPORT,
  KEYWORD_INTERFACE,
  KEYWORD_MAP,
  KEYWORD_PACKAGE,
  KEYWORD_RANGE,
  KEYWORD_RETURN,
  KEYWORD_SELECT,
  KEYWORD_STRUCT,
  KEYWORD_SWITCH,
  KEYWORD_TYPE,
  KEYWORD_VAR
};

// Pragmas built from magic comments and recorded for functions.
// These are used as bits in a bitmask.
// The set of values is intended to be the same as the gc compiler.
//
// Each flag is a distinct single bit, so flags may be combined with
// bitwise OR and tested with bitwise AND.

enum GoPragma
{
  GOPRAGMA_NOINTERFACE = 1 << 0,	// Method not in type descriptor.
  GOPRAGMA_NOESCAPE = 1 << 1,		// Args do not escape.
  GOPRAGMA_NORACE = 1 << 2,		// No race detector.
  GOPRAGMA_NOSPLIT = 1 << 3,		// Do not split stack.
  GOPRAGMA_NOINLINE = 1 << 4,		// Do not inline.
  GOPRAGMA_SYSTEMSTACK = 1 << 5,	// Must run on system stack.
  GOPRAGMA_NOWRITEBARRIER = 1 << 6,	// No write barriers.
  GOPRAGMA_NOWRITEBARRIERREC = 1 << 7,	// No write barriers here or callees.
  GOPRAGMA_YESWRITEBARRIERREC = 1 << 8,	// Stops nowritebarrierrec.
  GOPRAGMA_MARK = 1 << 9,		// Marker for nowritebarrierrec.
  GOPRAGMA_CGOUNSAFEARGS = 1 << 10,	// Pointer to arg is pointer to all.
  GOPRAGMA_UINTPTRESCAPES = 1 << 11,	// uintptr(p) escapes.
  GOPRAGMA_NOTINHEAP = 1 << 12		// Type is not in heap.
};

// A token returned from the lexer.
class Token { public: // Token classification. enum Classification { // Token is invalid. TOKEN_INVALID, // Token indicates end of input. TOKEN_EOF, // Token is a keyword. TOKEN_KEYWORD, // Token is an identifier. TOKEN_IDENTIFIER, // Token is a string of characters. TOKEN_STRING, // Token is an operator. TOKEN_OPERATOR, // Token is a character constant. TOKEN_CHARACTER, // Token is an integer. TOKEN_INTEGER, // Token is a floating point number. TOKEN_FLOAT, // Token is an imaginary number. TOKEN_IMAGINARY };
~Token(); Token(const Token&); Token& operator=(const Token&);
// Get token classification. Classification classification() const { return this->classification_; }
// Make a token for an invalid value. static Token make_invalid_token(Location location) { return Token(TOKEN_INVALID, location); }
// Make a token representing end of file. static Token make_eof_token(Location location) { return Token(TOKEN_EOF, location); }
// Make a keyword token. static Token make_keyword_token(Keyword keyword, Location location) { Token tok(TOKEN_KEYWORD, location); tok.u_.keyword = keyword; return tok; }
// Make an identifier token. static Token make_identifier_token(const std::string& value, bool is_exported, Location location) { Token tok(TOKEN_IDENTIFIER, location); tok.u_.identifier_value.name = new std::string(value); tok.u_.identifier_value.is_exported = is_exported; return tok; }
// Make a quoted string token. static Token make_string_token(const std::string& value, Location location) { Token tok(TOKEN_STRING, location); tok.u_.string_value = new std::string(value); return tok; }
// Make an operator token. static Token make_operator_token(Operator op, Location location) { Token tok(TOKEN_OPERATOR, location); tok.u_.op = op; return tok; }
// Make a character constant token. static Token make_character_token(mpz_t val, Location location) { Token tok(TOKEN_CHARACTER, location); mpz_init(tok.u_.integer_value); mpz_swap(tok.u_.integer_value, val); return tok; }
// Make an integer token. static Token make_integer_token(mpz_t val, Location location) { Token tok(TOKEN_INTEGER, location); mpz_init(tok.u_.integer_value); mpz_swap(tok.u_.integer_value, val); return tok; }
// Make a float token. static Token make_float_token(mpfr_t val, Location location) { Token tok(TOKEN_FLOAT, location); mpfr_init(tok.u_.float_value); mpfr_swap(tok.u_.float_value, val); return tok; }
// Make a token for an imaginary number. static Token make_imaginary_token(mpfr_t val, Location location) { Token tok(TOKEN_IMAGINARY, location); mpfr_init(tok.u_.float_value); mpfr_swap(tok.u_.float_value, val); return tok; }
// Get the location of the token. Location location() const { return this->location_; }
// Return whether this is an invalid token. bool is_invalid() const { return this->classification_ == TOKEN_INVALID; }
// Return whether this is the EOF token. bool is_eof() const { return this->classification_ == TOKEN_EOF; }
// Return the keyword value for a keyword token. Keyword keyword() const { go_assert(this->classification_ == TOKEN_KEYWORD); return this->u_.keyword; }
// Return whether this is an identifier. bool is_identifier() const { return this->classification_ == TOKEN_IDENTIFIER; }
// Return the identifier. const std::string& identifier() const { go_assert(this->classification_ == TOKEN_IDENTIFIER); return *this->u_.identifier_value.name; }
// Return whether the identifier is exported. bool is_identifier_exported() const { go_assert(this->classification_ == TOKEN_IDENTIFIER); return this->u_.identifier_value.is_exported; }
// Return whether this is a string. bool is_string() const { return this->classification_ == TOKEN_STRING; }
// Return the value of a string. The returned value is a string of // UTF-8 characters. std::string string_value() const { go_assert(this->classification_ == TOKEN_STRING); return *this->u_.string_value; }
// Return the value of a character constant. const mpz_t* character_value() const { go_assert(this->classification_ == TOKEN_CHARACTER); return &this->u_.integer_value; }
// Return the value of an integer. const mpz_t* integer_value() const { go_assert(this->classification_ == TOKEN_INTEGER); return &this->u_.integer_value; }
// Return the value of a float. const mpfr_t* float_value() const { go_assert(this->classification_ == TOKEN_FLOAT); return &this->u_.float_value; }
// Return the value of an imaginary number. const mpfr_t* imaginary_value() const { go_assert(this->classification_ == TOKEN_IMAGINARY); return &this->u_.float_value; }
// Return the operator value for an operator token. Operator op() const { go_assert(this->classification_ == TOKEN_OPERATOR); return this->u_.op; }
// Return whether this token is KEYWORD. bool is_keyword(Keyword keyword) const { return (this->classification_ == TOKEN_KEYWORD && this->u_.keyword == keyword); }
// Return whether this token is OP. bool is_op(Operator op) const { return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; }
// Print the token for debugging. void print(FILE*) const;
private: // Private constructor used by make_..._token functions above. Token(Classification, Location);
// Clear the token. void clear();
// The token classification. Classification classification_; union { // The keyword value for TOKEN_KEYWORD. Keyword keyword; // The token value for TOKEN_IDENTIFIER. struct { // The name of the identifier. This has been mangled to only // include ASCII characters. std::string* name; // Whether this name should be exported. This is true if the // first letter in the name is upper case. bool is_exported; } identifier_value; // The string value for TOKEN_STRING. std::string* string_value; // The token value for TOKEN_CHARACTER or TOKEN_INTEGER. mpz_t integer_value; // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY. mpfr_t float_value; // The token value for TOKEN_OPERATOR or the keyword value Operator op; } u_; // The source location. Location location_; };
// The lexer itself.
class Lex { public: Lex(const char* input_file_name, FILE* input_file, Linemap *linemap);
~Lex();
// Return the next token. Token next_token();
// Return the contents of any current //extern comment. const std::string& extern_name() const { return this->extern_; }
// Return the current set of pragmas, and clear them. unsigned int get_and_clear_pragmas() { unsigned int ret = this->pragmas_; this->pragmas_ = 0; return ret; }
struct Linkname { std::string ext_name; // External name; empty to just export. bool is_exported; // Whether the internal name is exported. Location loc; // Location of go:linkname directive.
Linkname() : ext_name(), is_exported(false), loc() { }
Linkname(const std::string& ext_name_a, bool is_exported_a, Location loc_a) : ext_name(ext_name_a), is_exported(is_exported_a), loc(loc_a) { } };
typedef std::map<std::string, Linkname> Linknames;
// Return the linknames seen so far, or NULL if none, and clear the // set. These are from go:linkname compiler directives. Linknames* get_and_clear_linknames() { Linknames* ret = this->linknames_; this->linknames_ = NULL; return ret; }
// Return whether there are any current go:embed patterns. bool has_embeds() const { return !this->embeds_.empty(); }
// If there are any go:embed patterns seen so far, store them in // *EMBEDS and clear the saved set. *EMBEDS must be an empty // vector. void get_and_clear_embeds(std::vector<std::string>* embeds) { go_assert(embeds->empty()); std::swap(*embeds, this->embeds_); }
// Clear any go:embed patterns seen so far. This is used for // erroneous cases. void clear_embeds() { this->embeds_.clear(); }
// Return whether the identifier NAME should be exported. NAME is a // mangled name which includes only ASCII characters. static bool is_exported_mangled_name(const std::string& name);
// Return whether the identifier NAME should be exported. NAME is // an unmangled utf-8 string and may contain non-ASCII characters. static bool is_exported_name(const std::string& name);
// Return whether the identifier NAME is invalid. When we see an // invalid character we still build an identifier, but we use a // magic string to indicate that the identifier is invalid. We then // use this to avoid knockon errors. static bool is_invalid_identifier(const std::string& name);
// A helper function. Append V to STR. IS_CHARACTER is true if V // is a Unicode character which should be converted into UTF-8, // false if it is a byte value to be appended directly. The // location is used to warn about an out of range character. static void append_char(unsigned int v, bool is_charater, std::string* str, Location);
// A helper function. Fetch a UTF-8 character from STR and store it // in *VALUE. Return the number of bytes read from STR. Return 0 // if STR does not point to a valid UTF-8 character. static int fetch_char(const char* str, unsigned int *value);
// Return whether C is a Unicode or "C" locale space character. static bool is_unicode_space(unsigned int c);
// Convert the specified hex char into an unsigned integer value. static unsigned hex_val(char c);
private: ssize_t get_line();
bool require_line();
// The current location. Location location() const;
// A position CHARS column positions before the current location. Location earlier_location(int chars) const;
static bool is_hex_digit(char);
static bool is_base_digit(int base, char);
static unsigned char octal_value(char c) { return c - '0'; }
Token make_invalid_token() { return Token::make_invalid_token(this->location()); }
Token make_eof_token() { return Token::make_eof_token(this->location()); }
Token make_operator(Operator op, int chars) { return Token::make_operator_token(op, this->earlier_location(chars)); }
Token gather_identifier();
static bool could_be_exponent(int base, const char*, const char*);
Token gather_number();
void skip_exponent();
Token gather_character();
Token gather_string();
Token gather_raw_string();
const char* advance_one_utf8_char(const char*, unsigned int*, bool*);
const char* advance_one_char(const char*, bool, unsigned int*, bool*);
static bool is_unicode_digit(unsigned int c);
static bool is_unicode_letter(unsigned int c);
static bool is_unicode_uppercase(unsigned int c);
static bool is_in_unicode_range(unsigned int C, const Unicode_range* ranges, size_t range_size);
Operator three_character_operator(char, char, char);
Operator two_character_operator(char, char);
Operator one_character_operator(char);
bool skip_c_comment(bool* found_newline);
void skip_cpp_comment();
void gather_embed(const char*, const char*);
// The input file name. const char* input_file_name_; // The input file. FILE* input_file_; // The object used to keep track of file names and line numbers. Linemap* linemap_; // The line buffer. This holds the current line. char* linebuf_; // The size of the line buffer. size_t linebufsize_; // The nmber of characters in the current line. size_t linesize_; // The current offset in linebuf_. size_t lineoff_; // The current line number. size_t lineno_; // Whether to add a semicolon if we see a newline now. bool add_semi_at_eol_; // Pragmas for the next function, from magic comments. unsigned int pragmas_; // The external name to use for a function declaration, from a magic // //extern comment. std::string extern_; // The list of //go:linkname comments, if any. Linknames* linknames_; // The list of //go:embed patterns, if any. std::vector<std::string> embeds_; };
#endif // !defined(GO_LEX_H)