Viewing file: asdl.py (12.76 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
#------------------------------------------------------------------------------- # Parser for ASDL [1] definition files. Reads in an ASDL description and parses # it into an AST that describes it. # # The EBNF we're parsing here: Figure 1 of the paper [1]. Extended to support # modules and attributes after a product. Words starting with Capital letters # are terminals. Literal tokens are in "double quotes". Others are # non-terminals. Id is either TokenId or ConstructorId. # # module ::= "module" Id "{" [definitions] "}" # definitions ::= { TypeId "=" type } # type ::= product | sum # product ::= fields ["attributes" fields] # fields ::= "(" { field, "," } field ")" # field ::= TypeId ["?" | "*"] [Id] # sum ::= constructor { "|" constructor } ["attributes" fields] # constructor ::= ConstructorId [fields] # # [1] "The Zephyr Abstract Syntax Description Language" by Wang, et. al. See # http://asdl.sourceforge.net/ #------------------------------------------------------------------------------- from collections import namedtuple import re
__all__ = [ 'builtin_types', 'parse', 'AST', 'Module', 'Type', 'Constructor', 'Field', 'Sum', 'Product', 'VisitorBase', 'Check', 'check']
# The following classes define nodes into which the ASDL description is parsed. # Note: this is a "meta-AST". ASDL files (such as Python.asdl) describe the AST # structure used by a programming language. But ASDL files themselves need to be # parsed. This module parses ASDL files and uses a simple AST to represent them. # See the EBNF at the top of the file to understand the logical connection # between the various node types.
builtin_types = {'identifier', 'string', 'int', 'constant'}
class AST: def __repr__(self): raise NotImplementedError
class Module(AST): def __init__(self, name, dfns): self.name = name self.dfns = dfns self.types = {type.name: type.value for type in dfns}
def __repr__(self): return 'Module({0.name}, {0.dfns})'.format(self)
class Type(AST): def __init__(self, name, value): self.name = name self.value = value
def __repr__(self): return 'Type({0.name}, {0.value})'.format(self)
class Constructor(AST): def __init__(self, name, fields=None): self.name = name self.fields = fields or []
def __repr__(self): return 'Constructor({0.name}, {0.fields})'.format(self)
class Field(AST): def __init__(self, type, name=None, seq=False, opt=False): self.type = type self.name = name self.seq = seq self.opt = opt
def __str__(self): if self.seq: extra = "*" elif self.opt: extra = "?" else: extra = ""
return "{}{} {}".format(self.type, extra, self.name)
def __repr__(self): if self.seq: extra = ", seq=True" elif self.opt: extra = ", opt=True" else: extra = "" if self.name is None: return 'Field({0.type}{1})'.format(self, extra) else: return 'Field({0.type}, {0.name}{1})'.format(self, extra)
class Sum(AST): def __init__(self, types, attributes=None): self.types = types self.attributes = attributes or []
def __repr__(self): if self.attributes: return 'Sum({0.types}, {0.attributes})'.format(self) else: return 'Sum({0.types})'.format(self)
class Product(AST): def __init__(self, fields, attributes=None): self.fields = fields self.attributes = attributes or []
def __repr__(self): if self.attributes: return 'Product({0.fields}, {0.attributes})'.format(self) else: return 'Product({0.fields})'.format(self)
# A generic visitor for the meta-AST that describes ASDL. This can be used by # emitters. Note that this visitor does not provide a generic visit method, so a # subclass needs to define visit methods from visitModule to as deep as the # interesting node. # We also define a Check visitor that makes sure the parsed ASDL is well-formed.
class VisitorBase(object): """Generic tree visitor for ASTs.""" def __init__(self): self.cache = {}
def visit(self, obj, *args): klass = obj.__class__ meth = self.cache.get(klass) if meth is None: methname = "visit" + klass.__name__ meth = getattr(self, methname, None) self.cache[klass] = meth if meth: try: meth(obj, *args) except Exception as e: print("Error visiting %r: %s" % (obj, e)) raise
class Check(VisitorBase): """A visitor that checks a parsed ASDL tree for correctness.
Errors are printed and accumulated. """ def __init__(self): super(Check, self).__init__() self.cons = {} self.errors = 0 self.types = {}
def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn)
def visitType(self, type): self.visit(type.value, str(type.name))
def visitSum(self, sum, name): for t in sum.types: self.visit(t, name)
def visitConstructor(self, cons, name): key = str(cons.name) conflict = self.cons.get(key) if conflict is None: self.cons[key] = name else: print('Redefinition of constructor {}'.format(key)) print('Defined in {} and {}'.format(conflict, name)) self.errors += 1 for f in cons.fields: self.visit(f, key)
def visitField(self, field, name): key = str(field.type) l = self.types.setdefault(key, []) l.append(name)
def visitProduct(self, prod, name): for f in prod.fields: self.visit(f, name)
def check(mod): """Check the parsed ASDL tree for correctness.
Return True if success. For failure, the errors are printed out and False is returned. """ v = Check() v.visit(mod)
for t in v.types: if t not in mod.types and not t in builtin_types: v.errors += 1 uses = ", ".join(v.types[t]) print('Undefined type {}, used in {}'.format(t, uses)) return not v.errors
# The ASDL parser itself comes next. The only interesting external interface # here is the top-level parse function.
def parse(filename): """Parse ASDL from the given file and return a Module node describing it.""" with open(filename, encoding="utf-8") as f: parser = ASDLParser() return parser.parse(f.read())
# Types for describing tokens in an ASDL specification. class TokenKind: """TokenKind is provides a scope for enumerated token kinds.""" (ConstructorId, TypeId, Equals, Comma, Question, Pipe, Asterisk, LParen, RParen, LBrace, RBrace) = range(11)
operator_table = { '=': Equals, ',': Comma, '?': Question, '|': Pipe, '(': LParen, ')': RParen, '*': Asterisk, '{': LBrace, '}': RBrace}
Token = namedtuple('Token', 'kind value lineno')
class ASDLSyntaxError(Exception): def __init__(self, msg, lineno=None): self.msg = msg self.lineno = lineno or '<unknown>'
def __str__(self): return 'Syntax error on line {0.lineno}: {0.msg}'.format(self)
def tokenize_asdl(buf): """Tokenize the given buffer. Yield Token objects.""" for lineno, line in enumerate(buf.splitlines(), 1): for m in re.finditer(r'\s*(\w+|--.*|.)', line.strip()): c = m.group(1) if c[0].isalpha(): # Some kind of identifier if c[0].isupper(): yield Token(TokenKind.ConstructorId, c, lineno) else: yield Token(TokenKind.TypeId, c, lineno) elif c[:2] == '--': # Comment break else: # Operators try: op_kind = TokenKind.operator_table[c] except KeyError: raise ASDLSyntaxError('Invalid operator %s' % c, lineno) yield Token(op_kind, c, lineno)
class ASDLParser: """Parser for ASDL files.
Create, then call the parse method on a buffer containing ASDL. This is a simple recursive descent parser that uses tokenize_asdl for the lexing. """ def __init__(self): self._tokenizer = None self.cur_token = None
def parse(self, buf): """Parse the ASDL in the buffer and return an AST with a Module root. """ self._tokenizer = tokenize_asdl(buf) self._advance() return self._parse_module()
def _parse_module(self): if self._at_keyword('module'): self._advance() else: raise ASDLSyntaxError( 'Expected "module" (found {})'.format(self.cur_token.value), self.cur_token.lineno) name = self._match(self._id_kinds) self._match(TokenKind.LBrace) defs = self._parse_definitions() self._match(TokenKind.RBrace) return Module(name, defs)
def _parse_definitions(self): defs = [] while self.cur_token.kind == TokenKind.TypeId: typename = self._advance() self._match(TokenKind.Equals) type = self._parse_type() defs.append(Type(typename, type)) return defs
def _parse_type(self): if self.cur_token.kind == TokenKind.LParen: # If we see a (, it's a product return self._parse_product() else: # Otherwise it's a sum. Look for ConstructorId sumlist = [Constructor(self._match(TokenKind.ConstructorId), self._parse_optional_fields())] while self.cur_token.kind == TokenKind.Pipe: # More constructors self._advance() sumlist.append(Constructor( self._match(TokenKind.ConstructorId), self._parse_optional_fields())) return Sum(sumlist, self._parse_optional_attributes())
def _parse_product(self): return Product(self._parse_fields(), self._parse_optional_attributes())
def _parse_fields(self): fields = [] self._match(TokenKind.LParen) while self.cur_token.kind == TokenKind.TypeId: typename = self._advance() is_seq, is_opt = self._parse_optional_field_quantifier() id = (self._advance() if self.cur_token.kind in self._id_kinds else None) fields.append(Field(typename, id, seq=is_seq, opt=is_opt)) if self.cur_token.kind == TokenKind.RParen: break elif self.cur_token.kind == TokenKind.Comma: self._advance() self._match(TokenKind.RParen) return fields
def _parse_optional_fields(self): if self.cur_token.kind == TokenKind.LParen: return self._parse_fields() else: return None
def _parse_optional_attributes(self): if self._at_keyword('attributes'): self._advance() return self._parse_fields() else: return None
def _parse_optional_field_quantifier(self): is_seq, is_opt = False, False if self.cur_token.kind == TokenKind.Asterisk: is_seq = True self._advance() elif self.cur_token.kind == TokenKind.Question: is_opt = True self._advance() return is_seq, is_opt
def _advance(self): """ Return the value of the current token and read the next one into self.cur_token. """ cur_val = None if self.cur_token is None else self.cur_token.value try: self.cur_token = next(self._tokenizer) except StopIteration: self.cur_token = None return cur_val
_id_kinds = (TokenKind.ConstructorId, TokenKind.TypeId)
def _match(self, kind): """The 'match' primitive of RD parsers.
* Verifies that the current token is of the given kind (kind can be a tuple, in which the kind must match one of its members). * Returns the value of the current token * Reads in the next token """ if (isinstance(kind, tuple) and self.cur_token.kind in kind or self.cur_token.kind == kind ): value = self.cur_token.value self._advance() return value else: raise ASDLSyntaxError( 'Unmatched {} (found {})'.format(kind, self.cur_token.kind), self.cur_token.lineno)
def _at_keyword(self, keyword): return (self.cur_token.kind == TokenKind.TypeId and self.cur_token.value == keyword)
|