yaml-cpp/test/gtest-1.10.0/googlemock/scripts/generator/cpp/ast.py
2021-11-30 01:20:18 +00:00

1876 lines
63 KiB
Python

#!/usr/bin/env python
#
# Copyright 2007 Neal Norwitz
# Portions Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate an Abstract Syntax Tree (AST) for C++."""
__author__ = "nnorwitz@google.com (Neal Norwitz)"
# TODO:
# * Tokens should never be exported, need to convert to Nodes
# (return types, parameters, etc.)
# * Handle static class data for templatized classes
# * Handle casts (both C++ and C-style)
# * Handle conditions and loops (if/else, switch, for, while/do)
#
# TODO much, much later:
# * Handle #define
# * exceptions
try:
# Python 3.x
import builtins
except ImportError:
# Python 2.x
import __builtin__ as builtins
import sys
import traceback
from cpp import keywords, tokenize, utils
if not hasattr(builtins, "reversed"):
# Support Python 2.3 and earlier.
def reversed(seq):
for i in range(len(seq) - 1, -1, -1):
yield seq[i]
if not hasattr(builtins, "next"):
# Support Python 2.5 and earlier.
def next(obj):
return obj.next()
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80
FUNCTION_OVERRIDE = 0x100
"""
These are currently unused. Should really handle these properly at some point.
TYPE_MODIFIER_INLINE = 0x010000
TYPE_MODIFIER_EXTERN = 0x020000
TYPE_MODIFIER_STATIC = 0x040000
TYPE_MODIFIER_CONST = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE = 0x400000
TYPE_MODIFIER_MAP = {
'inline': TYPE_MODIFIER_INLINE,
'extern': TYPE_MODIFIER_EXTERN,
'static': TYPE_MODIFIER_STATIC,
'const': TYPE_MODIFIER_CONST,
'register': TYPE_MODIFIER_REGISTER,
'volatile': TYPE_MODIFIER_VOLATILE,
'mutable': TYPE_MODIFIER_MUTABLE,
}
"""
_INTERNAL_TOKEN = "internal"
_NAMESPACE_POP = "ns-pop"
# TODO(nnorwitz): use this as a singleton for templated_types, etc
# where we don't want to create a new empty dict each time. It is also const.
class _NullDict(object):
__contains__ = lambda self: False
keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
# TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
"""Base AST node."""
def __init__(self, start, end):
self.start = start
self.end = end
def IsDeclaration(self):
"""Returns bool if this node is a declaration."""
return False
def IsDefinition(self):
"""Returns bool if this node is a definition."""
return False
def IsExportable(self):
"""Returns bool if this node exportable from a header file."""
return False
def Requires(self, node):
"""Does this AST node require the definition of the node passed in?"""
return False
def XXX__str__(self):
return self._StringHelper(self.__class__.__name__, "")
def _StringHelper(self, name, suffix):
if not utils.DEBUG:
return "%s(%s)" % (name, suffix)
return "%s(%d, %d, %s)" % (name, self.start, self.end, suffix)
def __repr__(self):
return str(self)
class Define(Node):
def __init__(self, start, end, name, definition):
Node.__init__(self, start, end)
self.name = name
self.definition = definition
def __str__(self):
value = "%s %s" % (self.name, self.definition)
return self._StringHelper(self.__class__.__name__, value)
class Include(Node):
def __init__(self, start, end, filename, system):
Node.__init__(self, start, end)
self.filename = filename
self.system = system
def __str__(self):
fmt = '"%s"'
if self.system:
fmt = "<%s>"
return self._StringHelper(self.__class__.__name__, fmt % self.filename)
class Goto(Node):
def __init__(self, start, end, label):
Node.__init__(self, start, end)
self.label = label
def __str__(self):
return self._StringHelper(self.__class__.__name__, str(self.label))
class Expr(Node):
def __init__(self, start, end, expr):
Node.__init__(self, start, end)
self.expr = expr
def Requires(self, node):
# TODO(nnorwitz): impl.
return False
def __str__(self):
return self._StringHelper(self.__class__.__name__, str(self.expr))
class Return(Expr):
pass
class Delete(Expr):
pass
class Friend(Expr):
def __init__(self, start, end, expr, namespace):
Expr.__init__(self, start, end, expr)
self.namespace = namespace[:]
class Using(Node):
def __init__(self, start, end, names):
Node.__init__(self, start, end)
self.names = names
def __str__(self):
return self._StringHelper(self.__class__.__name__, str(self.names))
class Parameter(Node):
def __init__(self, start, end, name, parameter_type, default):
Node.__init__(self, start, end)
self.name = name
self.type = parameter_type
self.default = default
def Requires(self, node):
# TODO(nnorwitz): handle namespaces, etc.
return self.type.name == node.name
def __str__(self):
name = str(self.type)
suffix = "%s %s" % (name, self.name)
if self.default:
suffix += " = " + "".join([d.name for d in self.default])
return self._StringHelper(self.__class__.__name__, suffix)
class _GenericDeclaration(Node):
def __init__(self, start, end, name, namespace):
Node.__init__(self, start, end)
self.name = name
self.namespace = namespace[:]
def FullName(self):
prefix = ""
if self.namespace and self.namespace[-1]:
prefix = "::".join(self.namespace) + "::"
return prefix + self.name
def _TypeStringHelper(self, suffix):
if self.namespace:
names = [n or "<anonymous>" for n in self.namespace]
suffix += " in " + "::".join(names)
return self._StringHelper(self.__class__.__name__, suffix)
# TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
def __init__(self, start, end, name, var_type, initial_value, namespace):
_GenericDeclaration.__init__(self, start, end, name, namespace)
self.type = var_type
self.initial_value = initial_value
def Requires(self, node):
# TODO(nnorwitz): handle namespaces, etc.
return self.type.name == node.name
def ToString(self):
"""Return a string that tries to reconstitute the variable decl."""
suffix = "%s %s" % (self.type, self.name)
if self.initial_value:
suffix += " = " + self.initial_value
return suffix
def __str__(self):
return self._StringHelper(self.__class__.__name__, self.ToString())
class Typedef(_GenericDeclaration):
def __init__(self, start, end, name, alias, namespace):
_GenericDeclaration.__init__(self, start, end, name, namespace)
self.alias = alias
def IsDefinition(self):
return True
def IsExportable(self):
return True
def Requires(self, node):
# TODO(nnorwitz): handle namespaces, etc.
name = node.name
for token in self.alias:
if token is not None and name == token.name:
return True
return False
def __str__(self):
suffix = "%s, %s" % (self.name, self.alias)
return self._TypeStringHelper(suffix)
class _NestedType(_GenericDeclaration):
def __init__(self, start, end, name, fields, namespace):
_GenericDeclaration.__init__(self, start, end, name, namespace)
self.fields = fields
def IsDefinition(self):
return True
def IsExportable(self):
return True
def __str__(self):
suffix = "%s, {%s}" % (self.name, self.fields)
return self._TypeStringHelper(suffix)
class Union(_NestedType):
pass
class Enum(_NestedType):
pass
class Class(_GenericDeclaration):
def __init__(self, start, end, name, bases, templated_types, body, namespace):
_GenericDeclaration.__init__(self, start, end, name, namespace)
self.bases = bases
self.body = body
self.templated_types = templated_types
def IsDeclaration(self):
return self.bases is None and self.body is None
def IsDefinition(self):
return not self.IsDeclaration()
def IsExportable(self):
return not self.IsDeclaration()
def Requires(self, node):
# TODO(nnorwitz): handle namespaces, etc.
if self.bases:
for token_list in self.bases:
# TODO(nnorwitz): bases are tokens, do name comparison.
for token in token_list:
if token.name == node.name:
return True
# TODO(nnorwitz): search in body too.
return False
def __str__(self):
name = self.name
if self.templated_types:
name += "<%s>" % self.templated_types
suffix = "%s, %s, %s" % (name, self.bases, self.body)
return self._TypeStringHelper(suffix)
class Struct(Class):
pass
class Function(_GenericDeclaration):
def __init__(
self,
start,
end,
name,
return_type,
parameters,
modifiers,
templated_types,
body,
namespace,
):
_GenericDeclaration.__init__(self, start, end, name, namespace)
converter = TypeConverter(namespace)
self.return_type = converter.CreateReturnType(return_type)
self.parameters = converter.ToParameters(parameters)
self.modifiers = modifiers
self.body = body
self.templated_types = templated_types
def IsDeclaration(self):
return self.body is None
def IsDefinition(self):
return self.body is not None
def IsExportable(self):
if self.return_type and "static" in self.return_type.modifiers:
return False
return None not in self.namespace
def Requires(self, node):
if self.parameters:
# TODO(nnorwitz): parameters are tokens, do name comparison.
for p in self.parameters:
if p.name == node.name:
return True
# TODO(nnorwitz): search in body too.
return False
def __str__(self):
# TODO(nnorwitz): add templated_types.
suffix = "%s %s(%s), 0x%02x, %s" % (
self.return_type,
self.name,
self.parameters,
self.modifiers,
self.body,
)
return self._TypeStringHelper(suffix)
class Method(Function):
def __init__(
self,
start,
end,
name,
in_class,
return_type,
parameters,
modifiers,
templated_types,
body,
namespace,
):
Function.__init__(
self,
start,
end,
name,
return_type,
parameters,
modifiers,
templated_types,
body,
namespace,
)
# TODO(nnorwitz): in_class could also be a namespace which can
# mess up finding functions properly.
self.in_class = in_class
class Type(_GenericDeclaration):
"""Type used for any variable (eg class, primitive, struct, etc)."""
def __init__(
self, start, end, name, templated_types, modifiers, reference, pointer, array
):
"""
Args:
name: str name of main type
templated_types: [Class (Type?)] template type info between <>
modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
reference, pointer, array: bools
"""
_GenericDeclaration.__init__(self, start, end, name, [])
self.templated_types = templated_types
if not name and modifiers:
self.name = modifiers.pop()
self.modifiers = modifiers
self.reference = reference
self.pointer = pointer
self.array = array
def __str__(self):
prefix = ""
if self.modifiers:
prefix = " ".join(self.modifiers) + " "
name = str(self.name)
if self.templated_types:
name += "<%s>" % self.templated_types
suffix = prefix + name
if self.reference:
suffix += "&"
if self.pointer:
suffix += "*"
if self.array:
suffix += "[]"
return self._TypeStringHelper(suffix)
# By definition, Is* are always False. A Type can only exist in
# some sort of variable declaration, parameter, or return value.
def IsDeclaration(self):
return False
def IsDefinition(self):
return False
def IsExportable(self):
return False
class TypeConverter(object):
def __init__(self, namespace_stack):
self.namespace_stack = namespace_stack
def _GetTemplateEnd(self, tokens, start):
count = 1
end = start
while 1:
token = tokens[end]
end += 1
if token.name == "<":
count += 1
elif token.name == ">":
count -= 1
if count == 0:
break
return tokens[start : end - 1], end
def ToType(self, tokens):
"""Convert [Token,...] to [Class(...), ] useful for base classes.
For example, code like class Foo : public Bar<x, y> { ... };
the "Bar<x, y>" portion gets converted to an AST.
Returns:
[Class(...), ...]
"""
result = []
name_tokens = []
reference = pointer = array = False
def AddType(templated_types):
# Partition tokens into name and modifier tokens.
names = []
modifiers = []
for t in name_tokens:
if keywords.IsKeyword(t.name):
modifiers.append(t.name)
else:
names.append(t.name)
name = "".join(names)
if name_tokens:
result.append(
Type(
name_tokens[0].start,
name_tokens[-1].end,
name,
templated_types,
modifiers,
reference,
pointer,
array,
)
)
del name_tokens[:]
i = 0
end = len(tokens)
while i < end:
token = tokens[i]
if token.name == "<":
new_tokens, new_end = self._GetTemplateEnd(tokens, i + 1)
AddType(self.ToType(new_tokens))
# If there is a comma after the template, we need to consume
# that here otherwise it becomes part of the name.
i = new_end
reference = pointer = array = False
elif token.name == ",":
AddType([])
reference = pointer = array = False
elif token.name == "*":
pointer = True
elif token.name == "&":
reference = True
elif token.name == "[":
pointer = True
elif token.name == "]":
pass
else:
name_tokens.append(token)
i += 1
if name_tokens:
# No '<' in the tokens, just a simple name and no template.
AddType([])
return result
def DeclarationToParts(self, parts, needs_name_removed):
name = None
default = []
if needs_name_removed:
# Handle default (initial) values properly.
for i, t in enumerate(parts):
if t.name == "=":
default = parts[i + 1 :]
name = parts[i - 1].name
if name == "]" and parts[i - 2].name == "[":
name = parts[i - 3].name
i -= 1
parts = parts[: i - 1]
break
else:
if parts[-1].token_type == tokenize.NAME:
name = parts.pop().name
else:
# TODO(nnorwitz): this is a hack that happens for code like
# Register(Foo<T>); where it thinks this is a function call
# but it's actually a declaration.
name = "???"
modifiers = []
type_name = []
other_tokens = []
templated_types = []
i = 0
end = len(parts)
while i < end:
p = parts[i]
if keywords.IsKeyword(p.name):
modifiers.append(p.name)
elif p.name == "<":
templated_tokens, new_end = self._GetTemplateEnd(parts, i + 1)
templated_types = self.ToType(templated_tokens)
i = new_end - 1
# Don't add a spurious :: to data members being initialized.
next_index = i + 1
if next_index < end and parts[next_index].name == "::":
i += 1
elif p.name in ("[", "]", "="):
# These are handled elsewhere.
other_tokens.append(p)
elif p.name not in ("*", "&", ">"):
# Ensure that names have a space between them.
if (
type_name
and type_name[-1].token_type == tokenize.NAME
and p.token_type == tokenize.NAME
):
type_name.append(tokenize.Token(tokenize.SYNTAX, " ", 0, 0))
type_name.append(p)
else:
other_tokens.append(p)
i += 1
type_name = "".join([t.name for t in type_name])
return name, type_name, templated_types, modifiers, default, other_tokens
def ToParameters(self, tokens):
if not tokens:
return []
result = []
name = type_name = ""
type_modifiers = []
pointer = reference = array = False
first_token = None
default = []
def AddParameter(end):
if default:
del default[0] # Remove flag.
parts = self.DeclarationToParts(type_modifiers, True)
(
name,
type_name,
templated_types,
modifiers,
unused_default,
unused_other_tokens,
) = parts
parameter_type = Type(
first_token.start,
first_token.end,
type_name,
templated_types,
modifiers,
reference,
pointer,
array,
)
p = Parameter(first_token.start, end, name, parameter_type, default)
result.append(p)
template_count = 0
for s in tokens:
if not first_token:
first_token = s
if s.name == "<":
template_count += 1
elif s.name == ">":
template_count -= 1
if template_count > 0:
type_modifiers.append(s)
continue
if s.name == ",":
AddParameter(s.start)
name = type_name = ""
type_modifiers = []
pointer = reference = array = False
first_token = None
default = []
elif s.name == "*":
pointer = True
elif s.name == "&":
reference = True
elif s.name == "[":
array = True
elif s.name == "]":
pass # Just don't add to type_modifiers.
elif s.name == "=":
# Got a default value. Add any value (None) as a flag.
default.append(None)
elif default:
default.append(s)
else:
type_modifiers.append(s)
AddParameter(tokens[-1].end)
return result
def CreateReturnType(self, return_type_seq):
if not return_type_seq:
return None
start = return_type_seq[0].start
end = return_type_seq[-1].end
(
_,
name,
templated_types,
modifiers,
default,
other_tokens,
) = self.DeclarationToParts(return_type_seq, False)
names = [n.name for n in other_tokens]
reference = "&" in names
pointer = "*" in names
array = "[" in names
return Type(
start, end, name, templated_types, modifiers, reference, pointer, array
)
def GetTemplateIndices(self, names):
# names is a list of strings.
start = names.index("<")
end = len(names) - 1
while end > 0:
if names[end] == ">":
break
end -= 1
return start, end + 1
class AstBuilder(object):
def __init__(
self, token_stream, filename, in_class="", visibility=None, namespace_stack=[]
):
self.tokens = token_stream
self.filename = filename
# TODO(nnorwitz): use a better data structure (deque) for the queue.
# Switching directions of the "queue" improved perf by about 25%.
# Using a deque should be even better since we access from both sides.
self.token_queue = []
self.namespace_stack = namespace_stack[:]
self.in_class = in_class
if in_class is None:
self.in_class_name_only = None
else:
self.in_class_name_only = in_class.split("::")[-1]
self.visibility = visibility
self.in_function = False
self.current_token = None
# Keep the state whether we are currently handling a typedef or not.
self._handling_typedef = False
self.converter = TypeConverter(self.namespace_stack)
def HandleError(self, msg, token):
printable_queue = list(reversed(self.token_queue[-20:]))
sys.stderr.write(
"Got %s in %s @ %s %s\n" % (msg, self.filename, token, printable_queue)
)
def Generate(self):
while 1:
token = self._GetNextToken()
if not token:
break
# Get the next token.
self.current_token = token
# Dispatch on the next token type.
if token.token_type == _INTERNAL_TOKEN:
if token.name == _NAMESPACE_POP:
self.namespace_stack.pop()
continue
try:
result = self._GenerateOne(token)
if result is not None:
yield result
except:
self.HandleError("exception", token)
raise
def _CreateVariable(
self,
pos_token,
name,
type_name,
type_modifiers,
ref_pointer_name_seq,
templated_types,
value=None,
):
reference = "&" in ref_pointer_name_seq
pointer = "*" in ref_pointer_name_seq
array = "[" in ref_pointer_name_seq
var_type = Type(
pos_token.start,
pos_token.end,
type_name,
templated_types,
type_modifiers,
reference,
pointer,
array,
)
return VariableDeclaration(
pos_token.start, pos_token.end, name, var_type, value, self.namespace_stack
)
def _GenerateOne(self, token):
if token.token_type == tokenize.NAME:
if keywords.IsKeyword(token.name) and not keywords.IsBuiltinType(
token.name
):
method = getattr(self, "handle_" + token.name)
return method()
elif token.name == self.in_class_name_only:
# The token name is the same as the class, must be a ctor if
# there is a paren. Otherwise, it's the return type.
# Peek ahead to get the next token to figure out which.
next = self._GetNextToken()
self._AddBackToken(next)
if next.token_type == tokenize.SYNTAX and next.name == "(":
return self._GetMethod([token], FUNCTION_CTOR, None, True)
# Fall through--handle like any other method.
# Handle data or function declaration/definition.
syntax = tokenize.SYNTAX
temp_tokens, last_token = self._GetVarTokensUpTo(syntax, "(", ";", "{", "[")
temp_tokens.insert(0, token)
if last_token.name == "(":
# If there is an assignment before the paren,
# this is an expression, not a method.
expr = bool([e for e in temp_tokens if e.name == "="])
if expr:
new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ";")
temp_tokens.append(last_token)
temp_tokens.extend(new_temp)
last_token = tokenize.Token(tokenize.SYNTAX, ";", 0, 0)
if last_token.name == "[":
# Handle array, this isn't a method, unless it's an operator.
# TODO(nnorwitz): keep the size somewhere.
# unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
temp_tokens.append(last_token)
if temp_tokens[-2].name == "operator":
temp_tokens.append(self._GetNextToken())
else:
temp_tokens2, last_token = self._GetVarTokensUpTo(
tokenize.SYNTAX, ";"
)
temp_tokens.extend(temp_tokens2)
if last_token.name == ";":
# Handle data, this isn't a method.
parts = self.converter.DeclarationToParts(temp_tokens, True)
(
name,
type_name,
templated_types,
modifiers,
default,
unused_other_tokens,
) = parts
t0 = temp_tokens[0]
names = [t.name for t in temp_tokens]
if templated_types:
start, end = self.converter.GetTemplateIndices(names)
names = names[:start] + names[end:]
default = "".join([t.name for t in default])
return self._CreateVariable(
t0, name, type_name, modifiers, names, templated_types, default
)
if last_token.name == "{":
self._AddBackTokens(temp_tokens[1:])
self._AddBackToken(last_token)
method_name = temp_tokens[0].name
method = getattr(self, "handle_" + method_name, None)
if not method:
# Must be declaring a variable.
# TODO(nnorwitz): handle the declaration.
return None
return method()
return self._GetMethod(temp_tokens, 0, None, False)
elif token.token_type == tokenize.SYNTAX:
if token.name == "~" and self.in_class:
# Must be a dtor (probably not in method body).
token = self._GetNextToken()
# self.in_class can contain A::Name, but the dtor will only
# be Name. Make sure to compare against the right value.
if (
token.token_type == tokenize.NAME
and token.name == self.in_class_name_only
):
return self._GetMethod([token], FUNCTION_DTOR, None, True)
# TODO(nnorwitz): handle a lot more syntax.
elif token.token_type == tokenize.PREPROCESSOR:
# TODO(nnorwitz): handle more preprocessor directives.
# token starts with a #, so remove it and strip whitespace.
name = token.name[1:].lstrip()
if name.startswith("include"):
# Remove "include".
name = name[7:].strip()
assert name
# Handle #include \<newline> "header-on-second-line.h".
if name.startswith("\\"):
name = name[1:].strip()
assert name[0] in '<"', token
assert name[-1] in '>"', token
system = name[0] == "<"
filename = name[1:-1]
return Include(token.start, token.end, filename, system)
if name.startswith("define"):
# Remove "define".
name = name[6:].strip()
assert name
value = ""
for i, c in enumerate(name):
if c.isspace():
value = name[i:].lstrip()
name = name[:i]
break
return Define(token.start, token.end, name, value)
if name.startswith("if") and name[2:3].isspace():
condition = name[3:].strip()
if condition.startswith("0") or condition.startswith("(0)"):
self._SkipIf0Blocks()
return None
def _GetTokensUpTo(self, expected_token_type, expected_token):
return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
last_token = self._GetNextToken()
tokens = []
while (
last_token.token_type != expected_token_type
or last_token.name not in expected_tokens
):
tokens.append(last_token)
last_token = self._GetNextToken()
return tokens, last_token
# TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary.
def _IgnoreUpTo(self, token_type, token):
unused_tokens = self._GetTokensUpTo(token_type, token)
def _SkipIf0Blocks(self):
count = 1
while 1:
token = self._GetNextToken()
if token.token_type != tokenize.PREPROCESSOR:
continue
name = token.name[1:].lstrip()
if name.startswith("endif"):
count -= 1
if count == 0:
break
elif name.startswith("if"):
count += 1
def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
if GetNextToken is None:
GetNextToken = self._GetNextToken
# Assumes the current token is open_paren and we will consume
# and return up to the close_paren.
count = 1
token = GetNextToken()
while 1:
if token.token_type == tokenize.SYNTAX:
if token.name == open_paren:
count += 1
elif token.name == close_paren:
count -= 1
if count == 0:
break
yield token
token = GetNextToken()
yield token
def _GetParameters(self):
return self._GetMatchingChar("(", ")")
def GetScope(self):
return self._GetMatchingChar("{", "}")
def _GetNextToken(self):
if self.token_queue:
return self.token_queue.pop()
return next(self.tokens)
def _AddBackToken(self, token):
if token.whence == tokenize.WHENCE_STREAM:
token.whence = tokenize.WHENCE_QUEUE
self.token_queue.insert(0, token)
else:
assert token.whence == tokenize.WHENCE_QUEUE, token
self.token_queue.append(token)
def _AddBackTokens(self, tokens):
if tokens:
if tokens[-1].whence == tokenize.WHENCE_STREAM:
for token in tokens:
token.whence = tokenize.WHENCE_QUEUE
self.token_queue[:0] = reversed(tokens)
else:
assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
self.token_queue.extend(reversed(tokens))
def GetName(self, seq=None):
"""Returns ([tokens], next_token_info)."""
GetNextToken = self._GetNextToken
if seq is not None:
it = iter(seq)
GetNextToken = lambda: next(it)
next_token = GetNextToken()
tokens = []
last_token_was_name = False
while next_token.token_type == tokenize.NAME or (
next_token.token_type == tokenize.SYNTAX and next_token.name in ("::", "<")
):
# Two NAMEs in a row means the identifier should terminate.
# It's probably some sort of variable declaration.
if last_token_was_name and next_token.token_type == tokenize.NAME:
break
last_token_was_name = next_token.token_type == tokenize.NAME
tokens.append(next_token)
# Handle templated names.
if next_token.name == "<":
tokens.extend(self._GetMatchingChar("<", ">", GetNextToken))
last_token_was_name = True
next_token = GetNextToken()
return tokens, next_token
def GetMethod(self, modifiers, templated_types):
return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, "(")
assert len(return_type_and_name) >= 1
return self._GetMethod(return_type_and_name, modifiers, templated_types, False)
def _GetMethod(self, return_type_and_name, modifiers, templated_types, get_paren):
template_portion = None
if get_paren:
token = self._GetNextToken()
assert token.token_type == tokenize.SYNTAX, token
if token.name == "<":
# Handle templatized dtors.
template_portion = [token]
template_portion.extend(self._GetMatchingChar("<", ">"))
token = self._GetNextToken()
assert token.token_type == tokenize.SYNTAX, token
assert token.name == "(", token
name = return_type_and_name.pop()
# Handle templatized ctors.
if name.name == ">":
index = 1
while return_type_and_name[index].name != "<":
index += 1
template_portion = return_type_and_name[index:] + [name]
del return_type_and_name[index:]
name = return_type_and_name.pop()
elif name.name == "]":
rt = return_type_and_name
assert rt[-1].name == "[", return_type_and_name
assert rt[-2].name == "operator", return_type_and_name
name_seq = return_type_and_name[-2:]
del return_type_and_name[-2:]
name = tokenize.Token(
tokenize.NAME, "operator[]", name_seq[0].start, name.end
)
# Get the open paren so _GetParameters() below works.
unused_open_paren = self._GetNextToken()
# TODO(nnorwitz): store template_portion.
return_type = return_type_and_name
indices = name
if return_type:
indices = return_type[0]
# Force ctor for templatized ctors.
if name.name == self.in_class and not modifiers:
modifiers |= FUNCTION_CTOR
parameters = list(self._GetParameters())
del parameters[-1] # Remove trailing ')'.
# Handling operator() is especially weird.
if name.name == "operator" and not parameters:
token = self._GetNextToken()
assert token.name == "(", token
parameters = list(self._GetParameters())
del parameters[-1] # Remove trailing ')'.
token = self._GetNextToken()
while token.token_type == tokenize.NAME:
modifier_token = token
token = self._GetNextToken()
if modifier_token.name == "const":
modifiers |= FUNCTION_CONST
elif modifier_token.name == "__attribute__":
# TODO(nnorwitz): handle more __attribute__ details.
modifiers |= FUNCTION_ATTRIBUTE
assert token.name == "(", token
# Consume everything between the (parens).
unused_tokens = list(self._GetMatchingChar("(", ")"))
token = self._GetNextToken()
elif modifier_token.name == "throw":
modifiers |= FUNCTION_THROW
assert token.name == "(", token
# Consume everything between the (parens).
unused_tokens = list(self._GetMatchingChar("(", ")"))
token = self._GetNextToken()
elif modifier_token.name == "override":
modifiers |= FUNCTION_OVERRIDE
elif modifier_token.name == modifier_token.name.upper():
# HACK(nnorwitz): assume that all upper-case names
# are some macro we aren't expanding.
modifiers |= FUNCTION_UNKNOWN_ANNOTATION
else:
self.HandleError("unexpected token", modifier_token)
assert token.token_type == tokenize.SYNTAX, token
# Handle ctor initializers.
if token.name == ":":
# TODO(nnorwitz): anything else to handle for initializer list?
while token.name != ";" and token.name != "{":
token = self._GetNextToken()
# Handle pointer to functions that are really data but look
# like method declarations.
if token.name == "(":
if parameters[0].name == "*":
# name contains the return type.
name = parameters.pop()
# parameters contains the name of the data.
modifiers = [p.name for p in parameters]
# Already at the ( to open the parameter list.
function_parameters = list(self._GetMatchingChar("(", ")"))
del function_parameters[-1] # Remove trailing ')'.
# TODO(nnorwitz): store the function_parameters.
token = self._GetNextToken()
assert token.token_type == tokenize.SYNTAX, token
assert token.name == ";", token
return self._CreateVariable(
indices, name.name, indices.name, modifiers, "", None
)
# At this point, we got something like:
# return_type (type::*name_)(params);
# This is a data member called name_ that is a function pointer.
# With this code: void (sq_type::*field_)(string&);
# We get: name=void return_type=[] parameters=sq_type ... field_
# TODO(nnorwitz): is return_type always empty?
# TODO(nnorwitz): this isn't even close to being correct.
# Just put in something so we don't crash and can move on.
real_name = parameters[-1]
modifiers = [p.name for p in self._GetParameters()]
del modifiers[-1] # Remove trailing ')'.
return self._CreateVariable(
indices, real_name.name, indices.name, modifiers, "", None
)
if token.name == "{":
body = list(self.GetScope())
del body[-1] # Remove trailing '}'.
else:
body = None
if token.name == "=":
token = self._GetNextToken()
if token.name == "default" or token.name == "delete":
# Ignore explicitly defaulted and deleted special members
# in C++11.
token = self._GetNextToken()
else:
# Handle pure-virtual declarations.
assert token.token_type == tokenize.CONSTANT, token
assert token.name == "0", token
modifiers |= FUNCTION_PURE_VIRTUAL
token = self._GetNextToken()
if token.name == "[":
# TODO(nnorwitz): store tokens and improve parsing.
# template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
tokens = list(self._GetMatchingChar("[", "]"))
token = self._GetNextToken()
assert token.name == ";", (token, return_type_and_name, parameters)
# Looks like we got a method, not a function.
if len(return_type) > 2 and return_type[-1].name == "::":
return_type, in_class = self._GetReturnTypeAndClassName(return_type)
return Method(
indices.start,
indices.end,
name.name,
in_class,
return_type,
parameters,
modifiers,
templated_types,
body,
self.namespace_stack,
)
return Function(
indices.start,
indices.end,
name.name,
return_type,
parameters,
modifiers,
templated_types,
body,
self.namespace_stack,
)
def _GetReturnTypeAndClassName(self, token_seq):
# Splitting the return type from the class name in a method
# can be tricky. For example, Return::Type::Is::Hard::To::Find().
# Where is the return type and where is the class name?
# The heuristic used is to pull the last name as the class name.
# This includes all the templated type info.
# TODO(nnorwitz): if there is only One name like in the
# example above, punt and assume the last bit is the class name.
# Ignore a :: prefix, if exists so we can find the first real name.
i = 0
if token_seq[0].name == "::":
i = 1
# Ignore a :: suffix, if exists.
end = len(token_seq) - 1
if token_seq[end - 1].name == "::":
end -= 1
# Make a copy of the sequence so we can append a sentinel
# value. This is required for GetName will has to have some
# terminating condition beyond the last name.
seq_copy = token_seq[i:end]
seq_copy.append(tokenize.Token(tokenize.SYNTAX, "", 0, 0))
names = []
while i < end:
# Iterate through the sequence parsing out each name.
new_name, next = self.GetName(seq_copy[i:])
assert new_name, "Got empty new_name, next=%s" % next
# We got a pointer or ref. Add it to the name.
if next and next.token_type == tokenize.SYNTAX:
new_name.append(next)
names.append(new_name)
i += len(new_name)
# Now that we have the names, it's time to undo what we did.
# Remove the sentinel value.
names[-1].pop()
# Flatten the token sequence for the return type.
return_type = [e for seq in names[:-1] for e in seq]
# The class name is the last name.
class_name = names[-1]
return return_type, class_name
def handle_bool(self):
pass
def handle_char(self):
pass
def handle_int(self):
pass
def handle_long(self):
pass
def handle_short(self):
pass
def handle_double(self):
pass
def handle_float(self):
pass
def handle_void(self):
pass
def handle_wchar_t(self):
pass
def handle_unsigned(self):
pass
def handle_signed(self):
pass
def _GetNestedType(self, ctor):
name = None
name_tokens, token = self.GetName()
if name_tokens:
name = "".join([t.name for t in name_tokens])
# Handle forward declarations.
if token.token_type == tokenize.SYNTAX and token.name == ";":
return ctor(token.start, token.end, name, None, self.namespace_stack)
if token.token_type == tokenize.NAME and self._handling_typedef:
self._AddBackToken(token)
return ctor(token.start, token.end, name, None, self.namespace_stack)
# Must be the type declaration.
fields = list(self._GetMatchingChar("{", "}"))
del fields[-1] # Remove trailing '}'.
if token.token_type == tokenize.SYNTAX and token.name == "{":
next = self._GetNextToken()
new_type = ctor(token.start, token.end, name, fields, self.namespace_stack)
# A name means this is an anonymous type and the name
# is the variable declaration.
if next.token_type != tokenize.NAME:
return new_type
name = new_type
token = next
# Must be variable declaration using the type prefixed with keyword.
assert token.token_type == tokenize.NAME, token
return self._CreateVariable(token, token.name, name, [], "", None)
def handle_struct(self):
# Special case the handling typedef/aliasing of structs here.
# It would be a pain to handle in the class code.
name_tokens, var_token = self.GetName()
if name_tokens:
next_token = self._GetNextToken()
is_syntax = (
var_token.token_type == tokenize.SYNTAX and var_token.name[0] in "*&"
)
is_variable = (
var_token.token_type == tokenize.NAME and next_token.name == ";"
)
variable = var_token
if is_syntax and not is_variable:
variable = next_token
temp = self._GetNextToken()
if temp.token_type == tokenize.SYNTAX and temp.name == "(":
# Handle methods declared to return a struct.
t0 = name_tokens[0]
struct = tokenize.Token(
tokenize.NAME, "struct", t0.start - 7, t0.start - 2
)
type_and_name = [struct]
type_and_name.extend(name_tokens)
type_and_name.extend((var_token, next_token))
return self._GetMethod(type_and_name, 0, None, False)
assert temp.name == ";", (temp, name_tokens, var_token)
if is_syntax or (is_variable and not self._handling_typedef):
modifiers = ["struct"]
type_name = "".join([t.name for t in name_tokens])
position = name_tokens[0]
return self._CreateVariable(
position, variable.name, type_name, modifiers, var_token.name, None
)
name_tokens.extend((var_token, next_token))
self._AddBackTokens(name_tokens)
else:
self._AddBackToken(var_token)
return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
def handle_union(self):
return self._GetNestedType(Union)
def handle_enum(self):
token = self._GetNextToken()
if not (token.token_type == tokenize.NAME and token.name == "class"):
self._AddBackToken(token)
return self._GetNestedType(Enum)
def handle_auto(self):
# TODO(nnorwitz): warn about using auto? Probably not since it
# will be reclaimed and useful for C++0x.
pass
def handle_register(self):
pass
def handle_const(self):
pass
def handle_inline(self):
pass
def handle_extern(self):
pass
def handle_static(self):
pass
def handle_virtual(self):
# What follows must be a method.
token = token2 = self._GetNextToken()
if token.name == "inline":
# HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
token2 = self._GetNextToken()
if token2.token_type == tokenize.SYNTAX and token2.name == "~":
return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
assert token.token_type == tokenize.NAME or token.name == "::", token
return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, "(") # )
return_type_and_name.insert(0, token)
if token2 is not token:
return_type_and_name.insert(1, token2)
return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL, None, False)
def handle_volatile(self):
pass
def handle_mutable(self):
pass
def handle_public(self):
assert self.in_class
self.visibility = VISIBILITY_PUBLIC
def handle_protected(self):
assert self.in_class
self.visibility = VISIBILITY_PROTECTED
def handle_private(self):
assert self.in_class
self.visibility = VISIBILITY_PRIVATE
def handle_friend(self):
tokens = self._GetTokensUpTo(tokenize.SYNTAX, ";")
assert tokens
t0 = tokens[0]
return Friend(t0.start, t0.end, tokens, self.namespace_stack)
def handle_static_cast(self):
pass
def handle_const_cast(self):
pass
def handle_dynamic_cast(self):
pass
def handle_reinterpret_cast(self):
pass
def handle_new(self):
pass
def handle_delete(self):
tokens = self._GetTokensUpTo(tokenize.SYNTAX, ";")
assert tokens
return Delete(tokens[0].start, tokens[0].end, tokens)
def handle_typedef(self):
token = self._GetNextToken()
if token.token_type == tokenize.NAME and keywords.IsKeyword(token.name):
# Token must be struct/enum/union/class.
method = getattr(self, "handle_" + token.name)
self._handling_typedef = True
tokens = [method()]
self._handling_typedef = False
else:
tokens = [token]
# Get the remainder of the typedef up to the semi-colon.
tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ";"))
# TODO(nnorwitz): clean all this up.
assert tokens
name = tokens.pop()
indices = name
if tokens:
indices = tokens[0]
if not indices:
indices = token
if name.name == ")":
# HACK(nnorwitz): Handle pointers to functions "properly".
if len(tokens) >= 4 and tokens[1].name == "(" and tokens[2].name == "*":
tokens.append(name)
name = tokens[3]
elif name.name == "]":
# HACK(nnorwitz): Handle arrays properly.
if len(tokens) >= 2:
tokens.append(name)
name = tokens[1]
new_type = tokens
if tokens and isinstance(tokens[0], tokenize.Token):
new_type = self.converter.ToType(tokens)[0]
return Typedef(
indices.start, indices.end, name.name, new_type, self.namespace_stack
)
def handle_typeid(self):
pass # Not needed yet.
def handle_typename(self):
pass # Not needed yet.
def _GetTemplatedTypes(self):
result = {}
tokens = list(self._GetMatchingChar("<", ">"))
len_tokens = len(tokens) - 1 # Ignore trailing '>'.
i = 0
while i < len_tokens:
key = tokens[i].name
i += 1
if keywords.IsKeyword(key) or key == ",":
continue
type_name = default = None
if i < len_tokens:
i += 1
if tokens[i - 1].name == "=":
assert i < len_tokens, "%s %s" % (i, tokens)
default, unused_next_token = self.GetName(tokens[i:])
i += len(default)
else:
if tokens[i - 1].name != ",":
# We got something like: Type variable.
# Re-adjust the key (variable) and type_name (Type).
key = tokens[i - 1].name
type_name = tokens[i - 2]
result[key] = (type_name, default)
return result
def handle_template(self):
token = self._GetNextToken()
assert token.token_type == tokenize.SYNTAX, token
assert token.name == "<", token
templated_types = self._GetTemplatedTypes()
# TODO(nnorwitz): for now, just ignore the template params.
token = self._GetNextToken()
if token.token_type == tokenize.NAME:
if token.name == "class":
return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
elif token.name == "struct":
return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
elif token.name == "friend":
return self.handle_friend()
self._AddBackToken(token)
tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, "(", ";")
tokens.append(last)
self._AddBackTokens(tokens)
if last.name == "(":
return self.GetMethod(FUNCTION_NONE, templated_types)
# Must be a variable definition.
return None
def handle_true(self):
pass # Nothing to do.
def handle_false(self):
pass # Nothing to do.
def handle_asm(self):
pass # Not needed yet.
def handle_class(self):
return self._GetClass(Class, VISIBILITY_PRIVATE, None)
def _GetBases(self):
# Get base classes.
bases = []
while 1:
token = self._GetNextToken()
assert token.token_type == tokenize.NAME, token
# TODO(nnorwitz): store kind of inheritance...maybe.
if token.name not in ("public", "protected", "private"):
# If inheritance type is not specified, it is private.
# Just put the token back so we can form a name.
# TODO(nnorwitz): it would be good to warn about this.
self._AddBackToken(token)
else:
# Check for virtual inheritance.
token = self._GetNextToken()
if token.name != "virtual":
self._AddBackToken(token)
else:
# TODO(nnorwitz): store that we got virtual for this base.
pass
base, next_token = self.GetName()
bases_ast = self.converter.ToType(base)
assert len(bases_ast) == 1, bases_ast
bases.append(bases_ast[0])
assert next_token.token_type == tokenize.SYNTAX, next_token
if next_token.name == "{":
token = next_token
break
# Support multiple inheritance.
assert next_token.name == ",", next_token
return bases, token
def _GetClass(self, class_type, visibility, templated_types):
class_name = None
class_token = self._GetNextToken()
if class_token.token_type != tokenize.NAME:
assert class_token.token_type == tokenize.SYNTAX, class_token
token = class_token
else:
# Skip any macro (e.g. storage class specifiers) after the
# 'class' keyword.
next_token = self._GetNextToken()
if next_token.token_type == tokenize.NAME:
self._AddBackToken(next_token)
else:
self._AddBackTokens([class_token, next_token])
name_tokens, token = self.GetName()
class_name = "".join([t.name for t in name_tokens])
bases = None
if token.token_type == tokenize.SYNTAX:
if token.name == ";":
# Forward declaration.
return class_type(
class_token.start,
class_token.end,
class_name,
None,
templated_types,
None,
self.namespace_stack,
)
if token.name in "*&":
# Inline forward declaration. Could be method or data.
name_token = self._GetNextToken()
next_token = self._GetNextToken()
if next_token.name == ";":
# Handle data
modifiers = ["class"]
return self._CreateVariable(
class_token,
name_token.name,
class_name,
modifiers,
token.name,
None,
)
else:
# Assume this is a method.
tokens = (class_token, token, name_token, next_token)
self._AddBackTokens(tokens)
return self.GetMethod(FUNCTION_NONE, None)
if token.name == ":":
bases, token = self._GetBases()
body = None
if token.token_type == tokenize.SYNTAX and token.name == "{":
assert token.token_type == tokenize.SYNTAX, token
assert token.name == "{", token
ast = AstBuilder(
self.GetScope(),
self.filename,
class_name,
visibility,
self.namespace_stack,
)
body = list(ast.Generate())
if not self._handling_typedef:
token = self._GetNextToken()
if token.token_type != tokenize.NAME:
assert token.token_type == tokenize.SYNTAX, token
assert token.name == ";", token
else:
new_class = class_type(
class_token.start,
class_token.end,
class_name,
bases,
None,
body,
self.namespace_stack,
)
modifiers = []
return self._CreateVariable(
class_token, token.name, new_class, modifiers, token.name, None
)
else:
if not self._handling_typedef:
self.HandleError("non-typedef token", token)
self._AddBackToken(token)
return class_type(
class_token.start,
class_token.end,
class_name,
bases,
templated_types,
body,
self.namespace_stack,
)
def handle_namespace(self):
token = self._GetNextToken()
# Support anonymous namespaces.
name = None
if token.token_type == tokenize.NAME:
name = token.name
token = self._GetNextToken()
self.namespace_stack.append(name)
assert token.token_type == tokenize.SYNTAX, token
# Create an internal token that denotes when the namespace is complete.
internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP, None, None)
internal_token.whence = token.whence
if token.name == "=":
# TODO(nnorwitz): handle aliasing namespaces.
name, next_token = self.GetName()
assert next_token.name == ";", next_token
self._AddBackToken(internal_token)
else:
assert token.name == "{", token
tokens = list(self.GetScope())
# Replace the trailing } with the internal namespace pop token.
tokens[-1] = internal_token
# Handle namespace with nothing in it.
self._AddBackTokens(tokens)
return None
def handle_using(self):
tokens = self._GetTokensUpTo(tokenize.SYNTAX, ";")
assert tokens
return Using(tokens[0].start, tokens[0].end, tokens)
def handle_explicit(self):
assert self.in_class
# Nothing much to do.
# TODO(nnorwitz): maybe verify the method name == class name.
# This must be a ctor.
return self.GetMethod(FUNCTION_CTOR, None)
def handle_this(self):
pass # Nothing to do.
def handle_operator(self):
# Pull off the next token(s?) and make that part of the method name.
pass
def handle_sizeof(self):
pass
def handle_case(self):
pass
def handle_switch(self):
pass
def handle_default(self):
token = self._GetNextToken()
assert token.token_type == tokenize.SYNTAX
assert token.name == ":"
def handle_if(self):
pass
def handle_else(self):
pass
def handle_return(self):
tokens = self._GetTokensUpTo(tokenize.SYNTAX, ";")
if not tokens:
return Return(self.current_token.start, self.current_token.end, None)
return Return(tokens[0].start, tokens[0].end, tokens)
def handle_goto(self):
tokens = self._GetTokensUpTo(tokenize.SYNTAX, ";")
assert len(tokens) == 1, str(tokens)
return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
def handle_try(self):
pass # Not needed yet.
def handle_catch(self):
pass # Not needed yet.
def handle_throw(self):
pass # Not needed yet.
def handle_while(self):
pass
def handle_do(self):
pass
def handle_for(self):
pass
def handle_break(self):
self._IgnoreUpTo(tokenize.SYNTAX, ";")
def handle_continue(self):
self._IgnoreUpTo(tokenize.SYNTAX, ";")
def BuilderFromSource(source, filename):
"""Utility method that returns an AstBuilder from source code.
Args:
source: 'C++ source code'
filename: 'file1'
Returns:
AstBuilder
"""
return AstBuilder(tokenize.GetTokens(source), filename)
def PrintIndentifiers(filename, should_print):
"""Prints all identifiers for a C++ source file.
Args:
filename: 'file1'
should_print: predicate with signature: bool Function(token)
"""
source = utils.ReadFile(filename, False)
if source is None:
sys.stderr.write("Unable to find: %s\n" % filename)
return
# print('Processing %s' % actual_filename)
builder = BuilderFromSource(source, filename)
try:
for node in builder.Generate():
if should_print(node):
print(node.name)
except KeyboardInterrupt:
return
except:
pass
def PrintAllIndentifiers(filenames, should_print):
"""Prints all identifiers for each C++ source file in filenames.
Args:
filenames: ['file1', 'file2', ...]
should_print: predicate with signature: bool Function(token)
"""
for path in filenames:
PrintIndentifiers(path, should_print)
def main(argv):
for filename in argv[1:]:
source = utils.ReadFile(filename)
if source is None:
continue
print("Processing %s" % filename)
builder = BuilderFromSource(source, filename)
try:
entire_ast = filter(None, builder.Generate())
except KeyboardInterrupt:
return
except:
# Already printed a warning, print the traceback and continue.
traceback.print_exc()
else:
if utils.DEBUG:
for ast in entire_ast:
print(ast)
if __name__ == "__main__":
main(sys.argv)