"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "googlemock/scripts/generator/cpp/ast.py" between
googletest-release-1.10.0.tar.gz and googletest-release-1.11.0.tar.gz

About: GoogleTest is Google's (unit) testing and mocking framework for C++.

ast.py  (googletest-release-1.10.0) vs. ast.py  (googletest-release-1.11.0)
(lines removed in 1.11.0 are marked '-', lines added in 1.11.0 are marked '+';
unmarked lines are common to both versions)

[skipping to change at line 20 (1.10.0) / line 20 (1.11.0)]
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generate an Abstract Syntax Tree (AST) for C++."""

-__author__ = 'nnorwitz@google.com (Neal Norwitz)'
-# TODO:
+# FIXME:
#  * Tokens should never be exported, need to convert to Nodes
#    (return types, parameters, etc.)
#  * Handle static class data for templatized classes
#  * Handle casts (both C++ and C-style)
#  * Handle conditions and loops (if/else, switch, for, while/do)
#
# TODO much, much later:
#  * Handle #define
#  * exceptions

try:
    # Python 3.x
    import builtins
except ImportError:
    # Python 2.x
    import __builtin__ as builtins

import collections
import sys
import traceback

from cpp import keywords
from cpp import tokenize
from cpp import utils

if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier.
    def reversed(seq):
        for i in range(len(seq)-1, -1, -1):
            yield seq[i]

if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier.
    def next(obj):
        return obj.next()

VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
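
Note: the FUNCTION_* values above are bit flags; the builder ORs them into a
method's `modifiers` field (see `modifiers |= FUNCTION_CTOR` further down) and
consumers test them with `&`, as gmock_class.py does. A minimal sketch,
assuming the generator's `cpp` package is importable:

    from cpp import ast

    modifiers = ast.FUNCTION_VIRTUAL | ast.FUNCTION_PURE_VIRTUAL | ast.FUNCTION_CONST
    if modifiers & ast.FUNCTION_PURE_VIRTUAL:
        print('pure virtual method')
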
[skipping to change at line 99 (1.10.0) / line 98 (1.11.0)]
    'mutable': TYPE_MODIFIER_MUTABLE,
    }
"""

_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'

# TODO(nnorwitz): use this as a singleton for templated_types, etc
# where we don't want to create a new empty dict each time.  It is also const.
class _NullDict(object):
    __contains__ = lambda self: False
    keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()

# TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
    """Base AST node."""

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def IsDeclaration(self):
        """Returns bool if this node is a declaration."""
        return False

    def IsDefinition(self):
        """Returns bool if this node is a definition."""
        return False

    def IsExportable(self):
        """Returns bool if this node exportable from a header file."""
        return False

    def Requires(self, node):
        """Does this AST node require the definition of the node passed in?"""
        return False

    def XXX__str__(self):
        return self._StringHelper(self.__class__.__name__, '')

    def _StringHelper(self, name, suffix):
        if not utils.DEBUG:
            return '%s(%s)' % (name, suffix)
        return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)

    def __repr__(self):
        return str(self)

class Define(Node):
    def __init__(self, start, end, name, definition):
        Node.__init__(self, start, end)
        self.name = name
        self.definition = definition

    def __str__(self):
        value = '%s %s' % (self.name, self.definition)
        return self._StringHelper(self.__class__.__name__, value)

class Include(Node):
    def __init__(self, start, end, filename, system):
        Node.__init__(self, start, end)
        self.filename = filename
        self.system = system

    def __str__(self):
        fmt = '"%s"'
        if self.system:
            fmt = '<%s>'
        return self._StringHelper(self.__class__.__name__, fmt % self.filename)

class Goto(Node):
    def __init__(self, start, end, label):
        Node.__init__(self, start, end)
        self.label = label

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.label))

class Expr(Node):
    def __init__(self, start, end, expr):
        Node.__init__(self, start, end)
        self.expr = expr

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.expr))

class Return(Expr):
    pass

class Delete(Expr):
    pass

class Friend(Expr):
    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        self.namespace = namespace[:]

class Using(Node):
    def __init__(self, start, end, names):
        Node.__init__(self, start, end)
        self.names = names

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.names))

class Parameter(Node):
    def __init__(self, start, end, name, parameter_type, default):
        Node.__init__(self, start, end)
        self.name = name
        self.type = parameter_type
        self.default = default

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def __str__(self):
        name = str(self.type)
        suffix = '%s %s' % (name, self.name)
        if self.default:
            suffix += ' = ' + ''.join([d.name for d in self.default])
        return self._StringHelper(self.__class__.__name__, suffix)

class _GenericDeclaration(Node):
    def __init__(self, start, end, name, namespace):
        Node.__init__(self, start, end)
        self.name = name
        self.namespace = namespace[:]

    def FullName(self):
        prefix = ''
        if self.namespace and self.namespace[-1]:
            prefix = '::'.join(self.namespace) + '::'
        return prefix + self.name

    def _TypeStringHelper(self, suffix):
        if self.namespace:
            names = [n or '<anonymous>' for n in self.namespace]
            suffix += ' in ' + '::'.join(names)
        return self._StringHelper(self.__class__.__name__, suffix)

# TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
    def __init__(self, start, end, name, var_type, initial_value, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.type = var_type
        self.initial_value = initial_value

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        suffix = '%s %s' % (self.type, self.name)
        if self.initial_value:
            suffix += ' = ' + self.initial_value
        return suffix

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, self.ToString())

class Typedef(_GenericDeclaration):
    def __init__(self, start, end, name, alias, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.alias = alias

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        name = node.name
        for token in self.alias:
            if token is not None and name == token.name:
                return True
        return False

    def __str__(self):
        suffix = '%s, %s' % (self.name, self.alias)
        return self._TypeStringHelper(suffix)

class _NestedType(_GenericDeclaration):
    def __init__(self, start, end, name, fields, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.fields = fields

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def __str__(self):
        suffix = '%s, {%s}' % (self.name, self.fields)
        return self._TypeStringHelper(suffix)

class Union(_NestedType):
    pass

class Enum(_NestedType):
    pass

class Class(_GenericDeclaration):
    def __init__(self, start, end, name, bases, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.bases = bases
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        return self.bases is None and self.body is None

    def IsDefinition(self):
        return not self.IsDeclaration()

    def IsExportable(self):
        return not self.IsDeclaration()

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        if self.bases:
            for token_list in self.bases:
                # TODO(nnorwitz): bases are tokens, do name comparison.
                for token in token_list:
                    if token.name == node.name:
                        return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        name = self.name
        if self.templated_types:
            name += '<%s>' % self.templated_types
        suffix = '%s, %s, %s' % (name, self.bases, self.body)
        return self._TypeStringHelper(suffix)

class Struct(Class):
    pass

class Function(_GenericDeclaration):
    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        converter = TypeConverter(namespace)
        self.return_type = converter.CreateReturnType(return_type)
        self.parameters = converter.ToParameters(parameters)
        self.modifiers = modifiers
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        return self.body is None

    def IsDefinition(self):
        return self.body is not None

    def IsExportable(self):
        if self.return_type and 'static' in self.return_type.modifiers:
            return False
        return None not in self.namespace

    def Requires(self, node):
        if self.parameters:
            # TODO(nnorwitz): parameters are tokens, do name comparison.
            for p in self.parameters:
                if p.name == node.name:
                    return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        suffix = ('%s %s(%s), 0x%02x, %s' %
                  (self.return_type, self.name, self.parameters,
                   self.modifiers, self.body))
        return self._TypeStringHelper(suffix)

class Method(Function):
    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class

class Type(_GenericDeclaration):
    """Type used for any variable (eg class, primitive, struct, etc)."""

    def __init__(self, start, end, name, templated_types, modifiers,
                 reference, pointer, array):
        """
        Args:
          name: str name of main type
          templated_types: [Class (Type?)] template type info between <>
          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
          reference, pointer, array: bools
        """
        _GenericDeclaration.__init__(self, start, end, name, [])
        self.templated_types = templated_types
        if not name and modifiers:
            self.name = modifiers.pop()
        self.modifiers = modifiers
        self.reference = reference
        self.pointer = pointer
        self.array = array

    def __str__(self):
        prefix = ''
        if self.modifiers:
            prefix = ' '.join(self.modifiers) + ' '
        name = str(self.name)
        if self.templated_types:
            name += '<%s>' % self.templated_types
        suffix = prefix + name
        if self.reference:
            suffix += '&'
        if self.pointer:
            suffix += '*'
        if self.array:
            suffix += '[]'
        return self._TypeStringHelper(suffix)

    # By definition, Is* are always False.  A Type can only exist in
    # some sort of variable declaration, parameter, or return value.
    def IsDeclaration(self):
        return False

    def IsDefinition(self):
        return False

    def IsExportable(self):
        return False
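
Note: the Node subclasses above are what AstBuilder.Generate() (below) yields.
A minimal consumption sketch in the style of gmock_class.py, assuming
BuilderFromSource, which is defined later in ast.py beyond the portion shown
here:

    from cpp import ast

    source = 'class Shape { public: virtual int Area() const = 0; };'
    builder = ast.BuilderFromSource(source, '<string>')
    for node in builder.Generate():
        if isinstance(node, ast.Class) and node.body:
            print(node.FullName())
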
class TypeConverter(object):

    def __init__(self, namespace_stack):
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            if name_tokens:
                result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                                   name, templated_types, modifiers,
                                   reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
                pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1
        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    if name == ']' and parts[i-2].name == '[':
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
        else:
            if parts[-1].token_type == tokenize.NAME:
                name = parts.pop().name
            else:
                # TODO(nnorwitz): this is a hack that happens for code like
                # Register(Foo<T>); where it thinks this is a function call
                # but it's actually a declaration.
                name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                        p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        if not tokens:
            return []
        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter(end):
            if default:
                del default[0]  # Remove flag.
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
+       brace_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
+           # Check for braces before templates, as we can have unmatched '<>'
+           # inside default arguments.
+           if s.name == '{':
+               brace_count += 1
+           elif s.name == '}':
+               brace_count -= 1
+           if brace_count > 0:
+               type_modifiers.append(s)
+               continue
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                type_modifiers.append(s)
                continue
            if s.name == ',':
                AddParameter(s.start)
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter(tokens[-1].end)
        return result
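
Note: the brace counting added to ToParameters() above keeps braced default
arguments intact: the ',' inside '{1, 2}' is no longer taken as a parameter
separator, and an unmatched '<' inside the braces no longer leaves
template_count permanently positive. A hypothetical input exercising both,
assuming BuilderFromSource as above:

    from cpp import ast

    source = 'void Draw(std::vector<int> sizes = {1, 2}, bool on = {1 < 2});'
    builder = ast.BuilderFromSource(source, '<string>')
    for node in builder.Generate():
        print(node)  # one Function node with two Parameters
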
    def CreateReturnType(self, return_type_seq):
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
            self.DeclarationToParts(return_type_seq, False)
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        # names is a list of strings.
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1

class AstBuilder(object):
    def __init__(self, token_stream, filename, in_class='', visibility=None,
                 namespace_stack=[]):
        self.tokens = token_stream
        self.filename = filename
        # TODO(nnorwitz): use a better data structure (deque) for the queue.
        # Switching directions of the "queue" improved perf by about 25%.
        # Using a deque should be even better since we access from both sides.
        self.token_queue = []
        self.namespace_stack = namespace_stack[:]
        self.in_class = in_class
        if in_class is None:
            self.in_class_name_only = None
        else:
            self.in_class_name_only = in_class.split('::')[-1]
        self.visibility = visibility
        self.in_function = False
        self.current_token = None
        # Keep the state whether we are currently handling a typedef or not.
        self._handling_typedef = False

        self.converter = TypeConverter(self.namespace_stack)

    def HandleError(self, msg, token):
        printable_queue = list(reversed(self.token_queue[-20:]))
        sys.stderr.write('Got %s in %s @ %s %s\n' %
                         (msg, self.filename, token, printable_queue))

    def Generate(self):
        while 1:
            token = self._GetNextToken()
            if not token:
                break

            # Get the next token.
            self.current_token = token

            # Dispatch on the next token type.
            if token.token_type == _INTERNAL_TOKEN:
                if token.name == _NAMESPACE_POP:
                    self.namespace_stack.pop()
                continue

            try:
                result = self._GenerateOne(token)
                if result is not None:
                    yield result
            except:
                self.HandleError('exception', token)
                raise

    def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
                        ref_pointer_name_seq, templated_types, value=None):
        reference = '&' in ref_pointer_name_seq
        pointer = '*' in ref_pointer_name_seq
        array = '[' in ref_pointer_name_seq
        var_type = Type(pos_token.start, pos_token.end, type_name,
                        templated_types, type_modifiers,
                        reference, pointer, array)
        return VariableDeclaration(pos_token.start, pos_token.end,
                                   name, var_type, value, self.namespace_stack)

    def _GenerateOne(self, token):
        if token.token_type == tokenize.NAME:
            if (keywords.IsKeyword(token.name) and
                    not keywords.IsBuiltinType(token.name)):
+               if token.name == 'enum':
+                   # Pop the next token and only put it back if it's not
+                   # 'class'.  This allows us to support the two-token
+                   # 'enum class' keyword as if it were simply 'enum'.
+                   next = self._GetNextToken()
+                   if next.name != 'class':
+                       self._AddBackToken(next)
+
                method = getattr(self, 'handle_' + token.name)
                return method()
            elif token.name == self.in_class_name_only:
                # The token name is the same as the class, must be a ctor if
                # there is a paren.  Otherwise, it's the return type.
                # Peek ahead to get the next token to figure out which.
                next = self._GetNextToken()
                self._AddBackToken(next)
                if next.token_type == tokenize.SYNTAX and next.name == '(':
                    return self._GetMethod([token], FUNCTION_CTOR, None, True)
                # Fall through--handle like any other method.

            # Handle data or function declaration/definition.
            syntax = tokenize.SYNTAX
            temp_tokens, last_token = \
-               self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
+               self._GetVarTokensUpToIgnoringTemplates(syntax,
+                                                       '(', ';', '{', '[')
            temp_tokens.insert(0, token)
            if last_token.name == '(':
                # If there is an assignment before the paren,
                # this is an expression, not a method.
                expr = bool([e for e in temp_tokens if e.name == '='])
                if expr:
                    new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.append(last_token)
                    temp_tokens.extend(new_temp)
                    last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

            if last_token.name == '[':
                # Handle array, this isn't a method, unless it's an operator.
                # TODO(nnorwitz): keep the size somewhere.
                # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
                temp_tokens.append(last_token)
                if temp_tokens[-2].name == 'operator':
                    temp_tokens.append(self._GetNextToken())
                else:
                    temp_tokens2, last_token = \
                        self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.extend(temp_tokens2)

            if last_token.name == ';':
                # Handle data, this isn't a method.
                parts = self.converter.DeclarationToParts(temp_tokens, True)
                (name, type_name, templated_types, modifiers, default,
                 unused_other_tokens) = parts

                t0 = temp_tokens[0]
                names = [t.name for t in temp_tokens]
                if templated_types:
                    start, end = self.converter.GetTemplateIndices(names)
                    names = names[:start] + names[end:]
                default = ''.join([t.name for t in default])
                return self._CreateVariable(t0, name, type_name, modifiers,
                                            names, templated_types, default)
            if last_token.name == '{':
                self._AddBackTokens(temp_tokens[1:])
                self._AddBackToken(last_token)
                method_name = temp_tokens[0].name
                method = getattr(self, 'handle_' + method_name, None)
                if not method:
                    # Must be declaring a variable.
                    # TODO(nnorwitz): handle the declaration.
                    return None
                return method()
            return self._GetMethod(temp_tokens, 0, None, False)
        elif token.token_type == tokenize.SYNTAX:
            if token.name == '~' and self.in_class:
                # Must be a dtor (probably not in method body).
                token = self._GetNextToken()
                # self.in_class can contain A::Name, but the dtor will only
                # be Name.  Make sure to compare against the right value.
                if (token.token_type == tokenize.NAME and
                        token.name == self.in_class_name_only):
                    return self._GetMethod([token], FUNCTION_DTOR, None, True)
            # TODO(nnorwitz): handle a lot more syntax.
        elif token.token_type == tokenize.PREPROCESSOR:
            # TODO(nnorwitz): handle more preprocessor directives.
            # token starts with a #, so remove it and strip whitespace.
            name = token.name[1:].lstrip()
            if name.startswith('include'):
                # Remove "include".
                name = name[7:].strip()
                assert name
                # Handle #include \<newline> "header-on-second-line.h".
                if name.startswith('\\'):
                    name = name[1:].strip()
                assert name[0] in '<"', token
                assert name[-1] in '>"', token
                system = name[0] == '<'
                filename = name[1:-1]
                return Include(token.start, token.end, filename, system)
            if name.startswith('define'):
                # Remove "define".
                name = name[6:].strip()
                assert name
                value = ''
                for i, c in enumerate(name):
                    if c.isspace():
                        value = name[i:].lstrip()
                        name = name[:i]
                        break
                return Define(token.start, token.end, name, value)
            if name.startswith('if') and name[2:3].isspace():
                condition = name[3:].strip()
                if condition.startswith('0') or condition.startswith('(0)'):
                    self._SkipIf0Blocks()
        return None
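
Note: with the 'enum class' handling added to _GenerateOne() above, a C++11
scoped enum should go through the same handle_enum path as a plain enum. A
minimal sketch, again assuming BuilderFromSource:

    from cpp import ast

    source = 'enum class Color { kRed, kGreen };'
    builder = ast.BuilderFromSource(source, '<string>')
    print([str(node) for node in builder.Generate()])  # expect one Enum node
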
    def _GetTokensUpTo(self, expected_token_type, expected_token):
        return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]

    def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
        last_token = self._GetNextToken()
        tokens = []
        while (last_token.token_type != expected_token_type or
               last_token.name not in expected_tokens):
            tokens.append(last_token)
            last_token = self._GetNextToken()
        return tokens, last_token

+   # Same as _GetVarTokensUpTo, but skips over '<...>' which could contain an
+   # expected token.
+   def _GetVarTokensUpToIgnoringTemplates(self, expected_token_type,
+                                          *expected_tokens):
+       last_token = self._GetNextToken()
+       tokens = []
+       nesting = 0
+       while (nesting > 0 or
+              last_token.token_type != expected_token_type or
+              last_token.name not in expected_tokens):
+           tokens.append(last_token)
+           last_token = self._GetNextToken()
+           if last_token.name == '<':
+               nesting += 1
+           elif last_token.name == '>':
+               nesting -= 1
+       return tokens, last_token
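
Note: _GetVarTokensUpToIgnoringTemplates() exists so that a stop token nested
inside template arguments no longer cuts a declaration short. A hypothetical
example: in the declaration below, the '(' of 'sizeof(int)' would previously
have been taken as the stop token, misreading the variable as a function:

    from cpp import ast

    source = 'std::array<char, sizeof(int)> buffer;'
    builder = ast.BuilderFromSource(source, '<string>')
    print([str(node) for node in builder.Generate()])
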
    # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary.
    def _IgnoreUpTo(self, token_type, token):
        unused_tokens = self._GetTokensUpTo(token_type, token)

    def _SkipIf0Blocks(self):
        count = 1
        while 1:
            token = self._GetNextToken()
            if token.token_type != tokenize.PREPROCESSOR:
                continue

            name = token.name[1:].lstrip()
            if name.startswith('endif'):
                count -= 1
                if count == 0:
                    break
            elif name.startswith('if'):
                count += 1

    def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
        if GetNextToken is None:
            GetNextToken = self._GetNextToken
        # Assumes the current token is open_paren and we will consume
        # and return up to the close_paren.
        count = 1
        token = GetNextToken()
        while 1:
            if token.token_type == tokenize.SYNTAX:
                if token.name == open_paren:
                    count += 1
                elif token.name == close_paren:
                    count -= 1
                    if count == 0:
                        break
            yield token
            token = GetNextToken()
        yield token

    def _GetParameters(self):
        return self._GetMatchingChar('(', ')')

    def GetScope(self):
        return self._GetMatchingChar('{', '}')

    def _GetNextToken(self):
        if self.token_queue:
            return self.token_queue.pop()
-       return next(self.tokens)
+       try:
+           return next(self.tokens)
+       except StopIteration:
+           return
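
Note: the try/except added to _GetNextToken() matters on Python 3.7+, where
PEP 479 converts a StopIteration escaping into the Generate() generator into a
RuntimeError instead of ending iteration; returning None lets the
'if not token: break' check in Generate() terminate cleanly. A standalone
illustration of the failure mode:

    def gen():
        yield next(iter([]))  # StopIteration raised inside a generator...

    try:
        list(gen())
    except RuntimeError as e:  # ...resurfaces as RuntimeError under PEP 479
        print(e)
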
def _AddBackToken(self, token):
if token.whence == tokenize.WHENCE_STREAM:
token.whence = tokenize.WHENCE_QUEUE
self.token_queue.insert(0, token)
else:
assert token.whence == tokenize.WHENCE_QUEUE, token
self.token_queue.append(token)
def _AddBackTokens(self, tokens):
if tokens:
if tokens[-1].whence == tokenize.WHENCE_STREAM:
for token in tokens:
token.whence = tokenize.WHENCE_QUEUE
self.token_queue[:0] = reversed(tokens)
else:
assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
self.token_queue.extend(reversed(tokens))
def GetName(self, seq=None):
"""Returns ([tokens], next_token_info)."""
GetNextToken = self._GetNextToken
if seq is not None:
it = iter(seq)
GetNextToken = lambda: next(it)
next_token = GetNextToken()
tokens = []
last_token_was_name = False
while (next_token.token_type == tokenize.NAME or
(next_token.token_type == tokenize.SYNTAX and
next_token.name in ('::', '<'))):
# Two NAMEs in a row means the identifier should terminate.
# It's probably some sort of variable declaration.
if last_token_was_name and next_token.token_type == tokenize.NAME:
break
last_token_was_name = next_token.token_type == tokenize.NAME
tokens.append(next_token)
# Handle templated names.
if next_token.name == '<':
tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
last_token_was_name = True
next_token = GetNextToken()
return tokens, next_token
def GetMethod(self, modifiers, templated_types):
return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
assert len(return_type_and_name) >= 1
return self._GetMethod(return_type_and_name, modifiers, templated_types,
False)
def _GetMethod(self, return_type_and_name, modifiers, templated_types,
get_paren):
template_portion = None
if get_paren:
token = self._GetNextToken()
assert token.token_type == tokenize.SYNTAX, token
if token.name == '<':
# Handle templatized dtors.
template_portion = [token]
template_portion.extend(self._GetMatchingChar('<', '>'))
token = self._GetNextToken()
assert token.token_type == tokenize.SYNTAX, token
assert token.name == '(', token
name = return_type_and_name.pop()
# Handle templatized ctors.
if name.name == '>':
index = 1
while return_type_and_name[index].name != '<':
index += 1
template_portion = return_type_and_name[index:] + [name]
del return_type_and_name[index:]
name = return_type_and_name.pop()
elif name.name == ']':
rt = return_type_and_name
assert rt[-1].name == '[', return_type_and_name
assert rt[-2].name == 'operator', return_type_and_name
name_seq = return_type_and_name[-2:]
del return_type_and_name[-2:]
name = tokenize.Token(tokenize.NAME, 'operator[]',
name_seq[0].start, name.end)
# Get the open paren so _GetParameters() below works.
unused_open_paren = self._GetNextToken()
# TODO(nnorwitz): store template_portion.
return_type = return_type_and_name
indices = name
if return_type:
indices = return_type[0]
# Force ctor for templatized ctors.
if name.name == self.in_class and not modifiers:
modifiers |= FUNCTION_CTOR
parameters = list(self._GetParameters())
del parameters[-1] # Remove trailing ')'.
# Handling operator() is especially weird.
if name.name == 'operator' and not parameters:
token = self._GetNextToken()
assert token.name == '(', token
parameters = list(self._GetParameters())
del parameters[-1] # Remove trailing ')'.
token = self._GetNextToken()
while token.token_type == tokenize.NAME:
modifier_token = token
token = self._GetNextToken()
if modifier_token.name == 'const':
modifiers |= FUNCTION_CONST
elif modifier_token.name == '__attribute__':
# TODO(nnorwitz): handle more __attribute__ details.
modifiers |= FUNCTION_ATTRIBUTE
assert token.name == '(', token
# Consume everything between the (parens).
unused_tokens = list(self._GetMatchingChar('(', ')'))
token = self._GetNextToken()
elif modifier_token.name == 'throw':
modifiers |= FUNCTION_THROW
assert token.name == '(', token
# Consume everything between the (parens).
unused_tokens = list(self._GetMatchingChar('(', ')'))
token = self._GetNextToken()
elif modifier_token.name == 'override':
modifiers |= FUNCTION_OVERRIDE
elif modifier_token.name == modifier_token.name.upper():
# HACK(nnorwitz): assume that all upper-case names
# are some macro we aren't expanding.
modifiers |= FUNCTION_UNKNOWN_ANNOTATION
else:
self.HandleError('unexpected token', modifier_token)
assert token.token_type == tokenize.SYNTAX, token
# Handle ctor initializers.
if token.name == ':':
# TODO(nnorwitz): anything else to handle for initializer list?
while token.name != ';' and token.name != '{':
                token = self._GetNextToken()

        # Handle pointer to functions that are really data but look
        # like method declarations.
        if token.name == '(':
            if parameters[0].name == '*':
                # name contains the return type.
                name = parameters.pop()
                # parameters contains the name of the data.
                modifiers = [p.name for p in parameters]
                # Already at the ( to open the parameter list.
                function_parameters = list(self._GetMatchingChar('(', ')'))
                del function_parameters[-1]  # Remove trailing ')'.
                # TODO(nnorwitz): store the function_parameters.
                token = self._GetNextToken()
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
                return self._CreateVariable(indices, name.name, indices.name,
                                            modifiers, '', None)
            # At this point, we got something like:
            #  return_type (type::*name_)(params);
            # This is a data member called name_ that is a function pointer.
            # With this code: void (sq_type::*field_)(string&);
            # We get: name=void return_type=[] parameters=sq_type ... field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]           # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name, indices.name,
                                        modifiers, '', None)

        if token.name == '{':
            body = list(self.GetScope())
            del body[-1]                # Remove trailing '}'.
        else:
            body = None
            if token.name == '=':
                token = self._GetNextToken()

                if token.name == 'default' or token.name == 'delete':
                    # Ignore explicitly defaulted and deleted special members
                    # in C++11.
                    token = self._GetNextToken()
                else:
                    # Handle pure-virtual declarations.
                    assert token.token_type == tokenize.CONSTANT, token
                    assert token.name == '0', token
                    modifiers |= FUNCTION_PURE_VIRTUAL
                    token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name, parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers, templated_types,
                          body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)
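
Everything _GetMethod accepts is ultimately packaged into Method or Function nodes (or, via the fallbacks above, a variable declaration). A minimal sketch of watching nodes come out of the builder, assuming this module is importable as cpp.ast alongside its cpp.tokenize sibling; the class body, file name, and printed reprs are illustrative only:

from cpp import ast

source = 'class Shape { public: virtual int Area() const; };'
for node in ast.BuilderFromSource(source, 'shape.h').Generate():
    # The Shape class node; its body holds the Area method.
    print(node)
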
def _GetReturnTypeAndClassName(self, token_seq):
# Splitting the return type from the class name in a method
# can be tricky. For example, Return::Type::Is::Hard::To::Find().
# Where is the return type and where is the class name?
# The heuristic used is to pull the last name as the class name.
# This includes all the templated type info.
# TODO(nnorwitz): if there is only One name like in the
# example above, punt and assume the last bit is the class name.
# Ignore a :: prefix, if exists so we can find the first real name.
i = 0
if token_seq[0].name == '::':
i = 1
# Ignore a :: suffix, if exists.
end = len(token_seq) - 1
if token_seq[end-1].name == '::':
end -= 1
        # Make a copy of the sequence so we can append a sentinel
        # value.  This is required because GetName has to have some
        # terminating condition beyond the last name.
seq_copy = token_seq[i:end]
seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
names = []
while i < end:
# Iterate through the sequence parsing out each name.
new_name, next = self.GetName(seq_copy[i:])
assert new_name, 'Got empty new_name, next=%s' % next
# We got a pointer or ref. Add it to the name.
if next and next.token_type == tokenize.SYNTAX:
new_name.append(next)
names.append(new_name)
i += len(new_name)
# Now that we have the names, it's time to undo what we did.
# Remove the sentinel value.
names[-1].pop()
# Flatten the token sequence for the return type.
return_type = [e for seq in names[:-1] for e in seq]
# The class name is the last name.
class_name = names[-1]
return return_type, class_name
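
The heuristic the comments above describe is easier to see on plain strings than on Token objects. A simplified sketch (real names also carry template info and pointer/ref tokens, which this toy model ignores):

def split_return_type_and_class(names):
    # Ignore a leading '::', then treat the last name as the class name
    # and everything before it as the return type.
    if names and names[0] == '::':
        names = names[1:]
    return names[:-1], names[-1]

print(split_return_type_and_class(['Return', '::', 'Type', '::', 'Find']))
# -> (['Return', '::', 'Type', '::'], 'Find')
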
    def handle_bool(self):
        pass

    def handle_char(self):
        pass

    def handle_int(self):
        pass

    def handle_long(self):
        pass

    def handle_short(self):
        pass

    def handle_double(self):
        pass

    def handle_float(self):
        pass

    def handle_void(self):
        pass

    def handle_wchar_t(self):
        pass

    def handle_unsigned(self):
        pass

    def handle_signed(self):
        pass

    def _GetNestedType(self, ctor):
        name = None
        name_tokens, token = self.GetName()
        if name_tokens:
            name = ''.join([t.name for t in name_tokens])

        # Handle forward declarations.
        if token.token_type == tokenize.SYNTAX and token.name == ';':
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        if token.token_type == tokenize.NAME and self._handling_typedef:
            self._AddBackToken(token)
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        # Must be the type declaration.
        fields = list(self._GetMatchingChar('{', '}'))
        del fields[-1]                  # Remove trailing '}'.
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            next = self._GetNextToken()
            new_type = ctor(token.start, token.end, name, fields,
                            self.namespace_stack)
            # A name means this is an anonymous type and the name
            # is the variable declaration.
            if next.token_type != tokenize.NAME:
                return new_type
            name = new_type
            token = next

        # Must be variable declaration using the type prefixed with keyword.
        assert token.token_type == tokenize.NAME, token
        return self._CreateVariable(token, token.name, name, [], '', None)

    def handle_struct(self):
        # Special case the handling typedef/aliasing of structs here.
        # It would be a pain to handle in the class code.
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    t0 = name_tokens[0]
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)

    def handle_union(self):
        return self._GetNestedType(Union)

    def handle_enum(self):
        token = self._GetNextToken()
        if not (token.token_type == tokenize.NAME and token.name == 'class'):
            self._AddBackToken(token)
        return self._GetNestedType(Enum)
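
handle_enum above swallows an optional 'class' token, so C++11 scoped enums parse the same way as plain enums. An illustrative probe (the header name is arbitrary, and the node repr depends on the Enum class defined earlier in this file):

from cpp import ast

source = 'enum class Color { kRed, kGreen };'
for node in ast.BuilderFromSource(source, 'color.h').Generate():
    print(node)  # Should surface a single Enum node named Color.
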
    def handle_auto(self):
        # TODO(nnorwitz): warn about using auto?  Probably not since it
        # will be reclaimed and useful for C++0x.
        pass

    def handle_register(self):
        pass

    def handle_const(self):
        pass

    def handle_inline(self):
        pass

    def handle_extern(self):
        pass

    def handle_static(self):
        pass

    def handle_virtual(self):
        # What follows must be a method.
        token = token2 = self._GetNextToken()
        if token.name == 'inline':
            # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
            token2 = self._GetNextToken()
        if token2.token_type == tokenize.SYNTAX and token2.name == '~':
            return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
        assert token.token_type == tokenize.NAME or token.name == '::', token
-       return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')  # )
+       return_type_and_name, _ = self._GetVarTokensUpToIgnoringTemplates(
+           tokenize.SYNTAX, '(')  # )
        return_type_and_name.insert(0, token)
        if token2 is not token:
            return_type_and_name.insert(1, token2)
        return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                               None, False)

    def handle_volatile(self):
        pass

    def handle_mutable(self):
        pass

    def handle_public(self):
        assert self.in_class
        self.visibility = VISIBILITY_PUBLIC

    def handle_protected(self):
        assert self.in_class
        self.visibility = VISIBILITY_PROTECTED

    def handle_private(self):
        assert self.in_class
        self.visibility = VISIBILITY_PRIVATE

    def handle_friend(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert tokens
        t0 = tokens[0]
        return Friend(t0.start, t0.end, tokens, self.namespace_stack)

    def handle_static_cast(self):
        pass

    def handle_const_cast(self):
        pass

    def handle_dynamic_cast(self):
        pass

    def handle_reinterpret_cast(self):
        pass

    def handle_new(self):
        pass

    def handle_delete(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert tokens
        return Delete(tokens[0].start, tokens[0].end, tokens)

    def handle_typedef(self):
        token = self._GetNextToken()
        if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
            # Token must be struct/enum/union/class.
            method = getattr(self, 'handle_' + token.name)
            self._handling_typedef = True
            tokens = [method()]
            self._handling_typedef = False
        else:
            tokens = [token]

        # Get the remainder of the typedef up to the semi-colon.
        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

        # TODO(nnorwitz): clean all this up.
        assert tokens
        name = tokens.pop()
        indices = name
        if tokens:
            indices = tokens[0]
        if not indices:
            indices = token
        if name.name == ')':
            # HACK(nnorwitz): Handle pointers to functions "properly".
            if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
                tokens.append(name)
                name = tokens[3]
        elif name.name == ']':
            # HACK(nnorwitz): Handle arrays properly.
            if len(tokens) >= 2:
                tokens.append(name)
                name = tokens[1]
        new_type = tokens
        if tokens and isinstance(tokens[0], tokenize.Token):
            new_type = self.converter.ToType(tokens)[0]
        return Typedef(indices.start, indices.end, name.name,
                       new_type, self.namespace_stack)

    def handle_typeid(self):
        pass  # Not needed yet.

    def handle_typename(self):
        pass  # Not needed yet.

    def _GetTemplatedTypes(self):
-       result = {}
+       result = collections.OrderedDict()
        tokens = list(self._GetMatchingChar('<', '>'))
        len_tokens = len(tokens) - 1    # Ignore trailing '>'.
        i = 0
        while i < len_tokens:
            key = tokens[i].name
            i += 1
            if keywords.IsKeyword(key) or key == ',':
                continue
            type_name = default = None
            if i < len_tokens:
                i += 1
                if tokens[i-1].name == '=':
                    assert i < len_tokens, '%s %s' % (i, tokens)
                    default, unused_next_token = self.GetName(tokens[i:])
                    i += len(default)
                else:
                    if tokens[i-1].name != ',':
                        # We got something like: Type variable.
                        # Re-adjust the key (variable) and type_name (Type).
                        key = tokens[i-1].name
                        type_name = tokens[i-2]
            result[key] = (type_name, default)
        return result
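
The dict-to-OrderedDict change above is not cosmetic: downstream code emits template parameters in whatever order this mapping yields them, and plain dicts only guarantee insertion order from CPython 3.7 on. A minimal illustration (the tuple values stand in for the (type_name, default) pairs built above):

import collections

templated_types = collections.OrderedDict()
templated_types['T'] = (None, None)   # template <typename T, ...
templated_types['N'] = (None, None)   # ..., size_t N>
print(list(templated_types))          # Always ['T', 'N'], even on Python 2.7.
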
    def handle_template(self):
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == '<', token
        templated_types = self._GetTemplatedTypes()
        # TODO(nnorwitz): for now, just ignore the template params.
        token = self._GetNextToken()
        if token.token_type == tokenize.NAME:
            if token.name == 'class':
                return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
            elif token.name == 'struct':
                return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
            elif token.name == 'friend':
                return self.handle_friend()
        self._AddBackToken(token)
        tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
        tokens.append(last)
        self._AddBackTokens(tokens)
        if last.name == '(':
            return self.GetMethod(FUNCTION_NONE, templated_types)
        # Must be a variable definition.
        return None

    def handle_true(self):
        pass  # Nothing to do.

    def handle_false(self):
        pass  # Nothing to do.

    def handle_asm(self):
        pass  # Not needed yet.

    def handle_class(self):
        return self._GetClass(Class, VISIBILITY_PRIVATE, None)

    def _GetBases(self):
        # Get base classes.
        bases = []
        while 1:
            token = self._GetNextToken()
            assert token.token_type == tokenize.NAME, token
            # TODO(nnorwitz): store kind of inheritance...maybe.
            if token.name not in ('public', 'protected', 'private'):
                # If inheritance type is not specified, it is private.
                # Just put the token back so we can form a name.
                # TODO(nnorwitz): it would be good to warn about this.
                self._AddBackToken(token)
            else:
                # Check for virtual inheritance.
                token = self._GetNextToken()
                if token.name != 'virtual':
                    self._AddBackToken(token)
                else:
                    # TODO(nnorwitz): store that we got virtual for this base.
                    pass
            base, next_token = self.GetName()
            bases_ast = self.converter.ToType(base)
            assert len(bases_ast) == 1, bases_ast
            bases.append(bases_ast[0])
            assert next_token.token_type == tokenize.SYNTAX, next_token
            if next_token.name == '{':
                token = next_token
                break
            # Support multiple inheritance.
            assert next_token.name == ',', next_token
        return bases, token

    def _GetClass(self, class_type, visibility, templated_types):
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            # Skip any macro (e.g. storage class specifiers) after the
            # 'class' keyword.
            next_token = self._GetNextToken()
            if next_token.token_type == tokenize.NAME:
                self._AddBackToken(next_token)
            else:
                self._AddBackTokens([class_token, next_token])
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration.  Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, templated_types, body, self.namespace_stack)

    def handle_namespace(self):
-       token = self._GetNextToken()
-       # Support anonymous namespaces.
-       name = None
-       if token.token_type == tokenize.NAME:
-           name = token.name
-           token = self._GetNextToken()
+       # Support anonymous namespaces.
+       name = None
+       name_tokens, token = self.GetName()
+       if name_tokens:
+           name = ''.join([t.name for t in name_tokens])
        self.namespace_stack.append(name)
        assert token.token_type == tokenize.SYNTAX, token
        # Create an internal token that denotes when the namespace is complete.
        internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                        None, None)
        internal_token.whence = token.whence
        if token.name == '=':
            # TODO(nnorwitz): handle aliasing namespaces.
            name, next_token = self.GetName()
            assert next_token.name == ';', next_token
            self._AddBackToken(internal_token)
        else:
            assert token.name == '{', token
            tokens = list(self.GetScope())
            # Replace the trailing } with the internal namespace pop token.
            tokens[-1] = internal_token
            # Handle namespace with nothing in it.
            self._AddBackTokens(tokens)
        return None

    def handle_using(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert tokens
        return Using(tokens[0].start, tokens[0].end, tokens)

    def handle_explicit(self):
        assert self.in_class
        # Nothing much to do.
        # TODO(nnorwitz): maybe verify the method name == class name.
        # This must be a ctor.
        return self.GetMethod(FUNCTION_CTOR, None)

    def handle_this(self):
        pass  # Nothing to do.

    def handle_operator(self):
        # Pull off the next token(s?) and make that part of the method name.
        pass

    def handle_sizeof(self):
        pass

    def handle_case(self):
        pass

    def handle_switch(self):
        pass

    def handle_default(self):
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX
        assert token.name == ':'

    def handle_if(self):
        pass

    def handle_else(self):
        pass

    def handle_return(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        if not tokens:
            return Return(self.current_token.start, self.current_token.end,
                          None)
        return Return(tokens[0].start, tokens[0].end, tokens)

    def handle_goto(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert len(tokens) == 1, str(tokens)
        return Goto(tokens[0].start, tokens[0].end, tokens[0].name)

    def handle_try(self):
        pass  # Not needed yet.

    def handle_catch(self):
        pass  # Not needed yet.

    def handle_throw(self):
        pass  # Not needed yet.

    def handle_while(self):
        pass

    def handle_do(self):
        pass

    def handle_for(self):
        pass

    def handle_break(self):
        self._IgnoreUpTo(tokenize.SYNTAX, ';')

    def handle_continue(self):
        self._IgnoreUpTo(tokenize.SYNTAX, ';')
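
The long run of handle_* stubs exists because AstBuilder dispatches keywords by name, as the getattr call in handle_typedef shows. The pattern in isolation (a standalone sketch, not part of ast.py):

class KeywordDispatcher(object):
    def handle_struct(self):
        return 'struct'

    def dispatch(self, keyword):
        # Same convention as AstBuilder: look up a handle_<keyword> method.
        return getattr(self, 'handle_' + keyword)()

print(KeywordDispatcher().dispatch('struct'))  # -> struct
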
def BuilderFromSource(source, filename):
    """Utility method that returns an AstBuilder from source code.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder
    """
    return AstBuilder(tokenize.GetTokens(source), filename)


def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    #print('Processing %s' % actual_filename)
    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except:
        pass


def PrintAllIndentifiers(filenames, should_print):
    """Prints all identifiers for each C++ source file in filenames.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    for path in filenames:
        PrintIndentifiers(path, should_print)


def main(argv):
    for filename in argv[1:]:
        source = utils.ReadFile(filename)
        if source is None:
            continue

        print('Processing %s' % filename)
        builder = BuilderFromSource(source, filename)
        try:
            entire_ast = filter(None, builder.Generate())
        except KeyboardInterrupt:
            return
        except:
            # Already printed a warning, print the traceback and continue.
            traceback.print_exc()
        else:
            if utils.DEBUG:
                for ast in entire_ast:
                    print(ast)


if __name__ == '__main__':
    main(sys.argv)
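
main() above makes the module usable as a standalone dump tool; per-node output only appears when utils.DEBUG is set. An illustrative invocation (sample.h is a hypothetical path; unreadable files are skipped after a warning):

import sys
from cpp import ast

# Equivalent to running: python ast.py sample.h
# For an existing header this prints 'Processing sample.h' and, with
# utils.DEBUG enabled, each parsed node.
ast.main(['ast.py', 'sample.h'])
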
End of changes. 117 change blocks. 1455 lines changed or deleted, 1488 lines changed or added.