cadquery-freecad-module/CadQuery/Libs/jedi/parser/fast.py

"""
Basically a parser that is faster because it parses the source in parts: if
anything changes, it reparses only the changed parts. But because it's not
finished (and still not working as I want), I won't document it any further.
"""
import re
from jedi._compatibility import use_metaclass, unicode
from jedi import settings
from jedi import common
from jedi.parser import Parser
from jedi.parser import representation as pr
from jedi.parser import tokenize
from jedi import cache
from jedi.parser.tokenize import (source_tokens, Token, FLOWS, NEWLINE,
                                  COMMENT, ENDMARKER)


class Module(pr.Module, pr.Simple):
    def __init__(self, parsers):
        # `self` is passed as the module argument: a fast module acts as
        # its own top-level module (see pr.Simple.__init__).
        super(Module, self).__init__(self, (1, 0))
        self.parsers = parsers
        self.reset_caches()
        self.start_pos = 1, 0
        self.end_pos = None, None

    def reset_caches(self):
        """ This module does a whole lot of caching, because it uses
        different parsers. """
        with common.ignored(AttributeError):
            del self._used_names

    def __getattr__(self, name):
        if name.startswith('__'):
            raise AttributeError('Not available!')
        else:
            return getattr(self.parsers[0].module, name)

    @property
    @cache.underscore_memoization
    def used_names(self):
        used_names = {}
        for p in self.parsers:
            for k, statement_set in p.module.used_names.items():
                if k in used_names:
                    used_names[k] |= statement_set
                else:
                    used_names[k] = set(statement_set)
        return used_names
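    # A rough sketch of the merge above, assuming two cached parsers that
    # both reference a (hypothetical) name `foo`:
    #
    #     p1.module.used_names -> {'foo': {stmt_a}}
    #     p2.module.used_names -> {'foo': {stmt_b}}
    #     module.used_names    -> {'foo': {stmt_a, stmt_b}}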

    def __repr__(self):
        return "<fast.%s: %s@%s-%s>" % (type(self).__name__, self.name,
                                        self.start_pos[0], self.end_pos[0])


class CachedFastParser(type):
    """ This is a metaclass for caching `FastParser`. """
    def __call__(self, source, module_path=None):
        if not settings.fast_parser:
            return Parser(source, module_path)

        pi = cache.parser_cache.get(module_path, None)
        if pi is None or isinstance(pi.parser, Parser):
            p = super(CachedFastParser, self).__call__(source, module_path)
        else:
            p = pi.parser  # pi is a `cache.ParserCacheItem`
            p.update(source)
        return p
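
# A minimal sketch of the caching behaviour, assuming jedi has stored the
# parser in `cache.parser_cache` after the first call (paths and sources
# are hypothetical):
#
#     p1 = FastParser(u'import os\n', '/tmp/x.py')     # parsed from scratch
#     p2 = FastParser(u'import os\nos\n', '/tmp/x.py')
#     # p2 is p1 -> True; the cached parser was updated via `update()`,
#     # not rebuilt.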


class ParserNode(object):
    def __init__(self, parser, code, parent=None):
        self.parent = parent
        self.children = []
        # The contents must be saved before new children are added to
        # this node.
        self.save_contents(parser, code)

    def save_contents(self, parser, code):
        self.code = code
        self.hash = hash(code)
        self.parser = parser

        try:
            # With the fast parser a part has either exactly one subscope
            # or only statements.
            self.content_scope = parser.module.subscopes[0]
        except IndexError:
            self.content_scope = parser.module

        scope = self.content_scope
        self._contents = {}
        for c in pr.SCOPE_CONTENTS:
            self._contents[c] = list(getattr(scope, c))
        self._is_generator = scope.is_generator

        self.old_children = self.children
        self.children = []

    def reset_contents(self):
        scope = self.content_scope
        for key, c in self._contents.items():
            setattr(scope, key, list(c))
        scope.is_generator = self._is_generator

        if self.parent is None:
            # The global vars of the root node can be deleted; in the
            # global scope they make no sense.
            self.parser.module.global_vars = []

        for c in self.children:
            c.reset_contents()

    def parent_until_indent(self, indent=None):
        # Climb up until a node is indented less than `indent` (or all
        # the way up to the root if `indent` is None).
        if indent is None or (self.indent >= indent and self.parent):
            self.old_children = []
            if self.parent is not None:
                return self.parent.parent_until_indent(indent)
        return self
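
    # A sketch of the climbing behaviour above, for a hypothetical node
    # tree (indents in parentheses):
    #
    #     module (0)
    #       `- def a (4)
    #            `- def b (8)         <- current node
    #
    #     node.parent_until_indent(8) -> the `def a` node (4 < 8)
    #     node.parent_until_indent()  -> the root module node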

    @property
    def indent(self):
        if not self.parent:
            return 0
        # The indentation of a part is the start column of its first
        # element (subscope, statement, import or return).
        module = self.parser.module
        for elements in (module.subscopes, module.statements, module.imports,
                         [r for r in module.returns if r is not None]):
            if elements:
                return elements[0].start_pos[1]
        return self.parent.indent + 1
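
    # For example (hypothetical part): if this node's parser holds
    #
    #     def method(self):
    #         pass
    #
    # indented by four spaces, `module.subscopes[0].start_pos[1]` is 4,
    # so `node.indent` is 4.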

    def _set_items(self, parser, set_parent=False):
        # Insert the parser objects into the current structure.
        scope = self.content_scope
        for c in pr.SCOPE_CONTENTS:
            content = getattr(scope, c)
            items = getattr(parser.module, c)
            if set_parent:
                for i in items:
                    if i is None:
                        continue  # happens with empty returns
                    i.parent = scope.use_as_parent
                    if isinstance(i, (pr.Function, pr.Class)):
                        for d in i.decorators:
                            d.parent = scope.use_as_parent
            content += items

        # global_vars
        cur = self
        while cur.parent is not None:
            cur = cur.parent
        cur.parser.module.global_vars += parser.module.global_vars

        scope.is_generator |= parser.module.is_generator

    def add_node(self, node, set_parent=False):
        """Add a node that was already created during an earlier parse."""
        self.children.append(node)
        self._set_items(node.parser, set_parent=set_parent)
        node.old_children = node.children  # TODO potential memory leak?
        node.children = []

        scope = self.content_scope
        while scope is not None:
            if not isinstance(scope, pr.SubModule):
                # TODO This seems like a strange thing. Check again.
                scope.end_pos = node.content_scope.end_pos
            scope = scope.parent
        return node

    def add_parser(self, parser, code):
        return self.add_node(ParserNode(parser, code, self), True)
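
# The nodes form a tree that mirrors the nesting of the file. A rough
# sketch for a file with two top-level functions (hypothetical):
#
#     root ParserNode (module statements)
#       +- ParserNode for `def a(): ...`
#       +- ParserNode for `def b(): ...`
#
# When an edit touches only `b`, the node for `a` keeps its cached parser
# and is re-attached via `add_node()`; only `b` is parsed again.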


class FastParser(use_metaclass(CachedFastParser)):
    _keyword_re = re.compile('^[ \t]*(def|class|@|%s)' % '|'.join(tokenize.FLOWS))

    def __init__(self, code, module_path=None):
        # Set values like `pr.Module` does.
        self.module_path = module_path

        self.current_node = None
        self.parsers = []
        self.module = Module(self.parsers)
        self.reset_caches()

        try:
            self._parse(code)
        except:
            # FastParser is cached, so be careful with exceptions.
            del self.parsers[:]
            raise

    def update(self, code):
        self.reset_caches()

        try:
            self._parse(code)
        except:
            # FastParser is cached, so be careful with exceptions.
            del self.parsers[:]
            raise

    def _split_parts(self, code):
        """
        Split the code into different parts. This makes it possible to parse
        each part separately and therefore cache parts of the file instead of
        everything.
        """
        def gen_part():
            text = '\n'.join(current_lines)
            del current_lines[:]
            return text

        # Split on '\n' only; distinguishing '\r\n' is the tokenizer's job.
        self._lines = code.split('\n')
        current_lines = []
        is_decorator = False
        current_indent = 0
        old_indent = 0
        new_indent = False
        in_flow = False
        # All things within flows are simply being ignored.
        for l in self._lines:
            # Check for dedents.
            s = l.lstrip('\t ')
            indent = len(l) - len(s)
            if not s or s[0] in ('#', '\r'):
                current_lines.append(l)  # Just ignore comments and blank lines.
                continue

            if indent < current_indent:  # -> dedent
                current_indent = indent
                new_indent = False
                if not in_flow or indent < old_indent:
                    if current_lines:
                        yield gen_part()
                in_flow = False
            elif new_indent:
                current_indent = indent
                new_indent = False

            # Check lines for functions/classes and split the code there.
            if not in_flow:
                m = self._keyword_re.match(l)
                if m:
                    in_flow = m.group(1) in tokenize.FLOWS
                    if not is_decorator and not in_flow:
                        if current_lines:
                            yield gen_part()
                    is_decorator = '@' == m.group(1)
                    if not is_decorator:
                        old_indent = current_indent
                        current_indent += 1  # It must be higher.
                        new_indent = True
                elif is_decorator:
                    is_decorator = False

            current_lines.append(l)
        if current_lines:
            yield gen_part()
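
    # A rough sketch of how a (hypothetical) source is split; flow
    # keywords never open a new part, and decorators stick to their def:
    #
    #     import os        \
    #     x = 1            /   part 1: plain statements stay together
    #     def f():         \
    #         return x     /   part 2: a def starts a new part
    #     f()                  part 3: the dedent closes part 2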

    def _parse(self, code):
        """ :type code: str """
        def empty_parser():
            new, temp = self._get_parser(unicode(''), unicode(''), 0, [], False)
            return new

        del self.parsers[:]

        line_offset = 0
        start = 0
        p = None
        is_first = True
        for code_part in self._split_parts(code):
            if is_first or line_offset >= p.module.end_pos[0]:
                indent = len(code_part) - len(code_part.lstrip('\t '))
                if is_first and self.current_node is not None:
                    nodes = [self.current_node]
                else:
                    nodes = []
                if self.current_node is not None:
                    self.current_node = \
                        self.current_node.parent_until_indent(indent)
                    nodes += self.current_node.old_children

                # Check if code_part has already been parsed.
                p, node = self._get_parser(code_part, code[start:],
                                           line_offset, nodes, not is_first)

                # The code_part that is actually used may differ from the
                # given one: because of docstrings, for example, there is a
                # chance that the heuristic splits are wrong.
                used_lines = self._lines[line_offset:p.module.end_pos[0]]
                code_part_actually_used = '\n'.join(used_lines)

                if is_first and p.module.subscopes:
                    # Special case: we cannot use a function subscope as a
                    # base scope, because the subscope would swallow all the
                    # other contents.
                    new = empty_parser()
                    if self.current_node is None:
                        self.current_node = ParserNode(new, '')
                    else:
                        self.current_node.save_contents(new, '')
                    self.parsers.append(new)
                    is_first = False

                if is_first:
                    if self.current_node is None:
                        self.current_node = ParserNode(p, code_part_actually_used)
                    else:
                        self.current_node.save_contents(p, code_part_actually_used)
                else:
                    if node is None:
                        self.current_node = \
                            self.current_node.add_parser(p, code_part_actually_used)
                    else:
                        self.current_node = self.current_node.add_node(node)

                self.parsers.append(p)

                is_first = False

            line_offset += code_part.count('\n') + 1
            start += len(code_part) + 1  # +1 for the split-away newline

        if self.parsers:
            self.current_node = self.current_node.parent_until_indent()
        else:
            self.parsers.append(empty_parser())

        self.module.end_pos = self.parsers[-1].module.end_pos
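
    # After `_parse()`, `self.parsers` holds one `Parser` per (re)used
    # part and `self.current_node` is the root of the node tree. A rough
    # trace for two parts (line numbers hypothetical):
    #
    #     part 1 -> lines 1-3, parser A (line_offset 0)
    #     part 2 -> lines 4-6, parser B (line_offset 3)
    #     self.module.end_pos == parser B's module.end_pos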

    def _get_parser(self, code, parser_code, line_offset, nodes, no_docstr):
        h = hash(code)
        for index, node in enumerate(nodes):
            if node.hash != h or node.code != code:
                continue

            if node != self.current_node:
                offset = int(nodes[0] == self.current_node)
                self.current_node.old_children.pop(index - offset)
            p = node.parser
            m = p.module
            m.line_offset += line_offset + 1 - m.start_pos[0]
            break
        else:
            tokenizer = FastTokenizer(parser_code, line_offset)
            p = Parser(parser_code, self.module_path, tokenizer=tokenizer,
                       top_module=self.module, no_docstr=no_docstr)
            p.module.parent = self.module
            node = None

        return p, node
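
    # A cache hit in a nutshell: an unchanged part is recognized by hash
    # and code equality, and only its line offset is shifted. E.g. (all
    # numbers hypothetical) a part that used to start at line 10 and now
    # starts at line 12 keeps its parser and is moved down by two lines.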

    def reset_caches(self):
        self.module.reset_caches()
        if self.current_node is not None:
            self.current_node.reset_contents()
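
# A minimal usage sketch (source and path are hypothetical; note that with
# `settings.fast_parser` disabled the metaclass returns a plain `Parser`):
#
#     src = u'def a():\n    pass\n\ndef b():\n    pass\n'
#     p = FastParser(src, '/tmp/example.py')
#     p.update(src.replace('pass', 'return 1'))  # reparses changed parts
#     print(p.module.end_pos)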


class FastTokenizer(object):
    """
    Breaks the token stream when certain conditions are met, e.g. when a new
    function or class opens.
    """
    def __init__(self, source, line_offset=0):
        self.source = source
        self.gen = source_tokens(source, line_offset)
        self.closed = False

        # fast parser options
        self.current = self.previous = Token(None, '', (0, 0))
        self.in_flow = False
        self.new_indent = False
        self.parser_indent = self.old_parser_indent = 0
        self.is_decorator = False
        self.first_stmt = True
        self.parentheses_level = 0

    def next(self):
        """ Python 2 compatibility """
        return self.__next__()

    def __next__(self):
        if self.closed:
            raise common.MultiLevelStopIteration()

        current = next(self.gen)
        tok_type = current.type
        tok_str = current.string
        if tok_type == ENDMARKER:
            raise common.MultiLevelStopIteration()

        self.previous = self.current
        self.current = current

        # This is the same check as in `FastParser._split_parts`, but this
        # time with real tokens and therefore precise.
        breaks = ['def', 'class', '@']

        def close():
            if not self.first_stmt:
                self.closed = True
                raise common.MultiLevelStopIteration()

        # Ignore comments/newlines; they are irrelevant for the indentation.
        if self.previous.type in (None, NEWLINE) \
                and tok_type not in (COMMENT, NEWLINE):
            indent = current.start_pos[1]
            if self.parentheses_level:
                # Within parentheses the indentation rules are ignored.
                pass
            elif indent < self.parser_indent:  # -> dedent
                self.parser_indent = indent
                self.new_indent = False
                if not self.in_flow or indent < self.old_parser_indent:
                    close()
                self.in_flow = False
            elif self.new_indent:
                self.parser_indent = indent
                self.new_indent = False

            if not self.in_flow:
                if tok_str in FLOWS or tok_str in breaks:
                    self.in_flow = tok_str in FLOWS
                    if not self.is_decorator and not self.in_flow:
                        close()
                    self.is_decorator = '@' == tok_str
                    if not self.is_decorator:
                        self.old_parser_indent = self.parser_indent
                        self.parser_indent += 1  # new scope: must be higher
                        self.new_indent = True

            if tok_str != '@':
                if self.first_stmt and not self.new_indent:
                    self.parser_indent = indent
                self.first_stmt = False

        # Track bracket nesting; inside brackets indentation is irrelevant.
        # The `and tok_str` guard matters: `'' in '([{'` is True, so empty
        # token strings (e.g. DEDENT) would otherwise be miscounted.
        if tok_str in '([{' and tok_str:
            self.parentheses_level += 1
        elif tok_str in ')]}' and tok_str:
            self.parentheses_level = max(self.parentheses_level - 1, 0)

        return current
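
# A rough sketch of where the tokenizer breaks (source hypothetical):
#
#     tok = FastTokenizer(u'x = 1\ndef f():\n    pass\n')
#     # Repeated next(tok) calls yield the tokens of `x = 1`; when `def`
#     # opens a new scope, close() raises common.MultiLevelStopIteration
#     # and the surrounding Parser stops after the first part.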