-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Expand file tree
/
Copy pathtraverser.py
More file actions
132 lines (118 loc) · 5.02 KB
/
traverser.py
File metadata and controls
132 lines (118 loc) · 5.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
'''The traverser is the front-end of the Python extractor. It walks the file system yielding
a sequence of modules to be queued up and processed by the back-end.'''
import re
import os.path
from semmle.path_filters import filter_from_pattern
from semmle.util import Extractable, PY_EXTENSIONS, isdir, islink, listdir
from semmle.python import finder, modules
from semmle.worker import ExtractorFailure
# Compatibility shim: on interpreters that have no built-in FileNotFoundError
# (e.g. Python 2) merely evaluating the name raises NameError, in which case
# IOError is bound to that name so the rest of the module can raise it.
try:
    FileNotFoundError
except NameError:
    FileNotFoundError = IOError

# Names exported by `from <this module> import *`.
__all__ = [ 'Traverser' ]
class Traverser(object):
    '''Iterable producing every extractable selected for the Python extractor.

    The selection is driven by the parsed command line `options` (explicit
    files, directories and packages to recurse into, exclusions and path
    filters) together with the explicitly requested `modulenames`.
    '''

    def __init__(self, options, modulenames, logger):
        '''Record what has to be extracted.

        options     -- parsed options; the attributes read here are `files`,
                       `exclude_file`, `recurse_files`, `recursive` and
                       `ignore_missing_modules` (the filter factories and the
                       finder read further attributes).
        modulenames -- names of the modules to look up with the finder.
        logger      -- logger used for diagnostics while iterating.

        Raises FileNotFoundError if a file in `options.files` does not exist
        and `options.ignore_missing_modules` is not set.
        '''
        self.paths = set()
        for candidate in options.files or ():
            if not (os.path.exists(candidate) or options.ignore_missing_modules):
                raise FileNotFoundError("'%s' does not exist." % candidate)
            # A missing file is still recorded when missing modules are ignored.
            self.paths.add(candidate)
        self.exclude_paths = set(
            os.path.abspath(excluded) for excluded in options.exclude_file
        )
        self.exclude = exclude_filter_from_options(options)
        self.filter = filter_from_options_and_environment(options)
        self.recurse_files = options.recurse_files
        self.recurse_packages = options.recursive
        self.modulenames = modulenames
        self.finder = finder.Finder.from_options_and_env(options, logger)
        self.logger = logger
        self.ignore_missing_modules = options.ignore_missing_modules

    def __iter__(self):
        '''Yield the extractables, one group after the other:

        1. the requested modules that are not excluded by name,
        2. the explicitly listed files,
        3. everything found below the directories to recurse into,
        4. everything found below the packages to recurse into.

        Raises ExtractorFailure when a requested module cannot be found, when
        a package cannot be found (unless missing modules are ignored) or when
        a package has no path.
        '''
        # 1. Modules requested by name.
        for name in self.modulenames:
            if self.exclude(name):
                continue
            module = self.finder.find(name)
            if module is None:
                self.logger.error("No module named '%s'.", name)
                raise ExtractorFailure()
            yield module.get_extractable()

        # 2. Files given explicitly.
        for explicit_path in self.paths:
            yield Extractable.from_path(explicit_path)

        # 3. Directory trees.
        for root in self.recurse_files:
            for found in self._treewalk(root):
                yield Extractable.from_path(found)

        # 4. Package trees.
        for package_name in self.recurse_packages:
            package = self.finder.find(package_name)
            if package is None:
                if not self.ignore_missing_modules:
                    self.logger.error("Package '%s' does not exist.", package_name)
                    raise ExtractorFailure()
                continue
            package_dir = package.path
            if package_dir is None:
                self.logger.error("Package '%s' does not have a path.", package_name)
                raise ExtractorFailure()
            for found in self._treewalk(package_dir):
                yield Extractable.from_path(found)

    def _treewalk(self, path):
        '''Generate the paths of interest below the directory `path`.

        Symbolic links are never followed. A regular entry is yielded when
        `self.filter` accepts it. A sub-directory is yielded after its own
        contents, and only if it produced at least one item.

        NOTE(review): hidden files and directories are not skipped by any code
        visible here -- presumably `listdir` takes care of that; confirm.
        '''
        # Only sub-directories are checked against the exclusions: `path`
        # itself was asked for explicitly, and inclusion takes priority over
        # exclusion.
        root = os.path.abspath(path)
        self.logger.debug("Traversing %s", root)
        for entry in listdir(root):
            candidate = os.path.join(root, entry)
            if islink(candidate):
                self.logger.debug("Ignoring %s (symlink)", candidate)
                continue
            if not isdir(candidate):
                if self.filter(candidate):
                    yield candidate
                else:
                    self.logger.debug("Ignoring %s (filter)", candidate)
                continue
            if candidate in self.exclude_paths:
                self.logger.debug("Ignoring %s (excluded)", candidate)
                continue
            produced_something = False
            for item in self._treewalk(candidate):
                yield item
                produced_something = True
            if produced_something:
                yield candidate
def exclude_filter_from_options(options):
    '''Build the predicate that decides whether a module name is excluded.

    `options.exclude_package` lists dotted package names; a name is excluded
    when it equals one of them or lies below one of them. Only the dots of
    these names are escaped before they are placed in the regular expression.
    `options.exclude_pattern` is a regular expression that must match the
    whole name. When neither option is set the returned predicate rejects
    nothing. The returned predicate always answers False for a name of None.
    '''
    packages = options.exclude_package
    extra_pattern = options.exclude_pattern

    if not packages and not extra_pattern:
        def no_filter(name):
            return False
        return no_filter

    if packages:
        escaped = [package.replace('.', r'\.') for package in packages]
        # A listed package itself, optionally followed by ".<anything>".
        package_regex = r'(?:%s)(?:\..+)?' % '|'.join(escaped)
        if extra_pattern:
            regex = '^((?:%s)|(?:%s))$' % (package_regex, extra_pattern)
        else:
            regex = '^%s$' % package_regex
    else:
        regex = '^(?:%s)$' % extra_pattern

    compiled = re.compile(regex)

    def exclude_filter(name):
        if name is None:
            return False
        return compiled.match(name) is not None
    return exclude_filter
def base_filter(path):
    '''Default path filter.

    Accepts `path` when its extension is one of PY_EXTENSIONS. A path with
    any other extension is rejected; a path with no extension at all is
    accepted according to `modules.is_script`.
    '''
    extension = os.path.splitext(path)[1]
    if extension in PY_EXTENSIONS:
        return True
    if extension:
        return False
    # No extension: defer to the script check.
    return modules.is_script(path)
def filter_from_options_and_environment(options):
    '''Compose the path filter used while walking directory trees.

    Starts from `base_filter` and wraps it, in order, with one filter per
    entry of `options.path_filter`. If the LGTM_SRC environment variable is
    set, its value with a trailing path separator is handed to
    `filter_from_pattern` as the prefix; otherwise the prefix is empty.
    '''
    source_root = os.environ.get("LGTM_SRC", None)
    # Joining with "" appends the trailing separator.
    prefix = "" if source_root is None else os.path.join(source_root, "")

    combined = base_filter
    for pattern_line in options.path_filter:
        combined = filter_from_pattern(pattern_line, combined, prefix)
    return combined