-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Expand file tree
/
Copy pathFiles.qll
More file actions
370 lines (318 loc) · 12.2 KB
/
Files.qll
File metadata and controls
370 lines (318 loc) · 12.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
/** Provides classes for working with files and folders. */
import python
private import codeql.util.FileSystem
/**
 * Supplies the database-backed container classes with which `Make` is
 * instantiated below.
 */
private module Input implements InputSig {
  /** A file or folder, as stored in the database. */
  abstract class ContainerBase extends @container {
    /** Gets the absolute path of this container. */
    abstract string getAbsolutePath();

    /** Gets a textual representation of this container, namely its absolute path. */
    string toString() { this.getAbsolutePath() = result }

    /** Gets the container recorded by `containerparent` as the parent of this one. */
    ContainerBase getParentContainer() { containerparent(result, this) }
  }

  /** A folder, whose path is read from the `folders` relation. */
  class FolderBase extends ContainerBase, @folder {
    override string getAbsolutePath() { folders(this, result) }
  }

  /** A file, whose path is read from the `files` relation. */
  class FileBase extends ContainerBase, @file {
    override string getAbsolutePath() { files(this, result) }
  }

  predicate hasSourceLocationPrefix = sourceLocationPrefix/1;
}

private module Impl = Make<Input>;
/** A file */
class File extends Container, Impl::File {
  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    this.getAbsolutePath() = filepath and
    startline = 0 and
    startcolumn = 0 and
    endline = 0 and
    endcolumn = 0
  }

  /** Whether this file is a source code file. */
  predicate fromSource() {
    /* If we start to analyze .pyc files, then this will have to change. */
    any()
  }

  /** Gets a short name for this file (just the file name) */
  string getShortName() { result = this.getBaseName() }

  /** Gets the largest end line of any location in this file. */
  private int lastLine() {
    result = max(int i | exists(Location l | l.getFile() = this and l.getEndLine() = i))
  }

  /** Whether line n is empty (it contains neither code nor comment). */
  predicate emptyLine(int n) {
    n in [0 .. this.lastLine()] and
    not occupied_line(this, n)
  }

  /**
   * Gets the encoding named by a `coding:` or `coding=` declaration found in a
   * comment that starts before line 3 of this file, if there is one.
   */
  string getSpecifiedEncoding() {
    exists(Comment c, Location l | l = c.getLocation() and l.getFile() = this |
      l.getStartLine() < 3 and
      result = c.getText().regexpCapture(".*coding[:=]\\s*([-\\w.]+).*", 1)
    )
  }

  override Container getImportRoot(int n) {
    /* File stem must be a legal Python identifier */
    this.getStem().regexpMatch("[^\\d\\W]\\w*") and
    result = this.getParent().getImportRoot(n)
  }

  /**
   * Gets the contents of this file as a string.
   * This will only work for those non-python files that
   * are specified to be extracted.
   */
  string getContents() { file_contents(this, result) }

  /** Holds if this file is likely to get executed directly, and thus act as an entry point for execution. */
  predicate isPossibleEntryPoint() {
    // Only consider files in the source code, and not things like the standard library
    exists(this.getRelativePath()) and
    (
      // The file doesn't have the extension `.py` but still contains Python statements
      not this.getExtension().matches("py%") and
      exists(Stmt s | s.getLocation().getFile() = this)
      or
      // The file contains the usual `if __name__ == '__main__':` construction
      exists(If i, Name name, StringLiteral main, Cmpop op |
        i.getScope().(Module).getFile() = this and
        op instanceof Eq and
        i.getTest().(Compare).compares(name, op, main) and
        name.getId() = "__name__" and
        main.getText() = "__main__"
      ) and
      // Exclude files named `__main__.py`. These are often _not_ meant to be run directly, but
      // contain this construct anyway.
      //
      // Their presence in a package (say, `foo`) means one can execute the package directly using
      // `python -m foo` (which will run the `foo/__main__.py` file). Since being an entry point for
      // execution means treating imports as absolute, this causes trouble, since when run with
      // `python -m`, the interpreter uses the usual package semantics.
      not this.getShortName() = "__main__.py"
      or
      // The file contains a `#!` line referencing the python interpreter.
      // The trailing character class is `[ \t]` at the regex level (space or tab), so
      // trailing whitespace after the interpreter name is tolerated. A doubly-escaped
      // backslash here would instead match a literal backslash or the letter `t`.
      exists(Comment c |
        c.getLocation().getFile() = this and
        c.getLocation().getStartLine() = 1 and
        c.getText().regexpMatch("^#! */.*python(2|3)?[ \\t]*$")
      )
    )
  }
}
/**
 * Holds if line `n` of file `f` is the start line of some location, or lies
 * within the line span of a string literal's location.
 */
private predicate occupied_line(File f, int n) {
  exists(Location loc |
    f = loc.getFile() and
    (
      n = loc.getStartLine()
      or
      // A string literal may span several lines; every line it covers counts.
      n in [loc.getStartLine() .. loc.getEndLine()] and
      exists(StringLiteral str | loc = str.getLocation())
    )
  )
}
/** A folder (directory) */
class Folder extends Container, Impl::Folder {
  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    // A folder has no meaningful span, so every coordinate is zero.
    filepath = this.getAbsolutePath() and
    startline = 0 and
    endline = 0 and
    startcolumn = 0 and
    endcolumn = 0
  }

  override Container getImportRoot(int n) {
    // Either this folder is itself the `n`th import path entry ...
    result = this and
    this.isImportRoot(n)
    or
    // ... or the root is inherited from the parent, provided that the
    // folder name is a legal Python identifier.
    result = this.getParent().getImportRoot(n) and
    this.getBaseName().regexpMatch("[^\\d\\W]\\w*")
  }
}
/**
 * A container is an abstract representation of a file system object that can
 * hold elements of interest.
 */
class Container extends Impl::Container {
  /** Gets the parent of this container; this is the same as `getParentContainer()`. */
  Container getParent() { this.getParentContainer() = result }

  /** Whether this file or folder is part of the standard library */
  predicate inStdlib() { this.inStdlib(_, _) }

  /**
   * Whether this file or folder is part of the standard library
   * for version `major.minor`
   */
  predicate inStdlib(int major, int minor) {
    exists(Module stdModule |
      stdModule.inStdLib(major, minor) and
      this = stdModule.getPath()
    )
  }

  override Container getParentContainer() { result = super.getParentContainer() }

  /** Gets the child container of this container whose base name is `baseName`. */
  Container getChildContainer(string baseName) {
    baseName = result.getBaseName() and
    result = this.getAChildContainer()
  }

  /** Holds if this folder is on the import path. */
  predicate isImportRoot() { this.isImportRoot(_) }

  /**
   * Holds if this folder is on the import path, at index `n` in the list of
   * paths. The list of paths is composed of the paths passed to the extractor and
   * `sys.path`.
   */
  predicate isImportRoot(int n) { import_path_element(n) = this.getAbsolutePath() }

  /** Holds if this folder is the root folder for the standard library. */
  predicate isStdLibRoot(int major, int minor) {
    this.isStdLibRoot() and
    major = major_version() and
    minor = minor_version()
  }

  /** Holds if this folder is the root folder for the standard library. */
  predicate isStdLibRoot() {
    /*
     * Look for a standard lib module and find its import path
     * We use `os` as it is the most likely to be imported and
     * `tty` because it is small for testing.
     */

    exists(Module probe | probe.getName() = ["os", "tty"] |
      this = probe.getFile().getImportRoot()
    )
  }

  /** Gets the path element from which this container would be loaded. */
  Container getImportRoot() {
    exists(int best |
      // No strictly smaller index yields an import root, so `best` is the first hit.
      not exists(int earlier |
        earlier < best and
        exists(this.getImportRoot(earlier))
      ) and
      result = this.getImportRoot(best)
    )
  }

  /** Gets the path element from which this container would be loaded, given the index into the list of possible paths `n`. */
  abstract Container getImportRoot(int n);
}
/**
 * Gets the `n`th element of the combined import path, with backslashes
 * replaced by forward slashes. Elements of the `extractor.path` flag come
 * first; elements of the `sys.path` flag follow, offset by the number of
 * `extractor.path` elements.
 */
private string import_path_element(int n) {
  exists(string joined, string sep, int offset |
    joined = get_path("extractor.path") and
    offset = 0
    or
    joined = get_path("sys.path") and
    offset = count(get_path("extractor.path").splitAt(sep))
  |
    py_flags_versioned("os.pathsep", sep, _) and
    result = joined.splitAt(sep, n - offset).replaceAll("\\", "/")
  )
}
/** Gets the value recorded in `py_flags_versioned` for the flag called `name`. */
private string get_path(string name) {
  py_flags_versioned(name, result, _)
}
/**
 * A location in the source, stored either in `locations_default` (relative to
 * a container) or in `locations_ast` (relative to a module).
 */
class Location extends @location {
  /** Gets the file for this location */
  File getFile() { this.getPath() = result }

  /** Gets the container this location belongs to, directly or via its module. */
  private Container getPath() {
    exists(Module owner |
      locations_ast(this, owner, _, _, _, _) and
      result = owner.getPath()
    )
    or
    locations_default(this, result, _, _, _, _)
  }

  /** Gets the 1-based line number (inclusive) where this location starts. */
  int getStartLine() {
    locations_ast(this, _, result, _, _, _)
    or
    locations_default(this, _, result, _, _, _)
  }

  /** Gets the 1-based column number (inclusive) where this location starts. */
  int getStartColumn() {
    locations_ast(this, _, _, result, _, _)
    or
    locations_default(this, _, _, result, _, _)
  }

  /** Gets the 1-based line number (inclusive) where this location ends. */
  int getEndLine() {
    locations_ast(this, _, _, _, result, _)
    or
    locations_default(this, _, _, _, result, _)
  }

  /** Gets the 1-based column number (inclusive) where this location ends. */
  int getEndColumn() {
    locations_ast(this, _, _, _, _, result)
    or
    locations_default(this, _, _, _, _, result)
  }

  /** Gets a textual representation of this element. */
  string toString() {
    exists(string where, string line |
      where = this.getPath().getAbsolutePath() and
      line = this.getStartLine().toString()
    |
      result = where + ":" + line
    )
  }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(File file | filepath = file.getAbsolutePath() |
      exists(Module owner | file = owner.getFile() |
        locations_ast(this, owner, startline, startcolumn, endline, endcolumn)
      )
      or
      locations_default(this, file, startline, startcolumn, endline, endcolumn)
    )
    or
    // Packages have no suitable filepath, so we use just the path instead.
    exists(Module pkg |
      not exists(pkg.getFile()) and
      locations_ast(this, pkg, startline, startcolumn, endline, endcolumn) and
      filepath = pkg.getPath().getAbsolutePath()
    )
  }
}
/** A non-empty line in the source code */
class Line extends @py_line {
  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(Module owner |
      // The line starts at column 1 and ends at the column given by its length.
      py_line_lengths(this, owner, startline, endcolumn) and
      startcolumn = 1 and
      endline = startline and
      filepath = owner.getFile().getAbsolutePath()
    )
  }

  /** Gets a textual representation of this element. */
  string toString() {
    exists(Module owner, string fileName, string lineNo |
      py_line_lengths(this, owner, _, _) and
      fileName = owner.getFile().getShortName() and
      lineNo = this.getLineNumber().toString()
    |
      result = fileName + ":" + lineNo
    )
  }

  /** Gets the line number of this line */
  int getLineNumber() { py_line_lengths(this, _, result, _) }

  /** Gets the length of this line */
  int getLength() { py_line_lengths(this, _, _, result) }

  /** Gets the module that contains this line */
  Module getModule() { py_line_lengths(this, result, _, _) }
}
/**
 * A syntax error. Note that if there is a syntax error in a module,
 * much information about that module will be lost
 */
class SyntaxError extends Location {
  SyntaxError() {
    // Only errors recorded for the major version under analysis are considered.
    exists(string version | version = major_version().toString() |
      py_syntax_error_versioned(this, _, version)
    )
  }

  override string toString() { result = "Syntax Error" }

  /** Gets the message corresponding to this syntax error */
  string getMessage() {
    exists(string version | version = major_version().toString() |
      py_syntax_error_versioned(this, result, version)
    )
  }
}
/**
 * An encoding error. Note that if there is an encoding error in a module,
 * much information about that module will be lost
 */
class EncodingError extends SyntaxError {
  EncodingError() {
    /* Leave spaces around 'decode' in unlikely event it occurs as a name in a syntax error */
    exists(string lowered | lowered = this.getMessage().toLowerCase() |
      lowered.matches("% decode %")
    )
  }

  override string toString() { result = "Encoding Error" }
}