Skip to content

Commit 75cfec8

Browse files
authored
Merge pull request github#4828 from yoff/yoff-python-add-source-nodes
Python: add source nodes
2 parents e87fd86 + 8ceb33d commit 75cfec8

File tree

13 files changed

+110
-58
lines changed

13 files changed

+110
-58
lines changed

python/ql/src/semmle/python/Concepts.qll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ module HTTP {
338338
/** Gets the URL pattern for this route, if it can be statically determined. */
339339
string getUrlPattern() {
340340
exists(StrConst str |
341-
DataFlow::localFlow(DataFlow::exprNode(str), this.getUrlPatternArg()) and
341+
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getUrlPatternArg()) and
342342
result = str.getText()
343343
)
344344
}
@@ -405,7 +405,9 @@ module HTTP {
405405
/** Gets the mimetype of this HTTP response, if it can be statically determined. */
406406
string getMimetype() {
407407
exists(StrConst str |
408-
DataFlow::localFlow(DataFlow::exprNode(str), this.getMimetypeOrContentTypeArg()) and
408+
DataFlow::exprNode(str)
409+
.(DataFlow::LocalSourceNode)
410+
.flowsTo(this.getMimetypeOrContentTypeArg()) and
409411
result = str.getText().splitAt(";", 0)
410412
)
411413
or

python/ql/src/semmle/python/Exprs.qll

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -584,18 +584,40 @@ class Slice extends Slice_ {
584584
}
585585
}
586586

587+
/**
588+
* Returns all string prefixes in the database that are explicitly marked as Unicode strings.
589+
*
590+
* Helper predicate for `StrConst::isUnicode`.
591+
*/
592+
pragma[nomagic]
593+
private string unicode_prefix() {
594+
result = any(Str_ s).getPrefix() and
595+
result.charAt(_) in ["u", "U"]
596+
}
597+
598+
/**
599+
* Returns all string prefixes in the database that are _not_ explicitly marked as bytestrings.
600+
*
601+
* Helper predicate for `StrConst::isUnicode`.
602+
*/
603+
pragma[nomagic]
604+
private string non_byte_prefix() {
605+
result = any(Str_ s).getPrefix() and
606+
not result.charAt(_) in ["b", "B"]
607+
}
608+
587609
/** A string constant. */
588610
class StrConst extends Str_, ImmutableLiteral {
589611
/* syntax: "hello" */
590612
predicate isUnicode() {
591-
this.getPrefix().charAt(_) = "u"
592-
or
593-
this.getPrefix().charAt(_) = "U"
594-
or
595-
not this.getPrefix().charAt(_) = "b" and major_version() = 3
613+
this.getPrefix() = unicode_prefix()
596614
or
597-
not this.getPrefix().charAt(_) = "b" and
598-
this.getEnclosingModule().hasFromFuture("unicode_literals")
615+
this.getPrefix() = non_byte_prefix() and
616+
(
617+
major_version() = 3
618+
or
619+
this.getEnclosingModule().hasFromFuture("unicode_literals")
620+
)
599621
}
600622

601623
deprecated override string strValue() { result = this.getS() }

python/ql/src/semmle/python/dataflow/new/TypeTracker.qll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ module StepSummary {
5151
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
5252
*/
5353
cached
54-
predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
54+
predicate step(LocalSourceNode nodeFrom, Node nodeTo, StepSummary summary) {
5555
exists(Node mid | typePreservingStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
5656
}
5757

@@ -82,9 +82,8 @@ module StepSummary {
8282

8383
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
8484
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
85-
EssaFlow::essaFlowStep(nodeFrom, nodeTo) or
86-
jumpStep(nodeFrom, nodeTo) or
87-
nodeFrom = nodeTo.(PostUpdateNode).getPreUpdateNode()
85+
simpleLocalFlowStep(nodeFrom, nodeTo) or
86+
jumpStep(nodeFrom, nodeTo)
8887
}
8988

9089
/**
@@ -142,11 +141,11 @@ predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
142141
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
143142
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
144143
*/
145-
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
144+
predicate basicStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string attr) {
146145
exists(AttrWrite a |
147146
a.mayHaveAttributeName(attr) and
148147
nodeFrom = a.getValue() and
149-
simpleLocalFlowStep*(nodeTo, a.getObject())
148+
nodeTo.flowsTo(a.getObject())
150149
)
151150
}
152151

@@ -275,7 +274,7 @@ class TypeTracker extends TTypeTracker {
275274
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
276275
*/
277276
pragma[inline]
278-
TypeTracker step(Node nodeFrom, Node nodeTo) {
277+
TypeTracker step(LocalSourceNode nodeFrom, Node nodeTo) {
279278
exists(StepSummary summary |
280279
StepSummary::step(nodeFrom, nodeTo, summary) and
281280
result = this.append(summary)

python/ql/src/semmle/python/dataflow/new/internal/Attributes.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ abstract class AttrRef extends Node {
3030
predicate mayHaveAttributeName(string attrName) {
3131
attrName = this.getAttributeName()
3232
or
33-
exists(Node nodeFrom |
34-
localFlow(nodeFrom, this.getAttributeNameExpr()) and
33+
exists(LocalSourceNode nodeFrom |
34+
nodeFrom.flowsTo(this.getAttributeNameExpr()) and
3535
attrName = nodeFrom.asExpr().(StrConst).getText()
3636
)
3737
}

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ module EssaFlow {
186186
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
187187
* excludes SSA flow through instance fields.
188188
*/
189+
cached
189190
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
190191
// If there is ESSA-flow out of a node `node`, we want flow
191192
// both out of `node` and any post-update node of `node`.
@@ -219,12 +220,9 @@ private predicate localEssaStep(EssaNode nodeFrom, EssaNode nodeTo) {
219220
* Holds if `result` is either `node`, or the post-update node for `node`.
220221
*/
221222
private Node update(Node node) {
222-
exists(PostUpdateNode pun |
223-
node = pun.getPreUpdateNode() and
224-
result = pun
225-
)
226-
or
227223
result = node
224+
or
225+
result.(PostUpdateNode).getPreUpdateNode() = node
228226
}
229227

230228
// TODO: Make modules for these headings

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,19 @@ class BarrierGuard extends GuardNode {
376376
}
377377
}
378378

379+
/**
380+
* A data flow node that is a source of local flow. This includes things like
381+
* - Expressions
382+
* - Function parameters
383+
*/
384+
class LocalSourceNode extends Node {
385+
LocalSourceNode() { not simpleLocalFlowStep(_, this) }
386+
387+
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in zero or more local flow steps. */
388+
cached
389+
predicate flowsTo(Node nodeTo) { simpleLocalFlowStep*(this, nodeTo) }
390+
}
391+
379392
/**
380393
* Algebraic datatype for tracking data content associated with values.
381394
* Content can be collection elements or object attributes.

python/ql/src/semmle/python/dataflow/new/internal/DataFlowUtil.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ Node importNode(string name) {
4646
or
4747
name = alias.getValue().(ImportExpr).getImportedModuleName()
4848
) and
49-
result.(EssaNode).getVar().(AssignmentDefinition).getSourceVariable() = var
49+
result.asExpr() = alias.getValue()
5050
)
5151
or
5252
// Although it may seem superfluous to consider the `foo` part of `from foo import bar as baz` to

python/ql/src/semmle/python/frameworks/Django.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1841,7 +1841,7 @@ private module Django {
18411841

18421842
DjangoRouteRegex() {
18431843
this instanceof StrConst and
1844-
DataFlow::localFlow(DataFlow::exprNode(this), rePathCall.getUrlPatternArg())
1844+
DataFlow::exprNode(this).(DataFlow::LocalSourceNode).flowsTo(rePathCall.getUrlPatternArg())
18451845
}
18461846

18471847
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }

python/ql/src/semmle/python/frameworks/Flask.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,9 @@ private module FlaskModel {
319319
}
320320

321321
override Function getARouteHandler() {
322-
exists(DataFlow::Node view_func_arg, DataFlow::Node func_src |
322+
exists(DataFlow::Node view_func_arg, DataFlow::LocalSourceNode func_src |
323323
view_func_arg.asCfgNode() in [node.getArg(2), node.getArgByName("view_func")] and
324-
DataFlow::localFlow(func_src, view_func_arg) and
324+
func_src.flowsTo(view_func_arg) and
325325
func_src.asExpr().(CallableExpr) = result.getDefinition()
326326
)
327327
}

python/ql/src/semmle/python/objects/TObject.qll

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -229,23 +229,32 @@ predicate class_method(
229229
PointsToInternal::pointsTo(instantiation.getArg(0), context, function, _)
230230
}
231231

232+
/**
233+
* Holds if the literal corresponding to the control flow node `n` has class `cls`.
234+
*
235+
* Helper predicate for `literal_instantiation`. Prevents a bad join with
236+
* `PointsToContext::appliesTo` from occurring.
237+
*/
238+
pragma[nomagic]
239+
private predicate literal_node_class(ControlFlowNode n, ClassObjectInternal cls) {
240+
n instanceof ListNode and cls = ObjectInternal::builtin("list")
241+
or
242+
n instanceof DictNode and cls = ObjectInternal::builtin("dict")
243+
or
244+
n instanceof SetNode and cls = ObjectInternal::builtin("set")
245+
or
246+
n.getNode() instanceof ImaginaryLiteral and cls = ObjectInternal::builtin("complex")
247+
or
248+
n.getNode() instanceof ListComp and cls = ObjectInternal::builtin("list")
249+
or
250+
n.getNode() instanceof SetComp and cls = ObjectInternal::builtin("set")
251+
or
252+
n.getNode() instanceof DictComp and cls = ObjectInternal::builtin("dict")
253+
}
254+
232255
predicate literal_instantiation(ControlFlowNode n, ClassObjectInternal cls, PointsToContext context) {
233256
context.appliesTo(n) and
234-
(
235-
n instanceof ListNode and cls = ObjectInternal::builtin("list")
236-
or
237-
n instanceof DictNode and cls = ObjectInternal::builtin("dict")
238-
or
239-
n instanceof SetNode and cls = ObjectInternal::builtin("set")
240-
or
241-
n.getNode() instanceof ImaginaryLiteral and cls = ObjectInternal::builtin("complex")
242-
or
243-
n.getNode() instanceof ListComp and cls = ObjectInternal::builtin("list")
244-
or
245-
n.getNode() instanceof SetComp and cls = ObjectInternal::builtin("set")
246-
or
247-
n.getNode() instanceof DictComp and cls = ObjectInternal::builtin("dict")
248-
)
257+
literal_node_class(n, cls)
249258
}
250259

251260
predicate super_instantiation(

0 commit comments

Comments
 (0)