Skip to content

Commit 247fd4f

Browse files
committed
Python: Make encoding/decoding preserve taint automatically
With the way we have set things up, there is no way to opt out of this behavior.
1 parent 66f5d0d commit 247fd4f

File tree

3 files changed

+44
-24
lines changed

3 files changed

+44
-24
lines changed

python/ql/src/experimental/semmle/python/Concepts.qll

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,9 @@ module Path {
114114
* is intended to include deserialization, unmarshalling, decoding, unpickling,
115115
* decompressing, decrypting, parsing etc.
116116
*
117-
* Doing so should normally preserve taint, but it can also be a problem
118-
* in itself, e.g. if it allows code execution or could result in denial-of-service.
117+
* A decoding (automatically) preserves taint from input to output. However, it can
118+
* also be a problem in itself, for example if it allows code execution or could result
119+
* in denial-of-service.
119120
*
120121
* Extend this class to refine existing API models. If you want to model new APIs,
121122
* extend `Decoding::Range` instead.
@@ -145,8 +146,9 @@ module Decoding {
145146
* is intended to include deserialization, unmarshalling, decoding, unpickling,
146147
* decompressing, decrypting, parsing etc.
147148
*
148-
* Doing so should normally preserve taint, but it can also be a problem
149-
* in itself, e.g. if it allows code execution or could result in denial-of-service.
149+
* A decoding (automatically) preserves taint from input to output. However, it can
150+
* also be a problem in itself, for example if it allows code execution or could result
151+
* in denial-of-service.
150152
*
151153
* Extend this class to model new APIs. If you want to refine existing API models,
152154
* extend `Decoding` instead.
@@ -166,12 +168,21 @@ module Decoding {
166168
}
167169
}
168170

171+
private class DecodingAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
172+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
173+
exists(Decoding decoding |
174+
nodeFrom = decoding.getAnInput() and
175+
nodeTo = decoding.getOutput()
176+
)
177+
}
178+
}
179+
169180
/**
170181
* A data-flow node that encodes data to a binary or textual format. This
171182
* is intended to include serialization, marshalling, encoding, pickling,
172183
* compressing, encrypting, etc.
173184
*
174-
* Doing so should normally preserve taint.
185+
* An encoding (automatically) preserves taint from input to output.
175186
*
176187
* Extend this class to refine existing API models. If you want to model new APIs,
177188
* extend `Encoding::Range` instead.
@@ -198,7 +209,7 @@ module Encoding {
198209
* is intended to include serialization, marshalling, encoding, pickling,
199210
* compressing, encrypting, etc.
200211
*
201-
* Doing so should normally preserve taint.
212+
* An encoding (automatically) preserves taint from input to output.
202213
*
203214
* Extend this class to model new APIs. If you want to refine existing API models,
204215
* extend `Encoding` instead.
@@ -215,6 +226,15 @@ module Encoding {
215226
}
216227
}
217228

229+
private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
230+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
231+
exists(Encoding encoding |
232+
nodeFrom = encoding.getAnInput() and
233+
nodeTo = encoding.getOutput()
234+
)
235+
}
236+
}
237+
218238
/**
219239
* A data-flow node that dynamically executes Python code.
220240
*

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/TestTaint.expected

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@
1616
| test_string.py:17 | ok | str_methods | ts.casefold() |
1717
| test_string.py:19 | ok | str_methods | ts.format_map(..) |
1818
| test_string.py:20 | ok | str_methods | "{unsafe}".format_map(..) |
19-
| test_string.py:31 | fail | binary_decode_encode | base64.a85encode(..) |
20-
| test_string.py:32 | fail | binary_decode_encode | base64.a85decode(..) |
21-
| test_string.py:35 | fail | binary_decode_encode | base64.b85encode(..) |
22-
| test_string.py:36 | fail | binary_decode_encode | base64.b85decode(..) |
23-
| test_string.py:39 | fail | binary_decode_encode | base64.encodebytes(..) |
24-
| test_string.py:40 | fail | binary_decode_encode | base64.decodebytes(..) |
19+
| test_string.py:31 | ok | binary_decode_encode | base64.a85encode(..) |
20+
| test_string.py:32 | ok | binary_decode_encode | base64.a85decode(..) |
21+
| test_string.py:35 | ok | binary_decode_encode | base64.b85encode(..) |
22+
| test_string.py:36 | ok | binary_decode_encode | base64.b85decode(..) |
23+
| test_string.py:39 | ok | binary_decode_encode | base64.encodebytes(..) |
24+
| test_string.py:40 | ok | binary_decode_encode | base64.decodebytes(..) |
2525
| test_string.py:48 | ok | f_strings | Fstring |
2626
| test_unpacking.py:18 | ok | extended_unpacking | first |
2727
| test_unpacking.py:18 | ok | extended_unpacking | last |

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/TestTaint.expected

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -123,18 +123,18 @@
123123
| test_string.py:114 | ok | percent_fmt | BinaryExpr |
124124
| test_string.py:115 | ok | percent_fmt | BinaryExpr |
125125
| test_string.py:116 | ok | percent_fmt | BinaryExpr |
126-
| test_string.py:126 | fail | binary_decode_encode | base64.b64encode(..) |
127-
| test_string.py:127 | fail | binary_decode_encode | base64.b64decode(..) |
128-
| test_string.py:129 | fail | binary_decode_encode | base64.standard_b64encode(..) |
129-
| test_string.py:130 | fail | binary_decode_encode | base64.standard_b64decode(..) |
130-
| test_string.py:132 | fail | binary_decode_encode | base64.urlsafe_b64encode(..) |
131-
| test_string.py:133 | fail | binary_decode_encode | base64.urlsafe_b64decode(..) |
132-
| test_string.py:135 | fail | binary_decode_encode | base64.b32encode(..) |
133-
| test_string.py:136 | fail | binary_decode_encode | base64.b32decode(..) |
134-
| test_string.py:138 | fail | binary_decode_encode | base64.b16encode(..) |
135-
| test_string.py:139 | fail | binary_decode_encode | base64.b16decode(..) |
136-
| test_string.py:142 | fail | binary_decode_encode | base64.encodestring(..) |
137-
| test_string.py:143 | fail | binary_decode_encode | base64.decodestring(..) |
126+
| test_string.py:126 | ok | binary_decode_encode | base64.b64encode(..) |
127+
| test_string.py:127 | ok | binary_decode_encode | base64.b64decode(..) |
128+
| test_string.py:129 | ok | binary_decode_encode | base64.standard_b64encode(..) |
129+
| test_string.py:130 | ok | binary_decode_encode | base64.standard_b64decode(..) |
130+
| test_string.py:132 | ok | binary_decode_encode | base64.urlsafe_b64encode(..) |
131+
| test_string.py:133 | ok | binary_decode_encode | base64.urlsafe_b64decode(..) |
132+
| test_string.py:135 | ok | binary_decode_encode | base64.b32encode(..) |
133+
| test_string.py:136 | ok | binary_decode_encode | base64.b32decode(..) |
134+
| test_string.py:138 | ok | binary_decode_encode | base64.b16encode(..) |
135+
| test_string.py:139 | ok | binary_decode_encode | base64.b16decode(..) |
136+
| test_string.py:142 | ok | binary_decode_encode | base64.encodestring(..) |
137+
| test_string.py:143 | ok | binary_decode_encode | base64.decodestring(..) |
138138
| test_string.py:148 | fail | binary_decode_encode | quopri.encodestring(..) |
139139
| test_string.py:149 | fail | binary_decode_encode | quopri.decodestring(..) |
140140
| test_string.py:159 | ok | test_os_path_join | os.path.join(..) |

0 commit comments

Comments
 (0)