forked from pixee/codemodder-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_docs.py
More file actions
387 lines (354 loc) · 19.2 KB
/
generate_docs.py
File metadata and controls
387 lines (354 loc) · 19.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
import argparse
from dataclasses import dataclass
from pathlib import Path
from codemodder.registry import load_registered_codemods
@dataclass
class DocMetadata:
    """Documentation-only details about a codemod.

    These values feed the generated markdown pages and are deliberately kept
    apart from the codemod API itself.
    """

    # Importance rating shown in the first column of the docs table.
    importance: str
    # Answer to the "Why is this codemod marked as ...?" F.A.Q. entry.
    guidance_explained: str
    # Content of the "Requires Scanning Tool" column; "No" unless overridden.
    need_sarif: str = "No"
# Codemod-specific metadata that is used only for docs, not for the codemod API.
# Keys are codemod names; each value supplies the importance rating and the
# review-guidance explanation rendered into that codemod's documentation page.
CORE_CODEMODS = {
    "add-requests-timeouts": DocMetadata(
        importance="Medium",
        guidance_explained="This change makes your code safer but in some cases it may be necessary to adjust the timeout value for your particular application.",
    ),
    "django-debug-flag-on": DocMetadata(
        importance="Medium",
        guidance_explained="Django's `DEBUG` flag may be overridden somewhere else or the runtime settings file may be set with the `DJANGO_SETTINGS_MODULE` environment variable. This means that the `DEBUG` flag may intentionally be left on as a development aid.",
    ),
    "django-session-cookie-secure-off": DocMetadata(
        importance="Medium",
        guidance_explained="Django's `SESSION_COOKIE_SECURE` flag may be overridden somewhere else or the runtime settings file may be set with the `DJANGO_SETTINGS_MODULE` environment variable. This means that the flag may intentionally be left off or missing. Also some applications may still want to support pure http. This is often the case for legacy apps.",
    ),
    "enable-jinja2-autoescape": DocMetadata(
        importance="High",
        guidance_explained="This codemod protects your applications against XSS attacks. However, it's possible you would like to set the `autoescape` parameter to a custom callable.",
    ),
    "fix-mutable-params": DocMetadata(
        importance="Medium",
        guidance_explained="We believe that this codemod fixes an unsafe practice and that the changes themselves are safe and reliable.",
    ),
    "harden-pyyaml": DocMetadata(
        importance="Medium",
        guidance_explained="This codemod replaces any unsafe loaders with the `SafeLoader`, which is already the recommended replacement suggested in `yaml` documentation. We believe this replacement is safe and should not result in any issues.",
    ),
    "harden-ruamel": DocMetadata(
        importance="Medium",
        guidance_explained="This codemod replaces any unsafe `typ` argument with `typ='safe'`, which makes safety explicit and is one of the recommended uses suggested in `ruamel` documentation. We believe this replacement is safe and should not result in any issues.",
    ),
    "https-connection": DocMetadata(
        importance="High",
        guidance_explained="Support for HTTPS is widespread which, save in some legacy applications, makes this change safe.",
    ),
    "jwt-decode-verify": DocMetadata(
        importance="High",
        guidance_explained="This codemod ensures your code uses all available validations when calling `jwt.decode`. We believe this replacement is safe and should not result in any issues.",
    ),
    "limit-readline": DocMetadata(
        importance="Medium",
        guidance_explained="This codemod sets a maximum of 5MB allowed per line read by default. It is unlikely but possible that your code may receive lines that are greater than 5MB _and_ you'd still be interested in reading them, so there is some nominal risk of exceptional cases.",
    ),
    "safe-lxml-parser-defaults": DocMetadata(
        importance="High",
        guidance_explained="We believe this change is safe, effective, and protects your code against very serious security attacks.",
    ),
    "safe-lxml-parsing": DocMetadata(
        importance="High",
        guidance_explained="We believe this change is safe, effective, and protects your code against very serious security attacks.",
    ),
    "order-imports": DocMetadata(
        importance="Low",
        guidance_explained="TODO SKIP FOR NOW",
    ),
    "sandbox-process-creation": DocMetadata(
        importance="High",
        guidance_explained="We believe this change is safe and effective. The behavior of sandboxing `subprocess.run` and `subprocess.call` calls will only throw `SecurityException` if they see behavior involved in malicious code execution, which is extremely unlikely to happen in normal operation.",
    ),
    "remove-unnecessary-f-str": DocMetadata(
        importance="Low",
        guidance_explained="We believe this codemod is safe and will not cause any issues.",
    ),
    "unused-imports": DocMetadata(
        importance="Low",
        guidance_explained="We believe this codemod is safe and will not cause any issues. It is important to note that importing modules may have side-effects that alter the behavior, even if unused, but we believe those cases are rare enough to be safe.",
    ),
    "requests-verify": DocMetadata(
        importance="High",
        guidance_explained="There may be times when setting `verify=False` is useful for testing though we discourage it. \nYou may also decide to set `verify=/path/to/ca/bundle`. This codemod will not attempt to modify the `verify` value if you do set it to a path.",
    ),
    "secure-flask-cookie": DocMetadata(
        importance="Medium",
        guidance_explained="Our change provides the most secure way to create cookies in Flask. However, it's possible you have configured your Flask application configurations to use secure cookies. In these cases, using the default parameters for `set_cookie` is safe.",
    ),
    "secure-random": DocMetadata(
        importance="High",
        guidance_explained="While most of the functions in the `random` module aren't cryptographically secure, there are still valid use cases for `random.random()` such as for simulations or games.",
    ),
    "secure-tempfile": DocMetadata(
        importance="High",
        guidance_explained="We believe this codemod is safe and will cause no unexpected errors.",
    ),
    "upgrade-sslcontext-minimum-version": DocMetadata(
        importance="High",
        guidance_explained="This codemod updates the minimum supported version of TLS. Since this is an important security fix and since all modern servers offer TLSv1.2, we believe this change can be safely merged without review.",
    ),
    "upgrade-sslcontext-tls": DocMetadata(
        importance="High",
        guidance_explained="This codemod updates the minimum supported version of TLS. Since this is an important security fix and since all modern servers offer TLSv1.2, we believe this change can be safely merged without review.",
    ),
    # The continuation lines of this triple-quoted string are markdown and
    # must stay at column 0. The blank line before "However" ends the numbered
    # list so that paragraph is not absorbed into list item 2.
    "url-sandbox": DocMetadata(
        importance="High",
        guidance_explained="""By default, the protection only weaves in 2 checks, which we believe will not cause any issues with the vast majority of code:
1. The given URL must be HTTP/HTTPS.
2. The given URL must not point to a "well-known infrastructure target", which includes things like AWS Metadata Service endpoints, and internal routers (e.g., 192.168.1.1) which are common targets of attacks.

However, on rare occasions an application may use a URL protocol like "file://" or "ftp://" in backend or middleware code.
If you want to allow those protocols, change the incoming PR to look more like this and get the best security possible:
```diff
-resp = requests.get(url)
+resp = safe_requests.get(url, allowed_protocols=("ftp",))
```""",
    ),
    "use-defusedxml": DocMetadata(
        importance="High",
        guidance_explained="We believe this change is safe and effective and guards against serious XML vulnerabilities. You should review this code before merging to make sure the dependency has been properly added to your project.",
    ),
    "use-walrus-if": DocMetadata(
        importance="Low",
        guidance_explained="We believe that using the walrus operator is an improvement in terms of clarity and readability. However, this change is only compatible with codebases that support Python 3.8 and later, so it requires quick validation before merging.",
    ),
    "bad-lock-with-statement": DocMetadata(
        importance="Low",
        guidance_explained="We believe this replacement is safe and should not result in any issues.",
    ),
    "sql-parameterization": DocMetadata(
        importance="High",
        guidance_explained="Python has a wealth of database drivers that all use the same `dbapi2` interface detailed in [PEP249](https://peps.python.org/pep-0249/). Different drivers may require different string tokens used for parameterization, and Python's dynamic typing makes it quite hard, and sometimes impossible, to detect which driver is being used just by looking at the code.",
    ),
    "use-generator": DocMetadata(
        importance="Low",
        guidance_explained="We believe this replacement is safe and leads to better performance.",
    ),
    "secure-flask-session-configuration": DocMetadata(
        importance="Medium",
        guidance_explained="Our change fixes explicitly insecure session configuration for a Flask application. However, there may be valid cases to use these insecure configurations, such as for testing or backward compatibility.",
    ),
    "fix-file-resource-leak": DocMetadata(
        importance="High",
        guidance_explained="We believe this change is safe and will only close file resources that are not referenced outside of the with statement block.",
    ),
    "django-receiver-on-top": DocMetadata(
        importance="Medium",
        guidance_explained="We believe this change leads to the intended behavior of the application and is thus safe.",
    ),
    "numpy-nan-equality": DocMetadata(
        importance="Medium",
        guidance_explained="We believe any use of `==` to compare with `numpy.nan` is unintended given that it is always `False`. Thus we consider this change safe.",
    ),
    "django-json-response-type": DocMetadata(
        importance="Medium",
        guidance_explained="This change will only restrict the response type and will not alter the response data itself. Thus we deem it safe.",
    ),
    "fix-deprecated-abstractproperty": DocMetadata(
        importance="Low",
        guidance_explained="This change fixes deprecated uses and is safe.",
    ),
    "flask-json-response-type": DocMetadata(
        importance="Medium",
        guidance_explained="This change will only restrict the response type and will not alter the response data itself. Thus we deem it safe.",
    ),
    "exception-without-raise": DocMetadata(
        importance="Low",
        guidance_explained="A statement with an exception by itself has no effect. Raising the exception is most likely the intended effect and thus we deem it safe.",
    ),
    "remove-future-imports": DocMetadata(
        importance="Low",
        guidance_explained="Removing future imports is safe and will not cause any issues.",
    ),
    "literal-or-new-object-identity": DocMetadata(
        importance="Low",
        guidance_explained="Since literals and new objects have their own identities, comparisons against them using `is` operators are most likely a bug and thus we deem the change safe.",
    ),
    "subprocess-shell-false": DocMetadata(
        importance="High",
        guidance_explained="In most cases setting `shell=False` is correct and leads to much safer code. However there are valid use cases for `shell=True` when using shell functionality like pipes or wildcard is required. In such cases it is important to run only trusted, validated commands.",
    ),
    "use-set-literal": DocMetadata(
        importance="Low",
        guidance_explained="We believe this change is safe and will not cause any issues.",
    ),
    "remove-module-global": DocMetadata(
        importance="Low",
        guidance_explained="Since the `global` keyword is intended for use in non-module scopes, using it at the module scope is unnecessary.",
    ),
    "remove-debug-breakpoint": DocMetadata(
        importance="Medium",
        guidance_explained="Breakpoints are generally used only for debugging and can easily be forgotten before deploying code.",
    ),
    "combine-startswith-endswith": DocMetadata(
        importance="Low",
        guidance_explained="Simplifying expressions involving `startswith` or `endswith` calls is safe.",
    ),
    "fix-deprecated-logging-warn": DocMetadata(
        importance="Low",
        guidance_explained="This change fixes deprecated uses and is safe.",
    ),
    "flask-enable-csrf-protection": DocMetadata(
        importance="High",
        guidance_explained="Flask views may require proper handling of CSRF to function as expected and thus this change may break some views.",
    ),
    "replace-flask-send-file": DocMetadata(
        importance="Medium",
        guidance_explained="We believe this change is safe and will not cause any issues.",
    ),
    "fix-empty-sequence-comparison": DocMetadata(
        importance="Low",
        guidance_explained="Values compared to empty sequences should be verified in case they are falsy values that are not a sequence.",
    ),
    "remove-assertion-in-pytest-raises": DocMetadata(
        importance="Low",
        guidance_explained="We believe this change is safe and will not cause any issues.",
    ),
    "fix-assert-tuple": DocMetadata(
        importance="Medium",
        guidance_explained="An `assert` statement on a non-empty tuple is likely unintended and should be rewritten. However, the new change may result in assertion failures that should be reviewed.",
    ),
    "lazy-logging": DocMetadata(
        importance="Medium",
        guidance_explained="We believe this change is safe and will not cause any issues.",
    ),
    "str-concat-in-sequence-literals": DocMetadata(
        importance="Medium",
        guidance_explained="While string concatenation inside a sequence iterable is likely a mistake, there are instances when you may choose to use them.",
    ),
    "fix-async-task-instantiation": DocMetadata(
        importance="Low",
        guidance_explained="Manual instantiation of `asyncio.Task` is discouraged. We believe this change is safe and will not cause any issues.",
    ),
    "django-model-without-dunder-str": DocMetadata(
        importance="Low",
        guidance_explained="This codemod is a great starting point for models with few fields. We encourage you to write custom `__str__` methods that best suit your Django application.",
    ),
    "fix-hasattr-call": DocMetadata(
        importance="Low",
        guidance_explained="We believe this change is safe because using `callable` is a more reliable way to check if an object is a callable.",
    ),
    "harden-pickle-load": DocMetadata(
        importance="High",
        guidance_explained="This change may impact performance in some cases, but it is recommended when handling untrusted data.",
    ),
    "fix-dataclass-defaults": DocMetadata(
        importance="Medium",
        guidance_explained="This change is safe and will prevent runtime `ValueError`.",
    ),
    "fix-missing-self-or-cls": DocMetadata(
        importance="Medium",
        guidance_explained="This change is safe and will prevent errors when calling on these instance or class methods.",
    ),
    "fix-float-equality": DocMetadata(
        importance="Medium",
        guidance_explained="This change makes your code more accurate but in some cases it may be necessary to adjust the `abs_tol` and `rel_tol` parameter values for your particular calculations.",
    ),
    "fix-math-isclose": DocMetadata(
        importance="Medium",
        guidance_explained="This change makes your code more accurate but in some cases it may be necessary to adjust the `abs_tol` parameter value for your particular calculations.",
    ),
    "break-or-continue-out-of-loop": DocMetadata(
        importance="Low",
        guidance_explained="While this change will make the code consistent, it is likely that the `break` or `continue` statement is a symptom of an error in program logic.",
    ),
}
# Docs metadata for codemods that act on DefectDojo results. Every entry
# carries the same scanning-tool label, so the table below lists only the
# per-codemod values: (name, importance, guidance text).
DEFECTDOJO_CODEMODS = {
    codemod_name: DocMetadata(
        importance=importance,
        guidance_explained=guidance,
        need_sarif="Yes (DefectDojo)",
    )
    for codemod_name, importance, guidance in (
        (
            "django-secure-set-cookie",
            "Medium",
            "Our change provides the most secure way to create cookies in Django. However, it's possible you have configured your Django application configurations to use secure cookies. In these cases, using the default parameters for `set_cookie` is safe.",
        ),
        (
            "avoid-insecure-deserialization",
            "High",
            "This change is generally safe and will prevent deserialization vulnerabilities.",
        ),
    )
}
# Names of the Sonar-backed codemods. Each one is the name of a core codemod
# followed by "-<Sonar rule id>"; the docs metadata is derived from that core
# codemod below.
SONAR_CODEMOD_NAMES = [
    "numpy-nan-equality-S6725",
    "literal-or-new-object-identity-S5796",
    "django-receiver-on-top-S6552",
    "exception-without-raise-S3984",
    "fix-assert-tuple-S5905",
    "remove-assertion-in-pytest-raises-S5915",
    "flask-json-response-type-S5131",
    "django-json-response-type-S5131",
    "jwt-decode-verify-S5659",
    "fix-missing-self-or-cls-S5719",
    "secure-tempfile-S5445",
    "secure-random-S2245",
    "enable-jinja2-autoescape-S5247",
    "url-sandbox-S5144",
    "fix-float-equality-S1244",
    "fix-math-isclose-S6727",
    "sql-parameterization-S3649",
    "django-model-without-dunder-str-S6554",
    "break-or-continue-out-of-loop-S1716",
]


def _sonar_doc_metadata(sonar_name):
    """Build the docs metadata for a Sonar codemod from its core counterpart.

    Args:
        sonar_name: Sonar codemod name of the form ``<core-name>-<rule-id>``.

    Returns:
        A ``DocMetadata`` that reuses the core codemod's importance and
        guidance text and is labelled as requiring Sonar results.

    Raises:
        KeyError: If the derived core name has no entry in ``CORE_CODEMODS``.
    """
    # Everything before the last "-" is the core codemod name.
    core_name = sonar_name.rpartition("-")[0]
    try:
        core = CORE_CODEMODS[core_name]
    except KeyError as exc:
        raise KeyError(
            f"Sonar codemod {sonar_name} has no core codemod {core_name} in CORE_CODEMODS"
        ) from exc
    return DocMetadata(
        importance=core.importance,
        guidance_explained=core.guidance_explained,
        need_sarif="Yes (Sonar)",
    )


SONAR_CODEMODS = {name: _sonar_doc_metadata(name) for name in SONAR_CODEMOD_NAMES}
# Single lookup table for every documented codemod. On a duplicate name the
# mapping listed later takes precedence.
ALL_CODEMODS_METADATA = {
    **CORE_CODEMODS,
    **DEFECTDOJO_CODEMODS,
    **SONAR_CODEMODS,
}
def generate_docs(codemod):
    """Render the markdown documentation page for a single codemod.

    Args:
        codemod: A registered codemod. This function reads its ``name``,
            ``id``, ``summary``, ``description``, ``review_guidance`` and
            ``references`` attributes; each reference must expose ``url`` and
            ``description``.

    Returns:
        The full markdown document, front matter included, as a string.

    Raises:
        KeyError: If ``codemod.name`` has no entry in ``ALL_CODEMODS_METADATA``.
    """
    try:
        codemod_data = ALL_CODEMODS_METADATA[codemod.name]
    except KeyError as exc:
        raise KeyError(f"Must add {codemod.name} to ALL_CODEMODS_METADATA") from exc

    # Fall back to the bare URL as link text when a reference has no description.
    markdown_references = (
        "\n".join(
            f"* [{ref.description or ref.url}]({ref.url})"
            for ref in codemod.references
        )
        or "N/A"
    )

    # The title is emitted inside a YAML double-quoted scalar, so backslashes
    # and double quotes in the summary have to be escaped.
    title = codemod.summary.replace("\\", "\\\\").replace('"', '\\"')

    # The guidance column is at least 19 characters wide and grows with longer
    # guidance text; header, divider and data row all share that width so the
    # table stays aligned in the raw markdown.
    guidance_width = max(len(codemod.review_guidance), 19)
    table_header = (
        f"| Importance | {'Review Guidance':{guidance_width}} | Requires Scanning Tool |"
    )
    table_divider = (
        f"|------------|{'-' * (guidance_width + 2)}|------------------------|"
    )
    table_row = (
        f"| {codemod_data.importance:10} "
        f"| {codemod.review_guidance:19} "
        f"| {codemod_data.need_sarif:22} |"
    )

    # Blank lines separate the block elements: without one after the table,
    # the lines that follow would be parsed as additional table rows.
    return f"""---
title: "{title}"
sidebar_position: 1
---

## {codemod.id}

{table_header}
{table_divider}
{table_row}

{codemod.description}

If you have feedback on this codemod, [please let us know](mailto:feedback@pixee.ai)!

## F.A.Q.

### Why is this codemod marked as {codemod.review_guidance}?

{codemod_data.guidance_explained}

## Codemod Settings

N/A

## References

{markdown_references}
"""
# Names of codemods for which no documentation page is written.
SKIP_DOCS = ["order-imports", "unused-imports"]


def main():
    """Write one markdown doc file per registered codemod.

    Parses a single positional command-line argument, ``directory``, and
    writes ``<codemod id>.md`` into it for every codemod in the registry whose
    name is not listed in ``SKIP_DOCS``. ``:`` and ``/`` in the codemod id are
    replaced with ``_`` to form the file name. The directory is created if it
    does not exist yet.

    Raises:
        KeyError: Propagated from ``generate_docs`` when a registered codemod
            has no documentation metadata.
    """
    parser = argparse.ArgumentParser(
        description="Generate public docs for registered codemods."
    )
    parser.add_argument(
        "directory", type=str, help="directory path where to create doc files"
    )
    args = parser.parse_args()

    parent_dir = Path(args.directory)
    # Create the target directory up front instead of failing on the first
    # write when it is missing.
    parent_dir.mkdir(parents=True, exist_ok=True)

    registry = load_registered_codemods()
    for codemod in registry.codemods:
        if codemod.name in SKIP_DOCS:
            continue
        codemod_doc_name = f"{codemod.id.replace(':', '_').replace('/', '_')}.md"
        (parent_dir / codemod_doc_name).write_text(
            generate_docs(codemod), encoding="utf-8"
        )