-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy path__main__.py
More file actions
197 lines (157 loc) · 5.63 KB
/
__main__.py
File metadata and controls
197 lines (157 loc) · 5.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import datetime
import difflib
import os
import sys
from pathlib import Path
import libcst as cst
from libcst.codemod import CodemodContext
from codemodder.file_context import FileContext
from codemodder.logging import logger
from codemodder.cli import parse_args
from codemodder.code_directory import file_line_patterns, match_files
from codemodder.codemods import match_codemods
from codemodder.codemods.change import Change
from codemodder.dependency_manager import DependencyManager
from codemodder.report.codetf_reporter import report_default
from codemodder.semgrep import run as semgrep_run
from codemodder.sarifs import parse_sarif_files
# Must use from import here to point to latest state
from codemodder import global_state
# Accumulates one result dict per codemod that produced a changeset; it is
# appended to by compile_results() and handed to the reporter at the end of run().
RESULTS_BY_CODEMOD = []
from dataclasses import dataclass
@dataclass
class ChangeSet:
    """The changes applied to the single file located at `path`."""

    # Path of the file that was changed.
    path: str
    # Diff text describing the modification to that file.
    diff: str
    # Individual change records associated with the file.
    changes: list[Change]

    def to_json(self):
        """Return the three fields as a plain dict keyed by field name.

        The values are passed through unchanged; no conversion is applied to
        the entries of `changes`.
        """
        return dict(path=self.path, diff=self.diff, changes=self.changes)
def update_code(file_path, new_code):
    """
    Write the `new_code` to the `file_path`

    The file is opened for writing as UTF-8 text, replacing whatever it
    previously contained.

    Args:
        file_path: location of the file to overwrite.
        new_code: the complete new contents of the file, as a string.

    Raises:
        OSError: if the file cannot be opened or written.
    """
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(new_code)
    # Log only after the write has completed, so that a failed write is never
    # reported as a successful update.
    logger.info("Updated file %s", file_path)
def run_codemods_for_file(
    file_context,
    codemods_to_run,
    source_tree,
):
    """Apply each applicable codemod to one already-parsed file.

    For every `(name, codemod class)` pair in `codemods_to_run`, the class is
    instantiated against `source_tree`. Codemods whose `should_transform` is
    falsy are skipped. When a transform yields a tree that differs from
    `source_tree`, a unified diff is recorded on the codemod class'
    `CHANGESET_ALL_FILES` list and, unless `file_context.dry_run` is set, the
    transformed code is written to `file_context.file_path`.

    NOTE(review): every codemod starts from the same `source_tree`, so when
    more than one codemod changes the file the later write appears to replace
    the earlier one -- confirm whether that is intended.

    Args:
        file_context: per-file context; `file_path` and `dry_run` are read here
            and the object is also handed to each codemod.
        codemods_to_run: mapping of codemod name to codemod class.
        source_tree: the parsed module for the file.
    """
    for codemod_name, codemod_kls in codemods_to_run.items():
        metadata_wrapper = cst.MetadataWrapper(source_tree)
        codemod_context = CodemodContext(wrapper=metadata_wrapper)
        codemod = codemod_kls(codemod_context, file_context)
        if not codemod.should_transform:
            continue

        logger.info("Running codemod %s for %s", codemod_name, file_context.file_path)
        output_tree = codemod.transform_module(source_tree)
        if output_tree.deep_equals(source_tree):
            # The codemod left the file untouched: nothing to record or write.
            continue

        before_lines = source_tree.code.splitlines(keepends=True)
        after_lines = output_tree.code.splitlines(keepends=True)
        diff = "".join(difflib.unified_diff(before_lines, after_lines))
        logger.debug(
            "CHANGED %s with codemod %s", file_context.file_path, codemod_name
        )
        logger.debug(diff)

        changeset = ChangeSet(
            str(file_context.file_path),
            diff,
            changes=codemod_kls.CHANGES_IN_FILE,
        )
        codemod_kls.CHANGESET_ALL_FILES.append(changeset.to_json())

        if file_context.dry_run:
            logger.info("Dry run, not changing files")
            continue
        update_code(file_context.file_path, output_tree.code)
def analyze_files(
    files_to_analyze,
    codemods_to_run,
    sarif,
    cli_args,
):
    """Read, parse and run the selected codemods against each file.

    A file that cannot be read (it no longer exists, is not readable, or is
    not valid UTF-8) or cannot be parsed is logged and skipped, so a single
    bad file does not abort the whole run.

    Args:
        files_to_analyze: iterable of paths of the files to process.
        codemods_to_run: codemods to apply; passed through unchanged to
            `run_codemods_for_file`.
        sarif: results container indexed by `str(file_path)`.
        cli_args: parsed command-line arguments; `path_exclude`,
            `path_include` and `dry_run` are read here.
    """
    for file_path in files_to_analyze:
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                code = f.read()
        except (OSError, UnicodeDecodeError):
            # The file may have disappeared since it was matched, be
            # unreadable, or not be UTF-8 text; skip it like a parse failure.
            logger.exception("Error reading file %s", file_path)
            continue

        try:
            source_tree = cst.parse_module(code)
        except Exception:
            logger.exception("Error parsing file %s", file_path)
            continue

        line_exclude = file_line_patterns(file_path, cli_args.path_exclude)
        line_include = file_line_patterns(file_path, cli_args.path_include)
        # NOTE(review): plain indexing assumes `sarif` yields a value for every
        # path (e.g. a defaultdict) -- confirm against the sarif parsers.
        sarif_for_file = sarif[str(file_path)]

        file_context = FileContext(
            file_path,
            cli_args.dry_run,
            line_exclude,
            line_include,
            sarif_for_file,
        )

        run_codemods_for_file(
            file_context,
            codemods_to_run,
            source_tree,
        )
def compile_results(codemods):
    """Append one report entry to `RESULTS_BY_CODEMOD` per changed codemod.

    Codemod classes whose `CHANGESET_ALL_FILES` is empty are left out.

    Args:
        codemods: mapping of codemod name to codemod class.
    """
    for codemod_name, codemod_kls in codemods.items():
        if codemod_kls.CHANGESET_ALL_FILES:
            entry = dict(
                codemod=f"pixee:python/{codemod_name}",
                summary=codemod_kls.METADATA.DESCRIPTION,
                references=[],
                properties={},
                failedFiles=[],
                changeset=codemod_kls.CHANGESET_ALL_FILES,
            )
            RESULTS_BY_CODEMOD.append(entry)
def run(argv, original_args) -> int:
    """Run the selected codemods over a project directory and write a report.

    Args:
        argv: parsed command-line arguments; the attributes read here are
            `directory`, `codemod_include`, `codemod_exclude`, `sarif`,
            `path_exclude`, `path_include` and `dry_run`.
        original_args: the raw argument list, forwarded to the reporter.

    Returns:
        1 when `argv.directory` does not exist, otherwise 0 (including the
        early exits taken when no codemods or no files match).
    """
    started_at = datetime.datetime.now()

    if not os.path.exists(argv.directory):
        # project directory doesn't exist or can't be read
        return 1
    global_state.set_directory(Path(argv.directory))

    codemods_to_run = match_codemods(argv.codemod_include, argv.codemod_exclude)
    if not codemods_to_run:
        # We only currently have semgrep codemods so don't go on if no codemods matched.
        logger.warning("No codemods to run")
        return 0
    logger.debug("Codemods to run: %s", codemods_to_run)

    # Start from the results of any --sarif files, then merge in what semgrep
    # finds for the selected codemods.
    sarif_results = parse_sarif_files(argv.sarif or [])
    semgrep_findings = semgrep_run(codemods_to_run)
    sarif_results.update(semgrep_findings)
    if not sarif_results:
        logger.warning("No sarif results.")

    files_to_analyze = match_files(
        global_state.DIRECTORY, argv.path_exclude, argv.path_include
    )
    if not files_to_analyze:
        logger.warning("No files matched.")
        return 0
    logger.debug("Matched files:\n%s", "\n".join(map(str, files_to_analyze)))

    analyze_files(files_to_analyze, codemods_to_run, sarif_results, argv)
    compile_results(codemods_to_run)
    DependencyManager().write(dry_run=argv.dry_run)

    time_taken = datetime.datetime.now() - started_at
    elapsed_ms = int(time_taken.total_seconds() * 1000)
    report_default(elapsed_ms, argv, original_args, RESULTS_BY_CODEMOD)
    return 0
# Script entry point: parse the command-line arguments (without the program
# name) and exit with the status code returned by run().
if __name__ == "__main__":
    sys_argv = sys.argv[1:]
    exit_status = run(parse_args(sys_argv), sys_argv)
    sys.exit(exit_status)