forked from bloomberg/pystack
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpycode.cpp
More file actions
244 lines (221 loc) · 8.14 KB
/
pycode.cpp
File metadata and controls
244 lines (221 loc) · 8.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
#include <algorithm>
#include <cassert>
#include <stdexcept>
#include <utility>
#include <vector>

#include "logging.h"
#include "mem.h"
#include "process.h"
#include "pycode.h"
#include "pycompat.h"
#include "pytypes.h"
#include "version.h"
// Sentinel line delta (-128) used by the Python 3.10 line table decoder in this
// file: an entry carrying this delta leaves the current line number unchanged.
static constexpr int NO_LINE_NUMBER = -128;
namespace pystack {
// Kind codes stored in bits 3-6 of the first byte of each location table entry,
// as decoded by parse_linetable(). Codes 0 through 9 are all "short" entries;
// only the first of them is named here.
// NOTE(review): names and values appear to mirror CPython's own enumeration of
// the same name -- confirm against the CPython sources when updating.
enum _PyCodeLocationInfoKind {
    // Short entry: the line is unchanged and the columns are packed in one byte.
    PY_CODE_LOCATION_INFO_SHORT0 = 0,
    // One-line entries: the line advances by (code - 10); columns take one byte each.
    PY_CODE_LOCATION_INFO_ONE_LINE0 = 10,
    PY_CODE_LOCATION_INFO_ONE_LINE1 = 11,
    PY_CODE_LOCATION_INFO_ONE_LINE2 = 12,
    // Signed varint line delta, no column information.
    PY_CODE_LOCATION_INFO_NO_COLUMNS = 13,
    // Fully general entry: line delta, end line delta and both columns as varints.
    PY_CODE_LOCATION_INFO_LONG = 14,
    // Entry that carries no location payload at all.
    PY_CODE_LOCATION_INFO_NONE = 15
};
/**
 * Look up the source location of a code offset in a Python 3.11+ location table.
 *
 * The table is a sequence of variable-length entries. The first byte of each
 * entry encodes the entry kind in bits 3-6 and (number of code units - 1) in
 * bits 0-2; the kind determines how many payload bytes follow. Entries are
 * walked in order while accumulating the running line number, and the walk stops
 * at the first entry whose [start, end) range contains `addrq`.
 *
 * @param addrq        Offset to look up, in the same units the table counts
 *                     (the caller in this file passes an index of 2-byte code units).
 * @param linetable    Raw bytes of the location table.
 * @param firstlineno  Line number the first entry's delta is relative to.
 * @param info         Output location. All fields are overwritten; the contents
 *                     are only meaningful when the function returns true.
 * @return true if an entry covering `addrq` was found, false if the table ended
 *         (or was found to be truncated) before reaching it.
 */
static bool
parse_linetable(const uintptr_t addrq, const std::string& linetable, int firstlineno, LocationInfo* info)
{
    const uint8_t* ptr = reinterpret_cast<const uint8_t*>(linetable.data());
    const uint8_t* const table_end = ptr + linetable.size();
    uint64_t addr = 0;
    bool truncated = false;

    // Give every output field a defined value up front: some entry kinds only
    // update a subset of the fields, and callers may hand us an uninitialised struct.
    info->lineno = firstlineno;
    info->end_lineno = firstlineno;
    info->column = -1;
    info->end_column = -1;

    // Bounds-checked byte fetch: never reads past the end of the table, and
    // records that the entry being decoded was cut short.
    auto next_byte = [&]() -> unsigned int {
        if (ptr >= table_end) {
            truncated = true;
            return 0;
        }
        return *ptr++;
    };

    // Varint: 6 payload bits per byte, least significant group first; bit 6 set
    // means another byte follows.
    auto scan_varint = [&]() {
        unsigned int read = next_byte();
        unsigned int val = read & 63;
        unsigned int shift = 0;
        while (read & 64) {
            read = next_byte();
            shift += 6;
            // Ignore groups that would shift past the width of the accumulator
            // (only possible with malformed data) instead of invoking UB.
            if (shift < sizeof(val) * 8) {
                val |= (read & 63) << shift;
            }
        }
        return val;
    };

    // Signed varint: the lowest bit is the sign, the remaining bits the magnitude.
    auto scan_signed_varint = [&]() {
        const unsigned int uval = scan_varint();
        const int magnitude = static_cast<int>(uval >> 1);
        return (uval & 1) ? -magnitude : magnitude;
    };

    // A zero byte where an entry should start is treated as the end of the table,
    // as the original implementation did.
    while (ptr < table_end && *ptr != '\0') {
        const uint8_t first_byte = *(ptr++);
        const uint8_t code = (first_byte >> 3) & 15;
        const size_t length = (first_byte & 7) + 1;
        const uintptr_t end_addr = addr + length;

        switch (code) {
            case PY_CODE_LOCATION_INFO_NONE: {
                // No payload. The location decoded so far is left untouched.
                break;
            }
            case PY_CODE_LOCATION_INFO_LONG: {
                info->lineno += scan_signed_varint();
                info->end_lineno = info->lineno + static_cast<int>(scan_varint());
                // Columns are stored off by one so that 0 can mean "no column".
                info->column = static_cast<int>(scan_varint()) - 1;
                info->end_column = static_cast<int>(scan_varint()) - 1;
                break;
            }
            case PY_CODE_LOCATION_INFO_NO_COLUMNS: {
                info->lineno += scan_signed_varint();
                // This form has no end-line field: the range is a single line.
                info->end_lineno = info->lineno;
                info->column = info->end_column = -1;
                break;
            }
            case PY_CODE_LOCATION_INFO_ONE_LINE0:
            case PY_CODE_LOCATION_INFO_ONE_LINE1:
            case PY_CODE_LOCATION_INFO_ONE_LINE2: {
                // The line delta (0, 1 or 2) is encoded in the kind itself.
                info->lineno += code - PY_CODE_LOCATION_INFO_ONE_LINE0;
                info->end_lineno = info->lineno;
                info->column = static_cast<int>(next_byte());
                info->end_column = static_cast<int>(next_byte());
                break;
            }
            default: {
                // Short forms (codes 0-9): same line as the previous entry, with
                // the columns packed into the kind and a single payload byte.
                const uint8_t second_byte = static_cast<uint8_t>(next_byte());
                assert((second_byte & 128) == 0);
                info->end_lineno = info->lineno;
                info->column = code << 3 | (second_byte >> 4);
                info->end_column = info->column + (second_byte & 15);
                break;
            }
        }

        if (truncated) {
            // The entry ran off the end of the table; whatever was decoded for it
            // cannot be trusted.
            return false;
        }
        if (addr <= addrq && end_addr > addrq) {
            return true;
        }
        addr = end_addr;
    }
    return false;
}
/**
 * Compute the source location of the last executed instruction of a code object.
 *
 * The line table bytes are copied from the remote process and decoded with the
 * scheme matching the interpreter version reported by `manager`:
 *   - 3.11 and later: location table with line and column ranges (parse_linetable);
 *   - 3.10: pairs of (unsigned offset delta, signed line delta);
 *   - earlier versions: classic lnotab pairs of (offset increment, line increment).
 * See Objects/lnotab_notes.txt (3.10 and earlier) and Objects/locations.md (3.11)
 * in the CPython sources for the table formats.
 *
 * @param manager                 Process manager used to read remote memory and
 *                                to query the interpreter version.
 * @param code_addr               Remote address of the code object (currently unused).
 * @param code                    Local copy of the remote code object structure.
 * @param last_instruction_index  For 3.11+, the remote address of the last
 *                                executed instruction; for older versions, the
 *                                index/offset of the last executed instruction.
 * @return The decoded location. For 3.10 and older only the line fields are
 *         filled in. For 3.11+ the result stays {0, 0, 0, 0} when the
 *         instruction is not covered by the table.
 */
static LocationInfo
getLocationInfo(
        const std::shared_ptr<const AbstractProcessManager>& manager,
        remote_addr_t code_addr,
        Structure<py_code_v>& code,
        uintptr_t last_instruction_index)
{
    int code_lineno = code.getField(&py_code_v::o_firstlineno);
    remote_addr_t lnotab_addr = code.getField(&py_code_v::o_lnotab);
    LOG(DEBUG) << std::hex << std::showbase << "Copying lnotab data from address " << lnotab_addr;
    std::string lnotab = manager->getBytesFromAddress(lnotab_addr);

    // Before 3.11 the table is made of two-byte pairs.
    assert(manager->versionIsAtLeast(3, 11) || lnotab.size() % 2 == 0);

    std::string::size_type last_executed_instruction = last_instruction_index;
    LocationInfo location_info = LocationInfo{0, 0, 0, 0};

    if (manager->versionIsAtLeast(3, 11)) {
        // Here `last_instruction_index` is a remote address: convert it to an
        // index of 2-byte code units relative to the start of the bytecode.
        // Plain integer arithmetic is used because these addresses belong to
        // another process and must not be treated as local pointers.
        const uintptr_t code_adaptive = code.getFieldRemoteAddress(&py_code_v::o_code_adaptive);
        const ptrdiff_t byte_offset = static_cast<ptrdiff_t>(last_instruction_index - code_adaptive);
        const ptrdiff_t addrq = byte_offset / static_cast<ptrdiff_t>(sizeof(uint16_t));

        LocationInfo posinfo{0, 0, 0, 0};
        if (parse_linetable(addrq, lnotab, code_lineno, &posinfo)) {
            location_info.lineno = posinfo.lineno;
            location_info.end_lineno = posinfo.end_lineno;
            location_info.column = posinfo.column;
            location_info.end_column = posinfo.end_column;
        }
    } else if (manager->versionIsAtLeast(3, 10)) {
        // Word-code is two bytes, so the actual limit in the table 2 * the instruction index
        last_executed_instruction <<= 1;
        for (std::string::size_type i = 0, current_instruction = 0; i < lnotab.size();) {
            const unsigned char start_delta = lnotab[i++];
            const signed char line_delta = lnotab[i++];
            current_instruction += start_delta;
            // The line delta belongs to the range that ends at `current_instruction`,
            // so it is applied before checking whether that range covers the target.
            code_lineno += (line_delta == NO_LINE_NUMBER) ? 0 : line_delta;
            if (current_instruction > last_executed_instruction) {
                break;
            }
        }
        location_info.lineno = code_lineno;
        location_info.end_lineno = code_lineno;
    } else {
        for (std::string::size_type i = 0, bc = 0; i < lnotab.size();
             code_lineno += static_cast<int8_t>(lnotab[i++]))
        {
            // Offset increments are unsigned bytes (0-255). Read them as such:
            // where plain `char` is signed, values above 127 would otherwise be
            // sign-extended and move the running offset backwards.
            bc += static_cast<unsigned char>(lnotab[i++]);
            if (bc > last_executed_instruction) {
                break;
            }
        }
        location_info.lineno = code_lineno;
        location_info.end_lineno = code_lineno;
    }
    return location_info;
}
/**
 * Build a CodeObject by reading a Python code object from the remote process.
 *
 * Copies the code structure found at `addr`, then resolves its file name, scope
 * name, current source location, argument count and local variable names.
 *
 * @param manager  Process manager used to read memory from the remote process.
 * @param addr     Remote address of the Python code object.
 * @param lasti    Last executed instruction of the frame running this code,
 *                 forwarded to getLocationInfo() to compute the location.
 */
CodeObject::CodeObject(
        const std::shared_ptr<const AbstractProcessManager>& manager,
        remote_addr_t addr,
        uintptr_t lasti)
{
    LOG(DEBUG) << std::hex << std::showbase << "Copying code struct from address " << addr;
    Structure<py_code_v> code(manager, addr);

    remote_addr_t filename_addr = code.getField(&py_code_v::o_filename);
    LOG(DEBUG) << std::hex << std::showbase << "Copying filename Python string from address "
               << filename_addr;
    d_filename = manager->getStringFromAddress(filename_addr);
    LOG(DEBUG) << "Code object filename: " << d_filename;

    remote_addr_t name_addr = code.getField(&py_code_v::o_name);
    LOG(DEBUG) << std::hex << std::showbase << "Copying code name Python string from address "
               << name_addr;
    d_scope = manager->getStringFromAddress(name_addr);
    // Log the scope that was just read (this used to print the file name by mistake).
    LOG(DEBUG) << "Code object scope: " << d_scope;

    LOG(DEBUG) << "Obtaining location info location";
    d_location_info = getLocationInfo(manager, addr, code, lasti);
    LOG(DEBUG) << "Code object location info: line_range=(" << d_location_info.lineno << ", "
               << d_location_info.end_lineno << ") column_range=(" << d_location_info.column << ", "
               << d_location_info.end_column << ")";

    d_narguments = code.getField(&py_code_v::o_argcount);
    LOG(DEBUG) << "Code object n arguments: " << d_narguments;

    LOG(DEBUG) << "Copying variable names";
    remote_addr_t varnames_addr = code.getField(&py_code_v::o_varnames);
    TupleObject varnames(manager, varnames_addr);
    // Fetch the item sequence once so that both iterators are guaranteed to refer
    // to the same container, whether Items() returns a reference or a value.
    const auto& varname_addrs = varnames.Items();
    std::transform(
            varname_addrs.cbegin(),
            varname_addrs.cend(),
            std::back_inserter(d_varnames),
            [&](auto& varname_addr) {
                std::string varname = manager->getStringFromAddress(varname_addr);
                LOG(DEBUG) << "Variable name found: '" << varname << "'";
                return varname;
            });
}
/**
 * Build a CodeObject directly from already-known values, without touching any
 * remote process. The argument count and variable names are value-initialised
 * (zero arguments, no names).
 *
 * @param filename       File name to report for this code object.
 * @param scope          Scope (code name) to report for this code object.
 * @param location_info  Source location to report for this code object.
 */
CodeObject::CodeObject(std::string filename, std::string scope, LocationInfo location_info)
: d_filename(std::move(filename))  // by-value sink parameters: move rather than copy again
, d_scope(std::move(scope))
, d_location_info(location_info)
, d_narguments()
, d_varnames()
{
}
std::string
CodeObject::Filename() const
{
return d_filename;
}
std::string
CodeObject::Scope() const
{
return d_scope;
}
/// Returns a reference to the stored source location; it refers to a member of
/// this object and is therefore only valid while the object is alive.
const LocationInfo&
CodeObject::Location() const
{
    return this->d_location_info;
}
int
CodeObject::NArguments() const
{
return d_narguments;
}
const std::vector<std::string>&
CodeObject::Varnames() const
{
return d_varnames;
}
} // namespace pystack