-
Notifications
You must be signed in to change notification settings - Fork 57
Expand file tree
/
Copy pathpythread.cpp
More file actions
379 lines (335 loc) Β· 14.2 KB
/
pythread.cpp
File metadata and controls
379 lines (335 loc) Β· 14.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
#include <algorithm>
#include <cassert>
#include <memory>
#include "logging.h"
#include "mem.h"
#include "native_frame.h"
#include "process.h"
#include "pyframe.h"
#include "pythread.h"
#include "structure.h"
#include "version.h"
#include "cpython/pthread.h"
namespace pystack {
Thread::Thread(pid_t pid, pid_t tid)
: d_pid(pid)
, d_tid(tid)
{
}
pid_t
Thread::Tid() const
{
return d_tid;
}
const std::vector<NativeFrame>&
Thread::NativeFrames() const
{
return d_native_frames;
}
// Asks `manager` to unwind the thread identified by this object's tid and
// stores the result as this thread's native frames, replacing any previous
// contents.
void
Thread::populateNativeStackTrace(const std::shared_ptr<const AbstractProcessManager>& manager)
{
    this->d_native_frames = manager->unwindThread(this->d_tid);
}
// Byte offset of the `tid` field inside the remote pthread structure, shared by
// the functions in this file. A value of 0 is treated as "not located yet":
// getThreadFromInterpreterState() calls findPthreadTidOffset() to fill it in
// whenever it is still 0, and inferTidFromPThreadStructure() adds it to the
// pthread id to read a thread's tid.
off_t tid_offset_in_pthread_struct = 0;
// Work out at which byte offset the thread id (tid) lives inside the pthread
// structure of the remote process.
//
// The Python thread states reachable from the interpreter state located at
// `interp_state_addr` are visited one by one. For each of them, the memory
// starting at its pthread id is inspected for a value equal to the process
// pid: the tid of the main thread is equal to the pid, so a match has to come
// from the main thread (which is not necessarily the head of the list).
//
// Returns the offset at which the match was found, or 0 (after logging an
// error) when no thread produced a match.
static off_t
findPthreadTidOffset(
        const std::shared_ptr<const AbstractProcessManager>& manager,
        remote_addr_t interp_state_addr)
{
    LOG(DEBUG) << "Attempting to locate tid offset in pthread structure";

    const remote_addr_t null_addr = reinterpret_cast<remote_addr_t>(nullptr);

    Structure<py_is_v> interp(manager, interp_state_addr);
    const auto first_thread_addr = interp.getField(&py_is_v::o_tstate_head);

#if defined(__GLIBC__)
    // With GLIBC, the two known layouts of 'struct pthread' are tried first so
    // that no guess-work (a linear scan over the struct) is needed.
    const off_t known_glibc_offsets[] = {
            offsetof(_pthread_structure_with_simple_header, tid),
            offsetof(_pthread_structure_with_tcbhead, tid)};

    {
        auto tstate_addr = first_thread_addr;
        while (tstate_addr != null_addr) {
            Structure<py_thread_v> tstate(manager, tstate_addr);
            auto pthread_base = tstate.getField(&py_thread_v::o_thread_id);

            for (const off_t candidate : known_glibc_offsets) {
                pid_t tid_at_candidate;
                try {
                    manager->copyObjectFromProcess(
                            (remote_addr_t)(pthread_base + candidate),
                            &tid_at_candidate);
                } catch (const RemoteMemCopyError&) {
                    // Unreadable location: move on to the next known layout.
                    continue;
                }
                if (tid_at_candidate == manager->Pid()) {
                    LOG(DEBUG) << "Tid offset located using GLIBC offsets at offset " << std::showbase
                               << std::hex << candidate << " in pthread structure";
                    return candidate;
                }
            }

            // A thread state that links to itself ends the traversal.
            remote_addr_t following_addr = tstate.getField(&py_thread_v::o_next);
            if (following_addr == tstate_addr) {
                break;
            }
            tstate_addr = following_addr;
        }
    }
#endif

    // Fallback: copy a chunk of each pthread structure and look, word by
    // word, for a value equal to the pid.
    auto tstate_addr = first_thread_addr;
    while (tstate_addr != null_addr) {
        Structure<py_thread_v> tstate(manager, tstate_addr);
        auto pthread_base = tstate.getField(&py_thread_v::o_thread_id);

        uintptr_t words[100];
        size_t bytes_to_copy = sizeof(words);
        // Halve the requested size after every failed copy until one succeeds
        // or nothing is left to request.
        for (; bytes_to_copy > 0; bytes_to_copy /= 2) {
            try {
                LOG(DEBUG) << "Trying to copy a buffer of " << bytes_to_copy << " bytes to get pthread ID";
                manager->copyMemoryFromProcess(pthread_base, bytes_to_copy, &words);
                break;
            } catch (const RemoteMemCopyError&) {
                LOG(DEBUG) << "Failed to copy buffer to get pthread ID";
            }
        }
        LOG(DEBUG) << "Copied a buffer of " << bytes_to_copy << " bytes to get pthread ID";

        // Only the words that were copied completely are examined.
        uintptr_t* const words_end = words + bytes_to_copy / sizeof(uintptr_t);
        uintptr_t* const hit = std::find_if(words, words_end, [&manager](uintptr_t word) {
            return static_cast<pid_t>(word) == manager->Pid();
        });
        if (hit != words_end) {
            off_t offset = sizeof(uintptr_t) * static_cast<size_t>(hit - words);
            LOG(DEBUG) << "Tid offset located by scanning at offset " << std::showbase << std::hex
                       << offset << " in pthread structure";
            return offset;
        }

        // A thread state that links to itself ends the traversal.
        remote_addr_t following_addr = tstate.getField(&py_thread_v::o_next);
        if (following_addr == tstate_addr) {
            break;
        }
        tstate_addr = following_addr;
    }

    LOG(ERROR) << "Could not find tid offset in pthread structure";
    return 0;
}
// Build a PyThread from the Python thread state located at `addr` in the
// remote process.
//
// The constructor reads the thread state, creates the first frame object when
// a frame address is available, resolves the thread's tid, recursively
// constructs the next thread of the linked list (if any) and finally computes
// the GIL and GC status of the thread.
PyThread::PyThread(const std::shared_ptr<const AbstractProcessManager>& manager, remote_addr_t addr)
: Thread(0, 0)
{
    d_pid = manager->Pid();

    LOG(DEBUG) << std::hex << std::showbase << "Copying main thread struct from address " << addr;
    Structure<py_thread_v> ts(manager, addr);

    const remote_addr_t first_frame_addr = getFrameAddr(manager, ts);
    if (first_frame_addr) {
        LOG(DEBUG) << std::hex << std::showbase << "Attempting to construct frame from address "
                   << first_frame_addr;
        d_first_frame = std::make_unique<FrameObject>(manager, first_frame_addr, 0);
    }

    d_addr = addr;

    // A thread state whose `next` link points back at itself is treated as
    // the end of the list.
    const remote_addr_t linked_addr = ts.getField(&py_thread_v::o_next);
    if (linked_addr == addr) {
        d_next_addr = reinterpret_cast<remote_addr_t>(nullptr);
    } else {
        d_next_addr = linked_addr;
    }

    d_pthread_id = ts.getField(&py_thread_v::o_thread_id);
    d_tid = getThreadTid(manager, ts, d_pthread_id);

    d_next = nullptr;
    if (d_next_addr) {
        LOG(DEBUG) << std::hex << std::showbase << "Attempting to construct a new thread address "
                   << d_next_addr;
        d_next = std::make_unique<PyThread>(manager, d_next_addr);
    }

    // Both status computations rely on d_addr, which has been set above.
    d_gil_status = calculateGilStatus(ts, manager);
    d_gc_status = calculateGCStatus(ts, manager);
}
// Returns the tid of the thread described by the thread state `ts`.
//
// When the manager reports a version of at least 3.11, the value is read from
// the thread state's native thread id field. Otherwise it is inferred from the
// pthread structure designated by `pthread_id`.
int
PyThread::getThreadTid(
        const std::shared_ptr<const AbstractProcessManager>& manager,
        Structure<py_thread_v>& ts,
        unsigned long pthread_id)
{
    if (!manager->versionIsAtLeast(3, 11)) {
        return inferTidFromPThreadStructure(manager, pthread_id);
    }
    return ts.getField(&py_thread_v::o_native_thread_id);
}
// Reads the tid of a thread out of its pthread structure in the remote
// process.
//
// The OS thread id (tid) cannot be obtained directly from the pthread id
// (thread_id) read from the remote process. However, the pthread id is the
// address of the pthread struct that the pthread library uses to create the
// thread (gdb and other debuggers rely on this too), and that struct holds the
// tid, so knowing the offset of the field is enough. In glibc the struct looks
// like this:
//
// struct pthread {
//     union
//     {
//         tcbhead_t header;
//         void *__padding[24];
//     };
//     list_t list;
//     pid_t tid;
//     ...
// }
//
// Returns the tid that was read. Throws std::runtime_error when the value is
// non-zero and is not one of the tids known to `manager`.
int
PyThread::inferTidFromPThreadStructure(
        const std::shared_ptr<const AbstractProcessManager>& manager,
        unsigned long pthread_id)
{
    int the_tid;
    manager->copyObjectFromProcess(
            (remote_addr_t)(pthread_id + tid_offset_in_pthread_struct),
            &the_tid);

    // Sanity check: the value has to be one of the known tids. A thread id of
    // 0 means that the thread was terminated but not joined, so it is let
    // through without being looked up.
    const auto& known_tids = manager->Tids();
    const bool terminated_not_joined = (the_tid == 0);
    if (!terminated_not_joined
        && std::none_of(known_tids.begin(), known_tids.end(), [the_tid](const auto& known) {
               return known == the_tid;
           }))
    {
        throw std::runtime_error("Invalid thread ID found!");
    }
    return the_tid;
}
// Returns the remote address of the current frame of the thread state `ts`.
//
// For versions from 3.11 up to (but not including) 3.13 the frame field of the
// thread state holds the address of a cframe structure, and the frame address
// is read from that structure; a null address is returned when the cframe
// address is not valid. For every other version the frame field is returned
// as is.
remote_addr_t
PyThread::getFrameAddr(
        const std::shared_ptr<const AbstractProcessManager>& manager,
        Structure<py_thread_v>& ts)
{
    const bool frame_is_behind_cframe =
            manager->versionIsAtLeast(3, 11) && !manager->versionIsAtLeast(3, 13);
    if (!frame_is_behind_cframe) {
        return ts.getField(&py_thread_v::o_frame);
    }

    remote_addr_t cframe_addr = ts.getField(&py_thread_v::o_frame);
    if (manager->isAddressValid(cframe_addr)) {
        Structure<py_cframe_v> cframe(manager, cframe_addr);
        return cframe.getField(&py_cframe_v::current_frame);
    }
    return reinterpret_cast<remote_addr_t>(nullptr);
}
std::shared_ptr<FrameObject>
PyThread::FirstFrame() const
{
return d_first_frame;
}
std::shared_ptr<PyThread>
PyThread::NextThread() const
{
return d_next;
}
// Returns the GIL status that was computed for this thread at construction
// time.
PyThread::GilStatus
PyThread::isGilHolder() const
{
    return this->d_gil_status;
}
// Returns the GC status that was computed for this thread at construction
// time.
PyThread::GCStatus
PyThread::isGCCollecting() const
{
    return this->d_gc_status;
}
// Determines whether the thread represented by this object (whose thread state
// lives at d_addr in the remote process) holds the GIL.
//
// `ts` is the already-copied thread state and `manager` gives access to the
// remote process. The strategy depends on what the process exposes:
//   * `_PyRuntime` present, version >= 3.12: follow thread state -> interpreter
//     state -> GIL runtime state and check `locked` / `last_holder`.
//   * `_PyRuntime` present, version >= 3.8: compare `tstate_current` in the
//     runtime structure against d_addr.
//   * `_PyRuntime` present, older versions: scan the start of `_PyRuntime`
//     looking for two occurrences of d_addr.
//   * no `_PyRuntime`: read the `_PyThreadState_Current` global, if present.
//
// Returns HELD or NOT_HELD when one of the strategies applies and UNKNOWN when
// neither symbol can be found. Failures while reading remote memory are not
// handled here.
PyThread::GilStatus
PyThread::calculateGilStatus(
        Structure<py_thread_v>& ts,
        const std::shared_ptr<const AbstractProcessManager>& manager) const
{
    LOG(DEBUG) << "Attempting to determine GIL Status";
    // Value-initialized so that it never holds an indeterminate value.
    remote_addr_t thread_addr{};
    remote_addr_t pyruntime = manager->findSymbol("_PyRuntime");
    if (pyruntime) {
        assert(manager->versionIsAtLeast(3, 0));
        LOG(DEBUG) << "_PyRuntime symbol detected. Searching for GIL status within _PyRuntime structure";
        if (manager->versionIsAtLeast(3, 12)) {
            // Fast, exact method supporting per-interpreter GILs:
            // The thread state points to an interpreter state, which contains
            // a ceval state, which points to a GIL runtime state.
            // If that GIL state has `locked` set and `last_holder` is d_addr,
            // then the thread represented by this PyThread holds the GIL.
            auto is_addr = ts.getField(&py_thread_v::o_interp);
            Structure<py_is_v> interp(manager, is_addr);
            auto gil_addr = interp.getField(&py_is_v::o_gil_runtime_state);
            Structure<py_gilruntimestate_v> gil(manager, gil_addr);
            auto locked = gil.getField(&py_gilruntimestate_v::o_locked);
            auto holder = gil.getField(&py_gilruntimestate_v::o_last_holder);
            return (locked && holder == d_addr ? GilStatus::HELD : GilStatus::NOT_HELD);
        } else if (manager->versionIsAtLeast(3, 8)) {
            // Fast, exact method by checking the gilstate structure in _PyRuntime
            LOG(DEBUG) << "Searching for the GIL by checking the value of 'tstate_current'";
            Structure<py_runtime_v> runtime(manager, pyruntime);
            uintptr_t tstate_current = runtime.getField(&py_runtime_v::o_tstate_current);
            return (tstate_current == d_addr ? GilStatus::HELD : GilStatus::NOT_HELD);
        } else {
            LOG(DEBUG) << "Searching for the GIL by scanning the _PyRuntime structure";
            // Slow, potentially unreliable method for older versions.
            // The thread object that has the GIL is stored twice at some unknown
            // offsets in the _PyRuntime structure. In order to determine if a given
            // thread has the GIL, we scan the _PyRuntime struct and check if the
            // address of the given thread object is present twice in the _PyRuntime
            // struct.
            int hits = 0;
            static const size_t MAX_RUNTIME_OFFSET = 2048;
            // Remote addresses are walked as plain integers, one pointer-sized
            // slot at a time; they are never valid pointers in this process.
            const remote_addr_t scan_end = pyruntime + MAX_RUNTIME_OFFSET;
            for (remote_addr_t raddr = pyruntime; raddr < scan_end; raddr += sizeof(void*)) {
                manager->copyObjectFromProcess(raddr, &thread_addr);
                if (thread_addr == d_addr && ++hits == 2) {
                    LOG(DEBUG) << "GIL status correctly determined: HELD";
                    return GilStatus::HELD;
                }
            }
            LOG(DEBUG) << "GIL status correctly determined: NOT HELD";
            return GilStatus::NOT_HELD;
        }
    } else {
        LOG(DEBUG) << "_PyRuntime symbol not detected. Searching for GIL status using "
                      "_PyThreadState_Current symbol";
        // Python 2 and older have a global symbol that holds the current thread
        // object (the one that has the GIL).
        remote_addr_t current_thread = manager->findSymbol("_PyThreadState_Current");
        if (current_thread) {
            manager->copyObjectFromProcess(current_thread, &thread_addr);
            // Log the same verdict that is returned (the thread holds the GIL
            // only when the global points at this thread state).
            const bool holds_gil = (thread_addr == d_addr);
            LOG(DEBUG) << "GIL status correctly determined: " << (holds_gil ? "HELD" : "NOT HELD");
            return holds_gil ? GilStatus::HELD : GilStatus::NOT_HELD;
        }
    }
    LOG(DEBUG) << "Failed to determine the GIL status";
    return GilStatus::UNKNOWN;
}
// Determines whether the garbage collector is currently collecting, as seen
// from the thread state `ts`.
//
// The location of the GC state depends on the version reported by `manager`:
// from 3.9 on it is read through the interpreter state the thread points to,
// for 3.7 and 3.8 through the `_PyRuntime` symbol. COLLECTING_UNKNOWN is
// returned for older versions, and also when `_PyRuntime` is needed but cannot
// be found. Otherwise the result mirrors the `collecting` field of the GC
// state.
PyThread::GCStatus
PyThread::calculateGCStatus(
        Structure<py_thread_v>& ts,
        const std::shared_ptr<const AbstractProcessManager>& manager) const
{
    LOG(DEBUG) << "Attempting to determine GC Status";

    const bool gc_in_interpreter = manager->versionIsAtLeast(3, 9);
    const bool gc_in_runtime = !gc_in_interpreter && manager->versionIsAtLeast(3, 7);
    if (!gc_in_interpreter && !gc_in_runtime) {
        LOG(DEBUG) << "GC Status retrieval not supported by this Python version";
        return GCStatus::COLLECTING_UNKNOWN;
    }

    remote_addr_t gcstate_addr;
    if (gc_in_interpreter) {
        auto interp_addr = ts.getField(&py_thread_v::o_interp);
        Structure<py_is_v> interp(manager, interp_addr);
        gcstate_addr = interp.getFieldRemoteAddress(&py_is_v::o_gc);
    } else {
        remote_addr_t runtime_addr = manager->findSymbol("_PyRuntime");
        if (!runtime_addr) {
            LOG(DEBUG) << "Failed to get GC status because the _PyRuntime symbol is unavailable";
            return GCStatus::COLLECTING_UNKNOWN;
        }
        Structure<py_runtime_v> runtime(manager, runtime_addr);
        gcstate_addr = runtime.getFieldRemoteAddress(&py_runtime_v::o_gc);
    }

    Structure<py_gc_v> gcstate(manager, gcstate_addr);
    auto collecting = gcstate.getField(&py_gc_v::o_collecting);
    LOG(DEBUG) << "GC status correctly retrieved: " << collecting;
    if (collecting) {
        return GCStatus::COLLECTING;
    }
    return GCStatus::NOT_COLLECTING;
}
// Create a similar function which does not take the pointer to the thread state, only the manager and
// the tid
// Builds the PyThread for the head of the thread list of the interpreter state
// located at `addr` in the remote process.
//
// Before doing so, the offset of the tid inside the pthread structure is
// looked up if the file-level value is still 0.
std::shared_ptr<PyThread>
getThreadFromInterpreterState(
        const std::shared_ptr<const AbstractProcessManager>& manager,
        remote_addr_t addr)
{
    if (!tid_offset_in_pthread_struct) {
        tid_offset_in_pthread_struct = findPthreadTidOffset(manager, addr);
    }

    LOG(DEBUG) << std::hex << std::showbase << "Copying PyInterpreterState struct from address " << addr;
    Structure<py_is_v> interp_state(manager, addr);
    auto head_thread_addr = interp_state.getField(&py_is_v::o_tstate_head);

    return std::make_shared<PyThread>(manager, head_thread_addr);
}
} // namespace pystack