forked from vmprof/vmprof-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreader.py
More file actions
197 lines (178 loc) · 6.14 KB
/
reader.py
File metadata and controls
197 lines (178 loc) · 6.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
from __future__ import print_function
import re
import struct
import subprocess
import sys
# True when the running interpreter's major version is 3 or higher; the
# functions in this module use it to decide whether byte strings need decoding.
PY3 = sys.version_info[0] >= 3
class LibraryData(object):
    """A named address range together with the symbols found inside it.

    ``symbols`` is a list of ``(address, name)`` pairs.  ``is_virtual``
    marks entries for which no symbol file is ever read.
    """

    def __init__(self, name, start, end, is_virtual=False, symbols=None):
        self.name = name
        self.start = start
        self.end = end
        self.is_virtual = is_virtual
        # A fresh list per instance; never share a default between objects.
        self.symbols = [] if symbols is None else symbols

    def read_object_data(self, executable=False, reader=None):
        """Load this library's symbols through ``read_object``.

        Returns ``None`` without doing anything for virtual libraries,
        otherwise the (possibly empty) symbol list, which is also stored
        on ``self.symbols``.  With ``executable=True`` symbol addresses
        are used as-is (offset 0) instead of being shifted by
        ``self.start``.
        """
        if self.is_virtual:
            return None
        base = self.start
        if executable:
            base = 0
        self.symbols = read_object(reader, self.name, base)
        # Names starting with '[' are exempt from the warning.
        nothing_found = not self.symbols
        if nothing_found and not self.name.startswith('['):
            print('WARNING: cannot read symbols for', self.name, file=sys.stderr)
        return self.symbols

    def get_symbols_from(self, cached_lib, executable=False):
        """Take symbols from ``cached_lib`` instead of reading them again.

        For an executable the symbols are copied unchanged; otherwise each
        address is moved from ``cached_lib``'s start address to this
        library's start address.  The new list is stored on
        ``self.symbols`` and returned.
        """
        if executable:
            self.symbols = cached_lib.symbols[:]
        else:
            self.symbols = relocated = []
            relocated.extend(
                (addr - cached_lib.start + self.start, sym)
                for addr, sym in cached_lib.symbols
            )
        return self.symbols

    def __repr__(self):
        details = (self.name, self.start, self.end)
        return '<Library data for %s, ranges %x-%x>' % details
def read_object(reader, name, lib_start_addr, repeat=True):
    """Return the sorted text symbols of the object file ``name``.

    ``reader`` is a callable taking the file name and returning
    ``nm -n``-style text; when it is ``None`` the real ``nm -n`` is run.
    Only lines with exactly three fields whose type is ``t`` or ``T`` are
    kept, symbols starting with ``__gcmap`` are dropped, and
    ``lib_start_addr`` is added to every address.

    If nothing is found and ``repeat`` is true, the lookup is retried once
    on ``'/usr/lib/debug' + name``.

    Returns a list of ``(address, symbol_name)`` tuples sorted by address.
    """
    # ``bytes`` is ``str`` on Python 2, so this only decodes on Python 3.
    if isinstance(name, bytes) and not isinstance(name, str):
        name = name.decode('utf-8')
    if reader is None:
        try:
            # An argv list (no shell) keeps paths containing quotes or
            # other shell metacharacters from breaking or injecting commands.
            out = subprocess.check_output(['nm', '-n', name],
                                          stderr=subprocess.PIPE)
        except (subprocess.CalledProcessError, OSError):
            # nm failed or is not installed: treat as "no symbols".
            out = ''
        if not isinstance(out, str):
            out = out.decode('latin1')
    else:
        out = reader(name)
    symbols = []
    for line in out.splitlines():
        parts = line.split()
        if len(parts) != 3:
            continue
        # Use a distinct name here: reusing ``name`` would clobber the
        # library path needed by the debug-file fallback below.
        addr, tp, sym_name = parts
        if tp in ('t', 'T') and not sym_name.startswith('__gcmap'):
            symbols.append((int(addr, 16) + lib_start_addr, sym_name))
    symbols.sort()
    if repeat and not symbols:
        return read_object(reader, '/usr/lib/debug' + name, lib_start_addr,
                           False)
    return symbols
def read_ranges(data):
    """Parse a textual memory-map listing into ``LibraryData`` ranges.

    ``data`` may be text or (on Python 3) bytes, which are decoded as
    latin1.  Three layouts are recognised:

    * ``vmmap``: a line containing ``'Virtual Memory Map'`` starts the
      listing; the six lines from that one on are skipped and entries run
      until the next empty line.  The address field is the first field
      after the leading one that starts with ``'000'``.
    * ``procstat``: the first line ends with ``'PATH'``; start and end
      addresses are the second and third whitespace-separated fields.
    * ``proc`` (anything else): the first field is ``start-end``.

    (Presumably these are macOS ``vmmap``, FreeBSD ``procstat`` and Linux
    ``/proc/<pid>/maps`` output -- inferred from the mode names only.)

    In every layout the last whitespace-separated field is the mapping
    name; entries with an empty name are skipped.  Empty input and blank
    lines yield no entries.

    Returns a list of ``LibraryData`` objects in input order.
    """
    # ``bytes`` is ``str`` on Python 2, so this only decodes on Python 3.
    if isinstance(data, bytes) and not isinstance(data, str):
        data = data.decode('latin1')
    lines = data.splitlines()
    if not lines:
        # Nothing to inspect; without this guard lines[0] below would fail.
        return []
    for i, line in enumerate(lines):
        if 'Virtual Memory Map' in line:
            lines = lines[i:]
            break
    if 'Virtual Memory Map' in lines[0]:
        mode = 'vmmap'
        stop = 5
        # Entries end at the first empty line, or at end of input if the
        # listing is not terminated by one.
        while stop < len(lines) and lines[stop]:
            stop += 1
        lines = lines[6:stop]
    elif lines[0].endswith('PATH'):
        mode = 'procstat'
        lines = lines[1:]
    else:
        mode = 'proc'
    # re.split (unlike str.split) keeps an empty trailing field, which is
    # how a line ending in whitespace is recognised as having no name.
    split_fields = re.compile(r"\s+").split
    ranges = []
    for line in lines:
        if not line.strip():
            continue
        parts = split_fields(line)
        name = parts[-1]
        if mode == 'procstat':
            start, end = parts[1], parts[2]
        elif mode == 'vmmap':
            k = 1
            while not parts[k].startswith('000'):
                k += 1
            start, end = parts[k].split('-')
        else:
            start, end = parts[0].split('-')
        start = int(start, 16)
        end = int(end, 16)
        if name:  # don't map anonymous memory, JIT code will be there
            ranges.append(LibraryData(name, start, end))
    return ranges
def read_word(fileobj):
    """Read 8 bytes from ``fileobj`` and return them as an unsigned int.

    The bytes are unpacked with struct format ``'Q'`` (no byte-order
    prefix).
    """
    (value,) = struct.unpack('Q', fileobj.read(8))
    return int(value)
def read_string(fileobj):
    """Read a length-prefixed string from ``fileobj``.

    The length is an 8-byte ``'Q'`` word; the result is whatever
    ``fileobj.read(length)`` returns.
    """
    (length,) = struct.unpack('Q', fileobj.read(8))
    payload = fileobj.read(int(length))
    return payload
# One-byte record markers: read_prof reads a single byte before each record
# and dispatches on it.
# Stack trace record: a count word, a depth word, then `depth` address words.
MARKER_STACKTRACE = b'\x01'
# Virtual IP record: a unique-id word followed by a length-prefixed name.
MARKER_VIRTUAL_IP = b'\x02'
# Ends the record stream; the rest of the file is handed to read_ranges.
MARKER_TRAILER = b'\x03'
# Interpreter name only: one length byte followed by the name.
MARKER_INTERP_NAME = b'\x04'
# Header: a 2-byte version, one length byte, then the interpreter name.
MARKER_HEADER = b'\x05'
# Format versions, compared against the version read from a MARKER_HEADER
# record.
VERSION_BASE = 0
# From this version on, read_prof reads 8 more bytes (the thread id) after
# each stack trace.
VERSION_THREAD_ID = 1
def _require(condition, message):
    """Raise ``AssertionError(message)`` unless ``condition`` is true.

    Used instead of ``assert`` so that the checks -- and the reads that
    feed them -- still happen under ``python -O``.
    """
    if not condition:
        raise AssertionError(message)


def read_prof(fileobj, virtual_ips_only=False):
    """Parse a profile stream from the binary file object ``fileobj``.

    The stream starts with five 8-byte words (0, 3, 0, period, 0) and is
    followed by records, each introduced by a one-byte marker, until a
    trailer marker or end of file.

    With ``virtual_ips_only`` true, stack addresses are skipped and only
    the sorted list of ``(unique_id, name)`` virtual IPs is returned.
    Otherwise returns ``(period, profiles, virtual_ips, symmap,
    interp_name)`` where ``profiles`` is a list of
    ``(trace, 1, thread_id)`` tuples and ``symmap`` is the result of
    ``read_ranges`` on everything after the trailer marker (``[]`` if the
    stream ends without a trailer).

    Raises ``AssertionError`` on an unexpected header word, a repeated
    header, a sample count other than 1, a stack depth above 2**16, or an
    unknown marker.  Reads that come up short surface as ``struct.error``
    from the word-reading helpers.
    """
    # Each word is read outside any ``assert`` so that running with -O
    # cannot skip the read and desynchronise the stream.
    _require(read_word(fileobj) == 0, 'unexpected header count')
    _require(read_word(fileobj) == 3, 'unexpected header size')
    _require(read_word(fileobj) == 0, 'unexpected header version')
    period = read_word(fileobj)
    _require(read_word(fileobj) == 0, 'unexpected final header word')
    virtual_ips = []
    profiles = []
    interp_name = None
    version = 0
    while True:
        marker = fileobj.read(1)
        if marker == MARKER_HEADER:
            _require(not version, "multiple headers")
            version, = struct.unpack("!h", fileobj.read(2))
            lgt = ord(fileobj.read(1))
            interp_name = fileobj.read(lgt)
            if PY3:
                interp_name = interp_name.decode()
        elif marker == MARKER_STACKTRACE:
            count = read_word(fileobj)
            # for now
            _require(count == 1, 'unexpected sample count %r' % (count,))
            depth = read_word(fileobj)
            _require(depth <= 2**16, 'stack strace depth too high')
            trace = []
            if virtual_ips_only:
                # Skip the addresses; the caller only wants virtual IPs.
                fileobj.read(8 * depth)
            else:
                for j in range(depth):
                    pc = read_word(fileobj)
                    # Every entry but the first is moved back by one;
                    # presumably so a return address falls inside the
                    # calling instruction -- TODO confirm.
                    if j > 0 and pc > 0:
                        pc -= 1
                    trace.append(pc)
            if version >= VERSION_THREAD_ID:
                # 'q' is 8 bytes on every platform; native 'l' is only 4
                # on Windows and 32-bit builds and would reject this read.
                thread_id, = struct.unpack('q', fileobj.read(8))
            else:
                thread_id = 0
            profiles.append((trace, 1, thread_id))
        elif marker == MARKER_INTERP_NAME:
            _require(not version, "multiple headers")
            _require(not interp_name, "Dual interpreter name header")
            lgt = ord(fileobj.read(1))
            interp_name = fileobj.read(lgt)
            if PY3:
                interp_name = interp_name.decode()
        elif marker == MARKER_VIRTUAL_IP:
            unique_id = read_word(fileobj)
            name = read_string(fileobj)
            if PY3:
                name = name.decode()
            virtual_ips.append((unique_id, name))
        elif marker == MARKER_TRAILER:
            if not virtual_ips_only:
                symmap = read_ranges(fileobj.read())
            break
        else:
            # An empty read means end of file; anything else is a marker
            # this reader does not know.
            _require(not marker, 'unknown marker %r' % (marker,))
            symmap = []
            break
    virtual_ips.sort()  # I think it's sorted, but who knows
    if virtual_ips_only:
        return virtual_ips
    return period, profiles, virtual_ips, symmap, interp_name