forked from bloomberg/pystack
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess.py
More file actions
180 lines (150 loc) · 5.79 KB
/
process.py
File metadata and controls
180 lines (150 loc) · 5.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import gzip
import logging
import pathlib
import re
import subprocess
import tempfile
from typing import Optional
from typing import Tuple
from .errors import InvalidPythonProcess
from .maps import MemoryMapInformation
from .maps import VirtualMap
# Applied to the output of running the interpreter with ``--version``
# (for example "Python 3.11.4"); captures the major and minor numbers.
VERSION_REGEXP = re.compile(r"Python (?P<major>\d+)\.(?P<minor>\d+).*", re.IGNORECASE)
# Applied to the file name of the interpreter binary (for example
# "python3.11"); captures the major and minor numbers.
BINARY_REGEXP = re.compile(r"python(?P<major>\d+)\.(?P<minor>\d+).*", re.IGNORECASE)
# Applied to the file name of the libpython shared library (for example
# "libpython3.11.so.1.0"); captures the major and minor numbers.
LIBPYTHON_REGEXP = re.compile(
r".*libpython(?P<major>\d+)\.(?P<minor>\d+).*", re.IGNORECASE
)
# Bytes pattern used to locate a version banner inside raw bss memory.
# Strings like "3.8.10 (default, May 26 2023, 14:05:08)"
# or "2.7.18rc1 (v2.7.18rc1:8d21aa21f2, Apr 20 2020, 13:19:08)"
# or "3.13.0+ experimental free-threading build (Python)"
# Capturing groups, in order: 1 = "X.Y.Z", 2 = major, 3 = minor, 4 = patch,
# 5 = optional pre-release suffix (e.g. "rc1"), 6 = its tag (a/b/c/rc),
# 7 = the parenthesised trailer (1 to 64 characters between the parentheses).
BSS_VERSION_REGEXP = re.compile(
rb"((2|3)\.(\d+)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+?"
rb"(?: experimental free-threading build)? (\(.{1,64}\))"
)
# Module logger; note that it is named after this module's file path
# (``__file__``), not its dotted module name.
LOGGER = logging.getLogger(__file__)
def scan_process_bss_for_python_version(
    pid: int, bss: VirtualMap
) -> Optional[Tuple[int, int]]:
    """Scan a live process's bss mapping for an embedded Python version string.

    Args:
        pid: Identifier of the process whose memory is copied.
        bss: Mapping describing the region to read; ``bss.start`` and
            ``bss.size`` delimit the copied range.

    Returns:
        ``(major, minor)`` parsed from the first version string found, or
        ``None`` when the ``_pystack`` extension cannot be imported or when
        the copied memory contains no version string.
    """
    # Lazy import _pystack to overcome a circular-import
    # (we really don't want a new extension just for this) :(
    try:
        from pystack._pystack import copy_memory_from_address
    except ImportError:  # pragma: no cover
        return None
    memory = copy_memory_from_address(pid, bss.start, bss.size)
    # Use the first hit only. Unpacking the whole findall() result as a
    # one-element list raised ValueError whenever the region held more than
    # one version-like string; the core-file scanner already takes the first.
    match = BSS_VERSION_REGEXP.search(memory)
    if match is None:
        return None
    # Groups: full "X.Y.Z", major, minor, then patch/pre-release/trailer.
    _, major, minor, *_ = match.groups()
    return int(major), int(minor)
def scan_core_bss_for_python_version(
    corefile: pathlib.Path, bss: VirtualMap
) -> Optional[Tuple[int, int]]:
    """Scan the bss region stored in a core file for a Python version string.

    Args:
        corefile: Path of the core file to read.
        bss: Mapping describing the region; ``bss.offset`` is the position in
            the file and ``bss.size`` the number of bytes to read.

    Returns:
        ``(major, minor)`` taken from the first version string found in the
        region, or ``None`` when there is none.
    """
    with open(corefile, "rb") as core_handle:
        core_handle.seek(bss.offset)
        raw_bss = core_handle.read(bss.size)

    first_hit = BSS_VERSION_REGEXP.search(raw_bss)
    if first_hit is None:
        return None
    # Group 2 is the major number and group 3 the minor number.
    return int(first_hit.group(2)), int(first_hit.group(3))
def _get_python_version_from_map_information(
    mapinfo: MemoryMapInformation,
) -> Tuple[int, int]:
    """Work out the Python version from file names, falling back to the binary.

    The libpython file name is inspected when ``mapinfo.libpython`` is set,
    otherwise the interpreter binary's file name. If the name does not carry
    a version, the binary is executed with ``--version`` and its output is
    parsed instead.

    Args:
        mapinfo: Memory map information; ``mapinfo.python.path`` must be set.

    Returns:
        The ``(major, minor)`` version.

    Raises:
        InvalidPythonProcess: If no version could be determined.
    """
    assert mapinfo.python.path is not None

    # Choose which file name to inspect and the pattern that applies to it.
    if mapinfo.libpython:
        assert mapinfo.libpython.path is not None
        candidate_name = mapinfo.libpython.path.name
        name_pattern = LIBPYTHON_REGEXP
    else:
        candidate_name = mapinfo.python.path.name
        name_pattern = BINARY_REGEXP
    LOGGER.info("Trying to extract version from filename: %s", candidate_name)
    found = name_pattern.match(candidate_name)

    if found is None:
        LOGGER.info(
            "Could not find version by looking at library or binary path: "
            "Trying to get it from running python --version"
        )
        version_output = subprocess.check_output(
            [mapinfo.python.path, "--version"], text=True, stderr=subprocess.STDOUT
        )
        found = VERSION_REGEXP.match(version_output)

    if found is None:
        raise InvalidPythonProcess(
            f"Could not determine python version from {mapinfo.python.path}"
        )

    major, minor = found.group("major", "minor")
    LOGGER.info("Python version determined: %s.%s", major, minor)
    return int(major), int(minor)
def get_python_version_for_process(
    pid: int, mapinfo: MemoryMapInformation
) -> Tuple[int, int]:
    """Return the ``(major, minor)`` Python version of a running process.

    The bss mapping is scanned first when one is available; if that yields
    nothing, the version is derived from the map information instead.

    Args:
        pid: Identifier of the process to inspect.
        mapinfo: Memory map information for that process.
    """
    detected = (
        scan_process_bss_for_python_version(pid, mapinfo.bss)
        if mapinfo.bss is not None
        else None
    )
    if detected is None:
        return _get_python_version_from_map_information(mapinfo)

    LOGGER.info("Version found by scanning the bss section: %d.%d", *detected)
    return detected
def get_python_version_for_core(
    corefile: pathlib.Path, executable: pathlib.Path, mapinfo: MemoryMapInformation
) -> Tuple[int, int]:
    """Return the ``(major, minor)`` Python version recorded in a core file.

    The bss region of the core file is scanned first when one is available;
    if that yields nothing, the version is derived from the map information.

    Args:
        corefile: Path of the core file.
        executable: Accepted as part of the signature; not read by this
            function.
        mapinfo: Memory map information for the core.
    """
    detected = (
        scan_core_bss_for_python_version(corefile, mapinfo.bss)
        if mapinfo.bss is not None
        else None
    )
    if detected is None:
        return _get_python_version_from_map_information(mapinfo)

    LOGGER.info("Version found by scanning the bss section: %d.%d", *detected)
    return detected
def is_elf(filename: pathlib.Path) -> bool:
    """Tell whether the given file starts with the ELF magic number.

    Args:
        filename: Path of the file to inspect.

    Returns:
        True if the first four bytes are ``\\x7fELF``; False otherwise,
        including when the file cannot be opened or read.
    """
    magic = b"\x7fELF"
    try:
        with open(filename, "rb") as stream:
            leading_bytes = stream.read(len(magic))
    except OSError:
        return False
    return leading_bytes == magic
def get_thread_name(pid: int, tid: int) -> Optional[str]:
    """Return the name of a thread as reported by ``/proc``.

    Args:
        pid: Identifier of the process that owns the thread.
        tid: Identifier of the thread.

    Returns:
        The contents of ``/proc/<pid>/task/<tid>/comm`` with surrounding
        whitespace removed, or ``None`` if the file cannot be opened or read.
    """
    try:
        # The name may contain bytes that are not valid in the locale
        # encoding. Strict decoding would raise UnicodeDecodeError, which is
        # not an OSError and so would escape this handler; substitute a
        # replacement character for undecodable bytes instead.
        with open(f"/proc/{pid}/task/{tid}/comm", errors="replace") as comm:
            return comm.read().strip()
    except OSError:
        return None
def is_gzip(filename: pathlib.Path) -> bool:
    """
    Report whether a file begins with the Gzip magic bytes.

    Args:
        filename (pathlib.Path): The path to the file to be checked.

    Returns:
        bool: True if the first two bytes are the Gzip header, False otherwise.
    """
    magic = b"\x1f\x8b"
    with open(filename, "rb") as stream:
        prefix = stream.read(len(magic))
    return prefix == magic
def decompress_gzip(
    filename: pathlib.Path, chunk_size: int = 4 * 1024 * 1024
) -> pathlib.Path:
    """Decompresses a Gzip file and writes the contents to a temporary file.

    The temporary file is not deleted automatically on success; the caller
    owns the returned path. If decompression fails, the partially written
    temporary file is removed before the error propagates.

    Args:
        filename: The path to the gzip file to decompress.
        chunk_size: Size of chunks to read and write at a time; defaults to 4MB.

    Returns:
        The path to the temporary file containing the decompressed data.

    Raises:
        gzip.BadGzipFile: If the file is not a valid gzip file.
    """
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        output_path = pathlib.Path(temp_file.name)
        try:
            with gzip.open(filename, "rb") as file_handle:
                while True:
                    chunk = file_handle.read(chunk_size)
                    if not chunk:
                        break
                    temp_file.write(chunk)
        except BaseException:
            # delete=False means nothing else will remove the file, so a
            # failed decompression would otherwise leave it on disk.
            # Close first so the unlink also works where open files cannot
            # be removed.
            temp_file.close()
            try:
                output_path.unlink()
            except OSError:
                # Best-effort cleanup: never mask the original failure.
                pass
            raise
    return output_path