Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ae33ca8
Add support for tracking garbage collection and calls to native calls…
brandtbucher Oct 27, 2025
6427b6e
No need to use atomics here
brandtbucher Oct 30, 2025
3801924
Clean up the diff
brandtbucher Nov 2, 2025
cb433c1
Add docs
brandtbucher Nov 5, 2025
c26de8f
Whitespace
brandtbucher Nov 6, 2025
9ea2512
Whitespace
brandtbucher Nov 6, 2025
c06158b
fixup
brandtbucher Nov 6, 2025
e831b33
blurb add
brandtbucher Nov 6, 2025
269fe68
Separate tests
pablogsal Nov 8, 2025
25e770b
Do not show artificial lines in GC frames or native
pablogsal Nov 8, 2025
103835e
Fix flamegraph
pablogsal Nov 8, 2025
1118285
Rework to share code, also catch calls into native code in top frames
brandtbucher Nov 11, 2025
f3ade0d
Default "native" to False
brandtbucher Nov 12, 2025
a280f97
Fix crashes
brandtbucher Nov 12, 2025
06a7bda
Add native frame tests
brandtbucher Nov 12, 2025
920c9e3
Cleanup
brandtbucher Nov 12, 2025
f2b6607
Fix docs
brandtbucher Nov 12, 2025
bc45007
Loosen requirement for native frames in the middle of the stack
brandtbucher Nov 12, 2025
fd7a209
Fewer loops
brandtbucher Nov 12, 2025
4539b6e
fixup
brandtbucher Nov 12, 2025
6c2e2fb
More time for ASan
brandtbucher Nov 12, 2025
c31c6dc
Less native time
brandtbucher Nov 12, 2025
abf1337
Simplify the test
brandtbucher Nov 12, 2025
0b54df2
Don't detect native frames at the top of the stack
brandtbucher Nov 13, 2025
d23ca10
Merge upstream/main into native-gc-sampling
pablogsal Nov 17, 2025
329f549
Clarify comment
pablogsal Nov 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion Doc/library/profile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,14 @@ Profile with real-time sampling statistics::

Sample all threads in the process instead of just the main thread.

.. option:: --native

Include artificial ``<native>`` frames to denote calls to non-Python code.

.. option:: --no-gc

Don't include artificial ``<GC>`` frames to denote active garbage collection.

.. option:: --realtime-stats

Print real-time sampling statistics during profiling.
Expand Down Expand Up @@ -349,7 +357,7 @@ This section documents the programmatic interface for the :mod:`!profiling.sampl
For command-line usage, see :ref:`sampling-profiler-cli`. For conceptual information
about statistical profiling, see :ref:`statistical-profiling`.

.. function:: sample(pid, *, sort=2, sample_interval_usec=100, duration_sec=10, filename=None, all_threads=False, limit=None, show_summary=True, output_format="pstats", realtime_stats=False)
.. function:: sample(pid, *, sort=2, sample_interval_usec=100, duration_sec=10, filename=None, all_threads=False, limit=None, show_summary=True, output_format="pstats", realtime_stats=False, native=False, gc=True)

Sample a Python process and generate profiling data.

Expand All @@ -367,6 +375,8 @@ about statistical profiling, see :ref:`statistical-profiling`
:param bool show_summary: Whether to show summary statistics (default: True)
:param str output_format: Output format - 'pstats' or 'collapsed' (default: 'pstats')
:param bool realtime_stats: Whether to display real-time statistics (default: False)
:param bool native: Whether to include ``<native>`` frames (default: False)
:param bool gc: Whether to include ``<GC>`` frames (default: True)

:raises ValueError: If output_format is not 'pstats' or 'collapsed'

Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_debug_offsets.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ typedef struct _Py_DebugOffsets {
// Debug offsets for the garbage collector's runtime state
// (struct _gc_runtime_state).
struct _gc {
// sizeof(struct _gc_runtime_state)
uint64_t size;
// Byte offset of the `collecting` member within struct _gc_runtime_state.
uint64_t collecting;
// Byte offset of the `frame` member within struct _gc_runtime_state.
uint64_t frame;
} gc;

// Generator object offset;
Expand Down Expand Up @@ -355,6 +356,7 @@ typedef struct _Py_DebugOffsets {
.gc = { \
.size = sizeof(struct _gc_runtime_state), \
.collecting = offsetof(struct _gc_runtime_state, collecting), \
.frame = offsetof(struct _gc_runtime_state, frame), \
}, \
.gen_object = { \
.size = sizeof(PyGenObject), \
Expand Down
4 changes: 4 additions & 0 deletions Include/internal/pycore_global_objects_fini_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Include/internal/pycore_global_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@ struct _Py_global_strings {
STRUCT_FOR_STR(dot_locals, ".<locals>")
STRUCT_FOR_STR(empty, "")
STRUCT_FOR_STR(format, ".format")
STRUCT_FOR_STR(gc, "<GC>")
STRUCT_FOR_STR(generic_base, ".generic_base")
STRUCT_FOR_STR(json_decoder, "json.decoder")
STRUCT_FOR_STR(kwdefaults, ".kwdefaults")
STRUCT_FOR_STR(list_err, "list index out of range")
STRUCT_FOR_STR(native, "<native>")
STRUCT_FOR_STR(str_replace_inf, "1e309")
STRUCT_FOR_STR(type_params, ".type_params")
STRUCT_FOR_STR(utf_8, "utf-8")
Expand Down Expand Up @@ -486,6 +488,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(fullerror)
STRUCT_FOR_ID(func)
STRUCT_FOR_ID(future)
STRUCT_FOR_ID(gc)
STRUCT_FOR_ID(generation)
STRUCT_FOR_ID(get)
STRUCT_FOR_ID(get_debug)
Expand Down Expand Up @@ -629,6 +632,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(name_from)
STRUCT_FOR_ID(namespace_separator)
STRUCT_FOR_ID(namespaces)
STRUCT_FOR_ID(native)
STRUCT_FOR_ID(ndigits)
STRUCT_FOR_ID(nested)
STRUCT_FOR_ID(new_file_name)
Expand Down
3 changes: 3 additions & 0 deletions Include/internal/pycore_interp_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,9 @@ struct _gc_runtime_state {
struct gc_generation_stats generation_stats[NUM_GENERATIONS];
/* true if we are currently running the collector */
int collecting;
// The frame that started the current collection. It might be NULL even when
// collecting (if no Python frame is running):
_PyInterpreterFrame *frame;
/* list of uncollectable objects */
PyObject *garbage;
/* a list of callbacks to be invoked when collection is performed */
Expand Down
1 change: 0 additions & 1 deletion Include/internal/pycore_interpframe_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ enum _frameowner {
FRAME_OWNED_BY_GENERATOR = 1,
FRAME_OWNED_BY_FRAME_OBJECT = 2,
FRAME_OWNED_BY_INTERPRETER = 3,
FRAME_OWNED_BY_CSTACK = 4,
};

struct _PyInterpreterFrame {
Expand Down
4 changes: 4 additions & 0 deletions Include/internal/pycore_runtime_init_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions Include/internal/pycore_unicodeobject_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 21 additions & 7 deletions Lib/profiling/sampling/flamegraph.js
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,22 @@ function createPythonTooltip(data) {
const funcname = resolveString(d.data.funcname) || resolveString(d.data.name);
const filename = resolveString(d.data.filename) || "";

// Don't show file location for special frames like <GC> and <native>
const isSpecialFrame = filename === "~";
const fileLocationHTML = isSpecialFrame ? "" : `
<div style="color: #5a6c7d; font-size: 13px; margin-bottom: 12px;
font-family: monospace; background: #f8f9fa;
padding: 4px 8px; border-radius: 4px; word-break: break-all; overflow-wrap: break-word;">
${filename}${d.data.lineno ? ":" + d.data.lineno : ""}
</div>`;

const tooltipHTML = `
<div>
<div style="color: #3776ab; font-weight: 600; font-size: 16px;
margin-bottom: 8px; line-height: 1.3; word-break: break-word; overflow-wrap: break-word;">
${funcname}
</div>
<div style="color: #5a6c7d; font-size: 13px; margin-bottom: 12px;
font-family: monospace; background: #f8f9fa;
padding: 4px 8px; border-radius: 4px; word-break: break-all; overflow-wrap: break-word;">
${filename}${d.data.lineno ? ":" + d.data.lineno : ""}
</div>
${fileLocationHTML}
<div style="display: grid; grid-template-columns: auto 1fr;
gap: 8px 16px; font-size: 14px;">
<span style="color: #5a6c7d; font-weight: 500;">Execution Time:</span>
Expand Down Expand Up @@ -474,14 +479,23 @@ function populateStats(data) {
if (i < hotSpots.length && hotSpots[i]) {
const hotspot = hotSpots[i];
const filename = hotspot.filename || 'unknown';
const basename = filename !== 'unknown' ? filename.split('/').pop() : 'unknown';
const lineno = hotspot.lineno ?? '?';
let funcDisplay = hotspot.funcname || 'unknown';
if (funcDisplay.length > 35) {
funcDisplay = funcDisplay.substring(0, 32) + '...';
}

document.getElementById(`hotspot-file-${num}`).textContent = `${basename}:${lineno}`;
// Don't show file:line for special frames like <GC> and <native>
const isSpecialFrame = filename === '~' && (lineno === 0 || lineno === '?');
let fileDisplay;
if (isSpecialFrame) {
fileDisplay = '--';
} else {
const basename = filename !== 'unknown' ? filename.split('/').pop() : 'unknown';
fileDisplay = `${basename}:${lineno}`;
}

document.getElementById(`hotspot-file-${num}`).textContent = fileDisplay;
document.getElementById(`hotspot-func-${num}`).textContent = funcDisplay;
document.getElementById(`hotspot-detail-${num}`).textContent = `${hotspot.directPercent.toFixed(1)}% samples (${hotspot.directSamples.toLocaleString()})`;
} else {
Expand Down
26 changes: 21 additions & 5 deletions Lib/profiling/sampling/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,19 +137,19 @@ def _run_with_sync(original_cmd):


class SampleProfiler:
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, skip_non_matching_threads=True):
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, skip_non_matching_threads=True):
self.pid = pid
self.sample_interval_usec = sample_interval_usec
self.all_threads = all_threads
if _FREE_THREADED_BUILD:
self.unwinder = _remote_debugging.RemoteUnwinder(
self.pid, all_threads=self.all_threads, mode=mode,
self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc,
skip_non_matching_threads=skip_non_matching_threads
)
else:
only_active_threads = bool(self.all_threads)
self.unwinder = _remote_debugging.RemoteUnwinder(
self.pid, only_active_thread=only_active_threads, mode=mode,
self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc,
skip_non_matching_threads=skip_non_matching_threads
)
# Track sample intervals and total sample count
Expand Down Expand Up @@ -616,6 +616,8 @@ def sample(
output_format="pstats",
realtime_stats=False,
mode=PROFILING_MODE_WALL,
native=False,
gc=True,
):
# PROFILING_MODE_ALL implies no skipping at all
if mode == PROFILING_MODE_ALL:
Expand All @@ -627,7 +629,7 @@ def sample(
skip_idle = mode != PROFILING_MODE_WALL

profiler = SampleProfiler(
pid, sample_interval_usec, all_threads=all_threads, mode=mode,
pid, sample_interval_usec, all_threads=all_threads, mode=mode, native=native, gc=gc,
skip_non_matching_threads=skip_non_matching_threads
)
profiler.realtime_stats = realtime_stats
Expand Down Expand Up @@ -717,6 +719,8 @@ def wait_for_process_and_sample(pid, sort_value, args):
output_format=args.format,
realtime_stats=args.realtime_stats,
mode=mode,
native=args.native,
gc=args.gc,
)


Expand Down Expand Up @@ -767,9 +771,19 @@ def main():
sampling_group.add_argument(
"--realtime-stats",
action="store_true",
default=False,
help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling",
)
sampling_group.add_argument(
"--native",
action="store_true",
help="Include artificial \"<native>\" frames to denote calls to non-Python code.",
)
sampling_group.add_argument(
"--no-gc",
action="store_false",
dest="gc",
help="Don't include artificial \"<GC>\" frames to denote active garbage collection.",
)

# Mode options
mode_group = parser.add_argument_group("Mode options")
Expand Down Expand Up @@ -934,6 +948,8 @@ def main():
output_format=args.format,
realtime_stats=args.realtime_stats,
mode=mode,
native=args.native,
gc=args.gc,
)
elif args.module or args.args:
if args.module:
Expand Down
18 changes: 14 additions & 4 deletions Lib/profiling/sampling/stack_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,16 @@ def process_frames(self, frames, thread_id):
def export(self, filename):
lines = []
for (call_tree, thread_id), count in self.stack_counter.items():
stack_str = ";".join(
f"{os.path.basename(f[0])}:{f[2]}:{f[1]}" for f in call_tree
)
lines.append((f"tid:{thread_id};{stack_str}", count))
parts = [f"tid:{thread_id}"]
for file, line, func in call_tree:
# This is what pstats does for "special" frames:
if file == "~" and line == 0:
part = func
else:
part = f"{os.path.basename(file)}:{func}:{line}"
parts.append(part)
stack_str = ";".join(parts)
lines.append((stack_str, count))

lines.sort(key=lambda x: (-x[1], x[0]))

Expand Down Expand Up @@ -98,6 +104,10 @@ def export(self, filename):
def _format_function_name(func):
filename, lineno, funcname = func

# Special frames like <GC> and <native> should not show file:line
if filename == "~" and lineno == 0:
return funcname

if len(filename) > 50:
parts = filename.split("/")
if len(parts) > 2:
Expand Down
2 changes: 2 additions & 0 deletions Lib/test/test_external_inspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ def foo():
FrameInfo([script_name, 12, "baz"]),
FrameInfo([script_name, 9, "bar"]),
FrameInfo([threading.__file__, ANY, "Thread.run"]),
FrameInfo([threading.__file__, ANY, "Thread._bootstrap_inner"]),
FrameInfo([threading.__file__, ANY, "Thread._bootstrap"]),
]
# It is possible that there are more threads, so we check that the
# expected stack traces are in the result (looking at you Windows!)
Expand Down
Loading
Loading