From 0c6affdf7cf3150e70aabd4845ad7c92365d49d6 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Wed, 27 May 2026 13:00:19 +0200 Subject: [PATCH 1/4] test(callgrind): C reproducer for cascading underflow obj-skip leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Triggers the call-stack-underflow leak channel observed in the Python case (28 underflow events / run, almost all libpython interpreter frames). Mechanism: - Lib runs recursive skipme_recurse(N) with instrumentation OFF, so callgrind never sees the calls and its csp stays at 0. - At the deepest frame (n==0), CALLGRIND_START_INSTRUMENTATION fires. - Each RET on the way back hits csp == 0, triggers handleUnderflow, resets cxt to 0, and force-pushes the fn we're returning into. - Because that fn is in the skipped lib, it leaks as a top-level fn= block in the dump — N times for an N-deep recursion. With depth=5 the diagnostic logs show 1 (cxt==0) push + 6 underflow resets (5x skipme_recurse + 1x skipme_run), and the .out has fn=skipme_run and fn=skipme_recurse as top-level blocks. --- callgrind/tests/runtime_obj_skip_underflow.c | 22 +++++++++++ .../tests/runtime_obj_skip_underflow_lib.c | 37 +++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 callgrind/tests/runtime_obj_skip_underflow.c create mode 100644 callgrind/tests/runtime_obj_skip_underflow_lib.c diff --git a/callgrind/tests/runtime_obj_skip_underflow.c b/callgrind/tests/runtime_obj_skip_underflow.c new file mode 100644 index 000000000..ffc1e6a6f --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_underflow.c @@ -0,0 +1,22 @@ +/* Driver for the underflow-channel obj-skip leak reproducer. 
*/ + +#define _GNU_SOURCE +#include <dlfcn.h> +#include <stdio.h> +#include "../callgrind.h" + +extern void skipme_run(int depth); + +int main(void) +{ + Dl_info info; + if (dladdr((void*)skipme_run, &info) == 0 || !info.dli_fname) { + fprintf(stderr, "dladdr failed\n"); + return 1; + } + CALLGRIND_ADD_OBJ_SKIP(info.dli_fname); + + skipme_run(5); + + return 0; +} diff --git a/callgrind/tests/runtime_obj_skip_underflow_lib.c b/callgrind/tests/runtime_obj_skip_underflow_lib.c new file mode 100644 index 000000000..abaf58cc6 --- /dev/null +++ b/callgrind/tests/runtime_obj_skip_underflow_lib.c @@ -0,0 +1,37 @@ +/* Library that triggers the call-stack-underflow leak channel in + * callgrind obj-skip. + * + * Setup: recursive function in the skipped lib. Main calls in with + * instrumentation OFF, so callgrind's call stack is never populated. + * At the deepest frame, instrumentation is flipped ON. Each RET on + * the way back then sees csp == 0, hits handleUnderflow, resets + * cxt = 0, and force-pushes the current fn (which lives in the + * skipped lib) as the new top context — leaking N times for an + * N-deep stack. + * + * This is the same shape as Python 3.14's interpreter dispatch + * leaks: deep recursive eval-loop frames where instrumentation was + * started somewhere down the stack and every return pops past an + * empty callgrind stack. 
*/ + +#include "../callgrind.h" + +volatile long sink; + +__attribute__((noinline)) +void skipme_recurse(int n) +{ + if (n == 0) { + CALLGRIND_START_INSTRUMENTATION; + return; + } + skipme_recurse(n - 1); + sink += n; +} + +__attribute__((noinline)) +void skipme_run(int depth) +{ + skipme_recurse(depth); + CALLGRIND_STOP_INSTRUMENTATION; +} From 12af824568d0f512f90888d8548c26511962fdcd Mon Sep 17 00:00:00 2001 From: not-matthias Date: Wed, 27 May 2026 13:14:50 +0200 Subject: [PATCH 2/4] fix(callgrind): aggregate (cxt==0) and underflow leaks under a sentinel cxt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When setup_bbcc's (cxt==0) clause or handleUnderflow would force-push a skipped fn into the current context, push a synthetic (skipped) fn instead. The skipped fn keeps its costs (routed normally through the sentinel cxt) but never surfaces as its own fn= block — the dump shows a single ob=??? fl=(callgrind-internal) fn=(skipped) block aggregating all leaked frames. The sentinel itself has skip=False so the (cxt==0 && skip) substitution doesn't recurse on it. Created lazily on first need via a singleton in fn.c, attached to the anonymous '???' obj. Verified against both C reproducers (runtime_obj_skip_c and runtime_obj_skip_underflow): no skipme_* fn= blocks appear, totals are preserved. Verified against a non-skipped-attribution test that main / do_main_work still emit normally; the sentinel only engages on the leak paths. 
--- callgrind/bbcc.c | 14 +++++++++++++- callgrind/fn.c | 21 +++++++++++++++++++++ callgrind/global.h | 1 + 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/callgrind/bbcc.c b/callgrind/bbcc.c index 36b2300e1..84a4c1670 100644 --- a/callgrind/bbcc.c +++ b/callgrind/bbcc.c @@ -513,6 +513,13 @@ static void handleUnderflow(BB* bb) CLG_(current_fn_stack).top--; CLG_(current_state).cxt = 0; caller = CLG_(get_fn_node)(bb); + + /* A (sentinel): if the fn we'd return into is itself skipped, push + * the (skipped) sentinel instead so the skipped fn doesn't surface + * as its own fn= block in the dump. */ + if (caller->skip) + caller = CLG_(get_skipped_sentinel)(); + CLG_(push_cxt)( caller ); if (!seen_before) { @@ -796,7 +803,12 @@ void CLG_(setup_bbcc)(BB* bb) /* Change new context if needed, taking delayed_push into account */ if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) { - CLG_(push_cxt)(CLG_(get_fn_node)(bb)); + fn_node* push_fn = CLG_(get_fn_node)(bb); + /* A (sentinel): substitute the (skipped) sentinel so the + * skipped fn doesn't appear as its own fn= block in the dump. */ + if (skip && CLG_(current_state).cxt == 0) + push_fn = CLG_(get_skipped_sentinel)(); + CLG_(push_cxt)(push_fn); } CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom); diff --git a/callgrind/fn.c b/callgrind/fn.c index efa5430de..2525063d8 100644 --- a/callgrind/fn.c +++ b/callgrind/fn.c @@ -307,6 +307,27 @@ void CLG_(init_obj_table)(void) obj_table[i] = 0; } +static fn_node* new_fn_node(const HChar *fnname, + file_node* file, fn_node* next); + +/* Singleton sentinel fn_node used as a placeholder cxt when we'd + * otherwise be forced to push a skipped fn into an empty (cxt == 0) + * context. Keeping skip == False on the sentinel itself is crucial: + * the (cxt == 0 && skip) check that would push it must NOT recurse + * on the sentinel. 
*/ +static fn_node* skipped_sentinel = NULL; + +fn_node* CLG_(get_skipped_sentinel)(void) +{ + if (skipped_sentinel) return skipped_sentinel; + + obj_node* obj = CLG_(get_obj_node)(NULL); /* anonymous "???" obj */ + file_node* file = CLG_(get_file_node)(obj, "", "(callgrind-internal)"); + skipped_sentinel = new_fn_node("(skipped)", file, NULL); + skipped_sentinel->skip = False; + return skipped_sentinel; +} + #define HASH_CONSTANT 256 static UInt str_hash(const HChar *s, UInt table_size) diff --git a/callgrind/global.h b/callgrind/global.h index c2fda1cce..730e665c9 100644 --- a/callgrind/global.h +++ b/callgrind/global.h @@ -723,6 +723,7 @@ void CLG_(set_current_fn_array)(fn_array*); UInt* CLG_(get_fn_entry)(Int n); void CLG_(init_obj_table)(void); +fn_node* CLG_(get_skipped_sentinel)(void); obj_node* CLG_(get_obj_node)(DebugInfo* si); file_node* CLG_(get_file_node)(obj_node*, const HChar *dirname, const HChar* filename); From 95f57dbf89681f23108d008a1ba1b22fdbf1aa31 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Wed, 27 May 2026 12:28:40 +0200 Subject: [PATCH 3/4] fix(callgrind): check obj-skip on every BB entry, not only jk_Call The obj-skip check was gated on jmpkind == jk_Call. When a function in a skipped object was entered via jk_Jump or fall-through (interpreter dispatch, tail calls, perf trampoline, JIT), the skip flag never latched and the function leaked into the dump as its own fn= block. Also instrument the cxt==0 forced push_cxt path with a diagnostic line so we can measure the residual leak when a skipped fn is forced into a top-level context after an instrumentation start or call-stack underflow. 
--- callgrind/bbcc.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/callgrind/bbcc.c b/callgrind/bbcc.c index 84a4c1670..737f129b8 100644 --- a/callgrind/bbcc.c +++ b/callgrind/bbcc.c @@ -732,7 +732,11 @@ void CLG_(setup_bbcc)(BB* bb) } } - if (jmpkind == jk_Call) { + /* Check obj-skip on every BB entry, not only jk_Call. + * The interpreter / perf trampoline can enter functions via jk_Jump + * or fall-through; if we only checked on jk_Call, skip would never + * latch for those fns and they'd leak into the dump. */ + { fn_node* node = CLG_(get_fn_node)(bb); skip = node->skip; if (!skip && !node->obj_skip_checked){ @@ -801,7 +805,15 @@ void CLG_(setup_bbcc)(BB* bb) } } - /* Change new context if needed, taking delayed_push into account */ + /* Change new context if needed, taking delayed_push into account. + * + * The `cxt == 0` clause used to fire regardless of skip, which meant + * that on the first BB after instrumentation start / call-stack + * underflow, a skipped libpython fn would still be pushed as the new + * top context and appear as its own fn= block in the dump. + * + * Now: if the fn is skip, we substitute the skipped sentinel so the + * skipped fn doesn't appear as its own fn= block in the dump. */ if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) { fn_node* push_fn = CLG_(get_fn_node)(bb); /* A (sentinel): substitute the (skipped) sentinel so the From cd1d3d4c8283f7d2dec7279c6d654fa4e3272524 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Wed, 27 May 2026 12:39:49 +0200 Subject: [PATCH 4/4] fix(callgrind): drop BBCCs whose top context fn is skip-flagged MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the (cxt == 0) clause in setup_bbcc force-pushes a skipped fn — e.g. the first BB after CALLGRIND_START_INSTRUMENTATION lives in a skipped object — the BBCC ends up with cxt->fn[0]->skip == True. 
Without filtering, those BBCCs emit a top-level fn= block and the skipped fn leaks into the dump. Filter them out at dump time in print_bbccs_of_thread, right before print_fn_pos would emit the ob=/fl=/fn= header. The call edges from non-skipped callers into skipped fns (cfn=) are unaffected because they're emitted from the caller's BBCC, whose context is not skipped. Also broadens the runtime_obj_skip_c post-check to grep for any fn=skipme*, since the actual leaked fn in the repro is skipme_run (the one calling START_INSTRUMENTATION), not skipme_func. --- callgrind/dump.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/callgrind/dump.c b/callgrind/dump.c index 3a3164c4b..4fee29af2 100644 --- a/callgrind/dump.c +++ b/callgrind/dump.c @@ -1553,7 +1553,17 @@ static void print_bbccs_of_thread(thread_info* ti) } if (*p == 0) break; - + + /* Don't emit BBCCs whose top context fn is flagged for obj-skip. + * This happens when the (cxt == 0) clause in setup_bbcc force- + * pushes a skipped fn (first BB after instrumentation start that + * landed in a skipped object). Without this filter the skipped fn + * leaks into the dump as a top-level fn= block. */ + if ((*p)->cxt->fn[0]->skip) { + p++; + continue; + } + if (print_fn_pos(print_fp, &lastFnPos, *p)) { /* new function */