Skip to main content

piano_runtime/
guard.rs

//! Sync function instrumentation -- RAII sentinel.
//!
//! Guard is created when entering a profiled function and dropped when
//! exiting. On drop, it computes self-time via the TLS children-time
//! accumulator and aggregates into the per-thread FnAgg vec.
//!
//! Invariants:
//! - Guard is !Send. Alloc deltas are computed on the same thread as
//!   creation. Enforcement: PhantomData<*const ()>.
//! - Profiler bookkeeping allocs are excluded from user counts.
//!   Enforcement: ReentrancyGuard (RAII) wraps creation and drop.
//! - Guard never panics. All arithmetic uses saturating_sub.
//! - TLS children_ns is saved/restored on create/drop (RAII stack discipline).
14
15use core::sync::atomic::{compiler_fence, Ordering};
16
17use crate::aggregator;
18use crate::alloc::{snapshot_alloc_counters, ReentrancyGuard};
19use crate::children;
20use crate::cpu_clock::cpu_now_ns;
21use crate::session::ProfileSession;
22use crate::time::read;
23use std::marker::PhantomData;
24
25/// RAII sentinel for sync function instrumentation.
26///
27/// Created by `piano_runtime::enter(name_id)`. Dropped at function exit.
28/// On drop: end timestamp, self-time computation, aggregate.
29///
30/// !Send because alloc counters are per-thread TLS.
31pub struct Guard {
32    /// None = inactive (profiling not initialized). Drop is a no-op.
33    session: Option<&'static ProfileSession>,
34    saved_children_ns: u64,
35    name_id: u32,
36    cpu_time_enabled: bool,
37    cpu_start_ns: u64,
38    start_ns: u64,
39    alloc_count_start: u64,
40    alloc_bytes_start: u64,
41    free_count_start: u64,
42    free_bytes_start: u64,
43    _not_send: PhantomData<*const ()>,
44}
45
46/// Enter a profiled function. Returns a Guard that aggregates on drop.
47///
48/// Reads profiling context from &'static ProfileSession.
49/// No function parameters needed. No closure captures created.
50///
51/// If profiling is not active (ProfileSession not initialized), returns an
52/// inactive Guard whose drop is a no-op.
53#[inline(always)]
54pub fn enter(name_id: u32) -> Guard {
55    let session = match ProfileSession::get() {
56        Some(s) => s,
57        None => return Guard::inactive(),
58    };
59    let mut guard = Guard::create(session, name_id);
60    guard.stamp();
61    guard
62}
63
64impl Guard {
65    /// Inactive guard. Drop is a no-op.
66    fn inactive() -> Self {
67        Self {
68            session: None,
69            saved_children_ns: 0,
70            name_id: 0,
71            cpu_time_enabled: false,
72            cpu_start_ns: 0,
73            start_ns: 0,
74            alloc_count_start: 0,
75            alloc_bytes_start: 0,
76            free_count_start: 0,
77            free_bytes_start: 0,
78            _not_send: PhantomData,
79        }
80    }
81
82    /// Create a guard with all bookkeeping done but start_ns = 0.
83    /// Caller must call stamp() after the struct is materialized.
84    ///
85    /// NOT inlined: keeps the heavy bookkeeping (TLS, alloc snapshot)
86    /// out of the caller. Only stamp() (one TSC read) is inlined.
87    #[inline(never)]
88    fn create(session: &'static ProfileSession, name_id: u32) -> Self {
89        let _reentrancy = ReentrancyGuard::enter();
90        let saved_children_ns = children::save_and_zero();
91        let snap = snapshot_alloc_counters();
92        let cpu_start_ns = if session.cpu_time_enabled {
93            cpu_now_ns()
94        } else {
95            0
96        };
97        drop(_reentrancy);
98
99        Self {
100            session: Some(session),
101            saved_children_ns,
102            name_id,
103            cpu_time_enabled: session.cpu_time_enabled,
104            cpu_start_ns,
105            start_ns: 0,
106            alloc_count_start: snap.alloc_count,
107            alloc_bytes_start: snap.alloc_bytes,
108            free_count_start: snap.free_count,
109            free_bytes_start: snap.free_bytes,
110            _not_send: PhantomData,
111        }
112    }
113
114    /// Write the start timestamp. Called after the struct is materialized.
115    #[inline(always)]
116    pub fn stamp(&mut self) {
117        compiler_fence(Ordering::SeqCst);
118        self.start_ns = read();
119    }
120}
121
122impl Drop for Guard {
123    #[inline(always)]
124    fn drop(&mut self) {
125        let end_ticks = read();
126        compiler_fence(Ordering::SeqCst);
127
128        let session = match self.session {
129            Some(s) => s,
130            None => return,
131        };
132        let _reentrancy = ReentrancyGuard::enter();
133        let cpu_end_ns = if self.cpu_time_enabled {
134            cpu_now_ns()
135        } else {
136            0
137        };
138        let snap_end = snapshot_alloc_counters();
139
140        let start_ns = session.calibration.now_ns(self.start_ns);
141        let end_ns = session.calibration.now_ns(end_ticks);
142        let inclusive_ns = end_ns.saturating_sub(start_ns);
143
144        let my_children_ns = children::current_children_ns();
145        let self_ns = inclusive_ns.saturating_sub(my_children_ns);
146        let cpu_self_ns = cpu_end_ns.saturating_sub(self.cpu_start_ns);
147
148        aggregator::aggregate(
149            self.name_id,
150            self_ns,
151            inclusive_ns,
152            cpu_self_ns,
153            snap_end.alloc_count.saturating_sub(self.alloc_count_start),
154            snap_end.alloc_bytes.saturating_sub(self.alloc_bytes_start),
155            snap_end.free_count.saturating_sub(self.free_count_start),
156            snap_end.free_bytes.saturating_sub(self.free_bytes_start),
157            &session.agg_registry,
158        );
159
160        // Report inclusive time to parent scope.
161        children::restore_and_report(self.saved_children_ns, inclusive_ns);
162    }
163}