canic_core/
perf.rs

1//! Cross-cutting performance instrumentation.
2//!
3//! This module provides instruction-count measurement primitives used
4//! across endpoints, ops, timers, and background tasks.
5//!
6//! It is intentionally crate-level infrastructure, not part of the
7//! domain layering (endpoints → ops → model).
8//! Instrumentation modules are layer-neutral and may be used anywhere.
9
10use crate::ids::{EndpointCall, EndpointId};
11use std::{cell::RefCell, collections::HashMap};
12
13thread_local! {
14    /// Last snapshot used by the `perf!` macro.
15    #[cfg(not(test))]
16    pub static PERF_LAST: RefCell<u64> = RefCell::new(perf_counter());
17
18    // Unit tests run outside a canister context, so `perf_counter()` would trap.
19    #[cfg(test)]
20    pub static PERF_LAST: RefCell<u64> = const { RefCell::new(0) };
21
22    /// Aggregated perf counters keyed by kind (endpoint vs timer) and label.
23    static PERF_TABLE: RefCell<HashMap<PerfKey, PerfSlot>> = RefCell::new(HashMap::new());
24
25    /// Stack of active endpoint scopes for exclusive instruction accounting.
26    /// This is independent of `PERF_LAST`, which is only used by `perf!` checkpoints.
27    static PERF_STACK: RefCell<Vec<PerfFrame>> = const { RefCell::new(Vec::new()) };
28}
29
30/// Returns the **call-context instruction counter** for the current execution.
31///
32/// This value is obtained from `ic0.performance_counter(1)` and represents the
33/// total number of WebAssembly instructions executed by *this canister* within
34/// the **current call context**.
35///
36/// Key properties:
37/// - Monotonically increasing for the duration of the call context
38/// - Accumulates across `await` points and resumptions
39/// - Resets only when a new call context begins
40/// - Counts *only* instructions executed by this canister (not other canisters)
41///
42/// This counter is suitable for:
43/// - Endpoint-level performance accounting
44/// - Async workflows and timers
45/// - Regression detection and coarse-grained profiling
46///
47/// It is **not** a measure of cycle cost. Expensive inter-canister operations
48/// (e.g., canister creation) may have low instruction counts here but high cycle
49/// charges elsewhere.
50///
51/// For fine-grained, single-slice profiling (e.g., hot loops), use
52/// `ic0.performance_counter(0)` instead.
53#[must_use]
54pub fn perf_counter() -> u64 {
55    crate::cdk::api::performance_counter(1)
56}
57
58///
59/// PerfKey
60/// Splits perf counters by transport surface so metrics rows remain explicit.
61///
62
63#[derive(Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
64pub enum PerfKey {
65    Endpoint(String),
66    Timer(String),
67    Checkpoint { scope: String, label: String },
68}
69
70///
71/// PerfFrame
72/// Tracks an active endpoint scope and accumulated child instructions.
73///
74
75struct PerfFrame {
76    start: u64,
77    child_instructions: u64,
78}
79
80///
81/// PerfSlot
82///
83
84#[derive(Default)]
85struct PerfSlot {
86    count: u64,
87    total_instructions: u64,
88}
89
90impl PerfSlot {
91    const fn increment(&mut self, delta: u64) {
92        self.count = self.count.saturating_add(1);
93        self.total_instructions = self.total_instructions.saturating_add(delta);
94    }
95}
96
97///
98/// PerfEntry
99/// Aggregated perf counters keyed by kind (endpoint vs timer) and label.
100///
101
102#[derive(Clone)]
103pub struct PerfEntry {
104    pub key: PerfKey,
105    pub count: u64,
106    pub total_instructions: u64,
107}
108
109/// Record a counter under the provided key.
110pub fn record(key: PerfKey, delta: u64) {
111    PERF_TABLE.with(|table| {
112        let mut table = table.borrow_mut();
113        table.entry(key).or_default().increment(delta);
114    });
115}
116
117pub fn record_endpoint(endpoint: EndpointId, delta_instructions: u64) {
118    record(
119        PerfKey::Endpoint(endpoint.name.to_string()),
120        delta_instructions,
121    );
122}
123
124pub fn record_timer(label: &str, delta_instructions: u64) {
125    record(PerfKey::Timer(label.to_string()), delta_instructions);
126}
127
128pub fn record_checkpoint(scope: &str, label: &str, delta_instructions: u64) {
129    record(
130        PerfKey::Checkpoint {
131            scope: scope.to_string(),
132            label: label.to_string(),
133        },
134        delta_instructions,
135    );
136}
137
138/// Begin an endpoint scope and push it on the stack.
139pub(crate) fn enter_endpoint() {
140    enter_endpoint_at(perf_counter());
141}
142
143/// End the most recent endpoint scope and record exclusive instructions.
144pub(crate) fn exit_endpoint(call: EndpointCall) {
145    exit_endpoint_at(call.endpoint, perf_counter());
146}
147
148fn enter_endpoint_at(start: u64) {
149    PERF_STACK.with(|stack| {
150        let mut stack = stack.borrow_mut();
151
152        // If a previous call trapped, clear any stale frames.
153        if let Some(last) = stack.last()
154            && start < last.start
155        {
156            stack.clear();
157        }
158
159        stack.push(PerfFrame {
160            start,
161            child_instructions: 0,
162        });
163    });
164}
165
166fn exit_endpoint_at(endpoint: EndpointId, end: u64) {
167    PERF_STACK.with(|stack| {
168        let mut stack = stack.borrow_mut();
169        let Some(frame) = stack.pop() else {
170            record_endpoint(endpoint, end);
171            return;
172        };
173
174        let total = end.saturating_sub(frame.start);
175        let exclusive = total.saturating_sub(frame.child_instructions);
176
177        if let Some(parent) = stack.last_mut() {
178            parent.child_instructions = parent.child_instructions.saturating_add(total);
179        }
180
181        record_endpoint(endpoint, exclusive);
182    });
183}
184
185/// Snapshot all recorded perf counters, sorted by key.
186/// Entries are sorted by (kind, label).
187#[must_use]
188pub fn entries() -> Vec<PerfEntry> {
189    PERF_TABLE.with(|table| {
190        let table = table.borrow();
191
192        let mut out: Vec<PerfEntry> = table
193            .iter()
194            .map(|(key, slot)| PerfEntry {
195                key: key.clone(),
196                count: slot.count,
197                total_instructions: slot.total_instructions,
198            })
199            .collect();
200
201        out.sort_by(|a, b| a.key.cmp(&b.key));
202        out
203    })
204}
205
206///
207/// TESTS
208///
209
210#[cfg(test)]
211pub fn reset() {
212    PERF_TABLE.with(|t| t.borrow_mut().clear());
213    PERF_LAST.with(|last| *last.borrow_mut() = 0);
214    PERF_STACK.with(|stack| stack.borrow_mut().clear());
215}
216
217#[cfg(test)]
218mod tests {
219    use super::*;
220
221    fn checkpoint_at(now: u64) {
222        PERF_LAST.with(|last| *last.borrow_mut() = now);
223    }
224
225    fn entry_for(label: &str) -> PerfEntry {
226        entries()
227            .into_iter()
228            .find(|entry| matches!(&entry.key, PerfKey::Endpoint(l) if l == label))
229            .expect("expected perf entry to exist")
230    }
231
232    fn checkpoint_entry_for(scope: &str, label: &str) -> PerfEntry {
233        entries()
234            .into_iter()
235            .find(|entry| {
236                matches!(
237                    &entry.key,
238                    PerfKey::Checkpoint {
239                        scope: entry_scope,
240                        label: entry_label,
241                    } if entry_scope == scope && entry_label == label
242                )
243            })
244            .expect("expected checkpoint perf entry to exist")
245    }
246
247    #[test]
248    fn nested_endpoints_record_exclusive_totals() {
249        reset();
250
251        enter_endpoint_at(100);
252        checkpoint_at(140);
253
254        enter_endpoint_at(200);
255        checkpoint_at(230);
256        exit_endpoint_at(EndpointId::new("child"), 260);
257
258        exit_endpoint_at(EndpointId::new("parent"), 300);
259
260        let parent = entry_for("parent");
261        let child = entry_for("child");
262
263        assert_eq!(child.count, 1);
264        assert_eq!(child.total_instructions, 60);
265        assert_eq!(parent.count, 1);
266        assert_eq!(parent.total_instructions, 140);
267    }
268
269    #[test]
270    fn checkpoints_record_scope_and_label() {
271        reset();
272
273        record_checkpoint("workflow::bootstrap", "load_cfg", 120);
274        record_checkpoint("workflow::bootstrap", "load_cfg", 80);
275
276        let checkpoint = checkpoint_entry_for("workflow::bootstrap", "load_cfg");
277
278        assert_eq!(checkpoint.count, 2);
279        assert_eq!(checkpoint.total_instructions, 200);
280    }
281}
canic_core/perf.rs

canic_core/
perf.rs