canic_core/
perf.rs

1//! Cross-cutting performance instrumentation.
2//!
3//! This module provides instruction-count measurement primitives used
4//! across endpoints, ops, timers, and background tasks.
5//!
6//! It is intentionally crate-level infrastructure, not part of the
7//! domain layering (endpoints → ops → model).
8//! Instrumentation modules are layer-neutral and may be used anywhere.
9
10use crate::ids::{EndpointCall, EndpointId};
11use std::{cell::RefCell, collections::HashMap};
12
13thread_local! {
14    /// Last snapshot used by the `perf!` macro.
15    #[cfg(not(test))]
16    #[allow(clippy::missing_const_for_thread_local)]
17    pub static PERF_LAST: RefCell<u64> = RefCell::new(perf_counter());
18
19    // Unit tests run outside a canister context, so `perf_counter()` would trap.
20    #[cfg(test)]
21    pub static PERF_LAST: RefCell<u64> = const { RefCell::new(0) };
22
23    /// Aggregated perf counters keyed by kind (endpoint vs timer) and label.
24    static PERF_TABLE: RefCell<HashMap<PerfKey, PerfSlot>> = RefCell::new(HashMap::new());
25
26    /// Stack of active endpoint scopes for exclusive instruction accounting.
27    /// This is independent of `PERF_LAST`, which is only used by `perf!` checkpoints.
28    static PERF_STACK: RefCell<Vec<PerfFrame>> = const { RefCell::new(Vec::new()) };
29}
30
31/// Returns the **call-context instruction counter** for the current execution.
32///
33/// This value is obtained from `ic0.performance_counter(1)` and represents the
34/// total number of WebAssembly instructions executed by *this canister* within
35/// the **current call context**.
36///
37/// Key properties:
38/// - Monotonically increasing for the duration of the call context
39/// - Accumulates across `await` points and resumptions
40/// - Resets only when a new call context begins
41/// - Counts *only* instructions executed by this canister (not other canisters)
42///
43/// This counter is suitable for:
44/// - Endpoint-level performance accounting
45/// - Async workflows and timers
46/// - Regression detection and coarse-grained profiling
47///
48/// It is **not** a measure of cycle cost. Expensive inter-canister operations
49/// (e.g., canister creation) may have low instruction counts here but high cycle
50/// charges elsewhere.
51///
52/// For fine-grained, single-slice profiling (e.g., hot loops), use
53/// `ic0.performance_counter(0)` instead.
54#[must_use]
55#[allow(clippy::missing_const_for_fn)]
56pub fn perf_counter() -> u64 {
57    #[cfg(target_arch = "wasm32")]
58    {
59        crate::cdk::api::performance_counter(1)
60    }
61
62    #[cfg(not(target_arch = "wasm32"))]
63    {
64        0
65    }
66}
67
68///
69/// PerfKey
70/// Splits perf counters by transport surface so metrics rows remain explicit.
71///
72
73#[derive(Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
74pub enum PerfKey {
75    Endpoint(String),
76    Timer(String),
77    Checkpoint { scope: String, label: String },
78}
79
80///
81/// PerfFrame
82/// Tracks an active endpoint scope and accumulated child instructions.
83///
84
85struct PerfFrame {
86    start: u64,
87    child_instructions: u64,
88}
89
90///
91/// PerfSlot
92///
93
94#[derive(Default)]
95struct PerfSlot {
96    count: u64,
97    total_instructions: u64,
98}
99
100impl PerfSlot {
101    const fn increment(&mut self, delta: u64) {
102        self.count = self.count.saturating_add(1);
103        self.total_instructions = self.total_instructions.saturating_add(delta);
104    }
105}
106
107///
108/// PerfEntry
109/// Aggregated perf counters keyed by kind (endpoint vs timer) and label.
110///
111
112#[derive(Clone)]
113pub struct PerfEntry {
114    pub key: PerfKey,
115    pub count: u64,
116    pub total_instructions: u64,
117}
118
119/// Record a counter under the provided key.
120pub fn record(key: PerfKey, delta: u64) {
121    PERF_TABLE.with(|table| {
122        let mut table = table.borrow_mut();
123        table.entry(key).or_default().increment(delta);
124    });
125}
126
127pub fn record_endpoint(endpoint: EndpointId, delta_instructions: u64) {
128    record(
129        PerfKey::Endpoint(endpoint.name.to_string()),
130        delta_instructions,
131    );
132}
133
134pub fn record_timer(label: &str, delta_instructions: u64) {
135    record(PerfKey::Timer(label.to_string()), delta_instructions);
136}
137
138pub fn record_checkpoint(scope: &str, label: &str, delta_instructions: u64) {
139    record(
140        PerfKey::Checkpoint {
141            scope: scope.to_string(),
142            label: label.to_string(),
143        },
144        delta_instructions,
145    );
146}
147
148/// Begin an endpoint scope and push it on the stack.
149pub(crate) fn enter_endpoint() {
150    enter_endpoint_at(perf_counter());
151}
152
153/// End the most recent endpoint scope and record exclusive instructions.
154pub(crate) fn exit_endpoint(call: EndpointCall) {
155    exit_endpoint_at(call.endpoint, perf_counter());
156}
157
158fn enter_endpoint_at(start: u64) {
159    PERF_STACK.with(|stack| {
160        let mut stack = stack.borrow_mut();
161
162        // If a previous call trapped, clear any stale frames.
163        if let Some(last) = stack.last()
164            && start < last.start
165        {
166            stack.clear();
167        }
168
169        stack.push(PerfFrame {
170            start,
171            child_instructions: 0,
172        });
173    });
174}
175
176fn exit_endpoint_at(endpoint: EndpointId, end: u64) {
177    PERF_STACK.with(|stack| {
178        let mut stack = stack.borrow_mut();
179        let Some(frame) = stack.pop() else {
180            record_endpoint(endpoint, end);
181            return;
182        };
183
184        let total = end.saturating_sub(frame.start);
185        let exclusive = total.saturating_sub(frame.child_instructions);
186
187        if let Some(parent) = stack.last_mut() {
188            parent.child_instructions = parent.child_instructions.saturating_add(total);
189        }
190
191        record_endpoint(endpoint, exclusive);
192    });
193}
194
195/// Snapshot all recorded perf counters, sorted by key.
196/// Entries are sorted by (kind, label).
197#[must_use]
198pub fn entries() -> Vec<PerfEntry> {
199    PERF_TABLE.with(|table| {
200        let table = table.borrow();
201
202        let mut out: Vec<PerfEntry> = table
203            .iter()
204            .map(|(key, slot)| PerfEntry {
205                key: key.clone(),
206                count: slot.count,
207                total_instructions: slot.total_instructions,
208            })
209            .collect();
210
211        out.sort_by(|a, b| a.key.cmp(&b.key));
212        out
213    })
214}
215
216///
217/// TESTS
218///
219
220#[cfg(test)]
221pub fn reset() {
222    PERF_TABLE.with(|t| t.borrow_mut().clear());
223    PERF_LAST.with(|last| *last.borrow_mut() = 0);
224    PERF_STACK.with(|stack| stack.borrow_mut().clear());
225}
226
227#[cfg(test)]
228mod tests {
229    use super::*;
230
231    fn checkpoint_at(now: u64) {
232        PERF_LAST.with(|last| *last.borrow_mut() = now);
233    }
234
235    fn entry_for(label: &str) -> PerfEntry {
236        entries()
237            .into_iter()
238            .find(|entry| matches!(&entry.key, PerfKey::Endpoint(l) if l == label))
239            .expect("expected perf entry to exist")
240    }
241
242    fn checkpoint_entry_for(scope: &str, label: &str) -> PerfEntry {
243        entries()
244            .into_iter()
245            .find(|entry| {
246                matches!(
247                    &entry.key,
248                    PerfKey::Checkpoint {
249                        scope: entry_scope,
250                        label: entry_label,
251                    } if entry_scope == scope && entry_label == label
252                )
253            })
254            .expect("expected checkpoint perf entry to exist")
255    }
256
257    #[test]
258    fn nested_endpoints_record_exclusive_totals() {
259        reset();
260
261        enter_endpoint_at(100);
262        checkpoint_at(140);
263
264        enter_endpoint_at(200);
265        checkpoint_at(230);
266        exit_endpoint_at(EndpointId::new("child"), 260);
267
268        exit_endpoint_at(EndpointId::new("parent"), 300);
269
270        let parent = entry_for("parent");
271        let child = entry_for("child");
272
273        assert_eq!(child.count, 1);
274        assert_eq!(child.total_instructions, 60);
275        assert_eq!(parent.count, 1);
276        assert_eq!(parent.total_instructions, 140);
277    }
278
279    #[test]
280    fn checkpoints_record_scope_and_label() {
281        reset();
282
283        record_checkpoint("workflow::bootstrap", "load_cfg", 120);
284        record_checkpoint("workflow::bootstrap", "load_cfg", 80);
285
286        let checkpoint = checkpoint_entry_for("workflow::bootstrap", "load_cfg");
287
288        assert_eq!(checkpoint.count, 2);
289        assert_eq!(checkpoint.total_instructions, 200);
290    }
291}
canic_core/perf.rs

canic_core/
perf.rs