canic_core/
perf.rs

1//! Cross-cutting performance instrumentation.
2//!
3//! This module provides instruction-count measurement primitives used
4//! across endpoints, ops, timers, and background tasks.
5//!
6//! It is intentionally crate-level infrastructure, not part of the
7//! domain layering (endpoints → ops → model).
8
9use canic_cdk::candid::CandidType;
10use serde::{Deserialize, Serialize};
11use std::{cell::RefCell, collections::HashMap};
12
13thread_local! {
14    /// Last snapshot used by the `perf!` macro.
15    #[cfg(not(test))]
16    pub static PERF_LAST: RefCell<u64> = RefCell::new(perf_counter());
17
18    // Unit tests run outside a canister context, so `perf_counter()` would trap.
19    #[cfg(test)]
20    pub static PERF_LAST: RefCell<u64> = const { RefCell::new(0) };
21
22    /// Aggregated perf counters keyed by kind (endpoint vs timer) and label.
23    static PERF_TABLE: RefCell<HashMap<PerfKey, PerfSlot>> = RefCell::new(HashMap::new());
24
25    /// Stack of active endpoint scopes for exclusive instruction accounting.
26    /// This is independent of `PERF_LAST`, which is only used by `perf!` checkpoints.
27    static PERF_STACK: RefCell<Vec<PerfFrame>> = RefCell::new(Vec::new());
28}
29
30/// Returns the **call-context instruction counter** for the current execution.
31///
32/// This value is obtained from `ic0.performance_counter(1)` and represents the
33/// total number of WebAssembly instructions executed by *this canister* within
34/// the **current call context**.
35///
36/// Key properties:
37/// - Monotonically increasing for the duration of the call context
38/// - Accumulates across `await` points and resumptions
39/// - Resets only when a new call context begins
40/// - Counts *only* instructions executed by this canister (not other canisters)
41///
42/// This counter is suitable for:
43/// - Endpoint-level performance accounting
44/// - Async workflows and timers
45/// - Regression detection and coarse-grained profiling
46///
47/// It is **not** a measure of cycle cost. Expensive inter-canister operations
48/// (e.g., canister creation) may have low instruction counts here but high cycle
49/// charges elsewhere.
50///
51/// For fine-grained, single-slice profiling (e.g., hot loops), use
52/// `ic0.performance_counter(0)` instead.
53#[must_use]
54pub fn perf_counter() -> u64 {
55    crate::cdk::api::performance_counter(1)
56}
57
58///
59/// PerfKey
60/// splitting up by Timer type to avoid confusing string comparisons
61///
62
63#[derive(
64    CandidType, Clone, Debug, Deserialize, Serialize, Eq, Hash, Ord, PartialEq, PartialOrd,
65)]
66pub enum PerfKey {
67    Endpoint(String),
68    Timer(String),
69}
70
71///
72/// PerfFrame
73/// Tracks an active endpoint scope and accumulated child instructions.
74///
75
76#[derive(Debug, Default)]
77struct PerfFrame {
78    start: u64,
79    child_instructions: u64,
80}
81
82///
83/// PerfSlot
84///
85
86#[derive(Default)]
87struct PerfSlot {
88    count: u64,
89    total_instructions: u64,
90}
91
92impl PerfSlot {
93    const fn increment(&mut self, delta: u64) {
94        self.count = self.count.saturating_add(1);
95        self.total_instructions = self.total_instructions.saturating_add(delta);
96    }
97}
98
99///
100/// PerfEntry
101/// Aggregated perf counters keyed by kind (endpoint vs timer) and label.
102///
103
104#[derive(CandidType, Clone, Debug, Deserialize, Serialize)]
105pub struct PerfEntry {
106    pub label: String,
107    pub key: PerfKey,
108    pub count: u64,
109    pub total_instructions: u64,
110}
111
112/// Record a counter under the provided key.
113pub fn record(key: PerfKey, delta: u64) {
114    PERF_TABLE.with(|table| {
115        let mut table = table.borrow_mut();
116        table.entry(key).or_default().increment(delta);
117    });
118}
119
120pub fn record_endpoint(func: &str, delta_instructions: u64) {
121    record(PerfKey::Endpoint(func.to_string()), delta_instructions);
122}
123
124pub fn record_timer(label: &str, delta_instructions: u64) {
125    record(PerfKey::Timer(label.to_string()), delta_instructions);
126}
127
128/// Begin an endpoint scope and push it on the stack.
129pub(crate) fn enter_endpoint() {
130    enter_endpoint_at(perf_counter());
131}
132
133/// End the most recent endpoint scope and record exclusive instructions.
134pub(crate) fn exit_endpoint(label: &str) {
135    exit_endpoint_at(label, perf_counter());
136}
137
138fn enter_endpoint_at(start: u64) {
139    PERF_STACK.with(|stack| {
140        let mut stack = stack.borrow_mut();
141
142        // If a previous call trapped, clear any stale frames.
143        if let Some(last) = stack.last() {
144            if start < last.start {
145                stack.clear();
146            }
147        }
148
149        stack.push(PerfFrame {
150            start,
151            child_instructions: 0,
152        });
153    });
154}
155
156fn exit_endpoint_at(label: &str, end: u64) {
157    PERF_STACK.with(|stack| {
158        let mut stack = stack.borrow_mut();
159        let Some(frame) = stack.pop() else {
160            record_endpoint(label, end);
161            return;
162        };
163
164        let total = end.saturating_sub(frame.start);
165        let exclusive = total.saturating_sub(frame.child_instructions);
166
167        if let Some(parent) = stack.last_mut() {
168            parent.child_instructions = parent.child_instructions.saturating_add(total);
169        }
170
171        record_endpoint(label, exclusive);
172    });
173}
174
175/// Snapshot all recorded perf counters, sorted by key.
176#[must_use]
177pub fn entries() -> Vec<PerfEntry> {
178    PERF_TABLE.with(|table| {
179        let table = table.borrow();
180
181        let mut out: Vec<PerfEntry> = table
182            .iter()
183            .map(|(key, slot)| PerfEntry {
184                label: match key {
185                    PerfKey::Endpoint(label) | PerfKey::Timer(label) => label.clone(),
186                },
187                key: key.clone(),
188                count: slot.count,
189                total_instructions: slot.total_instructions,
190            })
191            .collect();
192
193        out.sort_by(|a, b| a.key.cmp(&b.key));
194        out
195    })
196}
197
198///
199/// TESTS
200///
201
202#[cfg(test)]
203pub fn reset() {
204    PERF_TABLE.with(|t| t.borrow_mut().clear());
205    PERF_LAST.with(|last| *last.borrow_mut() = 0);
206    PERF_STACK.with(|stack| stack.borrow_mut().clear());
207}
208
209#[cfg(test)]
210mod tests {
211    use super::*;
212
213    fn checkpoint_at(now: u64) {
214        PERF_LAST.with(|last| *last.borrow_mut() = now);
215    }
216
217    fn entry_for(label: &str) -> PerfEntry {
218        entries()
219            .into_iter()
220            .find(|entry| matches!(entry.key, PerfKey::Endpoint(_)) && entry.label == label)
221            .expect("expected perf entry to exist")
222    }
223
224    #[test]
225    fn nested_endpoints_record_exclusive_totals() {
226        reset();
227
228        enter_endpoint_at(100);
229        checkpoint_at(140);
230
231        enter_endpoint_at(200);
232        checkpoint_at(230);
233        exit_endpoint_at("child", 260);
234
235        exit_endpoint_at("parent", 300);
236
237        let parent = entry_for("parent");
238        let child = entry_for("child");
239
240        assert_eq!(child.count, 1);
241        assert_eq!(child.total_instructions, 60);
242        assert_eq!(parent.count, 1);
243        assert_eq!(parent.total_instructions, 140);
244    }
245}