canic_core/
perf.rs

1//! Cross-cutting performance instrumentation.
2//!
3//! This module provides instruction-count measurement primitives used
4//! across endpoints, ops, timers, and background tasks.
5//!
6//! It is intentionally crate-level infrastructure, not part of the
7//! domain layering (endpoints → ops → model).
8//! Instrumentation modules are layer-neutral and may be used anywhere.
9
10use crate::ids::{EndpointCall, EndpointId};
11use canic_cdk::candid::CandidType;
12use serde::{Deserialize, Serialize};
13use std::{cell::RefCell, collections::HashMap};
14
15thread_local! {
16    /// Last snapshot used by the `perf!` macro.
17    #[cfg(not(test))]
18    pub static PERF_LAST: RefCell<u64> = RefCell::new(perf_counter());
19
20    // Unit tests run outside a canister context, so `perf_counter()` would trap.
21    #[cfg(test)]
22    pub static PERF_LAST: RefCell<u64> = const { RefCell::new(0) };
23
24    /// Aggregated perf counters keyed by kind (endpoint vs timer) and label.
25    static PERF_TABLE: RefCell<HashMap<PerfKey, PerfSlot>> = RefCell::new(HashMap::new());
26
27    /// Stack of active endpoint scopes for exclusive instruction accounting.
28    /// This is independent of `PERF_LAST`, which is only used by `perf!` checkpoints.
29    static PERF_STACK: RefCell<Vec<PerfFrame>> = const { RefCell::new(Vec::new()) };
30}
31
32/// Returns the **call-context instruction counter** for the current execution.
33///
34/// This value is obtained from `ic0.performance_counter(1)` and represents the
35/// total number of WebAssembly instructions executed by *this canister* within
36/// the **current call context**.
37///
38/// Key properties:
39/// - Monotonically increasing for the duration of the call context
40/// - Accumulates across `await` points and resumptions
41/// - Resets only when a new call context begins
42/// - Counts *only* instructions executed by this canister (not other canisters)
43///
44/// This counter is suitable for:
45/// - Endpoint-level performance accounting
46/// - Async workflows and timers
47/// - Regression detection and coarse-grained profiling
48///
49/// It is **not** a measure of cycle cost. Expensive inter-canister operations
50/// (e.g., canister creation) may have low instruction counts here but high cycle
51/// charges elsewhere.
52///
53/// For fine-grained, single-slice profiling (e.g., hot loops), use
54/// `ic0.performance_counter(0)` instead.
55#[must_use]
56pub fn perf_counter() -> u64 {
57    crate::cdk::api::performance_counter(1)
58}
59
60///
61/// PerfKey
62/// splitting up by Timer type to avoid confusing string comparisons
63///
64
65#[derive(
66    CandidType, Clone, Debug, Deserialize, Serialize, Eq, Hash, Ord, PartialEq, PartialOrd,
67)]
68pub enum PerfKey {
69    Endpoint(String),
70    Timer(String),
71}
72
73///
74/// PerfFrame
75/// Tracks an active endpoint scope and accumulated child instructions.
76///
77
78struct PerfFrame {
79    start: u64,
80    child_instructions: u64,
81}
82
83///
84/// PerfSlot
85///
86
87#[derive(Default)]
88struct PerfSlot {
89    count: u64,
90    total_instructions: u64,
91}
92
93impl PerfSlot {
94    const fn increment(&mut self, delta: u64) {
95        self.count = self.count.saturating_add(1);
96        self.total_instructions = self.total_instructions.saturating_add(delta);
97    }
98}
99
100///
101/// PerfEntry
102/// Aggregated perf counters keyed by kind (endpoint vs timer) and label.
103///
104
105#[derive(CandidType, Clone, Debug, Deserialize, Serialize)]
106pub struct PerfEntry {
107    pub key: PerfKey,
108    pub count: u64,
109    pub total_instructions: u64,
110}
111
112/// Record a counter under the provided key.
113pub fn record(key: PerfKey, delta: u64) {
114    PERF_TABLE.with(|table| {
115        let mut table = table.borrow_mut();
116        table.entry(key).or_default().increment(delta);
117    });
118}
119
120pub fn record_endpoint(endpoint: EndpointId, delta_instructions: u64) {
121    record(
122        PerfKey::Endpoint(endpoint.name.to_string()),
123        delta_instructions,
124    );
125}
126
127pub fn record_timer(label: &str, delta_instructions: u64) {
128    record(PerfKey::Timer(label.to_string()), delta_instructions);
129}
130
131/// Begin an endpoint scope and push it on the stack.
132pub(crate) fn enter_endpoint() {
133    enter_endpoint_at(perf_counter());
134}
135
136/// End the most recent endpoint scope and record exclusive instructions.
137pub(crate) fn exit_endpoint(call: EndpointCall) {
138    exit_endpoint_at(call.endpoint, perf_counter());
139}
140
141fn enter_endpoint_at(start: u64) {
142    PERF_STACK.with(|stack| {
143        let mut stack = stack.borrow_mut();
144
145        // If a previous call trapped, clear any stale frames.
146        if let Some(last) = stack.last()
147            && start < last.start
148        {
149            stack.clear();
150        }
151
152        stack.push(PerfFrame {
153            start,
154            child_instructions: 0,
155        });
156    });
157}
158
159fn exit_endpoint_at(endpoint: EndpointId, end: u64) {
160    PERF_STACK.with(|stack| {
161        let mut stack = stack.borrow_mut();
162        let Some(frame) = stack.pop() else {
163            record_endpoint(endpoint, end);
164            return;
165        };
166
167        let total = end.saturating_sub(frame.start);
168        let exclusive = total.saturating_sub(frame.child_instructions);
169
170        if let Some(parent) = stack.last_mut() {
171            parent.child_instructions = parent.child_instructions.saturating_add(total);
172        }
173
174        record_endpoint(endpoint, exclusive);
175    });
176}
177
178/// Snapshot all recorded perf counters, sorted by key.
179/// Entries are sorted by (kind, label).
180#[must_use]
181pub fn entries() -> Vec<PerfEntry> {
182    PERF_TABLE.with(|table| {
183        let table = table.borrow();
184
185        let mut out: Vec<PerfEntry> = table
186            .iter()
187            .map(|(key, slot)| PerfEntry {
188                key: key.clone(),
189                count: slot.count,
190                total_instructions: slot.total_instructions,
191            })
192            .collect();
193
194        out.sort_by(|a, b| a.key.cmp(&b.key));
195        out
196    })
197}
198
199///
200/// TESTS
201///
202
203#[cfg(test)]
204pub fn reset() {
205    PERF_TABLE.with(|t| t.borrow_mut().clear());
206    PERF_LAST.with(|last| *last.borrow_mut() = 0);
207    PERF_STACK.with(|stack| stack.borrow_mut().clear());
208}
209
210#[cfg(test)]
211mod tests {
212    use super::*;
213
214    fn checkpoint_at(now: u64) {
215        PERF_LAST.with(|last| *last.borrow_mut() = now);
216    }
217
218    fn entry_for(label: &str) -> PerfEntry {
219        entries()
220            .into_iter()
221            .find(|entry| matches!(&entry.key, PerfKey::Endpoint(l) if l == label))
222            .expect("expected perf entry to exist")
223    }
224
225    #[test]
226    fn nested_endpoints_record_exclusive_totals() {
227        reset();
228
229        enter_endpoint_at(100);
230        checkpoint_at(140);
231
232        enter_endpoint_at(200);
233        checkpoint_at(230);
234        exit_endpoint_at(EndpointId::new("child"), 260);
235
236        exit_endpoint_at(EndpointId::new("parent"), 300);
237
238        let parent = entry_for("parent");
239        let child = entry_for("child");
240
241        assert_eq!(child.count, 1);
242        assert_eq!(child.total_instructions, 60);
243        assert_eq!(parent.count, 1);
244        assert_eq!(parent.total_instructions, 140);
245    }
246}