canic_core/
perf.rs

1//! Cross-cutting performance instrumentation.
2//!
3//! This module provides instruction-count measurement primitives used
4//! across endpoints, ops, timers, and background tasks.
5//!
6//! It is intentionally crate-level infrastructure, not part of the
7//! domain layering (endpoints → ops → model).
8
9use canic_cdk::candid::CandidType;
10use serde::{Deserialize, Serialize};
11use std::{cell::RefCell, collections::HashMap};
12
13use crate::api::{EndpointCall, EndpointId};
14thread_local! {
15    /// Last snapshot used by the `perf!` macro.
16    #[cfg(not(test))]
17    pub static PERF_LAST: RefCell<u64> = RefCell::new(perf_counter());
18
19    // Unit tests run outside a canister context, so `perf_counter()` would trap.
20    #[cfg(test)]
21    pub static PERF_LAST: RefCell<u64> = const { RefCell::new(0) };
22
23    /// Aggregated perf counters keyed by kind (endpoint vs timer) and label.
24    static PERF_TABLE: RefCell<HashMap<PerfKey, PerfSlot>> = RefCell::new(HashMap::new());
25
26    /// Stack of active endpoint scopes for exclusive instruction accounting.
27    /// This is independent of `PERF_LAST`, which is only used by `perf!` checkpoints.
28    static PERF_STACK: RefCell<Vec<PerfFrame>> = const { RefCell::new(Vec::new()) };
29}
30
31/// Returns the **call-context instruction counter** for the current execution.
32///
33/// This value is obtained from `ic0.performance_counter(1)` and represents the
34/// total number of WebAssembly instructions executed by *this canister* within
35/// the **current call context**.
36///
37/// Key properties:
38/// - Monotonically increasing for the duration of the call context
39/// - Accumulates across `await` points and resumptions
40/// - Resets only when a new call context begins
41/// - Counts *only* instructions executed by this canister (not other canisters)
42///
43/// This counter is suitable for:
44/// - Endpoint-level performance accounting
45/// - Async workflows and timers
46/// - Regression detection and coarse-grained profiling
47///
48/// It is **not** a measure of cycle cost. Expensive inter-canister operations
49/// (e.g., canister creation) may have low instruction counts here but high cycle
50/// charges elsewhere.
51///
52/// For fine-grained, single-slice profiling (e.g., hot loops), use
53/// `ic0.performance_counter(0)` instead.
54#[must_use]
55pub fn perf_counter() -> u64 {
56    crate::cdk::api::performance_counter(1)
57}
58
59///
60/// PerfKey
61/// splitting up by Timer type to avoid confusing string comparisons
62///
63
64#[derive(
65    CandidType, Clone, Debug, Deserialize, Serialize, Eq, Hash, Ord, PartialEq, PartialOrd,
66)]
67pub enum PerfKey {
68    Endpoint(String),
69    Timer(String),
70}
71
72///
73/// PerfFrame
74/// Tracks an active endpoint scope and accumulated child instructions.
75///
76
77struct PerfFrame {
78    start: u64,
79    child_instructions: u64,
80}
81
82///
83/// PerfSlot
84///
85
86#[derive(Default)]
87struct PerfSlot {
88    count: u64,
89    total_instructions: u64,
90}
91
92impl PerfSlot {
93    const fn increment(&mut self, delta: u64) {
94        self.count = self.count.saturating_add(1);
95        self.total_instructions = self.total_instructions.saturating_add(delta);
96    }
97}
98
99///
100/// PerfEntry
101/// Aggregated perf counters keyed by kind (endpoint vs timer) and label.
102///
103
104#[derive(CandidType, Clone, Debug, Deserialize, Serialize)]
105pub struct PerfEntry {
106    pub key: PerfKey,
107    pub count: u64,
108    pub total_instructions: u64,
109}
110
111/// Record a counter under the provided key.
112pub fn record(key: PerfKey, delta: u64) {
113    PERF_TABLE.with(|table| {
114        let mut table = table.borrow_mut();
115        table.entry(key).or_default().increment(delta);
116    });
117}
118
119pub fn record_endpoint(endpoint: EndpointId, delta_instructions: u64) {
120    record(
121        PerfKey::Endpoint(endpoint.name.to_string()),
122        delta_instructions,
123    );
124}
125
126pub fn record_timer(label: &str, delta_instructions: u64) {
127    record(PerfKey::Timer(label.to_string()), delta_instructions);
128}
129
130/// Begin an endpoint scope and push it on the stack.
131pub(crate) fn enter_endpoint() {
132    enter_endpoint_at(perf_counter());
133}
134
135/// End the most recent endpoint scope and record exclusive instructions.
136pub(crate) fn exit_endpoint(call: EndpointCall) {
137    exit_endpoint_at(call.endpoint, perf_counter());
138}
139
140fn enter_endpoint_at(start: u64) {
141    PERF_STACK.with(|stack| {
142        let mut stack = stack.borrow_mut();
143
144        // If a previous call trapped, clear any stale frames.
145        if let Some(last) = stack.last()
146            && start < last.start
147        {
148            stack.clear();
149        }
150
151        stack.push(PerfFrame {
152            start,
153            child_instructions: 0,
154        });
155    });
156}
157
158fn exit_endpoint_at(endpoint: EndpointId, end: u64) {
159    PERF_STACK.with(|stack| {
160        let mut stack = stack.borrow_mut();
161        let Some(frame) = stack.pop() else {
162            record_endpoint(endpoint, end);
163            return;
164        };
165
166        let total = end.saturating_sub(frame.start);
167        let exclusive = total.saturating_sub(frame.child_instructions);
168
169        if let Some(parent) = stack.last_mut() {
170            parent.child_instructions = parent.child_instructions.saturating_add(total);
171        }
172
173        record_endpoint(endpoint, exclusive);
174    });
175}
176
177/// Snapshot all recorded perf counters, sorted by key.
178/// Entries are sorted by (kind, label).
179#[must_use]
180pub fn entries() -> Vec<PerfEntry> {
181    PERF_TABLE.with(|table| {
182        let table = table.borrow();
183
184        let mut out: Vec<PerfEntry> = table
185            .iter()
186            .map(|(key, slot)| PerfEntry {
187                key: key.clone(),
188                count: slot.count,
189                total_instructions: slot.total_instructions,
190            })
191            .collect();
192
193        out.sort_by(|a, b| a.key.cmp(&b.key));
194        out
195    })
196}
197
198///
199/// TESTS
200///
201
202#[cfg(test)]
203pub fn reset() {
204    PERF_TABLE.with(|t| t.borrow_mut().clear());
205    PERF_LAST.with(|last| *last.borrow_mut() = 0);
206    PERF_STACK.with(|stack| stack.borrow_mut().clear());
207}
208
209#[cfg(test)]
210mod tests {
211    use super::*;
212
213    fn checkpoint_at(now: u64) {
214        PERF_LAST.with(|last| *last.borrow_mut() = now);
215    }
216
217    fn entry_for(label: &str) -> PerfEntry {
218        entries()
219            .into_iter()
220            .find(|entry| matches!(&entry.key, PerfKey::Endpoint(l) if l == label))
221            .expect("expected perf entry to exist")
222    }
223
224    #[test]
225    fn nested_endpoints_record_exclusive_totals() {
226        reset();
227
228        enter_endpoint_at(100);
229        checkpoint_at(140);
230
231        enter_endpoint_at(200);
232        checkpoint_at(230);
233        exit_endpoint_at(EndpointId::new("child"), 260);
234
235        exit_endpoint_at(EndpointId::new("parent"), 300);
236
237        let parent = entry_for("parent");
238        let child = entry_for("child");
239
240        assert_eq!(child.count, 1);
241        assert_eq!(child.total_instructions, 60);
242        assert_eq!(parent.count, 1);
243        assert_eq!(parent.total_instructions, 140);
244    }
245}