Skip to main content

canic_core/
perf.rs

1//! Cross-cutting performance instrumentation.
2//!
3//! This module provides instruction-count measurement primitives used
4//! across endpoints, ops, timers, and background tasks.
5//!
6//! It is intentionally crate-level infrastructure, not part of the
7//! domain layering (endpoints → ops → model).
8//! Instrumentation modules are layer-neutral and may be used anywhere.
9
10use crate::ids::{EndpointCall, EndpointCallKind, EndpointId};
11use std::{cell::RefCell, collections::HashMap};
12
13thread_local! {
14    /// Last snapshot used by the `perf!` macro.
15    #[cfg(not(test))]
16    pub static PERF_LAST: RefCell<u64> = RefCell::new(perf_counter());
17
18    // Unit tests run outside a canister context, so `perf_counter()` would trap.
19    #[cfg(test)]
20    pub static PERF_LAST: RefCell<u64> = const { RefCell::new(0) };
21
22    /// Aggregated perf counters keyed by kind (endpoint vs timer) and label.
23    static PERF_TABLE: RefCell<HashMap<PerfKey, PerfSlot>> = RefCell::new(HashMap::new());
24
25    /// Stack of active endpoint scopes for exclusive instruction accounting.
26    /// This is independent of `PERF_LAST`, which is only used by `perf!` checkpoints.
27    static PERF_STACK: RefCell<Vec<PerfFrame>> = const { RefCell::new(Vec::new()) };
28}
29
30/// Returns the **call-context instruction counter** for the current execution.
31///
32/// This value is obtained from `ic0.performance_counter(1)` and represents the
33/// total number of WebAssembly instructions executed by *this canister* within
34/// the **current call context**.
35///
36/// Key properties:
37/// - Monotonically increasing for the duration of the call context
38/// - Accumulates across `await` points and resumptions
39/// - Resets only when a new call context begins
40/// - Counts *only* instructions executed by this canister (not other canisters)
41///
42/// This counter is suitable for:
43/// - Endpoint-level performance accounting
44/// - Async workflows and timers
45/// - Regression detection and coarse-grained profiling
46///
47/// It is **not** a measure of cycle cost. Expensive inter-canister operations
48/// (e.g., canister creation) may have low instruction counts here but high cycle
49/// charges elsewhere.
50///
51/// For fine-grained, single-slice profiling (e.g., hot loops), use
52/// `ic0.performance_counter(0)` instead.
53#[must_use]
54#[cfg_attr(not(target_arch = "wasm32"), expect(clippy::missing_const_for_fn))]
55pub fn perf_counter() -> u64 {
56    #[cfg(target_arch = "wasm32")]
57    {
58        crate::cdk::api::performance_counter(1)
59    }
60
61    #[cfg(not(target_arch = "wasm32"))]
62    {
63        0
64    }
65}
66
67///
68/// PerfKey
69/// Splits perf counters by transport surface so metrics rows remain explicit.
70///
71
72#[derive(Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
73pub enum PerfKey {
74    Endpoint {
75        kind: EndpointCallKind,
76        name: String,
77    },
78    Timer(String),
79    Checkpoint {
80        scope: String,
81        label: String,
82    },
83}
84
85///
86/// PerfFrame
87/// Tracks an active endpoint scope and accumulated child instructions.
88///
89
90struct PerfFrame {
91    start: u64,
92    child_instructions: u64,
93}
94
95///
96/// PerfSlot
97///
98
99#[derive(Default)]
100struct PerfSlot {
101    count: u64,
102    total_instructions: u64,
103}
104
105impl PerfSlot {
106    const fn increment(&mut self, delta: u64) {
107        self.count = self.count.saturating_add(1);
108        self.total_instructions = self.total_instructions.saturating_add(delta);
109    }
110}
111
112///
113/// PerfEntry
114/// Aggregated perf counters keyed by kind (endpoint vs timer) and label.
115///
116
117#[derive(Clone)]
118pub struct PerfEntry {
119    pub key: PerfKey,
120    pub count: u64,
121    pub total_instructions: u64,
122}
123
124/// Record a counter under the provided key.
125pub fn record(key: PerfKey, delta: u64) {
126    PERF_TABLE.with(|table| {
127        let mut table = table.borrow_mut();
128        table.entry(key).or_default().increment(delta);
129    });
130}
131
132pub fn record_endpoint(endpoint: EndpointId, delta_instructions: u64) {
133    record_endpoint_call(
134        EndpointCall {
135            endpoint,
136            kind: EndpointCallKind::Update,
137        },
138        delta_instructions,
139    );
140}
141
142pub fn record_endpoint_call(call: EndpointCall, delta_instructions: u64) {
143    record(
144        PerfKey::Endpoint {
145            kind: call.kind,
146            name: call.endpoint.name.to_string(),
147        },
148        delta_instructions,
149    );
150}
151
152pub fn record_timer(label: &str, delta_instructions: u64) {
153    record(PerfKey::Timer(label.to_string()), delta_instructions);
154}
155
156pub fn record_checkpoint(scope: &str, label: &str, delta_instructions: u64) {
157    record(
158        PerfKey::Checkpoint {
159            scope: scope.to_string(),
160            label: label.to_string(),
161        },
162        delta_instructions,
163    );
164}
165
166/// Begin an endpoint scope and push it on the stack.
167pub(crate) fn enter_endpoint() {
168    enter_endpoint_at(perf_counter());
169}
170
171/// End the most recent endpoint scope and record exclusive instructions.
172pub(crate) fn exit_endpoint(call: EndpointCall) {
173    exit_endpoint_at(call, perf_counter());
174}
175
176fn enter_endpoint_at(start: u64) {
177    PERF_STACK.with(|stack| {
178        let mut stack = stack.borrow_mut();
179
180        // If a previous call trapped, clear any stale frames.
181        if let Some(last) = stack.last()
182            && start < last.start
183        {
184            stack.clear();
185        }
186
187        stack.push(PerfFrame {
188            start,
189            child_instructions: 0,
190        });
191    });
192}
193
194fn exit_endpoint_at(call: EndpointCall, end: u64) {
195    PERF_STACK.with(|stack| {
196        let mut stack = stack.borrow_mut();
197        let Some(frame) = stack.pop() else {
198            record_endpoint_call(call, end);
199            return;
200        };
201
202        let total = end.saturating_sub(frame.start);
203        let exclusive = total.saturating_sub(frame.child_instructions);
204
205        if let Some(parent) = stack.last_mut() {
206            parent.child_instructions = parent.child_instructions.saturating_add(total);
207        }
208
209        record_endpoint_call(call, exclusive);
210    });
211}
212
213/// Snapshot all recorded perf counters, sorted by key.
214/// Entries are sorted by (kind, label).
215#[must_use]
216pub fn entries() -> Vec<PerfEntry> {
217    PERF_TABLE.with(|table| {
218        let table = table.borrow();
219
220        let mut out: Vec<PerfEntry> = table
221            .iter()
222            .map(|(key, slot)| PerfEntry {
223                key: key.clone(),
224                count: slot.count,
225                total_instructions: slot.total_instructions,
226            })
227            .collect();
228
229        out.sort_by(|a, b| a.key.cmp(&b.key));
230        out
231    })
232}
233
234///
235/// TESTS
236///
237
238#[cfg(test)]
239pub fn reset() {
240    PERF_TABLE.with(|t| t.borrow_mut().clear());
241    PERF_LAST.with(|last| *last.borrow_mut() = 0);
242    PERF_STACK.with(|stack| stack.borrow_mut().clear());
243}
244
245#[cfg(test)]
246mod tests {
247    use super::*;
248
249    fn checkpoint_at(now: u64) {
250        PERF_LAST.with(|last| *last.borrow_mut() = now);
251    }
252
253    fn call(name: &'static str, kind: EndpointCallKind) -> EndpointCall {
254        EndpointCall {
255            endpoint: crate::ids::EndpointId::new(name),
256            kind,
257        }
258    }
259
260    fn entry_for(kind: EndpointCallKind, label: &str) -> PerfEntry {
261        entries()
262            .into_iter()
263            .find(|entry| {
264                matches!(
265                    &entry.key,
266                    PerfKey::Endpoint {
267                        kind: entry_kind,
268                        name
269                    } if *entry_kind == kind && name == label
270                )
271            })
272            .expect("expected perf entry to exist")
273    }
274
275    fn checkpoint_entry_for(scope: &str, label: &str) -> PerfEntry {
276        entries()
277            .into_iter()
278            .find(|entry| {
279                matches!(
280                    &entry.key,
281                    PerfKey::Checkpoint {
282                        scope: entry_scope,
283                        label: entry_label,
284                    } if entry_scope == scope && entry_label == label
285                )
286            })
287            .expect("expected checkpoint perf entry to exist")
288    }
289
290    #[test]
291    fn nested_endpoints_record_exclusive_totals() {
292        reset();
293
294        enter_endpoint_at(100);
295        checkpoint_at(140);
296
297        enter_endpoint_at(200);
298        checkpoint_at(230);
299        exit_endpoint_at(call("child", EndpointCallKind::Query), 260);
300
301        exit_endpoint_at(call("parent", EndpointCallKind::Update), 300);
302
303        let parent = entry_for(EndpointCallKind::Update, "parent");
304        let child = entry_for(EndpointCallKind::Query, "child");
305
306        assert_eq!(child.count, 1);
307        assert_eq!(child.total_instructions, 60);
308        assert_eq!(parent.count, 1);
309        assert_eq!(parent.total_instructions, 140);
310    }
311
312    #[test]
313    fn endpoint_perf_keys_preserve_call_kind() {
314        reset();
315
316        record_endpoint_call(call("same_name", EndpointCallKind::Query), 10);
317        record_endpoint_call(call("same_name", EndpointCallKind::QueryComposite), 20);
318        record_endpoint_call(call("same_name", EndpointCallKind::Update), 30);
319
320        assert_eq!(
321            entry_for(EndpointCallKind::Query, "same_name").total_instructions,
322            10
323        );
324        assert_eq!(
325            entry_for(EndpointCallKind::QueryComposite, "same_name").total_instructions,
326            20
327        );
328        assert_eq!(
329            entry_for(EndpointCallKind::Update, "same_name").total_instructions,
330            30
331        );
332    }
333
334    #[test]
335    fn checkpoints_record_scope_and_label() {
336        reset();
337
338        record_checkpoint("workflow::bootstrap", "load_cfg", 120);
339        record_checkpoint("workflow::bootstrap", "load_cfg", 80);
340
341        let checkpoint = checkpoint_entry_for("workflow::bootstrap", "load_cfg");
342
343        assert_eq!(checkpoint.count, 2);
344        assert_eq!(checkpoint.total_instructions, 200);
345    }
346}