Skip to main content

icydb_core/obs/metrics/
mod.rs

1//! Runtime metrics are update-only by contract.
2//! Query-side instrumentation is intentionally not surfaced by `report`, so
3//! query metrics are non-existent by design under IC query semantics.
4
5use candid::CandidType;
6use canic_cdk::utils::time::now_millis;
7use serde::{Deserialize, Serialize};
8use std::{cell::RefCell, cmp::Ordering, collections::BTreeMap};
9
/// EventState
///
/// Mutable runtime counters and rolling perf state for the current window.
/// Stored in thread-local memory for update-only instrumentation.
#[derive(CandidType, Clone, Debug, Deserialize, Serialize)]
pub struct EventState {
    /// Aggregated operation counters (see `EventOps`).
    pub ops: EventOps,
    /// Instruction totals and maxima per executor kind (see `EventPerf`).
    pub perf: EventPerf,
    /// Per-entity counters; the map key is surfaced as `EntitySummary::path` in reports.
    pub entities: BTreeMap<String, EntityCounters>,
    /// Start of the current metrics window; `Default` stamps it with `now_millis()`.
    pub window_start_ms: u64,
}
21
22impl Default for EventState {
23    fn default() -> Self {
24        Self {
25            ops: EventOps::default(),
26            perf: EventPerf::default(),
27            entities: BTreeMap::new(),
28            window_start_ms: now_millis(),
29        }
30    }
31}
32
/// EventOps
///
/// Aggregated operation counters for executors, plans, rows, and index maintenance.
/// Values are monotonic within a metrics window.
/// Call counters are execution attempts; errors still increment them.
/// Row counters reflect rows touched after execution, not requested rows.
#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
pub struct EventOps {
    // Executor entrypoints
    pub load_calls: u64,
    pub save_calls: u64,
    pub delete_calls: u64,

    // Planner kinds
    pub plan_index: u64,
    pub plan_keys: u64,
    pub plan_range: u64,
    pub plan_full_scan: u64,

    // Rows touched
    pub rows_loaded: u64,
    pub rows_scanned: u64,
    pub rows_deleted: u64,

    // Index maintenance
    // NOTE(review): this group also carries relation, uniqueness, and
    // non-atomic commit counters; their increment sites are outside this
    // file, so confirm exact semantics against the callers.
    pub index_inserts: u64,
    pub index_removes: u64,
    pub reverse_index_inserts: u64,
    pub reverse_index_removes: u64,
    pub relation_reverse_lookups: u64,
    pub relation_delete_blocks: u64,
    pub unique_violations: u64,
    pub non_atomic_partial_commits: u64,
    pub non_atomic_partial_rows_committed: u64,
}
67
/// EntityCounters
///
/// Per-entity counters mirroring `EventOps` categories.
/// Used to compute report-level per-entity summaries.
/// Unlike `EventOps`, there are no planner-kind (`plan_*`) fields here.
#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
pub struct EntityCounters {
    // Executor entrypoints
    pub load_calls: u64,
    pub save_calls: u64,
    pub delete_calls: u64,
    // Rows touched
    pub rows_loaded: u64,
    pub rows_scanned: u64,
    pub rows_deleted: u64,
    // Index maintenance and related counters (same field names as `EventOps`)
    pub index_inserts: u64,
    pub index_removes: u64,
    pub reverse_index_inserts: u64,
    pub reverse_index_removes: u64,
    pub relation_reverse_lookups: u64,
    pub relation_delete_blocks: u64,
    pub unique_violations: u64,
    pub non_atomic_partial_commits: u64,
    pub non_atomic_partial_rows_committed: u64,
}
90
/// EventPerf
///
/// Aggregate and max instruction deltas per executor kind.
/// Instruction deltas are pressure indicators (validation + planning + execution),
/// not wall-clock latency measurements.
#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
pub struct EventPerf {
    // Instruction totals per executor (ic_cdk::api::performance_counter(1))
    // Totals are `u128`; presumably accumulated via `add_instructions`, whose
    // `(total, max)` parameter types match these fields — confirm at call sites.
    pub load_inst_total: u128,
    pub save_inst_total: u128,
    pub delete_inst_total: u128,

    // Maximum observed instruction deltas
    pub load_inst_max: u64,
    pub save_inst_max: u64,
    pub delete_inst_max: u64,
}
108
109thread_local! {
110    static EVENT_STATE: RefCell<EventState> = RefCell::new(EventState::default());
111}
112
113/// Borrow metrics immutably.
114pub(crate) fn with_state<R>(f: impl FnOnce(&EventState) -> R) -> R {
115    EVENT_STATE.with(|m| f(&m.borrow()))
116}
117
118/// Borrow metrics mutably.
119pub(crate) fn with_state_mut<R>(f: impl FnOnce(&mut EventState) -> R) -> R {
120    EVENT_STATE.with(|m| f(&mut m.borrow_mut()))
121}
122
123/// Reset all counters (useful in tests).
124pub(super) fn reset() {
125    with_state_mut(|m| *m = EventState::default());
126}
127
/// Reset all event state: operation counters, perf totals, and per-entity counters.
///
/// Currently a thin crate-visible wrapper around `reset`, which replaces the
/// whole `EventState` with its default.
pub(crate) fn reset_all() {
    reset();
}
132
133/// Accumulate instruction counts and track a max.
134pub(super) fn add_instructions(total: &mut u128, max: &mut u64, delta_inst: u64) {
135    *total = total.saturating_add(u128::from(delta_inst));
136    if delta_inst > *max {
137        *max = delta_inst;
138    }
139}
140
/// EventReport
///
/// Event/counter report for runtime metrics query endpoints.
/// Storage snapshot types live in snapshot/storage modules.
#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
pub struct EventReport {
    /// Ephemeral runtime counters since `window_start_ms`.
    /// `None` in the default (empty) report that `report_window_start`
    /// returns when the requested window is newer than the current one.
    pub counters: Option<EventState>,
    /// Per-entity ephemeral counters and averages.
    /// Empty in the default report; otherwise ordered by `report_window_start`.
    pub entity_counters: Vec<EntitySummary>,
}
152
/// EntitySummary
///
/// Derived per-entity metrics for report consumers.
/// Includes absolute counters and simple averages.
///
/// NOTE(review): `EntityCounters::save_calls` has no counterpart here —
/// confirm whether omitting it from the summary is intentional.
#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
pub struct EntitySummary {
    /// Key of the entity in `EventState::entities`.
    pub path: String,
    pub load_calls: u64,
    pub delete_calls: u64,
    pub rows_loaded: u64,
    pub rows_scanned: u64,
    pub rows_deleted: u64,
    /// `rows_loaded / load_calls` as computed by `report_window_start`; `0.0` when there were no load calls.
    pub avg_rows_per_load: f64,
    /// `rows_scanned / load_calls` as computed by `report_window_start`; `0.0` when there were no load calls.
    pub avg_rows_scanned_per_load: f64,
    /// `rows_deleted / delete_calls` as computed by `report_window_start`; `0.0` when there were no delete calls.
    pub avg_rows_per_delete: f64,
    pub index_inserts: u64,
    pub index_removes: u64,
    pub reverse_index_inserts: u64,
    pub reverse_index_removes: u64,
    pub relation_reverse_lookups: u64,
    pub relation_delete_blocks: u64,
    pub unique_violations: u64,
    pub non_atomic_partial_commits: u64,
    pub non_atomic_partial_rows_committed: u64,
}
178
179/// Build a metrics report gated by `window_start_ms`.
180///
181/// This is a window-start filter:
182/// - If `window_start_ms` is `None`, return the current window.
183/// - If `window_start_ms <= state.window_start_ms`, return the current window.
184/// - If `window_start_ms > state.window_start_ms`, return an empty report.
185///
186/// IcyDB stores aggregate counters only, so it cannot produce a precise
187/// sub-window report after `state.window_start_ms`.
188#[must_use]
189#[expect(clippy::cast_precision_loss)]
190pub(super) fn report_window_start(window_start_ms: Option<u64>) -> EventReport {
191    let snap = with_state(Clone::clone);
192    if let Some(requested_window_start_ms) = window_start_ms
193        && requested_window_start_ms > snap.window_start_ms
194    {
195        return EventReport::default();
196    }
197
198    let mut entity_counters: Vec<EntitySummary> = Vec::new();
199    for (path, ops) in &snap.entities {
200        let avg_load = if ops.load_calls > 0 {
201            ops.rows_loaded as f64 / ops.load_calls as f64
202        } else {
203            0.0
204        };
205        let avg_scanned = if ops.load_calls > 0 {
206            ops.rows_scanned as f64 / ops.load_calls as f64
207        } else {
208            0.0
209        };
210        let avg_delete = if ops.delete_calls > 0 {
211            ops.rows_deleted as f64 / ops.delete_calls as f64
212        } else {
213            0.0
214        };
215
216        entity_counters.push(EntitySummary {
217            path: path.clone(),
218            load_calls: ops.load_calls,
219            delete_calls: ops.delete_calls,
220            rows_loaded: ops.rows_loaded,
221            rows_scanned: ops.rows_scanned,
222            rows_deleted: ops.rows_deleted,
223            avg_rows_per_load: avg_load,
224            avg_rows_scanned_per_load: avg_scanned,
225            avg_rows_per_delete: avg_delete,
226            index_inserts: ops.index_inserts,
227            index_removes: ops.index_removes,
228            reverse_index_inserts: ops.reverse_index_inserts,
229            reverse_index_removes: ops.reverse_index_removes,
230            relation_reverse_lookups: ops.relation_reverse_lookups,
231            relation_delete_blocks: ops.relation_delete_blocks,
232            unique_violations: ops.unique_violations,
233            non_atomic_partial_commits: ops.non_atomic_partial_commits,
234            non_atomic_partial_rows_committed: ops.non_atomic_partial_rows_committed,
235        });
236    }
237
238    entity_counters.sort_by(|a, b| {
239        match b
240            .avg_rows_per_load
241            .partial_cmp(&a.avg_rows_per_load)
242            .unwrap_or(Ordering::Equal)
243        {
244            Ordering::Equal => match b.rows_loaded.cmp(&a.rows_loaded) {
245                Ordering::Equal => a.path.cmp(&b.path),
246                other => other,
247            },
248            other => other,
249        }
250    });
251
252    EventReport {
253        counters: Some(snap),
254        entity_counters,
255    }
256}
257
258///
259/// TESTS
260///
261
#[cfg(test)]
// The assertions below compare averages against exact literals (5.0, 3.0),
// which is what `clippy::float_cmp` would otherwise flag.
#[expect(clippy::float_cmp)]
mod tests {
    use crate::obs::metrics::{
        EntityCounters, report_window_start, reset_all, with_state, with_state_mut,
    };

    /// `reset_all` must zero op counters, perf maxima, and drop all entity entries.
    #[test]
    fn reset_all_clears_state() {
        // Dirty one field in each part of the state: ops, perf, and entities.
        with_state_mut(|m| {
            m.ops.load_calls = 3;
            m.ops.index_inserts = 2;
            m.perf.save_inst_max = 9;
            m.entities.insert(
                "alpha".to_string(),
                EntityCounters {
                    load_calls: 1,
                    ..Default::default()
                },
            );
        });

        reset_all();

        with_state(|m| {
            assert_eq!(m.ops.load_calls, 0);
            assert_eq!(m.ops.index_inserts, 0);
            assert_eq!(m.perf.save_inst_max, 0);
            assert!(m.entities.is_empty());
        });
    }

    /// The report orders entities by average rows per load, with deterministic tie-breaks.
    #[test]
    fn report_sorts_entities_by_average_rows() {
        reset_all();
        with_state_mut(|m| {
            // alpha and gamma tie on both average (3.0) and rows_loaded (6),
            // so only the path tie-break separates them.
            m.entities.insert(
                "alpha".to_string(),
                EntityCounters {
                    load_calls: 2,
                    rows_loaded: 6,
                    ..Default::default()
                },
            );
            // beta has the highest average (5.0) despite fewer rows loaded.
            m.entities.insert(
                "beta".to_string(),
                EntityCounters {
                    load_calls: 1,
                    rows_loaded: 5,
                    ..Default::default()
                },
            );
            m.entities.insert(
                "gamma".to_string(),
                EntityCounters {
                    load_calls: 2,
                    rows_loaded: 6,
                    ..Default::default()
                },
            );
        });

        let report = report_window_start(None);
        let paths: Vec<_> = report
            .entity_counters
            .iter()
            .map(|e| e.path.as_str())
            .collect();

        // Order by avg rows per load desc, then rows_loaded desc, then path asc.
        assert_eq!(paths, ["beta", "alpha", "gamma"]);
        assert_eq!(report.entity_counters[0].avg_rows_per_load, 5.0);
        assert_eq!(report.entity_counters[1].avg_rows_per_load, 3.0);
        assert_eq!(report.entity_counters[2].avg_rows_per_load, 3.0);
    }
}
337}