Skip to main content

icydb_core/obs/metrics/
mod.rs

1//! Runtime metrics are update-only by contract.
2//! Query-side instrumentation is intentionally not surfaced by `report`, so
3//! query metrics are non-existent by design under IC query semantics.
4
5use candid::CandidType;
6use canic_cdk::utils::time::now_millis;
7use serde::{Deserialize, Serialize};
8use std::{cell::RefCell, cmp::Ordering, collections::BTreeMap};
9
10/// EventState
11/// Mutable runtime counters and rolling perf state for the current window.
12/// Stored in thread-local memory for update-only instrumentation.
13
14#[derive(CandidType, Clone, Debug, Deserialize, Serialize)]
15pub struct EventState {
16    pub ops: EventOps,
17    pub perf: EventPerf,
18    pub entities: BTreeMap<String, EntityCounters>,
19    pub window_start_ms: u64,
20}
21
22impl Default for EventState {
23    fn default() -> Self {
24        Self {
25            ops: EventOps::default(),
26            perf: EventPerf::default(),
27            entities: BTreeMap::new(),
28            window_start_ms: now_millis(),
29        }
30    }
31}
32
33/// EventOps
34/// Aggregated operation counters for executors, plans, rows, and index maintenance.
35/// Values are monotonic within a metrics window.
36/// Call counters are execution attempts; errors still increment them.
37/// Row counters reflect rows touched after execution, not requested rows.
38#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
39pub struct EventOps {
40    // Executor entrypoints
41    pub load_calls: u64,
42    pub save_calls: u64,
43    pub delete_calls: u64,
44
45    // Planner kinds
46    pub plan_index: u64,
47    pub plan_keys: u64,
48    pub plan_range: u64,
49    pub plan_full_scan: u64,
50    pub plan_grouped_hash_materialized: u64,
51    pub plan_grouped_ordered_materialized: u64,
52
53    // Rows touched
54    pub rows_loaded: u64,
55    pub rows_scanned: u64,
56    pub rows_deleted: u64,
57
58    // Index maintenance
59    pub index_inserts: u64,
60    pub index_removes: u64,
61    pub reverse_index_inserts: u64,
62    pub reverse_index_removes: u64,
63    pub relation_reverse_lookups: u64,
64    pub relation_delete_blocks: u64,
65    pub unique_violations: u64,
66    pub non_atomic_partial_commits: u64,
67    pub non_atomic_partial_rows_committed: u64,
68}
69
70/// EntityCounters
71/// Per-entity counters mirroring `EventOps` categories.
72/// Used to compute report-level per-entity summaries.
73
74#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
75pub struct EntityCounters {
76    pub load_calls: u64,
77    pub save_calls: u64,
78    pub delete_calls: u64,
79    pub rows_loaded: u64,
80    pub rows_scanned: u64,
81    pub rows_deleted: u64,
82    pub index_inserts: u64,
83    pub index_removes: u64,
84    pub reverse_index_inserts: u64,
85    pub reverse_index_removes: u64,
86    pub relation_reverse_lookups: u64,
87    pub relation_delete_blocks: u64,
88    pub unique_violations: u64,
89    pub non_atomic_partial_commits: u64,
90    pub non_atomic_partial_rows_committed: u64,
91}
92
93/// EventPerf
94/// Aggregate and max instruction deltas per executor kind.
95/// Captures execution pressure, not wall-clock latency.
96/// Instruction deltas are pressure indicators (validation + planning + execution),
97/// not latency measurements.
98#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
99pub struct EventPerf {
100    // Instruction totals per executor (ic_cdk::api::performance_counter(1))
101    pub load_inst_total: u128,
102    pub save_inst_total: u128,
103    pub delete_inst_total: u128,
104
105    // Maximum observed instruction deltas
106    pub load_inst_max: u64,
107    pub save_inst_max: u64,
108    pub delete_inst_max: u64,
109}
110
111thread_local! {
112    static EVENT_STATE: RefCell<EventState> = RefCell::new(EventState::default());
113}
114
115/// Borrow metrics immutably.
116pub(crate) fn with_state<R>(f: impl FnOnce(&EventState) -> R) -> R {
117    EVENT_STATE.with(|m| f(&m.borrow()))
118}
119
120/// Borrow metrics mutably.
121pub(crate) fn with_state_mut<R>(f: impl FnOnce(&mut EventState) -> R) -> R {
122    EVENT_STATE.with(|m| f(&mut m.borrow_mut()))
123}
124
125/// Reset all counters (useful in tests).
126pub(super) fn reset() {
127    with_state_mut(|m| *m = EventState::default());
128}
129
130/// Reset all event state: counters, perf, and serialize counters.
131pub(crate) fn reset_all() {
132    reset();
133}
134
135/// Accumulate instruction counts and track a max.
136pub(super) fn add_instructions(total: &mut u128, max: &mut u64, delta_inst: u64) {
137    *total = total.saturating_add(u128::from(delta_inst));
138    if delta_inst > *max {
139        *max = delta_inst;
140    }
141}
142
143/// EventReport
144/// Event/counter report for runtime metrics query endpoints.
145/// Storage snapshot types live in snapshot/storage modules.
146
147#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
148pub struct EventReport {
149    /// Ephemeral runtime counters since `window_start_ms`.
150    pub counters: Option<EventState>,
151    /// Per-entity ephemeral counters and averages.
152    pub entity_counters: Vec<EntitySummary>,
153}
154
155/// EntitySummary
156/// Derived per-entity metrics for report consumers.
157/// Includes absolute counters and simple averages.
158
159#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
160pub struct EntitySummary {
161    pub path: String,
162    pub load_calls: u64,
163    pub delete_calls: u64,
164    pub rows_loaded: u64,
165    pub rows_scanned: u64,
166    pub rows_deleted: u64,
167    pub avg_rows_per_load: f64,
168    pub avg_rows_scanned_per_load: f64,
169    pub avg_rows_per_delete: f64,
170    pub index_inserts: u64,
171    pub index_removes: u64,
172    pub reverse_index_inserts: u64,
173    pub reverse_index_removes: u64,
174    pub relation_reverse_lookups: u64,
175    pub relation_delete_blocks: u64,
176    pub unique_violations: u64,
177    pub non_atomic_partial_commits: u64,
178    pub non_atomic_partial_rows_committed: u64,
179}
180
181/// Build a metrics report gated by `window_start_ms`.
182///
183/// This is a window-start filter:
184/// - If `window_start_ms` is `None`, return the current window.
185/// - If `window_start_ms <= state.window_start_ms`, return the current window.
186/// - If `window_start_ms > state.window_start_ms`, return an empty report.
187///
188/// IcyDB stores aggregate counters only, so it cannot produce a precise
189/// sub-window report after `state.window_start_ms`.
190#[must_use]
191#[expect(clippy::cast_precision_loss)]
192pub(super) fn report_window_start(window_start_ms: Option<u64>) -> EventReport {
193    let snap = with_state(Clone::clone);
194    if let Some(requested_window_start_ms) = window_start_ms
195        && requested_window_start_ms > snap.window_start_ms
196    {
197        return EventReport::default();
198    }
199
200    let mut entity_counters: Vec<EntitySummary> = Vec::new();
201    for (path, ops) in &snap.entities {
202        let avg_load = if ops.load_calls > 0 {
203            ops.rows_loaded as f64 / ops.load_calls as f64
204        } else {
205            0.0
206        };
207        let avg_scanned = if ops.load_calls > 0 {
208            ops.rows_scanned as f64 / ops.load_calls as f64
209        } else {
210            0.0
211        };
212        let avg_delete = if ops.delete_calls > 0 {
213            ops.rows_deleted as f64 / ops.delete_calls as f64
214        } else {
215            0.0
216        };
217
218        entity_counters.push(EntitySummary {
219            path: path.clone(),
220            load_calls: ops.load_calls,
221            delete_calls: ops.delete_calls,
222            rows_loaded: ops.rows_loaded,
223            rows_scanned: ops.rows_scanned,
224            rows_deleted: ops.rows_deleted,
225            avg_rows_per_load: avg_load,
226            avg_rows_scanned_per_load: avg_scanned,
227            avg_rows_per_delete: avg_delete,
228            index_inserts: ops.index_inserts,
229            index_removes: ops.index_removes,
230            reverse_index_inserts: ops.reverse_index_inserts,
231            reverse_index_removes: ops.reverse_index_removes,
232            relation_reverse_lookups: ops.relation_reverse_lookups,
233            relation_delete_blocks: ops.relation_delete_blocks,
234            unique_violations: ops.unique_violations,
235            non_atomic_partial_commits: ops.non_atomic_partial_commits,
236            non_atomic_partial_rows_committed: ops.non_atomic_partial_rows_committed,
237        });
238    }
239
240    entity_counters.sort_by(|a, b| {
241        match b
242            .avg_rows_per_load
243            .partial_cmp(&a.avg_rows_per_load)
244            .unwrap_or(Ordering::Equal)
245        {
246            Ordering::Equal => match b.rows_loaded.cmp(&a.rows_loaded) {
247                Ordering::Equal => a.path.cmp(&b.path),
248                other => other,
249            },
250            other => other,
251        }
252    });
253
254    EventReport {
255        counters: Some(snap),
256        entity_counters,
257    }
258}
259
///
/// TESTS
///
#[cfg(test)]
#[expect(clippy::float_cmp)]
mod tests {
    use crate::obs::metrics::{
        EntityCounters, report_window_start, reset_all, with_state, with_state_mut,
    };

    /// Insert (or overwrite) an entity that only has load activity recorded.
    fn seed_loads(path: &str, load_calls: u64, rows_loaded: u64) {
        with_state_mut(|state| {
            state.entities.insert(
                path.to_string(),
                EntityCounters {
                    load_calls,
                    rows_loaded,
                    ..Default::default()
                },
            );
        });
    }

    #[test]
    fn reset_all_clears_state() {
        with_state_mut(|state| {
            state.ops.load_calls = 3;
            state.ops.index_inserts = 2;
            state.perf.save_inst_max = 9;
        });
        seed_loads("alpha", 1, 0);

        reset_all();

        with_state(|state| {
            assert_eq!(state.ops.load_calls, 0);
            assert_eq!(state.ops.index_inserts, 0);
            assert_eq!(state.perf.save_inst_max, 0);
            assert!(state.entities.is_empty());
        });
    }

    #[test]
    fn report_sorts_entities_by_average_rows() {
        reset_all();
        seed_loads("alpha", 2, 6);
        seed_loads("beta", 1, 5);
        seed_loads("gamma", 2, 6);

        let report = report_window_start(None);
        let paths: Vec<_> = report
            .entity_counters
            .iter()
            .map(|e| e.path.as_str())
            .collect();

        // Order by avg rows per load desc, then rows_loaded desc, then path asc.
        assert_eq!(paths, ["beta", "alpha", "gamma"]);
        assert_eq!(report.entity_counters[0].avg_rows_per_load, 5.0);
        assert_eq!(report.entity_counters[1].avg_rows_per_load, 3.0);
        assert_eq!(report.entity_counters[2].avg_rows_per_load, 3.0);
    }

    #[test]
    fn report_is_gated_by_window_start() {
        reset_all();
        // Pin the window start so the gate is tested against a known value.
        with_state_mut(|state| {
            state.window_start_ms = 1_000;
            state.ops.load_calls = 4;
        });
        seed_loads("alpha", 4, 8);

        // A request that starts after the current window yields an empty report.
        let rejected = report_window_start(Some(1_001));
        assert!(rejected.counters.is_none());
        assert!(rejected.entity_counters.is_empty());

        // No filter, an equal start, and an earlier start all return the current window.
        for requested in [None, Some(1_000), Some(999)] {
            let report = report_window_start(requested);
            let counters = report
                .counters
                .expect("current window should be reported");
            assert_eq!(counters.window_start_ms, 1_000);
            assert_eq!(counters.ops.load_calls, 4);
            assert_eq!(report.entity_counters.len(), 1);
            assert_eq!(report.entity_counters[0].avg_rows_per_load, 2.0);
        }
    }
}
339}