Skip to main content

icydb_core/db/diagnostics/snapshot/
mod.rs

1use crate::{
2    db::{
3        Db, EntityName,
4        data::{DataKey, StorageKey},
5        index::IndexKey,
6    },
7    error::InternalError,
8    traits::CanisterKind,
9    value::Value,
10};
11use candid::CandidType;
12use serde::{Deserialize, Serialize};
13use std::collections::BTreeMap;
14
15///
16/// StorageReport
17/// Live storage snapshot report
18///
19
20#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
21pub struct StorageReport {
22    pub storage_data: Vec<DataStoreSnapshot>,
23    pub storage_index: Vec<IndexStoreSnapshot>,
24    pub entity_storage: Vec<EntitySnapshot>,
25    pub corrupted_keys: u64,
26    pub corrupted_entries: u64,
27}
28
29///
30/// DataStoreSnapshot
31/// Store-level snapshot metrics.
32///
33
34#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
35pub struct DataStoreSnapshot {
36    pub path: String,
37    pub entries: u64,
38    pub memory_bytes: u64,
39}
40
41///
42/// IndexStoreSnapshot
43/// Index-store snapshot metrics
44///
45
46#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
47pub struct IndexStoreSnapshot {
48    pub path: String,
49    pub entries: u64,
50    pub user_entries: u64,
51    pub system_entries: u64,
52    pub memory_bytes: u64,
53}
54
55///
56/// EntitySnapshot
57/// Per-entity storage breakdown across stores
58///
59
60#[derive(CandidType, Clone, Debug, Default, Deserialize, Serialize)]
61pub struct EntitySnapshot {
62    /// Store path (e.g., icydb_schema_tests::schema::TestDataStore)
63    pub store: String,
64
65    /// Entity path (e.g., icydb_schema_tests::canister::db::Index)
66    pub path: String,
67
68    /// Number of rows for this entity in the store
69    pub entries: u64,
70
71    /// Approximate bytes used (key + value)
72    pub memory_bytes: u64,
73
74    /// Minimum primary key for this entity (entity-local ordering)
75    pub min_key: Option<Value>,
76
77    /// Maximum primary key for this entity (entity-local ordering)
78    pub max_key: Option<Value>,
79}
80
81///
82/// EntityStats
83/// Internal struct for building per-entity stats before snapshotting.
84///
85
86#[derive(Default)]
87struct EntityStats {
88    entries: u64,
89    memory_bytes: u64,
90    min_key: Option<StorageKey>,
91    max_key: Option<StorageKey>,
92}
93
94impl EntityStats {
95    fn update(&mut self, dk: &DataKey, value_len: u64) {
96        self.entries = self.entries.saturating_add(1);
97        self.memory_bytes = self
98            .memory_bytes
99            .saturating_add(DataKey::entry_size_bytes(value_len));
100
101        let k = dk.storage_key();
102
103        match &mut self.min_key {
104            Some(min) if k < *min => *min = k,
105            None => self.min_key = Some(k),
106            _ => {}
107        }
108
109        match &mut self.max_key {
110            Some(max) if k > *max => *max = k,
111            None => self.max_key = Some(k),
112            _ => {}
113        }
114    }
115}
116
117/// Build storage snapshot and per-entity breakdown; enrich path names using name→path map
118pub(crate) fn storage_report<C: CanisterKind>(
119    db: &Db<C>,
120    name_to_path: &[(&'static str, &'static str)],
121) -> Result<StorageReport, InternalError> {
122    db.ensure_recovered_state()?;
123    // Build name→path map once, reuse across stores.
124    let name_map: BTreeMap<&'static str, &str> = name_to_path.iter().copied().collect();
125    let mut data = Vec::new();
126    let mut index = Vec::new();
127    let mut entity_storage: Vec<EntitySnapshot> = Vec::new();
128    let mut corrupted_keys = 0u64;
129    let mut corrupted_entries = 0u64;
130
131    db.with_store_registry(|reg| {
132        // Keep diagnostics snapshots deterministic by traversing stores in path order.
133        let mut stores = reg.iter().collect::<Vec<_>>();
134        stores.sort_by_key(|(path, _)| *path);
135
136        for (path, store_handle) in stores {
137            // Phase 1: collect data-store snapshots and per-entity stats.
138            store_handle.with_data(|store| {
139                data.push(DataStoreSnapshot {
140                    path: path.to_string(),
141                    entries: store.len(),
142                    memory_bytes: store.memory_bytes(),
143                });
144
145                // Track per-entity counts, memory, and min/max Keys (not DataKeys)
146                let mut by_entity: BTreeMap<EntityName, EntityStats> = BTreeMap::new();
147
148                for entry in store.iter() {
149                    let Ok(dk) = DataKey::try_from_raw(entry.key()) else {
150                        corrupted_keys = corrupted_keys.saturating_add(1);
151                        continue;
152                    };
153
154                    let value_len = entry.value().len() as u64;
155
156                    by_entity
157                        .entry(*dk.entity_name())
158                        .or_default()
159                        .update(&dk, value_len);
160                }
161
162                for (entity_name, stats) in by_entity {
163                    let path_name = name_map
164                        .get(entity_name.as_str())
165                        .copied()
166                        .unwrap_or(entity_name.as_str());
167                    entity_storage.push(EntitySnapshot {
168                        store: path.to_string(),
169                        path: path_name.to_string(),
170                        entries: stats.entries,
171                        memory_bytes: stats.memory_bytes,
172                        min_key: stats.min_key.map(|key| key.as_value()),
173                        max_key: stats.max_key.map(|key| key.as_value()),
174                    });
175                }
176            });
177
178            // Phase 2: collect index-store snapshots and integrity counters.
179            store_handle.with_index(|store| {
180                let mut user_entries = 0u64;
181                let mut system_entries = 0u64;
182
183                for (key, value) in store.entries() {
184                    let Ok(decoded_key) = IndexKey::try_from_raw(&key) else {
185                        corrupted_entries = corrupted_entries.saturating_add(1);
186                        continue;
187                    };
188
189                    if decoded_key.uses_system_namespace() {
190                        system_entries = system_entries.saturating_add(1);
191                    } else {
192                        user_entries = user_entries.saturating_add(1);
193                    }
194
195                    if value.validate().is_err() {
196                        corrupted_entries = corrupted_entries.saturating_add(1);
197                    }
198                }
199
200                index.push(IndexStoreSnapshot {
201                    path: path.to_string(),
202                    entries: store.len(),
203                    user_entries,
204                    system_entries,
205                    memory_bytes: store.memory_bytes(),
206                });
207            });
208        }
209    });
210
211    // Keep entity snapshot emission deterministic as an explicit contract,
212    // independent of outer store traversal implementation details.
213    entity_storage.sort_by(|left, right| {
214        (left.store.as_str(), left.path.as_str()).cmp(&(right.store.as_str(), right.path.as_str()))
215    });
216
217    Ok(StorageReport {
218        storage_data: data,
219        storage_index: index,
220        entity_storage,
221        corrupted_keys,
222        corrupted_entries,
223    })
224}
225
226///
227/// TESTS
228///
229
230#[cfg(test)]
231mod tests {
232    use crate::{
233        db::{
234            Db,
235            commit::{ensure_recovered_for_write, init_commit_store_for_tests},
236            data::{DataKey, DataStore, RawDataKey, RawRow, StorageKey},
237            identity::{EntityName, IndexName},
238            index::{IndexId, IndexKey, IndexKeyKind, IndexStore, RawIndexEntry, RawIndexKey},
239            registry::StoreRegistry,
240        },
241        test_support::test_memory,
242        traits::Storable,
243    };
244    use std::{borrow::Cow, cell::RefCell};
245
246    use super::{StorageReport, storage_report};
247
248    crate::test_canister! {
249        ident = DiagnosticsCanister,
250        commit_memory_id = 254,
251    }
252
253    const STORE_Z_PATH: &str = "diagnostics_tests::z_store";
254    const STORE_A_PATH: &str = "diagnostics_tests::a_store";
255    const SINGLE_ENTITY_NAME: &str = "diag_single_entity";
256    const SINGLE_ENTITY_PATH: &str = "diagnostics_tests::entity::single";
257    const FIRST_ENTITY_NAME: &str = "diag_first_entity";
258    const FIRST_ENTITY_PATH: &str = "diagnostics_tests::entity::first";
259    const SECOND_ENTITY_NAME: &str = "diag_second_entity";
260    const SECOND_ENTITY_PATH: &str = "diagnostics_tests::entity::second";
261    const MINMAX_ENTITY_NAME: &str = "diag_minmax_entity";
262    const MINMAX_ENTITY_PATH: &str = "diagnostics_tests::entity::minmax";
263    const VALID_ENTITY_NAME: &str = "diag_valid_entity";
264    const VALID_ENTITY_PATH: &str = "diagnostics_tests::entity::valid";
265
266    thread_local! {
267        static STORE_Z_DATA: RefCell<DataStore> = RefCell::new(DataStore::init(test_memory(153)));
268        static STORE_Z_INDEX: RefCell<IndexStore> = RefCell::new(IndexStore::init(test_memory(154)));
269        static STORE_A_DATA: RefCell<DataStore> = RefCell::new(DataStore::init(test_memory(155)));
270        static STORE_A_INDEX: RefCell<IndexStore> = RefCell::new(IndexStore::init(test_memory(156)));
271        static DIAGNOSTICS_REGISTRY: StoreRegistry = {
272            let mut registry = StoreRegistry::new();
273            registry
274                .register_store(STORE_Z_PATH, &STORE_Z_DATA, &STORE_Z_INDEX)
275                .expect("diagnostics test z-store registration should succeed");
276            registry
277                .register_store(STORE_A_PATH, &STORE_A_DATA, &STORE_A_INDEX)
278                .expect("diagnostics test a-store registration should succeed");
279            registry
280        };
281    }
282
283    static DB: Db<DiagnosticsCanister> = Db::new(&DIAGNOSTICS_REGISTRY);
284
285    fn with_data_store_mut<R>(path: &'static str, f: impl FnOnce(&mut DataStore) -> R) -> R {
286        DB.with_store_registry(|registry| {
287            registry
288                .try_get_store(path)
289                .map(|store_handle| store_handle.with_data_mut(f))
290        })
291        .expect("data store lookup should succeed")
292    }
293
294    fn with_index_store_mut<R>(path: &'static str, f: impl FnOnce(&mut IndexStore) -> R) -> R {
295        DB.with_store_registry(|registry| {
296            registry
297                .try_get_store(path)
298                .map(|store_handle| store_handle.with_index_mut(f))
299        })
300        .expect("index store lookup should succeed")
301    }
302
303    fn reset_stores() {
304        init_commit_store_for_tests().expect("commit store init should succeed");
305        ensure_recovered_for_write(&DB).expect("write-side recovery should succeed");
306        DB.with_store_registry(|registry| {
307            // Test cleanup only: this clear-all sweep has set semantics, so
308            // `StoreRegistry` HashMap iteration order is intentionally irrelevant.
309            for (_, store_handle) in registry.iter() {
310                store_handle.with_data_mut(DataStore::clear);
311                store_handle.with_index_mut(IndexStore::clear);
312            }
313        });
314    }
315
316    fn insert_data_row(path: &'static str, entity_name: &str, key: StorageKey, row_len: usize) {
317        let entity =
318            EntityName::try_from_str(entity_name).expect("diagnostics test entity name is valid");
319        let raw_key = DataKey::raw_from_parts(entity, key)
320            .expect("diagnostics test data key should encode from valid parts");
321        let row_bytes = vec![0xAB; row_len.max(1)];
322        let raw_row = RawRow::try_new(row_bytes).expect("diagnostics test row should encode");
323
324        with_data_store_mut(path, |store| {
325            store.insert(raw_key, raw_row);
326        });
327    }
328
329    fn insert_corrupted_data_key(path: &'static str) {
330        let valid = DataKey::raw_from_parts(
331            EntityName::try_from_str(VALID_ENTITY_NAME).expect("valid test entity name"),
332            StorageKey::Int(1),
333        )
334        .expect("valid data key should encode");
335
336        let mut corrupted_bytes = valid.as_bytes().to_vec();
337        corrupted_bytes[0] = 0;
338        let corrupted_key = <RawDataKey as Storable>::from_bytes(Cow::Owned(corrupted_bytes));
339        let raw_row = RawRow::try_new(vec![0xCD]).expect("diagnostics test row should encode");
340
341        with_data_store_mut(path, |store| {
342            store.insert(corrupted_key, raw_row);
343        });
344    }
345
346    fn index_id(entity_name: &str, field: &str) -> IndexId {
347        let entity =
348            EntityName::try_from_str(entity_name).expect("diagnostics test entity name is valid");
349        let name = IndexName::try_from_parts(&entity, &[field])
350            .expect("diagnostics test index name should encode");
351
352        IndexId(name)
353    }
354
355    fn index_key(kind: IndexKeyKind, entity_name: &str, field: &str) -> RawIndexKey {
356        let id = index_id(entity_name, field);
357        IndexKey::empty_with_kind(&id, kind).to_raw()
358    }
359
360    fn insert_index_entry(path: &'static str, key: RawIndexKey, entry: RawIndexEntry) {
361        with_index_store_mut(path, |store| {
362            store.insert(key, entry);
363        });
364    }
365
366    fn diagnostics_report(name_to_path: &[(&'static str, &'static str)]) -> StorageReport {
367        storage_report(&DB, name_to_path).expect("diagnostics snapshot should succeed")
368    }
369
370    fn data_paths(report: &StorageReport) -> Vec<&str> {
371        report
372            .storage_data
373            .iter()
374            .map(|snapshot| snapshot.path.as_str())
375            .collect()
376    }
377
378    fn index_paths(report: &StorageReport) -> Vec<&str> {
379        report
380            .storage_index
381            .iter()
382            .map(|snapshot| snapshot.path.as_str())
383            .collect()
384    }
385
386    fn entity_store_paths(report: &StorageReport) -> Vec<(&str, &str)> {
387        report
388            .entity_storage
389            .iter()
390            .map(|snapshot| (snapshot.store.as_str(), snapshot.path.as_str()))
391            .collect()
392    }
393
394    #[test]
395    fn storage_report_empty_store_snapshot() {
396        reset_stores();
397
398        let report = diagnostics_report(&[]);
399
400        assert_eq!(report.corrupted_keys, 0);
401        assert_eq!(report.corrupted_entries, 0);
402        assert!(report.entity_storage.is_empty());
403
404        assert_eq!(data_paths(&report), vec![STORE_A_PATH, STORE_Z_PATH]);
405        assert_eq!(index_paths(&report), vec![STORE_A_PATH, STORE_Z_PATH]);
406        assert!(
407            report
408                .storage_data
409                .iter()
410                .all(|snapshot| snapshot.entries == 0)
411        );
412        assert!(
413            report
414                .storage_index
415                .iter()
416                .all(|snapshot| snapshot.entries == 0)
417        );
418    }
419
420    #[test]
421    fn storage_report_single_entity_multiple_rows() {
422        reset_stores();
423
424        insert_data_row(STORE_A_PATH, SINGLE_ENTITY_NAME, StorageKey::Int(3), 3);
425        insert_data_row(STORE_A_PATH, SINGLE_ENTITY_NAME, StorageKey::Int(1), 1);
426        insert_data_row(STORE_A_PATH, SINGLE_ENTITY_NAME, StorageKey::Int(2), 2);
427
428        let report = diagnostics_report(&[(SINGLE_ENTITY_NAME, SINGLE_ENTITY_PATH)]);
429        let entity_snapshot = report
430            .entity_storage
431            .iter()
432            .find(|snapshot| snapshot.store == STORE_A_PATH && snapshot.path == SINGLE_ENTITY_PATH)
433            .expect("single-entity snapshot should exist");
434
435        assert_eq!(entity_snapshot.entries, 3);
436    }
437
438    #[test]
439    fn storage_report_multiple_entities_in_same_store() {
440        reset_stores();
441
442        insert_data_row(STORE_A_PATH, FIRST_ENTITY_NAME, StorageKey::Int(10), 1);
443        insert_data_row(STORE_A_PATH, FIRST_ENTITY_NAME, StorageKey::Int(11), 1);
444        insert_data_row(STORE_A_PATH, SECOND_ENTITY_NAME, StorageKey::Int(20), 1);
445
446        let report = diagnostics_report(&[
447            (FIRST_ENTITY_NAME, FIRST_ENTITY_PATH),
448            (SECOND_ENTITY_NAME, SECOND_ENTITY_PATH),
449        ]);
450
451        let first = report
452            .entity_storage
453            .iter()
454            .find(|snapshot| snapshot.store == STORE_A_PATH && snapshot.path == FIRST_ENTITY_PATH)
455            .expect("first-entity snapshot should exist");
456        let second = report
457            .entity_storage
458            .iter()
459            .find(|snapshot| snapshot.store == STORE_A_PATH && snapshot.path == SECOND_ENTITY_PATH)
460            .expect("second-entity snapshot should exist");
461
462        assert_eq!(first.entries, 2);
463        assert_eq!(second.entries, 1);
464    }
465
466    #[test]
467    fn storage_report_entity_snapshots_are_sorted_by_store_then_path() {
468        reset_stores();
469
470        insert_data_row(STORE_Z_PATH, FIRST_ENTITY_NAME, StorageKey::Int(1), 1);
471        insert_data_row(STORE_A_PATH, SECOND_ENTITY_NAME, StorageKey::Int(2), 1);
472        insert_data_row(STORE_A_PATH, FIRST_ENTITY_NAME, StorageKey::Int(3), 1);
473
474        let report = diagnostics_report(&[
475            (FIRST_ENTITY_NAME, "diagnostics_tests::entity::z_first"),
476            (SECOND_ENTITY_NAME, "diagnostics_tests::entity::a_second"),
477        ]);
478
479        assert_eq!(
480            entity_store_paths(&report),
481            vec![
482                (STORE_A_PATH, "diagnostics_tests::entity::a_second"),
483                (STORE_A_PATH, "diagnostics_tests::entity::z_first"),
484                (STORE_Z_PATH, "diagnostics_tests::entity::z_first"),
485            ]
486        );
487    }
488
489    #[test]
490    fn storage_report_min_max_key_correctness() {
491        reset_stores();
492
493        insert_data_row(STORE_A_PATH, MINMAX_ENTITY_NAME, StorageKey::Int(9), 1);
494        insert_data_row(STORE_A_PATH, MINMAX_ENTITY_NAME, StorageKey::Int(-5), 1);
495        insert_data_row(STORE_A_PATH, MINMAX_ENTITY_NAME, StorageKey::Int(3), 1);
496
497        let report = diagnostics_report(&[(MINMAX_ENTITY_NAME, MINMAX_ENTITY_PATH)]);
498        let entity_snapshot = report
499            .entity_storage
500            .iter()
501            .find(|snapshot| snapshot.store == STORE_A_PATH && snapshot.path == MINMAX_ENTITY_PATH)
502            .expect("min/max snapshot should exist");
503
504        assert_eq!(
505            entity_snapshot.min_key,
506            Some(StorageKey::Int(-5).as_value())
507        );
508        assert_eq!(entity_snapshot.max_key, Some(StorageKey::Int(9).as_value()));
509    }
510
511    #[test]
512    fn storage_report_corrupted_key_detection() {
513        reset_stores();
514
515        insert_data_row(STORE_A_PATH, VALID_ENTITY_NAME, StorageKey::Int(7), 1);
516        insert_corrupted_data_key(STORE_A_PATH);
517
518        let report = diagnostics_report(&[(VALID_ENTITY_NAME, VALID_ENTITY_PATH)]);
519
520        assert_eq!(report.corrupted_keys, 1);
521        let entity_snapshot = report
522            .entity_storage
523            .iter()
524            .find(|snapshot| snapshot.store == STORE_A_PATH && snapshot.path == VALID_ENTITY_PATH)
525            .expect("valid-entity snapshot should exist");
526        assert_eq!(entity_snapshot.entries, 1);
527    }
528
529    #[test]
530    fn storage_report_corrupted_index_value_detection() {
531        reset_stores();
532
533        let key = index_key(IndexKeyKind::User, "diag_index_entity", "email");
534        let corrupted_entry = <RawIndexEntry as Storable>::from_bytes(Cow::Owned(vec![0, 0, 0, 0]));
535        insert_index_entry(STORE_A_PATH, key, corrupted_entry);
536
537        let report = diagnostics_report(&[]);
538        let index_snapshot = report
539            .storage_index
540            .iter()
541            .find(|snapshot| snapshot.path == STORE_A_PATH)
542            .expect("index snapshot should exist");
543
544        assert_eq!(report.corrupted_entries, 1);
545        assert_eq!(index_snapshot.entries, 1);
546        assert_eq!(index_snapshot.user_entries, 1);
547        assert_eq!(index_snapshot.system_entries, 0);
548    }
549
550    #[test]
551    fn storage_report_system_vs_user_namespace_split() {
552        reset_stores();
553
554        let user_key = index_key(IndexKeyKind::User, "diag_namespace_entity", "email");
555        let system_key = index_key(IndexKeyKind::System, "diag_namespace_entity", "email");
556        let user_entry =
557            RawIndexEntry::try_from_keys([StorageKey::Int(1)]).expect("user entry should encode");
558        let system_entry =
559            RawIndexEntry::try_from_keys([StorageKey::Int(2)]).expect("system entry should encode");
560        insert_index_entry(STORE_A_PATH, user_key, user_entry);
561        insert_index_entry(STORE_A_PATH, system_key, system_entry);
562
563        let report = diagnostics_report(&[]);
564        let index_snapshot = report
565            .storage_index
566            .iter()
567            .find(|snapshot| snapshot.path == STORE_A_PATH)
568            .expect("index snapshot should exist");
569
570        assert_eq!(report.corrupted_entries, 0);
571        assert_eq!(index_snapshot.entries, 2);
572        assert_eq!(index_snapshot.user_entries, 1);
573        assert_eq!(index_snapshot.system_entries, 1);
574    }
575}