velesdb_core/database/stats.rs
1//! Collection statistics: analyze and cache collection stats.
2
3use crate::{Error, Result};
4
5use super::Database;
6
7impl Database {
8 /// Analyzes a collection, caches stats, and persists them to disk.
9 ///
10 /// # Errors
11 ///
12 /// Returns an error if the name is invalid, the collection does not exist,
13 /// analysis fails, or stats cannot be serialized and written to disk.
14 pub fn analyze_collection(
15 &self,
16 name: &str,
17 ) -> Result<crate::collection::stats::CollectionStats> {
18 crate::validation::validate_collection_name(name)?;
19
20 let collection = self.resolve_collection(name)?;
21 let stats = collection.analyze()?;
22
23 // BFS reorder for cache locality: nodes close in the graph are placed
24 // close in memory, reducing L2/L3 cache misses during search by 15–30%.
25 // No-op for collections < 1 000 vectors (see REORDER_THRESHOLD).
26 collection.reorder_for_locality()?;
27
28 self.collection_stats
29 .write()
30 .insert(name.to_string(), stats.clone());
31
32 // Bug #51: route the write through Collection so that stats_io_mutex
33 // is held, preventing a race with incremental histogram updates.
34 collection.write_stats_guarded(&stats)?;
35
36 // Issue #608: bump analyze_generation after stats are persisted so
37 // the compiled plan cache key changes and stale plans are rebuilt
38 // with the fresh calibrated cost estimates.
39 collection.bump_analyze_generation();
40
41 Ok(stats)
42 }
43
44 /// Returns cached statistics when available, loading from disk if present.
45 ///
46 /// # Errors
47 ///
48 /// Returns an error if the name is invalid, or the on-disk stats file
49 /// exists but cannot be read or deserialized.
50 pub fn get_collection_stats(
51 &self,
52 name: &str,
53 ) -> Result<Option<crate::collection::stats::CollectionStats>> {
54 crate::validation::validate_collection_name(name)?;
55
56 if let Some(stats) = self.collection_stats.read().get(name).cloned() {
57 return Ok(Some(stats));
58 }
59
60 let stats_path = self.data_dir.join(name).join("collection.stats.json");
61 if !stats_path.exists() {
62 return Ok(None);
63 }
64
65 let bytes = std::fs::read(stats_path)?;
66 let stats: crate::collection::stats::CollectionStats = serde_json::from_slice(&bytes)
67 .map_err(|e| Error::Serialization(format!("failed to parse stats: {e}")))?;
68 self.collection_stats
69 .write()
70 .insert(name.to_string(), stats.clone());
71 Ok(Some(stats))
72 }
73}