Skip to main content

velesdb_core/database/
stats.rs

1//! Collection statistics: analyze and cache collection stats.
2
3use crate::{Error, Result};
4
5use super::Database;
6
7impl Database {
8    /// Analyzes a collection, caches stats, and persists them to disk.
9    ///
10    /// # Errors
11    ///
12    /// Returns an error if the name is invalid, the collection does not exist,
13    /// analysis fails, or stats cannot be serialized and written to disk.
14    pub fn analyze_collection(
15        &self,
16        name: &str,
17    ) -> Result<crate::collection::stats::CollectionStats> {
18        crate::validation::validate_collection_name(name)?;
19
20        let collection = self.resolve_collection(name)?;
21        let stats = collection.analyze()?;
22
23        // BFS reorder for cache locality: nodes close in the graph are placed
24        // close in memory, reducing L2/L3 cache misses during search by 15–30%.
25        // No-op for collections < 1 000 vectors (see REORDER_THRESHOLD).
26        collection.reorder_for_locality()?;
27
28        self.collection_stats
29            .write()
30            .insert(name.to_string(), stats.clone());
31
32        // Bug #51: route the write through Collection so that stats_io_mutex
33        // is held, preventing a race with incremental histogram updates.
34        collection.write_stats_guarded(&stats)?;
35
36        // Issue #608: bump analyze_generation after stats are persisted so
37        // the compiled plan cache key changes and stale plans are rebuilt
38        // with the fresh calibrated cost estimates.
39        collection.bump_analyze_generation();
40
41        Ok(stats)
42    }
43
44    /// Returns cached statistics when available, loading from disk if present.
45    ///
46    /// # Errors
47    ///
48    /// Returns an error if the name is invalid, or the on-disk stats file
49    /// exists but cannot be read or deserialized.
50    pub fn get_collection_stats(
51        &self,
52        name: &str,
53    ) -> Result<Option<crate::collection::stats::CollectionStats>> {
54        crate::validation::validate_collection_name(name)?;
55
56        if let Some(stats) = self.collection_stats.read().get(name).cloned() {
57            return Ok(Some(stats));
58        }
59
60        let stats_path = self.data_dir.join(name).join("collection.stats.json");
61        if !stats_path.exists() {
62            return Ok(None);
63        }
64
65        let bytes = std::fs::read(stats_path)?;
66        let stats: crate::collection::stats::CollectionStats = serde_json::from_slice(&bytes)
67            .map_err(|e| Error::Serialization(format!("failed to parse stats: {e}")))?;
68        self.collection_stats
69            .write()
70            .insert(name.to_string(), stats.clone());
71        Ok(Some(stats))
72    }
73}