1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
//! Collection statistics: analyze and cache collection stats.
use crate::{Error, Result};
use super::Database;
impl Database {
/// Analyzes a collection, caches stats, and persists them to disk.
///
/// # Errors
///
/// Returns an error if the name is invalid, the collection does not exist,
/// analysis fails, or stats cannot be serialized and written to disk.
pub fn analyze_collection(
&self,
name: &str,
) -> Result<crate::collection::stats::CollectionStats> {
crate::validation::validate_collection_name(name)?;
let collection = self.resolve_collection(name)?;
let stats = collection.analyze()?;
// BFS reorder for cache locality: nodes close in the graph are placed
// close in memory, reducing L2/L3 cache misses during search by 15–30%.
// No-op for collections < 1 000 vectors (see REORDER_THRESHOLD).
collection.reorder_for_locality()?;
self.collection_stats
.write()
.insert(name.to_string(), stats.clone());
// Bug #51: route the write through Collection so that stats_io_mutex
// is held, preventing a race with incremental histogram updates.
collection.write_stats_guarded(&stats)?;
// Issue #608: bump analyze_generation after stats are persisted so
// the compiled plan cache key changes and stale plans are rebuilt
// with the fresh calibrated cost estimates.
collection.bump_analyze_generation();
Ok(stats)
}
/// Returns cached statistics when available, loading from disk if present.
///
/// # Errors
///
/// Returns an error if the name is invalid, or the on-disk stats file
/// exists but cannot be read or deserialized.
pub fn get_collection_stats(
&self,
name: &str,
) -> Result<Option<crate::collection::stats::CollectionStats>> {
crate::validation::validate_collection_name(name)?;
if let Some(stats) = self.collection_stats.read().get(name).cloned() {
return Ok(Some(stats));
}
let stats_path = self.data_dir.join(name).join("collection.stats.json");
if !stats_path.exists() {
return Ok(None);
}
let bytes = std::fs::read(stats_path)?;
let stats: crate::collection::stats::CollectionStats = serde_json::from_slice(&bytes)
.map_err(|e| Error::Serialization(format!("failed to parse stats: {e}")))?;
self.collection_stats
.write()
.insert(name.to_string(), stats.clone());
Ok(Some(stats))
}
}