Skip to main content

lora_store/memory/
stats.rs

//! Lightweight cardinality stats used by the cost model.
//!
//! Today the stats are exact, derived in O(labels + types) from the
//! existing `nodes_by_label` / `relationships_by_type` maps and a
//! tally of indexed property cardinality from the property-index
//! buckets. Cheap to build, cheap to keep current — no separate
//! ANALYZE phase, no background sampling.
//!
//! When the graph grows beyond what an exact `BTreeMap<String,
//! usize>` can serve from RAM, this is the seam where a HyperLogLog
//! sketch will replace the per-(label, property) distinct counts.
//! The public surface (`GraphStats`) stays the same.
13
14use std::collections::{hash_map::DefaultHasher, BTreeMap, BTreeSet};
15use std::hash::{Hash, Hasher};
16
/// Snapshot of graph cardinality. Populated by the storage backend
/// (see [`super::InMemoryGraph::stats`]).
///
/// Every per-property map and every index set below is keyed by an owned
/// `(label_or_type, property)` pair.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct GraphStats {
    /// Total live node count.
    pub node_count: usize,
    /// Total live relationship count.
    pub relationship_count: usize,
    /// Per-label node count. `nodes_by_label[label].len()`.
    pub nodes_by_label: BTreeMap<String, usize>,
    /// Per-rel-type relationship count. `relationships_by_type[type].len()`.
    pub relationships_by_type: BTreeMap<String, usize>,
    /// Per-(label, property) distinct value count, present when a
    /// property index is active. The module docs describe these counts
    /// as exact today, with an approximate sketch planned as a later
    /// replacement. Empty for non-indexed columns — the optimizer falls
    /// back to "all rows distinct" for those.
    ///
    /// NOTE(review): [`GraphStats::estimate_node_property_equality`]
    /// treats a missing entry as a single distinct value (it returns the
    /// whole label count), which is the opposite of "all rows distinct".
    /// Confirm which fallback the optimizer actually relies on.
    pub node_distinct_values: BTreeMap<(String, String), usize>,
    /// Relationship-side counterpart of `node_distinct_values`
    /// (presumably keyed by `(rel_type, property)` — confirm against the
    /// backend). Within this file it is only consumed by
    /// [`GraphStats::fingerprint`].
    pub relationship_distinct_values: BTreeMap<(String, String), usize>,
    /// Online catalog-backed range indexes by `(label_or_type, property)`.
    pub node_range_indexes: BTreeSet<(String, String)>,
    pub relationship_range_indexes: BTreeSet<(String, String)>,
    /// Online catalog-backed text indexes by `(label_or_type, property)`.
    pub node_text_indexes: BTreeSet<(String, String)>,
    pub relationship_text_indexes: BTreeSet<(String, String)>,
    /// Online catalog-backed point indexes by `(label_or_type, property)`.
    pub node_point_indexes: BTreeSet<(String, String)>,
    pub relationship_point_indexes: BTreeSet<(String, String)>,
    /// Online catalog-backed vector indexes by `(label_or_type, property)`.
    /// Tracked alongside the other index scopes so the optimizer / planner
    /// can see them in stats fingerprints, though kNN currently goes
    /// through a flat per-query scan rather than a dedicated structure.
    pub node_vector_indexes: BTreeSet<(String, String)>,
    pub relationship_vector_indexes: BTreeSet<(String, String)>,
}

impl GraphStats {
    /// Builds the owned `(scope, property)` key shared by every
    /// per-property map and index set in this struct.
    ///
    /// The collections are keyed by `(String, String)`, so a lookup that
    /// starts from borrowed `&str` parts has to allocate a temporary key.
    /// Keeping that in one place makes the cost visible and gives a single
    /// spot to change if the key type ever becomes borrow-friendly.
    fn scoped_key(scope: &str, property: &str) -> (String, String) {
        (scope.to_owned(), property.to_owned())
    }

    /// Returns whether `indexes` holds an entry for `(scope, property)`.
    fn contains_index(indexes: &BTreeSet<(String, String)>, scope: &str, property: &str) -> bool {
        indexes.contains(&Self::scoped_key(scope, property))
    }

    /// Selectivity of an equality predicate `label:prop = value`.
    ///
    /// Returns `None` when `label` has no entry in `nodes_by_label`, so the
    /// optimizer can fall back to its conservative default. Otherwise
    /// returns `Some(rows)`, where `rows` is the label's node count divided
    /// by the recorded distinct-value count for `(label, property)`,
    /// rounded up.
    ///
    /// A missing or zero distinct count is treated as `1`, so in that case
    /// the estimate is the full label count.
    pub fn estimate_node_property_equality(&self, label: &str, property: &str) -> Option<u64> {
        let total = self.label_count(label)?;
        // Clamp to at least 1 so the division below can never divide by
        // zero, whether the entry is absent or recorded as 0.
        let distinct = self
            .node_distinct_values
            .get(&Self::scoped_key(label, property))
            .map_or(1, |&count| count.max(1) as u64);
        // Uniform-distribution heuristic: each value owns
        // ⌈total / distinct⌉ rows.
        Some(total.div_ceil(distinct))
    }

    /// Node count recorded for `label`, or `None` if the label has no
    /// entry in `nodes_by_label`.
    pub fn label_count(&self, label: &str) -> Option<u64> {
        self.nodes_by_label.get(label).map(|&count| count as u64)
    }

    /// Relationship count recorded for `rel_type`, or `None` if the type
    /// has no entry in `relationships_by_type`.
    pub fn relationship_type_count(&self, rel_type: &str) -> Option<u64> {
        self.relationships_by_type
            .get(rel_type)
            .map(|&count| count as u64)
    }

    /// 64-bit digest of the whole snapshot.
    ///
    /// Feeds every field, in declaration order, into a fresh
    /// [`DefaultHasher`]. Two equal snapshots hash to the same value within
    /// one build of the program; the standard library does not specify
    /// `DefaultHasher`'s algorithm, so the value should not be persisted or
    /// compared across toolchain versions.
    pub fn fingerprint(&self) -> u64 {
        // Exhaustive destructuring on purpose: adding a field to
        // `GraphStats` without listing it here is a compile error, so a new
        // statistic cannot be silently left out of the fingerprint.
        let Self {
            node_count,
            relationship_count,
            nodes_by_label,
            relationships_by_type,
            node_distinct_values,
            relationship_distinct_values,
            node_range_indexes,
            relationship_range_indexes,
            node_text_indexes,
            relationship_text_indexes,
            node_point_indexes,
            relationship_point_indexes,
            node_vector_indexes,
            relationship_vector_indexes,
        } = self;

        let mut hasher = DefaultHasher::new();
        node_count.hash(&mut hasher);
        relationship_count.hash(&mut hasher);
        nodes_by_label.hash(&mut hasher);
        relationships_by_type.hash(&mut hasher);
        node_distinct_values.hash(&mut hasher);
        relationship_distinct_values.hash(&mut hasher);
        node_range_indexes.hash(&mut hasher);
        relationship_range_indexes.hash(&mut hasher);
        node_text_indexes.hash(&mut hasher);
        relationship_text_indexes.hash(&mut hasher);
        node_point_indexes.hash(&mut hasher);
        relationship_point_indexes.hash(&mut hasher);
        node_vector_indexes.hash(&mut hasher);
        relationship_vector_indexes.hash(&mut hasher);
        hasher.finish()
    }

    /// Whether `node_range_indexes` contains `(label, property)`.
    pub fn has_node_range_index(&self, label: &str, property: &str) -> bool {
        Self::contains_index(&self.node_range_indexes, label, property)
    }

    /// Whether `node_text_indexes` contains `(label, property)`.
    pub fn has_node_text_index(&self, label: &str, property: &str) -> bool {
        Self::contains_index(&self.node_text_indexes, label, property)
    }

    /// Whether `node_point_indexes` contains `(label, property)`.
    pub fn has_node_point_index(&self, label: &str, property: &str) -> bool {
        Self::contains_index(&self.node_point_indexes, label, property)
    }

    /// Whether `relationship_range_indexes` contains `(rel_type, property)`.
    pub fn has_relationship_range_index(&self, rel_type: &str, property: &str) -> bool {
        Self::contains_index(&self.relationship_range_indexes, rel_type, property)
    }

    /// Whether `relationship_text_indexes` contains `(rel_type, property)`.
    pub fn has_relationship_text_index(&self, rel_type: &str, property: &str) -> bool {
        Self::contains_index(&self.relationship_text_indexes, rel_type, property)
    }

    /// Whether `relationship_point_indexes` contains `(rel_type, property)`.
    pub fn has_relationship_point_index(&self, rel_type: &str, property: &str) -> bool {
        Self::contains_index(&self.relationship_point_indexes, rel_type, property)
    }

    /// Whether `node_vector_indexes` contains `(label, property)`.
    pub fn has_node_vector_index(&self, label: &str, property: &str) -> bool {
        Self::contains_index(&self.node_vector_indexes, label, property)
    }

    /// Whether `relationship_vector_indexes` contains `(rel_type, property)`.
    pub fn has_relationship_vector_index(&self, rel_type: &str, property: &str) -> bool {
        Self::contains_index(&self.relationship_vector_indexes, rel_type, property)
    }
}