1use std::collections::HashMap;
11use std::mem::size_of;
12
13use nodedb_mem::EngineId;
14use serde::{Deserialize, Serialize};
15
16use super::CsrIndex;
17use crate::GraphError;
18
19#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct LabelStats {
22 pub edge_count: usize,
24 pub distinct_sources: usize,
26 pub distinct_targets: usize,
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct DegreeHistogram {
33 pub min: usize,
34 pub max: usize,
35 pub avg: f64,
36 pub p50: usize,
37 pub p95: usize,
38 pub p99: usize,
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct GraphStatistics {
44 pub node_count: usize,
46 pub edge_count: usize,
48 pub label_count: usize,
50 pub label_stats: HashMap<String, LabelStats>,
52 pub out_degree_histogram: DegreeHistogram,
54 pub in_degree_histogram: DegreeHistogram,
56}
57
58impl CsrIndex {
59 pub fn compute_statistics(&self) -> Result<GraphStatistics, GraphError> {
69 let n = self.node_count();
70 if n == 0 {
71 return Ok(GraphStatistics {
72 node_count: 0,
73 edge_count: 0,
74 label_count: 0,
75 label_stats: HashMap::new(),
76 out_degree_histogram: DegreeHistogram {
77 min: 0,
78 max: 0,
79 avg: 0.0,
80 p50: 0,
81 p95: 0,
82 p99: 0,
83 },
84 in_degree_histogram: DegreeHistogram {
85 min: 0,
86 max: 0,
87 avg: 0.0,
88 p50: 0,
89 p95: 0,
90 p99: 0,
91 },
92 });
93 }
94
95 let degree_bytes = 2 * n * size_of::<usize>();
97 let _degree_guard = self
98 .governor
99 .as_ref()
100 .map(|g| g.reserve(EngineId::Graph, degree_bytes))
101 .transpose()?;
102
103 let mut label_edge_count: HashMap<u32, usize> = HashMap::new();
105 let mut label_sources: HashMap<u32, std::collections::HashSet<u32>> = HashMap::new();
106 let mut label_targets: HashMap<u32, std::collections::HashSet<u32>> = HashMap::new();
107
108 let mut out_degrees: Vec<usize> = Vec::with_capacity(n);
111 let mut in_degrees: Vec<usize> = Vec::with_capacity(n);
113
114 let mut total_edges = 0usize;
115
116 for node in 0..n {
117 let node_id = node as u32;
118 let mut out_deg = 0usize;
119 let mut in_deg = 0usize;
120
121 for (lid, dst) in self.dense_iter_out(node_id) {
122 out_deg += 1;
123 total_edges += 1;
124 *label_edge_count.entry(lid).or_insert(0) += 1;
125 label_sources.entry(lid).or_default().insert(node_id);
126 label_targets.entry(lid).or_default().insert(dst);
127 }
128
129 for (_lid, _src) in self.dense_iter_in(node_id) {
130 in_deg += 1;
131 }
132
133 out_degrees.push(out_deg);
134 in_degrees.push(in_deg);
135 }
136
137 let mut label_stats = HashMap::new();
139 for (&lid, &count) in &label_edge_count {
140 let label_name = self.label_name(lid).to_string();
141 label_stats.insert(
142 label_name,
143 LabelStats {
144 edge_count: count,
145 distinct_sources: label_sources.get(&lid).map_or(0, |s| s.len()),
146 distinct_targets: label_targets.get(&lid).map_or(0, |s| s.len()),
147 },
148 );
149 }
150
151 Ok(GraphStatistics {
152 node_count: n,
153 edge_count: total_edges,
154 label_count: label_edge_count.len(),
155 label_stats,
156 out_degree_histogram: compute_histogram(&out_degrees),
157 in_degree_histogram: compute_histogram(&in_degrees),
158 })
159 }
160
161 pub fn label_edge_count(&self, label: &str) -> usize {
165 let Some(lid) = self.label_id(label) else {
166 return 0;
167 };
168
169 let n = self.node_count();
170 let mut count = 0usize;
171 for node in 0..n {
172 for (l, _dst) in self.dense_iter_out(node as u32) {
173 if l == lid {
174 count += 1;
175 }
176 }
177 }
178 count
179 }
180
181 pub fn label_selectivity(&self, label: &str) -> f64 {
186 let total = self.edge_count();
187 if total == 0 {
188 return 0.0;
189 }
190 let count = self.label_edge_count(label);
191 if count == 0 {
192 return 1.0; }
194 count as f64 / total as f64
195 }
196}
197
198fn compute_histogram(degrees: &[usize]) -> DegreeHistogram {
200 if degrees.is_empty() {
201 return DegreeHistogram {
202 min: 0,
203 max: 0,
204 avg: 0.0,
205 p50: 0,
206 p95: 0,
207 p99: 0,
208 };
209 }
210
211 let mut sorted = degrees.to_vec();
212 sorted.sort_unstable();
213
214 let n = sorted.len();
215 let sum: usize = sorted.iter().sum();
216
217 DegreeHistogram {
218 min: sorted[0],
219 max: sorted[n - 1],
220 avg: sum as f64 / n as f64,
221 p50: sorted[n / 2],
222 p95: sorted[(n as f64 * 0.95) as usize],
223 p99: sorted[((n as f64 * 0.99) as usize).min(n - 1)],
224 }
225}
226
227#[cfg(test)]
228mod tests {
229 use super::*;
230
231 #[test]
232 fn statistics_empty_graph() {
233 let csr = CsrIndex::new();
234 let stats = csr.compute_statistics().unwrap();
235 assert_eq!(stats.node_count, 0);
236 assert_eq!(stats.edge_count, 0);
237 assert_eq!(stats.label_count, 0);
238 }
239
240 #[test]
241 fn statistics_basic() {
242 let mut csr = CsrIndex::new();
243 csr.add_edge("a", "KNOWS", "b").unwrap();
244 csr.add_edge("b", "KNOWS", "c").unwrap();
245 csr.add_edge("a", "LIKES", "c").unwrap();
246 csr.compact().expect("no governor, cannot fail");
247
248 let stats = csr.compute_statistics().unwrap();
249 assert_eq!(stats.node_count, 3);
250 assert_eq!(stats.edge_count, 3);
251 assert_eq!(stats.label_count, 2);
252
253 let knows = &stats.label_stats["KNOWS"];
254 assert_eq!(knows.edge_count, 2);
255 assert_eq!(knows.distinct_sources, 2);
256 assert_eq!(knows.distinct_targets, 2);
257
258 let likes = &stats.label_stats["LIKES"];
259 assert_eq!(likes.edge_count, 1);
260 }
261
262 #[test]
263 fn degree_histogram_values() {
264 let mut csr = CsrIndex::new();
265 csr.add_edge("a", "L", "b").unwrap();
266 csr.add_edge("a", "L", "c").unwrap();
267 csr.add_edge("a", "L", "d").unwrap();
268 csr.add_edge("b", "L", "c").unwrap();
269 csr.compact().expect("no governor, cannot fail");
270
271 let stats = csr.compute_statistics().unwrap();
272 assert_eq!(stats.out_degree_histogram.min, 0);
273 assert_eq!(stats.out_degree_histogram.max, 3);
274 assert!(stats.out_degree_histogram.avg > 0.0);
275 }
276
277 #[test]
278 fn label_edge_count_direct() {
279 let mut csr = CsrIndex::new();
280 csr.add_edge("a", "KNOWS", "b").unwrap();
281 csr.add_edge("b", "KNOWS", "c").unwrap();
282 csr.add_edge("a", "LIKES", "c").unwrap();
283 csr.compact().expect("no governor, cannot fail");
284
285 assert_eq!(csr.label_edge_count("KNOWS"), 2);
286 assert_eq!(csr.label_edge_count("LIKES"), 1);
287 assert_eq!(csr.label_edge_count("NONEXISTENT"), 0);
288 }
289
290 #[test]
291 fn label_selectivity_values() {
292 let mut csr = CsrIndex::new();
293 csr.add_edge("a", "KNOWS", "b").unwrap();
294 csr.add_edge("b", "KNOWS", "c").unwrap();
295 csr.add_edge("a", "LIKES", "c").unwrap();
296 csr.compact().expect("no governor, cannot fail");
297
298 let sel_knows = csr.label_selectivity("KNOWS");
299 let sel_likes = csr.label_selectivity("LIKES");
300
301 assert!((sel_knows - 2.0 / 3.0).abs() < 1e-9);
302 assert!((sel_likes - 1.0 / 3.0).abs() < 1e-9);
303 assert_eq!(csr.label_selectivity("NONEXISTENT"), 1.0);
304 }
305
306 #[test]
307 fn statistics_serde_roundtrip() {
308 let mut csr = CsrIndex::new();
309 csr.add_edge("a", "KNOWS", "b").unwrap();
310 csr.compact().expect("no governor, cannot fail");
311
312 let stats = csr.compute_statistics().unwrap();
313 let json = sonic_rs::to_string(&stats).unwrap();
314 let parsed: GraphStatistics = sonic_rs::from_str(&json).unwrap();
315 assert_eq!(parsed.node_count, stats.node_count);
316 assert_eq!(parsed.edge_count, stats.edge_count);
317 }
318
319 #[test]
320 fn statistics_with_buffer_edges() {
321 let mut csr = CsrIndex::new();
322 csr.add_edge("a", "KNOWS", "b").unwrap();
323 let stats = csr.compute_statistics().unwrap();
325 assert_eq!(stats.edge_count, 1);
326 assert_eq!(stats.label_stats["KNOWS"].edge_count, 1);
327 }
328}