1use std::collections::HashMap;
9
10use serde::{Deserialize, Serialize};
11
12use super::CsrIndex;
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct LabelStats {
17 pub edge_count: usize,
19 pub distinct_sources: usize,
21 pub distinct_targets: usize,
23}
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct DegreeHistogram {
28 pub min: usize,
29 pub max: usize,
30 pub avg: f64,
31 pub p50: usize,
32 pub p95: usize,
33 pub p99: usize,
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct GraphStatistics {
39 pub node_count: usize,
41 pub edge_count: usize,
43 pub label_count: usize,
45 pub label_stats: HashMap<String, LabelStats>,
47 pub out_degree_histogram: DegreeHistogram,
49 pub in_degree_histogram: DegreeHistogram,
51}
52
53impl CsrIndex {
54 pub fn compute_statistics(&self) -> GraphStatistics {
59 let n = self.node_count();
60 if n == 0 {
61 return GraphStatistics {
62 node_count: 0,
63 edge_count: 0,
64 label_count: 0,
65 label_stats: HashMap::new(),
66 out_degree_histogram: DegreeHistogram {
67 min: 0,
68 max: 0,
69 avg: 0.0,
70 p50: 0,
71 p95: 0,
72 p99: 0,
73 },
74 in_degree_histogram: DegreeHistogram {
75 min: 0,
76 max: 0,
77 avg: 0.0,
78 p50: 0,
79 p95: 0,
80 p99: 0,
81 },
82 };
83 }
84
85 let mut label_edge_count: HashMap<u32, usize> = HashMap::new();
87 let mut label_sources: HashMap<u32, std::collections::HashSet<u32>> = HashMap::new();
88 let mut label_targets: HashMap<u32, std::collections::HashSet<u32>> = HashMap::new();
89
90 let mut out_degrees: Vec<usize> = Vec::with_capacity(n);
92 let mut in_degrees: Vec<usize> = Vec::with_capacity(n);
93
94 let mut total_edges = 0usize;
95
96 for node in 0..n {
97 let node_id = node as u32;
98 let mut out_deg = 0usize;
99 let mut in_deg = 0usize;
100
101 for (lid, dst) in self.iter_out_edges(node_id) {
102 out_deg += 1;
103 total_edges += 1;
104 *label_edge_count.entry(lid).or_insert(0) += 1;
105 label_sources.entry(lid).or_default().insert(node_id);
106 label_targets.entry(lid).or_default().insert(dst);
107 }
108
109 for (_lid, _src) in self.iter_in_edges(node_id) {
110 in_deg += 1;
111 }
112
113 out_degrees.push(out_deg);
114 in_degrees.push(in_deg);
115 }
116
117 let mut label_stats = HashMap::new();
119 for (&lid, &count) in &label_edge_count {
120 let label_name = self.label_name(lid).to_string();
121 label_stats.insert(
122 label_name,
123 LabelStats {
124 edge_count: count,
125 distinct_sources: label_sources.get(&lid).map_or(0, |s| s.len()),
126 distinct_targets: label_targets.get(&lid).map_or(0, |s| s.len()),
127 },
128 );
129 }
130
131 GraphStatistics {
132 node_count: n,
133 edge_count: total_edges,
134 label_count: label_edge_count.len(),
135 label_stats,
136 out_degree_histogram: compute_histogram(&out_degrees),
137 in_degree_histogram: compute_histogram(&in_degrees),
138 }
139 }
140
141 pub fn label_edge_count(&self, label: &str) -> usize {
145 let Some(lid) = self.label_id(label) else {
146 return 0;
147 };
148
149 let n = self.node_count();
150 let mut count = 0usize;
151 for node in 0..n {
152 for (l, _dst) in self.iter_out_edges(node as u32) {
153 if l == lid {
154 count += 1;
155 }
156 }
157 }
158 count
159 }
160
161 pub fn label_selectivity(&self, label: &str) -> f64 {
166 let total = self.edge_count();
167 if total == 0 {
168 return 0.0;
169 }
170 let count = self.label_edge_count(label);
171 if count == 0 {
172 return 1.0; }
174 count as f64 / total as f64
175 }
176}
177
178fn compute_histogram(degrees: &[usize]) -> DegreeHistogram {
180 if degrees.is_empty() {
181 return DegreeHistogram {
182 min: 0,
183 max: 0,
184 avg: 0.0,
185 p50: 0,
186 p95: 0,
187 p99: 0,
188 };
189 }
190
191 let mut sorted = degrees.to_vec();
192 sorted.sort_unstable();
193
194 let n = sorted.len();
195 let sum: usize = sorted.iter().sum();
196
197 DegreeHistogram {
198 min: sorted[0],
199 max: sorted[n - 1],
200 avg: sum as f64 / n as f64,
201 p50: sorted[n / 2],
202 p95: sorted[(n as f64 * 0.95) as usize],
203 p99: sorted[((n as f64 * 0.99) as usize).min(n - 1)],
204 }
205}
206
207#[cfg(test)]
208mod tests {
209 use super::*;
210
211 #[test]
212 fn statistics_empty_graph() {
213 let csr = CsrIndex::new();
214 let stats = csr.compute_statistics();
215 assert_eq!(stats.node_count, 0);
216 assert_eq!(stats.edge_count, 0);
217 assert_eq!(stats.label_count, 0);
218 }
219
220 #[test]
221 fn statistics_basic() {
222 let mut csr = CsrIndex::new();
223 csr.add_edge("a", "KNOWS", "b").unwrap();
224 csr.add_edge("b", "KNOWS", "c").unwrap();
225 csr.add_edge("a", "LIKES", "c").unwrap();
226 csr.compact();
227
228 let stats = csr.compute_statistics();
229 assert_eq!(stats.node_count, 3);
230 assert_eq!(stats.edge_count, 3);
231 assert_eq!(stats.label_count, 2);
232
233 let knows = &stats.label_stats["KNOWS"];
234 assert_eq!(knows.edge_count, 2);
235 assert_eq!(knows.distinct_sources, 2);
236 assert_eq!(knows.distinct_targets, 2);
237
238 let likes = &stats.label_stats["LIKES"];
239 assert_eq!(likes.edge_count, 1);
240 }
241
242 #[test]
243 fn degree_histogram_values() {
244 let mut csr = CsrIndex::new();
245 csr.add_edge("a", "L", "b").unwrap();
246 csr.add_edge("a", "L", "c").unwrap();
247 csr.add_edge("a", "L", "d").unwrap();
248 csr.add_edge("b", "L", "c").unwrap();
249 csr.compact();
250
251 let stats = csr.compute_statistics();
252 assert_eq!(stats.out_degree_histogram.min, 0);
253 assert_eq!(stats.out_degree_histogram.max, 3);
254 assert!(stats.out_degree_histogram.avg > 0.0);
255 }
256
257 #[test]
258 fn label_edge_count_direct() {
259 let mut csr = CsrIndex::new();
260 csr.add_edge("a", "KNOWS", "b").unwrap();
261 csr.add_edge("b", "KNOWS", "c").unwrap();
262 csr.add_edge("a", "LIKES", "c").unwrap();
263 csr.compact();
264
265 assert_eq!(csr.label_edge_count("KNOWS"), 2);
266 assert_eq!(csr.label_edge_count("LIKES"), 1);
267 assert_eq!(csr.label_edge_count("NONEXISTENT"), 0);
268 }
269
270 #[test]
271 fn label_selectivity_values() {
272 let mut csr = CsrIndex::new();
273 csr.add_edge("a", "KNOWS", "b").unwrap();
274 csr.add_edge("b", "KNOWS", "c").unwrap();
275 csr.add_edge("a", "LIKES", "c").unwrap();
276 csr.compact();
277
278 let sel_knows = csr.label_selectivity("KNOWS");
279 let sel_likes = csr.label_selectivity("LIKES");
280
281 assert!((sel_knows - 2.0 / 3.0).abs() < 1e-9);
282 assert!((sel_likes - 1.0 / 3.0).abs() < 1e-9);
283 assert_eq!(csr.label_selectivity("NONEXISTENT"), 1.0);
284 }
285
286 #[test]
287 fn statistics_serde_roundtrip() {
288 let mut csr = CsrIndex::new();
289 csr.add_edge("a", "KNOWS", "b").unwrap();
290 csr.compact();
291
292 let stats = csr.compute_statistics();
293 let json = sonic_rs::to_string(&stats).unwrap();
294 let parsed: GraphStatistics = sonic_rs::from_str(&json).unwrap();
295 assert_eq!(parsed.node_count, stats.node_count);
296 assert_eq!(parsed.edge_count, stats.edge_count);
297 }
298
299 #[test]
300 fn statistics_with_buffer_edges() {
301 let mut csr = CsrIndex::new();
302 csr.add_edge("a", "KNOWS", "b").unwrap();
303 let stats = csr.compute_statistics();
305 assert_eq!(stats.edge_count, 1);
306 assert_eq!(stats.label_stats["KNOWS"].edge_count, 1);
307 }
308}