1use serde::{Deserialize, Serialize};
7use std::collections::{HashMap, HashSet};
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct GraphStats {
12 pub subject_count: usize,
14 pub object_count: usize,
16 pub node_count: usize,
18 pub edge_count: usize,
20 pub predicate_count: usize,
22 pub density: f64,
24 pub avg_out_degree: f64,
26 pub max_out_degree: usize,
28 pub avg_in_degree: f64,
30 pub max_in_degree: usize,
32 pub self_loop_count: usize,
34}
35
36impl GraphStats {
37 pub fn compute(triples: &[(String, String, String)]) -> Self {
39 if triples.is_empty() {
40 return Self {
41 subject_count: 0,
42 object_count: 0,
43 node_count: 0,
44 edge_count: 0,
45 predicate_count: 0,
46 density: 0.0,
47 avg_out_degree: 0.0,
48 max_out_degree: 0,
49 avg_in_degree: 0.0,
50 max_in_degree: 0,
51 self_loop_count: 0,
52 };
53 }
54
55 let mut subjects: HashSet<&str> = HashSet::new();
56 let mut objects: HashSet<&str> = HashSet::new();
57 let mut predicates: HashSet<&str> = HashSet::new();
58 let mut out_degree: HashMap<&str, usize> = HashMap::new();
59 let mut in_degree: HashMap<&str, usize> = HashMap::new();
60 let mut self_loop_count: usize = 0;
61
62 for (s, p, o) in triples {
63 subjects.insert(s.as_str());
64 objects.insert(o.as_str());
65 predicates.insert(p.as_str());
66
67 *out_degree.entry(s.as_str()).or_insert(0) += 1;
68 *in_degree.entry(o.as_str()).or_insert(0) += 1;
69
70 if s == o {
71 self_loop_count += 1;
72 }
73 }
74
75 let subject_count = subjects.len();
76 let object_count = objects.len();
77 let nodes: HashSet<&str> = subjects.union(&objects).copied().collect();
78 let node_count = nodes.len();
79 let edge_count = triples.len();
80 let predicate_count = predicates.len();
81
82 let density = if node_count > 1 {
83 edge_count as f64 / (node_count as f64 * (node_count as f64 - 1.0))
84 } else {
85 0.0
86 };
87
88 let avg_out_degree = if subject_count > 0 {
89 edge_count as f64 / subject_count as f64
90 } else {
91 0.0
92 };
93
94 let max_out_degree = out_degree.values().copied().max().unwrap_or(0);
95
96 let avg_in_degree = if object_count > 0 {
97 edge_count as f64 / object_count as f64
98 } else {
99 0.0
100 };
101
102 let max_in_degree = in_degree.values().copied().max().unwrap_or(0);
103
104 Self {
105 subject_count,
106 object_count,
107 node_count,
108 edge_count,
109 predicate_count,
110 density,
111 avg_out_degree,
112 max_out_degree,
113 avg_in_degree,
114 max_in_degree,
115 self_loop_count,
116 }
117 }
118
119 pub fn is_empty(&self) -> bool {
121 self.edge_count == 0
122 }
123
124 pub fn summary(&self) -> String {
126 format!(
127 "Graph: {} nodes, {} edges, {} predicates | \
128 density={:.6} | out-degree: avg={:.2}, max={} | \
129 in-degree: avg={:.2}, max={} | self-loops: {}",
130 self.node_count,
131 self.edge_count,
132 self.predicate_count,
133 self.density,
134 self.avg_out_degree,
135 self.max_out_degree,
136 self.avg_in_degree,
137 self.max_in_degree,
138 self.self_loop_count,
139 )
140 }
141}
142
143#[derive(Debug, Clone, Serialize, Deserialize)]
145pub struct PredicateStats {
146 pub name: String,
148 pub count: usize,
150 pub unique_subjects: usize,
152 pub unique_objects: usize,
154 pub is_functional: bool,
156 pub is_inverse_functional: bool,
158}
159
160pub fn predicate_statistics(triples: &[(String, String, String)]) -> Vec<PredicateStats> {
162 let mut pred_data: HashMap<&str, Vec<(&str, &str)>> = HashMap::new();
163
164 for (s, p, o) in triples {
165 pred_data
166 .entry(p.as_str())
167 .or_default()
168 .push((s.as_str(), o.as_str()));
169 }
170
171 let mut results: Vec<PredicateStats> = pred_data
172 .into_iter()
173 .map(|(name, pairs)| {
174 let count = pairs.len();
175
176 let mut subj_set: HashSet<&str> = HashSet::new();
177 let mut obj_set: HashSet<&str> = HashSet::new();
178 let mut subj_to_objs: HashMap<&str, HashSet<&str>> = HashMap::new();
179 let mut obj_to_subjs: HashMap<&str, HashSet<&str>> = HashMap::new();
180
181 for &(s, o) in &pairs {
182 subj_set.insert(s);
183 obj_set.insert(o);
184 subj_to_objs.entry(s).or_default().insert(o);
185 obj_to_subjs.entry(o).or_default().insert(s);
186 }
187
188 let is_functional = subj_to_objs.values().all(|objs| objs.len() <= 1);
189 let is_inverse_functional = obj_to_subjs.values().all(|subjs| subjs.len() <= 1);
190
191 PredicateStats {
192 name: name.to_string(),
193 count,
194 unique_subjects: subj_set.len(),
195 unique_objects: obj_set.len(),
196 is_functional,
197 is_inverse_functional,
198 }
199 })
200 .collect();
201
202 results.sort_by(|a, b| b.count.cmp(&a.count).then_with(|| a.name.cmp(&b.name)));
203 results
204}
205
206#[derive(Debug, Clone, Default, Serialize, Deserialize)]
208pub struct DegreeDistribution {
209 pub out_degrees: HashMap<usize, usize>,
211 pub in_degrees: HashMap<usize, usize>,
213}
214
215impl DegreeDistribution {
216 pub fn compute(triples: &[(String, String, String)]) -> Self {
218 let mut out_deg: HashMap<&str, usize> = HashMap::new();
219 let mut in_deg: HashMap<&str, usize> = HashMap::new();
220
221 for (s, _p, o) in triples {
222 *out_deg.entry(s.as_str()).or_insert(0) += 1;
223 *in_deg.entry(o.as_str()).or_insert(0) += 1;
224 }
225
226 let mut out_degrees: HashMap<usize, usize> = HashMap::new();
227 for ° in out_deg.values() {
228 *out_degrees.entry(deg).or_insert(0) += 1;
229 }
230
231 let mut in_degrees: HashMap<usize, usize> = HashMap::new();
232 for ° in in_deg.values() {
233 *in_degrees.entry(deg).or_insert(0) += 1;
234 }
235
236 Self {
237 out_degrees,
238 in_degrees,
239 }
240 }
241
242 pub fn median_out_degree(&self) -> f64 {
244 compute_median_from_distribution(&self.out_degrees)
245 }
246
247 pub fn median_in_degree(&self) -> f64 {
249 compute_median_from_distribution(&self.in_degrees)
250 }
251}
252
253fn compute_median_from_distribution(dist: &HashMap<usize, usize>) -> f64 {
255 if dist.is_empty() {
256 return 0.0;
257 }
258
259 let total: usize = dist.values().sum();
260 if total == 0 {
261 return 0.0;
262 }
263
264 let mut sorted_degrees: Vec<usize> = dist.keys().copied().collect();
265 sorted_degrees.sort_unstable();
266
267 let mut values: Vec<usize> = Vec::with_capacity(total);
269 for ° in &sorted_degrees {
270 let count = dist.get(°).copied().unwrap_or(0);
271 for _ in 0..count {
272 values.push(deg);
273 }
274 }
275
276 if values.len() % 2 == 1 {
277 values[values.len() / 2] as f64
278 } else {
279 let mid = values.len() / 2;
280 (values[mid - 1] as f64 + values[mid] as f64) / 2.0
281 }
282}
283
284pub fn connected_components(triples: &[(String, String, String)]) -> usize {
286 let mut nodes: HashSet<String> = HashSet::new();
287 let mut uf = UnionFind::new();
288
289 for (s, _p, o) in triples {
290 nodes.insert(s.clone());
291 nodes.insert(o.clone());
292 uf.union(s, o);
293 }
294
295 if nodes.is_empty() {
296 return 0;
297 }
298
299 uf.component_count(&nodes)
300}
301
302struct UnionFind {
304 parent: HashMap<String, String>,
305}
306
307impl UnionFind {
308 fn new() -> Self {
309 Self {
310 parent: HashMap::new(),
311 }
312 }
313
314 fn find(&mut self, x: &str) -> String {
315 if !self.parent.contains_key(x) {
317 self.parent.insert(x.to_string(), x.to_string());
318 return x.to_string();
319 }
320
321 let mut current = x.to_string();
323 let mut path = Vec::new();
324
325 loop {
326 let p = self
327 .parent
328 .get(¤t)
329 .cloned()
330 .unwrap_or_else(|| current.clone());
331 if p == current {
332 break;
333 }
334 path.push(current.clone());
335 current = p;
336 }
337
338 let root = current;
340 for node in path {
341 self.parent.insert(node, root.clone());
342 }
343
344 root
345 }
346
347 fn union(&mut self, x: &str, y: &str) {
348 let rx = self.find(x);
349 let ry = self.find(y);
350 if rx != ry {
351 self.parent.insert(rx, ry);
352 }
353 }
354
355 fn component_count(&mut self, nodes: &HashSet<String>) -> usize {
356 let mut roots: HashSet<String> = HashSet::new();
357 for node in nodes {
358 let root = self.find(node);
359 roots.insert(root);
360 }
361 roots.len()
362 }
363}
364
365#[cfg(test)]
366mod tests {
367 use super::*;
368
369 fn make_triples(data: &[(&str, &str, &str)]) -> Vec<(String, String, String)> {
370 data.iter()
371 .map(|(s, p, o)| (s.to_string(), p.to_string(), o.to_string()))
372 .collect()
373 }
374
375 #[test]
376 fn test_graph_stats_empty() {
377 let stats = GraphStats::compute(&[]);
378 assert_eq!(stats.subject_count, 0);
379 assert_eq!(stats.object_count, 0);
380 assert_eq!(stats.node_count, 0);
381 assert_eq!(stats.edge_count, 0);
382 assert_eq!(stats.predicate_count, 0);
383 assert_eq!(stats.density, 0.0);
384 assert_eq!(stats.avg_out_degree, 0.0);
385 assert_eq!(stats.max_out_degree, 0);
386 assert_eq!(stats.avg_in_degree, 0.0);
387 assert_eq!(stats.max_in_degree, 0);
388 assert_eq!(stats.self_loop_count, 0);
389 }
390
391 #[test]
392 fn test_graph_stats_single_triple() {
393 let triples = make_triples(&[("a", "p", "b")]);
394 let stats = GraphStats::compute(&triples);
395 assert_eq!(stats.node_count, 2);
396 assert_eq!(stats.edge_count, 1);
397 assert_eq!(stats.subject_count, 1);
398 assert_eq!(stats.object_count, 1);
399 }
400
401 #[test]
402 fn test_graph_stats_node_count() {
403 let triples = make_triples(&[("a", "p", "b"), ("b", "q", "a")]);
405 let stats = GraphStats::compute(&triples);
406 assert_eq!(stats.node_count, 2);
407 assert_eq!(stats.subject_count, 2);
408 assert_eq!(stats.object_count, 2);
409 }
410
411 #[test]
412 fn test_graph_stats_density() {
413 let triples = make_triples(&[("a", "p", "b"), ("b", "p", "c"), ("c", "p", "a")]);
415 let stats = GraphStats::compute(&triples);
416 assert_eq!(stats.node_count, 3);
417 assert_eq!(stats.edge_count, 3);
418 let expected_density = 3.0 / (3.0 * 2.0);
419 assert!((stats.density - expected_density).abs() < 1e-10);
420 }
421
422 #[test]
423 fn test_graph_stats_degrees() {
424 let triples = make_triples(&[("a", "p", "b"), ("a", "p", "c"), ("b", "p", "c")]);
426 let stats = GraphStats::compute(&triples);
427 assert!((stats.avg_out_degree - 1.5).abs() < 1e-10);
429 assert_eq!(stats.max_out_degree, 2);
430 assert!((stats.avg_in_degree - 1.5).abs() < 1e-10);
432 assert_eq!(stats.max_in_degree, 2);
433 }
434
435 #[test]
436 fn test_graph_stats_self_loop() {
437 let triples = make_triples(&[("a", "p", "a"), ("a", "q", "b")]);
438 let stats = GraphStats::compute(&triples);
439 assert_eq!(stats.self_loop_count, 1);
440 }
441
442 #[test]
443 fn test_graph_stats_summary() {
444 let triples = make_triples(&[("a", "p", "b")]);
445 let stats = GraphStats::compute(&triples);
446 let summary = stats.summary();
447 assert!(!summary.is_empty());
448 assert!(summary.contains("2 nodes"));
449 assert!(summary.contains("1 edges"));
450 }
451
452 #[test]
453 fn test_graph_stats_is_empty() {
454 let stats = GraphStats::compute(&[]);
455 assert!(stats.is_empty());
456
457 let triples = make_triples(&[("a", "p", "b")]);
458 let stats2 = GraphStats::compute(&triples);
459 assert!(!stats2.is_empty());
460 }
461
462 #[test]
463 fn test_predicate_stats_count() {
464 let triples = make_triples(&[
465 ("a", "knows", "b"),
466 ("b", "knows", "c"),
467 ("a", "likes", "c"),
468 ]);
469 let stats = predicate_statistics(&triples);
470 assert_eq!(stats.len(), 2);
471 assert_eq!(stats[0].name, "knows");
473 assert_eq!(stats[0].count, 2);
474 assert_eq!(stats[1].name, "likes");
475 assert_eq!(stats[1].count, 1);
476 }
477
478 #[test]
479 fn test_predicate_stats_functional() {
480 let triples = make_triples(&[("a", "name", "Alice"), ("b", "name", "Bob")]);
482 let stats = predicate_statistics(&triples);
483 assert_eq!(stats.len(), 1);
484 assert!(stats[0].is_functional);
485 }
486
487 #[test]
488 fn test_predicate_stats_not_functional() {
489 let triples = make_triples(&[("a", "knows", "b"), ("a", "knows", "c")]);
491 let stats = predicate_statistics(&triples);
492 assert_eq!(stats.len(), 1);
493 assert!(!stats[0].is_functional);
494 }
495
496 #[test]
497 fn test_predicate_stats_inverse_functional() {
498 let triples = make_triples(&[("a", "id", "1"), ("b", "id", "2")]);
500 let stats = predicate_statistics(&triples);
501 assert_eq!(stats.len(), 1);
502 assert!(stats[0].is_inverse_functional);
503 }
504
505 #[test]
506 fn test_degree_distribution_compute() {
507 let triples = make_triples(&[("a", "p", "b"), ("a", "p", "c"), ("b", "p", "c")]);
509 let dist = DegreeDistribution::compute(&triples);
510 assert_eq!(dist.out_degrees.get(&2), Some(&1));
512 assert_eq!(dist.out_degrees.get(&1), Some(&1));
513 assert_eq!(dist.in_degrees.get(&1), Some(&1));
515 assert_eq!(dist.in_degrees.get(&2), Some(&1));
516 }
517
518 #[test]
519 fn test_degree_distribution_median() {
520 let triples = make_triples(&[
522 ("a", "p", "x"),
523 ("b", "p", "x"),
524 ("c", "p", "x"),
525 ("c", "p", "y"),
526 ("d", "p", "x"),
527 ("d", "p", "y"),
528 ("d", "p", "z"),
529 ]);
530 let dist = DegreeDistribution::compute(&triples);
531 assert!((dist.median_out_degree() - 1.5).abs() < 1e-10);
533 }
534
535 #[test]
536 fn test_connected_components_single() {
537 let triples = make_triples(&[("a", "p", "b"), ("b", "p", "c"), ("c", "p", "a")]);
539 assert_eq!(connected_components(&triples), 1);
540 }
541
542 #[test]
543 fn test_connected_components_two() {
544 let triples = make_triples(&[("a", "p", "b"), ("c", "p", "d")]);
546 assert_eq!(connected_components(&triples), 2);
547 }
548
549 #[test]
550 fn test_connected_components_isolated() {
551 let triples = make_triples(&[("a", "p", "b"), ("c", "p", "d"), ("e", "p", "f")]);
554 assert_eq!(connected_components(&triples), 3);
555 }
556
557 #[test]
558 fn test_graph_stats_multiple_predicates() {
559 let triples = make_triples(&[
560 ("a", "knows", "b"),
561 ("a", "likes", "c"),
562 ("b", "hates", "c"),
563 ]);
564 let stats = GraphStats::compute(&triples);
565 assert_eq!(stats.predicate_count, 3);
566 }
567}