1use crate::graph_builder::build_graph;
2use serde::{Deserialize, Serialize};
3use std::collections::HashSet;
4
5#[derive(Debug, Clone, Serialize, Deserialize)]
6pub struct HealthScore {
7 pub name: String,
8 pub overall: f64,
9 pub connectivity: f64,
10 pub reachability: f64,
11 pub centrality: f64,
12 pub cycle_risk: f64,
13}
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct HealthReport {
17 pub scores: Vec<HealthScore>,
18 pub avg_connectivity: f64,
19 pub avg_reachability: f64,
20 pub avg_centrality: f64,
21 pub avg_cycle_risk: f64,
22 pub avg_overall: f64,
23 pub node_count: usize,
24 pub edge_count: usize,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct TerseHealthReport {
29 pub top_scores: Vec<TerseHealthScore>,
30 pub bottom_scores: Vec<TerseHealthScore>,
31 pub avg_overall: f64,
32 pub avg_cycle_risk: f64,
33 pub node_count: usize,
34 pub edge_count: usize,
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct TerseHealthScore {
39 pub name: String,
40 pub overall: f64,
41}
42
43pub fn terse_health_report(edges: &[(String, String)], n: usize) -> TerseHealthReport {
44 if edges.is_empty() {
45 return TerseHealthReport {
46 top_scores: Vec::new(),
47 bottom_scores: Vec::new(),
48 avg_overall: 0.0,
49 avg_cycle_risk: 0.0,
50 node_count: 0,
51 edge_count: 0,
52 };
53 }
54
55 let graph = build_graph(edges);
56 let node_vec = &graph.node_vec;
57 let out_adj = &graph.out_adj;
58 let in_adj = &graph.in_adj;
59 let node_count = graph.node_count();
60 if node_count == 0 {
61 return TerseHealthReport {
62 top_scores: Vec::new(),
63 bottom_scores: Vec::new(),
64 avg_overall: 0.0,
65 avg_cycle_risk: 0.0,
66 node_count: 0,
67 edge_count: 0,
68 };
69 }
70
71 let fwd_sccs = compute_sccs(node_count, out_adj);
72 let bwd_sccs = compute_sccs(node_count, in_adj);
73 let cycle_nodes = find_cycles_from_sccs(out_adj, &fwd_sccs);
74
75 let fwd_reach = compute_reachability_with_sccs(node_count, out_adj, &fwd_sccs);
76 let bwd_reach = compute_reachability_with_sccs(node_count, in_adj, &bwd_sccs);
77
78 let total_possible = (node_count - 1).max(1) as f64;
79 let total_degree: f64 = out_adj.iter().map(|s| s.len()).sum::<usize>() as f64;
80 let avg_degree = total_degree / node_count as f64;
81
82 let mut raw_scores: Vec<TerseHealthScore> = Vec::with_capacity(node_count);
83 let mut sum_overall: f64 = 0.0;
84 let mut sum_cycle_risk: f64 = 0.0;
85
86 for (i, name) in node_vec.iter().enumerate() {
87 let out_degree = out_adj[i].len() as f64;
88 let in_degree = in_adj[i].len() as f64;
89 let connectivity =
90 (out_degree + in_degree) / (2.0 * avg_degree.max(1.0)).min(total_degree.max(1.0));
91 let connectivity = connectivity.min(1.0);
92
93 let reach_fwd = (fwd_reach[i] as f64 - 1.0) / total_possible;
94 let reach_bwd = (bwd_reach[i] as f64 - 1.0) / total_possible;
95 let reachability = (reach_fwd + reach_bwd) / 2.0;
96
97 let centrality = if node_count > 1 {
98 (fwd_reach[i] + bwd_reach[i]) as f64 / (2.0 * node_count as f64)
99 } else {
100 1.0
101 };
102
103 let cycle_risk = if cycle_nodes.contains(&i) { 1.0 } else { 0.0 };
104
105 let overall = connectivity * 0.25
106 + reachability * 0.25
107 + centrality * 0.25
108 + (1.0 - cycle_risk) * 0.25;
109
110 sum_overall += overall;
111 sum_cycle_risk += cycle_risk;
112
113 raw_scores.push(TerseHealthScore {
114 name: name.clone(),
115 overall,
116 });
117 }
118
119 let n = n.min(node_count);
120
121 let (mut top_candidates, mut bottom_candidates) = {
122 let mut sorted = raw_scores;
123 sorted.sort_by(|a, b| b.overall.partial_cmp(&a.overall).unwrap_or(std::cmp::Ordering::Equal));
124 let top: Vec<TerseHealthScore> = sorted.iter().take(n).cloned().collect();
125 let bottom: Vec<TerseHealthScore> = sorted.iter().rev().take(n).cloned().collect();
126 (top, bottom)
127 };
128
129 top_candidates.sort_by(|a, b| b.overall.partial_cmp(&a.overall).unwrap_or(std::cmp::Ordering::Equal));
130 bottom_candidates.sort_by(|a, b| a.overall.partial_cmp(&b.overall).unwrap_or(std::cmp::Ordering::Equal));
131
132 TerseHealthReport {
133 top_scores: top_candidates,
134 bottom_scores: bottom_candidates,
135 avg_overall: sum_overall / node_count as f64,
136 avg_cycle_risk: sum_cycle_risk / node_count as f64,
137 node_count,
138 edge_count: edges.len(),
139 }
140}
141
142fn compute_reachability_with_sccs(
143 n: usize,
144 adj: &[HashSet<usize>],
145 sccs: &[Vec<usize>],
146) -> Vec<usize> {
147 let mut comp_of = vec![0usize; n];
148 for (comp_id, members) in sccs.iter().enumerate() {
149 for &node in members {
150 comp_of[node] = comp_id;
151 }
152 }
153 let num_comps = sccs.len();
154 let mut comp_adj: Vec<HashSet<usize>> = vec![HashSet::new(); num_comps];
155 for u in 0..n {
156 for &v in &adj[u] {
157 let cu = comp_of[u];
158 let cv = comp_of[v];
159 if cu != cv {
160 comp_adj[cu].insert(cv);
161 }
162 }
163 }
164 let mut comp_reach = vec![0usize; num_comps];
165 for c in 0..num_comps {
166 let mut visited = vec![false; num_comps];
167 visited[c] = true;
168 let mut count = 0usize;
169 let mut queue = std::collections::VecDeque::from([c]);
170 while let Some(cur) = queue.pop_front() {
171 count += sccs[cur].len();
172 for &next in &comp_adj[cur] {
173 if !visited[next] {
174 visited[next] = true;
175 queue.push_back(next);
176 }
177 }
178 }
179 comp_reach[c] = count;
180 }
181 let mut reach = vec![0usize; n];
182 for i in 0..n {
183 reach[i] = comp_reach[comp_of[i]];
184 }
185 reach
186}
187
188fn compute_sccs(n: usize, adj: &[HashSet<usize>]) -> Vec<Vec<usize>> {
189 struct Tarjan<'a> {
190 adj: &'a [HashSet<usize>],
191 index: usize,
192 stack: Vec<usize>,
193 on_stack: Vec<bool>,
194 indices: Vec<Option<usize>>,
195 lowlinks: Vec<usize>,
196 components: Vec<Vec<usize>>,
197 }
198
199 impl<'a> Tarjan<'a> {
200 fn new(n: usize, adj: &'a [HashSet<usize>]) -> Self {
201 Self {
202 adj,
203 index: 0,
204 stack: Vec::new(),
205 on_stack: vec![false; n],
206 indices: vec![None; n],
207 lowlinks: vec![0; n],
208 components: Vec::new(),
209 }
210 }
211
212 fn strongconnect(&mut self, v: usize) {
213 self.indices[v] = Some(self.index);
214 self.lowlinks[v] = self.index;
215 self.index += 1;
216 self.stack.push(v);
217 self.on_stack[v] = true;
218
219 let neighbors: Vec<usize> = self.adj[v].iter().copied().collect();
220 for w in neighbors {
221 if self.indices[w].is_none() {
222 self.strongconnect(w);
223 }
224 if self.on_stack[w] {
225 self.lowlinks[v] = self.lowlinks[v].min(self.indices[w].unwrap());
226 }
227 }
228
229 if self.indices[v] == Some(self.lowlinks[v]) {
230 let mut component = Vec::new();
231 loop {
232 let w = self.stack.pop().unwrap();
233 self.on_stack[w] = false;
234 component.push(w);
235 if w == v {
236 break;
237 }
238 }
239 self.components.push(component);
240 }
241 }
242 }
243
244 let mut tarjan = Tarjan::new(n, adj);
245 for v in 0..n {
246 if tarjan.indices[v].is_none() {
247 tarjan.strongconnect(v);
248 }
249 }
250 tarjan.components
251}
252
253fn find_cycles_from_sccs(adj: &[HashSet<usize>], sccs: &[Vec<usize>]) -> HashSet<usize> {
254 let mut in_cycle = HashSet::new();
255 for component in sccs {
256 if component.len() > 1 {
257 for &node in component {
258 in_cycle.insert(node);
259 }
260 } else if component.len() == 1 {
261 let v = component[0];
262 if adj[v].contains(&v) {
263 in_cycle.insert(v);
264 }
265 }
266 }
267 in_cycle
268}
269
270pub fn composite_health_score(edges: &[(String, String)]) -> HealthReport {
271 if edges.is_empty() {
272 return HealthReport {
273 scores: Vec::new(),
274 avg_connectivity: 0.0,
275 avg_reachability: 0.0,
276 avg_centrality: 0.0,
277 avg_cycle_risk: 0.0,
278 avg_overall: 0.0,
279 node_count: 0,
280 edge_count: 0,
281 };
282 }
283
284 let graph = build_graph(edges);
285 let node_vec = &graph.node_vec;
286 let out_adj = &graph.out_adj;
287 let in_adj = &graph.in_adj;
288 let n = graph.node_count();
289 if n == 0 {
290 return HealthReport {
291 scores: Vec::new(),
292 avg_connectivity: 0.0,
293 avg_reachability: 0.0,
294 avg_centrality: 0.0,
295 avg_cycle_risk: 0.0,
296 avg_overall: 0.0,
297 node_count: 0,
298 edge_count: 0,
299 };
300 }
301
302 let fwd_sccs = compute_sccs(n, out_adj);
303 let bwd_sccs = compute_sccs(n, in_adj);
304 let cycle_nodes = find_cycles_from_sccs(out_adj, &fwd_sccs);
305
306 let fwd_reach = compute_reachability_with_sccs(n, out_adj, &fwd_sccs);
307 let bwd_reach = compute_reachability_with_sccs(n, in_adj, &bwd_sccs);
308
309 let total_possible = (n - 1).max(1) as f64;
310 let total_degree: f64 = out_adj.iter().map(|s| s.len()).sum::<usize>() as f64;
311 let avg_degree = total_degree / n as f64;
312
313 let mut scores = Vec::with_capacity(n);
314 for (i, name) in node_vec.iter().enumerate() {
315 let out_degree = out_adj[i].len() as f64;
316 let in_degree = in_adj[i].len() as f64;
317 let connectivity =
318 (out_degree + in_degree) / (2.0 * avg_degree.max(1.0)).min(total_degree.max(1.0));
319 let connectivity = connectivity.min(1.0);
320
321 let reach_fwd = (fwd_reach[i] as f64 - 1.0) / total_possible;
322 let reach_bwd = (bwd_reach[i] as f64 - 1.0) / total_possible;
323 let reachability = (reach_fwd + reach_bwd) / 2.0;
324
325 let centrality = if n > 1 {
326 (fwd_reach[i] + bwd_reach[i]) as f64 / (2.0 * n as f64)
327 } else {
328 1.0
329 };
330
331 let cycle_risk = if cycle_nodes.contains(&i) { 1.0 } else { 0.0 };
332
333 let overall = connectivity * 0.25
334 + reachability * 0.25
335 + centrality * 0.25
336 + (1.0 - cycle_risk) * 0.25;
337
338 scores.push(HealthScore {
339 name: name.clone(),
340 overall,
341 connectivity,
342 reachability,
343 centrality,
344 cycle_risk,
345 });
346 }
347
348 let avg_connectivity = scores.iter().map(|s| s.connectivity).sum::<f64>() / n as f64;
349 let avg_reachability = scores.iter().map(|s| s.reachability).sum::<f64>() / n as f64;
350 let avg_centrality = scores.iter().map(|s| s.centrality).sum::<f64>() / n as f64;
351 let avg_cycle_risk = scores.iter().map(|s| s.cycle_risk).sum::<f64>() / n as f64;
352 let avg_overall = scores.iter().map(|s| s.overall).sum::<f64>() / n as f64;
353
354 scores.sort_by(|a, b| {
355 b.overall
356 .partial_cmp(&a.overall)
357 .unwrap_or(std::cmp::Ordering::Equal)
358 });
359
360 HealthReport {
361 scores,
362 avg_connectivity,
363 avg_reachability,
364 avg_centrality,
365 avg_cycle_risk,
366 avg_overall,
367 node_count: n,
368 edge_count: edges.len(),
369 }
370}
371
372#[cfg(test)]
373mod tests {
374 use super::*;
375
376 fn e(a: &str, b: &str) -> (String, String) {
377 (a.to_string(), b.to_string())
378 }
379
380 #[test]
381 fn empty_graph() {
382 let report = composite_health_score(&[]);
383 assert_eq!(report.node_count, 0);
384 assert_eq!(report.scores.len(), 0);
385 }
386
387 #[test]
388 fn single_edge() {
389 let edges = vec![e("a", "b")];
390 let report = composite_health_score(&edges);
391 assert_eq!(report.node_count, 2);
392 assert_eq!(report.scores.len(), 2);
393 }
394
395 #[test]
396 fn hub_node_scores_higher() {
397 let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
398 let report = composite_health_score(&edges);
399 let hub = report.scores.iter().find(|s| s.name == "hub").unwrap();
400 let a = report.scores.iter().find(|s| s.name == "a").unwrap();
401 assert!(hub.overall > a.overall, "hub should score higher than leaf");
402 }
403
404 #[test]
405 fn cycle_free_zero_cycle_risk() {
406 let edges = vec![e("a", "b"), e("b", "c")];
407 let report = composite_health_score(&edges);
408 for score in &report.scores {
409 assert_eq!(
410 score.cycle_risk, 0.0,
411 "{} should have no cycle risk",
412 score.name
413 );
414 }
415 }
416
417 #[test]
418 fn cycle_increases_cycle_risk() {
419 let edges = vec![e("a", "b"), e("b", "a")];
420 let report = composite_health_score(&edges);
421 for score in &report.scores {
422 assert_eq!(
423 score.cycle_risk, 1.0,
424 "{} should have cycle risk",
425 score.name
426 );
427 }
428 }
429
430 #[test]
431 fn dense_cycle_does_not_recurse_on_current_node() {
432 let edges = vec![
433 e("alpha", "beta"),
434 e("alpha", "gamma"),
435 e("beta", "alpha"),
436 e("beta", "gamma"),
437 e("gamma", "alpha"),
438 e("gamma", "beta"),
439 ];
440 let report = composite_health_score(&edges);
441 assert_eq!(report.node_count, 3);
442 assert_eq!(report.avg_cycle_risk, 1.0);
443 }
444
445 #[test]
446 fn overall_scores_bounded() {
447 let edges = vec![
448 e("a", "b"),
449 e("b", "c"),
450 e("c", "d"),
451 e("d", "a"),
452 e("a", "c"),
453 ];
454 let report = composite_health_score(&edges);
455 for score in &report.scores {
456 assert!(
457 score.overall >= 0.0 && score.overall <= 1.0,
458 "{} overall={}",
459 score.name,
460 score.overall
461 );
462 assert!(score.connectivity >= 0.0 && score.connectivity <= 1.0);
463 assert!(score.reachability >= 0.0 && score.reachability <= 1.0);
464 assert!(score.centrality >= 0.0 && score.centrality <= 1.0);
465 }
466 }
467
468 #[test]
469 fn sorted_by_overall_descending() {
470 let edges = vec![e("a", "b"), e("a", "c"), e("b", "c")];
471 let report = composite_health_score(&edges);
472 for i in 1..report.scores.len() {
473 assert!(report.scores[i - 1].overall >= report.scores[i].overall);
474 }
475 }
476
477 #[test]
478 fn terse_empty() {
479 let report = terse_health_report(&[], 5);
480 assert_eq!(report.node_count, 0);
481 assert_eq!(report.top_scores.len(), 0);
482 assert_eq!(report.bottom_scores.len(), 0);
483 assert_eq!(report.avg_overall, 0.0);
484 }
485
486 #[test]
487 fn terse_matches_composite() {
488 let edges = vec![e("a", "b"), e("a", "c"), e("b", "c"), e("c", "d"), e("d", "a")];
489 let full = composite_health_score(&edges);
490 let terse = terse_health_report(&edges, 2);
491 assert_eq!(terse.node_count, full.node_count);
492 assert_eq!(terse.edge_count, full.edge_count);
493 assert!((terse.avg_overall - full.avg_overall).abs() < 1e-10);
494 assert!((terse.avg_cycle_risk - full.avg_cycle_risk).abs() < 1e-10);
495 assert_eq!(terse.top_scores.len(), 2);
496 assert_eq!(terse.bottom_scores.len(), 2);
497 assert_eq!(terse.top_scores[0].name, full.scores[0].name);
498 assert!((terse.top_scores[0].overall - full.scores[0].overall).abs() < 1e-10);
499 }
500
501 #[test]
502 fn terse_top_sorted_descending() {
503 let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
504 let terse = terse_health_report(&edges, 2);
505 for i in 1..terse.top_scores.len() {
506 assert!(terse.top_scores[i - 1].overall >= terse.top_scores[i].overall);
507 }
508 }
509
510 #[test]
511 fn terse_bottom_sorted_ascending() {
512 let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
513 let terse = terse_health_report(&edges, 2);
514 for i in 1..terse.bottom_scores.len() {
515 assert!(terse.bottom_scores[i - 1].overall <= terse.bottom_scores[i].overall);
516 }
517 }
518
519 #[test]
520 fn terse_n_larger_than_nodes() {
521 let edges = vec![e("a", "b")];
522 let terse = terse_health_report(&edges, 10);
523 assert_eq!(terse.top_scores.len(), 2);
524 assert_eq!(terse.bottom_scores.len(), 2);
525 }
526
527 #[test]
528 fn terse_scores_bounded() {
529 let edges = vec![
530 e("a", "b"),
531 e("b", "c"),
532 e("c", "d"),
533 e("d", "a"),
534 e("a", "c"),
535 ];
536 let terse = terse_health_report(&edges, 2);
537 for s in terse.top_scores.iter().chain(terse.bottom_scores.iter()) {
538 assert!(s.overall >= 0.0 && s.overall <= 1.0, "{} overall={}", s.name, s.overall);
539 }
540 }
541
542 #[test]
543 fn terse_cycle_avg() {
544 let edges = vec![e("a", "b"), e("b", "a")];
545 let terse = terse_health_report(&edges, 1);
546 assert!((terse.avg_cycle_risk - 1.0).abs() < 1e-10);
547 }
548}