1use crate::graph_builder::build_graph;
2use serde::{Deserialize, Serialize};
3use std::collections::HashSet;
4
5#[derive(Debug, Clone, Serialize, Deserialize)]
6pub struct HealthScore {
7 pub name: String,
8 pub overall: f64,
9 pub connectivity: f64,
10 pub reachability: f64,
11 pub centrality: f64,
12 pub cycle_risk: f64,
13}
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct HealthReport {
17 pub scores: Vec<HealthScore>,
18 pub avg_connectivity: f64,
19 pub avg_reachability: f64,
20 pub avg_centrality: f64,
21 pub avg_cycle_risk: f64,
22 pub avg_overall: f64,
23 pub node_count: usize,
24 pub edge_count: usize,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct TerseHealthReport {
29 pub top_scores: Vec<TerseHealthScore>,
30 pub bottom_scores: Vec<TerseHealthScore>,
31 pub avg_overall: f64,
32 pub avg_cycle_risk: f64,
33 pub node_count: usize,
34 pub edge_count: usize,
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct TerseHealthScore {
39 pub name: String,
40 pub overall: f64,
41}
42
43pub fn terse_health_report(edges: &[(String, String)], n: usize) -> TerseHealthReport {
44 if edges.is_empty() {
45 return TerseHealthReport {
46 top_scores: Vec::new(),
47 bottom_scores: Vec::new(),
48 avg_overall: 0.0,
49 avg_cycle_risk: 0.0,
50 node_count: 0,
51 edge_count: 0,
52 };
53 }
54
55 let graph = build_graph(edges);
56 let node_vec = &graph.node_vec;
57 let out_adj = &graph.out_adj;
58 let in_adj = &graph.in_adj;
59 let node_count = graph.node_count();
60 if node_count == 0 {
61 return TerseHealthReport {
62 top_scores: Vec::new(),
63 bottom_scores: Vec::new(),
64 avg_overall: 0.0,
65 avg_cycle_risk: 0.0,
66 node_count: 0,
67 edge_count: 0,
68 };
69 }
70
71 let fwd_sccs = compute_sccs(node_count, out_adj);
72 let bwd_sccs = compute_sccs(node_count, in_adj);
73 let cycle_nodes = find_cycles_from_sccs(out_adj, &fwd_sccs);
74
75 let fwd_reach = compute_reachability_with_sccs(node_count, out_adj, &fwd_sccs);
76 let bwd_reach = compute_reachability_with_sccs(node_count, in_adj, &bwd_sccs);
77
78 let total_possible = (node_count - 1).max(1) as f64;
79 let total_degree: f64 = out_adj.iter().map(|s| s.len()).sum::<usize>() as f64;
80 let avg_degree = total_degree / node_count as f64;
81
82 let mut raw_scores: Vec<TerseHealthScore> = Vec::with_capacity(node_count);
83 let mut sum_overall: f64 = 0.0;
84 let mut sum_cycle_risk: f64 = 0.0;
85
86 for (i, name) in node_vec.iter().enumerate() {
87 let out_degree = out_adj[i].len() as f64;
88 let in_degree = in_adj[i].len() as f64;
89 let connectivity =
90 (out_degree + in_degree) / (2.0 * avg_degree.max(1.0)).min(total_degree.max(1.0));
91 let connectivity = connectivity.min(1.0);
92
93 let reach_fwd = (fwd_reach[i] as f64 - 1.0) / total_possible;
94 let reach_bwd = (bwd_reach[i] as f64 - 1.0) / total_possible;
95 let reachability = (reach_fwd + reach_bwd) / 2.0;
96
97 let centrality = if node_count > 1 {
98 (fwd_reach[i] + bwd_reach[i]) as f64 / (2.0 * node_count as f64)
99 } else {
100 1.0
101 };
102
103 let cycle_risk = if cycle_nodes.contains(&i) { 1.0 } else { 0.0 };
104
105 let overall = connectivity * 0.25
106 + reachability * 0.25
107 + centrality * 0.25
108 + (1.0 - cycle_risk) * 0.25;
109
110 sum_overall += overall;
111 sum_cycle_risk += cycle_risk;
112
113 raw_scores.push(TerseHealthScore {
114 name: name.clone(),
115 overall,
116 });
117 }
118
119 let n = n.min(node_count);
120
121 let (mut top_candidates, mut bottom_candidates) = {
122 let mut sorted = raw_scores;
123 sorted.sort_by(|a, b| {
124 b.overall
125 .partial_cmp(&a.overall)
126 .unwrap_or(std::cmp::Ordering::Equal)
127 });
128 let top: Vec<TerseHealthScore> = sorted.iter().take(n).cloned().collect();
129 let bottom: Vec<TerseHealthScore> = sorted.iter().rev().take(n).cloned().collect();
130 (top, bottom)
131 };
132
133 top_candidates.sort_by(|a, b| {
134 b.overall
135 .partial_cmp(&a.overall)
136 .unwrap_or(std::cmp::Ordering::Equal)
137 });
138 bottom_candidates.sort_by(|a, b| {
139 a.overall
140 .partial_cmp(&b.overall)
141 .unwrap_or(std::cmp::Ordering::Equal)
142 });
143
144 TerseHealthReport {
145 top_scores: top_candidates,
146 bottom_scores: bottom_candidates,
147 avg_overall: sum_overall / node_count as f64,
148 avg_cycle_risk: sum_cycle_risk / node_count as f64,
149 node_count,
150 edge_count: edges.len(),
151 }
152}
153
154fn compute_reachability_with_sccs(
155 n: usize,
156 adj: &[HashSet<usize>],
157 sccs: &[Vec<usize>],
158) -> Vec<usize> {
159 let mut comp_of = vec![0usize; n];
160 for (comp_id, members) in sccs.iter().enumerate() {
161 for &node in members {
162 comp_of[node] = comp_id;
163 }
164 }
165 let num_comps = sccs.len();
166 let mut comp_adj: Vec<HashSet<usize>> = vec![HashSet::new(); num_comps];
167 for u in 0..n {
168 for &v in &adj[u] {
169 let cu = comp_of[u];
170 let cv = comp_of[v];
171 if cu != cv {
172 comp_adj[cu].insert(cv);
173 }
174 }
175 }
176 let mut comp_reach = vec![0usize; num_comps];
177 for c in 0..num_comps {
178 let mut visited = vec![false; num_comps];
179 visited[c] = true;
180 let mut count = 0usize;
181 let mut queue = std::collections::VecDeque::from([c]);
182 while let Some(cur) = queue.pop_front() {
183 count += sccs[cur].len();
184 for &next in &comp_adj[cur] {
185 if !visited[next] {
186 visited[next] = true;
187 queue.push_back(next);
188 }
189 }
190 }
191 comp_reach[c] = count;
192 }
193 let mut reach = vec![0usize; n];
194 for i in 0..n {
195 reach[i] = comp_reach[comp_of[i]];
196 }
197 reach
198}
199
200fn compute_sccs(n: usize, adj: &[HashSet<usize>]) -> Vec<Vec<usize>> {
201 struct Tarjan<'a> {
202 adj: &'a [HashSet<usize>],
203 index: usize,
204 stack: Vec<usize>,
205 on_stack: Vec<bool>,
206 indices: Vec<Option<usize>>,
207 lowlinks: Vec<usize>,
208 components: Vec<Vec<usize>>,
209 }
210
211 impl<'a> Tarjan<'a> {
212 fn new(n: usize, adj: &'a [HashSet<usize>]) -> Self {
213 Self {
214 adj,
215 index: 0,
216 stack: Vec::new(),
217 on_stack: vec![false; n],
218 indices: vec![None; n],
219 lowlinks: vec![0; n],
220 components: Vec::new(),
221 }
222 }
223
224 fn strongconnect(&mut self, v: usize) {
225 self.indices[v] = Some(self.index);
226 self.lowlinks[v] = self.index;
227 self.index += 1;
228 self.stack.push(v);
229 self.on_stack[v] = true;
230
231 let neighbors: Vec<usize> = self.adj[v].iter().copied().collect();
232 for w in neighbors {
233 if self.indices[w].is_none() {
234 self.strongconnect(w);
235 }
236 if self.on_stack[w] {
237 self.lowlinks[v] = self.lowlinks[v].min(self.indices[w].unwrap());
238 }
239 }
240
241 if self.indices[v] == Some(self.lowlinks[v]) {
242 let mut component = Vec::new();
243 loop {
244 let w = self.stack.pop().unwrap();
245 self.on_stack[w] = false;
246 component.push(w);
247 if w == v {
248 break;
249 }
250 }
251 self.components.push(component);
252 }
253 }
254 }
255
256 let mut tarjan = Tarjan::new(n, adj);
257 for v in 0..n {
258 if tarjan.indices[v].is_none() {
259 tarjan.strongconnect(v);
260 }
261 }
262 tarjan.components
263}
264
265fn find_cycles_from_sccs(adj: &[HashSet<usize>], sccs: &[Vec<usize>]) -> HashSet<usize> {
266 let mut in_cycle = HashSet::new();
267 for component in sccs {
268 if component.len() > 1 {
269 for &node in component {
270 in_cycle.insert(node);
271 }
272 } else if component.len() == 1 {
273 let v = component[0];
274 if adj[v].contains(&v) {
275 in_cycle.insert(v);
276 }
277 }
278 }
279 in_cycle
280}
281
282pub fn composite_health_score(edges: &[(String, String)]) -> HealthReport {
283 if edges.is_empty() {
284 return HealthReport {
285 scores: Vec::new(),
286 avg_connectivity: 0.0,
287 avg_reachability: 0.0,
288 avg_centrality: 0.0,
289 avg_cycle_risk: 0.0,
290 avg_overall: 0.0,
291 node_count: 0,
292 edge_count: 0,
293 };
294 }
295
296 let graph = build_graph(edges);
297 let node_vec = &graph.node_vec;
298 let out_adj = &graph.out_adj;
299 let in_adj = &graph.in_adj;
300 let n = graph.node_count();
301 if n == 0 {
302 return HealthReport {
303 scores: Vec::new(),
304 avg_connectivity: 0.0,
305 avg_reachability: 0.0,
306 avg_centrality: 0.0,
307 avg_cycle_risk: 0.0,
308 avg_overall: 0.0,
309 node_count: 0,
310 edge_count: 0,
311 };
312 }
313
314 let fwd_sccs = compute_sccs(n, out_adj);
315 let bwd_sccs = compute_sccs(n, in_adj);
316 let cycle_nodes = find_cycles_from_sccs(out_adj, &fwd_sccs);
317
318 let fwd_reach = compute_reachability_with_sccs(n, out_adj, &fwd_sccs);
319 let bwd_reach = compute_reachability_with_sccs(n, in_adj, &bwd_sccs);
320
321 let total_possible = (n - 1).max(1) as f64;
322 let total_degree: f64 = out_adj.iter().map(|s| s.len()).sum::<usize>() as f64;
323 let avg_degree = total_degree / n as f64;
324
325 let mut scores = Vec::with_capacity(n);
326 for (i, name) in node_vec.iter().enumerate() {
327 let out_degree = out_adj[i].len() as f64;
328 let in_degree = in_adj[i].len() as f64;
329 let connectivity =
330 (out_degree + in_degree) / (2.0 * avg_degree.max(1.0)).min(total_degree.max(1.0));
331 let connectivity = connectivity.min(1.0);
332
333 let reach_fwd = (fwd_reach[i] as f64 - 1.0) / total_possible;
334 let reach_bwd = (bwd_reach[i] as f64 - 1.0) / total_possible;
335 let reachability = (reach_fwd + reach_bwd) / 2.0;
336
337 let centrality = if n > 1 {
338 (fwd_reach[i] + bwd_reach[i]) as f64 / (2.0 * n as f64)
339 } else {
340 1.0
341 };
342
343 let cycle_risk = if cycle_nodes.contains(&i) { 1.0 } else { 0.0 };
344
345 let overall = connectivity * 0.25
346 + reachability * 0.25
347 + centrality * 0.25
348 + (1.0 - cycle_risk) * 0.25;
349
350 scores.push(HealthScore {
351 name: name.clone(),
352 overall,
353 connectivity,
354 reachability,
355 centrality,
356 cycle_risk,
357 });
358 }
359
360 let avg_connectivity = scores.iter().map(|s| s.connectivity).sum::<f64>() / n as f64;
361 let avg_reachability = scores.iter().map(|s| s.reachability).sum::<f64>() / n as f64;
362 let avg_centrality = scores.iter().map(|s| s.centrality).sum::<f64>() / n as f64;
363 let avg_cycle_risk = scores.iter().map(|s| s.cycle_risk).sum::<f64>() / n as f64;
364 let avg_overall = scores.iter().map(|s| s.overall).sum::<f64>() / n as f64;
365
366 scores.sort_by(|a, b| {
367 b.overall
368 .partial_cmp(&a.overall)
369 .unwrap_or(std::cmp::Ordering::Equal)
370 });
371
372 HealthReport {
373 scores,
374 avg_connectivity,
375 avg_reachability,
376 avg_centrality,
377 avg_cycle_risk,
378 avg_overall,
379 node_count: n,
380 edge_count: edges.len(),
381 }
382}
383
384#[cfg(test)]
385mod tests {
386 use super::*;
387
388 fn e(a: &str, b: &str) -> (String, String) {
389 (a.to_string(), b.to_string())
390 }
391
392 #[test]
393 fn empty_graph() {
394 let report = composite_health_score(&[]);
395 assert_eq!(report.node_count, 0);
396 assert_eq!(report.scores.len(), 0);
397 }
398
399 #[test]
400 fn single_edge() {
401 let edges = vec![e("a", "b")];
402 let report = composite_health_score(&edges);
403 assert_eq!(report.node_count, 2);
404 assert_eq!(report.scores.len(), 2);
405 }
406
407 #[test]
408 fn hub_node_scores_higher() {
409 let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
410 let report = composite_health_score(&edges);
411 let hub = report.scores.iter().find(|s| s.name == "hub").unwrap();
412 let a = report.scores.iter().find(|s| s.name == "a").unwrap();
413 assert!(hub.overall > a.overall, "hub should score higher than leaf");
414 }
415
416 #[test]
417 fn cycle_free_zero_cycle_risk() {
418 let edges = vec![e("a", "b"), e("b", "c")];
419 let report = composite_health_score(&edges);
420 for score in &report.scores {
421 assert_eq!(
422 score.cycle_risk, 0.0,
423 "{} should have no cycle risk",
424 score.name
425 );
426 }
427 }
428
429 #[test]
430 fn cycle_increases_cycle_risk() {
431 let edges = vec![e("a", "b"), e("b", "a")];
432 let report = composite_health_score(&edges);
433 for score in &report.scores {
434 assert_eq!(
435 score.cycle_risk, 1.0,
436 "{} should have cycle risk",
437 score.name
438 );
439 }
440 }
441
442 #[test]
443 fn dense_cycle_does_not_recurse_on_current_node() {
444 let edges = vec![
445 e("alpha", "beta"),
446 e("alpha", "gamma"),
447 e("beta", "alpha"),
448 e("beta", "gamma"),
449 e("gamma", "alpha"),
450 e("gamma", "beta"),
451 ];
452 let report = composite_health_score(&edges);
453 assert_eq!(report.node_count, 3);
454 assert_eq!(report.avg_cycle_risk, 1.0);
455 }
456
457 #[test]
458 fn overall_scores_bounded() {
459 let edges = vec![
460 e("a", "b"),
461 e("b", "c"),
462 e("c", "d"),
463 e("d", "a"),
464 e("a", "c"),
465 ];
466 let report = composite_health_score(&edges);
467 for score in &report.scores {
468 assert!(
469 score.overall >= 0.0 && score.overall <= 1.0,
470 "{} overall={}",
471 score.name,
472 score.overall
473 );
474 assert!(score.connectivity >= 0.0 && score.connectivity <= 1.0);
475 assert!(score.reachability >= 0.0 && score.reachability <= 1.0);
476 assert!(score.centrality >= 0.0 && score.centrality <= 1.0);
477 }
478 }
479
480 #[test]
481 fn sorted_by_overall_descending() {
482 let edges = vec![e("a", "b"), e("a", "c"), e("b", "c")];
483 let report = composite_health_score(&edges);
484 for i in 1..report.scores.len() {
485 assert!(report.scores[i - 1].overall >= report.scores[i].overall);
486 }
487 }
488
489 #[test]
490 fn terse_empty() {
491 let report = terse_health_report(&[], 5);
492 assert_eq!(report.node_count, 0);
493 assert_eq!(report.top_scores.len(), 0);
494 assert_eq!(report.bottom_scores.len(), 0);
495 assert_eq!(report.avg_overall, 0.0);
496 }
497
498 #[test]
499 fn terse_matches_composite() {
500 let edges = vec![
501 e("a", "b"),
502 e("a", "c"),
503 e("b", "c"),
504 e("c", "d"),
505 e("d", "a"),
506 ];
507 let full = composite_health_score(&edges);
508 let terse = terse_health_report(&edges, 2);
509 assert_eq!(terse.node_count, full.node_count);
510 assert_eq!(terse.edge_count, full.edge_count);
511 assert!((terse.avg_overall - full.avg_overall).abs() < 1e-10);
512 assert!((terse.avg_cycle_risk - full.avg_cycle_risk).abs() < 1e-10);
513 assert_eq!(terse.top_scores.len(), 2);
514 assert_eq!(terse.bottom_scores.len(), 2);
515 assert_eq!(terse.top_scores[0].name, full.scores[0].name);
516 assert!((terse.top_scores[0].overall - full.scores[0].overall).abs() < 1e-10);
517 }
518
519 #[test]
520 fn terse_top_sorted_descending() {
521 let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
522 let terse = terse_health_report(&edges, 2);
523 for i in 1..terse.top_scores.len() {
524 assert!(terse.top_scores[i - 1].overall >= terse.top_scores[i].overall);
525 }
526 }
527
528 #[test]
529 fn terse_bottom_sorted_ascending() {
530 let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
531 let terse = terse_health_report(&edges, 2);
532 for i in 1..terse.bottom_scores.len() {
533 assert!(terse.bottom_scores[i - 1].overall <= terse.bottom_scores[i].overall);
534 }
535 }
536
537 #[test]
538 fn terse_n_larger_than_nodes() {
539 let edges = vec![e("a", "b")];
540 let terse = terse_health_report(&edges, 10);
541 assert_eq!(terse.top_scores.len(), 2);
542 assert_eq!(terse.bottom_scores.len(), 2);
543 }
544
545 #[test]
546 fn terse_scores_bounded() {
547 let edges = vec![
548 e("a", "b"),
549 e("b", "c"),
550 e("c", "d"),
551 e("d", "a"),
552 e("a", "c"),
553 ];
554 let terse = terse_health_report(&edges, 2);
555 for s in terse.top_scores.iter().chain(terse.bottom_scores.iter()) {
556 assert!(
557 s.overall >= 0.0 && s.overall <= 1.0,
558 "{} overall={}",
559 s.name,
560 s.overall
561 );
562 }
563 }
564
565 #[test]
566 fn terse_cycle_avg() {
567 let edges = vec![e("a", "b"), e("b", "a")];
568 let terse = terse_health_report(&edges, 1);
569 assert!((terse.avg_cycle_risk - 1.0).abs() < 1e-10);
570 }
571}