Skip to main content

tsift_algorithms/
health.rs

1use crate::graph_builder::build_graph;
2use serde::{Deserialize, Serialize};
3use std::collections::HashSet;
4
5#[derive(Debug, Clone, Serialize, Deserialize)]
6pub struct HealthScore {
7    pub name: String,
8    pub overall: f64,
9    pub connectivity: f64,
10    pub reachability: f64,
11    pub centrality: f64,
12    pub cycle_risk: f64,
13}
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct HealthReport {
17    pub scores: Vec<HealthScore>,
18    pub avg_connectivity: f64,
19    pub avg_reachability: f64,
20    pub avg_centrality: f64,
21    pub avg_cycle_risk: f64,
22    pub avg_overall: f64,
23    pub node_count: usize,
24    pub edge_count: usize,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct TerseHealthReport {
29    pub top_scores: Vec<TerseHealthScore>,
30    pub bottom_scores: Vec<TerseHealthScore>,
31    pub avg_overall: f64,
32    pub avg_cycle_risk: f64,
33    pub node_count: usize,
34    pub edge_count: usize,
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct TerseHealthScore {
39    pub name: String,
40    pub overall: f64,
41}
42
43pub fn terse_health_report(edges: &[(String, String)], n: usize) -> TerseHealthReport {
44    if edges.is_empty() {
45        return TerseHealthReport {
46            top_scores: Vec::new(),
47            bottom_scores: Vec::new(),
48            avg_overall: 0.0,
49            avg_cycle_risk: 0.0,
50            node_count: 0,
51            edge_count: 0,
52        };
53    }
54
55    let graph = build_graph(edges);
56    let node_vec = &graph.node_vec;
57    let out_adj = &graph.out_adj;
58    let in_adj = &graph.in_adj;
59    let node_count = graph.node_count();
60    if node_count == 0 {
61        return TerseHealthReport {
62            top_scores: Vec::new(),
63            bottom_scores: Vec::new(),
64            avg_overall: 0.0,
65            avg_cycle_risk: 0.0,
66            node_count: 0,
67            edge_count: 0,
68        };
69    }
70
71    let fwd_sccs = compute_sccs(node_count, out_adj);
72    let bwd_sccs = compute_sccs(node_count, in_adj);
73    let cycle_nodes = find_cycles_from_sccs(out_adj, &fwd_sccs);
74
75    let fwd_reach = compute_reachability_with_sccs(node_count, out_adj, &fwd_sccs);
76    let bwd_reach = compute_reachability_with_sccs(node_count, in_adj, &bwd_sccs);
77
78    let total_possible = (node_count - 1).max(1) as f64;
79    let total_degree: f64 = out_adj.iter().map(|s| s.len()).sum::<usize>() as f64;
80    let avg_degree = total_degree / node_count as f64;
81
82    let mut raw_scores: Vec<TerseHealthScore> = Vec::with_capacity(node_count);
83    let mut sum_overall: f64 = 0.0;
84    let mut sum_cycle_risk: f64 = 0.0;
85
86    for (i, name) in node_vec.iter().enumerate() {
87        let out_degree = out_adj[i].len() as f64;
88        let in_degree = in_adj[i].len() as f64;
89        let connectivity =
90            (out_degree + in_degree) / (2.0 * avg_degree.max(1.0)).min(total_degree.max(1.0));
91        let connectivity = connectivity.min(1.0);
92
93        let reach_fwd = (fwd_reach[i] as f64 - 1.0) / total_possible;
94        let reach_bwd = (bwd_reach[i] as f64 - 1.0) / total_possible;
95        let reachability = (reach_fwd + reach_bwd) / 2.0;
96
97        let centrality = if node_count > 1 {
98            (fwd_reach[i] + bwd_reach[i]) as f64 / (2.0 * node_count as f64)
99        } else {
100            1.0
101        };
102
103        let cycle_risk = if cycle_nodes.contains(&i) { 1.0 } else { 0.0 };
104
105        let overall = connectivity * 0.25
106            + reachability * 0.25
107            + centrality * 0.25
108            + (1.0 - cycle_risk) * 0.25;
109
110        sum_overall += overall;
111        sum_cycle_risk += cycle_risk;
112
113        raw_scores.push(TerseHealthScore {
114            name: name.clone(),
115            overall,
116        });
117    }
118
119    let n = n.min(node_count);
120
121    let (mut top_candidates, mut bottom_candidates) = {
122        let mut sorted = raw_scores;
123        sorted.sort_by(|a, b| b.overall.partial_cmp(&a.overall).unwrap_or(std::cmp::Ordering::Equal));
124        let top: Vec<TerseHealthScore> = sorted.iter().take(n).cloned().collect();
125        let bottom: Vec<TerseHealthScore> = sorted.iter().rev().take(n).cloned().collect();
126        (top, bottom)
127    };
128
129    top_candidates.sort_by(|a, b| b.overall.partial_cmp(&a.overall).unwrap_or(std::cmp::Ordering::Equal));
130    bottom_candidates.sort_by(|a, b| a.overall.partial_cmp(&b.overall).unwrap_or(std::cmp::Ordering::Equal));
131
132    TerseHealthReport {
133        top_scores: top_candidates,
134        bottom_scores: bottom_candidates,
135        avg_overall: sum_overall / node_count as f64,
136        avg_cycle_risk: sum_cycle_risk / node_count as f64,
137        node_count,
138        edge_count: edges.len(),
139    }
140}
141
142fn compute_reachability_with_sccs(
143    n: usize,
144    adj: &[HashSet<usize>],
145    sccs: &[Vec<usize>],
146) -> Vec<usize> {
147    let mut comp_of = vec![0usize; n];
148    for (comp_id, members) in sccs.iter().enumerate() {
149        for &node in members {
150            comp_of[node] = comp_id;
151        }
152    }
153    let num_comps = sccs.len();
154    let mut comp_adj: Vec<HashSet<usize>> = vec![HashSet::new(); num_comps];
155    for u in 0..n {
156        for &v in &adj[u] {
157            let cu = comp_of[u];
158            let cv = comp_of[v];
159            if cu != cv {
160                comp_adj[cu].insert(cv);
161            }
162        }
163    }
164    let mut comp_reach = vec![0usize; num_comps];
165    for c in 0..num_comps {
166        let mut visited = vec![false; num_comps];
167        visited[c] = true;
168        let mut count = 0usize;
169        let mut queue = std::collections::VecDeque::from([c]);
170        while let Some(cur) = queue.pop_front() {
171            count += sccs[cur].len();
172            for &next in &comp_adj[cur] {
173                if !visited[next] {
174                    visited[next] = true;
175                    queue.push_back(next);
176                }
177            }
178        }
179        comp_reach[c] = count;
180    }
181    let mut reach = vec![0usize; n];
182    for i in 0..n {
183        reach[i] = comp_reach[comp_of[i]];
184    }
185    reach
186}
187
188fn compute_sccs(n: usize, adj: &[HashSet<usize>]) -> Vec<Vec<usize>> {
189    struct Tarjan<'a> {
190        adj: &'a [HashSet<usize>],
191        index: usize,
192        stack: Vec<usize>,
193        on_stack: Vec<bool>,
194        indices: Vec<Option<usize>>,
195        lowlinks: Vec<usize>,
196        components: Vec<Vec<usize>>,
197    }
198
199    impl<'a> Tarjan<'a> {
200        fn new(n: usize, adj: &'a [HashSet<usize>]) -> Self {
201            Self {
202                adj,
203                index: 0,
204                stack: Vec::new(),
205                on_stack: vec![false; n],
206                indices: vec![None; n],
207                lowlinks: vec![0; n],
208                components: Vec::new(),
209            }
210        }
211
212        fn strongconnect(&mut self, v: usize) {
213            self.indices[v] = Some(self.index);
214            self.lowlinks[v] = self.index;
215            self.index += 1;
216            self.stack.push(v);
217            self.on_stack[v] = true;
218
219            let neighbors: Vec<usize> = self.adj[v].iter().copied().collect();
220            for w in neighbors {
221                if self.indices[w].is_none() {
222                    self.strongconnect(w);
223                }
224                if self.on_stack[w] {
225                    self.lowlinks[v] = self.lowlinks[v].min(self.indices[w].unwrap());
226                }
227            }
228
229            if self.indices[v] == Some(self.lowlinks[v]) {
230                let mut component = Vec::new();
231                loop {
232                    let w = self.stack.pop().unwrap();
233                    self.on_stack[w] = false;
234                    component.push(w);
235                    if w == v {
236                        break;
237                    }
238                }
239                self.components.push(component);
240            }
241        }
242    }
243
244    let mut tarjan = Tarjan::new(n, adj);
245    for v in 0..n {
246        if tarjan.indices[v].is_none() {
247            tarjan.strongconnect(v);
248        }
249    }
250    tarjan.components
251}
252
253fn find_cycles_from_sccs(adj: &[HashSet<usize>], sccs: &[Vec<usize>]) -> HashSet<usize> {
254    let mut in_cycle = HashSet::new();
255    for component in sccs {
256        if component.len() > 1 {
257            for &node in component {
258                in_cycle.insert(node);
259            }
260        } else if component.len() == 1 {
261            let v = component[0];
262            if adj[v].contains(&v) {
263                in_cycle.insert(v);
264            }
265        }
266    }
267    in_cycle
268}
269
270pub fn composite_health_score(edges: &[(String, String)]) -> HealthReport {
271    if edges.is_empty() {
272        return HealthReport {
273            scores: Vec::new(),
274            avg_connectivity: 0.0,
275            avg_reachability: 0.0,
276            avg_centrality: 0.0,
277            avg_cycle_risk: 0.0,
278            avg_overall: 0.0,
279            node_count: 0,
280            edge_count: 0,
281        };
282    }
283
284    let graph = build_graph(edges);
285    let node_vec = &graph.node_vec;
286    let out_adj = &graph.out_adj;
287    let in_adj = &graph.in_adj;
288    let n = graph.node_count();
289    if n == 0 {
290        return HealthReport {
291            scores: Vec::new(),
292            avg_connectivity: 0.0,
293            avg_reachability: 0.0,
294            avg_centrality: 0.0,
295            avg_cycle_risk: 0.0,
296            avg_overall: 0.0,
297            node_count: 0,
298            edge_count: 0,
299        };
300    }
301
302    let fwd_sccs = compute_sccs(n, out_adj);
303    let bwd_sccs = compute_sccs(n, in_adj);
304    let cycle_nodes = find_cycles_from_sccs(out_adj, &fwd_sccs);
305
306    let fwd_reach = compute_reachability_with_sccs(n, out_adj, &fwd_sccs);
307    let bwd_reach = compute_reachability_with_sccs(n, in_adj, &bwd_sccs);
308
309    let total_possible = (n - 1).max(1) as f64;
310    let total_degree: f64 = out_adj.iter().map(|s| s.len()).sum::<usize>() as f64;
311    let avg_degree = total_degree / n as f64;
312
313    let mut scores = Vec::with_capacity(n);
314    for (i, name) in node_vec.iter().enumerate() {
315        let out_degree = out_adj[i].len() as f64;
316        let in_degree = in_adj[i].len() as f64;
317        let connectivity =
318            (out_degree + in_degree) / (2.0 * avg_degree.max(1.0)).min(total_degree.max(1.0));
319        let connectivity = connectivity.min(1.0);
320
321        let reach_fwd = (fwd_reach[i] as f64 - 1.0) / total_possible;
322        let reach_bwd = (bwd_reach[i] as f64 - 1.0) / total_possible;
323        let reachability = (reach_fwd + reach_bwd) / 2.0;
324
325        let centrality = if n > 1 {
326            (fwd_reach[i] + bwd_reach[i]) as f64 / (2.0 * n as f64)
327        } else {
328            1.0
329        };
330
331        let cycle_risk = if cycle_nodes.contains(&i) { 1.0 } else { 0.0 };
332
333        let overall = connectivity * 0.25
334            + reachability * 0.25
335            + centrality * 0.25
336            + (1.0 - cycle_risk) * 0.25;
337
338        scores.push(HealthScore {
339            name: name.clone(),
340            overall,
341            connectivity,
342            reachability,
343            centrality,
344            cycle_risk,
345        });
346    }
347
348    let avg_connectivity = scores.iter().map(|s| s.connectivity).sum::<f64>() / n as f64;
349    let avg_reachability = scores.iter().map(|s| s.reachability).sum::<f64>() / n as f64;
350    let avg_centrality = scores.iter().map(|s| s.centrality).sum::<f64>() / n as f64;
351    let avg_cycle_risk = scores.iter().map(|s| s.cycle_risk).sum::<f64>() / n as f64;
352    let avg_overall = scores.iter().map(|s| s.overall).sum::<f64>() / n as f64;
353
354    scores.sort_by(|a, b| {
355        b.overall
356            .partial_cmp(&a.overall)
357            .unwrap_or(std::cmp::Ordering::Equal)
358    });
359
360    HealthReport {
361        scores,
362        avg_connectivity,
363        avg_reachability,
364        avg_centrality,
365        avg_cycle_risk,
366        avg_overall,
367        node_count: n,
368        edge_count: edges.len(),
369    }
370}
371
372#[cfg(test)]
373mod tests {
374    use super::*;
375
376    fn e(a: &str, b: &str) -> (String, String) {
377        (a.to_string(), b.to_string())
378    }
379
380    #[test]
381    fn empty_graph() {
382        let report = composite_health_score(&[]);
383        assert_eq!(report.node_count, 0);
384        assert_eq!(report.scores.len(), 0);
385    }
386
387    #[test]
388    fn single_edge() {
389        let edges = vec![e("a", "b")];
390        let report = composite_health_score(&edges);
391        assert_eq!(report.node_count, 2);
392        assert_eq!(report.scores.len(), 2);
393    }
394
395    #[test]
396    fn hub_node_scores_higher() {
397        let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
398        let report = composite_health_score(&edges);
399        let hub = report.scores.iter().find(|s| s.name == "hub").unwrap();
400        let a = report.scores.iter().find(|s| s.name == "a").unwrap();
401        assert!(hub.overall > a.overall, "hub should score higher than leaf");
402    }
403
404    #[test]
405    fn cycle_free_zero_cycle_risk() {
406        let edges = vec![e("a", "b"), e("b", "c")];
407        let report = composite_health_score(&edges);
408        for score in &report.scores {
409            assert_eq!(
410                score.cycle_risk, 0.0,
411                "{} should have no cycle risk",
412                score.name
413            );
414        }
415    }
416
417    #[test]
418    fn cycle_increases_cycle_risk() {
419        let edges = vec![e("a", "b"), e("b", "a")];
420        let report = composite_health_score(&edges);
421        for score in &report.scores {
422            assert_eq!(
423                score.cycle_risk, 1.0,
424                "{} should have cycle risk",
425                score.name
426            );
427        }
428    }
429
430    #[test]
431    fn dense_cycle_does_not_recurse_on_current_node() {
432        let edges = vec![
433            e("alpha", "beta"),
434            e("alpha", "gamma"),
435            e("beta", "alpha"),
436            e("beta", "gamma"),
437            e("gamma", "alpha"),
438            e("gamma", "beta"),
439        ];
440        let report = composite_health_score(&edges);
441        assert_eq!(report.node_count, 3);
442        assert_eq!(report.avg_cycle_risk, 1.0);
443    }
444
445    #[test]
446    fn overall_scores_bounded() {
447        let edges = vec![
448            e("a", "b"),
449            e("b", "c"),
450            e("c", "d"),
451            e("d", "a"),
452            e("a", "c"),
453        ];
454        let report = composite_health_score(&edges);
455        for score in &report.scores {
456            assert!(
457                score.overall >= 0.0 && score.overall <= 1.0,
458                "{} overall={}",
459                score.name,
460                score.overall
461            );
462            assert!(score.connectivity >= 0.0 && score.connectivity <= 1.0);
463            assert!(score.reachability >= 0.0 && score.reachability <= 1.0);
464            assert!(score.centrality >= 0.0 && score.centrality <= 1.0);
465        }
466    }
467
468    #[test]
469    fn sorted_by_overall_descending() {
470        let edges = vec![e("a", "b"), e("a", "c"), e("b", "c")];
471        let report = composite_health_score(&edges);
472        for i in 1..report.scores.len() {
473            assert!(report.scores[i - 1].overall >= report.scores[i].overall);
474        }
475    }
476
477    #[test]
478    fn terse_empty() {
479        let report = terse_health_report(&[], 5);
480        assert_eq!(report.node_count, 0);
481        assert_eq!(report.top_scores.len(), 0);
482        assert_eq!(report.bottom_scores.len(), 0);
483        assert_eq!(report.avg_overall, 0.0);
484    }
485
486    #[test]
487    fn terse_matches_composite() {
488        let edges = vec![e("a", "b"), e("a", "c"), e("b", "c"), e("c", "d"), e("d", "a")];
489        let full = composite_health_score(&edges);
490        let terse = terse_health_report(&edges, 2);
491        assert_eq!(terse.node_count, full.node_count);
492        assert_eq!(terse.edge_count, full.edge_count);
493        assert!((terse.avg_overall - full.avg_overall).abs() < 1e-10);
494        assert!((terse.avg_cycle_risk - full.avg_cycle_risk).abs() < 1e-10);
495        assert_eq!(terse.top_scores.len(), 2);
496        assert_eq!(terse.bottom_scores.len(), 2);
497        assert_eq!(terse.top_scores[0].name, full.scores[0].name);
498        assert!((terse.top_scores[0].overall - full.scores[0].overall).abs() < 1e-10);
499    }
500
501    #[test]
502    fn terse_top_sorted_descending() {
503        let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
504        let terse = terse_health_report(&edges, 2);
505        for i in 1..terse.top_scores.len() {
506            assert!(terse.top_scores[i - 1].overall >= terse.top_scores[i].overall);
507        }
508    }
509
510    #[test]
511    fn terse_bottom_sorted_ascending() {
512        let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
513        let terse = terse_health_report(&edges, 2);
514        for i in 1..terse.bottom_scores.len() {
515            assert!(terse.bottom_scores[i - 1].overall <= terse.bottom_scores[i].overall);
516        }
517    }
518
519    #[test]
520    fn terse_n_larger_than_nodes() {
521        let edges = vec![e("a", "b")];
522        let terse = terse_health_report(&edges, 10);
523        assert_eq!(terse.top_scores.len(), 2);
524        assert_eq!(terse.bottom_scores.len(), 2);
525    }
526
527    #[test]
528    fn terse_scores_bounded() {
529        let edges = vec![
530            e("a", "b"),
531            e("b", "c"),
532            e("c", "d"),
533            e("d", "a"),
534            e("a", "c"),
535        ];
536        let terse = terse_health_report(&edges, 2);
537        for s in terse.top_scores.iter().chain(terse.bottom_scores.iter()) {
538            assert!(s.overall >= 0.0 && s.overall <= 1.0, "{} overall={}", s.name, s.overall);
539        }
540    }
541
542    #[test]
543    fn terse_cycle_avg() {
544        let edges = vec![e("a", "b"), e("b", "a")];
545        let terse = terse_health_report(&edges, 1);
546        assert!((terse.avg_cycle_risk - 1.0).abs() < 1e-10);
547    }
548}