Skip to main content

tsift_algorithms/
health.rs

1use crate::graph_builder::build_graph;
2use serde::{Deserialize, Serialize};
3use std::collections::HashSet;
4
5#[derive(Debug, Clone, Serialize, Deserialize)]
6pub struct HealthScore {
7    pub name: String,
8    pub overall: f64,
9    pub connectivity: f64,
10    pub reachability: f64,
11    pub centrality: f64,
12    pub cycle_risk: f64,
13}
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct HealthReport {
17    pub scores: Vec<HealthScore>,
18    pub avg_connectivity: f64,
19    pub avg_reachability: f64,
20    pub avg_centrality: f64,
21    pub avg_cycle_risk: f64,
22    pub avg_overall: f64,
23    pub node_count: usize,
24    pub edge_count: usize,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct TerseHealthReport {
29    pub top_scores: Vec<TerseHealthScore>,
30    pub bottom_scores: Vec<TerseHealthScore>,
31    pub avg_overall: f64,
32    pub avg_cycle_risk: f64,
33    pub node_count: usize,
34    pub edge_count: usize,
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct TerseHealthScore {
39    pub name: String,
40    pub overall: f64,
41}
42
43pub fn terse_health_report(edges: &[(String, String)], n: usize) -> TerseHealthReport {
44    if edges.is_empty() {
45        return TerseHealthReport {
46            top_scores: Vec::new(),
47            bottom_scores: Vec::new(),
48            avg_overall: 0.0,
49            avg_cycle_risk: 0.0,
50            node_count: 0,
51            edge_count: 0,
52        };
53    }
54
55    let graph = build_graph(edges);
56    let node_vec = &graph.node_vec;
57    let out_adj = &graph.out_adj;
58    let in_adj = &graph.in_adj;
59    let node_count = graph.node_count();
60    if node_count == 0 {
61        return TerseHealthReport {
62            top_scores: Vec::new(),
63            bottom_scores: Vec::new(),
64            avg_overall: 0.0,
65            avg_cycle_risk: 0.0,
66            node_count: 0,
67            edge_count: 0,
68        };
69    }
70
71    let fwd_sccs = compute_sccs(node_count, out_adj);
72    let bwd_sccs = compute_sccs(node_count, in_adj);
73    let cycle_nodes = find_cycles_from_sccs(out_adj, &fwd_sccs);
74
75    let fwd_reach = compute_reachability_with_sccs(node_count, out_adj, &fwd_sccs);
76    let bwd_reach = compute_reachability_with_sccs(node_count, in_adj, &bwd_sccs);
77
78    let total_possible = (node_count - 1).max(1) as f64;
79    let total_degree: f64 = out_adj.iter().map(|s| s.len()).sum::<usize>() as f64;
80    let avg_degree = total_degree / node_count as f64;
81
82    let mut raw_scores: Vec<TerseHealthScore> = Vec::with_capacity(node_count);
83    let mut sum_overall: f64 = 0.0;
84    let mut sum_cycle_risk: f64 = 0.0;
85
86    for (i, name) in node_vec.iter().enumerate() {
87        let out_degree = out_adj[i].len() as f64;
88        let in_degree = in_adj[i].len() as f64;
89        let connectivity =
90            (out_degree + in_degree) / (2.0 * avg_degree.max(1.0)).min(total_degree.max(1.0));
91        let connectivity = connectivity.min(1.0);
92
93        let reach_fwd = (fwd_reach[i] as f64 - 1.0) / total_possible;
94        let reach_bwd = (bwd_reach[i] as f64 - 1.0) / total_possible;
95        let reachability = (reach_fwd + reach_bwd) / 2.0;
96
97        let centrality = if node_count > 1 {
98            (fwd_reach[i] + bwd_reach[i]) as f64 / (2.0 * node_count as f64)
99        } else {
100            1.0
101        };
102
103        let cycle_risk = if cycle_nodes.contains(&i) { 1.0 } else { 0.0 };
104
105        let overall = connectivity * 0.25
106            + reachability * 0.25
107            + centrality * 0.25
108            + (1.0 - cycle_risk) * 0.25;
109
110        sum_overall += overall;
111        sum_cycle_risk += cycle_risk;
112
113        raw_scores.push(TerseHealthScore {
114            name: name.clone(),
115            overall,
116        });
117    }
118
119    let n = n.min(node_count);
120
121    let (mut top_candidates, mut bottom_candidates) = {
122        let mut sorted = raw_scores;
123        sorted.sort_by(|a, b| {
124            b.overall
125                .partial_cmp(&a.overall)
126                .unwrap_or(std::cmp::Ordering::Equal)
127        });
128        let top: Vec<TerseHealthScore> = sorted.iter().take(n).cloned().collect();
129        let bottom: Vec<TerseHealthScore> = sorted.iter().rev().take(n).cloned().collect();
130        (top, bottom)
131    };
132
133    top_candidates.sort_by(|a, b| {
134        b.overall
135            .partial_cmp(&a.overall)
136            .unwrap_or(std::cmp::Ordering::Equal)
137    });
138    bottom_candidates.sort_by(|a, b| {
139        a.overall
140            .partial_cmp(&b.overall)
141            .unwrap_or(std::cmp::Ordering::Equal)
142    });
143
144    TerseHealthReport {
145        top_scores: top_candidates,
146        bottom_scores: bottom_candidates,
147        avg_overall: sum_overall / node_count as f64,
148        avg_cycle_risk: sum_cycle_risk / node_count as f64,
149        node_count,
150        edge_count: edges.len(),
151    }
152}
153
154fn compute_reachability_with_sccs(
155    n: usize,
156    adj: &[HashSet<usize>],
157    sccs: &[Vec<usize>],
158) -> Vec<usize> {
159    let mut comp_of = vec![0usize; n];
160    for (comp_id, members) in sccs.iter().enumerate() {
161        for &node in members {
162            comp_of[node] = comp_id;
163        }
164    }
165    let num_comps = sccs.len();
166    let mut comp_adj: Vec<HashSet<usize>> = vec![HashSet::new(); num_comps];
167    for u in 0..n {
168        for &v in &adj[u] {
169            let cu = comp_of[u];
170            let cv = comp_of[v];
171            if cu != cv {
172                comp_adj[cu].insert(cv);
173            }
174        }
175    }
176    let mut comp_reach = vec![0usize; num_comps];
177    for c in 0..num_comps {
178        let mut visited = vec![false; num_comps];
179        visited[c] = true;
180        let mut count = 0usize;
181        let mut queue = std::collections::VecDeque::from([c]);
182        while let Some(cur) = queue.pop_front() {
183            count += sccs[cur].len();
184            for &next in &comp_adj[cur] {
185                if !visited[next] {
186                    visited[next] = true;
187                    queue.push_back(next);
188                }
189            }
190        }
191        comp_reach[c] = count;
192    }
193    let mut reach = vec![0usize; n];
194    for i in 0..n {
195        reach[i] = comp_reach[comp_of[i]];
196    }
197    reach
198}
199
200fn compute_sccs(n: usize, adj: &[HashSet<usize>]) -> Vec<Vec<usize>> {
201    struct Tarjan<'a> {
202        adj: &'a [HashSet<usize>],
203        index: usize,
204        stack: Vec<usize>,
205        on_stack: Vec<bool>,
206        indices: Vec<Option<usize>>,
207        lowlinks: Vec<usize>,
208        components: Vec<Vec<usize>>,
209    }
210
211    impl<'a> Tarjan<'a> {
212        fn new(n: usize, adj: &'a [HashSet<usize>]) -> Self {
213            Self {
214                adj,
215                index: 0,
216                stack: Vec::new(),
217                on_stack: vec![false; n],
218                indices: vec![None; n],
219                lowlinks: vec![0; n],
220                components: Vec::new(),
221            }
222        }
223
224        fn strongconnect(&mut self, v: usize) {
225            self.indices[v] = Some(self.index);
226            self.lowlinks[v] = self.index;
227            self.index += 1;
228            self.stack.push(v);
229            self.on_stack[v] = true;
230
231            let neighbors: Vec<usize> = self.adj[v].iter().copied().collect();
232            for w in neighbors {
233                if self.indices[w].is_none() {
234                    self.strongconnect(w);
235                }
236                if self.on_stack[w] {
237                    self.lowlinks[v] = self.lowlinks[v].min(self.indices[w].unwrap());
238                }
239            }
240
241            if self.indices[v] == Some(self.lowlinks[v]) {
242                let mut component = Vec::new();
243                loop {
244                    let w = self.stack.pop().unwrap();
245                    self.on_stack[w] = false;
246                    component.push(w);
247                    if w == v {
248                        break;
249                    }
250                }
251                self.components.push(component);
252            }
253        }
254    }
255
256    let mut tarjan = Tarjan::new(n, adj);
257    for v in 0..n {
258        if tarjan.indices[v].is_none() {
259            tarjan.strongconnect(v);
260        }
261    }
262    tarjan.components
263}
264
265fn find_cycles_from_sccs(adj: &[HashSet<usize>], sccs: &[Vec<usize>]) -> HashSet<usize> {
266    let mut in_cycle = HashSet::new();
267    for component in sccs {
268        if component.len() > 1 {
269            for &node in component {
270                in_cycle.insert(node);
271            }
272        } else if component.len() == 1 {
273            let v = component[0];
274            if adj[v].contains(&v) {
275                in_cycle.insert(v);
276            }
277        }
278    }
279    in_cycle
280}
281
282pub fn composite_health_score(edges: &[(String, String)]) -> HealthReport {
283    if edges.is_empty() {
284        return HealthReport {
285            scores: Vec::new(),
286            avg_connectivity: 0.0,
287            avg_reachability: 0.0,
288            avg_centrality: 0.0,
289            avg_cycle_risk: 0.0,
290            avg_overall: 0.0,
291            node_count: 0,
292            edge_count: 0,
293        };
294    }
295
296    let graph = build_graph(edges);
297    let node_vec = &graph.node_vec;
298    let out_adj = &graph.out_adj;
299    let in_adj = &graph.in_adj;
300    let n = graph.node_count();
301    if n == 0 {
302        return HealthReport {
303            scores: Vec::new(),
304            avg_connectivity: 0.0,
305            avg_reachability: 0.0,
306            avg_centrality: 0.0,
307            avg_cycle_risk: 0.0,
308            avg_overall: 0.0,
309            node_count: 0,
310            edge_count: 0,
311        };
312    }
313
314    let fwd_sccs = compute_sccs(n, out_adj);
315    let bwd_sccs = compute_sccs(n, in_adj);
316    let cycle_nodes = find_cycles_from_sccs(out_adj, &fwd_sccs);
317
318    let fwd_reach = compute_reachability_with_sccs(n, out_adj, &fwd_sccs);
319    let bwd_reach = compute_reachability_with_sccs(n, in_adj, &bwd_sccs);
320
321    let total_possible = (n - 1).max(1) as f64;
322    let total_degree: f64 = out_adj.iter().map(|s| s.len()).sum::<usize>() as f64;
323    let avg_degree = total_degree / n as f64;
324
325    let mut scores = Vec::with_capacity(n);
326    for (i, name) in node_vec.iter().enumerate() {
327        let out_degree = out_adj[i].len() as f64;
328        let in_degree = in_adj[i].len() as f64;
329        let connectivity =
330            (out_degree + in_degree) / (2.0 * avg_degree.max(1.0)).min(total_degree.max(1.0));
331        let connectivity = connectivity.min(1.0);
332
333        let reach_fwd = (fwd_reach[i] as f64 - 1.0) / total_possible;
334        let reach_bwd = (bwd_reach[i] as f64 - 1.0) / total_possible;
335        let reachability = (reach_fwd + reach_bwd) / 2.0;
336
337        let centrality = if n > 1 {
338            (fwd_reach[i] + bwd_reach[i]) as f64 / (2.0 * n as f64)
339        } else {
340            1.0
341        };
342
343        let cycle_risk = if cycle_nodes.contains(&i) { 1.0 } else { 0.0 };
344
345        let overall = connectivity * 0.25
346            + reachability * 0.25
347            + centrality * 0.25
348            + (1.0 - cycle_risk) * 0.25;
349
350        scores.push(HealthScore {
351            name: name.clone(),
352            overall,
353            connectivity,
354            reachability,
355            centrality,
356            cycle_risk,
357        });
358    }
359
360    let avg_connectivity = scores.iter().map(|s| s.connectivity).sum::<f64>() / n as f64;
361    let avg_reachability = scores.iter().map(|s| s.reachability).sum::<f64>() / n as f64;
362    let avg_centrality = scores.iter().map(|s| s.centrality).sum::<f64>() / n as f64;
363    let avg_cycle_risk = scores.iter().map(|s| s.cycle_risk).sum::<f64>() / n as f64;
364    let avg_overall = scores.iter().map(|s| s.overall).sum::<f64>() / n as f64;
365
366    scores.sort_by(|a, b| {
367        b.overall
368            .partial_cmp(&a.overall)
369            .unwrap_or(std::cmp::Ordering::Equal)
370    });
371
372    HealthReport {
373        scores,
374        avg_connectivity,
375        avg_reachability,
376        avg_centrality,
377        avg_cycle_risk,
378        avg_overall,
379        node_count: n,
380        edge_count: edges.len(),
381    }
382}
383
384#[cfg(test)]
385mod tests {
386    use super::*;
387
388    fn e(a: &str, b: &str) -> (String, String) {
389        (a.to_string(), b.to_string())
390    }
391
392    #[test]
393    fn empty_graph() {
394        let report = composite_health_score(&[]);
395        assert_eq!(report.node_count, 0);
396        assert_eq!(report.scores.len(), 0);
397    }
398
399    #[test]
400    fn single_edge() {
401        let edges = vec![e("a", "b")];
402        let report = composite_health_score(&edges);
403        assert_eq!(report.node_count, 2);
404        assert_eq!(report.scores.len(), 2);
405    }
406
407    #[test]
408    fn hub_node_scores_higher() {
409        let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
410        let report = composite_health_score(&edges);
411        let hub = report.scores.iter().find(|s| s.name == "hub").unwrap();
412        let a = report.scores.iter().find(|s| s.name == "a").unwrap();
413        assert!(hub.overall > a.overall, "hub should score higher than leaf");
414    }
415
416    #[test]
417    fn cycle_free_zero_cycle_risk() {
418        let edges = vec![e("a", "b"), e("b", "c")];
419        let report = composite_health_score(&edges);
420        for score in &report.scores {
421            assert_eq!(
422                score.cycle_risk, 0.0,
423                "{} should have no cycle risk",
424                score.name
425            );
426        }
427    }
428
429    #[test]
430    fn cycle_increases_cycle_risk() {
431        let edges = vec![e("a", "b"), e("b", "a")];
432        let report = composite_health_score(&edges);
433        for score in &report.scores {
434            assert_eq!(
435                score.cycle_risk, 1.0,
436                "{} should have cycle risk",
437                score.name
438            );
439        }
440    }
441
442    #[test]
443    fn dense_cycle_does_not_recurse_on_current_node() {
444        let edges = vec![
445            e("alpha", "beta"),
446            e("alpha", "gamma"),
447            e("beta", "alpha"),
448            e("beta", "gamma"),
449            e("gamma", "alpha"),
450            e("gamma", "beta"),
451        ];
452        let report = composite_health_score(&edges);
453        assert_eq!(report.node_count, 3);
454        assert_eq!(report.avg_cycle_risk, 1.0);
455    }
456
457    #[test]
458    fn overall_scores_bounded() {
459        let edges = vec![
460            e("a", "b"),
461            e("b", "c"),
462            e("c", "d"),
463            e("d", "a"),
464            e("a", "c"),
465        ];
466        let report = composite_health_score(&edges);
467        for score in &report.scores {
468            assert!(
469                score.overall >= 0.0 && score.overall <= 1.0,
470                "{} overall={}",
471                score.name,
472                score.overall
473            );
474            assert!(score.connectivity >= 0.0 && score.connectivity <= 1.0);
475            assert!(score.reachability >= 0.0 && score.reachability <= 1.0);
476            assert!(score.centrality >= 0.0 && score.centrality <= 1.0);
477        }
478    }
479
480    #[test]
481    fn sorted_by_overall_descending() {
482        let edges = vec![e("a", "b"), e("a", "c"), e("b", "c")];
483        let report = composite_health_score(&edges);
484        for i in 1..report.scores.len() {
485            assert!(report.scores[i - 1].overall >= report.scores[i].overall);
486        }
487    }
488
489    #[test]
490    fn terse_empty() {
491        let report = terse_health_report(&[], 5);
492        assert_eq!(report.node_count, 0);
493        assert_eq!(report.top_scores.len(), 0);
494        assert_eq!(report.bottom_scores.len(), 0);
495        assert_eq!(report.avg_overall, 0.0);
496    }
497
498    #[test]
499    fn terse_matches_composite() {
500        let edges = vec![
501            e("a", "b"),
502            e("a", "c"),
503            e("b", "c"),
504            e("c", "d"),
505            e("d", "a"),
506        ];
507        let full = composite_health_score(&edges);
508        let terse = terse_health_report(&edges, 2);
509        assert_eq!(terse.node_count, full.node_count);
510        assert_eq!(terse.edge_count, full.edge_count);
511        assert!((terse.avg_overall - full.avg_overall).abs() < 1e-10);
512        assert!((terse.avg_cycle_risk - full.avg_cycle_risk).abs() < 1e-10);
513        assert_eq!(terse.top_scores.len(), 2);
514        assert_eq!(terse.bottom_scores.len(), 2);
515        assert_eq!(terse.top_scores[0].name, full.scores[0].name);
516        assert!((terse.top_scores[0].overall - full.scores[0].overall).abs() < 1e-10);
517    }
518
519    #[test]
520    fn terse_top_sorted_descending() {
521        let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
522        let terse = terse_health_report(&edges, 2);
523        for i in 1..terse.top_scores.len() {
524            assert!(terse.top_scores[i - 1].overall >= terse.top_scores[i].overall);
525        }
526    }
527
528    #[test]
529    fn terse_bottom_sorted_ascending() {
530        let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
531        let terse = terse_health_report(&edges, 2);
532        for i in 1..terse.bottom_scores.len() {
533            assert!(terse.bottom_scores[i - 1].overall <= terse.bottom_scores[i].overall);
534        }
535    }
536
537    #[test]
538    fn terse_n_larger_than_nodes() {
539        let edges = vec![e("a", "b")];
540        let terse = terse_health_report(&edges, 10);
541        assert_eq!(terse.top_scores.len(), 2);
542        assert_eq!(terse.bottom_scores.len(), 2);
543    }
544
545    #[test]
546    fn terse_scores_bounded() {
547        let edges = vec![
548            e("a", "b"),
549            e("b", "c"),
550            e("c", "d"),
551            e("d", "a"),
552            e("a", "c"),
553        ];
554        let terse = terse_health_report(&edges, 2);
555        for s in terse.top_scores.iter().chain(terse.bottom_scores.iter()) {
556            assert!(
557                s.overall >= 0.0 && s.overall <= 1.0,
558                "{} overall={}",
559                s.name,
560                s.overall
561            );
562        }
563    }
564
565    #[test]
566    fn terse_cycle_avg() {
567        let edges = vec![e("a", "b"), e("b", "a")];
568        let terse = terse_health_report(&edges, 1);
569        assert!((terse.avg_cycle_risk - 1.0).abs() < 1e-10);
570    }
571}