Skip to main content

tsift_algorithms/
health.rs

1use serde::{Deserialize, Serialize};
2use std::collections::{HashMap, HashSet};
3
4#[derive(Debug, Clone, Serialize, Deserialize)]
5pub struct HealthScore {
6    pub name: String,
7    pub overall: f64,
8    pub connectivity: f64,
9    pub reachability: f64,
10    pub centrality: f64,
11    pub cycle_risk: f64,
12}
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct HealthReport {
16    pub scores: Vec<HealthScore>,
17    pub avg_connectivity: f64,
18    pub avg_reachability: f64,
19    pub avg_centrality: f64,
20    pub avg_cycle_risk: f64,
21    pub avg_overall: f64,
22    pub node_count: usize,
23    pub edge_count: usize,
24}
25
26#[allow(clippy::type_complexity)]
27fn build_graph(
28    edges: &[(String, String)],
29) -> (
30    Vec<String>,
31    HashMap<String, usize>,
32    Vec<HashSet<usize>>,
33    Vec<HashSet<usize>>,
34) {
35    let mut node_vec: Vec<String> = Vec::new();
36    let mut node_idx: HashMap<String, usize> = HashMap::new();
37    for (a, b) in edges {
38        for name in [a, b] {
39            if !node_idx.contains_key(name) {
40                node_idx.insert(name.clone(), node_vec.len());
41                node_vec.push(name.clone());
42            }
43        }
44    }
45    let n = node_vec.len();
46    let mut out_adj: Vec<HashSet<usize>> = vec![HashSet::new(); n];
47    let mut in_adj: Vec<HashSet<usize>> = vec![HashSet::new(); n];
48    for (a, b) in edges {
49        let ai = node_idx[a];
50        let bi = node_idx[b];
51        if ai != bi {
52            out_adj[ai].insert(bi);
53            in_adj[bi].insert(ai);
54        }
55    }
56    (node_vec, node_idx, out_adj, in_adj)
57}
58
59fn compute_reachability(n: usize, adj: &[HashSet<usize>]) -> Vec<usize> {
60    let mut reach = vec![0usize; n];
61    for start in 0..n {
62        let mut visited = vec![false; n];
63        let mut queue = std::collections::VecDeque::new();
64        visited[start] = true;
65        queue.push_back(start);
66        while let Some(v) = queue.pop_front() {
67            for &w in &adj[v] {
68                if !visited[w] {
69                    visited[w] = true;
70                    queue.push_back(w);
71                }
72            }
73        }
74        reach[start] = visited.iter().filter(|&&v| v).count();
75    }
76    reach
77}
78
79fn find_cycles(n: usize, adj: &[HashSet<usize>]) -> HashSet<usize> {
80    let mut in_cycle = HashSet::new();
81    let mut index = 0usize;
82    let mut stack = Vec::new();
83    let mut on_stack = vec![false; n];
84    let mut indices = vec![None::<usize>; n];
85    let mut lowlinks = vec![0usize; n];
86
87    #[allow(clippy::too_many_arguments)]
88    fn strongconnect(
89        v: usize,
90        adj: &[HashSet<usize>],
91        index: &mut usize,
92        stack: &mut Vec<usize>,
93        on_stack: &mut Vec<bool>,
94        indices: &mut Vec<Option<usize>>,
95        lowlinks: &mut Vec<usize>,
96        in_cycle: &mut HashSet<usize>,
97    ) {
98        indices[v] = Some(*index);
99        lowlinks[v] = *index;
100        *index += 1;
101        stack.push(v);
102        on_stack[v] = true;
103
104        for &w in &adj[v] {
105            if indices[w].is_none() {
106                strongconnect(w, adj, index, stack, on_stack, indices, lowlinks, in_cycle);
107            }
108            if w != v && on_stack[w] {
109                lowlinks[v] = lowlinks[v].min(indices[w].unwrap());
110            }
111        }
112
113        if indices[v] == Some(lowlinks[v]) {
114            let mut component = Vec::new();
115            loop {
116                let w = stack.pop().unwrap();
117                on_stack[w] = false;
118                component.push(w);
119                if w == v {
120                    break;
121                }
122            }
123            if component.len() > 1 || adj[v].contains(&v) {
124                for &node in &component {
125                    in_cycle.insert(node);
126                }
127            }
128        }
129    }
130
131    for v in 0..n {
132        if indices[v].is_none() {
133            strongconnect(
134                v,
135                adj,
136                &mut index,
137                &mut stack,
138                &mut on_stack,
139                &mut indices,
140                &mut lowlinks,
141                &mut in_cycle,
142            );
143        }
144    }
145    in_cycle
146}
147
148pub fn composite_health_score(edges: &[(String, String)]) -> HealthReport {
149    if edges.is_empty() {
150        return HealthReport {
151            scores: Vec::new(),
152            avg_connectivity: 0.0,
153            avg_reachability: 0.0,
154            avg_centrality: 0.0,
155            avg_cycle_risk: 0.0,
156            avg_overall: 0.0,
157            node_count: 0,
158            edge_count: 0,
159        };
160    }
161
162    let (node_vec, _node_idx, out_adj, in_adj) = build_graph(edges);
163    let n = node_vec.len();
164    if n == 0 {
165        return HealthReport {
166            scores: Vec::new(),
167            avg_connectivity: 0.0,
168            avg_reachability: 0.0,
169            avg_centrality: 0.0,
170            avg_cycle_risk: 0.0,
171            avg_overall: 0.0,
172            node_count: 0,
173            edge_count: 0,
174        };
175    }
176
177    let fwd_reach = compute_reachability(n, &out_adj);
178    let bwd_reach = compute_reachability(n, &in_adj);
179    let cycle_nodes = find_cycles(n, &out_adj);
180
181    let total_possible = (n - 1).max(1) as f64;
182    let total_degree: f64 = out_adj.iter().map(|s| s.len()).sum::<usize>() as f64;
183    let avg_degree = total_degree / n as f64;
184
185    let mut scores = Vec::with_capacity(n);
186    for (i, name) in node_vec.iter().enumerate() {
187        let out_degree = out_adj[i].len() as f64;
188        let in_degree = in_adj[i].len() as f64;
189        let connectivity =
190            (out_degree + in_degree) / (2.0 * avg_degree.max(1.0)).min(total_degree.max(1.0));
191        let connectivity = connectivity.min(1.0);
192
193        let reach_fwd = (fwd_reach[i] as f64 - 1.0) / total_possible;
194        let reach_bwd = (bwd_reach[i] as f64 - 1.0) / total_possible;
195        let reachability = (reach_fwd + reach_bwd) / 2.0;
196
197        let centrality = if n > 1 {
198            (fwd_reach[i] + bwd_reach[i]) as f64 / (2.0 * n as f64)
199        } else {
200            1.0
201        };
202
203        let cycle_risk = if cycle_nodes.contains(&i) { 1.0 } else { 0.0 };
204
205        let overall = connectivity * 0.25
206            + reachability * 0.25
207            + centrality * 0.25
208            + (1.0 - cycle_risk) * 0.25;
209
210        scores.push(HealthScore {
211            name: name.clone(),
212            overall,
213            connectivity,
214            reachability,
215            centrality,
216            cycle_risk,
217        });
218    }
219
220    let avg_connectivity = scores.iter().map(|s| s.connectivity).sum::<f64>() / n as f64;
221    let avg_reachability = scores.iter().map(|s| s.reachability).sum::<f64>() / n as f64;
222    let avg_centrality = scores.iter().map(|s| s.centrality).sum::<f64>() / n as f64;
223    let avg_cycle_risk = scores.iter().map(|s| s.cycle_risk).sum::<f64>() / n as f64;
224    let avg_overall = scores.iter().map(|s| s.overall).sum::<f64>() / n as f64;
225
226    scores.sort_by(|a, b| {
227        b.overall
228            .partial_cmp(&a.overall)
229            .unwrap_or(std::cmp::Ordering::Equal)
230    });
231
232    HealthReport {
233        scores,
234        avg_connectivity,
235        avg_reachability,
236        avg_centrality,
237        avg_cycle_risk,
238        avg_overall,
239        node_count: n,
240        edge_count: edges.len(),
241    }
242}
243
244#[cfg(test)]
245mod tests {
246    use super::*;
247
248    fn e(a: &str, b: &str) -> (String, String) {
249        (a.to_string(), b.to_string())
250    }
251
252    #[test]
253    fn empty_graph() {
254        let report = composite_health_score(&[]);
255        assert_eq!(report.node_count, 0);
256        assert_eq!(report.scores.len(), 0);
257    }
258
259    #[test]
260    fn single_edge() {
261        let edges = vec![e("a", "b")];
262        let report = composite_health_score(&edges);
263        assert_eq!(report.node_count, 2);
264        assert_eq!(report.scores.len(), 2);
265    }
266
267    #[test]
268    fn hub_node_scores_higher() {
269        let edges = vec![e("hub", "a"), e("hub", "b"), e("hub", "c"), e("a", "b")];
270        let report = composite_health_score(&edges);
271        let hub = report.scores.iter().find(|s| s.name == "hub").unwrap();
272        let a = report.scores.iter().find(|s| s.name == "a").unwrap();
273        assert!(hub.overall > a.overall, "hub should score higher than leaf");
274    }
275
276    #[test]
277    fn cycle_free_zero_cycle_risk() {
278        let edges = vec![e("a", "b"), e("b", "c")];
279        let report = composite_health_score(&edges);
280        for score in &report.scores {
281            assert_eq!(
282                score.cycle_risk, 0.0,
283                "{} should have no cycle risk",
284                score.name
285            );
286        }
287    }
288
289    #[test]
290    fn cycle_increases_cycle_risk() {
291        let edges = vec![e("a", "b"), e("b", "a")];
292        let report = composite_health_score(&edges);
293        for score in &report.scores {
294            assert_eq!(
295                score.cycle_risk, 1.0,
296                "{} should have cycle risk",
297                score.name
298            );
299        }
300    }
301
302    #[test]
303    fn dense_cycle_does_not_recurse_on_current_node() {
304        let edges = vec![
305            e("alpha", "beta"),
306            e("alpha", "gamma"),
307            e("beta", "alpha"),
308            e("beta", "gamma"),
309            e("gamma", "alpha"),
310            e("gamma", "beta"),
311        ];
312        let report = composite_health_score(&edges);
313        assert_eq!(report.node_count, 3);
314        assert_eq!(report.avg_cycle_risk, 1.0);
315    }
316
317    #[test]
318    fn overall_scores_bounded() {
319        let edges = vec![
320            e("a", "b"),
321            e("b", "c"),
322            e("c", "d"),
323            e("d", "a"),
324            e("a", "c"),
325        ];
326        let report = composite_health_score(&edges);
327        for score in &report.scores {
328            assert!(
329                score.overall >= 0.0 && score.overall <= 1.0,
330                "{} overall={}",
331                score.name,
332                score.overall
333            );
334            assert!(score.connectivity >= 0.0 && score.connectivity <= 1.0);
335            assert!(score.reachability >= 0.0 && score.reachability <= 1.0);
336            assert!(score.centrality >= 0.0 && score.centrality <= 1.0);
337        }
338    }
339
340    #[test]
341    fn sorted_by_overall_descending() {
342        let edges = vec![e("a", "b"), e("a", "c"), e("b", "c")];
343        let report = composite_health_score(&edges);
344        for i in 1..report.scores.len() {
345            assert!(report.scores[i - 1].overall >= report.scores[i].overall);
346        }
347    }
348}