Skip to main content

drft/analyses/
graph_stats.rs

1use super::{Analysis, AnalysisContext};
2use crate::graph::Graph;
3use std::collections::{HashMap, VecDeque};
4
5#[derive(Debug, Clone, serde::Serialize)]
6pub struct GraphStatsResult {
7    pub node_count: usize,
8    pub edge_count: usize,
9    pub density: f64,
10    pub diameter: Option<usize>,
11    pub average_path_length: Option<f64>,
12}
13
14pub struct GraphStats;
15
16impl Analysis for GraphStats {
17    type Output = GraphStatsResult;
18
19    fn name(&self) -> &str {
20        "graph-stats"
21    }
22
23    fn run(&self, ctx: &AnalysisContext) -> GraphStatsResult {
24        let graph = ctx.graph;
25        let real_nodes: Vec<&str> = graph.included_nodes().map(|(s, _)| s.as_str()).collect();
26
27        let node_count = real_nodes.len();
28
29        // Count edges between real nodes
30        let edge_count = graph
31            .edges
32            .iter()
33            .filter(|e| graph.is_internal_edge(e))
34            .count();
35
36        // Density for directed graph: |E| / (|V| * (|V| - 1))
37        let density = if node_count <= 1 {
38            0.0
39        } else {
40            edge_count as f64 / (node_count * (node_count - 1)) as f64
41        };
42
43        // All-pairs shortest paths via BFS from each real node (directed)
44        let (diameter, average_path_length) = if node_count <= 1 {
45            (Some(0), Some(0.0))
46        } else {
47            all_pairs_stats(graph, &real_nodes)
48        };
49
50        GraphStatsResult {
51            node_count,
52            edge_count,
53            density,
54            diameter,
55            average_path_length,
56        }
57    }
58}
59
60/// Compute diameter and average path length via BFS from each node.
61/// Returns (None, None) if the directed graph is not strongly connected.
62fn all_pairs_stats(graph: &Graph, real_nodes: &[&str]) -> (Option<usize>, Option<f64>) {
63    let mut max_dist: usize = 0;
64    let mut total_dist: u64 = 0;
65    let mut pair_count: u64 = 0;
66    let n = real_nodes.len();
67
68    for &source in real_nodes {
69        let distances = bfs_distances(graph, source);
70
71        for &target in real_nodes {
72            if source == target {
73                continue;
74            }
75            match distances.get(target) {
76                Some(&d) => {
77                    max_dist = max_dist.max(d);
78                    total_dist += d as u64;
79                    pair_count += 1;
80                }
81                None => {
82                    // Not reachable — graph is not strongly connected
83                    return (None, None);
84                }
85            }
86        }
87    }
88
89    let expected_pairs = (n * (n - 1)) as u64;
90    if pair_count < expected_pairs {
91        return (None, None);
92    }
93
94    let avg = if pair_count > 0 {
95        total_dist as f64 / pair_count as f64
96    } else {
97        0.0
98    };
99
100    (Some(max_dist), Some(avg))
101}
102
103/// BFS from source, returning distances to all reachable real nodes.
104fn bfs_distances<'a>(graph: &'a Graph, source: &'a str) -> HashMap<&'a str, usize> {
105    let mut distances = HashMap::new();
106    let mut queue = VecDeque::new();
107
108    distances.insert(source, 0);
109    queue.push_back(source);
110
111    while let Some(current) = queue.pop_front() {
112        let current_dist = distances[current];
113        if let Some(edge_indices) = graph.forward.get(current) {
114            for &idx in edge_indices {
115                let edge = &graph.edges[idx];
116                let target = edge.target.as_str();
117                if graph.is_internal_edge(edge) && !distances.contains_key(target) {
118                    distances.insert(target, current_dist + 1);
119                    queue.push_back(target);
120                }
121            }
122        }
123    }
124
125    distances
126}
127
128#[cfg(test)]
129mod tests {
130    use super::*;
131    use crate::analyses::AnalysisContext;
132    use crate::config::Config;
133    use crate::graph::test_helpers::{make_edge, make_node};
134    use std::path::Path;
135
136    fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
137        AnalysisContext {
138            graph,
139            root: Path::new("."),
140            config,
141            lockfile: None,
142        }
143    }
144
145    #[test]
146    fn empty_graph() {
147        let graph = Graph::new();
148        let config = Config::defaults();
149        let result = GraphStats.run(&make_ctx(&graph, &config));
150        assert_eq!(result.node_count, 0);
151        assert_eq!(result.edge_count, 0);
152        assert_eq!(result.density, 0.0);
153        assert_eq!(result.diameter, Some(0));
154    }
155
156    #[test]
157    fn single_node() {
158        let mut graph = Graph::new();
159        graph.add_node(make_node("a.md"));
160        let config = Config::defaults();
161        let result = GraphStats.run(&make_ctx(&graph, &config));
162        assert_eq!(result.node_count, 1);
163        assert_eq!(result.density, 0.0);
164        assert_eq!(result.diameter, Some(0));
165    }
166
167    #[test]
168    fn linear_chain() {
169        let mut graph = Graph::new();
170        graph.add_node(make_node("a.md"));
171        graph.add_node(make_node("b.md"));
172        graph.add_node(make_node("c.md"));
173        graph.add_edge(make_edge("a.md", "b.md"));
174        graph.add_edge(make_edge("b.md", "c.md"));
175
176        let config = Config::defaults();
177        let result = GraphStats.run(&make_ctx(&graph, &config));
178        assert_eq!(result.node_count, 3);
179        assert_eq!(result.edge_count, 2);
180        assert!((result.density - 1.0 / 3.0).abs() < 1e-10);
181        assert_eq!(result.diameter, None);
182    }
183
184    #[test]
185    fn complete_bidirectional() {
186        let mut graph = Graph::new();
187        graph.add_node(make_node("a.md"));
188        graph.add_node(make_node("b.md"));
189        graph.add_node(make_node("c.md"));
190        graph.add_edge(make_edge("a.md", "b.md"));
191        graph.add_edge(make_edge("b.md", "a.md"));
192        graph.add_edge(make_edge("b.md", "c.md"));
193        graph.add_edge(make_edge("c.md", "b.md"));
194        graph.add_edge(make_edge("a.md", "c.md"));
195        graph.add_edge(make_edge("c.md", "a.md"));
196
197        let config = Config::defaults();
198        let result = GraphStats.run(&make_ctx(&graph, &config));
199        assert_eq!(result.node_count, 3);
200        assert_eq!(result.edge_count, 6);
201        assert!((result.density - 1.0).abs() < 1e-10);
202        assert_eq!(result.diameter, Some(1));
203        assert_eq!(result.average_path_length, Some(1.0));
204    }
205
206    #[test]
207    fn cycle_has_diameter() {
208        let mut graph = Graph::new();
209        graph.add_node(make_node("a.md"));
210        graph.add_node(make_node("b.md"));
211        graph.add_node(make_node("c.md"));
212        graph.add_edge(make_edge("a.md", "b.md"));
213        graph.add_edge(make_edge("b.md", "c.md"));
214        graph.add_edge(make_edge("c.md", "a.md"));
215
216        let config = Config::defaults();
217        let result = GraphStats.run(&make_ctx(&graph, &config));
218        assert_eq!(result.diameter, Some(2));
219        assert!((result.average_path_length.unwrap() - 1.5).abs() < 1e-10);
220    }
221}