Skip to main content

drft/analyses/
graph_stats.rs

1use super::{Analysis, AnalysisContext};
2use crate::graph::Graph;
3use std::collections::{HashMap, VecDeque};
4
5#[derive(Debug, Clone, serde::Serialize)]
6pub struct GraphStatsResult {
7    pub node_count: usize,
8    pub edge_count: usize,
9    pub density: f64,
10    pub diameter: Option<usize>,
11    pub average_path_length: Option<f64>,
12}
13
14pub struct GraphStats;
15
16impl Analysis for GraphStats {
17    type Output = GraphStatsResult;
18
19    fn name(&self) -> &str {
20        "graph-stats"
21    }
22
23    fn run(&self, ctx: &AnalysisContext) -> GraphStatsResult {
24        let graph = ctx.graph;
25        let real_nodes: Vec<&str> = graph
26            .nodes
27            .keys()
28            .filter(|p| graph.is_included_node(p))
29            .map(|s| s.as_str())
30            .collect();
31
32        let node_count = real_nodes.len();
33
34        // Count edges between real nodes
35        let edge_count = graph
36            .edges
37            .iter()
38            .filter(|e| graph.is_internal_edge(e))
39            .count();
40
41        // Density for directed graph: |E| / (|V| * (|V| - 1))
42        let density = if node_count <= 1 {
43            0.0
44        } else {
45            edge_count as f64 / (node_count * (node_count - 1)) as f64
46        };
47
48        // All-pairs shortest paths via BFS from each real node (directed)
49        let (diameter, average_path_length) = if node_count <= 1 {
50            (Some(0), Some(0.0))
51        } else {
52            all_pairs_stats(graph, &real_nodes)
53        };
54
55        GraphStatsResult {
56            node_count,
57            edge_count,
58            density,
59            diameter,
60            average_path_length,
61        }
62    }
63}
64
65/// Compute diameter and average path length via BFS from each node.
66/// Returns (None, None) if the directed graph is not strongly connected.
67fn all_pairs_stats(graph: &Graph, real_nodes: &[&str]) -> (Option<usize>, Option<f64>) {
68    let mut max_dist: usize = 0;
69    let mut total_dist: u64 = 0;
70    let mut pair_count: u64 = 0;
71    let n = real_nodes.len();
72
73    for &source in real_nodes {
74        let distances = bfs_distances(graph, source);
75
76        for &target in real_nodes {
77            if source == target {
78                continue;
79            }
80            match distances.get(target) {
81                Some(&d) => {
82                    max_dist = max_dist.max(d);
83                    total_dist += d as u64;
84                    pair_count += 1;
85                }
86                None => {
87                    // Not reachable — graph is not strongly connected
88                    return (None, None);
89                }
90            }
91        }
92    }
93
94    let expected_pairs = (n * (n - 1)) as u64;
95    if pair_count < expected_pairs {
96        return (None, None);
97    }
98
99    let avg = if pair_count > 0 {
100        total_dist as f64 / pair_count as f64
101    } else {
102        0.0
103    };
104
105    (Some(max_dist), Some(avg))
106}
107
108/// BFS from source, returning distances to all reachable real nodes.
109fn bfs_distances<'a>(graph: &'a Graph, source: &'a str) -> HashMap<&'a str, usize> {
110    let mut distances = HashMap::new();
111    let mut queue = VecDeque::new();
112
113    distances.insert(source, 0);
114    queue.push_back(source);
115
116    while let Some(current) = queue.pop_front() {
117        let current_dist = distances[current];
118        if let Some(edge_indices) = graph.forward.get(current) {
119            for &idx in edge_indices {
120                let edge = &graph.edges[idx];
121                let target = edge.target.as_str();
122                if graph.is_internal_edge(edge) && !distances.contains_key(target) {
123                    distances.insert(target, current_dist + 1);
124                    queue.push_back(target);
125                }
126            }
127        }
128    }
129
130    distances
131}
132
133#[cfg(test)]
134mod tests {
135    use super::*;
136    use crate::analyses::AnalysisContext;
137    use crate::config::Config;
138    use crate::graph::test_helpers::{make_edge, make_node};
139    use std::path::Path;
140
141    fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
142        AnalysisContext {
143            graph,
144            root: Path::new("."),
145            config,
146            lockfile: None,
147        }
148    }
149
150    #[test]
151    fn empty_graph() {
152        let graph = Graph::new();
153        let config = Config::defaults();
154        let result = GraphStats.run(&make_ctx(&graph, &config));
155        assert_eq!(result.node_count, 0);
156        assert_eq!(result.edge_count, 0);
157        assert_eq!(result.density, 0.0);
158        assert_eq!(result.diameter, Some(0));
159    }
160
161    #[test]
162    fn single_node() {
163        let mut graph = Graph::new();
164        graph.add_node(make_node("a.md"));
165        let config = Config::defaults();
166        let result = GraphStats.run(&make_ctx(&graph, &config));
167        assert_eq!(result.node_count, 1);
168        assert_eq!(result.density, 0.0);
169        assert_eq!(result.diameter, Some(0));
170    }
171
172    #[test]
173    fn linear_chain() {
174        let mut graph = Graph::new();
175        graph.add_node(make_node("a.md"));
176        graph.add_node(make_node("b.md"));
177        graph.add_node(make_node("c.md"));
178        graph.add_edge(make_edge("a.md", "b.md"));
179        graph.add_edge(make_edge("b.md", "c.md"));
180
181        let config = Config::defaults();
182        let result = GraphStats.run(&make_ctx(&graph, &config));
183        assert_eq!(result.node_count, 3);
184        assert_eq!(result.edge_count, 2);
185        assert!((result.density - 1.0 / 3.0).abs() < 1e-10);
186        assert_eq!(result.diameter, None);
187    }
188
189    #[test]
190    fn complete_bidirectional() {
191        let mut graph = Graph::new();
192        graph.add_node(make_node("a.md"));
193        graph.add_node(make_node("b.md"));
194        graph.add_node(make_node("c.md"));
195        graph.add_edge(make_edge("a.md", "b.md"));
196        graph.add_edge(make_edge("b.md", "a.md"));
197        graph.add_edge(make_edge("b.md", "c.md"));
198        graph.add_edge(make_edge("c.md", "b.md"));
199        graph.add_edge(make_edge("a.md", "c.md"));
200        graph.add_edge(make_edge("c.md", "a.md"));
201
202        let config = Config::defaults();
203        let result = GraphStats.run(&make_ctx(&graph, &config));
204        assert_eq!(result.node_count, 3);
205        assert_eq!(result.edge_count, 6);
206        assert!((result.density - 1.0).abs() < 1e-10);
207        assert_eq!(result.diameter, Some(1));
208        assert_eq!(result.average_path_length, Some(1.0));
209    }
210
211    #[test]
212    fn cycle_has_diameter() {
213        let mut graph = Graph::new();
214        graph.add_node(make_node("a.md"));
215        graph.add_node(make_node("b.md"));
216        graph.add_node(make_node("c.md"));
217        graph.add_edge(make_edge("a.md", "b.md"));
218        graph.add_edge(make_edge("b.md", "c.md"));
219        graph.add_edge(make_edge("c.md", "a.md"));
220
221        let config = Config::defaults();
222        let result = GraphStats.run(&make_ctx(&graph, &config));
223        assert_eq!(result.diameter, Some(2));
224        assert!((result.average_path_length.unwrap() - 1.5).abs() < 1e-10);
225    }
226}