Skip to main content

drft/analyses/
graph_stats.rs

1use super::{Analysis, AnalysisContext};
2use crate::graph::Graph;
3use std::collections::{HashMap, VecDeque};
4
5#[derive(Debug, Clone, serde::Serialize)]
6pub struct GraphStatsResult {
7    pub node_count: usize,
8    pub edge_count: usize,
9    pub density: f64,
10    pub diameter: Option<usize>,
11    pub average_path_length: Option<f64>,
12}
13
14pub struct GraphStats;
15
16impl Analysis for GraphStats {
17    type Output = GraphStatsResult;
18
19    fn name(&self) -> &str {
20        "graph-stats"
21    }
22
23    fn run(&self, ctx: &AnalysisContext) -> GraphStatsResult {
24        let graph = ctx.graph;
25        let real_nodes: Vec<&str> = graph
26            .nodes
27            .keys()
28            .filter(|p| graph.is_file_node(p))
29            .map(|s| s.as_str())
30            .collect();
31
32        let node_count = real_nodes.len();
33
34        // Count edges between real nodes
35        let edge_count = graph
36            .edges
37            .iter()
38            .filter(|e| graph.is_file_node(&e.source) && graph.is_file_node(&e.target))
39            .count();
40
41        // Density for directed graph: |E| / (|V| * (|V| - 1))
42        let density = if node_count <= 1 {
43            0.0
44        } else {
45            edge_count as f64 / (node_count * (node_count - 1)) as f64
46        };
47
48        // All-pairs shortest paths via BFS from each real node (directed)
49        let (diameter, average_path_length) = if node_count <= 1 {
50            (Some(0), Some(0.0))
51        } else {
52            all_pairs_stats(graph, &real_nodes)
53        };
54
55        GraphStatsResult {
56            node_count,
57            edge_count,
58            density,
59            diameter,
60            average_path_length,
61        }
62    }
63}
64
65/// Compute diameter and average path length via BFS from each node.
66/// Returns (None, None) if the directed graph is not strongly connected.
67fn all_pairs_stats(graph: &Graph, real_nodes: &[&str]) -> (Option<usize>, Option<f64>) {
68    let mut max_dist: usize = 0;
69    let mut total_dist: u64 = 0;
70    let mut pair_count: u64 = 0;
71    let n = real_nodes.len();
72
73    for &source in real_nodes {
74        let distances = bfs_distances(graph, source);
75
76        for &target in real_nodes {
77            if source == target {
78                continue;
79            }
80            match distances.get(target) {
81                Some(&d) => {
82                    max_dist = max_dist.max(d);
83                    total_dist += d as u64;
84                    pair_count += 1;
85                }
86                None => {
87                    // Not reachable — graph is not strongly connected
88                    return (None, None);
89                }
90            }
91        }
92    }
93
94    let expected_pairs = (n * (n - 1)) as u64;
95    if pair_count < expected_pairs {
96        return (None, None);
97    }
98
99    let avg = if pair_count > 0 {
100        total_dist as f64 / pair_count as f64
101    } else {
102        0.0
103    };
104
105    (Some(max_dist), Some(avg))
106}
107
108/// BFS from source, returning distances to all reachable real nodes.
109fn bfs_distances<'a>(graph: &'a Graph, source: &'a str) -> HashMap<&'a str, usize> {
110    let mut distances = HashMap::new();
111    let mut queue = VecDeque::new();
112
113    distances.insert(source, 0);
114    queue.push_back(source);
115
116    while let Some(current) = queue.pop_front() {
117        let current_dist = distances[current];
118        if let Some(edge_indices) = graph.forward.get(current) {
119            for &idx in edge_indices {
120                let target = graph.edges[idx].target.as_str();
121                if graph.is_file_node(target) && !distances.contains_key(target) {
122                    distances.insert(target, current_dist + 1);
123                    queue.push_back(target);
124                }
125            }
126        }
127    }
128
129    distances
130}
131
132#[cfg(test)]
133mod tests {
134    use super::*;
135    use crate::analyses::AnalysisContext;
136    use crate::config::Config;
137    use crate::graph::test_helpers::{make_edge, make_node};
138    use std::path::Path;
139
140    fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
141        AnalysisContext {
142            graph,
143            root: Path::new("."),
144            config,
145            lockfile: None,
146        }
147    }
148
149    #[test]
150    fn empty_graph() {
151        let graph = Graph::new();
152        let config = Config::defaults();
153        let result = GraphStats.run(&make_ctx(&graph, &config));
154        assert_eq!(result.node_count, 0);
155        assert_eq!(result.edge_count, 0);
156        assert_eq!(result.density, 0.0);
157        assert_eq!(result.diameter, Some(0));
158    }
159
160    #[test]
161    fn single_node() {
162        let mut graph = Graph::new();
163        graph.add_node(make_node("a.md"));
164        let config = Config::defaults();
165        let result = GraphStats.run(&make_ctx(&graph, &config));
166        assert_eq!(result.node_count, 1);
167        assert_eq!(result.density, 0.0);
168        assert_eq!(result.diameter, Some(0));
169    }
170
171    #[test]
172    fn linear_chain() {
173        let mut graph = Graph::new();
174        graph.add_node(make_node("a.md"));
175        graph.add_node(make_node("b.md"));
176        graph.add_node(make_node("c.md"));
177        graph.add_edge(make_edge("a.md", "b.md"));
178        graph.add_edge(make_edge("b.md", "c.md"));
179
180        let config = Config::defaults();
181        let result = GraphStats.run(&make_ctx(&graph, &config));
182        assert_eq!(result.node_count, 3);
183        assert_eq!(result.edge_count, 2);
184        assert!((result.density - 1.0 / 3.0).abs() < 1e-10);
185        assert_eq!(result.diameter, None);
186    }
187
188    #[test]
189    fn complete_bidirectional() {
190        let mut graph = Graph::new();
191        graph.add_node(make_node("a.md"));
192        graph.add_node(make_node("b.md"));
193        graph.add_node(make_node("c.md"));
194        graph.add_edge(make_edge("a.md", "b.md"));
195        graph.add_edge(make_edge("b.md", "a.md"));
196        graph.add_edge(make_edge("b.md", "c.md"));
197        graph.add_edge(make_edge("c.md", "b.md"));
198        graph.add_edge(make_edge("a.md", "c.md"));
199        graph.add_edge(make_edge("c.md", "a.md"));
200
201        let config = Config::defaults();
202        let result = GraphStats.run(&make_ctx(&graph, &config));
203        assert_eq!(result.node_count, 3);
204        assert_eq!(result.edge_count, 6);
205        assert!((result.density - 1.0).abs() < 1e-10);
206        assert_eq!(result.diameter, Some(1));
207        assert_eq!(result.average_path_length, Some(1.0));
208    }
209
210    #[test]
211    fn cycle_has_diameter() {
212        let mut graph = Graph::new();
213        graph.add_node(make_node("a.md"));
214        graph.add_node(make_node("b.md"));
215        graph.add_node(make_node("c.md"));
216        graph.add_edge(make_edge("a.md", "b.md"));
217        graph.add_edge(make_edge("b.md", "c.md"));
218        graph.add_edge(make_edge("c.md", "a.md"));
219
220        let config = Config::defaults();
221        let result = GraphStats.run(&make_ctx(&graph, &config));
222        assert_eq!(result.diameter, Some(2));
223        assert!((result.average_path_length.unwrap() - 1.5).abs() < 1e-10);
224    }
225}