Skip to main content

sdivi_graph/
metrics.rs

1//! Graph metric computation for [`DependencyGraph`].
2
3use std::path::PathBuf;
4
5use petgraph::graph::NodeIndex;
6use petgraph::visit::EdgeRef;
7use serde::{Deserialize, Serialize};
8
9use crate::dependency_graph::DependencyGraph;
10
11/// Summary metrics for a [`DependencyGraph`].
12///
13/// # Examples
14///
15/// ```rust
16/// use sdivi_graph::dependency_graph::build_dependency_graph_from_edges;
17/// use sdivi_graph::metrics::compute_metrics;
18///
19/// let dg = build_dependency_graph_from_edges(&[], &[]);
20/// let m = compute_metrics(&dg);
21/// assert_eq!(m.node_count, 0);
22/// assert_eq!(m.edge_count, 0);
23/// ```
24#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
25pub struct GraphMetrics {
26    /// Total number of nodes (source files).
27    pub node_count: usize,
28    /// Total number of directed edges.
29    pub edge_count: usize,
30    /// Graph density: `edge_count / (node_count * (node_count - 1))`.
31    /// Zero when `node_count < 2`.
32    pub density: f64,
33    /// Number of directed cycles detected via DFS; self-loops excluded.
34    pub cycle_count: usize,
35    /// Nodes with the highest out-degree, sorted descending.
36    /// Contains at most `TOP_HUB_COUNT` entries.
37    pub top_hubs: Vec<(PathBuf, usize)>,
38    /// Number of weakly connected components.
39    pub component_count: usize,
40}
41
42/// Maximum number of hubs to report.
43const TOP_HUB_COUNT: usize = 10;
44
45/// Computes [`GraphMetrics`] for a [`DependencyGraph`].
46///
47/// Cycle detection excludes self-loops. Connected components are computed as
48/// weakly connected (direction ignored) via Kosaraju's algorithm on the
49/// underlying directed graph.
50pub fn compute_metrics(dg: &DependencyGraph) -> GraphMetrics {
51    let n = dg.node_count();
52    let e = dg.edge_count();
53
54    let density = if n >= 2 {
55        e as f64 / (n * (n - 1)) as f64
56    } else {
57        0.0
58    };
59
60    let top_hubs = compute_top_hubs(dg);
61    let cycle_count = count_cycles(dg);
62    let component_count = compute_component_count(dg);
63
64    GraphMetrics {
65        node_count: n,
66        edge_count: e,
67        density,
68        cycle_count,
69        top_hubs,
70        component_count,
71    }
72}
73
74/// Returns the top-`TOP_HUB_COUNT` nodes by out-degree, sorted descending.
75fn compute_top_hubs(dg: &DependencyGraph) -> Vec<(PathBuf, usize)> {
76    let mut degrees: Vec<(PathBuf, usize)> = (0..dg.node_count())
77        .filter_map(|idx| {
78            let ni = NodeIndex::new(idx);
79            let path = dg.graph.node_weight(ni)?.clone();
80            let out_deg = dg.graph.edges(ni).count();
81            Some((path, out_deg))
82        })
83        .collect();
84
85    degrees.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
86    degrees.truncate(TOP_HUB_COUNT);
87    degrees
88}
89
90/// Counts directed cycles, excluding self-loops.
91///
92/// Uses a DFS-based back-edge detector. Each back edge found during DFS
93/// contributes one cycle count.
94fn count_cycles(dg: &DependencyGraph) -> usize {
95    let n = dg.node_count();
96    if n == 0 {
97        return 0;
98    }
99
100    let mut visited = vec![false; n];
101    let mut on_stack = vec![false; n];
102    let mut cycles = 0;
103
104    for start in 0..n {
105        if !visited[start] {
106            dfs_cycle_count(dg, start, &mut visited, &mut on_stack, &mut cycles);
107        }
108    }
109    cycles
110}
111
112/// DFS traversal that counts back edges (each = one cycle).
113fn dfs_cycle_count(
114    dg: &DependencyGraph,
115    node: usize,
116    visited: &mut Vec<bool>,
117    on_stack: &mut Vec<bool>,
118    cycles: &mut usize,
119) {
120    visited[node] = true;
121    on_stack[node] = true;
122
123    let ni = NodeIndex::new(node);
124    // Collect neighbors into a sorted Vec for deterministic traversal.
125    let mut neighbors: Vec<usize> = dg
126        .graph
127        .edges(ni)
128        .filter_map(|e| {
129            let tgt = e.target().index();
130            // Skip self-loops.
131            if tgt != node {
132                Some(tgt)
133            } else {
134                None
135            }
136        })
137        .collect();
138    neighbors.sort_unstable();
139
140    for neighbor in neighbors {
141        if !visited[neighbor] {
142            dfs_cycle_count(dg, neighbor, visited, on_stack, cycles);
143        } else if on_stack[neighbor] {
144            *cycles += 1;
145        }
146    }
147
148    on_stack[node] = false;
149}
150
151/// Counts weakly connected components via union-find on the undirected edge set.
152fn compute_component_count(dg: &DependencyGraph) -> usize {
153    let n = dg.node_count();
154    if n == 0 {
155        return 0;
156    }
157    compute_wcc_count(dg, n)
158}
159
160/// Counts weakly connected components via union-find.
161fn compute_wcc_count(dg: &DependencyGraph, n: usize) -> usize {
162    let mut parent: Vec<usize> = (0..n).collect();
163
164    fn find(parent: &mut Vec<usize>, x: usize) -> usize {
165        if parent[x] != x {
166            parent[x] = find(parent, parent[x]);
167        }
168        parent[x]
169    }
170
171    fn union(parent: &mut Vec<usize>, x: usize, y: usize) {
172        let rx = find(parent, x);
173        let ry = find(parent, y);
174        if rx != ry {
175            parent[rx] = ry;
176        }
177    }
178
179    for (from, to) in dg.edges_as_pairs() {
180        union(&mut parent, from, to);
181    }
182
183    let mut roots = std::collections::BTreeSet::new();
184    for i in 0..n {
185        roots.insert(find(&mut parent, i));
186    }
187    roots.len()
188}