Skip to main content

arbor_graph/
ranking.rs

1//! Centrality ranking for code nodes.
2//!
3//! We use a production-aware PageRank variant: callers from test files
4//! contribute 10x less weight than production callers, so utility functions
5//! called heavily by tests don't false-inflate centrality scores.
6
7use crate::graph::{ArborGraph, NodeId};
8use std::collections::HashMap;
9
10/// Stores centrality scores after computation.
11#[derive(Debug, Default)]
12pub struct CentralityScores {
13    scores: HashMap<NodeId, f64>,
14}
15
16impl CentralityScores {
17    /// Gets the score for a node.
18    pub fn get(&self, id: NodeId) -> f64 {
19        self.scores.get(&id).copied().unwrap_or(0.0)
20    }
21
22    /// Converts to a HashMap for storage in the graph.
23    pub fn into_map(self) -> HashMap<NodeId, f64> {
24        self.scores
25    }
26}
27
28/// Returns true if this file path is a test/spec/fixture file.
29/// Callers from test files get de-weighted 10x so test utilities don't
30/// false-inflate their centrality scores vs. production callers.
31fn is_test_file(file: &str) -> bool {
32    let lower = file.to_lowercase();
33    lower.contains("/test")
34        || lower.contains("\\test")
35        || lower.contains("/spec")
36        || lower.contains("\\spec")
37        || lower.contains("__test__")
38        || lower.contains("_test.")
39        || lower.contains(".test.")
40        || lower.contains(".spec.")
41        || lower.contains("/fixture")
42        || lower.contains("/mock")
43        || lower.contains("/stub")
44        || lower.contains("/fake")
45        || lower.ends_with("_test.go")
46        || lower.ends_with("_test.py")
47        || lower.ends_with("_test.rs")
48        || lower.ends_with("test.ts")
49        || lower.ends_with("test.js")
50}
51
52/// Computes production-aware centrality scores for all nodes in the graph.
53///
54/// Uses a modified PageRank where:
55/// 1. Nodes initialize with equal score
56/// 2. Each iteration distributes scores along edges
57/// 3. Callers from test/spec/fixture files contribute 10x less weight
58///    — prevents test utilities from appearing more central than production code
59/// 4. Scores are normalized to [0.0, 1.0]
60///
61/// # Arguments
62///
63/// * `graph` - The graph to analyze
64/// * `iterations` - Number of iterations (10-20 is usually enough)
65/// * `damping` - Damping factor (0.85 is standard)
66pub fn compute_centrality(graph: &ArborGraph, iterations: usize, damping: f64) -> CentralityScores {
67    let node_count = graph.node_count();
68    if node_count == 0 {
69        return CentralityScores::default();
70    }
71
72    // Initialize scores
73    let initial_score = 1.0 / node_count as f64;
74    let mut scores: HashMap<NodeId, f64> = graph
75        .node_indexes()
76        .map(|idx| (idx, initial_score))
77        .collect();
78
79    // Count outgoing edges for each node
80    let mut out_degree: HashMap<NodeId, usize> = HashMap::new();
81    for idx in graph.node_indexes() {
82        let callees = graph.get_callees(idx);
83        out_degree.insert(idx, callees.len().max(1));
84    }
85
86    // Iterate
87    for _ in 0..iterations {
88        let mut new_scores: HashMap<NodeId, f64> = HashMap::new();
89
90        for idx in graph.node_indexes() {
91            let base = (1.0 - damping) / node_count as f64;
92
93            let callers = graph.get_callers(idx);
94            let incoming: f64 = callers
95                .iter()
96                .filter_map(|caller| {
97                    let caller_idx = graph.get_index(&caller.id)?;
98                    let caller_score = scores.get(&caller_idx)?;
99                    let caller_out = *out_degree.get(&caller_idx)? as f64;
100
101                    // Test callers contribute 10% weight — they inflate utility functions
102                    // but don't represent real production blast radius
103                    let caller_node = graph.get(caller_idx)?;
104                    let weight = if is_test_file(&caller_node.file) {
105                        0.1
106                    } else {
107                        1.0
108                    };
109
110                    Some(weight * caller_score / caller_out)
111                })
112                .sum();
113
114            new_scores.insert(idx, base + damping * incoming);
115        }
116
117        scores = new_scores;
118    }
119
120    // Normalize to [0, 1] range
121    let max_score = scores.values().cloned().fold(0.0f64, f64::max);
122    if max_score > 0.0 {
123        for score in scores.values_mut() {
124            *score /= max_score;
125        }
126    }
127
128    CentralityScores { scores }
129}
130
131#[cfg(test)]
132mod tests {
133    use super::*;
134    use crate::edge::{Edge, EdgeKind};
135    use arbor_core::{CodeNode, NodeKind};
136
137    #[test]
138    fn test_centrality_empty_graph() {
139        let graph = ArborGraph::new();
140        let scores = compute_centrality(&graph, 10, 0.85);
141        assert!(scores.scores.is_empty());
142    }
143
144    #[test]
145    fn test_centrality_single_node() {
146        let mut graph = ArborGraph::new();
147        let node = CodeNode::new("foo", "foo", NodeKind::Function, "test.rs");
148        graph.add_node(node);
149
150        let scores = compute_centrality(&graph, 10, 0.85);
151        assert_eq!(scores.scores.len(), 1);
152    }
153
154    #[test]
155    fn test_centrality_popular_node_ranks_higher() {
156        let mut graph = ArborGraph::new();
157
158        // Create a "popular" function called by many others
159        let popular = CodeNode::new("popular", "popular", NodeKind::Function, "test.rs");
160        let popular_idx = graph.add_node(popular);
161
162        // Create callers
163        for i in 0..5 {
164            let caller = CodeNode::new(
165                format!("caller{}", i),
166                format!("caller{}", i),
167                NodeKind::Function,
168                "test.rs",
169            );
170            let caller_idx = graph.add_node(caller);
171            graph.add_edge(caller_idx, popular_idx, Edge::new(EdgeKind::Calls));
172        }
173
174        let scores = compute_centrality(&graph, 20, 0.85);
175
176        // The popular node should have the highest score
177        let popular_score = scores.get(popular_idx);
178        assert!(popular_score > 0.5, "Popular node should rank high");
179    }
180}