llmcc_core/
pagerank.rs

1use std::collections::HashMap;
2
3use crate::block::{BlockId, BlockKind, BlockRelation};
4use crate::graph_builder::{GraphNode, ProjectGraph};
5
6/// Edge direction for PageRank traversal.
7#[derive(Debug, Clone, Copy, PartialEq, Eq)]
8pub enum PageRankDirection {
9    /// Follow DependsOn edges: rank flows toward heavily-depended-upon nodes (data types).
10    DependsOn,
11    /// Follow DependedBy edges: rank flows toward orchestrators that many nodes depend on.
12    DependedBy,
13}
14
15impl Default for PageRankDirection {
16    fn default() -> Self {
17        // Default to DependedBy to surface orchestrators and coordinators
18        // (more useful for understanding codebase behavior)
19        Self::DependedBy
20    }
21}
22
23/// Configuration options for the PageRank algorithm.
24#[derive(Debug, Clone)]
25pub struct PageRankConfig {
26    /// Damping factor used when random walkers jump to a random node.
27    pub damping_factor: f64,
28    /// Maximum number of iterations to run the algorithm.
29    pub max_iterations: usize,
30    /// Convergence tolerance (L1 delta between successive rank vectors).
31    pub tolerance: f64,
32    /// Which edge relation to consider when traversing the graph.
33    pub relation: BlockRelation,
34    /// Edge direction: follow DependsOn or DependedBy edges.
35    pub direction: PageRankDirection,
36}
37
38impl Default for PageRankConfig {
39    fn default() -> Self {
40        Self {
41            damping_factor: 0.85,
42            max_iterations: 100,
43            tolerance: 1e-6,
44            relation: BlockRelation::DependsOn,
45            direction: PageRankDirection::default(),
46        }
47    }
48}
49
50/// Result entry produced by PageRank containing metadata about the block.
51#[derive(Debug, Clone)]
52pub struct RankedBlock {
53    pub node: GraphNode,
54    pub score: f64,
55    pub name: String,
56    pub kind: BlockKind,
57    pub file_path: Option<String>,
58}
59
60/// Computes PageRank scores over a [`ProjectGraph`].
61#[derive(Debug)]
62pub struct PageRanker<'graph, 'tcx> {
63    graph: &'graph ProjectGraph<'tcx>,
64    config: PageRankConfig,
65}
66
67impl<'graph, 'tcx> PageRanker<'graph, 'tcx> {
68    /// Create a new PageRanker using the default configuration.
69    pub fn new(graph: &'graph ProjectGraph<'tcx>) -> Self {
70        Self {
71            graph,
72            config: PageRankConfig::default(),
73        }
74    }
75
76    /// Create a new PageRanker with a custom configuration.
77    pub fn with_config(graph: &'graph ProjectGraph<'tcx>, config: PageRankConfig) -> Self {
78        Self { graph, config }
79    }
80
81    /// Borrow the current configuration.
82    pub fn config(&self) -> &PageRankConfig {
83        &self.config
84    }
85
86    /// Mutably borrow the current configuration for in-place adjustments.
87    pub fn config_mut(&mut self) -> &mut PageRankConfig {
88        &mut self.config
89    }
90
91    /// Compute PageRank scores for all blocks in the project and return them in descending order.
92    pub fn rank(&self) -> Vec<RankedBlock> {
93        let mut entries = self.collect_entries();
94        if entries.is_empty() {
95            return Vec::new();
96        }
97
98        let mut outgoing = self.build_adjacency(&entries);
99
100        // Remove isolated nodes with neither incoming nor outgoing edges so they don't skew scoring.
101        let mut incoming_counts = vec![0usize; outgoing.len()];
102        for neighbours in &outgoing {
103            for &target_idx in neighbours {
104                incoming_counts[target_idx] += 1;
105            }
106        }
107
108        let mut any_filtered = false;
109        let mut keep_mask = Vec::with_capacity(entries.len());
110        for (idx, neighbours) in outgoing.iter().enumerate() {
111            let has_outgoing = !neighbours.is_empty();
112            let has_incoming = incoming_counts[idx] > 0;
113            let keep = has_outgoing || has_incoming;
114            if !keep {
115                any_filtered = true;
116            }
117            keep_mask.push(keep);
118        }
119
120        if any_filtered {
121            entries = entries
122                .into_iter()
123                .enumerate()
124                .filter_map(|(idx, entry)| keep_mask[idx].then_some(entry))
125                .collect();
126
127            if entries.is_empty() {
128                return Vec::new();
129            }
130
131            outgoing = self.build_adjacency(&entries);
132        }
133
134        let total_nodes = entries.len();
135        let mut ranks = vec![1.0 / total_nodes as f64; total_nodes];
136        let mut next_ranks = vec![0.0; total_nodes];
137        let damping = self.config.damping_factor;
138        let teleport = (1.0 - damping) / total_nodes as f64;
139
140        for _ in 0..self.config.max_iterations {
141            next_ranks.fill(teleport);
142
143            let mut sink_mass = 0.0;
144            for (idx, neighbours) in outgoing.iter().enumerate() {
145                if neighbours.is_empty() {
146                    sink_mass += ranks[idx];
147                    continue;
148                }
149
150                let share = ranks[idx] * damping / neighbours.len() as f64;
151                for &target_idx in neighbours {
152                    next_ranks[target_idx] += share;
153                }
154            }
155
156            if sink_mass > 0.0 {
157                let redistributed = sink_mass * damping / total_nodes as f64;
158                for value in &mut next_ranks {
159                    *value += redistributed;
160                }
161            }
162
163            let delta: f64 = next_ranks
164                .iter()
165                .zip(&ranks)
166                .map(|(new_score, old_score)| (new_score - old_score).abs())
167                .sum();
168
169            ranks.copy_from_slice(&next_ranks);
170
171            if delta < self.config.tolerance {
172                break;
173            }
174        }
175
176        // Apply block-kind weighting: favor Classes and Funcs over data types
177        fn kind_weight(kind: BlockKind) -> f64 {
178            match kind {
179                BlockKind::Class | BlockKind::Func | BlockKind::Impl => 2.0,
180                BlockKind::Enum | BlockKind::Const | BlockKind::Field => 1.0,
181                _ => 1.0,
182            }
183        }
184
185        let mut ranked: Vec<RankedBlock> = entries
186            .into_iter()
187            .enumerate()
188            .map(|(idx, entry)| {
189                let BlockEntry {
190                    block_id,
191                    unit_index,
192                    name,
193                    kind,
194                    file_path,
195                } = entry;
196
197                let display_name = name
198                    .filter(|name| !name.is_empty())
199                    .unwrap_or_else(|| format!("{}:{}", kind, block_id.as_u32()));
200
201                // Apply kind weight multiplier to the raw rank score
202                let weighted_score = ranks[idx] * kind_weight(kind);
203
204                RankedBlock {
205                    node: GraphNode {
206                        unit_index,
207                        block_id,
208                    },
209                    score: weighted_score,
210                    name: display_name,
211                    kind,
212                    file_path,
213                }
214            })
215            .collect();
216
217        ranked.sort_by(|a, b| {
218            b.score
219                .partial_cmp(&a.score)
220                .unwrap_or(std::cmp::Ordering::Equal)
221        });
222        ranked
223    }
224
225    /// Convenience helper returning only the top `k` ranked blocks.
226    pub fn top_k(&self, k: usize) -> Vec<RankedBlock> {
227        let mut ranked = self.rank();
228        if ranked.len() > k {
229            ranked.truncate(k);
230        }
231        ranked
232    }
233
234    /// Convenience helper returning blocks whose score is above the provided threshold.
235    pub fn above_threshold(&self, threshold: f64) -> Vec<RankedBlock> {
236        self.rank()
237            .into_iter()
238            .filter(|block| block.score >= threshold)
239            .collect()
240    }
241
242    fn collect_entries(&self) -> Vec<BlockEntry> {
243        let mut raw_entries: Vec<(BlockId, usize, Option<String>, BlockKind)> = {
244            let indexes = self.graph.cc.block_indexes.borrow();
245            indexes
246                .block_id_index
247                .iter()
248                .map(|(&block_id, (unit_index, name, kind))| {
249                    (block_id, *unit_index, name.clone(), *kind)
250                })
251                .collect()
252        };
253
254        raw_entries.sort_by_key(|(block_id, ..)| block_id.as_u32());
255
256        raw_entries
257            .into_iter()
258            .map(|(block_id, unit_index, name, kind)| {
259                let file_path = self
260                    .graph
261                    .cc
262                    .files
263                    .get(unit_index)
264                    .and_then(|file| file.path().map(|path| path.to_string()));
265
266                BlockEntry {
267                    block_id,
268                    unit_index,
269                    name,
270                    kind,
271                    file_path,
272                }
273            })
274            .collect()
275    }
276
277    fn build_adjacency(&self, entries: &[BlockEntry]) -> Vec<Vec<usize>> {
278        let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); entries.len()];
279        let mut index_by_block: HashMap<BlockId, usize> = HashMap::new();
280
281        for (idx, entry) in entries.iter().enumerate() {
282            index_by_block.insert(entry.block_id, idx);
283        }
284
285        for (idx, entry) in entries.iter().enumerate() {
286            let Some(unit_graph) = self.graph.unit_graph(entry.unit_index) else {
287                continue;
288            };
289
290            // Use the configured direction: follow DependsOn or DependedBy edges
291            let relation_to_follow = match self.config.direction {
292                PageRankDirection::DependsOn => self.config.relation,
293                PageRankDirection::DependedBy => {
294                    // Reverse the relation: if we want DependedBy, follow the reverse of DependsOn
295                    match self.config.relation {
296                        BlockRelation::DependsOn => BlockRelation::DependedBy,
297                        BlockRelation::DependedBy => BlockRelation::DependsOn,
298                        _ => self.config.relation,
299                    }
300                }
301            };
302
303            let mut targets = unit_graph
304                .edges()
305                .get_related(entry.block_id, relation_to_follow)
306                .into_iter()
307                .filter_map(|dep_id| index_by_block.get(&dep_id).copied())
308                .filter(|target_idx| *target_idx != idx)
309                .collect::<Vec<_>>();
310
311            targets.sort_unstable();
312            targets.dedup();
313            adjacency[idx] = targets;
314        }
315
316        adjacency
317    }
318}
319
320#[derive(Debug, Clone)]
321struct BlockEntry {
322    block_id: BlockId,
323    unit_index: usize,
324    name: Option<String>,
325    kind: BlockKind,
326    file_path: Option<String>,
327}