llmcc_core/
pagerank.rs

1use std::collections::HashMap;
2
3use crate::block::{BlockId, BlockKind, BlockRelation};
4use crate::graph::{ProjectGraph, UnitNode};
5
6/// Configuration options for PageRank algorithm.
7#[derive(Debug, Clone)]
8pub struct PageRankConfig {
9    /// Damping factor (typically 0.85).
10    pub damping_factor: f64,
11    /// Maximum number of iterations.
12    pub max_iterations: usize,
13    /// Convergence tolerance.
14    pub tolerance: f64,
15    /// Weight assigned to PageRank computed over `DependsOn` edges (foundational influence).
16    pub influence_weight: f64,
17    /// Weight assigned to PageRank computed over `DependedBy` edges (orchestration influence).
18    pub orchestration_weight: f64,
19}
20
21impl Default for PageRankConfig {
22    fn default() -> Self {
23        Self {
24            damping_factor: 0.85,
25            max_iterations: 100,
26            tolerance: 1e-6,
27            influence_weight: 0.2,
28            orchestration_weight: 0.8,
29        }
30    }
31}
32
33/// Result from a PageRank computation.
34#[derive(Debug, Clone)]
35pub struct RankedBlock {
36    pub node: UnitNode,
37    /// Blended score based on the configured influence/orchestration weights.
38    pub score: f64,
39    /// PageRank following `DependsOn` edges – highlights foundational building blocks.
40    pub influence_score: f64,
41    /// PageRank following `DependedBy` edges – highlights orchestrators/entry points.
42    pub orchestration_score: f64,
43    pub name: String,
44    pub kind: BlockKind,
45    pub file_path: Option<String>,
46}
47
48/// Result from ranking computation.
49#[derive(Debug)]
50pub struct RankingResult {
51    pub blocks: Vec<RankedBlock>,
52    pub iterations: usize,
53    pub converged: bool,
54}
55
56impl IntoIterator for RankingResult {
57    type Item = RankedBlock;
58    type IntoIter = std::vec::IntoIter<RankedBlock>;
59
60    fn into_iter(self) -> Self::IntoIter {
61        self.blocks.into_iter()
62    }
63}
64
65/// Computes PageRank scores over a [`ProjectGraph`].
66#[derive(Debug)]
67pub struct PageRanker<'graph, 'tcx> {
68    graph: &'graph ProjectGraph<'tcx>,
69    config: PageRankConfig,
70}
71
72impl<'graph, 'tcx> PageRanker<'graph, 'tcx> {
73    /// Create a new PageRanker with default configuration.
74    pub fn new(graph: &'graph ProjectGraph<'tcx>) -> Self {
75        Self {
76            graph,
77            config: PageRankConfig::default(),
78        }
79    }
80
81    /// Create a new PageRanker with custom configuration.
82    pub fn with_config(graph: &'graph ProjectGraph<'tcx>, config: PageRankConfig) -> Self {
83        Self { graph, config }
84    }
85
86    /// Compute PageRank and return results sorted by score (highest first).
87    ///
88    /// Uses a unified graph built from multiple edge types:
89    /// - **Influence edges** (A→B means B is foundational to A):
90    ///   - Calls: function calls another function
91    ///   - TypeOf: field/param/return uses a type
92    ///   - Implements: type implements a trait
93    /// - **Orchestration edges** (A→B means A orchestrates/uses B):
94    ///   - CalledBy: function is called by another
95    ///   - TypeFor: type is used by field/param/return
96    ///   - ImplementedBy: trait is implemented by types
97    pub fn rank(&self) -> RankingResult {
98        let entries = self.collect_entries();
99
100        if entries.is_empty() {
101            return RankingResult {
102                blocks: Vec::new(),
103                iterations: 0,
104                converged: true,
105            };
106        }
107
108        // Build unified adjacency graphs from multiple edge types
109        let adjacency_influence = self.build_unified_adjacency(
110            &entries,
111            &[
112                BlockRelation::Calls,      // func → func it calls
113                BlockRelation::TypeOf,     // field/param → type definition
114                BlockRelation::Implements, // type → trait it implements
115                BlockRelation::Uses,       // generic usage
116            ],
117        );
118        let adjacency_orchestration = self.build_unified_adjacency(
119            &entries,
120            &[
121                BlockRelation::CalledBy,      // func ← func that calls it
122                BlockRelation::TypeFor,       // type ← field/param that uses it
123                BlockRelation::ImplementedBy, // trait ← types that implement it
124                BlockRelation::UsedBy,        // generic usage
125            ],
126        );
127
128        // Compute PageRank for both directions
129        let (influence_scores, influence_iters, influence_converged) =
130            self.compute_pagerank(&adjacency_influence);
131        let (orchestration_scores, orchestration_iters, orchestration_converged) =
132            self.compute_pagerank(&adjacency_orchestration);
133
134        let total_weight = self.config.influence_weight + self.config.orchestration_weight;
135        let (influence_weight, orchestration_weight) = if total_weight <= f64::EPSILON {
136            (0.5, 0.5)
137        } else {
138            (
139                self.config.influence_weight / total_weight,
140                self.config.orchestration_weight / total_weight,
141            )
142        };
143
144        let blended_scores: Vec<f64> = influence_scores
145            .iter()
146            .zip(&orchestration_scores)
147            .map(|(influence, orchestration)| {
148                influence * influence_weight + orchestration * orchestration_weight
149            })
150            .collect();
151
152        let iterations = influence_iters.max(orchestration_iters);
153        let converged = influence_converged && orchestration_converged;
154
155        // Build ranked results
156        let mut ranked: Vec<RankedBlock> = entries
157            .into_iter()
158            .enumerate()
159            .map(|(idx, entry)| {
160                let display_name = entry
161                    .name
162                    .filter(|name| !name.is_empty())
163                    .unwrap_or_else(|| format!("{}:{}", entry.kind, entry.block_id.as_u32()));
164
165                RankedBlock {
166                    node: UnitNode {
167                        unit_index: entry.unit_index,
168                        block_id: entry.block_id,
169                    },
170                    score: blended_scores[idx],
171                    influence_score: influence_scores[idx],
172                    orchestration_score: orchestration_scores[idx],
173                    name: display_name,
174                    kind: entry.kind,
175                    file_path: entry.file_path,
176                }
177            })
178            .collect();
179
180        ranked.sort_by(|a, b| {
181            b.score
182                .partial_cmp(&a.score)
183                .unwrap_or(std::cmp::Ordering::Equal)
184        });
185
186        RankingResult {
187            blocks: ranked,
188            iterations,
189            converged,
190        }
191    }
192
193    /// Get top k blocks by PageRank score.
194    pub fn top_k(&self, k: usize) -> Vec<RankedBlock> {
195        self.rank().blocks.into_iter().take(k).collect()
196    }
197
198    /// Get all PageRank scores as a HashMap from BlockId to score.
199    /// Useful for aggregating scores by component (crate/module/file).
200    pub fn scores(&self) -> HashMap<BlockId, f64> {
201        self.rank()
202            .blocks
203            .into_iter()
204            .map(|rb| (rb.node.block_id, rb.score))
205            .collect()
206    }
207
208    fn compute_pagerank(&self, adjacency: &[Vec<usize>]) -> (Vec<f64>, usize, bool) {
209        let n = adjacency.len();
210        let mut ranks = vec![1.0 / n as f64; n];
211        let mut next_ranks = vec![0.0; n];
212        let damping = self.config.damping_factor;
213        let teleport = (1.0 - damping) / n as f64;
214
215        let mut iterations = 0;
216        let mut converged = false;
217
218        for iter in 0..self.config.max_iterations {
219            iterations = iter + 1;
220            next_ranks.fill(teleport);
221
222            let mut sink_mass = 0.0;
223            for (idx, neighbors) in adjacency.iter().enumerate() {
224                if neighbors.is_empty() {
225                    sink_mass += ranks[idx];
226                } else {
227                    let share = ranks[idx] * damping / neighbors.len() as f64;
228                    for &target_idx in neighbors {
229                        next_ranks[target_idx] += share;
230                    }
231                }
232            }
233
234            if sink_mass > 0.0 {
235                let redistributed = sink_mass * damping / n as f64;
236                for value in &mut next_ranks {
237                    *value += redistributed;
238                }
239            }
240
241            let delta: f64 = next_ranks
242                .iter()
243                .zip(&ranks)
244                .map(|(new, old)| (new - old).abs())
245                .sum();
246
247            ranks.copy_from_slice(&next_ranks);
248
249            if delta < self.config.tolerance {
250                converged = true;
251                break;
252            }
253        }
254
255        (ranks, iterations, converged)
256    }
257
258    fn collect_entries(&self) -> Vec<BlockEntry> {
259        let mut raw_entries: Vec<(BlockId, usize, Option<String>, BlockKind)> =
260            self.graph.cc.get_all_blocks();
261
262        raw_entries.sort_by_key(|(block_id, ..)| block_id.as_u32());
263
264        raw_entries
265            .into_iter()
266            .map(|(block_id, unit_index, name, kind)| {
267                let file_path = self
268                    .graph
269                    .cc
270                    .files
271                    .get(unit_index)
272                    .and_then(|file| file.path().map(|path| path.to_string()));
273
274                BlockEntry {
275                    block_id,
276                    unit_index,
277                    name,
278                    kind,
279                    file_path,
280                }
281            })
282            .collect()
283    }
284
285    /// Build unified adjacency from multiple relation types.
286    fn build_unified_adjacency(
287        &self,
288        entries: &[BlockEntry],
289        relations: &[BlockRelation],
290    ) -> Vec<Vec<usize>> {
291        let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); entries.len()];
292        let mut index_by_block: HashMap<BlockId, usize> = HashMap::new();
293
294        for (idx, entry) in entries.iter().enumerate() {
295            index_by_block.insert(entry.block_id, idx);
296        }
297
298        for (idx, entry) in entries.iter().enumerate() {
299            let mut all_targets = Vec::new();
300
301            for &relation in relations {
302                let targets = self
303                    .graph
304                    .cc
305                    .related_map
306                    .get_related(entry.block_id, relation);
307                all_targets.extend(targets);
308            }
309
310            let mut filtered: Vec<usize> = all_targets
311                .into_iter()
312                .filter_map(|dep_id| index_by_block.get(&dep_id).copied())
313                .filter(|target_idx| *target_idx != idx)
314                .collect();
315
316            filtered.sort_unstable();
317            filtered.dedup();
318            adjacency[idx] = filtered;
319        }
320
321        adjacency
322    }
323}
324
325#[derive(Debug, Clone)]
326struct BlockEntry {
327    block_id: BlockId,
328    unit_index: usize,
329    name: Option<String>,
330    kind: BlockKind,
331    file_path: Option<String>,
332}