llmcc_core/
pagerank.rs

1use std::collections::HashMap;
2
3use crate::block::{BlockId, BlockKind, BlockRelation};
4use crate::graph_builder::{GraphNode, ProjectGraph};
5
6/// Configuration options for PageRank algorithm.
7#[derive(Debug, Clone)]
8pub struct PageRankConfig {
9    /// Damping factor (typically 0.85).
10    pub damping_factor: f64,
11    /// Maximum number of iterations.
12    pub max_iterations: usize,
13    /// Convergence tolerance.
14    pub tolerance: f64,
15    /// Weight assigned to PageRank computed over `DependsOn` edges (foundational influence).
16    pub influence_weight: f64,
17    /// Weight assigned to PageRank computed over `DependedBy` edges (orchestration influence).
18    pub orchestration_weight: f64,
19}
20
21impl Default for PageRankConfig {
22    fn default() -> Self {
23        Self {
24            damping_factor: 0.85,
25            max_iterations: 100,
26            tolerance: 1e-6,
27            influence_weight: 0.2,
28            orchestration_weight: 0.8,
29        }
30    }
31}
32
33/// Result from a PageRank computation.
34#[derive(Debug, Clone)]
35pub struct RankedBlock {
36    pub node: GraphNode,
37    /// Blended score based on the configured influence/orchestration weights.
38    pub score: f64,
39    /// PageRank following `DependsOn` edges – highlights foundational building blocks.
40    pub influence_score: f64,
41    /// PageRank following `DependedBy` edges – highlights orchestrators/entry points.
42    pub orchestration_score: f64,
43    pub name: String,
44    pub kind: BlockKind,
45    pub file_path: Option<String>,
46}
47
48/// Result from ranking computation.
49#[derive(Debug)]
50pub struct RankingResult {
51    pub blocks: Vec<RankedBlock>,
52    pub iterations: usize,
53    pub converged: bool,
54}
55
56impl IntoIterator for RankingResult {
57    type Item = RankedBlock;
58    type IntoIter = std::vec::IntoIter<RankedBlock>;
59
60    fn into_iter(self) -> Self::IntoIter {
61        self.blocks.into_iter()
62    }
63}
64
65/// Computes PageRank scores over a [`ProjectGraph`].
66#[derive(Debug)]
67pub struct PageRanker<'graph, 'tcx> {
68    graph: &'graph ProjectGraph<'tcx>,
69    config: PageRankConfig,
70}
71
72impl<'graph, 'tcx> PageRanker<'graph, 'tcx> {
73    /// Create a new PageRanker with default configuration.
74    pub fn new(graph: &'graph ProjectGraph<'tcx>) -> Self {
75        Self {
76            graph,
77            config: PageRankConfig::default(),
78        }
79    }
80
81    /// Create a new PageRanker with custom configuration.
82    pub fn with_config(graph: &'graph ProjectGraph<'tcx>, config: PageRankConfig) -> Self {
83        Self { graph, config }
84    }
85
86    /// Compute PageRank and return results sorted by score (highest first).
87    pub fn rank(&self) -> RankingResult {
88        let entries = self.collect_entries();
89
90        if entries.is_empty() {
91            return RankingResult {
92                blocks: Vec::new(),
93                iterations: 0,
94                converged: true,
95            };
96        }
97
98        let adjacency_depends = self.build_adjacency(&entries, BlockRelation::DependsOn);
99        let adjacency_depended = self.build_adjacency(&entries, BlockRelation::DependedBy);
100
101        // Compute PageRank for both dependency directions.
102        let (depends_scores, depends_iterations, depends_converged) =
103            self.compute_pagerank(&adjacency_depends);
104        let (depended_scores, depended_iterations, depended_converged) =
105            self.compute_pagerank(&adjacency_depended);
106
107        let total_weight = self.config.influence_weight + self.config.orchestration_weight;
108        let (influence_weight, orchestration_weight) = if total_weight <= f64::EPSILON {
109            (0.5, 0.5)
110        } else {
111            (
112                self.config.influence_weight / total_weight,
113                self.config.orchestration_weight / total_weight,
114            )
115        };
116
117        let blended_scores: Vec<f64> = depends_scores
118            .iter()
119            .zip(&depended_scores)
120            .map(|(influence, orchestration)| {
121                influence * influence_weight + orchestration * orchestration_weight
122            })
123            .collect();
124
125        let iterations = depends_iterations.max(depended_iterations);
126        let converged = depends_converged && depended_converged;
127
128        // Build ranked results
129        let mut ranked: Vec<RankedBlock> = entries
130            .into_iter()
131            .enumerate()
132            .map(|(idx, entry)| {
133                let display_name = entry
134                    .name
135                    .filter(|name| !name.is_empty())
136                    .unwrap_or_else(|| format!("{}:{}", entry.kind, entry.block_id.as_u32()));
137
138                RankedBlock {
139                    node: GraphNode {
140                        unit_index: entry.unit_index,
141                        block_id: entry.block_id,
142                    },
143                    score: blended_scores[idx],
144                    influence_score: depends_scores[idx],
145                    orchestration_score: depended_scores[idx],
146                    name: display_name,
147                    kind: entry.kind,
148                    file_path: entry.file_path,
149                }
150            })
151            .collect();
152
153        ranked.sort_by(|a, b| {
154            b.score
155                .partial_cmp(&a.score)
156                .unwrap_or(std::cmp::Ordering::Equal)
157        });
158
159        RankingResult {
160            blocks: ranked,
161            iterations,
162            converged,
163        }
164    }
165
166    /// Get top k blocks by PageRank score.
167    pub fn top_k(&self, k: usize) -> Vec<RankedBlock> {
168        self.rank().blocks.into_iter().take(k).collect()
169    }
170
171    fn compute_pagerank(&self, adjacency: &[Vec<usize>]) -> (Vec<f64>, usize, bool) {
172        let n = adjacency.len();
173        let mut ranks = vec![1.0 / n as f64; n];
174        let mut next_ranks = vec![0.0; n];
175        let damping = self.config.damping_factor;
176        let teleport = (1.0 - damping) / n as f64;
177
178        let mut iterations = 0;
179        let mut converged = false;
180
181        for iter in 0..self.config.max_iterations {
182            iterations = iter + 1;
183            next_ranks.fill(teleport);
184
185            let mut sink_mass = 0.0;
186            for (idx, neighbors) in adjacency.iter().enumerate() {
187                if neighbors.is_empty() {
188                    sink_mass += ranks[idx];
189                } else {
190                    let share = ranks[idx] * damping / neighbors.len() as f64;
191                    for &target_idx in neighbors {
192                        next_ranks[target_idx] += share;
193                    }
194                }
195            }
196
197            if sink_mass > 0.0 {
198                let redistributed = sink_mass * damping / n as f64;
199                for value in &mut next_ranks {
200                    *value += redistributed;
201                }
202            }
203
204            let delta: f64 = next_ranks
205                .iter()
206                .zip(&ranks)
207                .map(|(new, old)| (new - old).abs())
208                .sum();
209
210            ranks.copy_from_slice(&next_ranks);
211
212            if delta < self.config.tolerance {
213                converged = true;
214                break;
215            }
216        }
217
218        (ranks, iterations, converged)
219    }
220
221    fn collect_entries(&self) -> Vec<BlockEntry> {
222        let mut raw_entries: Vec<(BlockId, usize, Option<String>, BlockKind)> = {
223            let indexes = self.graph.cc.block_indexes.borrow();
224            indexes
225                .block_id_index
226                .iter()
227                .map(|(&block_id, (unit_index, name, kind))| {
228                    (block_id, *unit_index, name.clone(), *kind)
229                })
230                .collect()
231        };
232
233        raw_entries.sort_by_key(|(block_id, ..)| block_id.as_u32());
234
235        raw_entries
236            .into_iter()
237            .map(|(block_id, unit_index, name, kind)| {
238                let file_path = self
239                    .graph
240                    .cc
241                    .files
242                    .get(unit_index)
243                    .and_then(|file| file.path().map(|path| path.to_string()));
244
245                BlockEntry {
246                    block_id,
247                    unit_index,
248                    name,
249                    kind,
250                    file_path,
251                }
252            })
253            .collect()
254    }
255
256    fn build_adjacency(&self, entries: &[BlockEntry], relation: BlockRelation) -> Vec<Vec<usize>> {
257        let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); entries.len()];
258        let mut index_by_block: HashMap<BlockId, usize> = HashMap::new();
259
260        for (idx, entry) in entries.iter().enumerate() {
261            index_by_block.insert(entry.block_id, idx);
262        }
263
264        for (idx, entry) in entries.iter().enumerate() {
265            let Some(unit_graph) = self.graph.unit_graph(entry.unit_index) else {
266                continue;
267            };
268
269            let mut targets = unit_graph
270                .edges()
271                .get_related(entry.block_id, relation)
272                .into_iter()
273                .filter_map(|dep_id| index_by_block.get(&dep_id).copied())
274                .filter(|target_idx| *target_idx != idx)
275                .collect::<Vec<_>>();
276
277            targets.sort_unstable();
278            targets.dedup();
279            adjacency[idx] = targets;
280        }
281
282        adjacency
283    }
284}
285#[derive(Debug, Clone)]
286struct BlockEntry {
287    block_id: BlockId,
288    unit_index: usize,
289    name: Option<String>,
290    kind: BlockKind,
291    file_path: Option<String>,
292}