use std::collections::HashMap;

use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use scribe_core::{Result, error::ScribeError};

use crate::graph::{DependencyGraph, NodeId, GraphStatistics};
/// Results of a PageRank computation over a dependency graph.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PageRankResults {
    /// Final PageRank score for each node that survived score filtering.
    pub scores: HashMap<NodeId, f64>,

    /// Number of iterations that were executed.
    pub iterations_converged: usize,

    /// Convergence difference measured at the final iteration.
    pub convergence_epsilon: f64,

    /// Statistics of the graph the scores were computed on.
    pub graph_stats: GraphStatistics,

    /// Configuration used for this run.
    pub parameters: PageRankConfig,

    /// Timing and resource metrics collected during the run.
    pub performance_metrics: PerformanceMetrics,
}

/// Configuration for the PageRank computation.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PageRankConfig {
    /// Damping factor `d`; must lie in `[0, 1)`.
    pub damping_factor: f64,

    /// Maximum number of iterations to run.
    pub max_iterations: usize,

    /// Convergence threshold on the total absolute score change per iteration.
    pub epsilon: f64,

    /// Whether to use the rayon-based parallel iteration path.
    pub use_parallel: bool,

    /// Scores below this threshold are dropped from the results (0.0 keeps all).
    pub min_score_threshold: f64,
}

impl Default for PageRankConfig {
    fn default() -> Self {
        Self {
            damping_factor: 0.85,
            max_iterations: 50,
            epsilon: 1e-6,
            use_parallel: true,
            min_score_threshold: 1e-8,
        }
    }
}

impl PageRankConfig {
    /// Balanced settings for typical code dependency analysis.
    pub fn for_code_analysis() -> Self {
        Self {
            damping_factor: 0.85,
            max_iterations: 30,
            epsilon: 1e-5,
            use_parallel: true,
            min_score_threshold: 1e-6,
        }
    }

    /// Faster, coarser settings for very large codebases.
    pub fn for_large_codebases() -> Self {
        Self {
            damping_factor: 0.85,
            max_iterations: 20,
            epsilon: 1e-4,
            use_parallel: true,
            min_score_threshold: 1e-5,
        }
    }

    /// High-precision settings for research and offline analysis.
    pub fn for_research() -> Self {
        Self {
            damping_factor: 0.85,
            max_iterations: 100,
            epsilon: 1e-8,
            use_parallel: true,
            min_score_threshold: 0.0,
        }
    }

    /// Validate the configuration, rejecting out-of-range parameters.
    pub fn validate(&self) -> Result<()> {
        if self.damping_factor < 0.0 || self.damping_factor >= 1.0 {
            return Err(ScribeError::invalid_operation(
                "Damping factor must be in range [0, 1)".to_string(),
                "pagerank_config_validation".to_string()
            ));
        }

        if self.max_iterations == 0 {
            return Err(ScribeError::invalid_operation(
                "Max iterations must be greater than 0".to_string(),
                "pagerank_config_validation".to_string()
            ));
        }

        if self.epsilon <= 0.0 {
            return Err(ScribeError::invalid_operation(
                "Epsilon must be positive".to_string(),
                "pagerank_config_validation".to_string()
            ));
        }

        Ok(())
    }
}

/// Timing and resource metrics for a single PageRank run.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Total wall-clock time in milliseconds.
    pub total_time_ms: u64,

    /// Average time per iteration in milliseconds.
    pub avg_iteration_time_ms: f64,

    /// Rough estimate of peak memory usage in megabytes.
    pub peak_memory_mb: f64,

    /// Number of nodes processed.
    pub nodes_processed: usize,

    /// Relative reduction of the convergence difference over the run.
    pub convergence_rate: f64,

    /// Whether the parallel iteration path was used.
    pub used_parallel: bool,
}

impl Default for PerformanceMetrics {
    fn default() -> Self {
        Self {
            total_time_ms: 0,
            avg_iteration_time_ms: 0.0,
            peak_memory_mb: 0.0,
            nodes_processed: 0,
            convergence_rate: 0.0,
            used_parallel: false,
        }
    }
}

/// Computes PageRank scores over a [`DependencyGraph`].
#[derive(Debug)]
pub struct PageRankComputer {
    config: PageRankConfig,
}

impl PageRankComputer {
    /// Create a computer using the default configuration.
    pub fn new() -> Self {
        Self {
            config: PageRankConfig::default(),
        }
    }

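    /// Create a computer with a custom configuration, validating it first.
    ///
    /// # Example
    ///
    /// A minimal usage sketch (marked `ignore` because the import path depends
    /// on this crate's layout):
    ///
    /// ```ignore
    /// let config = PageRankConfig {
    ///     epsilon: 1e-7,
    ///     ..PageRankConfig::default()
    /// };
    /// let computer = PageRankComputer::with_config(config)?;
    /// ```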
    pub fn with_config(config: PageRankConfig) -> Result<Self> {
        config.validate()?;
        Ok(Self { config })
    }

    /// Create a computer preconfigured for code analysis.
    pub fn for_code_analysis() -> Self {
        Self {
            config: PageRankConfig::for_code_analysis(),
        }
    }

    /// Create a computer preconfigured for large codebases.
    pub fn for_large_codebases() -> Self {
        Self {
            config: PageRankConfig::for_large_codebases(),
        }
    }

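    /// Run PageRank on `graph`, returning per-node scores plus convergence and
    /// performance metadata.
    ///
    /// # Example
    ///
    /// A minimal usage sketch (marked `ignore`; building the `DependencyGraph`
    /// is assumed to happen elsewhere):
    ///
    /// ```ignore
    /// let computer = PageRankComputer::for_code_analysis();
    /// let results = computer.compute(&graph)?;
    /// for (node, score) in results.top_nodes(10) {
    ///     println!("{}: {:.6}", node, score);
    /// }
    /// ```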
    pub fn compute(&self, graph: &DependencyGraph) -> Result<PageRankResults> {
        let start_time = std::time::Instant::now();

        if graph.node_count() == 0 {
            return Ok(PageRankResults {
                scores: HashMap::new(),
                iterations_converged: 0,
                convergence_epsilon: 0.0,
                graph_stats: GraphStatistics::empty(),
                parameters: self.config.clone(),
                performance_metrics: PerformanceMetrics::default(),
            });
        }

        let nodes: Vec<NodeId> = graph.nodes().cloned().collect();
        let num_nodes = nodes.len();

        let initial_score = 1.0 / num_nodes as f64;
        let mut current_scores: HashMap<NodeId, f64> = nodes.iter()
            .map(|node| (node.clone(), initial_score))
            .collect();

        let mut previous_scores = current_scores.clone();
        let mut convergence_history = Vec::new();

        let mut iterations = 0;
        let mut total_convergence_diff = 0.0;

        for iteration in 0..self.config.max_iterations {
            iterations = iteration + 1;

            if self.config.use_parallel {
                self.compute_iteration_parallel(graph, &nodes, &previous_scores, &mut current_scores)?;
            } else {
                self.compute_iteration_sequential(graph, &nodes, &previous_scores, &mut current_scores)?;
            }

            total_convergence_diff = self.compute_convergence_diff(&current_scores, &previous_scores);
            convergence_history.push(total_convergence_diff);

            if total_convergence_diff < self.config.epsilon {
                break;
            }

            std::mem::swap(&mut current_scores, &mut previous_scores);
        }

        // If the loop exhausted max_iterations without converging, the most
        // recent scores were swapped into `previous_scores`; swap back so the
        // latest iteration's values are the ones returned.
        if total_convergence_diff >= self.config.epsilon {
            std::mem::swap(&mut current_scores, &mut previous_scores);
        }

        if self.config.min_score_threshold > 0.0 {
            current_scores.retain(|_, &mut score| score >= self.config.min_score_threshold);
        }

        let total_time = start_time.elapsed();
        let convergence_rate = if convergence_history.len() > 1 {
            let first = convergence_history[0];
            let last = convergence_history.last().unwrap();
            if first > 0.0 { (first - last) / first } else { 0.0 }
        } else {
            0.0
        };

        let performance_metrics = PerformanceMetrics {
            total_time_ms: total_time.as_millis() as u64,
            avg_iteration_time_ms: total_time.as_millis() as f64 / iterations as f64,
            peak_memory_mb: self.estimate_memory_usage(num_nodes),
            nodes_processed: num_nodes,
            convergence_rate,
            used_parallel: self.config.use_parallel,
        };

        Ok(PageRankResults {
            scores: current_scores,
            iterations_converged: iterations,
            convergence_epsilon: total_convergence_diff,
            graph_stats: self.compute_graph_stats(graph),
            parameters: self.config.clone(),
            performance_metrics,
        })
    }

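    /// Apply one sequential PageRank update to every node.
    ///
    /// Each node `v` receives the standard update
    ///
    /// ```text
    /// PR(v) = (1 - d) / N + d * sum over incoming u of PR(u) / out_degree(u)
    /// ```
    ///
    /// where `d` is the damping factor and `N` the node count. Nodes with no
    /// outgoing edges are treated as having out-degree 1 via `.max(1)` below,
    /// which is a simplification of full dangling-node handling.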
    fn compute_iteration_sequential(
        &self,
        graph: &DependencyGraph,
        nodes: &[NodeId],
        previous_scores: &HashMap<NodeId, f64>,
        current_scores: &mut HashMap<NodeId, f64>,
    ) -> Result<()> {
        let num_nodes = nodes.len() as f64;
        let teleport_prob = (1.0 - self.config.damping_factor) / num_nodes;

        for node in nodes {
            let mut new_score = teleport_prob;

            if let Some(incoming_neighbors) = graph.incoming_neighbors(node) {
                for linking_node in incoming_neighbors {
                    if let Some(&linking_score) = previous_scores.get(linking_node) {
                        let linking_out_degree = graph.out_degree(linking_node).max(1) as f64;
                        new_score += self.config.damping_factor * (linking_score / linking_out_degree);
                    }
                }
            }

            current_scores.insert(node.clone(), new_score);
        }

        Ok(())
    }

    /// Parallel version of the per-node update, using rayon to score nodes
    /// concurrently before writing the results back.
    fn compute_iteration_parallel(
        &self,
        graph: &DependencyGraph,
        nodes: &[NodeId],
        previous_scores: &HashMap<NodeId, f64>,
        current_scores: &mut HashMap<NodeId, f64>,
    ) -> Result<()> {
        let num_nodes = nodes.len() as f64;
        let teleport_prob = (1.0 - self.config.damping_factor) / num_nodes;

        let new_scores: Vec<(NodeId, f64)> = nodes.par_iter()
            .map(|node| {
                let mut new_score = teleport_prob;

                if let Some(incoming_neighbors) = graph.incoming_neighbors(node) {
                    for linking_node in incoming_neighbors {
                        if let Some(&linking_score) = previous_scores.get(linking_node) {
                            let linking_out_degree = graph.out_degree(linking_node).max(1) as f64;
                            new_score += self.config.damping_factor * (linking_score / linking_out_degree);
                        }
                    }
                }

                (node.clone(), new_score)
            })
            .collect();

        for (node, score) in new_scores {
            current_scores.insert(node, score);
        }

        Ok(())
    }

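    /// Total absolute change in scores between two successive iterations
    /// (an L1 distance); the iteration loop stops once this drops below
    /// `config.epsilon`.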
    fn compute_convergence_diff(
        &self,
        current: &HashMap<NodeId, f64>,
        previous: &HashMap<NodeId, f64>,
    ) -> f64 {
        current.iter()
            .map(|(node, &current_score)| {
                let previous_score = previous.get(node).copied().unwrap_or(0.0);
                (current_score - previous_score).abs()
            })
            .sum()
    }

    /// Roughly estimate peak memory in MB, assuming two score maps keyed by
    /// `String`-sized node identifiers.
    fn estimate_memory_usage(&self, num_nodes: usize) -> f64 {
        let score_map_size = num_nodes * (std::mem::size_of::<String>() + std::mem::size_of::<f64>());
        let total_bytes = score_map_size * 2;
        total_bytes as f64 / (1024.0 * 1024.0)
    }

    fn compute_graph_stats(&self, graph: &DependencyGraph) -> GraphStatistics {
        let total_nodes = graph.node_count();
        let total_edges = graph.edge_count();

        if total_nodes == 0 {
            return GraphStatistics::empty();
        }

        let degrees: Vec<_> = graph.nodes()
            .map(|node| (graph.in_degree(node), graph.out_degree(node)))
            .collect();

        let in_degrees: Vec<_> = degrees.iter().map(|(in_deg, _)| *in_deg).collect();
        let out_degrees: Vec<_> = degrees.iter().map(|(_, out_deg)| *out_deg).collect();

        let in_degree_avg = in_degrees.iter().sum::<usize>() as f64 / total_nodes as f64;
        let in_degree_max = *in_degrees.iter().max().unwrap_or(&0);
        let out_degree_avg = out_degrees.iter().sum::<usize>() as f64 / total_nodes as f64;
        let out_degree_max = *out_degrees.iter().max().unwrap_or(&0);

        let isolated_nodes = degrees.iter().filter(|(in_deg, out_deg)| *in_deg == 0 && *out_deg == 0).count();
        let dangling_nodes = degrees.iter().filter(|(_, out_deg)| *out_deg == 0).count();

        let max_possible_edges = total_nodes * (total_nodes - 1);
        let graph_density = if max_possible_edges > 0 {
            total_edges as f64 / max_possible_edges as f64
        } else {
            0.0
        };

        GraphStatistics {
            total_nodes,
            total_edges,
            in_degree_avg,
            in_degree_max,
            out_degree_avg,
            out_degree_max,
            strongly_connected_components: graph.estimate_scc_count(),
            graph_density,
            isolated_nodes,
            dangling_nodes,
        }
    }
}

impl Default for PageRankComputer {
    fn default() -> Self {
        Self::new()
    }
}

impl PageRankResults {
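    /// Return the `k` highest-scoring nodes, sorted by descending score.
    ///
    /// # Example
    ///
    /// A minimal sketch (marked `ignore`; `results` is a previously computed
    /// `PageRankResults`):
    ///
    /// ```ignore
    /// for (node, score) in results.top_nodes(5) {
    ///     println!("{}: {:.6}", node, score);
    /// }
    /// ```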
    pub fn top_nodes(&self, k: usize) -> Vec<(NodeId, f64)> {
        let mut sorted_scores: Vec<_> = self.scores.iter()
            .map(|(node, &score)| (node.clone(), score))
            .collect();

        sorted_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        sorted_scores.into_iter().take(k).collect()
    }

    /// Return all nodes whose score is at least `threshold`.
    pub fn nodes_above_threshold(&self, threshold: f64) -> Vec<(NodeId, f64)> {
        self.scores.iter()
            .filter_map(|(node, &score)| {
                if score >= threshold {
                    Some((node.clone(), score))
                } else {
                    None
                }
            })
            .collect()
    }

    /// Look up the score of a single node, if present.
    pub fn node_score(&self, node_id: &NodeId) -> Option<f64> {
        self.scores.get(node_id).copied()
    }

    /// Compute summary statistics (mean, median, std dev, min, max) over the scores.
    pub fn score_statistics(&self) -> ScoreStatistics {
        if self.scores.is_empty() {
            return ScoreStatistics::default();
        }

        let scores: Vec<f64> = self.scores.values().copied().collect();
        let sum: f64 = scores.iter().sum();
        let mean = sum / scores.len() as f64;

        let min_score = scores.iter().fold(f64::INFINITY, |a, &b| a.min(b));
        let max_score = scores.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));

        let variance = scores.iter()
            .map(|&score| (score - mean).powi(2))
            .sum::<f64>() / scores.len() as f64;
        let std_dev = variance.sqrt();

        let mut sorted_scores = scores;
        sorted_scores.sort_by(|a, b| a.partial_cmp(b).unwrap());
        let median = if sorted_scores.len() % 2 == 0 {
            let mid = sorted_scores.len() / 2;
            (sorted_scores[mid - 1] + sorted_scores[mid]) / 2.0
        } else {
            sorted_scores[sorted_scores.len() / 2]
        };

        ScoreStatistics {
            mean,
            median,
            std_dev,
            min_score,
            max_score,
            total_nodes: self.scores.len(),
        }
    }

    /// Whether the final convergence difference fell below the configured epsilon.
    pub fn converged(&self) -> bool {
        self.convergence_epsilon < self.parameters.epsilon
    }

    /// Render a short human-readable summary of the results.
    pub fn summary(&self) -> String {
        let stats = self.score_statistics();
        format!(
            "PageRank Results Summary:\n\
             - Nodes: {} (converged in {} iterations)\n\
             - Score range: [{:.6}, {:.6}] (mean: {:.6})\n\
             - Graph: {} nodes, {} edges (density: {:.4})\n\
             - Performance: {:.1}ms total, {:.2}ms/iter, {:.1}MB peak memory",
            self.scores.len(),
            self.iterations_converged,
            stats.min_score,
            stats.max_score,
            stats.mean,
            self.graph_stats.total_nodes,
            self.graph_stats.total_edges,
            self.graph_stats.graph_density,
            self.performance_metrics.total_time_ms,
            self.performance_metrics.avg_iteration_time_ms,
            self.performance_metrics.peak_memory_mb,
        )
    }
}

/// Summary statistics over a set of PageRank scores.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ScoreStatistics {
    pub mean: f64,
    pub median: f64,
    pub std_dev: f64,
    pub min_score: f64,
    pub max_score: f64,
    pub total_nodes: usize,
}

impl Default for ScoreStatistics {
    fn default() -> Self {
        Self {
            mean: 0.0,
            median: 0.0,
            std_dev: 0.0,
            min_score: 0.0,
            max_score: 0.0,
            total_nodes: 0,
        }
    }
}

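/// Specialized PageRank variants (personalized and entrypoint-focused) layered
/// on top of [`PageRankComputer`].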
pub struct SpecializedPageRank;

impl SpecializedPageRank {
    /// PageRank with a personalization vector.
    ///
    /// Note: the personalization weights are currently accepted but not yet
    /// applied; this delegates to the standard computation.
    pub fn personalized_pagerank(
        graph: &DependencyGraph,
        _personalization: &HashMap<NodeId, f64>,
        config: PageRankConfig,
    ) -> Result<PageRankResults> {
        let computer = PageRankComputer::with_config(config)?;

        computer.compute(graph)
    }

    /// PageRank that biases the personalization vector toward entrypoint nodes.
    pub fn entrypoint_focused_pagerank(
        graph: &DependencyGraph,
        config: PageRankConfig,
    ) -> Result<PageRankResults> {
        let entrypoints = graph.entrypoint_nodes();
        if entrypoints.is_empty() {
            return PageRankComputer::with_config(config)?.compute(graph);
        }

        let mut personalization = HashMap::new();
        let entrypoint_weight = 1.0 / entrypoints.len() as f64;

        for node in graph.nodes() {
            if entrypoints.contains(&node) {
                personalization.insert(node.clone(), entrypoint_weight);
            } else {
                personalization.insert(node.clone(), 0.0);
            }
        }

        Self::personalized_pagerank(graph, &personalization, config)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::DependencyGraph;

    fn create_test_graph() -> DependencyGraph {
        let mut graph = DependencyGraph::new();

        // Simple cycle: A -> B -> C -> A.
        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
        graph.add_edge("B".to_string(), "C".to_string()).unwrap();
        graph.add_edge("C".to_string(), "A".to_string()).unwrap();

        graph
    }

    #[test]
    fn test_pagerank_config() {
        let config = PageRankConfig::default();
        assert_eq!(config.damping_factor, 0.85);
        assert_eq!(config.max_iterations, 50);
        assert!(config.use_parallel);

        assert!(config.validate().is_ok());

        let invalid_config = PageRankConfig {
            damping_factor: 1.5,
            ..config
        };
        assert!(invalid_config.validate().is_err());
    }

    #[test]
    fn test_pagerank_computation() {
        let graph = create_test_graph();
        let computer = PageRankComputer::new();

        let results = computer.compute(&graph).unwrap();

        assert_eq!(results.scores.len(), 3);
        assert!(results.converged());
        assert!(results.iterations_converged > 0);
        assert!(results.iterations_converged <= 50);

        // Scores should approximately form a probability distribution.
        let total_score: f64 = results.scores.values().sum();
        println!("Total score: {}, Number of nodes: {}", total_score, results.scores.len());
        assert!((total_score - 1.0).abs() < 1e-3);

        for (node, score) in &results.scores {
            assert!(*score > 0.0);
            assert!(*score < 2.0);
            println!("Node {}: score = {:.6}", node, score);
        }
    }

    #[test]
    fn test_pagerank_empty_graph() {
        let graph = DependencyGraph::new();
        let computer = PageRankComputer::new();

        let results = computer.compute(&graph).unwrap();

        assert!(results.scores.is_empty());
        assert_eq!(results.iterations_converged, 0);
    }

    #[test]
    fn test_pagerank_single_node() {
        let mut graph = DependencyGraph::new();
        graph.add_node("A".to_string()).unwrap();

        let computer = PageRankComputer::new();
        let results = computer.compute(&graph).unwrap();

        assert_eq!(results.scores.len(), 1);
        let actual_score = results.scores["A"];
        println!("Single node score: {}, Expected: 1.0", actual_score);
        assert!(actual_score > 0.0);
        assert!(actual_score <= 1.0);
    }

    #[test]
    fn test_pagerank_linear_chain() {
        let mut graph = DependencyGraph::new();

        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
        graph.add_edge("B".to_string(), "C".to_string()).unwrap();
        graph.add_edge("C".to_string(), "D".to_string()).unwrap();

        let computer = PageRankComputer::new();
        let results = computer.compute(&graph).unwrap();

        assert_eq!(results.scores.len(), 4);

        let score_a = results.scores["A"];
        let score_d = results.scores["D"];

        // The sink of the chain should accumulate more rank than the source.
        assert!(score_d > score_a);

        println!("Linear chain scores:");
        for node in ["A", "B", "C", "D"] {
            println!("  {}: {:.6}", node, results.scores[node]);
        }
    }

    #[test]
    fn test_pagerank_hub_and_authority() {
        let mut graph = DependencyGraph::new();

        // "A" is a hub pointing at several nodes...
        graph.add_edge("A".to_string(), "B".to_string()).unwrap();
        graph.add_edge("A".to_string(), "C".to_string()).unwrap();
        graph.add_edge("A".to_string(), "D".to_string()).unwrap();

        // ...while "H" is an authority pointed at by several nodes.
        graph.add_edge("E".to_string(), "H".to_string()).unwrap();
        graph.add_edge("F".to_string(), "H".to_string()).unwrap();
        graph.add_edge("G".to_string(), "H".to_string()).unwrap();

        let computer = PageRankComputer::new();
        let results = computer.compute(&graph).unwrap();

        let score_a = results.scores["A"];
        let score_h = results.scores["H"];

        // The authority should outrank the hub.
        assert!(score_h > score_a);

        println!("Hub and Authority scores:");
        for node in ["A", "B", "C", "D", "E", "F", "G", "H"] {
            println!("  {}: {:.6}", node, results.scores[node]);
        }
    }

    #[test]
    fn test_pagerank_parallel_vs_sequential() {
        let graph = create_test_graph();

        let sequential_config = PageRankConfig {
            use_parallel: false,
            epsilon: 1e-8,
            ..PageRankConfig::default()
        };
        let sequential_computer = PageRankComputer::with_config(sequential_config).unwrap();
        let sequential_results = sequential_computer.compute(&graph).unwrap();

        let parallel_config = PageRankConfig {
            use_parallel: true,
            epsilon: 1e-8,
            ..PageRankConfig::default()
        };
        let parallel_computer = PageRankComputer::with_config(parallel_config).unwrap();
        let parallel_results = parallel_computer.compute(&graph).unwrap();

        for node in graph.nodes() {
            let seq_score = sequential_results.scores[node];
            let par_score = parallel_results.scores[node];
            let diff = (seq_score - par_score).abs();

            assert!(diff < 1e-6, "Scores differ too much for node {}: seq={:.8}, par={:.8}",
                    node, seq_score, par_score);
        }

        assert!(!sequential_results.performance_metrics.used_parallel);
        assert!(parallel_results.performance_metrics.used_parallel);
    }

    #[test]
    fn test_score_statistics() {
        let graph = create_test_graph();
        let computer = PageRankComputer::new();
        let results = computer.compute(&graph).unwrap();

        let stats = results.score_statistics();

        assert_eq!(stats.total_nodes, 3);
        assert!(stats.mean > 0.0);
        assert!(stats.std_dev >= 0.0);
        assert!(stats.min_score <= stats.max_score);
        assert!(stats.median > 0.0);

        println!("Score statistics: {:#?}", stats);
    }

    #[test]
    fn test_top_nodes() {
        let graph = create_test_graph();
        let computer = PageRankComputer::new();
        let results = computer.compute(&graph).unwrap();

        let top_2 = results.top_nodes(2);
        assert_eq!(top_2.len(), 2);

        assert!(top_2[0].1 >= top_2[1].1);

        println!("Top 2 nodes: {:#?}", top_2);
    }

    #[test]
    fn test_configuration_variants() {
        let graph = create_test_graph();

        let configs = vec![
            PageRankConfig::for_code_analysis(),
            PageRankConfig::for_large_codebases(),
            PageRankConfig::for_research(),
        ];

        for config in configs {
            let computer = PageRankComputer::with_config(config.clone()).unwrap();
            let results = computer.compute(&graph).unwrap();

            assert!(!results.scores.is_empty());
            assert!(results.iterations_converged > 0);

            println!("Config {:?}: converged in {} iterations",
                     config.damping_factor, results.iterations_converged);
        }
    }

    #[test]
    fn test_pagerank_summary() {
        let graph = create_test_graph();
        let computer = PageRankComputer::new();
        let results = computer.compute(&graph).unwrap();

        let summary = results.summary();

        assert!(summary.contains("PageRank Results Summary"));
        assert!(summary.contains("Nodes:"));
        assert!(summary.contains("converged"));
        assert!(summary.contains("Performance:"));

        println!("Summary:\n{}", summary);
    }
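
    // Supplementary check (a minimal sketch, assuming `NodeId` is a String key
    // as the other tests here do): exercises node_score and
    // nodes_above_threshold on the 3-node cycle.
    #[test]
    fn test_node_score_and_threshold() {
        let graph = create_test_graph();
        let results = PageRankComputer::new().compute(&graph).unwrap();

        // All three cycle nodes survive a zero threshold.
        assert_eq!(results.nodes_above_threshold(0.0).len(), 3);
        assert!(results.node_score(&"A".to_string()).is_some());
        // A node that never entered the graph has no score.
        assert!(results.node_score(&"Z".to_string()).is_none());
    }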
}