1use rayon::prelude::*;
22use scribe_analysis::heuristics::ScanResult;
23use scribe_core::Result;
24use serde::{Deserialize, Serialize};
25use std::collections::{HashMap, HashSet};
26use std::path::Path;
27
28use crate::graph::{DependencyGraph, NodeId};
29use crate::pagerank::{PageRankComputer, PageRankConfig, PageRankResults};
30use crate::statistics::{GraphAnalysisResults, GraphStatisticsAnalyzer};
31
32#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34pub struct CentralityResults {
35 pub pagerank_scores: HashMap<NodeId, f64>,
37
38 pub graph_analysis: GraphAnalysisResults,
40
41 pub pagerank_details: PageRankResults,
43
44 pub import_stats: ImportDetectionStats,
46
47 pub integration_metadata: IntegrationMetadata,
49}
50
51#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
53pub struct ImportDetectionStats {
54 pub files_processed: usize,
56
57 pub imports_detected: usize,
59
60 pub imports_resolved: usize,
62
63 pub resolution_rate: f64,
65
66 pub language_breakdown: HashMap<String, usize>,
68
69 pub import_patterns: HashMap<String, ImportPatternStats>,
71}
72
73#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
75pub struct ImportPatternStats {
76 pub total_imports: usize,
78
79 pub relative_imports: usize,
81
82 pub absolute_imports: usize,
84
85 pub stdlib_imports: usize,
87
88 pub third_party_imports: usize,
90}
91
92#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
94pub struct IntegrationMetadata {
95 pub timestamp: chrono::DateTime<chrono::Utc>,
97
98 pub computation_time_ms: u64,
100
101 pub integration_successful: bool,
103
104 pub centrality_weight: f64,
106
107 pub files_with_centrality: usize,
109
110 pub config: CentralityConfig,
112}
113
114#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
116pub struct CentralityConfig {
117 pub pagerank_config: PageRankConfig,
119
120 pub analyze_graph_structure: bool,
122
123 pub import_resolution: ImportResolutionConfig,
125
126 pub integration: IntegrationConfig,
128}
129
130#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
132pub struct ImportResolutionConfig {
133 pub max_search_depth: usize,
135
136 pub resolve_relative_imports: bool,
138
139 pub resolve_absolute_imports: bool,
141
142 pub exclude_stdlib_imports: bool,
144
145 pub path_mappings: HashMap<String, String>,
147}
148
149#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
151pub struct IntegrationConfig {
152 pub centrality_weight: f64,
154
155 pub normalization_method: NormalizationMethod,
157
158 pub min_centrality_threshold: f64,
160
161 pub boost_entrypoints: bool,
163
164 pub entrypoint_boost_factor: f64,
166}
167
168#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
170pub enum NormalizationMethod {
171 MinMax,
173 ZScore,
175 Rank,
177 None,
179}
180
181impl Default for CentralityConfig {
182 fn default() -> Self {
183 Self {
184 pagerank_config: PageRankConfig::for_code_analysis(),
185 analyze_graph_structure: true,
186 import_resolution: ImportResolutionConfig::default(),
187 integration: IntegrationConfig::default(),
188 }
189 }
190}
191
192impl Default for ImportResolutionConfig {
193 fn default() -> Self {
194 Self {
195 max_search_depth: 3,
196 resolve_relative_imports: true,
197 resolve_absolute_imports: true,
198 exclude_stdlib_imports: true,
199 path_mappings: HashMap::new(),
200 }
201 }
202}
203
204impl Default for IntegrationConfig {
205 fn default() -> Self {
206 Self {
207 centrality_weight: 0.15, normalization_method: NormalizationMethod::MinMax,
209 min_centrality_threshold: 1e-6,
210 boost_entrypoints: true,
211 entrypoint_boost_factor: 1.5,
212 }
213 }
214}
215
216#[derive(Debug)]
218pub struct CentralityCalculator {
219 config: CentralityConfig,
221
222 pagerank_computer: PageRankComputer,
224
225 stats_analyzer: GraphStatisticsAnalyzer,
227
228 import_detector: ImportDetector,
230}
231
232impl CentralityCalculator {
233 pub fn new() -> Result<Self> {
235 let config = CentralityConfig::default();
236 Self::with_config(config)
237 }
238
239 pub fn with_config(config: CentralityConfig) -> Result<Self> {
241 let pagerank_computer = PageRankComputer::with_config(config.pagerank_config.clone())?;
242
243 let stats_analyzer = if config.analyze_graph_structure {
244 GraphStatisticsAnalyzer::new()
245 } else {
246 GraphStatisticsAnalyzer::for_large_graphs()
247 };
248
249 let import_detector = ImportDetector::with_config(config.import_resolution.clone());
250
251 Ok(Self {
252 config,
253 pagerank_computer,
254 stats_analyzer,
255 import_detector,
256 })
257 }
258
259 pub fn for_large_codebases() -> Result<Self> {
261 let config = CentralityConfig {
262 pagerank_config: PageRankConfig::for_large_codebases(),
263 analyze_graph_structure: false,
264 ..CentralityConfig::default()
265 };
266 Self::with_config(config)
267 }
268
269 pub fn calculate_centrality<T>(&self, scan_results: &[T]) -> Result<CentralityResults>
271 where
272 T: ScanResult + Sync,
273 {
274 let start_time = std::time::Instant::now();
275
276 let (graph, import_stats) = self.build_dependency_graph(scan_results)?;
278
279 let pagerank_results = self.pagerank_computer.compute(&graph)?;
281
282 let graph_analysis = if self.config.analyze_graph_structure {
284 self.stats_analyzer.analyze(&graph)?
285 } else {
286 self.create_minimal_analysis(&graph)?
288 };
289
290 let computation_time = start_time.elapsed().as_millis() as u64;
292 let integration_metadata = IntegrationMetadata {
293 timestamp: chrono::Utc::now(),
294 computation_time_ms: computation_time,
295 integration_successful: true,
296 centrality_weight: self.config.integration.centrality_weight,
297 files_with_centrality: pagerank_results.scores.len(),
298 config: self.config.clone(),
299 };
300
301 Ok(CentralityResults {
302 pagerank_scores: pagerank_results.scores.clone(),
303 graph_analysis,
304 pagerank_details: pagerank_results,
305 import_stats,
306 integration_metadata,
307 })
308 }
309
310 pub fn integrate_with_heuristics(
312 &self,
313 centrality_results: &CentralityResults,
314 heuristic_scores: &HashMap<String, f64>,
315 ) -> Result<HashMap<String, f64>> {
316 let normalized_centrality = self
317 .normalize_centrality_scores(¢rality_results.pagerank_scores, heuristic_scores)?;
318
319 let mut integrated_scores = HashMap::new();
320 let centrality_weight = self.config.integration.centrality_weight;
321 let heuristic_weight = 1.0 - centrality_weight;
322
323 for (file_path, heuristic_score) in heuristic_scores {
325 let centrality_score = normalized_centrality.get(file_path).copied().unwrap_or(0.0);
326
327 let boosted_centrality = if self.config.integration.boost_entrypoints
329 && self.is_entrypoint_file(file_path)
330 {
331 centrality_score * self.config.integration.entrypoint_boost_factor
332 } else {
333 centrality_score
334 };
335
336 let integrated_score =
337 heuristic_weight * heuristic_score + centrality_weight * boosted_centrality;
338
339 integrated_scores.insert(file_path.clone(), integrated_score);
340 }
341
342 for (file_path, centrality_score) in &normalized_centrality {
344 if !integrated_scores.contains_key(file_path) {
345 let boosted_centrality = if self.config.integration.boost_entrypoints
346 && self.is_entrypoint_file(file_path)
347 {
348 centrality_score * self.config.integration.entrypoint_boost_factor
349 } else {
350 *centrality_score
351 };
352
353 integrated_scores.insert(file_path.clone(), centrality_weight * boosted_centrality);
354 }
355 }
356
357 Ok(integrated_scores)
358 }
359
360 fn build_dependency_graph<T>(
362 &self,
363 scan_results: &[T],
364 ) -> Result<(DependencyGraph, ImportDetectionStats)>
365 where
366 T: ScanResult + Sync,
367 {
368 let mut graph = DependencyGraph::with_capacity(scan_results.len());
369
370 let mut optimized_detector =
372 ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
373
374 for result in scan_results {
376 graph.add_node(result.path().to_string())?;
377 }
378
379 let import_stats = if self.config.pagerank_config.use_parallel {
381 self.build_edges_parallel_optimized(&mut graph, scan_results, &optimized_detector)?
382 } else {
383 self.build_edges_sequential_optimized(&mut graph, scan_results, &optimized_detector)?
384 };
385
386 Ok((graph, import_stats))
387 }
388
389 fn build_edges_sequential_optimized<T>(
391 &self,
392 graph: &mut DependencyGraph,
393 scan_results: &[T],
394 optimized_detector: &ImportDetector,
395 ) -> Result<ImportDetectionStats>
396 where
397 T: ScanResult,
398 {
399 let mut stats = ImportDetectionStats {
400 files_processed: 0,
401 imports_detected: 0,
402 imports_resolved: 0,
403 resolution_rate: 0.0,
404 language_breakdown: HashMap::new(),
405 import_patterns: HashMap::new(),
406 };
407
408 let file_path_map: HashMap<&str, &T> = scan_results
410 .iter()
411 .map(|result| (result.path(), result))
412 .collect();
413
414 for result in scan_results {
415 stats.files_processed += 1;
416
417 if let Some(lang) = optimized_detector.detect_language(result.path()) {
419 *stats.language_breakdown.entry(lang.clone()).or_insert(0) += 1;
420 }
421
422 if let Some(imports) = result.imports() {
424 stats.imports_detected += imports.len();
425
426 for import_str in imports {
427 if let Some(resolved_path) =
428 optimized_detector.resolve_import(import_str, result.path(), &file_path_map)
429 {
430 graph.add_edge(result.path().to_string(), resolved_path)?;
431 stats.imports_resolved += 1;
432 }
433 }
434 }
435 }
436
437 stats.resolution_rate = if stats.imports_detected > 0 {
438 stats.imports_resolved as f64 / stats.imports_detected as f64
439 } else {
440 0.0
441 };
442
443 Ok(stats)
444 }
445
446 fn build_edges_sequential<T>(
448 &self,
449 graph: &mut DependencyGraph,
450 scan_results: &[T],
451 ) -> Result<ImportDetectionStats>
452 where
453 T: ScanResult,
454 {
455 let optimized_detector =
456 ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
457 self.build_edges_sequential_optimized(graph, scan_results, &optimized_detector)
458 }
459
460 fn build_edges_parallel_optimized<T>(
462 &self,
463 graph: &mut DependencyGraph,
464 scan_results: &[T],
465 optimized_detector: &ImportDetector,
466 ) -> Result<ImportDetectionStats>
467 where
468 T: ScanResult + Sync,
469 {
470 let file_path_map: HashMap<&str, &T> = scan_results
472 .iter()
473 .map(|result| (result.path(), result))
474 .collect();
475
476 let import_edges: Vec<_> = scan_results
478 .par_iter()
479 .flat_map(|result| {
480 let mut edges = Vec::new();
481
482 if let Some(imports) = result.imports() {
483 for import_str in imports {
484 if let Some(resolved_path) = optimized_detector.resolve_import(
485 import_str,
486 result.path(),
487 &file_path_map,
488 ) {
489 edges.push((result.path().to_string(), resolved_path));
490 }
491 }
492 }
493
494 edges
495 })
496 .collect();
497
498 for (from, to) in &import_edges {
500 graph.add_edge(from.clone(), to.clone())?;
501 }
502
503 let total_imports: usize = scan_results
505 .iter()
506 .map(|result| result.imports().map_or(0, |imports| imports.len()))
507 .sum();
508
509 let language_breakdown: HashMap<String, usize> = scan_results
510 .iter()
511 .filter_map(|result| {
512 optimized_detector
513 .detect_language(result.path())
514 .map(|lang| (lang, 1))
515 })
516 .fold(HashMap::new(), |mut acc, (lang, count)| {
517 *acc.entry(lang).or_insert(0) += count;
518 acc
519 });
520
521 let stats = ImportDetectionStats {
522 files_processed: scan_results.len(),
523 imports_detected: total_imports,
524 imports_resolved: import_edges.len(),
525 resolution_rate: if total_imports > 0 {
526 import_edges.len() as f64 / total_imports as f64
527 } else {
528 0.0
529 },
530 language_breakdown,
531 import_patterns: HashMap::new(), };
533
534 Ok(stats)
535 }
536
537 fn build_edges_parallel<T>(
539 &self,
540 graph: &mut DependencyGraph,
541 scan_results: &[T],
542 ) -> Result<ImportDetectionStats>
543 where
544 T: ScanResult + Sync,
545 {
546 let optimized_detector =
547 ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
548 self.build_edges_parallel_optimized(graph, scan_results, &optimized_detector)
549 }
550
551 fn normalize_centrality_scores(
553 &self,
554 centrality_scores: &HashMap<String, f64>,
555 heuristic_scores: &HashMap<String, f64>,
556 ) -> Result<HashMap<String, f64>> {
557 if centrality_scores.is_empty() {
558 return Ok(HashMap::new());
559 }
560
561 match self.config.integration.normalization_method {
562 NormalizationMethod::MinMax => {
563 self.normalize_min_max(centrality_scores, heuristic_scores)
564 }
565 NormalizationMethod::ZScore => self.normalize_z_score(centrality_scores),
566 NormalizationMethod::Rank => self.normalize_rank(centrality_scores),
567 NormalizationMethod::None => Ok(centrality_scores.clone()),
568 }
569 }
570
571 fn normalize_min_max(
573 &self,
574 centrality_scores: &HashMap<String, f64>,
575 heuristic_scores: &HashMap<String, f64>,
576 ) -> Result<HashMap<String, f64>> {
577 let centrality_values: Vec<f64> = centrality_scores.values().copied().collect();
578 let min_centrality = centrality_values
579 .iter()
580 .fold(f64::INFINITY, |a, &b| a.min(b));
581 let max_centrality = centrality_values
582 .iter()
583 .fold(f64::NEG_INFINITY, |a, &b| a.max(b));
584
585 let heuristic_values: Vec<f64> = heuristic_scores.values().copied().collect();
587 let max_heuristic = if heuristic_values.is_empty() {
588 1.0
589 } else {
590 heuristic_values
591 .iter()
592 .fold(f64::NEG_INFINITY, |a, &b| a.max(b))
593 };
594
595 let mut normalized = HashMap::new();
596
597 if (max_centrality - min_centrality).abs() < f64::EPSILON {
598 for (path, _) in centrality_scores {
600 normalized.insert(path.clone(), max_heuristic * 0.5); }
602 } else {
603 for (path, &score) in centrality_scores {
604 let normalized_score =
605 ((score - min_centrality) / (max_centrality - min_centrality)) * max_heuristic;
606 if normalized_score >= self.config.integration.min_centrality_threshold {
607 normalized.insert(path.clone(), normalized_score);
608 }
609 }
610 }
611
612 Ok(normalized)
613 }
614
615 fn normalize_z_score(
617 &self,
618 centrality_scores: &HashMap<String, f64>,
619 ) -> Result<HashMap<String, f64>> {
620 let values: Vec<f64> = centrality_scores.values().copied().collect();
621 let mean = values.iter().sum::<f64>() / values.len() as f64;
622 let variance =
623 values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / values.len() as f64;
624 let std_dev = variance.sqrt();
625
626 let mut normalized = HashMap::new();
627
628 if std_dev > f64::EPSILON {
629 for (path, &score) in centrality_scores {
630 let z_score = (score - mean) / std_dev;
631 let normalized_score = (z_score + 3.0) / 6.0; if normalized_score >= self.config.integration.min_centrality_threshold {
634 normalized.insert(path.clone(), normalized_score);
635 }
636 }
637 } else {
638 for (path, _) in centrality_scores {
640 normalized.insert(path.clone(), 0.5);
641 }
642 }
643
644 Ok(normalized)
645 }
646
647 fn normalize_rank(
649 &self,
650 centrality_scores: &HashMap<String, f64>,
651 ) -> Result<HashMap<String, f64>> {
652 let mut scored_files: Vec<_> = centrality_scores
653 .iter()
654 .map(|(path, &score)| (path.clone(), score))
655 .collect();
656
657 scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
658
659 let mut normalized = HashMap::new();
660 let total_files = scored_files.len();
661
662 for (rank, (path, _)) in scored_files.into_iter().enumerate() {
663 let normalized_score = 1.0 - (rank as f64 / total_files as f64);
664 if normalized_score >= self.config.integration.min_centrality_threshold {
665 normalized.insert(path, normalized_score);
666 }
667 }
668
669 Ok(normalized)
670 }
671
672 fn create_minimal_analysis(&self, graph: &DependencyGraph) -> Result<GraphAnalysisResults> {
674 let minimal_analyzer = GraphStatisticsAnalyzer::for_large_graphs();
676 minimal_analyzer.analyze(graph)
677 }
678
679 fn is_entrypoint_file(&self, file_path: &str) -> bool {
681 let path = Path::new(file_path);
682 let file_name = path
683 .file_name()
684 .and_then(|name| name.to_str())
685 .unwrap_or("")
686 .to_lowercase();
687
688 matches!(
689 file_name.as_str(),
690 "main.py"
691 | "main.rs"
692 | "main.go"
693 | "main.js"
694 | "main.ts"
695 | "index.py"
696 | "index.rs"
697 | "index.go"
698 | "index.js"
699 | "index.ts"
700 | "app.py"
701 | "app.rs"
702 | "app.go"
703 | "app.js"
704 | "app.ts"
705 | "server.py"
706 | "server.rs"
707 | "server.go"
708 | "server.js"
709 | "server.ts"
710 | "lib.rs"
711 | "__init__.py"
712 )
713 }
714}
715
716impl Default for CentralityCalculator {
717 fn default() -> Self {
718 Self::new().expect("Failed to create CentralityCalculator")
719 }
720}
721
722#[derive(Debug, Clone)]
724pub struct ImportDetector {
725 config: ImportResolutionConfig,
726 stem_to_paths: HashMap<String, Vec<String>>,
728 filename_to_paths: HashMap<String, Vec<String>>,
730 available_paths: HashSet<String>,
732}
733
734impl ImportDetector {
735 pub fn with_config(config: ImportResolutionConfig) -> Self {
737 Self {
738 config,
739 stem_to_paths: HashMap::new(),
740 filename_to_paths: HashMap::new(),
741 available_paths: HashSet::new(),
742 }
743 }
744
745 pub fn with_file_index<T>(config: ImportResolutionConfig, scan_results: &[T]) -> Self
747 where
748 T: ScanResult,
749 {
750 let mut detector = Self::with_config(config);
751 detector.build_lookup_maps(scan_results);
752 detector
753 }
754
755 fn build_lookup_maps<T>(&mut self, scan_results: &[T])
758 where
759 T: ScanResult,
760 {
761 self.stem_to_paths.clear();
762 self.filename_to_paths.clear();
763 self.available_paths.clear();
764
765 for result in scan_results {
766 let full_path = result.path().to_string();
767 self.available_paths.insert(full_path.clone());
768
769 let path = Path::new(result.path());
770
771 if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
773 let stem_lower = stem.to_lowercase();
774 self.stem_to_paths
775 .entry(stem_lower)
776 .or_insert_with(Vec::new)
777 .push(full_path.clone());
778 }
779
780 if let Some(filename) = path.file_name().and_then(|s| s.to_str()) {
782 let filename_lower = filename.to_lowercase();
783 self.filename_to_paths
784 .entry(filename_lower)
785 .or_insert_with(Vec::new)
786 .push(full_path);
787 }
788 }
789 }
790
791 pub fn detect_language(&self, file_path: &str) -> Option<String> {
793 let path = Path::new(file_path);
794 let ext = path.extension()?.to_str()?.to_lowercase();
795
796 match ext.as_str() {
797 "py" => Some("python".to_string()),
798 "js" | "jsx" | "mjs" => Some("javascript".to_string()),
799 "ts" | "tsx" => Some("typescript".to_string()),
800 "rs" => Some("rust".to_string()),
801 "go" => Some("go".to_string()),
802 "java" | "kt" => Some("java".to_string()),
803 "cpp" | "cc" | "cxx" | "hpp" | "h" => Some("cpp".to_string()),
804 "c" => Some("c".to_string()),
805 "rb" => Some("ruby".to_string()),
806 "php" => Some("php".to_string()),
807 "cs" => Some("csharp".to_string()),
808 "swift" => Some("swift".to_string()),
809 _ => None,
810 }
811 }
812
813 pub fn resolve_import<T>(
815 &self,
816 import_str: &str,
817 current_file: &str,
818 file_map: &HashMap<&str, &T>,
819 ) -> Option<String>
820 where
821 T: ScanResult,
822 {
823 if let Some(mapped_path) = self.config.path_mappings.get(import_str) {
825 if file_map.contains_key(mapped_path.as_str()) {
826 return Some(mapped_path.clone());
827 }
828 }
829
830 let current_path = Path::new(current_file);
831 let language = self.detect_language(current_file);
832
833 match language.as_deref() {
834 Some("python") => self.resolve_python_import(import_str, current_path, file_map),
835 Some("javascript") | Some("typescript") => {
836 self.resolve_js_import(import_str, current_path, file_map)
837 }
838 Some("rust") => self.resolve_rust_import(import_str, current_path, file_map),
839 Some("go") => self.resolve_go_import(import_str, current_path, file_map),
840 _ => self.resolve_generic_import(import_str, current_path, file_map),
841 }
842 }
843
844 fn resolve_python_import<T>(
846 &self,
847 import_str: &str,
848 current_path: &Path,
849 file_map: &HashMap<&str, &T>,
850 ) -> Option<String>
851 where
852 T: ScanResult,
853 {
854 let cleaned_import = import_str.trim();
855
856 if self.config.exclude_stdlib_imports && self.is_python_stdlib(cleaned_import) {
858 return None;
859 }
860
861 let module_parts: Vec<&str> = cleaned_import.split('.').collect();
863
864 let mut candidates = Vec::new();
866
867 candidates.push(format!("{}.py", module_parts.join("/")));
869
870 candidates.push(format!("{}/__init__.py", module_parts.join("/")));
872
873 if let Some(parent) = current_path.parent() {
875 let parent_str = parent.to_string_lossy();
876 let relative_candidates: Vec<String> = candidates
877 .iter()
878 .map(|candidate| format!("{}/{}", parent_str, candidate))
879 .collect();
880 candidates.extend(relative_candidates);
881 }
882
883 for candidate in &candidates {
885 if file_map.contains_key(candidate.as_str()) {
886 return Some(candidate.clone());
887 }
888 }
889
890 self.fuzzy_match_import(&module_parts, file_map)
892 }
893
894 fn resolve_js_import<T>(
896 &self,
897 import_str: &str,
898 current_path: &Path,
899 file_map: &HashMap<&str, &T>,
900 ) -> Option<String>
901 where
902 T: ScanResult,
903 {
904 let cleaned_import = import_str.trim();
905
906 if cleaned_import.starts_with("./") || cleaned_import.starts_with("../") {
908 if !self.config.resolve_relative_imports {
909 return None;
910 }
911
912 if let Some(parent) = current_path.parent() {
913 let resolved_path = parent.join(&cleaned_import[2..]); let resolved_str = resolved_path.to_string_lossy();
915
916 for ext in &[".js", ".ts", ".jsx", ".tsx", "/index.js", "/index.ts"] {
918 let candidate = format!("{}{}", resolved_str, ext);
919 if file_map.contains_key(candidate.as_str()) {
920 return Some(candidate);
921 }
922 }
923 }
924 }
925 else if self.config.resolve_absolute_imports {
927 let import_parts: Vec<&str> = cleaned_import.split('/').collect();
928 return self.fuzzy_match_import(&import_parts, file_map);
929 }
930
931 None
932 }
933
934 fn resolve_rust_import<T>(
936 &self,
937 import_str: &str,
938 _current_path: &Path,
939 file_map: &HashMap<&str, &T>,
940 ) -> Option<String>
941 where
942 T: ScanResult,
943 {
944 let cleaned_import = import_str.trim();
945
946 if self.config.exclude_stdlib_imports && self.is_rust_stdlib(cleaned_import) {
948 return None;
949 }
950
951 let parts: Vec<&str> = cleaned_import.split("::").collect();
952
953 let mut candidates = Vec::new();
955
956 candidates.push(format!("{}.rs", parts.join("/")));
958
959 candidates.push(format!("{}/mod.rs", parts.join("/")));
961
962 if parts.len() == 1 {
964 candidates.push(format!("{}/lib.rs", parts[0]));
965 candidates.push(format!("{}/src/lib.rs", parts[0]));
966 }
967
968 for candidate in &candidates {
970 if file_map.contains_key(candidate.as_str()) {
971 return Some(candidate.clone());
972 }
973 }
974
975 self.fuzzy_match_import(&parts, file_map)
977 }
978
979 fn resolve_go_import<T>(
981 &self,
982 import_str: &str,
983 _current_path: &Path,
984 file_map: &HashMap<&str, &T>,
985 ) -> Option<String>
986 where
987 T: ScanResult,
988 {
989 let cleaned_import = import_str.trim().trim_matches('"');
990
991 if self.config.exclude_stdlib_imports && !cleaned_import.contains('.') {
993 return None;
994 }
995
996 let parts: Vec<&str> = cleaned_import.split('/').collect();
997
998 let mut candidates = Vec::new();
1000
1001 candidates.push(format!("{}.go", parts.last()?));
1003 candidates.push(format!("{}/main.go", cleaned_import));
1004 candidates.push(format!("{}/{}.go", cleaned_import, parts.last()?));
1005
1006 for candidate in &candidates {
1007 if file_map.contains_key(candidate.as_str()) {
1008 return Some(candidate.clone());
1009 }
1010 }
1011
1012 self.fuzzy_match_import(&parts, file_map)
1013 }
1014
1015 fn resolve_generic_import<T>(
1017 &self,
1018 import_str: &str,
1019 _current_path: &Path,
1020 file_map: &HashMap<&str, &T>,
1021 ) -> Option<String>
1022 where
1023 T: ScanResult,
1024 {
1025 let cleaned_import = import_str.trim();
1026 let parts: Vec<&str> = cleaned_import.split(&['/', '.', ':']).collect();
1027 self.fuzzy_match_import(&parts, file_map)
1028 }
1029
1030 fn fuzzy_match_import<T>(
1032 &self,
1033 import_parts: &[&str],
1034 _file_map: &HashMap<&str, &T>,
1035 ) -> Option<String>
1036 where
1037 T: ScanResult,
1038 {
1039 if import_parts.is_empty() {
1040 return None;
1041 }
1042
1043 let last_part = import_parts.last()?.to_lowercase();
1044
1045 if let Some(paths) = self.stem_to_paths.get(&last_part) {
1048 if let Some(first_path) = paths.first() {
1050 return Some(first_path.clone());
1051 }
1052 }
1053
1054 if let Some(paths) = self.filename_to_paths.get(&last_part) {
1056 if let Some(first_path) = paths.first() {
1057 return Some(first_path.clone());
1058 }
1059 }
1060
1061 for (stem, paths) in &self.stem_to_paths {
1063 if stem.contains(&last_part) || last_part.contains(stem) {
1064 if let Some(first_path) = paths.first() {
1065 return Some(first_path.clone());
1066 }
1067 }
1068 }
1069
1070 for path in &self.available_paths {
1072 let path_lower = path.to_lowercase();
1073 if import_parts
1074 .iter()
1075 .all(|&part| path_lower.contains(&part.to_lowercase()))
1076 {
1077 return Some(path.clone());
1078 }
1079 }
1080
1081 None
1082 }
1083
1084 fn is_python_stdlib(&self, import_str: &str) -> bool {
1086 let stdlib_modules = [
1087 "os",
1088 "sys",
1089 "re",
1090 "json",
1091 "collections",
1092 "itertools",
1093 "functools",
1094 "typing",
1095 "datetime",
1096 "math",
1097 "random",
1098 "string",
1099 "pathlib",
1100 "io",
1101 "csv",
1102 "xml",
1103 "html",
1104 "urllib",
1105 "http",
1106 "email",
1107 "logging",
1108 "unittest",
1109 "asyncio",
1110 "concurrent",
1111 "multiprocessing",
1112 "threading",
1113 "subprocess",
1114 ];
1115
1116 let first_part = import_str.split('.').next().unwrap_or(import_str);
1117 stdlib_modules.contains(&first_part)
1118 }
1119
1120 fn is_rust_stdlib(&self, import_str: &str) -> bool {
1122 import_str.starts_with("std::")
1123 || import_str.starts_with("core::")
1124 || import_str.starts_with("alloc::")
1125 }
1126}
1127
1128impl CentralityResults {
1130 pub fn top_files_by_centrality(&self, k: usize) -> Vec<(String, f64)> {
1132 let mut scored_files: Vec<_> = self
1133 .pagerank_scores
1134 .iter()
1135 .map(|(path, &score)| (path.clone(), score))
1136 .collect();
1137
1138 scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1139 scored_files.into_iter().take(k).collect()
1140 }
1141
1142 pub fn summary(&self) -> String {
1144 format!(
1145 "Centrality Analysis Summary:\n\
1146 - Files with centrality scores: {}\n\
1147 - PageRank iterations: {} (converged: {})\n\
1148 - Graph: {} nodes, {} edges (density: {:.4})\n\
1149 - Import resolution: {:.1}% ({}/{})\n\
1150 - Top languages: {}\n\
1151 - Computation time: {}ms\n\
1152 - Integration weight: {:.2}",
1153 self.pagerank_scores.len(),
1154 self.pagerank_details.iterations_converged,
1155 self.pagerank_details.converged(),
1156 self.graph_analysis.basic_stats.total_nodes,
1157 self.graph_analysis.basic_stats.total_edges,
1158 self.graph_analysis.basic_stats.graph_density,
1159 self.import_stats.resolution_rate * 100.0,
1160 self.import_stats.imports_resolved,
1161 self.import_stats.imports_detected,
1162 self.import_stats
1163 .language_breakdown
1164 .iter()
1165 .max_by_key(|(_, &count)| count)
1166 .map(|(lang, count)| format!("{} ({})", lang, count))
1167 .unwrap_or_else(|| "None".to_string()),
1168 self.integration_metadata.computation_time_ms,
1169 self.integration_metadata.centrality_weight,
1170 )
1171 }
1172}
1173
1174#[cfg(test)]
1175mod tests {
1176 use super::*;
1177 use scribe_analysis::heuristics::DocumentAnalysis;
1178
1179 #[derive(Debug, Clone)]
1181 struct MockScanResult {
1182 path: String,
1183 relative_path: String,
1184 depth: usize,
1185 imports: Option<Vec<String>>,
1186 is_docs: bool,
1187 is_readme: bool,
1188 is_test: bool,
1189 is_entrypoint: bool,
1190 has_examples: bool,
1191 priority_boost: f64,
1192 churn_score: f64,
1193 centrality_in: f64,
1194 doc_analysis: Option<DocumentAnalysis>,
1195 }
1196
1197 impl MockScanResult {
1198 fn new(path: &str) -> Self {
1199 Self {
1200 path: path.to_string(),
1201 relative_path: path.to_string(),
1202 depth: path.matches('/').count(),
1203 imports: None,
1204 is_docs: path.contains("doc") || path.ends_with(".md"),
1205 is_readme: path.to_lowercase().contains("readme"),
1206 is_test: path.contains("test"),
1207 is_entrypoint: path.contains("main") || path.contains("index"),
1208 has_examples: path.contains("example"),
1209 priority_boost: 0.0,
1210 churn_score: 0.5,
1211 centrality_in: 0.0,
1212 doc_analysis: Some(DocumentAnalysis::new()),
1213 }
1214 }
1215
1216 fn with_imports(mut self, imports: Vec<String>) -> Self {
1217 self.imports = Some(imports);
1218 self
1219 }
1220 }
1221
1222 impl ScanResult for MockScanResult {
1223 fn path(&self) -> &str {
1224 &self.path
1225 }
1226 fn relative_path(&self) -> &str {
1227 &self.relative_path
1228 }
1229 fn depth(&self) -> usize {
1230 self.depth
1231 }
1232 fn is_docs(&self) -> bool {
1233 self.is_docs
1234 }
1235 fn is_readme(&self) -> bool {
1236 self.is_readme
1237 }
1238 fn is_test(&self) -> bool {
1239 self.is_test
1240 }
1241 fn is_entrypoint(&self) -> bool {
1242 self.is_entrypoint
1243 }
1244 fn has_examples(&self) -> bool {
1245 self.has_examples
1246 }
1247 fn priority_boost(&self) -> f64 {
1248 self.priority_boost
1249 }
1250 fn churn_score(&self) -> f64 {
1251 self.churn_score
1252 }
1253 fn centrality_in(&self) -> f64 {
1254 self.centrality_in
1255 }
1256 fn imports(&self) -> Option<&[String]> {
1257 self.imports.as_deref()
1258 }
1259 fn doc_analysis(&self) -> Option<&DocumentAnalysis> {
1260 self.doc_analysis.as_ref()
1261 }
1262 }
1263
1264 #[test]
1265 fn test_centrality_calculator_creation() {
1266 let calculator = CentralityCalculator::new();
1267 assert!(calculator.is_ok());
1268
1269 let large_calc = CentralityCalculator::for_large_codebases();
1270 assert!(large_calc.is_ok());
1271 }
1272
1273 #[test]
1274 fn test_import_detection() {
1275 let detector = ImportDetector::with_config(ImportResolutionConfig::default());
1276
1277 assert_eq!(
1279 detector.detect_language("main.py"),
1280 Some("python".to_string())
1281 );
1282 assert_eq!(
1283 detector.detect_language("app.js"),
1284 Some("javascript".to_string())
1285 );
1286 assert_eq!(detector.detect_language("lib.rs"), Some("rust".to_string()));
1287
1288 assert!(detector.is_python_stdlib("os"));
1290 assert!(detector.is_python_stdlib("sys.path"));
1291 assert!(!detector.is_python_stdlib("custom_module"));
1292
1293 assert!(detector.is_rust_stdlib("std::collections::HashMap"));
1295 assert!(detector.is_rust_stdlib("core::fmt"));
1296 assert!(!detector.is_rust_stdlib("serde::Deserialize"));
1297 }
1298
1299 #[test]
1300 fn test_centrality_calculation() {
1301 let calculator = CentralityCalculator::new().unwrap();
1302
1303 let scan_results = vec![
1304 MockScanResult::new("main.py")
1305 .with_imports(vec!["utils".to_string(), "config".to_string()]),
1306 MockScanResult::new("utils.py").with_imports(vec!["config".to_string()]),
1307 MockScanResult::new("config.py"),
1308 MockScanResult::new("test.py").with_imports(vec!["main".to_string()]),
1309 ];
1310
1311 let results = calculator.calculate_centrality(&scan_results).unwrap();
1312
1313 assert!(!results.pagerank_scores.is_empty());
1315 assert!(results.integration_metadata.integration_successful);
1316 assert_eq!(
1317 results.integration_metadata.files_with_centrality,
1318 results.pagerank_scores.len()
1319 );
1320
1321 let config_score = results.pagerank_scores.get("config.py");
1323 assert!(config_score.is_some());
1324
1325 println!("Centrality scores:");
1326 for (file, score) in &results.pagerank_scores {
1327 println!(" {}: {:.6}", file, score);
1328 }
1329
1330 println!("\n{}", results.summary());
1331 }
1332
1333 #[test]
1334 fn test_heuristics_integration() {
1335 let calculator = CentralityCalculator::new().unwrap();
1336
1337 let scan_results = vec![
1338 MockScanResult::new("main.py").with_imports(vec!["utils".to_string()]),
1339 MockScanResult::new("utils.py"),
1340 ];
1341
1342 let centrality_results = calculator.calculate_centrality(&scan_results).unwrap();
1343
1344 let mut heuristic_scores = HashMap::new();
1346 heuristic_scores.insert("main.py".to_string(), 0.8);
1347 heuristic_scores.insert("utils.py".to_string(), 0.6);
1348
1349 let integrated_scores = calculator
1350 .integrate_with_heuristics(¢rality_results, &heuristic_scores)
1351 .unwrap();
1352
1353 assert!(!integrated_scores.is_empty());
1354
1355 for (file, integrated_score) in &integrated_scores {
1357 let original_score = heuristic_scores.get(file).unwrap();
1358 println!(
1359 "File {}: heuristic={:.3}, integrated={:.3}",
1360 file, original_score, integrated_score
1361 );
1362 }
1363 }
1364
1365 #[test]
1366 fn test_normalization_methods() {
1367 let calculator = CentralityCalculator::new().unwrap();
1368
1369 let centrality_scores = vec![
1370 ("file1".to_string(), 0.1),
1371 ("file2".to_string(), 0.3),
1372 ("file3".to_string(), 0.6),
1373 ("file4".to_string(), 1.0),
1374 ]
1375 .into_iter()
1376 .collect();
1377
1378 let heuristic_scores = vec![
1379 ("file1".to_string(), 0.5),
1380 ("file2".to_string(), 0.7),
1381 ("file3".to_string(), 0.9),
1382 ("file4".to_string(), 1.2),
1383 ]
1384 .into_iter()
1385 .collect();
1386
1387 let normalized = calculator
1389 .normalize_min_max(¢rality_scores, &heuristic_scores)
1390 .unwrap();
1391 assert!(!normalized.is_empty());
1392
1393 let z_normalized = calculator.normalize_z_score(¢rality_scores).unwrap();
1395 assert!(!z_normalized.is_empty());
1396
1397 let rank_normalized = calculator.normalize_rank(¢rality_scores).unwrap();
1399 assert!(!rank_normalized.is_empty());
1400
1401 println!("Original scores: {:?}", centrality_scores);
1402 println!("Min-max normalized: {:?}", normalized);
1403 println!("Z-score normalized: {:?}", z_normalized);
1404 println!("Rank normalized: {:?}", rank_normalized);
1405 }
1406
1407 #[test]
1408 fn test_import_resolution() {
1409 let detector = ImportDetector::with_config(ImportResolutionConfig::default());
1410
1411 let scan_results = vec![
1413 MockScanResult::new("src/main.py"),
1414 MockScanResult::new("src/utils.py"),
1415 MockScanResult::new("src/config.py"),
1416 MockScanResult::new("tests/test_main.py"),
1417 ];
1418
1419 let file_map: HashMap<&str, &MockScanResult> = scan_results
1420 .iter()
1421 .map(|result| (result.path(), result))
1422 .collect();
1423
1424 let resolved = detector.resolve_import("utils", "src/main.py", &file_map);
1426 assert!(resolved.is_some());
1427
1428 let resolved_config = detector.resolve_import("src.config", "src/main.py", &file_map);
1430 assert!(resolved_config.is_some());
1432
1433 println!("Resolved imports:");
1434 if let Some(path) = resolved {
1435 println!(" utils -> {}", path);
1436 }
1437 if let Some(path) = resolved_config {
1438 println!(" src.config -> {}", path);
1439 }
1440 }
1441
1442 #[test]
1443 fn test_entrypoint_detection() {
1444 let calculator = CentralityCalculator::new().unwrap();
1445
1446 assert!(calculator.is_entrypoint_file("main.py"));
1447 assert!(calculator.is_entrypoint_file("src/main.rs"));
1448 assert!(calculator.is_entrypoint_file("index.js"));
1449 assert!(calculator.is_entrypoint_file("app.py"));
1450 assert!(calculator.is_entrypoint_file("lib.rs"));
1451 assert!(calculator.is_entrypoint_file("__init__.py"));
1452
1453 assert!(!calculator.is_entrypoint_file("utils.py"));
1454 assert!(!calculator.is_entrypoint_file("config.rs"));
1455 assert!(!calculator.is_entrypoint_file("helper.js"));
1456 }
1457
1458 #[test]
1459 fn test_top_files_by_centrality() {
1460 let mut pagerank_scores = HashMap::new();
1461 pagerank_scores.insert("file1.py".to_string(), 0.4);
1462 pagerank_scores.insert("file2.py".to_string(), 0.6);
1463 pagerank_scores.insert("file3.py".to_string(), 0.2);
1464 pagerank_scores.insert("file4.py".to_string(), 0.8);
1465
1466 let results = CentralityResults {
1467 pagerank_scores,
1468 graph_analysis: GraphAnalysisResults {
1469 basic_stats: crate::graph::GraphStatistics::empty(),
1470 degree_distribution: Default::default(),
1471 connectivity: Default::default(),
1472 structural_patterns: Default::default(),
1473 import_insights: Default::default(),
1474 performance_profile: Default::default(),
1475 analysis_metadata: Default::default(),
1476 },
1477 pagerank_details: PageRankResults {
1478 scores: HashMap::new(),
1479 iterations_converged: 10,
1480 convergence_epsilon: 1e-6,
1481 graph_stats: crate::graph::GraphStatistics::empty(),
1482 parameters: PageRankConfig::default(),
1483 performance_metrics: Default::default(),
1484 },
1485 import_stats: ImportDetectionStats {
1486 files_processed: 4,
1487 imports_detected: 0,
1488 imports_resolved: 0,
1489 resolution_rate: 0.0,
1490 language_breakdown: HashMap::new(),
1491 import_patterns: HashMap::new(),
1492 },
1493 integration_metadata: IntegrationMetadata {
1494 timestamp: chrono::Utc::now(),
1495 computation_time_ms: 100,
1496 integration_successful: true,
1497 centrality_weight: 0.15,
1498 files_with_centrality: 4,
1499 config: CentralityConfig::default(),
1500 },
1501 };
1502
1503 let top_files = results.top_files_by_centrality(2);
1504 assert_eq!(top_files.len(), 2);
1505 assert_eq!(top_files[0].0, "file4.py");
1506 assert_eq!(top_files[0].1, 0.8);
1507 assert_eq!(top_files[1].0, "file2.py");
1508 assert_eq!(top_files[1].1, 0.6);
1509 }
1510}