use std::collections::{HashMap, HashSet};
use std::path::Path;
use serde::{Deserialize, Serialize};
use rayon::prelude::*;
use scribe_core::Result;
use scribe_analysis::heuristics::ScanResult;

use crate::graph::{DependencyGraph, NodeId};
use crate::pagerank::{PageRankComputer, PageRankResults, PageRankConfig};
use crate::statistics::{GraphStatisticsAnalyzer, GraphAnalysisResults};
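/// Aggregated output of a centrality run: raw PageRank scores per node,
/// graph-structure analysis, import-resolution statistics, and metadata
/// describing how the run was configured and how long it took.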
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CentralityResults {
    pub pagerank_scores: HashMap<NodeId, f64>,
    pub graph_analysis: GraphAnalysisResults,
    pub pagerank_details: PageRankResults,
    pub import_stats: ImportDetectionStats,
    pub integration_metadata: IntegrationMetadata,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ImportDetectionStats {
    pub files_processed: usize,
    pub imports_detected: usize,
    pub imports_resolved: usize,
    pub resolution_rate: f64,
    pub language_breakdown: HashMap<String, usize>,
    pub import_patterns: HashMap<String, ImportPatternStats>,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ImportPatternStats {
    pub total_imports: usize,
    pub relative_imports: usize,
    pub absolute_imports: usize,
    pub stdlib_imports: usize,
    pub third_party_imports: usize,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IntegrationMetadata {
    pub timestamp: chrono::DateTime<chrono::Utc>,
    pub computation_time_ms: u64,
    pub integration_successful: bool,
    pub centrality_weight: f64,
    pub files_with_centrality: usize,
    pub config: CentralityConfig,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CentralityConfig {
    pub pagerank_config: PageRankConfig,
    pub analyze_graph_structure: bool,
    pub import_resolution: ImportResolutionConfig,
    pub integration: IntegrationConfig,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ImportResolutionConfig {
    pub max_search_depth: usize,
    pub resolve_relative_imports: bool,
    pub resolve_absolute_imports: bool,
    pub exclude_stdlib_imports: bool,
    pub path_mappings: HashMap<String, String>,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IntegrationConfig {
    pub centrality_weight: f64,
    pub normalization_method: NormalizationMethod,
    pub min_centrality_threshold: f64,
    pub boost_entrypoints: bool,
    pub entrypoint_boost_factor: f64,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum NormalizationMethod {
    MinMax,
    ZScore,
    Rank,
    None,
}

impl Default for CentralityConfig {
    fn default() -> Self {
        Self {
            pagerank_config: PageRankConfig::for_code_analysis(),
            analyze_graph_structure: true,
            import_resolution: ImportResolutionConfig::default(),
            integration: IntegrationConfig::default(),
        }
    }
}

impl Default for ImportResolutionConfig {
    fn default() -> Self {
        Self {
            max_search_depth: 3,
            resolve_relative_imports: true,
            resolve_absolute_imports: true,
            exclude_stdlib_imports: true,
            path_mappings: HashMap::new(),
        }
    }
}

impl Default for IntegrationConfig {
    fn default() -> Self {
        Self {
            centrality_weight: 0.15,
            normalization_method: NormalizationMethod::MinMax,
            min_centrality_threshold: 1e-6,
            boost_entrypoints: true,
            entrypoint_boost_factor: 1.5,
        }
    }
}
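/// Computes file-level centrality by building an import dependency graph
/// from scan results, running PageRank over it, and optionally analysing
/// the graph structure.
///
/// Illustrative usage (the `scan_results` slice is a stand-in for any
/// collection of types implementing `ScanResult`):
///
/// ```ignore
/// let calculator = CentralityCalculator::new()?;
/// let results = calculator.calculate_centrality(&scan_results)?;
/// println!("{}", results.summary());
/// ```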
#[derive(Debug)]
pub struct CentralityCalculator {
    config: CentralityConfig,
    pagerank_computer: PageRankComputer,
    stats_analyzer: GraphStatisticsAnalyzer,
    import_detector: ImportDetector,
}

impl CentralityCalculator {
    pub fn new() -> Result<Self> {
        let config = CentralityConfig::default();
        Self::with_config(config)
    }

    pub fn with_config(config: CentralityConfig) -> Result<Self> {
        let pagerank_computer = PageRankComputer::with_config(config.pagerank_config.clone())?;

        let stats_analyzer = if config.analyze_graph_structure {
            GraphStatisticsAnalyzer::new()
        } else {
            GraphStatisticsAnalyzer::for_large_graphs()
        };

        let import_detector = ImportDetector::with_config(config.import_resolution.clone());

        Ok(Self {
            config,
            pagerank_computer,
            stats_analyzer,
            import_detector,
        })
    }

    pub fn for_large_codebases() -> Result<Self> {
        let config = CentralityConfig {
            pagerank_config: PageRankConfig::for_large_codebases(),
            analyze_graph_structure: false,
            ..CentralityConfig::default()
        };
        Self::with_config(config)
    }
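    /// Runs the full pipeline for a set of scan results: build the dependency
    /// graph from resolved imports, compute PageRank, analyse the graph
    /// structure (or a minimal profile for large codebases), and package the
    /// results with timing metadata.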
    pub fn calculate_centrality<T>(&self, scan_results: &[T]) -> Result<CentralityResults>
    where
        T: ScanResult + Sync,
    {
        let start_time = std::time::Instant::now();

        // Build the import graph and collect import-resolution statistics.
        let (graph, import_stats) = self.build_dependency_graph(scan_results)?;

        // Compute PageRank scores over the dependency graph.
        let pagerank_results = self.pagerank_computer.compute(&graph)?;

        // Full structural analysis is optional; large codebases fall back to
        // a lightweight profile.
        let graph_analysis = if self.config.analyze_graph_structure {
            self.stats_analyzer.analyze(&graph)?
        } else {
            self.create_minimal_analysis(&graph)?
        };

        let computation_time = start_time.elapsed().as_millis() as u64;
        let integration_metadata = IntegrationMetadata {
            timestamp: chrono::Utc::now(),
            computation_time_ms: computation_time,
            integration_successful: true,
            centrality_weight: self.config.integration.centrality_weight,
            files_with_centrality: pagerank_results.scores.len(),
            config: self.config.clone(),
        };

        Ok(CentralityResults {
            pagerank_scores: pagerank_results.scores.clone(),
            graph_analysis,
            pagerank_details: pagerank_results,
            import_stats,
            integration_metadata,
        })
    }
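    /// Blends normalized centrality scores into existing heuristic scores.
    /// For each file the combined score is
    /// `(1 - w) * heuristic + w * centrality`, where `w` is
    /// `integration.centrality_weight`; files recognised as entrypoints have
    /// their centrality multiplied by `entrypoint_boost_factor` first.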
    pub fn integrate_with_heuristics(
        &self,
        centrality_results: &CentralityResults,
        heuristic_scores: &HashMap<String, f64>,
    ) -> Result<HashMap<String, f64>> {
        let normalized_centrality = self.normalize_centrality_scores(
            &centrality_results.pagerank_scores,
            heuristic_scores,
        )?;

        let mut integrated_scores = HashMap::new();
        let centrality_weight = self.config.integration.centrality_weight;
        let heuristic_weight = 1.0 - centrality_weight;

        for (file_path, heuristic_score) in heuristic_scores {
            let centrality_score = normalized_centrality.get(file_path).copied().unwrap_or(0.0);

            let boosted_centrality = if self.config.integration.boost_entrypoints
                && self.is_entrypoint_file(file_path) {
                centrality_score * self.config.integration.entrypoint_boost_factor
            } else {
                centrality_score
            };

            let integrated_score = heuristic_weight * heuristic_score +
                centrality_weight * boosted_centrality;

            integrated_scores.insert(file_path.clone(), integrated_score);
        }

        for (file_path, centrality_score) in &normalized_centrality {
            if !integrated_scores.contains_key(file_path) {
                let boosted_centrality = if self.config.integration.boost_entrypoints
                    && self.is_entrypoint_file(file_path) {
                    centrality_score * self.config.integration.entrypoint_boost_factor
                } else {
                    *centrality_score
                };

                integrated_scores.insert(file_path.clone(), centrality_weight * boosted_centrality);
            }
        }

        Ok(integrated_scores)
    }
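    /// Builds the dependency graph: one node per scanned file, one directed
    /// edge per import that could be resolved to another file in the scan.
    /// Edge construction runs in parallel when the PageRank configuration
    /// enables it.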
    fn build_dependency_graph<T>(&self, scan_results: &[T]) -> Result<(DependencyGraph, ImportDetectionStats)>
    where
        T: ScanResult + Sync,
    {
        let mut graph = DependencyGraph::with_capacity(scan_results.len());

        let optimized_detector = ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);

        for result in scan_results {
            graph.add_node(result.path().to_string())?;
        }

        let import_stats = if self.config.pagerank_config.use_parallel {
            self.build_edges_parallel_optimized(&mut graph, scan_results, &optimized_detector)?
        } else {
            self.build_edges_sequential_optimized(&mut graph, scan_results, &optimized_detector)?
        };

        Ok((graph, import_stats))
    }

    fn build_edges_sequential_optimized<T>(
        &self,
        graph: &mut DependencyGraph,
        scan_results: &[T],
        optimized_detector: &ImportDetector,
    ) -> Result<ImportDetectionStats>
    where
        T: ScanResult,
    {
        let mut stats = ImportDetectionStats {
            files_processed: 0,
            imports_detected: 0,
            imports_resolved: 0,
            resolution_rate: 0.0,
            language_breakdown: HashMap::new(),
            import_patterns: HashMap::new(),
        };

        let file_path_map: HashMap<&str, &T> = scan_results.iter()
            .map(|result| (result.path(), result))
            .collect();

        for result in scan_results {
            stats.files_processed += 1;

            if let Some(lang) = optimized_detector.detect_language(result.path()) {
                *stats.language_breakdown.entry(lang).or_insert(0) += 1;
            }

            if let Some(imports) = result.imports() {
                stats.imports_detected += imports.len();

                for import_str in imports {
                    if let Some(resolved_path) = optimized_detector.resolve_import(
                        import_str,
                        result.path(),
                        &file_path_map
                    ) {
                        graph.add_edge(result.path().to_string(), resolved_path)?;
                        stats.imports_resolved += 1;
                    }
                }
            }
        }

        stats.resolution_rate = if stats.imports_detected > 0 {
            stats.imports_resolved as f64 / stats.imports_detected as f64
        } else {
            0.0
        };

        Ok(stats)
    }

    fn build_edges_sequential<T>(&self, graph: &mut DependencyGraph, scan_results: &[T]) -> Result<ImportDetectionStats>
    where
        T: ScanResult,
    {
        let optimized_detector = ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
        self.build_edges_sequential_optimized(graph, scan_results, &optimized_detector)
    }

    fn build_edges_parallel_optimized<T>(
        &self,
        graph: &mut DependencyGraph,
        scan_results: &[T],
        optimized_detector: &ImportDetector,
    ) -> Result<ImportDetectionStats>
    where
        T: ScanResult + Sync,
    {
        let file_path_map: HashMap<&str, &T> = scan_results.iter()
            .map(|result| (result.path(), result))
            .collect();

        let import_edges: Vec<_> = scan_results.par_iter()
            .flat_map(|result| {
                let mut edges = Vec::new();

                if let Some(imports) = result.imports() {
                    for import_str in imports {
                        if let Some(resolved_path) = optimized_detector.resolve_import(
                            import_str,
                            result.path(),
                            &file_path_map
                        ) {
                            edges.push((result.path().to_string(), resolved_path));
                        }
                    }
                }

                edges
            })
            .collect();

        for (from, to) in &import_edges {
            graph.add_edge(from.clone(), to.clone())?;
        }

        let total_imports: usize = scan_results.iter()
            .map(|result| result.imports().map_or(0, |imports| imports.len()))
            .sum();

        let language_breakdown: HashMap<String, usize> = scan_results.iter()
            .filter_map(|result| optimized_detector.detect_language(result.path())
                .map(|lang| (lang, 1)))
            .fold(HashMap::new(), |mut acc, (lang, count)| {
                *acc.entry(lang).or_insert(0) += count;
                acc
            });

        let stats = ImportDetectionStats {
            files_processed: scan_results.len(),
            imports_detected: total_imports,
            imports_resolved: import_edges.len(),
            resolution_rate: if total_imports > 0 {
                import_edges.len() as f64 / total_imports as f64
            } else {
                0.0
            },
            language_breakdown,
            import_patterns: HashMap::new(),
        };

        Ok(stats)
    }

    fn build_edges_parallel<T>(&self, graph: &mut DependencyGraph, scan_results: &[T]) -> Result<ImportDetectionStats>
    where
        T: ScanResult + Sync,
    {
        let optimized_detector = ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
        self.build_edges_parallel_optimized(graph, scan_results, &optimized_detector)
    }

    fn normalize_centrality_scores(
        &self,
        centrality_scores: &HashMap<String, f64>,
        heuristic_scores: &HashMap<String, f64>,
    ) -> Result<HashMap<String, f64>> {
        if centrality_scores.is_empty() {
            return Ok(HashMap::new());
        }

        match self.config.integration.normalization_method {
            NormalizationMethod::MinMax => self.normalize_min_max(centrality_scores, heuristic_scores),
            NormalizationMethod::ZScore => self.normalize_z_score(centrality_scores),
            NormalizationMethod::Rank => self.normalize_rank(centrality_scores),
            NormalizationMethod::None => Ok(centrality_scores.clone()),
        }
    }
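    /// Min-max normalization: maps each centrality score to
    /// `(x - min) / (max - min)` and rescales by the maximum heuristic score
    /// so the two ranges are comparable. If all centrality values are equal,
    /// every file receives half of the maximum heuristic score.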
    fn normalize_min_max(
        &self,
        centrality_scores: &HashMap<String, f64>,
        heuristic_scores: &HashMap<String, f64>,
    ) -> Result<HashMap<String, f64>> {
        let centrality_values: Vec<f64> = centrality_scores.values().copied().collect();
        let min_centrality = centrality_values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
        let max_centrality = centrality_values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));

        let heuristic_values: Vec<f64> = heuristic_scores.values().copied().collect();
        let max_heuristic = if heuristic_values.is_empty() {
            1.0
        } else {
            heuristic_values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b))
        };

        let mut normalized = HashMap::new();

        if (max_centrality - min_centrality).abs() < f64::EPSILON {
            for (path, _) in centrality_scores {
                normalized.insert(path.clone(), max_heuristic * 0.5);
            }
        } else {
            for (path, &score) in centrality_scores {
                let normalized_score = ((score - min_centrality) / (max_centrality - min_centrality)) * max_heuristic;
                if normalized_score >= self.config.integration.min_centrality_threshold {
                    normalized.insert(path.clone(), normalized_score);
                }
            }
        }

        Ok(normalized)
    }
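    /// Z-score normalization: each score becomes `(x - mean) / std_dev`, then
    /// is shifted from roughly [-3, 3] into [0, 1] via `(z + 3) / 6`. If the
    /// standard deviation is effectively zero, every file receives 0.5.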
    fn normalize_z_score(&self, centrality_scores: &HashMap<String, f64>) -> Result<HashMap<String, f64>> {
        let values: Vec<f64> = centrality_scores.values().copied().collect();
        let mean = values.iter().sum::<f64>() / values.len() as f64;
        let variance = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / values.len() as f64;
        let std_dev = variance.sqrt();

        let mut normalized = HashMap::new();

        if std_dev > f64::EPSILON {
            for (path, &score) in centrality_scores {
                let z_score = (score - mean) / std_dev;
                let normalized_score = (z_score + 3.0) / 6.0;
                if normalized_score >= self.config.integration.min_centrality_threshold {
                    normalized.insert(path.clone(), normalized_score);
                }
            }
        } else {
            for (path, _) in centrality_scores {
                normalized.insert(path.clone(), 0.5);
            }
        }

        Ok(normalized)
    }
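    /// Rank normalization: files are ordered by descending centrality and the
    /// file at 0-based rank `r` out of `n` receives `1 - r / n`, so the most
    /// central file scores 1.0.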
    fn normalize_rank(&self, centrality_scores: &HashMap<String, f64>) -> Result<HashMap<String, f64>> {
        let mut scored_files: Vec<_> = centrality_scores.iter()
            .map(|(path, &score)| (path.clone(), score))
            .collect();

        scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

        let mut normalized = HashMap::new();
        let total_files = scored_files.len();

        for (rank, (path, _)) in scored_files.into_iter().enumerate() {
            let normalized_score = 1.0 - (rank as f64 / total_files as f64);
            if normalized_score >= self.config.integration.min_centrality_threshold {
                normalized.insert(path, normalized_score);
            }
        }

        Ok(normalized)
    }

    fn create_minimal_analysis(&self, graph: &DependencyGraph) -> Result<GraphAnalysisResults> {
        let minimal_analyzer = GraphStatisticsAnalyzer::for_large_graphs();
        minimal_analyzer.analyze(graph)
    }

    fn is_entrypoint_file(&self, file_path: &str) -> bool {
        let path = Path::new(file_path);
        let file_name = path.file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("")
            .to_lowercase();

        matches!(file_name.as_str(),
            "main.py" | "main.rs" | "main.go" | "main.js" | "main.ts" |
            "index.py" | "index.rs" | "index.go" | "index.js" | "index.ts" |
            "app.py" | "app.rs" | "app.go" | "app.js" | "app.ts" |
            "server.py" | "server.rs" | "server.go" | "server.js" | "server.ts" |
            "lib.rs" | "__init__.py"
        )
    }
}

impl Default for CentralityCalculator {
    fn default() -> Self {
        Self::new().expect("Failed to create CentralityCalculator")
    }
}
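/// Detects and resolves import statements to files within the scanned set.
/// Resolution tries exact candidate paths first and falls back to fuzzy
/// matching via pre-built lookup maps keyed by file stem and file name.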
#[derive(Debug, Clone)]
pub struct ImportDetector {
    config: ImportResolutionConfig,
    stem_to_paths: HashMap<String, Vec<String>>,
    filename_to_paths: HashMap<String, Vec<String>>,
    available_paths: HashSet<String>,
}

impl ImportDetector {
    pub fn with_config(config: ImportResolutionConfig) -> Self {
        Self {
            config,
            stem_to_paths: HashMap::new(),
            filename_to_paths: HashMap::new(),
            available_paths: HashSet::new(),
        }
    }

    pub fn with_file_index<T>(config: ImportResolutionConfig, scan_results: &[T]) -> Self
    where
        T: ScanResult,
    {
        let mut detector = Self::with_config(config);
        detector.build_lookup_maps(scan_results);
        detector
    }

    fn build_lookup_maps<T>(&mut self, scan_results: &[T])
    where
        T: ScanResult,
    {
        self.stem_to_paths.clear();
        self.filename_to_paths.clear();
        self.available_paths.clear();

        for result in scan_results {
            let full_path = result.path().to_string();
            self.available_paths.insert(full_path.clone());

            let path = Path::new(result.path());

            if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
                let stem_lower = stem.to_lowercase();
                self.stem_to_paths.entry(stem_lower).or_insert_with(Vec::new).push(full_path.clone());
            }

            if let Some(filename) = path.file_name().and_then(|s| s.to_str()) {
                let filename_lower = filename.to_lowercase();
                self.filename_to_paths.entry(filename_lower).or_insert_with(Vec::new).push(full_path);
            }
        }
    }

    pub fn detect_language(&self, file_path: &str) -> Option<String> {
        let path = Path::new(file_path);
        let ext = path.extension()?.to_str()?.to_lowercase();

        match ext.as_str() {
            "py" => Some("python".to_string()),
            "js" | "jsx" | "mjs" => Some("javascript".to_string()),
            "ts" | "tsx" => Some("typescript".to_string()),
            "rs" => Some("rust".to_string()),
            "go" => Some("go".to_string()),
            "java" | "kt" => Some("java".to_string()),
            "cpp" | "cc" | "cxx" | "hpp" | "h" => Some("cpp".to_string()),
            "c" => Some("c".to_string()),
            "rb" => Some("ruby".to_string()),
            "php" => Some("php".to_string()),
            "cs" => Some("csharp".to_string()),
            "swift" => Some("swift".to_string()),
            _ => None,
        }
    }

    pub fn resolve_import<T>(
        &self,
        import_str: &str,
        current_file: &str,
        file_map: &HashMap<&str, &T>,
    ) -> Option<String>
    where
        T: ScanResult,
    {
        if let Some(mapped_path) = self.config.path_mappings.get(import_str) {
            if file_map.contains_key(mapped_path.as_str()) {
                return Some(mapped_path.clone());
            }
        }

        let current_path = Path::new(current_file);
        let language = self.detect_language(current_file);

        match language.as_deref() {
            Some("python") => self.resolve_python_import(import_str, current_path, file_map),
            Some("javascript") | Some("typescript") => self.resolve_js_import(import_str, current_path, file_map),
            Some("rust") => self.resolve_rust_import(import_str, current_path, file_map),
            Some("go") => self.resolve_go_import(import_str, current_path, file_map),
            _ => self.resolve_generic_import(import_str, current_path, file_map),
        }
    }
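    /// Python: builds candidate paths of the form `module/path.py` and
    /// `module/path/__init__.py`, both at the scan root and relative to the
    /// importing file's directory, then falls back to fuzzy matching.
    /// Standard-library modules are skipped when stdlib exclusion is enabled.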
    fn resolve_python_import<T>(
        &self,
        import_str: &str,
        current_path: &Path,
        file_map: &HashMap<&str, &T>,
    ) -> Option<String>
    where
        T: ScanResult,
    {
        let cleaned_import = import_str.trim();

        if self.config.exclude_stdlib_imports && self.is_python_stdlib(cleaned_import) {
            return None;
        }

        let module_parts: Vec<&str> = cleaned_import.split('.').collect();

        let mut candidates = Vec::new();
        candidates.push(format!("{}.py", module_parts.join("/")));
        candidates.push(format!("{}/__init__.py", module_parts.join("/")));

        if let Some(parent) = current_path.parent() {
            let parent_str = parent.to_string_lossy();
            let relative_candidates: Vec<String> = candidates.iter()
                .map(|candidate| format!("{}/{}", parent_str, candidate))
                .collect();
            candidates.extend(relative_candidates);
        }

        for candidate in &candidates {
            if file_map.contains_key(candidate.as_str()) {
                return Some(candidate.clone());
            }
        }

        self.fuzzy_match_import(&module_parts, file_map)
    }
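    /// JavaScript/TypeScript: relative imports (`./`, `../`) are resolved
    /// against the importing file's directory and tried with common
    /// extensions and `index` files; bare specifiers fall back to fuzzy
    /// matching when absolute-import resolution is enabled.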
    fn resolve_js_import<T>(
        &self,
        import_str: &str,
        current_path: &Path,
        file_map: &HashMap<&str, &T>,
    ) -> Option<String>
    where
        T: ScanResult,
    {
        let cleaned_import = import_str.trim();

        if cleaned_import.starts_with("./") || cleaned_import.starts_with("../") {
            if !self.config.resolve_relative_imports {
                return None;
            }

            if let Some(parent) = current_path.parent() {
                // Strip a leading "./" before joining; "../" specifiers are
                // joined as-is and matched lexically against the file map.
                let relative = cleaned_import.strip_prefix("./").unwrap_or(cleaned_import);
                let resolved_path = parent.join(relative);
                let resolved_str = resolved_path.to_string_lossy();

                for ext in &[".js", ".ts", ".jsx", ".tsx", "/index.js", "/index.ts"] {
                    let candidate = format!("{}{}", resolved_str, ext);
                    if file_map.contains_key(candidate.as_str()) {
                        return Some(candidate);
                    }
                }
            }
        } else if self.config.resolve_absolute_imports {
            let import_parts: Vec<&str> = cleaned_import.split('/').collect();
            return self.fuzzy_match_import(&import_parts, file_map);
        }

        None
    }
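    /// Rust: `use` paths are split on `::` and tried as `path.rs`,
    /// `path/mod.rs`, and (for single-segment paths) `lib.rs` locations,
    /// skipping `std`/`core`/`alloc` when stdlib exclusion is enabled.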
    fn resolve_rust_import<T>(
        &self,
        import_str: &str,
        _current_path: &Path,
        file_map: &HashMap<&str, &T>,
    ) -> Option<String>
    where
        T: ScanResult,
    {
        let cleaned_import = import_str.trim();

        if self.config.exclude_stdlib_imports && self.is_rust_stdlib(cleaned_import) {
            return None;
        }

        let parts: Vec<&str> = cleaned_import.split("::").collect();

        let mut candidates = Vec::new();
        candidates.push(format!("{}.rs", parts.join("/")));
        candidates.push(format!("{}/mod.rs", parts.join("/")));

        if parts.len() == 1 {
            candidates.push(format!("{}/lib.rs", parts[0]));
            candidates.push(format!("{}/src/lib.rs", parts[0]));
        }

        for candidate in &candidates {
            if file_map.contains_key(candidate.as_str()) {
                return Some(candidate.clone());
            }
        }

        self.fuzzy_match_import(&parts, file_map)
    }
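    /// Go: quoted import paths are split on `/` and tried as `pkg.go`,
    /// `path/main.go`, and `path/pkg.go`; paths without a dot are treated as
    /// standard library and skipped when stdlib exclusion is enabled.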
    fn resolve_go_import<T>(
        &self,
        import_str: &str,
        _current_path: &Path,
        file_map: &HashMap<&str, &T>,
    ) -> Option<String>
    where
        T: ScanResult,
    {
        let cleaned_import = import_str.trim().trim_matches('"');

        if self.config.exclude_stdlib_imports && !cleaned_import.contains('.') {
            return None;
        }

        let parts: Vec<&str> = cleaned_import.split('/').collect();

        let mut candidates = Vec::new();
        candidates.push(format!("{}.go", parts.last()?));
        candidates.push(format!("{}/main.go", cleaned_import));
        candidates.push(format!("{}/{}.go", cleaned_import, parts.last()?));

        for candidate in &candidates {
            if file_map.contains_key(candidate.as_str()) {
                return Some(candidate.clone());
            }
        }

        self.fuzzy_match_import(&parts, file_map)
    }

    fn resolve_generic_import<T>(
        &self,
        import_str: &str,
        _current_path: &Path,
        file_map: &HashMap<&str, &T>,
    ) -> Option<String>
    where
        T: ScanResult,
    {
        let cleaned_import = import_str.trim();
        let parts: Vec<&str> = cleaned_import.split(&['/', '.', ':']).collect();
        self.fuzzy_match_import(&parts, file_map)
    }
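    /// Fuzzy fallback: try an exact match of the last import segment against
    /// the file-stem and file-name indexes, then a substring match on stems,
    /// and finally any indexed path that contains every import segment.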
    fn fuzzy_match_import<T>(
        &self,
        import_parts: &[&str],
        _file_map: &HashMap<&str, &T>,
    ) -> Option<String>
    where
        T: ScanResult,
    {
        if import_parts.is_empty() {
            return None;
        }

        let last_part = import_parts.last()?.to_lowercase();

        if let Some(paths) = self.stem_to_paths.get(&last_part) {
            if let Some(first_path) = paths.first() {
                return Some(first_path.clone());
            }
        }

        if let Some(paths) = self.filename_to_paths.get(&last_part) {
            if let Some(first_path) = paths.first() {
                return Some(first_path.clone());
            }
        }

        for (stem, paths) in &self.stem_to_paths {
            if stem.contains(&last_part) || last_part.contains(stem) {
                if let Some(first_path) = paths.first() {
                    return Some(first_path.clone());
                }
            }
        }

        for path in &self.available_paths {
            let path_lower = path.to_lowercase();
            if import_parts.iter().all(|&part| path_lower.contains(&part.to_lowercase())) {
                return Some(path.clone());
            }
        }

        None
    }

    fn is_python_stdlib(&self, import_str: &str) -> bool {
        let stdlib_modules = [
            "os", "sys", "re", "json", "collections", "itertools", "functools",
            "typing", "datetime", "math", "random", "string", "pathlib", "io",
            "csv", "xml", "html", "urllib", "http", "email", "logging", "unittest",
            "asyncio", "concurrent", "multiprocessing", "threading", "subprocess",
        ];

        let first_part = import_str.split('.').next().unwrap_or(import_str);
        stdlib_modules.contains(&first_part)
    }

    fn is_rust_stdlib(&self, import_str: &str) -> bool {
        import_str.starts_with("std::") ||
        import_str.starts_with("core::") ||
        import_str.starts_with("alloc::")
    }
}

impl CentralityResults {
    pub fn top_files_by_centrality(&self, k: usize) -> Vec<(String, f64)> {
        let mut scored_files: Vec<_> = self.pagerank_scores.iter()
            .map(|(path, &score)| (path.clone(), score))
            .collect();

        scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        scored_files.into_iter().take(k).collect()
    }

    pub fn summary(&self) -> String {
        format!(
            "Centrality Analysis Summary:\n\
             - Files with centrality scores: {}\n\
             - PageRank iterations: {} (converged: {})\n\
             - Graph: {} nodes, {} edges (density: {:.4})\n\
             - Import resolution: {:.1}% ({}/{})\n\
             - Top languages: {}\n\
             - Computation time: {}ms\n\
             - Integration weight: {:.2}",
            self.pagerank_scores.len(),
            self.pagerank_details.iterations_converged,
            self.pagerank_details.converged(),
            self.graph_analysis.basic_stats.total_nodes,
            self.graph_analysis.basic_stats.total_edges,
            self.graph_analysis.basic_stats.graph_density,
            self.import_stats.resolution_rate * 100.0,
            self.import_stats.imports_resolved,
            self.import_stats.imports_detected,
            self.import_stats.language_breakdown.iter()
                .max_by_key(|(_, &count)| count)
                .map(|(lang, count)| format!("{} ({})", lang, count))
                .unwrap_or_else(|| "None".to_string()),
            self.integration_metadata.computation_time_ms,
            self.integration_metadata.centrality_weight,
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use scribe_analysis::heuristics::DocumentAnalysis;

    #[derive(Debug, Clone)]
    struct MockScanResult {
        path: String,
        relative_path: String,
        depth: usize,
        imports: Option<Vec<String>>,
        is_docs: bool,
        is_readme: bool,
        is_test: bool,
        is_entrypoint: bool,
        has_examples: bool,
        priority_boost: f64,
        churn_score: f64,
        centrality_in: f64,
        doc_analysis: Option<DocumentAnalysis>,
    }

    impl MockScanResult {
        fn new(path: &str) -> Self {
            Self {
                path: path.to_string(),
                relative_path: path.to_string(),
                depth: path.matches('/').count(),
                imports: None,
                is_docs: path.contains("doc") || path.ends_with(".md"),
                is_readme: path.to_lowercase().contains("readme"),
                is_test: path.contains("test"),
                is_entrypoint: path.contains("main") || path.contains("index"),
                has_examples: path.contains("example"),
                priority_boost: 0.0,
                churn_score: 0.5,
                centrality_in: 0.0,
                doc_analysis: Some(DocumentAnalysis::new()),
            }
        }

        fn with_imports(mut self, imports: Vec<String>) -> Self {
            self.imports = Some(imports);
            self
        }
    }

    impl ScanResult for MockScanResult {
        fn path(&self) -> &str { &self.path }
        fn relative_path(&self) -> &str { &self.relative_path }
        fn depth(&self) -> usize { self.depth }
        fn is_docs(&self) -> bool { self.is_docs }
        fn is_readme(&self) -> bool { self.is_readme }
        fn is_test(&self) -> bool { self.is_test }
        fn is_entrypoint(&self) -> bool { self.is_entrypoint }
        fn has_examples(&self) -> bool { self.has_examples }
        fn priority_boost(&self) -> f64 { self.priority_boost }
        fn churn_score(&self) -> f64 { self.churn_score }
        fn centrality_in(&self) -> f64 { self.centrality_in }
        fn imports(&self) -> Option<&[String]> { self.imports.as_deref() }
        fn doc_analysis(&self) -> Option<&DocumentAnalysis> { self.doc_analysis.as_ref() }
    }
    #[test]
    fn test_centrality_calculator_creation() {
        let calculator = CentralityCalculator::new();
        assert!(calculator.is_ok());

        let large_calc = CentralityCalculator::for_large_codebases();
        assert!(large_calc.is_ok());
    }

    #[test]
    fn test_import_detection() {
        let detector = ImportDetector::with_config(ImportResolutionConfig::default());

        assert_eq!(detector.detect_language("main.py"), Some("python".to_string()));
        assert_eq!(detector.detect_language("app.js"), Some("javascript".to_string()));
        assert_eq!(detector.detect_language("lib.rs"), Some("rust".to_string()));

        assert!(detector.is_python_stdlib("os"));
        assert!(detector.is_python_stdlib("sys.path"));
        assert!(!detector.is_python_stdlib("custom_module"));

        assert!(detector.is_rust_stdlib("std::collections::HashMap"));
        assert!(detector.is_rust_stdlib("core::fmt"));
        assert!(!detector.is_rust_stdlib("serde::Deserialize"));
    }

    #[test]
    fn test_centrality_calculation() {
        let calculator = CentralityCalculator::new().unwrap();

        let scan_results = vec![
            MockScanResult::new("main.py").with_imports(vec!["utils".to_string(), "config".to_string()]),
            MockScanResult::new("utils.py").with_imports(vec!["config".to_string()]),
            MockScanResult::new("config.py"),
            MockScanResult::new("test.py").with_imports(vec!["main".to_string()]),
        ];

        let results = calculator.calculate_centrality(&scan_results).unwrap();

        assert!(!results.pagerank_scores.is_empty());
        assert!(results.integration_metadata.integration_successful);
        assert_eq!(results.integration_metadata.files_with_centrality, results.pagerank_scores.len());

        let config_score = results.pagerank_scores.get("config.py");
        assert!(config_score.is_some());

        println!("Centrality scores:");
        for (file, score) in &results.pagerank_scores {
            println!("  {}: {:.6}", file, score);
        }

        println!("\n{}", results.summary());
    }

    #[test]
    fn test_heuristics_integration() {
        let calculator = CentralityCalculator::new().unwrap();

        let scan_results = vec![
            MockScanResult::new("main.py").with_imports(vec!["utils".to_string()]),
            MockScanResult::new("utils.py"),
        ];

        let centrality_results = calculator.calculate_centrality(&scan_results).unwrap();

        let mut heuristic_scores = HashMap::new();
        heuristic_scores.insert("main.py".to_string(), 0.8);
        heuristic_scores.insert("utils.py".to_string(), 0.6);

        let integrated_scores = calculator.integrate_with_heuristics(
            &centrality_results,
            &heuristic_scores
        ).unwrap();

        assert!(!integrated_scores.is_empty());

        for (file, integrated_score) in &integrated_scores {
            let original_score = heuristic_scores.get(file).unwrap();
            println!("File {}: heuristic={:.3}, integrated={:.3}",
                file, original_score, integrated_score);
        }
    }
    #[test]
    fn test_normalization_methods() {
        let calculator = CentralityCalculator::new().unwrap();

        let centrality_scores = vec![
            ("file1".to_string(), 0.1),
            ("file2".to_string(), 0.3),
            ("file3".to_string(), 0.6),
            ("file4".to_string(), 1.0),
        ].into_iter().collect();

        let heuristic_scores = vec![
            ("file1".to_string(), 0.5),
            ("file2".to_string(), 0.7),
            ("file3".to_string(), 0.9),
            ("file4".to_string(), 1.2),
        ].into_iter().collect();

        let normalized = calculator.normalize_min_max(&centrality_scores, &heuristic_scores).unwrap();
        assert!(!normalized.is_empty());

        let z_normalized = calculator.normalize_z_score(&centrality_scores).unwrap();
        assert!(!z_normalized.is_empty());

        let rank_normalized = calculator.normalize_rank(&centrality_scores).unwrap();
        assert!(!rank_normalized.is_empty());

        println!("Original scores: {:?}", centrality_scores);
        println!("Min-max normalized: {:?}", normalized);
        println!("Z-score normalized: {:?}", z_normalized);
        println!("Rank normalized: {:?}", rank_normalized);
    }

    #[test]
    fn test_import_resolution() {
        let detector = ImportDetector::with_config(ImportResolutionConfig::default());

        let scan_results = vec![
            MockScanResult::new("src/main.py"),
            MockScanResult::new("src/utils.py"),
            MockScanResult::new("src/config.py"),
            MockScanResult::new("tests/test_main.py"),
        ];

        let file_map: HashMap<&str, &MockScanResult> = scan_results.iter()
            .map(|result| (result.path(), result))
            .collect();

        let resolved = detector.resolve_import(
            "utils",
            "src/main.py",
            &file_map
        );
        assert!(resolved.is_some());

        let resolved_config = detector.resolve_import(
            "src.config",
            "src/main.py",
            &file_map
        );
        assert!(resolved_config.is_some());

        println!("Resolved imports:");
        if let Some(path) = resolved {
            println!("  utils -> {}", path);
        }
        if let Some(path) = resolved_config {
            println!("  src.config -> {}", path);
        }
    }

    #[test]
    fn test_entrypoint_detection() {
        let calculator = CentralityCalculator::new().unwrap();

        assert!(calculator.is_entrypoint_file("main.py"));
        assert!(calculator.is_entrypoint_file("src/main.rs"));
        assert!(calculator.is_entrypoint_file("index.js"));
        assert!(calculator.is_entrypoint_file("app.py"));
        assert!(calculator.is_entrypoint_file("lib.rs"));
        assert!(calculator.is_entrypoint_file("__init__.py"));

        assert!(!calculator.is_entrypoint_file("utils.py"));
        assert!(!calculator.is_entrypoint_file("config.rs"));
        assert!(!calculator.is_entrypoint_file("helper.js"));
    }

    #[test]
    fn test_top_files_by_centrality() {
        let mut pagerank_scores = HashMap::new();
        pagerank_scores.insert("file1.py".to_string(), 0.4);
        pagerank_scores.insert("file2.py".to_string(), 0.6);
        pagerank_scores.insert("file3.py".to_string(), 0.2);
        pagerank_scores.insert("file4.py".to_string(), 0.8);

        let results = CentralityResults {
            pagerank_scores,
            graph_analysis: GraphAnalysisResults {
                basic_stats: crate::graph::GraphStatistics::empty(),
                degree_distribution: Default::default(),
                connectivity: Default::default(),
                structural_patterns: Default::default(),
                import_insights: Default::default(),
                performance_profile: Default::default(),
                analysis_metadata: Default::default(),
            },
            pagerank_details: PageRankResults {
                scores: HashMap::new(),
                iterations_converged: 10,
                convergence_epsilon: 1e-6,
                graph_stats: crate::graph::GraphStatistics::empty(),
                parameters: PageRankConfig::default(),
                performance_metrics: Default::default(),
            },
            import_stats: ImportDetectionStats {
                files_processed: 4,
                imports_detected: 0,
                imports_resolved: 0,
                resolution_rate: 0.0,
                language_breakdown: HashMap::new(),
                import_patterns: HashMap::new(),
            },
            integration_metadata: IntegrationMetadata {
                timestamp: chrono::Utc::now(),
                computation_time_ms: 100,
                integration_successful: true,
                centrality_weight: 0.15,
                files_with_centrality: 4,
                config: CentralityConfig::default(),
            },
        };

        let top_files = results.top_files_by_centrality(2);
        assert_eq!(top_files.len(), 2);
        assert_eq!(top_files[0].0, "file4.py");
        assert_eq!(top_files[0].1, 0.8);
        assert_eq!(top_files[1].0, "file2.py");
        assert_eq!(top_files[1].1, 0.6);
    }
}