1use std::collections::HashMap;
22use std::path::Path;
23use serde::{Deserialize, Serialize};
24use rayon::prelude::*;
25use scribe_core::Result;
26use scribe_analysis::heuristics::ScanResult;
27
28use crate::graph::{DependencyGraph, NodeId};
29use crate::pagerank::{PageRankComputer, PageRankResults, PageRankConfig};
30use crate::statistics::{GraphStatisticsAnalyzer, GraphAnalysisResults};
31
32#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34pub struct CentralityResults {
35 pub pagerank_scores: HashMap<NodeId, f64>,
37
38 pub graph_analysis: GraphAnalysisResults,
40
41 pub pagerank_details: PageRankResults,
43
44 pub import_stats: ImportDetectionStats,
46
47 pub integration_metadata: IntegrationMetadata,
49}
50
51#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
53pub struct ImportDetectionStats {
54 pub files_processed: usize,
56
57 pub imports_detected: usize,
59
60 pub imports_resolved: usize,
62
63 pub resolution_rate: f64,
65
66 pub language_breakdown: HashMap<String, usize>,
68
69 pub import_patterns: HashMap<String, ImportPatternStats>,
71}
72
73#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
75pub struct ImportPatternStats {
76 pub total_imports: usize,
78
79 pub relative_imports: usize,
81
82 pub absolute_imports: usize,
84
85 pub stdlib_imports: usize,
87
88 pub third_party_imports: usize,
90}
91
92#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
94pub struct IntegrationMetadata {
95 pub timestamp: chrono::DateTime<chrono::Utc>,
97
98 pub computation_time_ms: u64,
100
101 pub integration_successful: bool,
103
104 pub centrality_weight: f64,
106
107 pub files_with_centrality: usize,
109
110 pub config: CentralityConfig,
112}
113
114#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
116pub struct CentralityConfig {
117 pub pagerank_config: PageRankConfig,
119
120 pub analyze_graph_structure: bool,
122
123 pub import_resolution: ImportResolutionConfig,
125
126 pub integration: IntegrationConfig,
128}
129
130#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
132pub struct ImportResolutionConfig {
133 pub max_search_depth: usize,
135
136 pub resolve_relative_imports: bool,
138
139 pub resolve_absolute_imports: bool,
141
142 pub exclude_stdlib_imports: bool,
144
145 pub path_mappings: HashMap<String, String>,
147}
148
149#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
151pub struct IntegrationConfig {
152 pub centrality_weight: f64,
154
155 pub normalization_method: NormalizationMethod,
157
158 pub min_centrality_threshold: f64,
160
161 pub boost_entrypoints: bool,
163
164 pub entrypoint_boost_factor: f64,
166}
167
168#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
170pub enum NormalizationMethod {
171 MinMax,
173 ZScore,
175 Rank,
177 None,
179}
180
181impl Default for CentralityConfig {
182 fn default() -> Self {
183 Self {
184 pagerank_config: PageRankConfig::for_code_analysis(),
185 analyze_graph_structure: true,
186 import_resolution: ImportResolutionConfig::default(),
187 integration: IntegrationConfig::default(),
188 }
189 }
190}
191
192impl Default for ImportResolutionConfig {
193 fn default() -> Self {
194 Self {
195 max_search_depth: 3,
196 resolve_relative_imports: true,
197 resolve_absolute_imports: true,
198 exclude_stdlib_imports: true,
199 path_mappings: HashMap::new(),
200 }
201 }
202}
203
204impl Default for IntegrationConfig {
205 fn default() -> Self {
206 Self {
207 centrality_weight: 0.15, normalization_method: NormalizationMethod::MinMax,
209 min_centrality_threshold: 1e-6,
210 boost_entrypoints: true,
211 entrypoint_boost_factor: 1.5,
212 }
213 }
214}
215
216#[derive(Debug)]
218pub struct CentralityCalculator {
219 config: CentralityConfig,
221
222 pagerank_computer: PageRankComputer,
224
225 stats_analyzer: GraphStatisticsAnalyzer,
227
228 import_detector: ImportDetector,
230}
231
232impl CentralityCalculator {
233 pub fn new() -> Result<Self> {
235 let config = CentralityConfig::default();
236 Self::with_config(config)
237 }
238
239 pub fn with_config(config: CentralityConfig) -> Result<Self> {
241 let pagerank_computer = PageRankComputer::with_config(config.pagerank_config.clone())?;
242
243 let stats_analyzer = if config.analyze_graph_structure {
244 GraphStatisticsAnalyzer::new()
245 } else {
246 GraphStatisticsAnalyzer::for_large_graphs()
247 };
248
249 let import_detector = ImportDetector::with_config(config.import_resolution.clone());
250
251 Ok(Self {
252 config,
253 pagerank_computer,
254 stats_analyzer,
255 import_detector,
256 })
257 }
258
259 pub fn for_large_codebases() -> Result<Self> {
261 let config = CentralityConfig {
262 pagerank_config: PageRankConfig::for_large_codebases(),
263 analyze_graph_structure: false,
264 ..CentralityConfig::default()
265 };
266 Self::with_config(config)
267 }
268
269 pub fn calculate_centrality<T>(&self, scan_results: &[T]) -> Result<CentralityResults>
271 where
272 T: ScanResult + Sync,
273 {
274 let start_time = std::time::Instant::now();
275
276 let (graph, import_stats) = self.build_dependency_graph(scan_results)?;
278
279 let pagerank_results = self.pagerank_computer.compute(&graph)?;
281
282 let graph_analysis = if self.config.analyze_graph_structure {
284 self.stats_analyzer.analyze(&graph)?
285 } else {
286 self.create_minimal_analysis(&graph)?
288 };
289
290 let computation_time = start_time.elapsed().as_millis() as u64;
292 let integration_metadata = IntegrationMetadata {
293 timestamp: chrono::Utc::now(),
294 computation_time_ms: computation_time,
295 integration_successful: true,
296 centrality_weight: self.config.integration.centrality_weight,
297 files_with_centrality: pagerank_results.scores.len(),
298 config: self.config.clone(),
299 };
300
301 Ok(CentralityResults {
302 pagerank_scores: pagerank_results.scores.clone(),
303 graph_analysis,
304 pagerank_details: pagerank_results,
305 import_stats,
306 integration_metadata,
307 })
308 }
309
310 pub fn integrate_with_heuristics(
312 &self,
313 centrality_results: &CentralityResults,
314 heuristic_scores: &HashMap<String, f64>,
315 ) -> Result<HashMap<String, f64>> {
316 let normalized_centrality = self.normalize_centrality_scores(
317 ¢rality_results.pagerank_scores,
318 heuristic_scores,
319 )?;
320
321 let mut integrated_scores = HashMap::new();
322 let centrality_weight = self.config.integration.centrality_weight;
323 let heuristic_weight = 1.0 - centrality_weight;
324
325 for (file_path, heuristic_score) in heuristic_scores {
327 let centrality_score = normalized_centrality.get(file_path).copied().unwrap_or(0.0);
328
329 let boosted_centrality = if self.config.integration.boost_entrypoints
331 && self.is_entrypoint_file(file_path) {
332 centrality_score * self.config.integration.entrypoint_boost_factor
333 } else {
334 centrality_score
335 };
336
337 let integrated_score = heuristic_weight * heuristic_score +
338 centrality_weight * boosted_centrality;
339
340 integrated_scores.insert(file_path.clone(), integrated_score);
341 }
342
343 for (file_path, centrality_score) in &normalized_centrality {
345 if !integrated_scores.contains_key(file_path) {
346 let boosted_centrality = if self.config.integration.boost_entrypoints
347 && self.is_entrypoint_file(file_path) {
348 centrality_score * self.config.integration.entrypoint_boost_factor
349 } else {
350 *centrality_score
351 };
352
353 integrated_scores.insert(file_path.clone(), centrality_weight * boosted_centrality);
354 }
355 }
356
357 Ok(integrated_scores)
358 }
359
360 fn build_dependency_graph<T>(&self, scan_results: &[T]) -> Result<(DependencyGraph, ImportDetectionStats)>
362 where
363 T: ScanResult + Sync,
364 {
365 let mut graph = DependencyGraph::with_capacity(scan_results.len());
366
367 for result in scan_results {
369 graph.add_node(result.path().to_string())?;
370 }
371
372 let import_stats = if self.config.pagerank_config.use_parallel {
374 self.build_edges_parallel(&mut graph, scan_results)?
375 } else {
376 self.build_edges_sequential(&mut graph, scan_results)?
377 };
378
379 Ok((graph, import_stats))
380 }
381
382 fn build_edges_sequential<T>(&self, graph: &mut DependencyGraph, scan_results: &[T]) -> Result<ImportDetectionStats>
384 where
385 T: ScanResult,
386 {
387 let mut stats = ImportDetectionStats {
388 files_processed: 0,
389 imports_detected: 0,
390 imports_resolved: 0,
391 resolution_rate: 0.0,
392 language_breakdown: HashMap::new(),
393 import_patterns: HashMap::new(),
394 };
395
396 let file_path_map: HashMap<&str, &T> = scan_results.iter()
398 .map(|result| (result.path(), result))
399 .collect();
400
401 for result in scan_results {
402 stats.files_processed += 1;
403
404 if let Some(lang) = self.import_detector.detect_language(result.path()) {
406 *stats.language_breakdown.entry(lang.clone()).or_insert(0) += 1;
407 }
408
409 if let Some(imports) = result.imports() {
411 stats.imports_detected += imports.len();
412
413 for import_str in imports {
414 if let Some(resolved_path) = self.import_detector.resolve_import(
415 import_str,
416 result.path(),
417 &file_path_map
418 ) {
419 graph.add_edge(result.path().to_string(), resolved_path)?;
420 stats.imports_resolved += 1;
421 }
422 }
423 }
424 }
425
426 stats.resolution_rate = if stats.imports_detected > 0 {
427 stats.imports_resolved as f64 / stats.imports_detected as f64
428 } else {
429 0.0
430 };
431
432 Ok(stats)
433 }
434
435 fn build_edges_parallel<T>(&self, graph: &mut DependencyGraph, scan_results: &[T]) -> Result<ImportDetectionStats>
437 where
438 T: ScanResult + Sync,
439 {
440 let file_path_map: HashMap<&str, &T> = scan_results.iter()
442 .map(|result| (result.path(), result))
443 .collect();
444
445 let import_edges: Vec<_> = scan_results.par_iter()
447 .flat_map(|result| {
448 let mut edges = Vec::new();
449
450 if let Some(imports) = result.imports() {
451 for import_str in imports {
452 if let Some(resolved_path) = self.import_detector.resolve_import(
453 import_str,
454 result.path(),
455 &file_path_map
456 ) {
457 edges.push((result.path().to_string(), resolved_path));
458 }
459 }
460 }
461
462 edges
463 })
464 .collect();
465
466 for (from, to) in &import_edges {
468 graph.add_edge(from.clone(), to.clone())?;
469 }
470
471 let total_imports: usize = scan_results.iter()
473 .map(|result| result.imports().map_or(0, |imports| imports.len()))
474 .sum();
475
476 let language_breakdown: HashMap<String, usize> = scan_results.iter()
477 .filter_map(|result| self.import_detector.detect_language(result.path())
478 .map(|lang| (lang, 1)))
479 .fold(HashMap::new(), |mut acc, (lang, count)| {
480 *acc.entry(lang).or_insert(0) += count;
481 acc
482 });
483
484 let stats = ImportDetectionStats {
485 files_processed: scan_results.len(),
486 imports_detected: total_imports,
487 imports_resolved: import_edges.len(),
488 resolution_rate: if total_imports > 0 {
489 import_edges.len() as f64 / total_imports as f64
490 } else {
491 0.0
492 },
493 language_breakdown,
494 import_patterns: HashMap::new(), };
496
497 Ok(stats)
498 }
499
500 fn normalize_centrality_scores(
502 &self,
503 centrality_scores: &HashMap<String, f64>,
504 heuristic_scores: &HashMap<String, f64>,
505 ) -> Result<HashMap<String, f64>> {
506 if centrality_scores.is_empty() {
507 return Ok(HashMap::new());
508 }
509
510 match self.config.integration.normalization_method {
511 NormalizationMethod::MinMax => self.normalize_min_max(centrality_scores, heuristic_scores),
512 NormalizationMethod::ZScore => self.normalize_z_score(centrality_scores),
513 NormalizationMethod::Rank => self.normalize_rank(centrality_scores),
514 NormalizationMethod::None => Ok(centrality_scores.clone()),
515 }
516 }
517
518 fn normalize_min_max(
520 &self,
521 centrality_scores: &HashMap<String, f64>,
522 heuristic_scores: &HashMap<String, f64>,
523 ) -> Result<HashMap<String, f64>> {
524 let centrality_values: Vec<f64> = centrality_scores.values().copied().collect();
525 let min_centrality = centrality_values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
526 let max_centrality = centrality_values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
527
528 let heuristic_values: Vec<f64> = heuristic_scores.values().copied().collect();
530 let max_heuristic = if heuristic_values.is_empty() {
531 1.0
532 } else {
533 heuristic_values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b))
534 };
535
536 let mut normalized = HashMap::new();
537
538 if (max_centrality - min_centrality).abs() < f64::EPSILON {
539 for (path, _) in centrality_scores {
541 normalized.insert(path.clone(), max_heuristic * 0.5); }
543 } else {
544 for (path, &score) in centrality_scores {
545 let normalized_score = ((score - min_centrality) / (max_centrality - min_centrality)) * max_heuristic;
546 if normalized_score >= self.config.integration.min_centrality_threshold {
547 normalized.insert(path.clone(), normalized_score);
548 }
549 }
550 }
551
552 Ok(normalized)
553 }
554
555 fn normalize_z_score(&self, centrality_scores: &HashMap<String, f64>) -> Result<HashMap<String, f64>> {
557 let values: Vec<f64> = centrality_scores.values().copied().collect();
558 let mean = values.iter().sum::<f64>() / values.len() as f64;
559 let variance = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / values.len() as f64;
560 let std_dev = variance.sqrt();
561
562 let mut normalized = HashMap::new();
563
564 if std_dev > f64::EPSILON {
565 for (path, &score) in centrality_scores {
566 let z_score = (score - mean) / std_dev;
567 let normalized_score = (z_score + 3.0) / 6.0; if normalized_score >= self.config.integration.min_centrality_threshold {
570 normalized.insert(path.clone(), normalized_score);
571 }
572 }
573 } else {
574 for (path, _) in centrality_scores {
576 normalized.insert(path.clone(), 0.5);
577 }
578 }
579
580 Ok(normalized)
581 }
582
583 fn normalize_rank(&self, centrality_scores: &HashMap<String, f64>) -> Result<HashMap<String, f64>> {
585 let mut scored_files: Vec<_> = centrality_scores.iter()
586 .map(|(path, &score)| (path.clone(), score))
587 .collect();
588
589 scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
590
591 let mut normalized = HashMap::new();
592 let total_files = scored_files.len();
593
594 for (rank, (path, _)) in scored_files.into_iter().enumerate() {
595 let normalized_score = 1.0 - (rank as f64 / total_files as f64);
596 if normalized_score >= self.config.integration.min_centrality_threshold {
597 normalized.insert(path, normalized_score);
598 }
599 }
600
601 Ok(normalized)
602 }
603
604 fn create_minimal_analysis(&self, graph: &DependencyGraph) -> Result<GraphAnalysisResults> {
606 let minimal_analyzer = GraphStatisticsAnalyzer::for_large_graphs();
608 minimal_analyzer.analyze(graph)
609 }
610
611 fn is_entrypoint_file(&self, file_path: &str) -> bool {
613 let path = Path::new(file_path);
614 let file_name = path.file_name()
615 .and_then(|name| name.to_str())
616 .unwrap_or("")
617 .to_lowercase();
618
619 matches!(file_name.as_str(),
620 "main.py" | "main.rs" | "main.go" | "main.js" | "main.ts" |
621 "index.py" | "index.rs" | "index.go" | "index.js" | "index.ts" |
622 "app.py" | "app.rs" | "app.go" | "app.js" | "app.ts" |
623 "server.py" | "server.rs" | "server.go" | "server.js" | "server.ts" |
624 "lib.rs" | "__init__.py"
625 )
626 }
627}
628
629impl Default for CentralityCalculator {
630 fn default() -> Self {
631 Self::new().expect("Failed to create CentralityCalculator")
632 }
633}
634
635#[derive(Debug, Clone)]
637pub struct ImportDetector {
638 config: ImportResolutionConfig,
639}
640
641impl ImportDetector {
642 pub fn with_config(config: ImportResolutionConfig) -> Self {
644 Self { config }
645 }
646
647 pub fn detect_language(&self, file_path: &str) -> Option<String> {
649 let path = Path::new(file_path);
650 let ext = path.extension()?.to_str()?.to_lowercase();
651
652 match ext.as_str() {
653 "py" => Some("python".to_string()),
654 "js" | "jsx" | "mjs" => Some("javascript".to_string()),
655 "ts" | "tsx" => Some("typescript".to_string()),
656 "rs" => Some("rust".to_string()),
657 "go" => Some("go".to_string()),
658 "java" | "kt" => Some("java".to_string()),
659 "cpp" | "cc" | "cxx" | "hpp" | "h" => Some("cpp".to_string()),
660 "c" => Some("c".to_string()),
661 "rb" => Some("ruby".to_string()),
662 "php" => Some("php".to_string()),
663 "cs" => Some("csharp".to_string()),
664 "swift" => Some("swift".to_string()),
665 _ => None,
666 }
667 }
668
669 pub fn resolve_import<T>(
671 &self,
672 import_str: &str,
673 current_file: &str,
674 file_map: &HashMap<&str, &T>,
675 ) -> Option<String>
676 where
677 T: ScanResult,
678 {
679 if let Some(mapped_path) = self.config.path_mappings.get(import_str) {
681 if file_map.contains_key(mapped_path.as_str()) {
682 return Some(mapped_path.clone());
683 }
684 }
685
686 let current_path = Path::new(current_file);
687 let language = self.detect_language(current_file);
688
689 match language.as_deref() {
690 Some("python") => self.resolve_python_import(import_str, current_path, file_map),
691 Some("javascript") | Some("typescript") => self.resolve_js_import(import_str, current_path, file_map),
692 Some("rust") => self.resolve_rust_import(import_str, current_path, file_map),
693 Some("go") => self.resolve_go_import(import_str, current_path, file_map),
694 _ => self.resolve_generic_import(import_str, current_path, file_map),
695 }
696 }
697
698 fn resolve_python_import<T>(
700 &self,
701 import_str: &str,
702 current_path: &Path,
703 file_map: &HashMap<&str, &T>,
704 ) -> Option<String>
705 where
706 T: ScanResult,
707 {
708 let cleaned_import = import_str.trim();
709
710 if self.config.exclude_stdlib_imports && self.is_python_stdlib(cleaned_import) {
712 return None;
713 }
714
715 let module_parts: Vec<&str> = cleaned_import.split('.').collect();
717
718 let mut candidates = Vec::new();
720
721 candidates.push(format!("{}.py", module_parts.join("/")));
723
724 candidates.push(format!("{}/__init__.py", module_parts.join("/")));
726
727 if let Some(parent) = current_path.parent() {
729 let parent_str = parent.to_string_lossy();
730 let relative_candidates: Vec<String> = candidates.iter()
731 .map(|candidate| format!("{}/{}", parent_str, candidate))
732 .collect();
733 candidates.extend(relative_candidates);
734 }
735
736 for candidate in &candidates {
738 if file_map.contains_key(candidate.as_str()) {
739 return Some(candidate.clone());
740 }
741 }
742
743 self.fuzzy_match_import(&module_parts, file_map)
745 }
746
747 fn resolve_js_import<T>(
749 &self,
750 import_str: &str,
751 current_path: &Path,
752 file_map: &HashMap<&str, &T>,
753 ) -> Option<String>
754 where
755 T: ScanResult,
756 {
757 let cleaned_import = import_str.trim();
758
759 if cleaned_import.starts_with("./") || cleaned_import.starts_with("../") {
761 if !self.config.resolve_relative_imports {
762 return None;
763 }
764
765 if let Some(parent) = current_path.parent() {
766 let resolved_path = parent.join(&cleaned_import[2..]); let resolved_str = resolved_path.to_string_lossy();
768
769 for ext in &[".js", ".ts", ".jsx", ".tsx", "/index.js", "/index.ts"] {
771 let candidate = format!("{}{}", resolved_str, ext);
772 if file_map.contains_key(candidate.as_str()) {
773 return Some(candidate);
774 }
775 }
776 }
777 }
778 else if self.config.resolve_absolute_imports {
780 let import_parts: Vec<&str> = cleaned_import.split('/').collect();
781 return self.fuzzy_match_import(&import_parts, file_map);
782 }
783
784 None
785 }
786
787 fn resolve_rust_import<T>(
789 &self,
790 import_str: &str,
791 _current_path: &Path,
792 file_map: &HashMap<&str, &T>,
793 ) -> Option<String>
794 where
795 T: ScanResult,
796 {
797 let cleaned_import = import_str.trim();
798
799 if self.config.exclude_stdlib_imports && self.is_rust_stdlib(cleaned_import) {
801 return None;
802 }
803
804 let parts: Vec<&str> = cleaned_import.split("::").collect();
805
806 let mut candidates = Vec::new();
808
809 candidates.push(format!("{}.rs", parts.join("/")));
811
812 candidates.push(format!("{}/mod.rs", parts.join("/")));
814
815 if parts.len() == 1 {
817 candidates.push(format!("{}/lib.rs", parts[0]));
818 candidates.push(format!("{}/src/lib.rs", parts[0]));
819 }
820
821 for candidate in &candidates {
823 if file_map.contains_key(candidate.as_str()) {
824 return Some(candidate.clone());
825 }
826 }
827
828 self.fuzzy_match_import(&parts, file_map)
830 }
831
832 fn resolve_go_import<T>(
834 &self,
835 import_str: &str,
836 _current_path: &Path,
837 file_map: &HashMap<&str, &T>,
838 ) -> Option<String>
839 where
840 T: ScanResult,
841 {
842 let cleaned_import = import_str.trim().trim_matches('"');
843
844 if self.config.exclude_stdlib_imports && !cleaned_import.contains('.') {
846 return None;
847 }
848
849 let parts: Vec<&str> = cleaned_import.split('/').collect();
850
851 let mut candidates = Vec::new();
853
854 candidates.push(format!("{}.go", parts.last()?));
856 candidates.push(format!("{}/main.go", cleaned_import));
857 candidates.push(format!("{}/{}.go", cleaned_import, parts.last()?));
858
859 for candidate in &candidates {
860 if file_map.contains_key(candidate.as_str()) {
861 return Some(candidate.clone());
862 }
863 }
864
865 self.fuzzy_match_import(&parts, file_map)
866 }
867
868 fn resolve_generic_import<T>(
870 &self,
871 import_str: &str,
872 _current_path: &Path,
873 file_map: &HashMap<&str, &T>,
874 ) -> Option<String>
875 where
876 T: ScanResult,
877 {
878 let cleaned_import = import_str.trim();
879 let parts: Vec<&str> = cleaned_import.split(&['/', '.', ':']).collect();
880 self.fuzzy_match_import(&parts, file_map)
881 }
882
883 fn fuzzy_match_import<T>(
885 &self,
886 import_parts: &[&str],
887 file_map: &HashMap<&str, &T>,
888 ) -> Option<String>
889 where
890 T: ScanResult,
891 {
892 if import_parts.is_empty() {
893 return None;
894 }
895
896 let last_part = import_parts.last()?.to_lowercase();
897
898 for &file_path in file_map.keys() {
900 let file_path_lower = file_path.to_lowercase();
901
902 if let Some(file_stem) = Path::new(file_path).file_stem() {
904 if let Some(stem_str) = file_stem.to_str() {
905 if stem_str.to_lowercase() == last_part {
906 return Some(file_path.to_string());
907 }
908 }
909 }
910
911 if import_parts.iter().all(|&part| file_path_lower.contains(&part.to_lowercase())) {
913 return Some(file_path.to_string());
914 }
915 }
916
917 None
918 }
919
920 fn is_python_stdlib(&self, import_str: &str) -> bool {
922 let stdlib_modules = [
923 "os", "sys", "re", "json", "collections", "itertools", "functools",
924 "typing", "datetime", "math", "random", "string", "pathlib", "io",
925 "csv", "xml", "html", "urllib", "http", "email", "logging", "unittest",
926 "asyncio", "concurrent", "multiprocessing", "threading", "subprocess",
927 ];
928
929 let first_part = import_str.split('.').next().unwrap_or(import_str);
930 stdlib_modules.contains(&first_part)
931 }
932
933 fn is_rust_stdlib(&self, import_str: &str) -> bool {
935 import_str.starts_with("std::") ||
936 import_str.starts_with("core::") ||
937 import_str.starts_with("alloc::")
938 }
939}
940
941impl CentralityResults {
943 pub fn top_files_by_centrality(&self, k: usize) -> Vec<(String, f64)> {
945 let mut scored_files: Vec<_> = self.pagerank_scores.iter()
946 .map(|(path, &score)| (path.clone(), score))
947 .collect();
948
949 scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
950 scored_files.into_iter().take(k).collect()
951 }
952
953 pub fn summary(&self) -> String {
955 format!(
956 "Centrality Analysis Summary:\n\
957 - Files with centrality scores: {}\n\
958 - PageRank iterations: {} (converged: {})\n\
959 - Graph: {} nodes, {} edges (density: {:.4})\n\
960 - Import resolution: {:.1}% ({}/{})\n\
961 - Top languages: {}\n\
962 - Computation time: {}ms\n\
963 - Integration weight: {:.2}",
964 self.pagerank_scores.len(),
965 self.pagerank_details.iterations_converged,
966 self.pagerank_details.converged(),
967 self.graph_analysis.basic_stats.total_nodes,
968 self.graph_analysis.basic_stats.total_edges,
969 self.graph_analysis.basic_stats.graph_density,
970 self.import_stats.resolution_rate * 100.0,
971 self.import_stats.imports_resolved,
972 self.import_stats.imports_detected,
973 self.import_stats.language_breakdown.iter()
974 .max_by_key(|(_, &count)| count)
975 .map(|(lang, count)| format!("{} ({})", lang, count))
976 .unwrap_or_else(|| "None".to_string()),
977 self.integration_metadata.computation_time_ms,
978 self.integration_metadata.centrality_weight,
979 )
980 }
981}
982
983#[cfg(test)]
984mod tests {
985 use super::*;
986 use scribe_analysis::heuristics::DocumentAnalysis;
987
988 #[derive(Debug, Clone)]
990 struct MockScanResult {
991 path: String,
992 relative_path: String,
993 depth: usize,
994 imports: Option<Vec<String>>,
995 is_docs: bool,
996 is_readme: bool,
997 is_test: bool,
998 is_entrypoint: bool,
999 has_examples: bool,
1000 priority_boost: f64,
1001 churn_score: f64,
1002 centrality_in: f64,
1003 doc_analysis: Option<DocumentAnalysis>,
1004 }
1005
1006 impl MockScanResult {
1007 fn new(path: &str) -> Self {
1008 Self {
1009 path: path.to_string(),
1010 relative_path: path.to_string(),
1011 depth: path.matches('/').count(),
1012 imports: None,
1013 is_docs: path.contains("doc") || path.ends_with(".md"),
1014 is_readme: path.to_lowercase().contains("readme"),
1015 is_test: path.contains("test"),
1016 is_entrypoint: path.contains("main") || path.contains("index"),
1017 has_examples: path.contains("example"),
1018 priority_boost: 0.0,
1019 churn_score: 0.5,
1020 centrality_in: 0.0,
1021 doc_analysis: Some(DocumentAnalysis::new()),
1022 }
1023 }
1024
1025 fn with_imports(mut self, imports: Vec<String>) -> Self {
1026 self.imports = Some(imports);
1027 self
1028 }
1029 }
1030
1031 impl ScanResult for MockScanResult {
1032 fn path(&self) -> &str { &self.path }
1033 fn relative_path(&self) -> &str { &self.relative_path }
1034 fn depth(&self) -> usize { self.depth }
1035 fn is_docs(&self) -> bool { self.is_docs }
1036 fn is_readme(&self) -> bool { self.is_readme }
1037 fn is_test(&self) -> bool { self.is_test }
1038 fn is_entrypoint(&self) -> bool { self.is_entrypoint }
1039 fn has_examples(&self) -> bool { self.has_examples }
1040 fn priority_boost(&self) -> f64 { self.priority_boost }
1041 fn churn_score(&self) -> f64 { self.churn_score }
1042 fn centrality_in(&self) -> f64 { self.centrality_in }
1043 fn imports(&self) -> Option<&[String]> { self.imports.as_deref() }
1044 fn doc_analysis(&self) -> Option<&DocumentAnalysis> { self.doc_analysis.as_ref() }
1045 }
1046
1047 #[test]
1048 fn test_centrality_calculator_creation() {
1049 let calculator = CentralityCalculator::new();
1050 assert!(calculator.is_ok());
1051
1052 let large_calc = CentralityCalculator::for_large_codebases();
1053 assert!(large_calc.is_ok());
1054 }
1055
1056 #[test]
1057 fn test_import_detection() {
1058 let detector = ImportDetector::with_config(ImportResolutionConfig::default());
1059
1060 assert_eq!(detector.detect_language("main.py"), Some("python".to_string()));
1062 assert_eq!(detector.detect_language("app.js"), Some("javascript".to_string()));
1063 assert_eq!(detector.detect_language("lib.rs"), Some("rust".to_string()));
1064
1065 assert!(detector.is_python_stdlib("os"));
1067 assert!(detector.is_python_stdlib("sys.path"));
1068 assert!(!detector.is_python_stdlib("custom_module"));
1069
1070 assert!(detector.is_rust_stdlib("std::collections::HashMap"));
1072 assert!(detector.is_rust_stdlib("core::fmt"));
1073 assert!(!detector.is_rust_stdlib("serde::Deserialize"));
1074 }
1075
1076 #[test]
1077 fn test_centrality_calculation() {
1078 let calculator = CentralityCalculator::new().unwrap();
1079
1080 let scan_results = vec![
1081 MockScanResult::new("main.py").with_imports(vec!["utils".to_string(), "config".to_string()]),
1082 MockScanResult::new("utils.py").with_imports(vec!["config".to_string()]),
1083 MockScanResult::new("config.py"),
1084 MockScanResult::new("test.py").with_imports(vec!["main".to_string()]),
1085 ];
1086
1087 let results = calculator.calculate_centrality(&scan_results).unwrap();
1088
1089 assert!(!results.pagerank_scores.is_empty());
1091 assert!(results.integration_metadata.integration_successful);
1092 assert_eq!(results.integration_metadata.files_with_centrality, results.pagerank_scores.len());
1093
1094 let config_score = results.pagerank_scores.get("config.py");
1096 assert!(config_score.is_some());
1097
1098 println!("Centrality scores:");
1099 for (file, score) in &results.pagerank_scores {
1100 println!(" {}: {:.6}", file, score);
1101 }
1102
1103 println!("\n{}", results.summary());
1104 }
1105
1106 #[test]
1107 fn test_heuristics_integration() {
1108 let calculator = CentralityCalculator::new().unwrap();
1109
1110 let scan_results = vec![
1111 MockScanResult::new("main.py").with_imports(vec!["utils".to_string()]),
1112 MockScanResult::new("utils.py"),
1113 ];
1114
1115 let centrality_results = calculator.calculate_centrality(&scan_results).unwrap();
1116
1117 let mut heuristic_scores = HashMap::new();
1119 heuristic_scores.insert("main.py".to_string(), 0.8);
1120 heuristic_scores.insert("utils.py".to_string(), 0.6);
1121
1122 let integrated_scores = calculator.integrate_with_heuristics(
1123 ¢rality_results,
1124 &heuristic_scores
1125 ).unwrap();
1126
1127 assert!(!integrated_scores.is_empty());
1128
1129 for (file, integrated_score) in &integrated_scores {
1131 let original_score = heuristic_scores.get(file).unwrap();
1132 println!("File {}: heuristic={:.3}, integrated={:.3}",
1133 file, original_score, integrated_score);
1134 }
1135 }
1136
1137 #[test]
1138 fn test_normalization_methods() {
1139 let calculator = CentralityCalculator::new().unwrap();
1140
1141 let centrality_scores = vec![
1142 ("file1".to_string(), 0.1),
1143 ("file2".to_string(), 0.3),
1144 ("file3".to_string(), 0.6),
1145 ("file4".to_string(), 1.0),
1146 ].into_iter().collect();
1147
1148 let heuristic_scores = vec![
1149 ("file1".to_string(), 0.5),
1150 ("file2".to_string(), 0.7),
1151 ("file3".to_string(), 0.9),
1152 ("file4".to_string(), 1.2),
1153 ].into_iter().collect();
1154
1155 let normalized = calculator.normalize_min_max(¢rality_scores, &heuristic_scores).unwrap();
1157 assert!(!normalized.is_empty());
1158
1159 let z_normalized = calculator.normalize_z_score(¢rality_scores).unwrap();
1161 assert!(!z_normalized.is_empty());
1162
1163 let rank_normalized = calculator.normalize_rank(¢rality_scores).unwrap();
1165 assert!(!rank_normalized.is_empty());
1166
1167 println!("Original scores: {:?}", centrality_scores);
1168 println!("Min-max normalized: {:?}", normalized);
1169 println!("Z-score normalized: {:?}", z_normalized);
1170 println!("Rank normalized: {:?}", rank_normalized);
1171 }
1172
1173 #[test]
1174 fn test_import_resolution() {
1175 let detector = ImportDetector::with_config(ImportResolutionConfig::default());
1176
1177 let scan_results = vec![
1179 MockScanResult::new("src/main.py"),
1180 MockScanResult::new("src/utils.py"),
1181 MockScanResult::new("src/config.py"),
1182 MockScanResult::new("tests/test_main.py"),
1183 ];
1184
1185 let file_map: HashMap<&str, &MockScanResult> = scan_results.iter()
1186 .map(|result| (result.path(), result))
1187 .collect();
1188
1189 let resolved = detector.resolve_import(
1191 "utils",
1192 "src/main.py",
1193 &file_map
1194 );
1195 assert!(resolved.is_some());
1196
1197 let resolved_config = detector.resolve_import(
1199 "src.config",
1200 "src/main.py",
1201 &file_map
1202 );
1203 assert!(resolved_config.is_some());
1205
1206 println!("Resolved imports:");
1207 if let Some(path) = resolved {
1208 println!(" utils -> {}", path);
1209 }
1210 if let Some(path) = resolved_config {
1211 println!(" src.config -> {}", path);
1212 }
1213 }
1214
1215 #[test]
1216 fn test_entrypoint_detection() {
1217 let calculator = CentralityCalculator::new().unwrap();
1218
1219 assert!(calculator.is_entrypoint_file("main.py"));
1220 assert!(calculator.is_entrypoint_file("src/main.rs"));
1221 assert!(calculator.is_entrypoint_file("index.js"));
1222 assert!(calculator.is_entrypoint_file("app.py"));
1223 assert!(calculator.is_entrypoint_file("lib.rs"));
1224 assert!(calculator.is_entrypoint_file("__init__.py"));
1225
1226 assert!(!calculator.is_entrypoint_file("utils.py"));
1227 assert!(!calculator.is_entrypoint_file("config.rs"));
1228 assert!(!calculator.is_entrypoint_file("helper.js"));
1229 }
1230
1231 #[test]
1232 fn test_top_files_by_centrality() {
1233 let mut pagerank_scores = HashMap::new();
1234 pagerank_scores.insert("file1.py".to_string(), 0.4);
1235 pagerank_scores.insert("file2.py".to_string(), 0.6);
1236 pagerank_scores.insert("file3.py".to_string(), 0.2);
1237 pagerank_scores.insert("file4.py".to_string(), 0.8);
1238
1239 let results = CentralityResults {
1240 pagerank_scores,
1241 graph_analysis: GraphAnalysisResults {
1242 basic_stats: crate::graph::GraphStatistics::empty(),
1243 degree_distribution: Default::default(),
1244 connectivity: Default::default(),
1245 structural_patterns: Default::default(),
1246 import_insights: Default::default(),
1247 performance_profile: Default::default(),
1248 analysis_metadata: Default::default(),
1249 },
1250 pagerank_details: PageRankResults {
1251 scores: HashMap::new(),
1252 iterations_converged: 10,
1253 convergence_epsilon: 1e-6,
1254 graph_stats: crate::graph::GraphStatistics::empty(),
1255 parameters: PageRankConfig::default(),
1256 performance_metrics: Default::default(),
1257 },
1258 import_stats: ImportDetectionStats {
1259 files_processed: 4,
1260 imports_detected: 0,
1261 imports_resolved: 0,
1262 resolution_rate: 0.0,
1263 language_breakdown: HashMap::new(),
1264 import_patterns: HashMap::new(),
1265 },
1266 integration_metadata: IntegrationMetadata {
1267 timestamp: chrono::Utc::now(),
1268 computation_time_ms: 100,
1269 integration_successful: true,
1270 centrality_weight: 0.15,
1271 files_with_centrality: 4,
1272 config: CentralityConfig::default(),
1273 },
1274 };
1275
1276 let top_files = results.top_files_by_centrality(2);
1277 assert_eq!(top_files.len(), 2);
1278 assert_eq!(top_files[0].0, "file4.py");
1279 assert_eq!(top_files[0].1, 0.8);
1280 assert_eq!(top_files[1].0, "file2.py");
1281 assert_eq!(top_files[1].1, 0.6);
1282 }
1283}