scribe_graph/
centrality.rs

1//! # Centrality Calculator with Heuristics Integration
2//!
3//! Main interface for PageRank centrality calculation and integration with the
4//! heuristic scoring system used by Scribe. This module provides the high-level API for:
5//!
6//! ## Key Features
7//! - **PageRank Centrality Computation**: Research-grade algorithm with convergence detection
8//! - **Import Graph Construction**: Builds dependency graphs from file scan results  
9//! - **Heuristics Integration**: Seamless integration with V2 scoring system
10//! - **Performance Optimization**: Efficient computation for large codebases
11//! - **Multi-language Support**: Import detection across programming languages
12//! - **Comprehensive Analysis**: Full graph statistics and structural insights
13//!
14//! ## Integration with Scribe Heuristics
15//! The centrality scores are integrated into the heuristic scoring formula:
16//! ```text
17//! final_score = Σ(weight_i × normalized_score_i) + priority_boost + template_boost
18//! ```
19//! Where `centrality_score` becomes a weighted component when V2 features are enabled.
20
21use rayon::prelude::*;
22use scribe_analysis::heuristics::ScanResult;
23use scribe_core::{file, Result};
24use serde::{Deserialize, Serialize};
25use std::collections::{HashMap, HashSet};
26use std::path::{Path, PathBuf};
27
28use crate::graph::{DependencyGraph, NodeId};
29use crate::pagerank::{PageRankComputer, PageRankConfig, PageRankResults};
30use crate::statistics::{GraphAnalysisResults, GraphStatisticsAnalyzer};
31
/// Complete centrality calculation results with comprehensive metadata.
///
/// Returned by `CentralityCalculator::calculate_centrality`. Bundles the raw
/// PageRank scores with the graph analysis, import-detection statistics and
/// metadata describing the run.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CentralityResults {
    /// PageRank scores (file path -> centrality score).
    ///
    /// `calculate_centrality` fills this with a clone of `pagerank_details.scores`.
    pub pagerank_scores: HashMap<NodeId, f64>,

    /// Graph analysis results
    pub graph_analysis: GraphAnalysisResults,

    /// PageRank computation details (the full result object of the computation)
    pub pagerank_details: PageRankResults,

    /// Import detection statistics gathered while building the dependency graph
    pub import_stats: ImportDetectionStats,

    /// Integration metadata (timing, weight and configuration of this run)
    pub integration_metadata: IntegrationMetadata,
}
50
/// Statistics about import detection and graph construction.
///
/// Populated by the edge-building routines of `CentralityCalculator`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ImportDetectionStats {
    /// Number of files processed for import detection
    pub files_processed: usize,

    /// Number of import relationships detected (raw import strings, resolved or not)
    pub imports_detected: usize,

    /// Number of resolved imports (mapped to actual files); each one becomes a graph edge
    pub imports_resolved: usize,

    /// Import resolution success rate: `imports_resolved / imports_detected`,
    /// or `0.0` when no imports were detected
    pub resolution_rate: f64,

    /// Language breakdown of processed files (language name -> file count).
    /// Files whose language is not recognized are not counted.
    pub language_breakdown: HashMap<String, usize>,

    /// Import patterns by language.
    ///
    /// NOTE(review): the edge builders in this file currently leave this map empty.
    pub import_patterns: HashMap<String, ImportPatternStats>,
}
72
/// Import pattern statistics for a specific language.
///
/// Stored per language in `ImportDetectionStats::import_patterns`.
/// NOTE(review): no code visible in this part of the file fills these
/// counters yet — confirm before relying on them.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ImportPatternStats {
    /// Total imports found
    pub total_imports: usize,

    /// Relative imports (./,../)
    pub relative_imports: usize,

    /// Absolute imports
    pub absolute_imports: usize,

    /// Standard library imports
    pub stdlib_imports: usize,

    /// Third-party imports
    pub third_party_imports: usize,
}
91
/// Metadata about centrality-heuristics integration.
///
/// Created at the end of `CentralityCalculator::calculate_centrality`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IntegrationMetadata {
    /// When the analysis was performed (UTC, taken after the computation finished)
    pub timestamp: chrono::DateTime<chrono::Utc>,

    /// Total computation time in milliseconds, covering graph construction,
    /// PageRank and graph analysis
    pub computation_time_ms: u64,

    /// Whether centrality was successfully integrated
    /// (`calculate_centrality` always sets this to `true` on its success path)
    pub integration_successful: bool,

    /// Centrality weight used in integration (copied from `IntegrationConfig`)
    pub centrality_weight: f64,

    /// Number of files with centrality scores (size of the PageRank score map)
    pub files_with_centrality: usize,

    /// Configuration used (a clone of the calculator's configuration)
    pub config: CentralityConfig,
}
113
/// Configuration for centrality calculation.
///
/// Groups the PageRank, import-resolution and score-integration settings
/// consumed by `CentralityCalculator`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CentralityConfig {
    /// PageRank algorithm configuration
    pub pagerank_config: PageRankConfig,

    /// Whether to perform expensive graph analysis.
    /// When `false`, the calculator uses the `for_large_graphs` analyzer instead.
    pub analyze_graph_structure: bool,

    /// Import resolution configuration
    pub import_resolution: ImportResolutionConfig,

    /// Integration parameters
    pub integration: IntegrationConfig,
}
129
/// Configuration for import resolution, consumed by `ImportDetector`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ImportResolutionConfig {
    /// Maximum search depth for import resolution
    // NOTE(review): not read by any code visible in this part of the file.
    pub max_search_depth: usize,

    /// Whether to resolve relative imports
    pub resolve_relative_imports: bool,

    /// Whether to resolve absolute imports
    pub resolve_absolute_imports: bool,

    /// Whether to exclude standard library imports
    pub exclude_stdlib_imports: bool,

    /// Custom import path mappings (import string -> file path).
    ///
    /// Consulted before any language-specific resolution; a mapping is only
    /// honored when its target is one of the scanned files.
    pub path_mappings: HashMap<String, String>,
}
148
/// Configuration for heuristics integration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IntegrationConfig {
    /// Weight for centrality in final score.
    /// The heuristic score is weighted by `1.0 - centrality_weight`.
    pub centrality_weight: f64,

    /// Normalization method for centrality scores
    pub normalization_method: NormalizationMethod,

    /// Minimum centrality score threshold.
    /// Normalized scores below this value are dropped from the result.
    pub min_centrality_threshold: f64,

    /// Whether to boost entrypoint centrality
    pub boost_entrypoints: bool,

    /// Entrypoint boost factor (multiplier applied to the normalized
    /// centrality of entrypoint files when `boost_entrypoints` is set)
    pub entrypoint_boost_factor: f64,
}
167
/// Methods for normalizing centrality scores before they are combined with
/// heuristic scores.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum NormalizationMethod {
    /// Min-max scaling: the lowest score maps to 0 and the highest to the
    /// maximum heuristic score (1.0 when no heuristic scores are supplied)
    MinMax,
    /// Z-score normalization, shifted and scaled as `(z + 3) / 6` so that
    /// most values land roughly in \[0,1\]
    ZScore,
    /// Rank-based normalization: `1 - rank / total`, with rank 0 being the
    /// highest-scoring file
    Rank,
    /// No normalization (raw PageRank scores are used as-is)
    None,
}
180
/// Defaults: code-analysis PageRank settings, full graph analysis enabled,
/// and the default import-resolution and integration settings.
impl Default for CentralityConfig {
    fn default() -> Self {
        Self {
            pagerank_config: PageRankConfig::for_code_analysis(),
            analyze_graph_structure: true,
            import_resolution: ImportResolutionConfig::default(),
            integration: IntegrationConfig::default(),
        }
    }
}
191
/// Defaults: search depth 3, relative and absolute resolution enabled,
/// standard-library imports excluded, no custom path mappings.
impl Default for ImportResolutionConfig {
    fn default() -> Self {
        Self {
            max_search_depth: 3,
            resolve_relative_imports: true,
            resolve_absolute_imports: true,
            exclude_stdlib_imports: true,
            path_mappings: HashMap::new(),
        }
    }
}
203
/// Defaults: 15% centrality weight, min-max normalization, a 1e-6 score
/// threshold, and a 1.5x boost for entrypoint files.
impl Default for IntegrationConfig {
    fn default() -> Self {
        Self {
            centrality_weight: 0.15, // 15% weight in V2 scoring
            normalization_method: NormalizationMethod::MinMax,
            min_centrality_threshold: 1e-6,
            boost_entrypoints: true,
            entrypoint_boost_factor: 1.5,
        }
    }
}
215
/// Main centrality calculator with heuristics integration.
///
/// Owns the PageRank computer, graph analyzer and import detector that are
/// built from a single `CentralityConfig`.
#[derive(Debug)]
pub struct CentralityCalculator {
    /// Configuration
    config: CentralityConfig,

    /// PageRank computer
    pagerank_computer: PageRankComputer,

    /// Graph statistics analyzer (full or large-graph variant, chosen from
    /// `config.analyze_graph_structure` at construction time)
    stats_analyzer: GraphStatisticsAnalyzer,

    /// Import detector; holds the import-resolution configuration that is
    /// cloned when a per-run file index is built
    import_detector: ImportDetector,
}
231
232impl CentralityCalculator {
233    /// Create a new centrality calculator with default configuration
234    pub fn new() -> Result<Self> {
235        let config = CentralityConfig::default();
236        Self::with_config(config)
237    }
238
239    /// Create with custom configuration
240    pub fn with_config(config: CentralityConfig) -> Result<Self> {
241        let pagerank_computer = PageRankComputer::with_config(config.pagerank_config.clone())?;
242
243        let stats_analyzer = if config.analyze_graph_structure {
244            GraphStatisticsAnalyzer::new()
245        } else {
246            GraphStatisticsAnalyzer::for_large_graphs()
247        };
248
249        let import_detector = ImportDetector::with_config(config.import_resolution.clone());
250
251        Ok(Self {
252            config,
253            pagerank_computer,
254            stats_analyzer,
255            import_detector,
256        })
257    }
258
259    /// Create optimized for large codebases
260    pub fn for_large_codebases() -> Result<Self> {
261        let config = CentralityConfig {
262            pagerank_config: PageRankConfig::for_large_codebases(),
263            analyze_graph_structure: false,
264            ..CentralityConfig::default()
265        };
266        Self::with_config(config)
267    }
268
269    /// Calculate centrality scores for a collection of scan results
270    pub fn calculate_centrality<T>(&self, scan_results: &[T]) -> Result<CentralityResults>
271    where
272        T: ScanResult + Sync,
273    {
274        let start_time = std::time::Instant::now();
275
276        // Build dependency graph from scan results
277        let (graph, import_stats) = self.build_dependency_graph(scan_results)?;
278
279        // Compute PageRank centrality
280        let pagerank_results = self.pagerank_computer.compute(&graph)?;
281
282        // Perform graph analysis if enabled
283        let graph_analysis = if self.config.analyze_graph_structure {
284            self.stats_analyzer.analyze(&graph)?
285        } else {
286            // Create minimal analysis for large graphs
287            self.create_minimal_analysis(&graph)?
288        };
289
290        // Create integration metadata
291        let computation_time = start_time.elapsed().as_millis() as u64;
292        let integration_metadata = IntegrationMetadata {
293            timestamp: chrono::Utc::now(),
294            computation_time_ms: computation_time,
295            integration_successful: true,
296            centrality_weight: self.config.integration.centrality_weight,
297            files_with_centrality: pagerank_results.scores.len(),
298            config: self.config.clone(),
299        };
300
301        Ok(CentralityResults {
302            pagerank_scores: pagerank_results.scores.clone(),
303            graph_analysis,
304            pagerank_details: pagerank_results,
305            import_stats,
306            integration_metadata,
307        })
308    }
309
310    /// Integrate centrality scores with existing heuristic scores
311    pub fn integrate_with_heuristics(
312        &self,
313        centrality_results: &CentralityResults,
314        heuristic_scores: &HashMap<String, f64>,
315    ) -> Result<HashMap<String, f64>> {
316        let normalized_centrality = self
317            .normalize_centrality_scores(&centrality_results.pagerank_scores, heuristic_scores)?;
318
319        let mut integrated_scores = HashMap::new();
320        let centrality_weight = self.config.integration.centrality_weight;
321        let heuristic_weight = 1.0 - centrality_weight;
322
323        // Combine heuristic and centrality scores
324        for (file_path, heuristic_score) in heuristic_scores {
325            let centrality_score = normalized_centrality.get(file_path).copied().unwrap_or(0.0);
326
327            // Apply entrypoint boost if configured
328            let boosted_centrality = if self.config.integration.boost_entrypoints
329                && self.is_entrypoint_file(file_path)
330            {
331                centrality_score * self.config.integration.entrypoint_boost_factor
332            } else {
333                centrality_score
334            };
335
336            let integrated_score =
337                heuristic_weight * heuristic_score + centrality_weight * boosted_centrality;
338
339            integrated_scores.insert(file_path.clone(), integrated_score);
340        }
341
342        // Add centrality-only files (not in heuristic scores)
343        for (file_path, centrality_score) in &normalized_centrality {
344            if !integrated_scores.contains_key(file_path) {
345                let boosted_centrality = if self.config.integration.boost_entrypoints
346                    && self.is_entrypoint_file(file_path)
347                {
348                    centrality_score * self.config.integration.entrypoint_boost_factor
349                } else {
350                    *centrality_score
351                };
352
353                integrated_scores.insert(file_path.clone(), centrality_weight * boosted_centrality);
354            }
355        }
356
357        Ok(integrated_scores)
358    }
359
360    /// Build dependency graph from scan results
361    fn build_dependency_graph<T>(
362        &self,
363        scan_results: &[T],
364    ) -> Result<(DependencyGraph, ImportDetectionStats)>
365    where
366        T: ScanResult + Sync,
367    {
368        let mut graph = DependencyGraph::with_capacity(scan_results.len());
369
370        // Create optimized import detector with pre-computed lookup maps
371        let mut optimized_detector =
372            ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
373
374        // Add all files as nodes first
375        for result in scan_results {
376            graph.add_node(result.path().to_string())?;
377        }
378
379        // Detect imports and build edges using optimized detector
380        let import_stats = if self.config.pagerank_config.use_parallel {
381            self.build_edges_parallel_optimized(&mut graph, scan_results, &optimized_detector)?
382        } else {
383            self.build_edges_sequential_optimized(&mut graph, scan_results, &optimized_detector)?
384        };
385
386        Ok((graph, import_stats))
387    }
388
389    /// Build graph edges sequentially - OPTIMIZED
390    fn build_edges_sequential_optimized<T>(
391        &self,
392        graph: &mut DependencyGraph,
393        scan_results: &[T],
394        optimized_detector: &ImportDetector,
395    ) -> Result<ImportDetectionStats>
396    where
397        T: ScanResult,
398    {
399        let mut stats = ImportDetectionStats {
400            files_processed: 0,
401            imports_detected: 0,
402            imports_resolved: 0,
403            resolution_rate: 0.0,
404            language_breakdown: HashMap::new(),
405            import_patterns: HashMap::new(),
406        };
407
408        // Create file path lookup for resolution
409        let file_path_map: HashMap<&str, &T> = scan_results
410            .iter()
411            .map(|result| (result.path(), result))
412            .collect();
413
414        for result in scan_results {
415            stats.files_processed += 1;
416
417            // Track language
418            if let Some(lang) = optimized_detector.detect_language(result.path()) {
419                *stats.language_breakdown.entry(lang.clone()).or_insert(0) += 1;
420            }
421
422            // Extract and resolve imports using optimized detector
423            if let Some(imports) = result.imports() {
424                stats.imports_detected += imports.len();
425
426                for import_str in imports {
427                    if let Some(resolved_path) =
428                        optimized_detector.resolve_import(import_str, result.path(), &file_path_map)
429                    {
430                        graph.add_edge(result.path().to_string(), resolved_path)?;
431                        stats.imports_resolved += 1;
432                    }
433                }
434            }
435        }
436
437        stats.resolution_rate = if stats.imports_detected > 0 {
438            stats.imports_resolved as f64 / stats.imports_detected as f64
439        } else {
440            0.0
441        };
442
443        Ok(stats)
444    }
445
446    /// Build graph edges sequentially - LEGACY
447    fn build_edges_sequential<T>(
448        &self,
449        graph: &mut DependencyGraph,
450        scan_results: &[T],
451    ) -> Result<ImportDetectionStats>
452    where
453        T: ScanResult,
454    {
455        let optimized_detector =
456            ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
457        self.build_edges_sequential_optimized(graph, scan_results, &optimized_detector)
458    }
459
460    /// Build graph edges in parallel - OPTIMIZED
461    fn build_edges_parallel_optimized<T>(
462        &self,
463        graph: &mut DependencyGraph,
464        scan_results: &[T],
465        optimized_detector: &ImportDetector,
466    ) -> Result<ImportDetectionStats>
467    where
468        T: ScanResult + Sync,
469    {
470        // Create file path lookup
471        let file_path_map: HashMap<&str, &T> = scan_results
472            .iter()
473            .map(|result| (result.path(), result))
474            .collect();
475
476        // Process imports in parallel using optimized detector
477        let import_edges: Vec<_> = scan_results
478            .par_iter()
479            .flat_map(|result| {
480                let mut edges = Vec::new();
481
482                if let Some(imports) = result.imports() {
483                    for import_str in imports {
484                        if let Some(resolved_path) = optimized_detector.resolve_import(
485                            import_str,
486                            result.path(),
487                            &file_path_map,
488                        ) {
489                            edges.push((result.path().to_string(), resolved_path));
490                        }
491                    }
492                }
493
494                edges
495            })
496            .collect();
497
498        // Add edges to graph
499        for (from, to) in &import_edges {
500            graph.add_edge(from.clone(), to.clone())?;
501        }
502
503        // Calculate statistics
504        let total_imports: usize = scan_results
505            .iter()
506            .map(|result| result.imports().map_or(0, |imports| imports.len()))
507            .sum();
508
509        let language_breakdown: HashMap<String, usize> = scan_results
510            .iter()
511            .filter_map(|result| {
512                optimized_detector
513                    .detect_language(result.path())
514                    .map(|lang| (lang, 1))
515            })
516            .fold(HashMap::new(), |mut acc, (lang, count)| {
517                *acc.entry(lang).or_insert(0) += count;
518                acc
519            });
520
521        let stats = ImportDetectionStats {
522            files_processed: scan_results.len(),
523            imports_detected: total_imports,
524            imports_resolved: import_edges.len(),
525            resolution_rate: if total_imports > 0 {
526                import_edges.len() as f64 / total_imports as f64
527            } else {
528                0.0
529            },
530            language_breakdown,
531            import_patterns: HashMap::new(), // TODO: Implement detailed pattern analysis
532        };
533
534        Ok(stats)
535    }
536
537    /// Build graph edges in parallel - LEGACY
538    fn build_edges_parallel<T>(
539        &self,
540        graph: &mut DependencyGraph,
541        scan_results: &[T],
542    ) -> Result<ImportDetectionStats>
543    where
544        T: ScanResult + Sync,
545    {
546        let optimized_detector =
547            ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
548        self.build_edges_parallel_optimized(graph, scan_results, &optimized_detector)
549    }
550
551    /// Normalize centrality scores for integration with heuristics
552    fn normalize_centrality_scores(
553        &self,
554        centrality_scores: &HashMap<String, f64>,
555        heuristic_scores: &HashMap<String, f64>,
556    ) -> Result<HashMap<String, f64>> {
557        if centrality_scores.is_empty() {
558            return Ok(HashMap::new());
559        }
560
561        match self.config.integration.normalization_method {
562            NormalizationMethod::MinMax => {
563                self.normalize_min_max(centrality_scores, heuristic_scores)
564            }
565            NormalizationMethod::ZScore => self.normalize_z_score(centrality_scores),
566            NormalizationMethod::Rank => self.normalize_rank(centrality_scores),
567            NormalizationMethod::None => Ok(centrality_scores.clone()),
568        }
569    }
570
571    /// Min-max normalization to match heuristic score range
572    fn normalize_min_max(
573        &self,
574        centrality_scores: &HashMap<String, f64>,
575        heuristic_scores: &HashMap<String, f64>,
576    ) -> Result<HashMap<String, f64>> {
577        let centrality_values: Vec<f64> = centrality_scores.values().copied().collect();
578        let min_centrality = centrality_values
579            .iter()
580            .fold(f64::INFINITY, |a, &b| a.min(b));
581        let max_centrality = centrality_values
582            .iter()
583            .fold(f64::NEG_INFINITY, |a, &b| a.max(b));
584
585        // Target range based on heuristic scores
586        let heuristic_values: Vec<f64> = heuristic_scores.values().copied().collect();
587        let max_heuristic = if heuristic_values.is_empty() {
588            1.0
589        } else {
590            heuristic_values
591                .iter()
592                .fold(f64::NEG_INFINITY, |a, &b| a.max(b))
593        };
594
595        let mut normalized = HashMap::new();
596
597        if (max_centrality - min_centrality).abs() < f64::EPSILON {
598            // All scores are the same
599            for (path, _) in centrality_scores {
600                normalized.insert(path.clone(), max_heuristic * 0.5); // Use half of max heuristic
601            }
602        } else {
603            for (path, &score) in centrality_scores {
604                let normalized_score =
605                    ((score - min_centrality) / (max_centrality - min_centrality)) * max_heuristic;
606                if normalized_score >= self.config.integration.min_centrality_threshold {
607                    normalized.insert(path.clone(), normalized_score);
608                }
609            }
610        }
611
612        Ok(normalized)
613    }
614
615    /// Z-score normalization
616    fn normalize_z_score(
617        &self,
618        centrality_scores: &HashMap<String, f64>,
619    ) -> Result<HashMap<String, f64>> {
620        let values: Vec<f64> = centrality_scores.values().copied().collect();
621        let mean = values.iter().sum::<f64>() / values.len() as f64;
622        let variance =
623            values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / values.len() as f64;
624        let std_dev = variance.sqrt();
625
626        let mut normalized = HashMap::new();
627
628        if std_dev > f64::EPSILON {
629            for (path, &score) in centrality_scores {
630                let z_score = (score - mean) / std_dev;
631                // Shift and scale to positive range
632                let normalized_score = (z_score + 3.0) / 6.0; // Roughly [0,1] for most values
633                if normalized_score >= self.config.integration.min_centrality_threshold {
634                    normalized.insert(path.clone(), normalized_score);
635                }
636            }
637        } else {
638            // All scores are the same
639            for (path, _) in centrality_scores {
640                normalized.insert(path.clone(), 0.5);
641            }
642        }
643
644        Ok(normalized)
645    }
646
647    /// Rank-based normalization
648    fn normalize_rank(
649        &self,
650        centrality_scores: &HashMap<String, f64>,
651    ) -> Result<HashMap<String, f64>> {
652        let mut scored_files: Vec<_> = centrality_scores
653            .iter()
654            .map(|(path, &score)| (path.clone(), score))
655            .collect();
656
657        scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
658
659        let mut normalized = HashMap::new();
660        let total_files = scored_files.len();
661
662        for (rank, (path, _)) in scored_files.into_iter().enumerate() {
663            let normalized_score = 1.0 - (rank as f64 / total_files as f64);
664            if normalized_score >= self.config.integration.min_centrality_threshold {
665                normalized.insert(path, normalized_score);
666            }
667        }
668
669        Ok(normalized)
670    }
671
672    /// Create minimal analysis for large graphs (performance optimization)
673    fn create_minimal_analysis(&self, graph: &DependencyGraph) -> Result<GraphAnalysisResults> {
674        // Use a simplified analyzer for large graphs
675        let minimal_analyzer = GraphStatisticsAnalyzer::for_large_graphs();
676        minimal_analyzer.analyze(graph)
677    }
678
679    /// Check if a file is an entrypoint
680    fn is_entrypoint_file(&self, file_path: &str) -> bool {
681        let path = Path::new(file_path);
682        let language = file::detect_language_from_path(path);
683        file::is_entrypoint_path(path, &language)
684    }
685}
686
/// Builds a calculator with the default configuration.
///
/// # Panics
/// Panics if `CentralityCalculator::new` returns an error; use `new`
/// directly to handle that case.
impl Default for CentralityCalculator {
    fn default() -> Self {
        Self::new().expect("Failed to create CentralityCalculator")
    }
}
692
/// Import detection and resolution engine with pre-computed lookup optimization.
///
/// The lookup maps are empty after `with_config` and are filled by
/// `with_file_index`, which indexes a concrete set of scan results.
#[derive(Debug, Clone)]
pub struct ImportDetector {
    /// Import resolution settings (custom mappings, stdlib exclusion, ...)
    config: ImportResolutionConfig,
    /// Pre-computed lookup map: file stem -> full paths (massive performance improvement).
    /// Keys are lowercased stems.
    stem_to_paths: HashMap<String, Vec<String>>,
    /// Pre-computed lookup map: filename -> full paths.
    /// Keys are lowercased filenames (including extension).
    filename_to_paths: HashMap<String, Vec<String>>,
    /// Set of all available file paths for quick existence checks
    available_paths: HashSet<String>,
}
704
// Per-language extension tables used during import resolution.
// `*_FILE_EXTENSIONS` hold bare extensions (no dot); `*_SUFFIXES` hold the
// same extensions with a leading dot, the form `strip_known_suffix` expects.
const PYTHON_FILE_EXTENSIONS: &[&str] = &["py"];
const PYTHON_SUFFIXES: &[&str] = &[".py"];
const JS_FILE_EXTENSIONS: &[&str] = &["js", "jsx", "ts", "tsx", "mjs", "cjs"];
const JS_SUFFIXES: &[&str] = &[".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"];
const RUST_FILE_EXTENSIONS: &[&str] = &["rs"];
const RUST_SUFFIXES: &[&str] = &[".rs"];
711
/// Remove the first suffix in `suffixes` that `value` ends with.
///
/// Suffixes are tried in slice order and at most one is removed. When none
/// matches, `value` is returned unchanged.
fn strip_known_suffix<'a>(value: &'a str, suffixes: &[&str]) -> &'a str {
    suffixes
        .iter()
        .find_map(|suffix| value.strip_suffix(*suffix))
        .unwrap_or(value)
}
720
721impl ImportDetector {
722    /// Create with configuration
723    pub fn with_config(config: ImportResolutionConfig) -> Self {
724        Self {
725            config,
726            stem_to_paths: HashMap::new(),
727            filename_to_paths: HashMap::new(),
728            available_paths: HashSet::new(),
729        }
730    }
731
732    /// Create with pre-computed lookup maps for massive performance improvement
733    pub fn with_file_index<T>(config: ImportResolutionConfig, scan_results: &[T]) -> Self
734    where
735        T: ScanResult,
736    {
737        let mut detector = Self::with_config(config);
738        detector.build_lookup_maps(scan_results);
739        detector
740    }
741
742    /// Build inverted index mapping file stems/names to full paths
743    /// This eliminates the O(n) scan-all-files bottleneck
744    fn build_lookup_maps<T>(&mut self, scan_results: &[T])
745    where
746        T: ScanResult,
747    {
748        self.stem_to_paths.clear();
749        self.filename_to_paths.clear();
750        self.available_paths.clear();
751
752        for result in scan_results {
753            let full_path = result.path().to_string();
754            self.available_paths.insert(full_path.clone());
755
756            let path = Path::new(result.path());
757
758            // Index by file stem (name without extension)
759            if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
760                let stem_lower = stem.to_lowercase();
761                self.stem_to_paths
762                    .entry(stem_lower)
763                    .or_insert_with(Vec::new)
764                    .push(full_path.clone());
765            }
766
767            // Index by full filename
768            if let Some(filename) = path.file_name().and_then(|s| s.to_str()) {
769                let filename_lower = filename.to_lowercase();
770                self.filename_to_paths
771                    .entry(filename_lower)
772                    .or_insert_with(Vec::new)
773                    .push(full_path);
774            }
775        }
776    }
777
778    /// Detect programming language from file extension
779    pub fn detect_language(&self, file_path: &str) -> Option<String> {
780        let path = Path::new(file_path);
781        let ext = path.extension()?.to_str()?.to_lowercase();
782
783        match ext.as_str() {
784            "py" => Some("python".to_string()),
785            "js" | "jsx" | "mjs" => Some("javascript".to_string()),
786            "ts" | "tsx" => Some("typescript".to_string()),
787            "rs" => Some("rust".to_string()),
788            "go" => Some("go".to_string()),
789            "java" | "kt" => Some("java".to_string()),
790            "cpp" | "cc" | "cxx" | "hpp" | "h" => Some("cpp".to_string()),
791            "c" => Some("c".to_string()),
792            "rb" => Some("ruby".to_string()),
793            "php" => Some("php".to_string()),
794            "cs" => Some("csharp".to_string()),
795            "swift" => Some("swift".to_string()),
796            _ => None,
797        }
798    }
799
800    /// Resolve import string to actual file path
801    pub fn resolve_import<T>(
802        &self,
803        import_str: &str,
804        current_file: &str,
805        file_map: &HashMap<&str, &T>,
806    ) -> Option<String>
807    where
808        T: ScanResult,
809    {
810        // Check custom path mappings first
811        if let Some(mapped_path) = self.config.path_mappings.get(import_str) {
812            if file_map.contains_key(mapped_path.as_str()) {
813                return Some(mapped_path.clone());
814            }
815        }
816
817        let current_path = Path::new(current_file);
818        let language = self.detect_language(current_file);
819
820        match language.as_deref() {
821            Some("python") => self.resolve_python_import(import_str, current_path, file_map),
822            Some("javascript") | Some("typescript") => {
823                self.resolve_js_import(import_str, current_path, file_map)
824            }
825            Some("rust") => self.resolve_rust_import(import_str, current_path, file_map),
826            Some("go") => self.resolve_go_import(import_str, current_path, file_map),
827            _ => self.resolve_generic_import(import_str, current_path, file_map),
828        }
829    }
830
831    /// Resolve Python import
832    fn resolve_python_import<T>(
833        &self,
834        import_str: &str,
835        current_path: &Path,
836        file_map: &HashMap<&str, &T>,
837    ) -> Option<String>
838    where
839        T: ScanResult,
840    {
841        let cleaned_import = import_str.trim();
842        if cleaned_import.is_empty() {
843            return None;
844        }
845
846        if self.config.exclude_stdlib_imports && self.is_python_stdlib(cleaned_import) {
847            return None;
848        }
849
850        let mut module = cleaned_import;
851        if let Some(alias_index) = module.find(" as ") {
852            module = &module[..alias_index];
853        }
854
855        let mut base_dir = current_path.parent().unwrap_or(current_path).to_path_buf();
856        let mut relative_levels = 0;
857        while module.starts_with('.') {
858            relative_levels += 1;
859            module = &module[1..];
860        }
861
862        for _ in 0..relative_levels {
863            if let Some(parent) = base_dir.parent() {
864                base_dir = parent.to_path_buf();
865            }
866        }
867
868        module = module.trim();
869        let module = strip_known_suffix(module, PYTHON_SUFFIXES);
870        let module_parts: Vec<&str> = if module.is_empty() {
871            Vec::new()
872        } else {
873            module.split('.').filter(|part| !part.is_empty()).collect()
874        };
875
876        if !module_parts.is_empty() {
877            if let Some(resolved) = self.resolve_relative_python(&base_dir, &module_parts, file_map)
878            {
879                return Some(resolved);
880            }
881        }
882
883        if module_parts.is_empty() {
884            return None;
885        }
886
887        self.find_module_candidate(&module_parts, PYTHON_FILE_EXTENSIONS)
888    }
889
890    /// Resolve JavaScript/TypeScript import
891    fn resolve_js_import<T>(
892        &self,
893        import_str: &str,
894        current_path: &Path,
895        file_map: &HashMap<&str, &T>,
896    ) -> Option<String>
897    where
898        T: ScanResult,
899    {
900        let cleaned_import = import_str.trim();
901        if cleaned_import.is_empty() {
902            return None;
903        }
904
905        let parent_dir = current_path.parent().unwrap_or(current_path);
906
907        if cleaned_import.starts_with("./") || cleaned_import.starts_with("../") {
908            if !self.config.resolve_relative_imports {
909                return None;
910            }
911
912            if let Some(resolved) = self.resolve_relative_js(parent_dir, cleaned_import, file_map) {
913                return Some(resolved);
914            }
915        } else {
916            // Attempt to resolve within the same directory first
917            if let Some(resolved) = self.resolve_relative_js(parent_dir, cleaned_import, file_map) {
918                return Some(resolved);
919            }
920
921            if !self.config.resolve_absolute_imports {
922                return None;
923            }
924
925            let normalized = strip_known_suffix(cleaned_import, JS_SUFFIXES);
926            let module_parts: Vec<&str> = normalized
927                .split('/')
928                .filter(|segment| !segment.is_empty())
929                .collect();
930
931            if module_parts.is_empty() {
932                return None;
933            }
934
935            return self.find_module_candidate(&module_parts, JS_FILE_EXTENSIONS);
936        }
937
938        None
939    }
940
941    /// Resolve Rust import (use/mod statements)
942    fn resolve_rust_import<T>(
943        &self,
944        import_str: &str,
945        current_path: &Path,
946        file_map: &HashMap<&str, &T>,
947    ) -> Option<String>
948    where
949        T: ScanResult,
950    {
951        let cleaned_import = import_str.trim();
952        if cleaned_import.is_empty() {
953            return None;
954        }
955
956        if self.config.exclude_stdlib_imports && self.is_rust_stdlib(cleaned_import) {
957            return None;
958        }
959
960        let mut module = cleaned_import;
961
962        if let Some(stripped) = module.strip_prefix("crate::") {
963            module = stripped;
964        }
965
966        while let Some(stripped) = module.strip_prefix("self::") {
967            module = stripped;
968        }
969
970        let mut base_dir = current_path.parent().unwrap_or(current_path).to_path_buf();
971        while let Some(stripped) = module.strip_prefix("super::") {
972            module = stripped;
973            if let Some(parent) = base_dir.parent() {
974                base_dir = parent.to_path_buf();
975            }
976        }
977
978        module = strip_known_suffix(module, RUST_SUFFIXES);
979        let module_parts: Vec<&str> = module
980            .split("::")
981            .filter(|segment| !segment.is_empty())
982            .collect();
983
984        if module_parts.is_empty() {
985            return None;
986        }
987
988        if let Some(resolved) = self.resolve_relative_rust(&base_dir, &module_parts, file_map) {
989            return Some(resolved);
990        }
991
992        if module_parts.len() == 1 {
993            let crate_lib = base_dir.join("lib.rs");
994            if let Some(candidate_str) = crate_lib.to_str() {
995                if file_map.contains_key(candidate_str) {
996                    return Some(candidate_str.to_string());
997                }
998            }
999        }
1000
1001        self.find_module_candidate(&module_parts, RUST_FILE_EXTENSIONS)
1002    }
1003
1004    /// Resolve Go import
1005    fn resolve_go_import<T>(
1006        &self,
1007        import_str: &str,
1008        _current_path: &Path,
1009        file_map: &HashMap<&str, &T>,
1010    ) -> Option<String>
1011    where
1012        T: ScanResult,
1013    {
1014        let cleaned_import = import_str.trim().trim_matches('"');
1015
1016        // Skip standard library
1017        if self.config.exclude_stdlib_imports && !cleaned_import.contains('.') {
1018            return None;
1019        }
1020
1021        let parts: Vec<&str> = cleaned_import.split('/').collect();
1022
1023        // Try various Go file patterns
1024        let mut candidates = Vec::new();
1025
1026        // Package directory
1027        candidates.push(format!("{}.go", parts.last()?));
1028        candidates.push(format!("{}/main.go", cleaned_import));
1029        candidates.push(format!("{}/{}.go", cleaned_import, parts.last()?));
1030
1031        for candidate in &candidates {
1032            if file_map.contains_key(candidate.as_str()) {
1033                return Some(candidate.clone());
1034            }
1035        }
1036
1037        self.fuzzy_match_import(&parts, file_map)
1038    }
1039
1040    /// Generic import resolution
1041    fn resolve_generic_import<T>(
1042        &self,
1043        import_str: &str,
1044        _current_path: &Path,
1045        file_map: &HashMap<&str, &T>,
1046    ) -> Option<String>
1047    where
1048        T: ScanResult,
1049    {
1050        let cleaned_import = import_str.trim();
1051        let parts: Vec<&str> = cleaned_import.split(&['/', '.', ':']).collect();
1052        self.fuzzy_match_import(&parts, file_map)
1053    }
1054
1055    fn resolve_relative_python<T>(
1056        &self,
1057        base_dir: &Path,
1058        module_parts: &[&str],
1059        file_map: &HashMap<&str, &T>,
1060    ) -> Option<String>
1061    where
1062        T: ScanResult,
1063    {
1064        if module_parts.is_empty() {
1065            return None;
1066        }
1067
1068        let mut module_path = base_dir.to_path_buf();
1069        for part in module_parts {
1070            module_path.push(part);
1071        }
1072
1073        let mut candidate = module_path.clone();
1074        candidate.set_extension("py");
1075        if let Some(candidate_str) = candidate.to_str() {
1076            if file_map.contains_key(candidate_str) {
1077                return Some(candidate_str.to_string());
1078            }
1079        }
1080
1081        let init_candidate = module_path.join("__init__.py");
1082        if let Some(candidate_str) = init_candidate.to_str() {
1083            if file_map.contains_key(candidate_str) {
1084                return Some(candidate_str.to_string());
1085            }
1086        }
1087
1088        None
1089    }
1090
1091    fn resolve_relative_js<T>(
1092        &self,
1093        base_dir: &Path,
1094        import_path: &str,
1095        file_map: &HashMap<&str, &T>,
1096    ) -> Option<String>
1097    where
1098        T: ScanResult,
1099    {
1100        let normalized = strip_known_suffix(import_path, JS_SUFFIXES);
1101        let target = self.build_relative_js_path(base_dir, normalized);
1102
1103        for ext in JS_FILE_EXTENSIONS {
1104            let mut candidate = target.clone();
1105            candidate.set_extension(ext);
1106            if let Some(candidate_str) = candidate.to_str() {
1107                if file_map.contains_key(candidate_str) {
1108                    return Some(candidate_str.to_string());
1109                }
1110            }
1111        }
1112
1113        for ext in JS_FILE_EXTENSIONS {
1114            let index_candidate = target.join(format!("index.{}", ext));
1115            if let Some(candidate_str) = index_candidate.to_str() {
1116                if file_map.contains_key(candidate_str) {
1117                    return Some(candidate_str.to_string());
1118                }
1119            }
1120        }
1121
1122        None
1123    }
1124
1125    fn build_relative_js_path(&self, base_dir: &Path, import_path: &str) -> PathBuf {
1126        let mut resolved = base_dir.to_path_buf();
1127        for segment in import_path.split('/') {
1128            match segment {
1129                "" | "." => {}
1130                ".." => {
1131                    if let Some(parent) = resolved.parent() {
1132                        resolved = parent.to_path_buf();
1133                    }
1134                }
1135                _ => resolved.push(segment),
1136            }
1137        }
1138        resolved
1139    }
1140
1141    fn resolve_relative_rust<T>(
1142        &self,
1143        base_dir: &Path,
1144        module_parts: &[&str],
1145        file_map: &HashMap<&str, &T>,
1146    ) -> Option<String>
1147    where
1148        T: ScanResult,
1149    {
1150        if module_parts.is_empty() {
1151            return None;
1152        }
1153
1154        let mut module_path = base_dir.to_path_buf();
1155        for part in module_parts {
1156            module_path.push(part);
1157        }
1158
1159        let mut candidate = module_path.clone();
1160        candidate.set_extension("rs");
1161        if let Some(candidate_str) = candidate.to_str() {
1162            if file_map.contains_key(candidate_str) {
1163                return Some(candidate_str.to_string());
1164            }
1165        }
1166
1167        let mod_candidate = module_path.join("mod.rs");
1168        if let Some(candidate_str) = mod_candidate.to_str() {
1169            if file_map.contains_key(candidate_str) {
1170                return Some(candidate_str.to_string());
1171            }
1172        }
1173
1174        None
1175    }
1176
1177    fn find_module_candidate(&self, module_parts: &[&str], extensions: &[&str]) -> Option<String> {
1178        if module_parts.is_empty() {
1179            return None;
1180        }
1181
1182        let stem = module_parts.last().unwrap().to_lowercase();
1183        let candidates = self.stem_to_paths.get(&stem)?;
1184
1185        for candidate in candidates {
1186            if self.module_path_matches(candidate, module_parts, extensions) {
1187                return Some(candidate.clone());
1188            }
1189        }
1190
1191        None
1192    }
1193
1194    fn module_path_matches(
1195        &self,
1196        candidate: &str,
1197        module_parts: &[&str],
1198        extensions: &[&str],
1199    ) -> bool {
1200        let path = Path::new(candidate);
1201        let file_name = match path.file_name().and_then(|n| n.to_str()) {
1202            Some(name) => name,
1203            None => return false,
1204        };
1205
1206        let lower_file = file_name.to_lowercase();
1207        if lower_file == "__init__.py" {
1208            return self.dir_path_matches(path.parent(), module_parts);
1209        }
1210
1211        let ext = Path::new(file_name)
1212            .extension()
1213            .and_then(|e| e.to_str())
1214            .map(|s| s.to_lowercase())
1215            .unwrap_or_default();
1216
1217        if !extensions
1218            .iter()
1219            .any(|allowed| allowed.eq_ignore_ascii_case(&ext))
1220        {
1221            return false;
1222        }
1223
1224        let stem = Path::new(file_name)
1225            .file_stem()
1226            .and_then(|s| s.to_str())
1227            .map(|s| s.to_lowercase())
1228            .unwrap_or_default();
1229
1230        if stem == "index" && !module_parts.is_empty() {
1231            return self.dir_path_matches(path.parent(), module_parts);
1232        }
1233
1234        if module_parts.is_empty() {
1235            return false;
1236        }
1237
1238        if stem != module_parts.last().unwrap().to_lowercase() {
1239            return false;
1240        }
1241
1242        self.dir_path_matches(
1243            path.parent(),
1244            &module_parts[..module_parts.len().saturating_sub(1)],
1245        )
1246    }
1247
1248    fn dir_path_matches(&self, dir: Option<&Path>, module_parts: &[&str]) -> bool {
1249        if module_parts.is_empty() {
1250            return true;
1251        }
1252
1253        let mut current = dir;
1254        for expected in module_parts.iter().rev() {
1255            match current {
1256                Some(path) => {
1257                    let name = path.file_name().and_then(|n| n.to_str());
1258                    match name {
1259                        Some(name) if name.eq_ignore_ascii_case(expected) => {
1260                            current = path.parent();
1261                        }
1262                        _ => return false,
1263                    }
1264                }
1265                None => return false,
1266            }
1267        }
1268
1269        true
1270    }
1271
1272    /// Fuzzy matching for import resolution - OPTIMIZED with pre-computed maps
1273    fn fuzzy_match_import<T>(
1274        &self,
1275        import_parts: &[&str],
1276        _file_map: &HashMap<&str, &T>,
1277    ) -> Option<String>
1278    where
1279        T: ScanResult,
1280    {
1281        if import_parts.is_empty() {
1282            return None;
1283        }
1284
1285        let last_part = import_parts.last()?.to_lowercase();
1286
1287        // MASSIVE PERFORMANCE IMPROVEMENT: Use pre-computed lookup maps instead of O(n) scan
1288        // 1. First try exact stem match (most common case)
1289        if let Some(paths) = self.stem_to_paths.get(&last_part) {
1290            // Return first match (could be made smarter with scoring)
1291            if let Some(first_path) = paths.first() {
1292                return Some(first_path.clone());
1293            }
1294        }
1295
1296        // 2. Try filename match
1297        if let Some(paths) = self.filename_to_paths.get(&last_part) {
1298            if let Some(first_path) = paths.first() {
1299                return Some(first_path.clone());
1300            }
1301        }
1302
1303        // 3. Try partial matching against stems
1304        for (stem, paths) in &self.stem_to_paths {
1305            if stem.contains(&last_part) || last_part.contains(stem) {
1306                if let Some(first_path) = paths.first() {
1307                    return Some(first_path.clone());
1308                }
1309            }
1310        }
1311
1312        // 4. Fallback: check if path contains all import parts
1313        for path in &self.available_paths {
1314            let path_lower = path.to_lowercase();
1315            if import_parts
1316                .iter()
1317                .all(|&part| path_lower.contains(&part.to_lowercase()))
1318            {
1319                return Some(path.clone());
1320            }
1321        }
1322
1323        None
1324    }
1325
1326    /// Check if import is Python standard library
1327    fn is_python_stdlib(&self, import_str: &str) -> bool {
1328        let stdlib_modules = [
1329            "os",
1330            "sys",
1331            "re",
1332            "json",
1333            "collections",
1334            "itertools",
1335            "functools",
1336            "typing",
1337            "datetime",
1338            "math",
1339            "random",
1340            "string",
1341            "pathlib",
1342            "io",
1343            "csv",
1344            "xml",
1345            "html",
1346            "urllib",
1347            "http",
1348            "email",
1349            "logging",
1350            "unittest",
1351            "asyncio",
1352            "concurrent",
1353            "multiprocessing",
1354            "threading",
1355            "subprocess",
1356        ];
1357
1358        let first_part = import_str.split('.').next().unwrap_or(import_str);
1359        stdlib_modules.contains(&first_part)
1360    }
1361
1362    /// Check if import is Rust standard library
1363    fn is_rust_stdlib(&self, import_str: &str) -> bool {
1364        import_str.starts_with("std::")
1365            || import_str.starts_with("core::")
1366            || import_str.starts_with("alloc::")
1367    }
1368}
1369
1370/// Utility functions for centrality results analysis
1371impl CentralityResults {
1372    /// Get files sorted by centrality score (descending)
1373    pub fn top_files_by_centrality(&self, k: usize) -> Vec<(String, f64)> {
1374        let mut scored_files: Vec<_> = self
1375            .pagerank_scores
1376            .iter()
1377            .map(|(path, &score)| (path.clone(), score))
1378            .collect();
1379
1380        scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1381        scored_files.into_iter().take(k).collect()
1382    }
1383
1384    /// Get summary statistics about centrality computation
1385    pub fn summary(&self) -> String {
1386        format!(
1387            "Centrality Analysis Summary:\n\
1388             - Files with centrality scores: {}\n\
1389             - PageRank iterations: {} (converged: {})\n\
1390             - Graph: {} nodes, {} edges (density: {:.4})\n\
1391             - Import resolution: {:.1}% ({}/{})\n\
1392             - Top languages: {}\n\
1393             - Computation time: {}ms\n\
1394             - Integration weight: {:.2}",
1395            self.pagerank_scores.len(),
1396            self.pagerank_details.iterations_converged,
1397            self.pagerank_details.converged(),
1398            self.graph_analysis.basic_stats.total_nodes,
1399            self.graph_analysis.basic_stats.total_edges,
1400            self.graph_analysis.basic_stats.graph_density,
1401            self.import_stats.resolution_rate * 100.0,
1402            self.import_stats.imports_resolved,
1403            self.import_stats.imports_detected,
1404            self.import_stats
1405                .language_breakdown
1406                .iter()
1407                .max_by_key(|(_, &count)| count)
1408                .map(|(lang, count)| format!("{} ({})", lang, count))
1409                .unwrap_or_else(|| "None".to_string()),
1410            self.integration_metadata.computation_time_ms,
1411            self.integration_metadata.centrality_weight,
1412        )
1413    }
1414}
1415
#[cfg(test)]
mod tests {
    use super::*;
    use scribe_analysis::heuristics::DocumentAnalysis;

    // Mock scan result for testing: a plain struct holding one value per
    // `ScanResult` accessor implemented below.
    #[derive(Debug, Clone)]
    struct MockScanResult {
        path: String,
        relative_path: String,
        depth: usize,
        imports: Option<Vec<String>>,
        is_docs: bool,
        is_readme: bool,
        is_test: bool,
        is_entrypoint: bool,
        has_examples: bool,
        priority_boost: f64,
        churn_score: f64,
        centrality_in: f64,
        doc_analysis: Option<DocumentAnalysis>,
    }

    impl MockScanResult {
        /// Build a mock for `path`. The boolean flags are derived from simple
        /// substring checks on the path, `depth` is the number of `/` separators,
        /// and no imports are attached.
        fn new(path: &str) -> Self {
            Self {
                path: path.to_string(),
                relative_path: path.to_string(),
                depth: path.matches('/').count(),
                imports: None,
                is_docs: path.contains("doc") || path.ends_with(".md"),
                is_readme: path.to_lowercase().contains("readme"),
                is_test: path.contains("test"),
                is_entrypoint: path.contains("main") || path.contains("index"),
                has_examples: path.contains("example"),
                priority_boost: 0.0,
                churn_score: 0.5,
                centrality_in: 0.0,
                doc_analysis: Some(DocumentAnalysis::new()),
            }
        }

        /// Builder-style setter that attaches the given import list.
        fn with_imports(mut self, imports: Vec<String>) -> Self {
            self.imports = Some(imports);
            self
        }
    }

    // Every accessor simply returns the corresponding stored field.
    impl ScanResult for MockScanResult {
        fn path(&self) -> &str {
            &self.path
        }
        fn relative_path(&self) -> &str {
            &self.relative_path
        }
        fn depth(&self) -> usize {
            self.depth
        }
        fn is_docs(&self) -> bool {
            self.is_docs
        }
        fn is_readme(&self) -> bool {
            self.is_readme
        }
        fn is_test(&self) -> bool {
            self.is_test
        }
        fn is_entrypoint(&self) -> bool {
            self.is_entrypoint
        }
        fn has_examples(&self) -> bool {
            self.has_examples
        }
        fn priority_boost(&self) -> f64 {
            self.priority_boost
        }
        fn churn_score(&self) -> f64 {
            self.churn_score
        }
        fn centrality_in(&self) -> f64 {
            self.centrality_in
        }
        fn imports(&self) -> Option<&[String]> {
            self.imports.as_deref()
        }
        fn doc_analysis(&self) -> Option<&DocumentAnalysis> {
            self.doc_analysis.as_ref()
        }
    }

    /// Both calculator constructors must succeed.
    #[test]
    fn test_centrality_calculator_creation() {
        let calculator = CentralityCalculator::new();
        assert!(calculator.is_ok());

        let large_calc = CentralityCalculator::for_large_codebases();
        assert!(large_calc.is_ok());
    }

    /// Language detection by file name and the two stdlib classifiers.
    #[test]
    fn test_import_detection() {
        let detector = ImportDetector::with_config(ImportResolutionConfig::default());

        // Test language detection
        assert_eq!(
            detector.detect_language("main.py"),
            Some("python".to_string())
        );
        assert_eq!(
            detector.detect_language("app.js"),
            Some("javascript".to_string())
        );
        assert_eq!(detector.detect_language("lib.rs"), Some("rust".to_string()));

        // Test Python stdlib detection (only the segment before the first dot is checked)
        assert!(detector.is_python_stdlib("os"));
        assert!(detector.is_python_stdlib("sys.path"));
        assert!(!detector.is_python_stdlib("custom_module"));

        // Test Rust stdlib detection (prefix check on `std::`, `core::`, `alloc::`)
        assert!(detector.is_rust_stdlib("std::collections::HashMap"));
        assert!(detector.is_rust_stdlib("core::fmt"));
        assert!(!detector.is_rust_stdlib("serde::Deserialize"));
    }

    /// End-to-end centrality computation on a four-file Python project.
    #[test]
    fn test_centrality_calculation() {
        let calculator = CentralityCalculator::new().unwrap();

        let scan_results = vec![
            MockScanResult::new("main.py")
                .with_imports(vec!["utils".to_string(), "config".to_string()]),
            MockScanResult::new("utils.py").with_imports(vec!["config".to_string()]),
            MockScanResult::new("config.py"),
            MockScanResult::new("test.py").with_imports(vec!["main".to_string()]),
        ];

        let results = calculator.calculate_centrality(&scan_results).unwrap();

        // Basic checks
        assert!(!results.pagerank_scores.is_empty());
        assert!(results.integration_metadata.integration_successful);
        assert_eq!(
            results.integration_metadata.files_with_centrality,
            results.pagerank_scores.len()
        );

        // config.py is imported by main.py and utils.py, so a high centrality is expected.
        // NOTE: only the presence of a score is asserted here, not its rank or magnitude.
        let config_score = results.pagerank_scores.get("config.py");
        assert!(config_score.is_some());

        println!("Centrality scores:");
        for (file, score) in &results.pagerank_scores {
            println!("  {}: {:.6}", file, score);
        }

        println!("\n{}", results.summary());
    }

    /// Combining centrality results with externally supplied heuristic scores.
    #[test]
    fn test_heuristics_integration() {
        let calculator = CentralityCalculator::new().unwrap();

        let scan_results = vec![
            MockScanResult::new("main.py").with_imports(vec!["utils".to_string()]),
            MockScanResult::new("utils.py"),
        ];

        let centrality_results = calculator.calculate_centrality(&scan_results).unwrap();

        // Mock heuristic scores
        let mut heuristic_scores = HashMap::new();
        heuristic_scores.insert("main.py".to_string(), 0.8);
        heuristic_scores.insert("utils.py".to_string(), 0.6);

        let integrated_scores = calculator
            .integrate_with_heuristics(&centrality_results, &heuristic_scores)
            .unwrap();

        assert!(!integrated_scores.is_empty());

        // Integrated scores are expected to differ from the original heuristic scores.
        // NOTE: the values are only printed for inspection; the difference is not asserted.
        // The `unwrap` requires every integrated file to have a heuristic score.
        for (file, integrated_score) in &integrated_scores {
            let original_score = heuristic_scores.get(file).unwrap();
            println!(
                "File {}: heuristic={:.3}, integrated={:.3}",
                file, original_score, integrated_score
            );
        }
    }

    /// Smoke test: each normalization method succeeds and returns a non-empty result.
    /// The normalized values themselves are printed, not asserted.
    #[test]
    fn test_normalization_methods() {
        let calculator = CentralityCalculator::new().unwrap();

        let centrality_scores = vec![
            ("file1".to_string(), 0.1),
            ("file2".to_string(), 0.3),
            ("file3".to_string(), 0.6),
            ("file4".to_string(), 1.0),
        ]
        .into_iter()
        .collect();

        let heuristic_scores = vec![
            ("file1".to_string(), 0.5),
            ("file2".to_string(), 0.7),
            ("file3".to_string(), 0.9),
            ("file4".to_string(), 1.2),
        ]
        .into_iter()
        .collect();

        // Test min-max normalization
        let normalized = calculator
            .normalize_min_max(&centrality_scores, &heuristic_scores)
            .unwrap();
        assert!(!normalized.is_empty());

        // Test z-score normalization
        let z_normalized = calculator.normalize_z_score(&centrality_scores).unwrap();
        assert!(!z_normalized.is_empty());

        // Test rank normalization
        let rank_normalized = calculator.normalize_rank(&centrality_scores).unwrap();
        assert!(!rank_normalized.is_empty());

        println!("Original scores: {:?}", centrality_scores);
        println!("Min-max normalized: {:?}", normalized);
        println!("Z-score normalized: {:?}", z_normalized);
        println!("Rank normalized: {:?}", rank_normalized);
    }

    /// Import resolution through `ImportDetector::resolve_import` for a Python importer.
    #[test]
    fn test_import_resolution() {
        let detector = ImportDetector::with_config(ImportResolutionConfig::default());

        // Create mock file map
        let scan_results = vec![
            MockScanResult::new("src/main.py"),
            MockScanResult::new("src/utils.py"),
            MockScanResult::new("src/config.py"),
            MockScanResult::new("tests/test_main.py"),
        ];

        // Keyed by each mock's path, borrowing from `scan_results`.
        let file_map: HashMap<&str, &MockScanResult> = scan_results
            .iter()
            .map(|result| (result.path(), result))
            .collect();

        // Test Python import resolution
        let resolved = detector.resolve_import("utils", "src/main.py", &file_map);
        assert!(resolved.is_some());

        // Test module path resolution
        let resolved_config = detector.resolve_import("src.config", "src/main.py", &file_map);
        // Expected to resolve to src/config.py.
        // NOTE: only `is_some()` is asserted, not the resolved path.
        assert!(resolved_config.is_some());

        println!("Resolved imports:");
        if let Some(path) = resolved {
            println!("  utils -> {}", path);
        }
        if let Some(path) = resolved_config {
            println!("  src.config -> {}", path);
        }
    }

    /// Well-known entry-point file names are recognised; ordinary files are not.
    #[test]
    fn test_entrypoint_detection() {
        let calculator = CentralityCalculator::new().unwrap();

        assert!(calculator.is_entrypoint_file("main.py"));
        assert!(calculator.is_entrypoint_file("src/main.rs"));
        assert!(calculator.is_entrypoint_file("index.js"));
        assert!(calculator.is_entrypoint_file("app.py"));
        assert!(calculator.is_entrypoint_file("lib.rs"));
        assert!(calculator.is_entrypoint_file("__init__.py"));

        assert!(!calculator.is_entrypoint_file("utils.py"));
        assert!(!calculator.is_entrypoint_file("config.rs"));
        assert!(!calculator.is_entrypoint_file("helper.js"));
    }

    /// `top_files_by_centrality(2)` returns the two highest scores in descending order.
    #[test]
    fn test_top_files_by_centrality() {
        let mut pagerank_scores = HashMap::new();
        pagerank_scores.insert("file1.py".to_string(), 0.4);
        pagerank_scores.insert("file2.py".to_string(), 0.6);
        pagerank_scores.insert("file3.py".to_string(), 0.2);
        pagerank_scores.insert("file4.py".to_string(), 0.8);

        // Only `pagerank_scores` is read by the method under test; the remaining
        // fields are filler needed to construct the struct.
        let results = CentralityResults {
            pagerank_scores,
            graph_analysis: GraphAnalysisResults {
                basic_stats: crate::graph::GraphStatistics::empty(),
                degree_distribution: Default::default(),
                connectivity: Default::default(),
                structural_patterns: Default::default(),
                import_insights: Default::default(),
                performance_profile: Default::default(),
                analysis_metadata: Default::default(),
            },
            pagerank_details: PageRankResults {
                scores: HashMap::new(),
                iterations_converged: 10,
                convergence_epsilon: 1e-6,
                graph_stats: crate::graph::GraphStatistics::empty(),
                parameters: PageRankConfig::default(),
                performance_metrics: Default::default(),
            },
            import_stats: ImportDetectionStats {
                files_processed: 4,
                imports_detected: 0,
                imports_resolved: 0,
                resolution_rate: 0.0,
                language_breakdown: HashMap::new(),
                import_patterns: HashMap::new(),
            },
            integration_metadata: IntegrationMetadata {
                timestamp: chrono::Utc::now(),
                computation_time_ms: 100,
                integration_successful: true,
                centrality_weight: 0.15,
                files_with_centrality: 4,
                config: CentralityConfig::default(),
            },
        };

        // Exact float equality is safe here: the scores are copied, never recomputed.
        let top_files = results.top_files_by_centrality(2);
        assert_eq!(top_files.len(), 2);
        assert_eq!(top_files[0].0, "file4.py");
        assert_eq!(top_files[0].1, 0.8);
        assert_eq!(top_files[1].0, "file2.py");
        assert_eq!(top_files[1].1, 0.6);
    }
}