scribe_graph/
centrality.rs

1//! # Centrality Calculator with Heuristics Integration
2//!
3//! Main interface for PageRank centrality calculation and integration with the existing
4//! FastPath heuristic scoring system. This module provides the high-level API for:
5//!
6//! ## Key Features
7//! - **PageRank Centrality Computation**: Research-grade algorithm with convergence detection
8//! - **Import Graph Construction**: Builds dependency graphs from file scan results  
9//! - **Heuristics Integration**: Seamless integration with V2 scoring system
10//! - **Performance Optimization**: Efficient computation for large codebases
11//! - **Multi-language Support**: Import detection across programming languages
12//! - **Comprehensive Analysis**: Full graph statistics and structural insights
13//!
14//! ## Integration with FastPath Heuristics
15//! The centrality scores are integrated into the heuristic scoring formula:
16//! ```text
17//! final_score = Σ(weight_i × normalized_score_i) + priority_boost + template_boost
18//! ```
19//! Where `centrality_score` becomes a weighted component when V2 features are enabled.
20
21use std::collections::{HashMap, HashSet};
22use std::path::Path;
23use serde::{Deserialize, Serialize};
24use rayon::prelude::*;
25use scribe_core::Result;
26use scribe_analysis::heuristics::ScanResult;
27
28use crate::graph::{DependencyGraph, NodeId};
29use crate::pagerank::{PageRankComputer, PageRankResults, PageRankConfig};
30use crate::statistics::{GraphStatisticsAnalyzer, GraphAnalysisResults};
31
32/// Complete centrality calculation results with comprehensive metadata
33#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34pub struct CentralityResults {
35    /// PageRank scores (file path -> centrality score)
36    pub pagerank_scores: HashMap<NodeId, f64>,
37    
38    /// Graph analysis results
39    pub graph_analysis: GraphAnalysisResults,
40    
41    /// PageRank computation details
42    pub pagerank_details: PageRankResults,
43    
44    /// Import detection statistics
45    pub import_stats: ImportDetectionStats,
46    
47    /// Integration metadata
48    pub integration_metadata: IntegrationMetadata,
49}
50
51/// Statistics about import detection and graph construction
52#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
53pub struct ImportDetectionStats {
54    /// Number of files processed for import detection
55    pub files_processed: usize,
56    
57    /// Number of import relationships detected
58    pub imports_detected: usize,
59    
60    /// Number of resolved imports (mapped to actual files)
61    pub imports_resolved: usize,
62    
63    /// Import resolution success rate
64    pub resolution_rate: f64,
65    
66    /// Language breakdown of processed files
67    pub language_breakdown: HashMap<String, usize>,
68    
69    /// Import patterns by language
70    pub import_patterns: HashMap<String, ImportPatternStats>,
71}
72
73/// Import pattern statistics for a specific language
74#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
75pub struct ImportPatternStats {
76    /// Total imports found
77    pub total_imports: usize,
78    
79    /// Relative imports (./,../)
80    pub relative_imports: usize,
81    
82    /// Absolute imports
83    pub absolute_imports: usize,
84    
85    /// Standard library imports
86    pub stdlib_imports: usize,
87    
88    /// Third-party imports
89    pub third_party_imports: usize,
90}
91
92/// Metadata about centrality-heuristics integration
93#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
94pub struct IntegrationMetadata {
95    /// When the analysis was performed
96    pub timestamp: chrono::DateTime<chrono::Utc>,
97    
98    /// Total computation time
99    pub computation_time_ms: u64,
100    
101    /// Whether centrality was successfully integrated
102    pub integration_successful: bool,
103    
104    /// Centrality weight used in integration
105    pub centrality_weight: f64,
106    
107    /// Number of files with centrality scores
108    pub files_with_centrality: usize,
109    
110    /// Configuration used
111    pub config: CentralityConfig,
112}
113
114/// Configuration for centrality calculation
115#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
116pub struct CentralityConfig {
117    /// PageRank algorithm configuration
118    pub pagerank_config: PageRankConfig,
119    
120    /// Whether to perform expensive graph analysis
121    pub analyze_graph_structure: bool,
122    
123    /// Import resolution configuration
124    pub import_resolution: ImportResolutionConfig,
125    
126    /// Integration parameters
127    pub integration: IntegrationConfig,
128}
129
130/// Configuration for import resolution
131#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
132pub struct ImportResolutionConfig {
133    /// Maximum search depth for import resolution
134    pub max_search_depth: usize,
135    
136    /// Whether to resolve relative imports
137    pub resolve_relative_imports: bool,
138    
139    /// Whether to resolve absolute imports
140    pub resolve_absolute_imports: bool,
141    
142    /// Whether to exclude standard library imports
143    pub exclude_stdlib_imports: bool,
144    
145    /// Custom import path mappings
146    pub path_mappings: HashMap<String, String>,
147}
148
149/// Configuration for heuristics integration
150#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
151pub struct IntegrationConfig {
152    /// Weight for centrality in final score
153    pub centrality_weight: f64,
154    
155    /// Normalization method for centrality scores
156    pub normalization_method: NormalizationMethod,
157    
158    /// Minimum centrality score threshold
159    pub min_centrality_threshold: f64,
160    
161    /// Whether to boost entrypoint centrality
162    pub boost_entrypoints: bool,
163    
164    /// Entrypoint boost factor
165    pub entrypoint_boost_factor: f64,
166}
167
168/// Methods for normalizing centrality scores
169#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
170pub enum NormalizationMethod {
171    /// Normalize to [0,1] range
172    MinMax,
173    /// Z-score normalization
174    ZScore,
175    /// Rank-based normalization
176    Rank,
177    /// No normalization
178    None,
179}
180
181impl Default for CentralityConfig {
182    fn default() -> Self {
183        Self {
184            pagerank_config: PageRankConfig::for_code_analysis(),
185            analyze_graph_structure: true,
186            import_resolution: ImportResolutionConfig::default(),
187            integration: IntegrationConfig::default(),
188        }
189    }
190}
191
192impl Default for ImportResolutionConfig {
193    fn default() -> Self {
194        Self {
195            max_search_depth: 3,
196            resolve_relative_imports: true,
197            resolve_absolute_imports: true,
198            exclude_stdlib_imports: true,
199            path_mappings: HashMap::new(),
200        }
201    }
202}
203
204impl Default for IntegrationConfig {
205    fn default() -> Self {
206        Self {
207            centrality_weight: 0.15, // 15% weight in V2 scoring
208            normalization_method: NormalizationMethod::MinMax,
209            min_centrality_threshold: 1e-6,
210            boost_entrypoints: true,
211            entrypoint_boost_factor: 1.5,
212        }
213    }
214}
215
216/// Main centrality calculator with heuristics integration
217#[derive(Debug)]
218pub struct CentralityCalculator {
219    /// Configuration
220    config: CentralityConfig,
221    
222    /// PageRank computer
223    pagerank_computer: PageRankComputer,
224    
225    /// Graph statistics analyzer
226    stats_analyzer: GraphStatisticsAnalyzer,
227    
228    /// Import detector
229    import_detector: ImportDetector,
230}
231
232impl CentralityCalculator {
233    /// Create a new centrality calculator with default configuration
234    pub fn new() -> Result<Self> {
235        let config = CentralityConfig::default();
236        Self::with_config(config)
237    }
238    
239    /// Create with custom configuration
240    pub fn with_config(config: CentralityConfig) -> Result<Self> {
241        let pagerank_computer = PageRankComputer::with_config(config.pagerank_config.clone())?;
242        
243        let stats_analyzer = if config.analyze_graph_structure {
244            GraphStatisticsAnalyzer::new()
245        } else {
246            GraphStatisticsAnalyzer::for_large_graphs()
247        };
248        
249        let import_detector = ImportDetector::with_config(config.import_resolution.clone());
250        
251        Ok(Self {
252            config,
253            pagerank_computer,
254            stats_analyzer,
255            import_detector,
256        })
257    }
258    
259    /// Create optimized for large codebases
260    pub fn for_large_codebases() -> Result<Self> {
261        let config = CentralityConfig {
262            pagerank_config: PageRankConfig::for_large_codebases(),
263            analyze_graph_structure: false,
264            ..CentralityConfig::default()
265        };
266        Self::with_config(config)
267    }
268    
269    /// Calculate centrality scores for a collection of scan results
270    pub fn calculate_centrality<T>(&self, scan_results: &[T]) -> Result<CentralityResults> 
271    where 
272        T: ScanResult + Sync,
273    {
274        let start_time = std::time::Instant::now();
275        
276        // Build dependency graph from scan results
277        let (graph, import_stats) = self.build_dependency_graph(scan_results)?;
278        
279        // Compute PageRank centrality
280        let pagerank_results = self.pagerank_computer.compute(&graph)?;
281        
282        // Perform graph analysis if enabled
283        let graph_analysis = if self.config.analyze_graph_structure {
284            self.stats_analyzer.analyze(&graph)?
285        } else {
286            // Create minimal analysis for large graphs
287            self.create_minimal_analysis(&graph)?
288        };
289        
290        // Create integration metadata
291        let computation_time = start_time.elapsed().as_millis() as u64;
292        let integration_metadata = IntegrationMetadata {
293            timestamp: chrono::Utc::now(),
294            computation_time_ms: computation_time,
295            integration_successful: true,
296            centrality_weight: self.config.integration.centrality_weight,
297            files_with_centrality: pagerank_results.scores.len(),
298            config: self.config.clone(),
299        };
300        
301        Ok(CentralityResults {
302            pagerank_scores: pagerank_results.scores.clone(),
303            graph_analysis,
304            pagerank_details: pagerank_results,
305            import_stats,
306            integration_metadata,
307        })
308    }
309    
310    /// Integrate centrality scores with existing heuristic scores
311    pub fn integrate_with_heuristics(
312        &self,
313        centrality_results: &CentralityResults,
314        heuristic_scores: &HashMap<String, f64>,
315    ) -> Result<HashMap<String, f64>> {
316        let normalized_centrality = self.normalize_centrality_scores(
317            &centrality_results.pagerank_scores,
318            heuristic_scores,
319        )?;
320        
321        let mut integrated_scores = HashMap::new();
322        let centrality_weight = self.config.integration.centrality_weight;
323        let heuristic_weight = 1.0 - centrality_weight;
324        
325        // Combine heuristic and centrality scores
326        for (file_path, heuristic_score) in heuristic_scores {
327            let centrality_score = normalized_centrality.get(file_path).copied().unwrap_or(0.0);
328            
329            // Apply entrypoint boost if configured
330            let boosted_centrality = if self.config.integration.boost_entrypoints 
331                && self.is_entrypoint_file(file_path) {
332                centrality_score * self.config.integration.entrypoint_boost_factor
333            } else {
334                centrality_score
335            };
336            
337            let integrated_score = heuristic_weight * heuristic_score + 
338                                 centrality_weight * boosted_centrality;
339            
340            integrated_scores.insert(file_path.clone(), integrated_score);
341        }
342        
343        // Add centrality-only files (not in heuristic scores)
344        for (file_path, centrality_score) in &normalized_centrality {
345            if !integrated_scores.contains_key(file_path) {
346                let boosted_centrality = if self.config.integration.boost_entrypoints 
347                    && self.is_entrypoint_file(file_path) {
348                    centrality_score * self.config.integration.entrypoint_boost_factor
349                } else {
350                    *centrality_score
351                };
352                
353                integrated_scores.insert(file_path.clone(), centrality_weight * boosted_centrality);
354            }
355        }
356        
357        Ok(integrated_scores)
358    }
359    
360    /// Build dependency graph from scan results
361    fn build_dependency_graph<T>(&self, scan_results: &[T]) -> Result<(DependencyGraph, ImportDetectionStats)> 
362    where 
363        T: ScanResult + Sync,
364    {
365        let mut graph = DependencyGraph::with_capacity(scan_results.len());
366        
367        // Create optimized import detector with pre-computed lookup maps
368        let mut optimized_detector = ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
369        
370        // Add all files as nodes first
371        for result in scan_results {
372            graph.add_node(result.path().to_string())?;
373        }
374        
375        // Detect imports and build edges using optimized detector
376        let import_stats = if self.config.pagerank_config.use_parallel {
377            self.build_edges_parallel_optimized(&mut graph, scan_results, &optimized_detector)?
378        } else {
379            self.build_edges_sequential_optimized(&mut graph, scan_results, &optimized_detector)?
380        };
381        
382        Ok((graph, import_stats))
383    }
384    
385    /// Build graph edges sequentially - OPTIMIZED
386    fn build_edges_sequential_optimized<T>(
387        &self, 
388        graph: &mut DependencyGraph, 
389        scan_results: &[T],
390        optimized_detector: &ImportDetector,
391    ) -> Result<ImportDetectionStats>
392    where 
393        T: ScanResult,
394    {
395        let mut stats = ImportDetectionStats {
396            files_processed: 0,
397            imports_detected: 0,
398            imports_resolved: 0,
399            resolution_rate: 0.0,
400            language_breakdown: HashMap::new(),
401            import_patterns: HashMap::new(),
402        };
403        
404        // Create file path lookup for resolution
405        let file_path_map: HashMap<&str, &T> = scan_results.iter()
406            .map(|result| (result.path(), result))
407            .collect();
408        
409        for result in scan_results {
410            stats.files_processed += 1;
411            
412            // Track language
413            if let Some(lang) = optimized_detector.detect_language(result.path()) {
414                *stats.language_breakdown.entry(lang.clone()).or_insert(0) += 1;
415            }
416            
417            // Extract and resolve imports using optimized detector
418            if let Some(imports) = result.imports() {
419                stats.imports_detected += imports.len();
420                
421                for import_str in imports {
422                    if let Some(resolved_path) = optimized_detector.resolve_import(
423                        import_str, 
424                        result.path(), 
425                        &file_path_map
426                    ) {
427                        graph.add_edge(result.path().to_string(), resolved_path)?;
428                        stats.imports_resolved += 1;
429                    }
430                }
431            }
432        }
433        
434        stats.resolution_rate = if stats.imports_detected > 0 {
435            stats.imports_resolved as f64 / stats.imports_detected as f64
436        } else {
437            0.0
438        };
439        
440        Ok(stats)
441    }
442    
443    /// Build graph edges sequentially - LEGACY
444    fn build_edges_sequential<T>(&self, graph: &mut DependencyGraph, scan_results: &[T]) -> Result<ImportDetectionStats>
445    where 
446        T: ScanResult,
447    {
448        let optimized_detector = ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
449        self.build_edges_sequential_optimized(graph, scan_results, &optimized_detector)
450    }
451    
452    /// Build graph edges in parallel - OPTIMIZED
453    fn build_edges_parallel_optimized<T>(
454        &self, 
455        graph: &mut DependencyGraph, 
456        scan_results: &[T],
457        optimized_detector: &ImportDetector,
458    ) -> Result<ImportDetectionStats>
459    where 
460        T: ScanResult + Sync,
461    {
462        // Create file path lookup
463        let file_path_map: HashMap<&str, &T> = scan_results.iter()
464            .map(|result| (result.path(), result))
465            .collect();
466        
467        // Process imports in parallel using optimized detector
468        let import_edges: Vec<_> = scan_results.par_iter()
469            .flat_map(|result| {
470                let mut edges = Vec::new();
471                
472                if let Some(imports) = result.imports() {
473                    for import_str in imports {
474                        if let Some(resolved_path) = optimized_detector.resolve_import(
475                            import_str, 
476                            result.path(), 
477                            &file_path_map
478                        ) {
479                            edges.push((result.path().to_string(), resolved_path));
480                        }
481                    }
482                }
483                
484                edges
485            })
486            .collect();
487        
488        // Add edges to graph
489        for (from, to) in &import_edges {
490            graph.add_edge(from.clone(), to.clone())?;
491        }
492        
493        // Calculate statistics
494        let total_imports: usize = scan_results.iter()
495            .map(|result| result.imports().map_or(0, |imports| imports.len()))
496            .sum();
497        
498        let language_breakdown: HashMap<String, usize> = scan_results.iter()
499            .filter_map(|result| optimized_detector.detect_language(result.path())
500                .map(|lang| (lang, 1)))
501            .fold(HashMap::new(), |mut acc, (lang, count)| {
502                *acc.entry(lang).or_insert(0) += count;
503                acc
504            });
505        
506        let stats = ImportDetectionStats {
507            files_processed: scan_results.len(),
508            imports_detected: total_imports,
509            imports_resolved: import_edges.len(),
510            resolution_rate: if total_imports > 0 {
511                import_edges.len() as f64 / total_imports as f64
512            } else {
513                0.0
514            },
515            language_breakdown,
516            import_patterns: HashMap::new(), // TODO: Implement detailed pattern analysis
517        };
518        
519        Ok(stats)
520    }
521    
522    /// Build graph edges in parallel - LEGACY
523    fn build_edges_parallel<T>(&self, graph: &mut DependencyGraph, scan_results: &[T]) -> Result<ImportDetectionStats>
524    where 
525        T: ScanResult + Sync,
526    {
527        let optimized_detector = ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
528        self.build_edges_parallel_optimized(graph, scan_results, &optimized_detector)
529    }
530    
531    /// Normalize centrality scores for integration with heuristics
532    fn normalize_centrality_scores(
533        &self,
534        centrality_scores: &HashMap<String, f64>,
535        heuristic_scores: &HashMap<String, f64>,
536    ) -> Result<HashMap<String, f64>> {
537        if centrality_scores.is_empty() {
538            return Ok(HashMap::new());
539        }
540        
541        match self.config.integration.normalization_method {
542            NormalizationMethod::MinMax => self.normalize_min_max(centrality_scores, heuristic_scores),
543            NormalizationMethod::ZScore => self.normalize_z_score(centrality_scores),
544            NormalizationMethod::Rank => self.normalize_rank(centrality_scores),
545            NormalizationMethod::None => Ok(centrality_scores.clone()),
546        }
547    }
548    
549    /// Min-max normalization to match heuristic score range
550    fn normalize_min_max(
551        &self,
552        centrality_scores: &HashMap<String, f64>,
553        heuristic_scores: &HashMap<String, f64>,
554    ) -> Result<HashMap<String, f64>> {
555        let centrality_values: Vec<f64> = centrality_scores.values().copied().collect();
556        let min_centrality = centrality_values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
557        let max_centrality = centrality_values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
558        
559        // Target range based on heuristic scores
560        let heuristic_values: Vec<f64> = heuristic_scores.values().copied().collect();
561        let max_heuristic = if heuristic_values.is_empty() {
562            1.0
563        } else {
564            heuristic_values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b))
565        };
566        
567        let mut normalized = HashMap::new();
568        
569        if (max_centrality - min_centrality).abs() < f64::EPSILON {
570            // All scores are the same
571            for (path, _) in centrality_scores {
572                normalized.insert(path.clone(), max_heuristic * 0.5); // Use half of max heuristic
573            }
574        } else {
575            for (path, &score) in centrality_scores {
576                let normalized_score = ((score - min_centrality) / (max_centrality - min_centrality)) * max_heuristic;
577                if normalized_score >= self.config.integration.min_centrality_threshold {
578                    normalized.insert(path.clone(), normalized_score);
579                }
580            }
581        }
582        
583        Ok(normalized)
584    }
585    
586    /// Z-score normalization
587    fn normalize_z_score(&self, centrality_scores: &HashMap<String, f64>) -> Result<HashMap<String, f64>> {
588        let values: Vec<f64> = centrality_scores.values().copied().collect();
589        let mean = values.iter().sum::<f64>() / values.len() as f64;
590        let variance = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / values.len() as f64;
591        let std_dev = variance.sqrt();
592        
593        let mut normalized = HashMap::new();
594        
595        if std_dev > f64::EPSILON {
596            for (path, &score) in centrality_scores {
597                let z_score = (score - mean) / std_dev;
598                // Shift and scale to positive range
599                let normalized_score = (z_score + 3.0) / 6.0; // Roughly [0,1] for most values
600                if normalized_score >= self.config.integration.min_centrality_threshold {
601                    normalized.insert(path.clone(), normalized_score);
602                }
603            }
604        } else {
605            // All scores are the same
606            for (path, _) in centrality_scores {
607                normalized.insert(path.clone(), 0.5);
608            }
609        }
610        
611        Ok(normalized)
612    }
613    
614    /// Rank-based normalization
615    fn normalize_rank(&self, centrality_scores: &HashMap<String, f64>) -> Result<HashMap<String, f64>> {
616        let mut scored_files: Vec<_> = centrality_scores.iter()
617            .map(|(path, &score)| (path.clone(), score))
618            .collect();
619        
620        scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
621        
622        let mut normalized = HashMap::new();
623        let total_files = scored_files.len();
624        
625        for (rank, (path, _)) in scored_files.into_iter().enumerate() {
626            let normalized_score = 1.0 - (rank as f64 / total_files as f64);
627            if normalized_score >= self.config.integration.min_centrality_threshold {
628                normalized.insert(path, normalized_score);
629            }
630        }
631        
632        Ok(normalized)
633    }
634    
635    /// Create minimal analysis for large graphs (performance optimization)
636    fn create_minimal_analysis(&self, graph: &DependencyGraph) -> Result<GraphAnalysisResults> {
637        // Use a simplified analyzer for large graphs
638        let minimal_analyzer = GraphStatisticsAnalyzer::for_large_graphs();
639        minimal_analyzer.analyze(graph)
640    }
641    
642    /// Check if a file is an entrypoint
643    fn is_entrypoint_file(&self, file_path: &str) -> bool {
644        let path = Path::new(file_path);
645        let file_name = path.file_name()
646            .and_then(|name| name.to_str())
647            .unwrap_or("")
648            .to_lowercase();
649        
650        matches!(file_name.as_str(),
651            "main.py" | "main.rs" | "main.go" | "main.js" | "main.ts" |
652            "index.py" | "index.rs" | "index.go" | "index.js" | "index.ts" |
653            "app.py" | "app.rs" | "app.go" | "app.js" | "app.ts" |
654            "server.py" | "server.rs" | "server.go" | "server.js" | "server.ts" |
655            "lib.rs" | "__init__.py"
656        )
657    }
658}
659
660impl Default for CentralityCalculator {
661    fn default() -> Self {
662        Self::new().expect("Failed to create CentralityCalculator")
663    }
664}
665
666/// Import detection and resolution engine with pre-computed lookup optimization
667#[derive(Debug, Clone)]
668pub struct ImportDetector {
669    config: ImportResolutionConfig,
670    /// Pre-computed lookup map: file stem -> full paths (massive performance improvement)
671    stem_to_paths: HashMap<String, Vec<String>>,
672    /// Pre-computed lookup map: filename -> full paths
673    filename_to_paths: HashMap<String, Vec<String>>,
674    /// Set of all available file paths for quick existence checks
675    available_paths: HashSet<String>,
676}
677
678impl ImportDetector {
679    /// Create with configuration
680    pub fn with_config(config: ImportResolutionConfig) -> Self {
681        Self { 
682            config,
683            stem_to_paths: HashMap::new(),
684            filename_to_paths: HashMap::new(),
685            available_paths: HashSet::new(),
686        }
687    }
688    
689    /// Create with pre-computed lookup maps for massive performance improvement
690    pub fn with_file_index<T>(config: ImportResolutionConfig, scan_results: &[T]) -> Self 
691    where 
692        T: ScanResult,
693    {
694        let mut detector = Self::with_config(config);
695        detector.build_lookup_maps(scan_results);
696        detector
697    }
698    
699    /// Build inverted index mapping file stems/names to full paths
700    /// This eliminates the O(n) scan-all-files bottleneck
701    fn build_lookup_maps<T>(&mut self, scan_results: &[T]) 
702    where 
703        T: ScanResult,
704    {
705        self.stem_to_paths.clear();
706        self.filename_to_paths.clear();
707        self.available_paths.clear();
708        
709        for result in scan_results {
710            let full_path = result.path().to_string();
711            self.available_paths.insert(full_path.clone());
712            
713            let path = Path::new(result.path());
714            
715            // Index by file stem (name without extension)
716            if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
717                let stem_lower = stem.to_lowercase();
718                self.stem_to_paths.entry(stem_lower).or_insert_with(Vec::new).push(full_path.clone());
719            }
720            
721            // Index by full filename
722            if let Some(filename) = path.file_name().and_then(|s| s.to_str()) {
723                let filename_lower = filename.to_lowercase();
724                self.filename_to_paths.entry(filename_lower).or_insert_with(Vec::new).push(full_path);
725            }
726        }
727    }
728    
729    /// Detect programming language from file extension
730    pub fn detect_language(&self, file_path: &str) -> Option<String> {
731        let path = Path::new(file_path);
732        let ext = path.extension()?.to_str()?.to_lowercase();
733        
734        match ext.as_str() {
735            "py" => Some("python".to_string()),
736            "js" | "jsx" | "mjs" => Some("javascript".to_string()),
737            "ts" | "tsx" => Some("typescript".to_string()),
738            "rs" => Some("rust".to_string()),
739            "go" => Some("go".to_string()),
740            "java" | "kt" => Some("java".to_string()),
741            "cpp" | "cc" | "cxx" | "hpp" | "h" => Some("cpp".to_string()),
742            "c" => Some("c".to_string()),
743            "rb" => Some("ruby".to_string()),
744            "php" => Some("php".to_string()),
745            "cs" => Some("csharp".to_string()),
746            "swift" => Some("swift".to_string()),
747            _ => None,
748        }
749    }
750    
751    /// Resolve import string to actual file path
752    pub fn resolve_import<T>(
753        &self,
754        import_str: &str,
755        current_file: &str,
756        file_map: &HashMap<&str, &T>,
757    ) -> Option<String>
758    where
759        T: ScanResult,
760    {
761        // Check custom path mappings first
762        if let Some(mapped_path) = self.config.path_mappings.get(import_str) {
763            if file_map.contains_key(mapped_path.as_str()) {
764                return Some(mapped_path.clone());
765            }
766        }
767        
768        let current_path = Path::new(current_file);
769        let language = self.detect_language(current_file);
770        
771        match language.as_deref() {
772            Some("python") => self.resolve_python_import(import_str, current_path, file_map),
773            Some("javascript") | Some("typescript") => self.resolve_js_import(import_str, current_path, file_map),
774            Some("rust") => self.resolve_rust_import(import_str, current_path, file_map),
775            Some("go") => self.resolve_go_import(import_str, current_path, file_map),
776            _ => self.resolve_generic_import(import_str, current_path, file_map),
777        }
778    }
779    
780    /// Resolve Python import
781    fn resolve_python_import<T>(
782        &self,
783        import_str: &str,
784        current_path: &Path,
785        file_map: &HashMap<&str, &T>,
786    ) -> Option<String>
787    where
788        T: ScanResult,
789    {
790        let cleaned_import = import_str.trim();
791        
792        // Skip standard library imports if configured
793        if self.config.exclude_stdlib_imports && self.is_python_stdlib(cleaned_import) {
794            return None;
795        }
796        
797        // Convert module path to file path
798        let module_parts: Vec<&str> = cleaned_import.split('.').collect();
799        
800        // Try various combinations
801        let mut candidates = Vec::new();
802        
803        // Direct module file
804        candidates.push(format!("{}.py", module_parts.join("/")));
805        
806        // Module package
807        candidates.push(format!("{}/__init__.py", module_parts.join("/")));
808        
809        // Relative to current file directory
810        if let Some(parent) = current_path.parent() {
811            let parent_str = parent.to_string_lossy();
812            let relative_candidates: Vec<String> = candidates.iter()
813                .map(|candidate| format!("{}/{}", parent_str, candidate))
814                .collect();
815            candidates.extend(relative_candidates);
816        }
817        
818        // Find first matching candidate
819        for candidate in &candidates {
820            if file_map.contains_key(candidate.as_str()) {
821                return Some(candidate.clone());
822            }
823        }
824        
825        // Fuzzy matching as fallback
826        self.fuzzy_match_import(&module_parts, file_map)
827    }
828    
829    /// Resolve JavaScript/TypeScript import
830    fn resolve_js_import<T>(
831        &self,
832        import_str: &str,
833        current_path: &Path,
834        file_map: &HashMap<&str, &T>,
835    ) -> Option<String>
836    where
837        T: ScanResult,
838    {
839        let cleaned_import = import_str.trim();
840        
841        // Handle relative imports
842        if cleaned_import.starts_with("./") || cleaned_import.starts_with("../") {
843            if !self.config.resolve_relative_imports {
844                return None;
845            }
846            
847            if let Some(parent) = current_path.parent() {
848                let resolved_path = parent.join(&cleaned_import[2..]); // Remove ./
849                let resolved_str = resolved_path.to_string_lossy();
850                
851                // Try different extensions
852                for ext in &[".js", ".ts", ".jsx", ".tsx", "/index.js", "/index.ts"] {
853                    let candidate = format!("{}{}", resolved_str, ext);
854                    if file_map.contains_key(candidate.as_str()) {
855                        return Some(candidate);
856                    }
857                }
858            }
859        }
860        // Handle absolute imports
861        else if self.config.resolve_absolute_imports {
862            let import_parts: Vec<&str> = cleaned_import.split('/').collect();
863            return self.fuzzy_match_import(&import_parts, file_map);
864        }
865        
866        None
867    }
868    
869    /// Resolve Rust import (use/mod statements)
870    fn resolve_rust_import<T>(
871        &self,
872        import_str: &str,
873        _current_path: &Path,
874        file_map: &HashMap<&str, &T>,
875    ) -> Option<String>
876    where
877        T: ScanResult,
878    {
879        let cleaned_import = import_str.trim();
880        
881        // Skip standard library crates
882        if self.config.exclude_stdlib_imports && self.is_rust_stdlib(cleaned_import) {
883            return None;
884        }
885        
886        let parts: Vec<&str> = cleaned_import.split("::").collect();
887        
888        // Try to resolve as file path
889        let mut candidates = Vec::new();
890        
891        // Direct module file
892        candidates.push(format!("{}.rs", parts.join("/")));
893        
894        // Module directory with mod.rs
895        candidates.push(format!("{}/mod.rs", parts.join("/")));
896        
897        // lib.rs in crate
898        if parts.len() == 1 {
899            candidates.push(format!("{}/lib.rs", parts[0]));
900            candidates.push(format!("{}/src/lib.rs", parts[0]));
901        }
902        
903        // Find first matching candidate
904        for candidate in &candidates {
905            if file_map.contains_key(candidate.as_str()) {
906                return Some(candidate.clone());
907            }
908        }
909        
910        // Fuzzy matching
911        self.fuzzy_match_import(&parts, file_map)
912    }
913    
914    /// Resolve Go import
915    fn resolve_go_import<T>(
916        &self,
917        import_str: &str,
918        _current_path: &Path,
919        file_map: &HashMap<&str, &T>,
920    ) -> Option<String>
921    where
922        T: ScanResult,
923    {
924        let cleaned_import = import_str.trim().trim_matches('"');
925        
926        // Skip standard library
927        if self.config.exclude_stdlib_imports && !cleaned_import.contains('.') {
928            return None;
929        }
930        
931        let parts: Vec<&str> = cleaned_import.split('/').collect();
932        
933        // Try various Go file patterns
934        let mut candidates = Vec::new();
935        
936        // Package directory
937        candidates.push(format!("{}.go", parts.last()?));
938        candidates.push(format!("{}/main.go", cleaned_import));
939        candidates.push(format!("{}/{}.go", cleaned_import, parts.last()?));
940        
941        for candidate in &candidates {
942            if file_map.contains_key(candidate.as_str()) {
943                return Some(candidate.clone());
944            }
945        }
946        
947        self.fuzzy_match_import(&parts, file_map)
948    }
949    
950    /// Generic import resolution
951    fn resolve_generic_import<T>(
952        &self,
953        import_str: &str,
954        _current_path: &Path,
955        file_map: &HashMap<&str, &T>,
956    ) -> Option<String>
957    where
958        T: ScanResult,
959    {
960        let cleaned_import = import_str.trim();
961        let parts: Vec<&str> = cleaned_import.split(&['/', '.', ':']).collect();
962        self.fuzzy_match_import(&parts, file_map)
963    }
964    
965    /// Fuzzy matching for import resolution - OPTIMIZED with pre-computed maps
966    fn fuzzy_match_import<T>(
967        &self,
968        import_parts: &[&str],
969        _file_map: &HashMap<&str, &T>,
970    ) -> Option<String>
971    where
972        T: ScanResult,
973    {
974        if import_parts.is_empty() {
975            return None;
976        }
977        
978        let last_part = import_parts.last()?.to_lowercase();
979        
980        // MASSIVE PERFORMANCE IMPROVEMENT: Use pre-computed lookup maps instead of O(n) scan
981        // 1. First try exact stem match (most common case)
982        if let Some(paths) = self.stem_to_paths.get(&last_part) {
983            // Return first match (could be made smarter with scoring)
984            if let Some(first_path) = paths.first() {
985                return Some(first_path.clone());
986            }
987        }
988        
989        // 2. Try filename match
990        if let Some(paths) = self.filename_to_paths.get(&last_part) {
991            if let Some(first_path) = paths.first() {
992                return Some(first_path.clone());
993            }
994        }
995        
996        // 3. Try partial matching against stems
997        for (stem, paths) in &self.stem_to_paths {
998            if stem.contains(&last_part) || last_part.contains(stem) {
999                if let Some(first_path) = paths.first() {
1000                    return Some(first_path.clone());
1001                }
1002            }
1003        }
1004        
1005        // 4. Fallback: check if path contains all import parts
1006        for path in &self.available_paths {
1007            let path_lower = path.to_lowercase();
1008            if import_parts.iter().all(|&part| path_lower.contains(&part.to_lowercase())) {
1009                return Some(path.clone());
1010            }
1011        }
1012        
1013        None
1014    }
1015    
1016    /// Check if import is Python standard library
1017    fn is_python_stdlib(&self, import_str: &str) -> bool {
1018        let stdlib_modules = [
1019            "os", "sys", "re", "json", "collections", "itertools", "functools",
1020            "typing", "datetime", "math", "random", "string", "pathlib", "io",
1021            "csv", "xml", "html", "urllib", "http", "email", "logging", "unittest",
1022            "asyncio", "concurrent", "multiprocessing", "threading", "subprocess",
1023        ];
1024        
1025        let first_part = import_str.split('.').next().unwrap_or(import_str);
1026        stdlib_modules.contains(&first_part)
1027    }
1028    
1029    /// Check if import is Rust standard library
1030    fn is_rust_stdlib(&self, import_str: &str) -> bool {
1031        import_str.starts_with("std::") || 
1032        import_str.starts_with("core::") ||
1033        import_str.starts_with("alloc::")
1034    }
1035}
1036
1037/// Utility functions for centrality results analysis
1038impl CentralityResults {
1039    /// Get files sorted by centrality score (descending)
1040    pub fn top_files_by_centrality(&self, k: usize) -> Vec<(String, f64)> {
1041        let mut scored_files: Vec<_> = self.pagerank_scores.iter()
1042            .map(|(path, &score)| (path.clone(), score))
1043            .collect();
1044        
1045        scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1046        scored_files.into_iter().take(k).collect()
1047    }
1048    
1049    /// Get summary statistics about centrality computation
1050    pub fn summary(&self) -> String {
1051        format!(
1052            "Centrality Analysis Summary:\n\
1053             - Files with centrality scores: {}\n\
1054             - PageRank iterations: {} (converged: {})\n\
1055             - Graph: {} nodes, {} edges (density: {:.4})\n\
1056             - Import resolution: {:.1}% ({}/{})\n\
1057             - Top languages: {}\n\
1058             - Computation time: {}ms\n\
1059             - Integration weight: {:.2}",
1060            self.pagerank_scores.len(),
1061            self.pagerank_details.iterations_converged,
1062            self.pagerank_details.converged(),
1063            self.graph_analysis.basic_stats.total_nodes,
1064            self.graph_analysis.basic_stats.total_edges,
1065            self.graph_analysis.basic_stats.graph_density,
1066            self.import_stats.resolution_rate * 100.0,
1067            self.import_stats.imports_resolved,
1068            self.import_stats.imports_detected,
1069            self.import_stats.language_breakdown.iter()
1070                .max_by_key(|(_, &count)| count)
1071                .map(|(lang, count)| format!("{} ({})", lang, count))
1072                .unwrap_or_else(|| "None".to_string()),
1073            self.integration_metadata.computation_time_ms,
1074            self.integration_metadata.centrality_weight,
1075        )
1076    }
1077}
1078
1079#[cfg(test)]
1080mod tests {
1081    use super::*;
1082    use scribe_analysis::heuristics::DocumentAnalysis;
1083    
1084    // Mock scan result for testing
1085    #[derive(Debug, Clone)]
1086    struct MockScanResult {
1087        path: String,
1088        relative_path: String,
1089        depth: usize,
1090        imports: Option<Vec<String>>,
1091        is_docs: bool,
1092        is_readme: bool,
1093        is_test: bool,
1094        is_entrypoint: bool,
1095        has_examples: bool,
1096        priority_boost: f64,
1097        churn_score: f64,
1098        centrality_in: f64,
1099        doc_analysis: Option<DocumentAnalysis>,
1100    }
1101    
1102    impl MockScanResult {
1103        fn new(path: &str) -> Self {
1104            Self {
1105                path: path.to_string(),
1106                relative_path: path.to_string(),
1107                depth: path.matches('/').count(),
1108                imports: None,
1109                is_docs: path.contains("doc") || path.ends_with(".md"),
1110                is_readme: path.to_lowercase().contains("readme"),
1111                is_test: path.contains("test"),
1112                is_entrypoint: path.contains("main") || path.contains("index"),
1113                has_examples: path.contains("example"),
1114                priority_boost: 0.0,
1115                churn_score: 0.5,
1116                centrality_in: 0.0,
1117                doc_analysis: Some(DocumentAnalysis::new()),
1118            }
1119        }
1120        
1121        fn with_imports(mut self, imports: Vec<String>) -> Self {
1122            self.imports = Some(imports);
1123            self
1124        }
1125    }
1126    
1127    impl ScanResult for MockScanResult {
1128        fn path(&self) -> &str { &self.path }
1129        fn relative_path(&self) -> &str { &self.relative_path }
1130        fn depth(&self) -> usize { self.depth }
1131        fn is_docs(&self) -> bool { self.is_docs }
1132        fn is_readme(&self) -> bool { self.is_readme }
1133        fn is_test(&self) -> bool { self.is_test }
1134        fn is_entrypoint(&self) -> bool { self.is_entrypoint }
1135        fn has_examples(&self) -> bool { self.has_examples }
1136        fn priority_boost(&self) -> f64 { self.priority_boost }
1137        fn churn_score(&self) -> f64 { self.churn_score }
1138        fn centrality_in(&self) -> f64 { self.centrality_in }
1139        fn imports(&self) -> Option<&[String]> { self.imports.as_deref() }
1140        fn doc_analysis(&self) -> Option<&DocumentAnalysis> { self.doc_analysis.as_ref() }
1141    }
1142    
1143    #[test]
1144    fn test_centrality_calculator_creation() {
1145        let calculator = CentralityCalculator::new();
1146        assert!(calculator.is_ok());
1147        
1148        let large_calc = CentralityCalculator::for_large_codebases();
1149        assert!(large_calc.is_ok());
1150    }
1151    
1152    #[test]
1153    fn test_import_detection() {
1154        let detector = ImportDetector::with_config(ImportResolutionConfig::default());
1155        
1156        // Test language detection
1157        assert_eq!(detector.detect_language("main.py"), Some("python".to_string()));
1158        assert_eq!(detector.detect_language("app.js"), Some("javascript".to_string()));
1159        assert_eq!(detector.detect_language("lib.rs"), Some("rust".to_string()));
1160        
1161        // Test Python stdlib detection
1162        assert!(detector.is_python_stdlib("os"));
1163        assert!(detector.is_python_stdlib("sys.path"));
1164        assert!(!detector.is_python_stdlib("custom_module"));
1165        
1166        // Test Rust stdlib detection
1167        assert!(detector.is_rust_stdlib("std::collections::HashMap"));
1168        assert!(detector.is_rust_stdlib("core::fmt"));
1169        assert!(!detector.is_rust_stdlib("serde::Deserialize"));
1170    }
1171    
1172    #[test]
1173    fn test_centrality_calculation() {
1174        let calculator = CentralityCalculator::new().unwrap();
1175        
1176        let scan_results = vec![
1177            MockScanResult::new("main.py").with_imports(vec!["utils".to_string(), "config".to_string()]),
1178            MockScanResult::new("utils.py").with_imports(vec!["config".to_string()]),
1179            MockScanResult::new("config.py"),
1180            MockScanResult::new("test.py").with_imports(vec!["main".to_string()]),
1181        ];
1182        
1183        let results = calculator.calculate_centrality(&scan_results).unwrap();
1184        
1185        // Basic checks
1186        assert!(!results.pagerank_scores.is_empty());
1187        assert!(results.integration_metadata.integration_successful);
1188        assert_eq!(results.integration_metadata.files_with_centrality, results.pagerank_scores.len());
1189        
1190        // config.py should have high centrality (imported by main.py and utils.py)
1191        let config_score = results.pagerank_scores.get("config.py");
1192        assert!(config_score.is_some());
1193        
1194        println!("Centrality scores:");
1195        for (file, score) in &results.pagerank_scores {
1196            println!("  {}: {:.6}", file, score);
1197        }
1198        
1199        println!("\n{}", results.summary());
1200    }
1201    
1202    #[test]
1203    fn test_heuristics_integration() {
1204        let calculator = CentralityCalculator::new().unwrap();
1205        
1206        let scan_results = vec![
1207            MockScanResult::new("main.py").with_imports(vec!["utils".to_string()]),
1208            MockScanResult::new("utils.py"),
1209        ];
1210        
1211        let centrality_results = calculator.calculate_centrality(&scan_results).unwrap();
1212        
1213        // Mock heuristic scores
1214        let mut heuristic_scores = HashMap::new();
1215        heuristic_scores.insert("main.py".to_string(), 0.8);
1216        heuristic_scores.insert("utils.py".to_string(), 0.6);
1217        
1218        let integrated_scores = calculator.integrate_with_heuristics(
1219            &centrality_results, 
1220            &heuristic_scores
1221        ).unwrap();
1222        
1223        assert!(!integrated_scores.is_empty());
1224        
1225        // Integrated scores should be different from original heuristic scores
1226        for (file, integrated_score) in &integrated_scores {
1227            let original_score = heuristic_scores.get(file).unwrap();
1228            println!("File {}: heuristic={:.3}, integrated={:.3}", 
1229                     file, original_score, integrated_score);
1230        }
1231    }
1232    
1233    #[test]
1234    fn test_normalization_methods() {
1235        let calculator = CentralityCalculator::new().unwrap();
1236        
1237        let centrality_scores = vec![
1238            ("file1".to_string(), 0.1),
1239            ("file2".to_string(), 0.3),
1240            ("file3".to_string(), 0.6),
1241            ("file4".to_string(), 1.0),
1242        ].into_iter().collect();
1243        
1244        let heuristic_scores = vec![
1245            ("file1".to_string(), 0.5),
1246            ("file2".to_string(), 0.7),
1247            ("file3".to_string(), 0.9),
1248            ("file4".to_string(), 1.2),
1249        ].into_iter().collect();
1250        
1251        // Test min-max normalization
1252        let normalized = calculator.normalize_min_max(&centrality_scores, &heuristic_scores).unwrap();
1253        assert!(!normalized.is_empty());
1254        
1255        // Test z-score normalization
1256        let z_normalized = calculator.normalize_z_score(&centrality_scores).unwrap();
1257        assert!(!z_normalized.is_empty());
1258        
1259        // Test rank normalization
1260        let rank_normalized = calculator.normalize_rank(&centrality_scores).unwrap();
1261        assert!(!rank_normalized.is_empty());
1262        
1263        println!("Original scores: {:?}", centrality_scores);
1264        println!("Min-max normalized: {:?}", normalized);
1265        println!("Z-score normalized: {:?}", z_normalized);
1266        println!("Rank normalized: {:?}", rank_normalized);
1267    }
1268    
1269    #[test]
1270    fn test_import_resolution() {
1271        let detector = ImportDetector::with_config(ImportResolutionConfig::default());
1272        
1273        // Create mock file map
1274        let scan_results = vec![
1275            MockScanResult::new("src/main.py"),
1276            MockScanResult::new("src/utils.py"),
1277            MockScanResult::new("src/config.py"),
1278            MockScanResult::new("tests/test_main.py"),
1279        ];
1280        
1281        let file_map: HashMap<&str, &MockScanResult> = scan_results.iter()
1282            .map(|result| (result.path(), result))
1283            .collect();
1284        
1285        // Test Python import resolution
1286        let resolved = detector.resolve_import(
1287            "utils", 
1288            "src/main.py", 
1289            &file_map
1290        );
1291        assert!(resolved.is_some());
1292        
1293        // Test module path resolution
1294        let resolved_config = detector.resolve_import(
1295            "src.config",
1296            "src/main.py",
1297            &file_map
1298        );
1299        // Should resolve to src/config.py through fuzzy matching
1300        assert!(resolved_config.is_some());
1301        
1302        println!("Resolved imports:");
1303        if let Some(path) = resolved {
1304            println!("  utils -> {}", path);
1305        }
1306        if let Some(path) = resolved_config {
1307            println!("  src.config -> {}", path);
1308        }
1309    }
1310    
1311    #[test]
1312    fn test_entrypoint_detection() {
1313        let calculator = CentralityCalculator::new().unwrap();
1314        
1315        assert!(calculator.is_entrypoint_file("main.py"));
1316        assert!(calculator.is_entrypoint_file("src/main.rs"));
1317        assert!(calculator.is_entrypoint_file("index.js"));
1318        assert!(calculator.is_entrypoint_file("app.py"));
1319        assert!(calculator.is_entrypoint_file("lib.rs"));
1320        assert!(calculator.is_entrypoint_file("__init__.py"));
1321        
1322        assert!(!calculator.is_entrypoint_file("utils.py"));
1323        assert!(!calculator.is_entrypoint_file("config.rs"));
1324        assert!(!calculator.is_entrypoint_file("helper.js"));
1325    }
1326    
1327    #[test]
1328    fn test_top_files_by_centrality() {
1329        let mut pagerank_scores = HashMap::new();
1330        pagerank_scores.insert("file1.py".to_string(), 0.4);
1331        pagerank_scores.insert("file2.py".to_string(), 0.6);
1332        pagerank_scores.insert("file3.py".to_string(), 0.2);
1333        pagerank_scores.insert("file4.py".to_string(), 0.8);
1334        
1335        let results = CentralityResults {
1336            pagerank_scores,
1337            graph_analysis: GraphAnalysisResults {
1338                basic_stats: crate::graph::GraphStatistics::empty(),
1339                degree_distribution: Default::default(),
1340                connectivity: Default::default(),
1341                structural_patterns: Default::default(),
1342                import_insights: Default::default(),
1343                performance_profile: Default::default(),
1344                analysis_metadata: Default::default(),
1345            },
1346            pagerank_details: PageRankResults {
1347                scores: HashMap::new(),
1348                iterations_converged: 10,
1349                convergence_epsilon: 1e-6,
1350                graph_stats: crate::graph::GraphStatistics::empty(),
1351                parameters: PageRankConfig::default(),
1352                performance_metrics: Default::default(),
1353            },
1354            import_stats: ImportDetectionStats {
1355                files_processed: 4,
1356                imports_detected: 0,
1357                imports_resolved: 0,
1358                resolution_rate: 0.0,
1359                language_breakdown: HashMap::new(),
1360                import_patterns: HashMap::new(),
1361            },
1362            integration_metadata: IntegrationMetadata {
1363                timestamp: chrono::Utc::now(),
1364                computation_time_ms: 100,
1365                integration_successful: true,
1366                centrality_weight: 0.15,
1367                files_with_centrality: 4,
1368                config: CentralityConfig::default(),
1369            },
1370        };
1371        
1372        let top_files = results.top_files_by_centrality(2);
1373        assert_eq!(top_files.len(), 2);
1374        assert_eq!(top_files[0].0, "file4.py");
1375        assert_eq!(top_files[0].1, 0.8);
1376        assert_eq!(top_files[1].0, "file2.py");
1377        assert_eq!(top_files[1].1, 0.6);
1378    }
1379}