scribe_graph/
centrality.rs

1//! # Centrality Calculator with Heuristics Integration
2//!
3//! Main interface for PageRank centrality calculation and integration with the existing
4//! FastPath heuristic scoring system. This module provides the high-level API for:
5//!
6//! ## Key Features
7//! - **PageRank Centrality Computation**: Research-grade algorithm with convergence detection
8//! - **Import Graph Construction**: Builds dependency graphs from file scan results  
9//! - **Heuristics Integration**: Seamless integration with V2 scoring system
10//! - **Performance Optimization**: Efficient computation for large codebases
11//! - **Multi-language Support**: Import detection across programming languages
12//! - **Comprehensive Analysis**: Full graph statistics and structural insights
13//!
14//! ## Integration with FastPath Heuristics
15//! The centrality scores are integrated into the heuristic scoring formula:
16//! ```text
17//! final_score = Σ(weight_i × normalized_score_i) + priority_boost + template_boost
18//! ```
19//! Where `centrality_score` becomes a weighted component when V2 features are enabled.
20
21use std::collections::HashMap;
22use std::path::Path;
23use serde::{Deserialize, Serialize};
24use rayon::prelude::*;
25use scribe_core::Result;
26use scribe_analysis::heuristics::ScanResult;
27
28use crate::graph::{DependencyGraph, NodeId};
29use crate::pagerank::{PageRankComputer, PageRankResults, PageRankConfig};
30use crate::statistics::{GraphStatisticsAnalyzer, GraphAnalysisResults};
31
32/// Complete centrality calculation results with comprehensive metadata
33#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34pub struct CentralityResults {
35    /// PageRank scores (file path -> centrality score)
36    pub pagerank_scores: HashMap<NodeId, f64>,
37    
38    /// Graph analysis results
39    pub graph_analysis: GraphAnalysisResults,
40    
41    /// PageRank computation details
42    pub pagerank_details: PageRankResults,
43    
44    /// Import detection statistics
45    pub import_stats: ImportDetectionStats,
46    
47    /// Integration metadata
48    pub integration_metadata: IntegrationMetadata,
49}
50
51/// Statistics about import detection and graph construction
52#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
53pub struct ImportDetectionStats {
54    /// Number of files processed for import detection
55    pub files_processed: usize,
56    
57    /// Number of import relationships detected
58    pub imports_detected: usize,
59    
60    /// Number of resolved imports (mapped to actual files)
61    pub imports_resolved: usize,
62    
63    /// Import resolution success rate
64    pub resolution_rate: f64,
65    
66    /// Language breakdown of processed files
67    pub language_breakdown: HashMap<String, usize>,
68    
69    /// Import patterns by language
70    pub import_patterns: HashMap<String, ImportPatternStats>,
71}
72
73/// Import pattern statistics for a specific language
74#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
75pub struct ImportPatternStats {
76    /// Total imports found
77    pub total_imports: usize,
78    
79    /// Relative imports (./,../)
80    pub relative_imports: usize,
81    
82    /// Absolute imports
83    pub absolute_imports: usize,
84    
85    /// Standard library imports
86    pub stdlib_imports: usize,
87    
88    /// Third-party imports
89    pub third_party_imports: usize,
90}
91
92/// Metadata about centrality-heuristics integration
93#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
94pub struct IntegrationMetadata {
95    /// When the analysis was performed
96    pub timestamp: chrono::DateTime<chrono::Utc>,
97    
98    /// Total computation time
99    pub computation_time_ms: u64,
100    
101    /// Whether centrality was successfully integrated
102    pub integration_successful: bool,
103    
104    /// Centrality weight used in integration
105    pub centrality_weight: f64,
106    
107    /// Number of files with centrality scores
108    pub files_with_centrality: usize,
109    
110    /// Configuration used
111    pub config: CentralityConfig,
112}
113
114/// Configuration for centrality calculation
115#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
116pub struct CentralityConfig {
117    /// PageRank algorithm configuration
118    pub pagerank_config: PageRankConfig,
119    
120    /// Whether to perform expensive graph analysis
121    pub analyze_graph_structure: bool,
122    
123    /// Import resolution configuration
124    pub import_resolution: ImportResolutionConfig,
125    
126    /// Integration parameters
127    pub integration: IntegrationConfig,
128}
129
130/// Configuration for import resolution
131#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
132pub struct ImportResolutionConfig {
133    /// Maximum search depth for import resolution
134    pub max_search_depth: usize,
135    
136    /// Whether to resolve relative imports
137    pub resolve_relative_imports: bool,
138    
139    /// Whether to resolve absolute imports
140    pub resolve_absolute_imports: bool,
141    
142    /// Whether to exclude standard library imports
143    pub exclude_stdlib_imports: bool,
144    
145    /// Custom import path mappings
146    pub path_mappings: HashMap<String, String>,
147}
148
149/// Configuration for heuristics integration
150#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
151pub struct IntegrationConfig {
152    /// Weight for centrality in final score
153    pub centrality_weight: f64,
154    
155    /// Normalization method for centrality scores
156    pub normalization_method: NormalizationMethod,
157    
158    /// Minimum centrality score threshold
159    pub min_centrality_threshold: f64,
160    
161    /// Whether to boost entrypoint centrality
162    pub boost_entrypoints: bool,
163    
164    /// Entrypoint boost factor
165    pub entrypoint_boost_factor: f64,
166}
167
168/// Methods for normalizing centrality scores
169#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
170pub enum NormalizationMethod {
171    /// Normalize to [0,1] range
172    MinMax,
173    /// Z-score normalization
174    ZScore,
175    /// Rank-based normalization
176    Rank,
177    /// No normalization
178    None,
179}
180
181impl Default for CentralityConfig {
182    fn default() -> Self {
183        Self {
184            pagerank_config: PageRankConfig::for_code_analysis(),
185            analyze_graph_structure: true,
186            import_resolution: ImportResolutionConfig::default(),
187            integration: IntegrationConfig::default(),
188        }
189    }
190}
191
192impl Default for ImportResolutionConfig {
193    fn default() -> Self {
194        Self {
195            max_search_depth: 3,
196            resolve_relative_imports: true,
197            resolve_absolute_imports: true,
198            exclude_stdlib_imports: true,
199            path_mappings: HashMap::new(),
200        }
201    }
202}
203
204impl Default for IntegrationConfig {
205    fn default() -> Self {
206        Self {
207            centrality_weight: 0.15, // 15% weight in V2 scoring
208            normalization_method: NormalizationMethod::MinMax,
209            min_centrality_threshold: 1e-6,
210            boost_entrypoints: true,
211            entrypoint_boost_factor: 1.5,
212        }
213    }
214}
215
216/// Main centrality calculator with heuristics integration
217#[derive(Debug)]
218pub struct CentralityCalculator {
219    /// Configuration
220    config: CentralityConfig,
221    
222    /// PageRank computer
223    pagerank_computer: PageRankComputer,
224    
225    /// Graph statistics analyzer
226    stats_analyzer: GraphStatisticsAnalyzer,
227    
228    /// Import detector
229    import_detector: ImportDetector,
230}
231
232impl CentralityCalculator {
233    /// Create a new centrality calculator with default configuration
234    pub fn new() -> Result<Self> {
235        let config = CentralityConfig::default();
236        Self::with_config(config)
237    }
238    
239    /// Create with custom configuration
240    pub fn with_config(config: CentralityConfig) -> Result<Self> {
241        let pagerank_computer = PageRankComputer::with_config(config.pagerank_config.clone())?;
242        
243        let stats_analyzer = if config.analyze_graph_structure {
244            GraphStatisticsAnalyzer::new()
245        } else {
246            GraphStatisticsAnalyzer::for_large_graphs()
247        };
248        
249        let import_detector = ImportDetector::with_config(config.import_resolution.clone());
250        
251        Ok(Self {
252            config,
253            pagerank_computer,
254            stats_analyzer,
255            import_detector,
256        })
257    }
258    
259    /// Create optimized for large codebases
260    pub fn for_large_codebases() -> Result<Self> {
261        let config = CentralityConfig {
262            pagerank_config: PageRankConfig::for_large_codebases(),
263            analyze_graph_structure: false,
264            ..CentralityConfig::default()
265        };
266        Self::with_config(config)
267    }
268    
269    /// Calculate centrality scores for a collection of scan results
270    pub fn calculate_centrality<T>(&self, scan_results: &[T]) -> Result<CentralityResults> 
271    where 
272        T: ScanResult + Sync,
273    {
274        let start_time = std::time::Instant::now();
275        
276        // Build dependency graph from scan results
277        let (graph, import_stats) = self.build_dependency_graph(scan_results)?;
278        
279        // Compute PageRank centrality
280        let pagerank_results = self.pagerank_computer.compute(&graph)?;
281        
282        // Perform graph analysis if enabled
283        let graph_analysis = if self.config.analyze_graph_structure {
284            self.stats_analyzer.analyze(&graph)?
285        } else {
286            // Create minimal analysis for large graphs
287            self.create_minimal_analysis(&graph)?
288        };
289        
290        // Create integration metadata
291        let computation_time = start_time.elapsed().as_millis() as u64;
292        let integration_metadata = IntegrationMetadata {
293            timestamp: chrono::Utc::now(),
294            computation_time_ms: computation_time,
295            integration_successful: true,
296            centrality_weight: self.config.integration.centrality_weight,
297            files_with_centrality: pagerank_results.scores.len(),
298            config: self.config.clone(),
299        };
300        
301        Ok(CentralityResults {
302            pagerank_scores: pagerank_results.scores.clone(),
303            graph_analysis,
304            pagerank_details: pagerank_results,
305            import_stats,
306            integration_metadata,
307        })
308    }
309    
310    /// Integrate centrality scores with existing heuristic scores
311    pub fn integrate_with_heuristics(
312        &self,
313        centrality_results: &CentralityResults,
314        heuristic_scores: &HashMap<String, f64>,
315    ) -> Result<HashMap<String, f64>> {
316        let normalized_centrality = self.normalize_centrality_scores(
317            &centrality_results.pagerank_scores,
318            heuristic_scores,
319        )?;
320        
321        let mut integrated_scores = HashMap::new();
322        let centrality_weight = self.config.integration.centrality_weight;
323        let heuristic_weight = 1.0 - centrality_weight;
324        
325        // Combine heuristic and centrality scores
326        for (file_path, heuristic_score) in heuristic_scores {
327            let centrality_score = normalized_centrality.get(file_path).copied().unwrap_or(0.0);
328            
329            // Apply entrypoint boost if configured
330            let boosted_centrality = if self.config.integration.boost_entrypoints 
331                && self.is_entrypoint_file(file_path) {
332                centrality_score * self.config.integration.entrypoint_boost_factor
333            } else {
334                centrality_score
335            };
336            
337            let integrated_score = heuristic_weight * heuristic_score + 
338                                 centrality_weight * boosted_centrality;
339            
340            integrated_scores.insert(file_path.clone(), integrated_score);
341        }
342        
343        // Add centrality-only files (not in heuristic scores)
344        for (file_path, centrality_score) in &normalized_centrality {
345            if !integrated_scores.contains_key(file_path) {
346                let boosted_centrality = if self.config.integration.boost_entrypoints 
347                    && self.is_entrypoint_file(file_path) {
348                    centrality_score * self.config.integration.entrypoint_boost_factor
349                } else {
350                    *centrality_score
351                };
352                
353                integrated_scores.insert(file_path.clone(), centrality_weight * boosted_centrality);
354            }
355        }
356        
357        Ok(integrated_scores)
358    }
359    
360    /// Build dependency graph from scan results
361    fn build_dependency_graph<T>(&self, scan_results: &[T]) -> Result<(DependencyGraph, ImportDetectionStats)> 
362    where 
363        T: ScanResult + Sync,
364    {
365        let mut graph = DependencyGraph::with_capacity(scan_results.len());
366        
367        // Add all files as nodes first
368        for result in scan_results {
369            graph.add_node(result.path().to_string())?;
370        }
371        
372        // Detect imports and build edges
373        let import_stats = if self.config.pagerank_config.use_parallel {
374            self.build_edges_parallel(&mut graph, scan_results)?
375        } else {
376            self.build_edges_sequential(&mut graph, scan_results)?
377        };
378        
379        Ok((graph, import_stats))
380    }
381    
382    /// Build graph edges sequentially
383    fn build_edges_sequential<T>(&self, graph: &mut DependencyGraph, scan_results: &[T]) -> Result<ImportDetectionStats>
384    where 
385        T: ScanResult,
386    {
387        let mut stats = ImportDetectionStats {
388            files_processed: 0,
389            imports_detected: 0,
390            imports_resolved: 0,
391            resolution_rate: 0.0,
392            language_breakdown: HashMap::new(),
393            import_patterns: HashMap::new(),
394        };
395        
396        // Create file path lookup for resolution
397        let file_path_map: HashMap<&str, &T> = scan_results.iter()
398            .map(|result| (result.path(), result))
399            .collect();
400        
401        for result in scan_results {
402            stats.files_processed += 1;
403            
404            // Track language
405            if let Some(lang) = self.import_detector.detect_language(result.path()) {
406                *stats.language_breakdown.entry(lang.clone()).or_insert(0) += 1;
407            }
408            
409            // Extract and resolve imports
410            if let Some(imports) = result.imports() {
411                stats.imports_detected += imports.len();
412                
413                for import_str in imports {
414                    if let Some(resolved_path) = self.import_detector.resolve_import(
415                        import_str, 
416                        result.path(), 
417                        &file_path_map
418                    ) {
419                        graph.add_edge(result.path().to_string(), resolved_path)?;
420                        stats.imports_resolved += 1;
421                    }
422                }
423            }
424        }
425        
426        stats.resolution_rate = if stats.imports_detected > 0 {
427            stats.imports_resolved as f64 / stats.imports_detected as f64
428        } else {
429            0.0
430        };
431        
432        Ok(stats)
433    }
434    
435    /// Build graph edges in parallel
436    fn build_edges_parallel<T>(&self, graph: &mut DependencyGraph, scan_results: &[T]) -> Result<ImportDetectionStats>
437    where 
438        T: ScanResult + Sync,
439    {
440        // Create file path lookup
441        let file_path_map: HashMap<&str, &T> = scan_results.iter()
442            .map(|result| (result.path(), result))
443            .collect();
444        
445        // Process imports in parallel
446        let import_edges: Vec<_> = scan_results.par_iter()
447            .flat_map(|result| {
448                let mut edges = Vec::new();
449                
450                if let Some(imports) = result.imports() {
451                    for import_str in imports {
452                        if let Some(resolved_path) = self.import_detector.resolve_import(
453                            import_str, 
454                            result.path(), 
455                            &file_path_map
456                        ) {
457                            edges.push((result.path().to_string(), resolved_path));
458                        }
459                    }
460                }
461                
462                edges
463            })
464            .collect();
465        
466        // Add edges to graph
467        for (from, to) in &import_edges {
468            graph.add_edge(from.clone(), to.clone())?;
469        }
470        
471        // Calculate statistics
472        let total_imports: usize = scan_results.iter()
473            .map(|result| result.imports().map_or(0, |imports| imports.len()))
474            .sum();
475        
476        let language_breakdown: HashMap<String, usize> = scan_results.iter()
477            .filter_map(|result| self.import_detector.detect_language(result.path())
478                .map(|lang| (lang, 1)))
479            .fold(HashMap::new(), |mut acc, (lang, count)| {
480                *acc.entry(lang).or_insert(0) += count;
481                acc
482            });
483        
484        let stats = ImportDetectionStats {
485            files_processed: scan_results.len(),
486            imports_detected: total_imports,
487            imports_resolved: import_edges.len(),
488            resolution_rate: if total_imports > 0 {
489                import_edges.len() as f64 / total_imports as f64
490            } else {
491                0.0
492            },
493            language_breakdown,
494            import_patterns: HashMap::new(), // TODO: Implement detailed pattern analysis
495        };
496        
497        Ok(stats)
498    }
499    
500    /// Normalize centrality scores for integration with heuristics
501    fn normalize_centrality_scores(
502        &self,
503        centrality_scores: &HashMap<String, f64>,
504        heuristic_scores: &HashMap<String, f64>,
505    ) -> Result<HashMap<String, f64>> {
506        if centrality_scores.is_empty() {
507            return Ok(HashMap::new());
508        }
509        
510        match self.config.integration.normalization_method {
511            NormalizationMethod::MinMax => self.normalize_min_max(centrality_scores, heuristic_scores),
512            NormalizationMethod::ZScore => self.normalize_z_score(centrality_scores),
513            NormalizationMethod::Rank => self.normalize_rank(centrality_scores),
514            NormalizationMethod::None => Ok(centrality_scores.clone()),
515        }
516    }
517    
518    /// Min-max normalization to match heuristic score range
519    fn normalize_min_max(
520        &self,
521        centrality_scores: &HashMap<String, f64>,
522        heuristic_scores: &HashMap<String, f64>,
523    ) -> Result<HashMap<String, f64>> {
524        let centrality_values: Vec<f64> = centrality_scores.values().copied().collect();
525        let min_centrality = centrality_values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
526        let max_centrality = centrality_values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
527        
528        // Target range based on heuristic scores
529        let heuristic_values: Vec<f64> = heuristic_scores.values().copied().collect();
530        let max_heuristic = if heuristic_values.is_empty() {
531            1.0
532        } else {
533            heuristic_values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b))
534        };
535        
536        let mut normalized = HashMap::new();
537        
538        if (max_centrality - min_centrality).abs() < f64::EPSILON {
539            // All scores are the same
540            for (path, _) in centrality_scores {
541                normalized.insert(path.clone(), max_heuristic * 0.5); // Use half of max heuristic
542            }
543        } else {
544            for (path, &score) in centrality_scores {
545                let normalized_score = ((score - min_centrality) / (max_centrality - min_centrality)) * max_heuristic;
546                if normalized_score >= self.config.integration.min_centrality_threshold {
547                    normalized.insert(path.clone(), normalized_score);
548                }
549            }
550        }
551        
552        Ok(normalized)
553    }
554    
555    /// Z-score normalization
556    fn normalize_z_score(&self, centrality_scores: &HashMap<String, f64>) -> Result<HashMap<String, f64>> {
557        let values: Vec<f64> = centrality_scores.values().copied().collect();
558        let mean = values.iter().sum::<f64>() / values.len() as f64;
559        let variance = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / values.len() as f64;
560        let std_dev = variance.sqrt();
561        
562        let mut normalized = HashMap::new();
563        
564        if std_dev > f64::EPSILON {
565            for (path, &score) in centrality_scores {
566                let z_score = (score - mean) / std_dev;
567                // Shift and scale to positive range
568                let normalized_score = (z_score + 3.0) / 6.0; // Roughly [0,1] for most values
569                if normalized_score >= self.config.integration.min_centrality_threshold {
570                    normalized.insert(path.clone(), normalized_score);
571                }
572            }
573        } else {
574            // All scores are the same
575            for (path, _) in centrality_scores {
576                normalized.insert(path.clone(), 0.5);
577            }
578        }
579        
580        Ok(normalized)
581    }
582    
583    /// Rank-based normalization
584    fn normalize_rank(&self, centrality_scores: &HashMap<String, f64>) -> Result<HashMap<String, f64>> {
585        let mut scored_files: Vec<_> = centrality_scores.iter()
586            .map(|(path, &score)| (path.clone(), score))
587            .collect();
588        
589        scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
590        
591        let mut normalized = HashMap::new();
592        let total_files = scored_files.len();
593        
594        for (rank, (path, _)) in scored_files.into_iter().enumerate() {
595            let normalized_score = 1.0 - (rank as f64 / total_files as f64);
596            if normalized_score >= self.config.integration.min_centrality_threshold {
597                normalized.insert(path, normalized_score);
598            }
599        }
600        
601        Ok(normalized)
602    }
603    
604    /// Create minimal analysis for large graphs (performance optimization)
605    fn create_minimal_analysis(&self, graph: &DependencyGraph) -> Result<GraphAnalysisResults> {
606        // Use a simplified analyzer for large graphs
607        let minimal_analyzer = GraphStatisticsAnalyzer::for_large_graphs();
608        minimal_analyzer.analyze(graph)
609    }
610    
611    /// Check if a file is an entrypoint
612    fn is_entrypoint_file(&self, file_path: &str) -> bool {
613        let path = Path::new(file_path);
614        let file_name = path.file_name()
615            .and_then(|name| name.to_str())
616            .unwrap_or("")
617            .to_lowercase();
618        
619        matches!(file_name.as_str(),
620            "main.py" | "main.rs" | "main.go" | "main.js" | "main.ts" |
621            "index.py" | "index.rs" | "index.go" | "index.js" | "index.ts" |
622            "app.py" | "app.rs" | "app.go" | "app.js" | "app.ts" |
623            "server.py" | "server.rs" | "server.go" | "server.js" | "server.ts" |
624            "lib.rs" | "__init__.py"
625        )
626    }
627}
628
629impl Default for CentralityCalculator {
630    fn default() -> Self {
631        Self::new().expect("Failed to create CentralityCalculator")
632    }
633}
634
635/// Import detection and resolution engine
636#[derive(Debug, Clone)]
637pub struct ImportDetector {
638    config: ImportResolutionConfig,
639}
640
641impl ImportDetector {
642    /// Create with configuration
643    pub fn with_config(config: ImportResolutionConfig) -> Self {
644        Self { config }
645    }
646    
647    /// Detect programming language from file extension
648    pub fn detect_language(&self, file_path: &str) -> Option<String> {
649        let path = Path::new(file_path);
650        let ext = path.extension()?.to_str()?.to_lowercase();
651        
652        match ext.as_str() {
653            "py" => Some("python".to_string()),
654            "js" | "jsx" | "mjs" => Some("javascript".to_string()),
655            "ts" | "tsx" => Some("typescript".to_string()),
656            "rs" => Some("rust".to_string()),
657            "go" => Some("go".to_string()),
658            "java" | "kt" => Some("java".to_string()),
659            "cpp" | "cc" | "cxx" | "hpp" | "h" => Some("cpp".to_string()),
660            "c" => Some("c".to_string()),
661            "rb" => Some("ruby".to_string()),
662            "php" => Some("php".to_string()),
663            "cs" => Some("csharp".to_string()),
664            "swift" => Some("swift".to_string()),
665            _ => None,
666        }
667    }
668    
669    /// Resolve import string to actual file path
670    pub fn resolve_import<T>(
671        &self,
672        import_str: &str,
673        current_file: &str,
674        file_map: &HashMap<&str, &T>,
675    ) -> Option<String>
676    where
677        T: ScanResult,
678    {
679        // Check custom path mappings first
680        if let Some(mapped_path) = self.config.path_mappings.get(import_str) {
681            if file_map.contains_key(mapped_path.as_str()) {
682                return Some(mapped_path.clone());
683            }
684        }
685        
686        let current_path = Path::new(current_file);
687        let language = self.detect_language(current_file);
688        
689        match language.as_deref() {
690            Some("python") => self.resolve_python_import(import_str, current_path, file_map),
691            Some("javascript") | Some("typescript") => self.resolve_js_import(import_str, current_path, file_map),
692            Some("rust") => self.resolve_rust_import(import_str, current_path, file_map),
693            Some("go") => self.resolve_go_import(import_str, current_path, file_map),
694            _ => self.resolve_generic_import(import_str, current_path, file_map),
695        }
696    }
697    
698    /// Resolve Python import
699    fn resolve_python_import<T>(
700        &self,
701        import_str: &str,
702        current_path: &Path,
703        file_map: &HashMap<&str, &T>,
704    ) -> Option<String>
705    where
706        T: ScanResult,
707    {
708        let cleaned_import = import_str.trim();
709        
710        // Skip standard library imports if configured
711        if self.config.exclude_stdlib_imports && self.is_python_stdlib(cleaned_import) {
712            return None;
713        }
714        
715        // Convert module path to file path
716        let module_parts: Vec<&str> = cleaned_import.split('.').collect();
717        
718        // Try various combinations
719        let mut candidates = Vec::new();
720        
721        // Direct module file
722        candidates.push(format!("{}.py", module_parts.join("/")));
723        
724        // Module package
725        candidates.push(format!("{}/__init__.py", module_parts.join("/")));
726        
727        // Relative to current file directory
728        if let Some(parent) = current_path.parent() {
729            let parent_str = parent.to_string_lossy();
730            let relative_candidates: Vec<String> = candidates.iter()
731                .map(|candidate| format!("{}/{}", parent_str, candidate))
732                .collect();
733            candidates.extend(relative_candidates);
734        }
735        
736        // Find first matching candidate
737        for candidate in &candidates {
738            if file_map.contains_key(candidate.as_str()) {
739                return Some(candidate.clone());
740            }
741        }
742        
743        // Fuzzy matching as fallback
744        self.fuzzy_match_import(&module_parts, file_map)
745    }
746    
747    /// Resolve JavaScript/TypeScript import
748    fn resolve_js_import<T>(
749        &self,
750        import_str: &str,
751        current_path: &Path,
752        file_map: &HashMap<&str, &T>,
753    ) -> Option<String>
754    where
755        T: ScanResult,
756    {
757        let cleaned_import = import_str.trim();
758        
759        // Handle relative imports
760        if cleaned_import.starts_with("./") || cleaned_import.starts_with("../") {
761            if !self.config.resolve_relative_imports {
762                return None;
763            }
764            
765            if let Some(parent) = current_path.parent() {
766                let resolved_path = parent.join(&cleaned_import[2..]); // Remove ./
767                let resolved_str = resolved_path.to_string_lossy();
768                
769                // Try different extensions
770                for ext in &[".js", ".ts", ".jsx", ".tsx", "/index.js", "/index.ts"] {
771                    let candidate = format!("{}{}", resolved_str, ext);
772                    if file_map.contains_key(candidate.as_str()) {
773                        return Some(candidate);
774                    }
775                }
776            }
777        }
778        // Handle absolute imports
779        else if self.config.resolve_absolute_imports {
780            let import_parts: Vec<&str> = cleaned_import.split('/').collect();
781            return self.fuzzy_match_import(&import_parts, file_map);
782        }
783        
784        None
785    }
786    
787    /// Resolve Rust import (use/mod statements)
788    fn resolve_rust_import<T>(
789        &self,
790        import_str: &str,
791        _current_path: &Path,
792        file_map: &HashMap<&str, &T>,
793    ) -> Option<String>
794    where
795        T: ScanResult,
796    {
797        let cleaned_import = import_str.trim();
798        
799        // Skip standard library crates
800        if self.config.exclude_stdlib_imports && self.is_rust_stdlib(cleaned_import) {
801            return None;
802        }
803        
804        let parts: Vec<&str> = cleaned_import.split("::").collect();
805        
806        // Try to resolve as file path
807        let mut candidates = Vec::new();
808        
809        // Direct module file
810        candidates.push(format!("{}.rs", parts.join("/")));
811        
812        // Module directory with mod.rs
813        candidates.push(format!("{}/mod.rs", parts.join("/")));
814        
815        // lib.rs in crate
816        if parts.len() == 1 {
817            candidates.push(format!("{}/lib.rs", parts[0]));
818            candidates.push(format!("{}/src/lib.rs", parts[0]));
819        }
820        
821        // Find first matching candidate
822        for candidate in &candidates {
823            if file_map.contains_key(candidate.as_str()) {
824                return Some(candidate.clone());
825            }
826        }
827        
828        // Fuzzy matching
829        self.fuzzy_match_import(&parts, file_map)
830    }
831    
832    /// Resolve Go import
833    fn resolve_go_import<T>(
834        &self,
835        import_str: &str,
836        _current_path: &Path,
837        file_map: &HashMap<&str, &T>,
838    ) -> Option<String>
839    where
840        T: ScanResult,
841    {
842        let cleaned_import = import_str.trim().trim_matches('"');
843        
844        // Skip standard library
845        if self.config.exclude_stdlib_imports && !cleaned_import.contains('.') {
846            return None;
847        }
848        
849        let parts: Vec<&str> = cleaned_import.split('/').collect();
850        
851        // Try various Go file patterns
852        let mut candidates = Vec::new();
853        
854        // Package directory
855        candidates.push(format!("{}.go", parts.last()?));
856        candidates.push(format!("{}/main.go", cleaned_import));
857        candidates.push(format!("{}/{}.go", cleaned_import, parts.last()?));
858        
859        for candidate in &candidates {
860            if file_map.contains_key(candidate.as_str()) {
861                return Some(candidate.clone());
862            }
863        }
864        
865        self.fuzzy_match_import(&parts, file_map)
866    }
867    
868    /// Generic import resolution
869    fn resolve_generic_import<T>(
870        &self,
871        import_str: &str,
872        _current_path: &Path,
873        file_map: &HashMap<&str, &T>,
874    ) -> Option<String>
875    where
876        T: ScanResult,
877    {
878        let cleaned_import = import_str.trim();
879        let parts: Vec<&str> = cleaned_import.split(&['/', '.', ':']).collect();
880        self.fuzzy_match_import(&parts, file_map)
881    }
882    
883    /// Fuzzy matching for import resolution
884    fn fuzzy_match_import<T>(
885        &self,
886        import_parts: &[&str],
887        file_map: &HashMap<&str, &T>,
888    ) -> Option<String>
889    where
890        T: ScanResult,
891    {
892        if import_parts.is_empty() {
893            return None;
894        }
895        
896        let last_part = import_parts.last()?.to_lowercase();
897        
898        // Find files that contain the import parts
899        for &file_path in file_map.keys() {
900            let file_path_lower = file_path.to_lowercase();
901            
902            // Check if filename matches
903            if let Some(file_stem) = Path::new(file_path).file_stem() {
904                if let Some(stem_str) = file_stem.to_str() {
905                    if stem_str.to_lowercase() == last_part {
906                        return Some(file_path.to_string());
907                    }
908                }
909            }
910            
911            // Check if path contains import parts
912            if import_parts.iter().all(|&part| file_path_lower.contains(&part.to_lowercase())) {
913                return Some(file_path.to_string());
914            }
915        }
916        
917        None
918    }
919    
920    /// Check if import is Python standard library
921    fn is_python_stdlib(&self, import_str: &str) -> bool {
922        let stdlib_modules = [
923            "os", "sys", "re", "json", "collections", "itertools", "functools",
924            "typing", "datetime", "math", "random", "string", "pathlib", "io",
925            "csv", "xml", "html", "urllib", "http", "email", "logging", "unittest",
926            "asyncio", "concurrent", "multiprocessing", "threading", "subprocess",
927        ];
928        
929        let first_part = import_str.split('.').next().unwrap_or(import_str);
930        stdlib_modules.contains(&first_part)
931    }
932    
933    /// Check if import is Rust standard library
934    fn is_rust_stdlib(&self, import_str: &str) -> bool {
935        import_str.starts_with("std::") || 
936        import_str.starts_with("core::") ||
937        import_str.starts_with("alloc::")
938    }
939}
940
941/// Utility functions for centrality results analysis
942impl CentralityResults {
943    /// Get files sorted by centrality score (descending)
944    pub fn top_files_by_centrality(&self, k: usize) -> Vec<(String, f64)> {
945        let mut scored_files: Vec<_> = self.pagerank_scores.iter()
946            .map(|(path, &score)| (path.clone(), score))
947            .collect();
948        
949        scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
950        scored_files.into_iter().take(k).collect()
951    }
952    
953    /// Get summary statistics about centrality computation
954    pub fn summary(&self) -> String {
955        format!(
956            "Centrality Analysis Summary:\n\
957             - Files with centrality scores: {}\n\
958             - PageRank iterations: {} (converged: {})\n\
959             - Graph: {} nodes, {} edges (density: {:.4})\n\
960             - Import resolution: {:.1}% ({}/{})\n\
961             - Top languages: {}\n\
962             - Computation time: {}ms\n\
963             - Integration weight: {:.2}",
964            self.pagerank_scores.len(),
965            self.pagerank_details.iterations_converged,
966            self.pagerank_details.converged(),
967            self.graph_analysis.basic_stats.total_nodes,
968            self.graph_analysis.basic_stats.total_edges,
969            self.graph_analysis.basic_stats.graph_density,
970            self.import_stats.resolution_rate * 100.0,
971            self.import_stats.imports_resolved,
972            self.import_stats.imports_detected,
973            self.import_stats.language_breakdown.iter()
974                .max_by_key(|(_, &count)| count)
975                .map(|(lang, count)| format!("{} ({})", lang, count))
976                .unwrap_or_else(|| "None".to_string()),
977            self.integration_metadata.computation_time_ms,
978            self.integration_metadata.centrality_weight,
979        )
980    }
981}
982
983#[cfg(test)]
984mod tests {
985    use super::*;
986    use scribe_analysis::heuristics::DocumentAnalysis;
987    
988    // Mock scan result for testing
989    #[derive(Debug, Clone)]
990    struct MockScanResult {
991        path: String,
992        relative_path: String,
993        depth: usize,
994        imports: Option<Vec<String>>,
995        is_docs: bool,
996        is_readme: bool,
997        is_test: bool,
998        is_entrypoint: bool,
999        has_examples: bool,
1000        priority_boost: f64,
1001        churn_score: f64,
1002        centrality_in: f64,
1003        doc_analysis: Option<DocumentAnalysis>,
1004    }
1005    
1006    impl MockScanResult {
1007        fn new(path: &str) -> Self {
1008            Self {
1009                path: path.to_string(),
1010                relative_path: path.to_string(),
1011                depth: path.matches('/').count(),
1012                imports: None,
1013                is_docs: path.contains("doc") || path.ends_with(".md"),
1014                is_readme: path.to_lowercase().contains("readme"),
1015                is_test: path.contains("test"),
1016                is_entrypoint: path.contains("main") || path.contains("index"),
1017                has_examples: path.contains("example"),
1018                priority_boost: 0.0,
1019                churn_score: 0.5,
1020                centrality_in: 0.0,
1021                doc_analysis: Some(DocumentAnalysis::new()),
1022            }
1023        }
1024        
1025        fn with_imports(mut self, imports: Vec<String>) -> Self {
1026            self.imports = Some(imports);
1027            self
1028        }
1029    }
1030    
1031    impl ScanResult for MockScanResult {
1032        fn path(&self) -> &str { &self.path }
1033        fn relative_path(&self) -> &str { &self.relative_path }
1034        fn depth(&self) -> usize { self.depth }
1035        fn is_docs(&self) -> bool { self.is_docs }
1036        fn is_readme(&self) -> bool { self.is_readme }
1037        fn is_test(&self) -> bool { self.is_test }
1038        fn is_entrypoint(&self) -> bool { self.is_entrypoint }
1039        fn has_examples(&self) -> bool { self.has_examples }
1040        fn priority_boost(&self) -> f64 { self.priority_boost }
1041        fn churn_score(&self) -> f64 { self.churn_score }
1042        fn centrality_in(&self) -> f64 { self.centrality_in }
1043        fn imports(&self) -> Option<&[String]> { self.imports.as_deref() }
1044        fn doc_analysis(&self) -> Option<&DocumentAnalysis> { self.doc_analysis.as_ref() }
1045    }
1046    
1047    #[test]
1048    fn test_centrality_calculator_creation() {
1049        let calculator = CentralityCalculator::new();
1050        assert!(calculator.is_ok());
1051        
1052        let large_calc = CentralityCalculator::for_large_codebases();
1053        assert!(large_calc.is_ok());
1054    }
1055    
1056    #[test]
1057    fn test_import_detection() {
1058        let detector = ImportDetector::with_config(ImportResolutionConfig::default());
1059        
1060        // Test language detection
1061        assert_eq!(detector.detect_language("main.py"), Some("python".to_string()));
1062        assert_eq!(detector.detect_language("app.js"), Some("javascript".to_string()));
1063        assert_eq!(detector.detect_language("lib.rs"), Some("rust".to_string()));
1064        
1065        // Test Python stdlib detection
1066        assert!(detector.is_python_stdlib("os"));
1067        assert!(detector.is_python_stdlib("sys.path"));
1068        assert!(!detector.is_python_stdlib("custom_module"));
1069        
1070        // Test Rust stdlib detection
1071        assert!(detector.is_rust_stdlib("std::collections::HashMap"));
1072        assert!(detector.is_rust_stdlib("core::fmt"));
1073        assert!(!detector.is_rust_stdlib("serde::Deserialize"));
1074    }
1075    
1076    #[test]
1077    fn test_centrality_calculation() {
1078        let calculator = CentralityCalculator::new().unwrap();
1079        
1080        let scan_results = vec![
1081            MockScanResult::new("main.py").with_imports(vec!["utils".to_string(), "config".to_string()]),
1082            MockScanResult::new("utils.py").with_imports(vec!["config".to_string()]),
1083            MockScanResult::new("config.py"),
1084            MockScanResult::new("test.py").with_imports(vec!["main".to_string()]),
1085        ];
1086        
1087        let results = calculator.calculate_centrality(&scan_results).unwrap();
1088        
1089        // Basic checks
1090        assert!(!results.pagerank_scores.is_empty());
1091        assert!(results.integration_metadata.integration_successful);
1092        assert_eq!(results.integration_metadata.files_with_centrality, results.pagerank_scores.len());
1093        
1094        // config.py should have high centrality (imported by main.py and utils.py)
1095        let config_score = results.pagerank_scores.get("config.py");
1096        assert!(config_score.is_some());
1097        
1098        println!("Centrality scores:");
1099        for (file, score) in &results.pagerank_scores {
1100            println!("  {}: {:.6}", file, score);
1101        }
1102        
1103        println!("\n{}", results.summary());
1104    }
1105    
1106    #[test]
1107    fn test_heuristics_integration() {
1108        let calculator = CentralityCalculator::new().unwrap();
1109        
1110        let scan_results = vec![
1111            MockScanResult::new("main.py").with_imports(vec!["utils".to_string()]),
1112            MockScanResult::new("utils.py"),
1113        ];
1114        
1115        let centrality_results = calculator.calculate_centrality(&scan_results).unwrap();
1116        
1117        // Mock heuristic scores
1118        let mut heuristic_scores = HashMap::new();
1119        heuristic_scores.insert("main.py".to_string(), 0.8);
1120        heuristic_scores.insert("utils.py".to_string(), 0.6);
1121        
1122        let integrated_scores = calculator.integrate_with_heuristics(
1123            &centrality_results, 
1124            &heuristic_scores
1125        ).unwrap();
1126        
1127        assert!(!integrated_scores.is_empty());
1128        
1129        // Integrated scores should be different from original heuristic scores
1130        for (file, integrated_score) in &integrated_scores {
1131            let original_score = heuristic_scores.get(file).unwrap();
1132            println!("File {}: heuristic={:.3}, integrated={:.3}", 
1133                     file, original_score, integrated_score);
1134        }
1135    }
1136    
1137    #[test]
1138    fn test_normalization_methods() {
1139        let calculator = CentralityCalculator::new().unwrap();
1140        
1141        let centrality_scores = vec![
1142            ("file1".to_string(), 0.1),
1143            ("file2".to_string(), 0.3),
1144            ("file3".to_string(), 0.6),
1145            ("file4".to_string(), 1.0),
1146        ].into_iter().collect();
1147        
1148        let heuristic_scores = vec![
1149            ("file1".to_string(), 0.5),
1150            ("file2".to_string(), 0.7),
1151            ("file3".to_string(), 0.9),
1152            ("file4".to_string(), 1.2),
1153        ].into_iter().collect();
1154        
1155        // Test min-max normalization
1156        let normalized = calculator.normalize_min_max(&centrality_scores, &heuristic_scores).unwrap();
1157        assert!(!normalized.is_empty());
1158        
1159        // Test z-score normalization
1160        let z_normalized = calculator.normalize_z_score(&centrality_scores).unwrap();
1161        assert!(!z_normalized.is_empty());
1162        
1163        // Test rank normalization
1164        let rank_normalized = calculator.normalize_rank(&centrality_scores).unwrap();
1165        assert!(!rank_normalized.is_empty());
1166        
1167        println!("Original scores: {:?}", centrality_scores);
1168        println!("Min-max normalized: {:?}", normalized);
1169        println!("Z-score normalized: {:?}", z_normalized);
1170        println!("Rank normalized: {:?}", rank_normalized);
1171    }
1172    
1173    #[test]
1174    fn test_import_resolution() {
1175        let detector = ImportDetector::with_config(ImportResolutionConfig::default());
1176        
1177        // Create mock file map
1178        let scan_results = vec![
1179            MockScanResult::new("src/main.py"),
1180            MockScanResult::new("src/utils.py"),
1181            MockScanResult::new("src/config.py"),
1182            MockScanResult::new("tests/test_main.py"),
1183        ];
1184        
1185        let file_map: HashMap<&str, &MockScanResult> = scan_results.iter()
1186            .map(|result| (result.path(), result))
1187            .collect();
1188        
1189        // Test Python import resolution
1190        let resolved = detector.resolve_import(
1191            "utils", 
1192            "src/main.py", 
1193            &file_map
1194        );
1195        assert!(resolved.is_some());
1196        
1197        // Test module path resolution
1198        let resolved_config = detector.resolve_import(
1199            "src.config",
1200            "src/main.py",
1201            &file_map
1202        );
1203        // Should resolve to src/config.py through fuzzy matching
1204        assert!(resolved_config.is_some());
1205        
1206        println!("Resolved imports:");
1207        if let Some(path) = resolved {
1208            println!("  utils -> {}", path);
1209        }
1210        if let Some(path) = resolved_config {
1211            println!("  src.config -> {}", path);
1212        }
1213    }
1214    
1215    #[test]
1216    fn test_entrypoint_detection() {
1217        let calculator = CentralityCalculator::new().unwrap();
1218        
1219        assert!(calculator.is_entrypoint_file("main.py"));
1220        assert!(calculator.is_entrypoint_file("src/main.rs"));
1221        assert!(calculator.is_entrypoint_file("index.js"));
1222        assert!(calculator.is_entrypoint_file("app.py"));
1223        assert!(calculator.is_entrypoint_file("lib.rs"));
1224        assert!(calculator.is_entrypoint_file("__init__.py"));
1225        
1226        assert!(!calculator.is_entrypoint_file("utils.py"));
1227        assert!(!calculator.is_entrypoint_file("config.rs"));
1228        assert!(!calculator.is_entrypoint_file("helper.js"));
1229    }
1230    
1231    #[test]
1232    fn test_top_files_by_centrality() {
1233        let mut pagerank_scores = HashMap::new();
1234        pagerank_scores.insert("file1.py".to_string(), 0.4);
1235        pagerank_scores.insert("file2.py".to_string(), 0.6);
1236        pagerank_scores.insert("file3.py".to_string(), 0.2);
1237        pagerank_scores.insert("file4.py".to_string(), 0.8);
1238        
1239        let results = CentralityResults {
1240            pagerank_scores,
1241            graph_analysis: GraphAnalysisResults {
1242                basic_stats: crate::graph::GraphStatistics::empty(),
1243                degree_distribution: Default::default(),
1244                connectivity: Default::default(),
1245                structural_patterns: Default::default(),
1246                import_insights: Default::default(),
1247                performance_profile: Default::default(),
1248                analysis_metadata: Default::default(),
1249            },
1250            pagerank_details: PageRankResults {
1251                scores: HashMap::new(),
1252                iterations_converged: 10,
1253                convergence_epsilon: 1e-6,
1254                graph_stats: crate::graph::GraphStatistics::empty(),
1255                parameters: PageRankConfig::default(),
1256                performance_metrics: Default::default(),
1257            },
1258            import_stats: ImportDetectionStats {
1259                files_processed: 4,
1260                imports_detected: 0,
1261                imports_resolved: 0,
1262                resolution_rate: 0.0,
1263                language_breakdown: HashMap::new(),
1264                import_patterns: HashMap::new(),
1265            },
1266            integration_metadata: IntegrationMetadata {
1267                timestamp: chrono::Utc::now(),
1268                computation_time_ms: 100,
1269                integration_successful: true,
1270                centrality_weight: 0.15,
1271                files_with_centrality: 4,
1272                config: CentralityConfig::default(),
1273            },
1274        };
1275        
1276        let top_files = results.top_files_by_centrality(2);
1277        assert_eq!(top_files.len(), 2);
1278        assert_eq!(top_files[0].0, "file4.py");
1279        assert_eq!(top_files[0].1, 0.8);
1280        assert_eq!(top_files[1].0, "file2.py");
1281        assert_eq!(top_files[1].1, 0.6);
1282    }
1283}
scribe_graph/centrality.rs

scribe_graph/
centrality.rs