// scribe_graph/centrality.rs

//! # Centrality Calculator with Heuristics Integration
//!
//! Main interface for PageRank centrality calculation and its integration with the
//! existing FastPath heuristic scoring system. This module provides the high-level API
//! for building import graphs, computing centrality, and feeding the results into scoring.
//!
//! ## Key Features
//! - **PageRank Centrality Computation**: Iterative computation with convergence detection
//! - **Import Graph Construction**: Builds dependency graphs from file scan results
//! - **Heuristics Integration**: Seamless integration with the V2 scoring system
//! - **Performance Optimization**: Efficient computation for large codebases
//! - **Multi-language Support**: Import detection across programming languages
//! - **Comprehensive Analysis**: Full graph statistics and structural insights
//!
//! ## Integration with FastPath Heuristics
//! Centrality scores enter the heuristic scoring formula:
//! ```text
//! final_score = Σ(weight_i × normalized_score_i) + priority_boost + template_boost
//! ```
//! where the normalized centrality score becomes one of the weighted `score_i` terms when
//! V2 features are enabled.
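//!
//! As an illustrative worked example with made-up numbers: with the default centrality
//! weight of 0.15 (so a heuristic weight of 0.85), a heuristic score of 0.8 and a
//! normalized centrality of 0.4 blend as:
//! ```text
//! 0.85 × 0.8 + 0.15 × 0.4 = 0.68 + 0.06 = 0.74
//! ```
//! before any priority or template boosts are applied.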
20
21use rayon::prelude::*;
22use scribe_analysis::heuristics::ScanResult;
23use scribe_core::Result;
24use serde::{Deserialize, Serialize};
25use std::collections::{HashMap, HashSet};
26use std::path::Path;
27
28use crate::graph::{DependencyGraph, NodeId};
29use crate::pagerank::{PageRankComputer, PageRankConfig, PageRankResults};
30use crate::statistics::{GraphAnalysisResults, GraphStatisticsAnalyzer};
31
32/// Complete centrality calculation results with comprehensive metadata
33#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34pub struct CentralityResults {
35    /// PageRank scores (file path -> centrality score)
36    pub pagerank_scores: HashMap<NodeId, f64>,
37
38    /// Graph analysis results
39    pub graph_analysis: GraphAnalysisResults,
40
41    /// PageRank computation details
42    pub pagerank_details: PageRankResults,
43
44    /// Import detection statistics
45    pub import_stats: ImportDetectionStats,
46
47    /// Integration metadata
48    pub integration_metadata: IntegrationMetadata,
49}
50
51/// Statistics about import detection and graph construction
52#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
53pub struct ImportDetectionStats {
54    /// Number of files processed for import detection
55    pub files_processed: usize,
56
57    /// Number of import relationships detected
58    pub imports_detected: usize,
59
60    /// Number of resolved imports (mapped to actual files)
61    pub imports_resolved: usize,
62
63    /// Import resolution success rate
64    pub resolution_rate: f64,
65
66    /// Language breakdown of processed files
67    pub language_breakdown: HashMap<String, usize>,
68
69    /// Import patterns by language
70    pub import_patterns: HashMap<String, ImportPatternStats>,
71}
72
73/// Import pattern statistics for a specific language
74#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
75pub struct ImportPatternStats {
76    /// Total imports found
77    pub total_imports: usize,
78
79    /// Relative imports (./,../)
80    pub relative_imports: usize,
81
82    /// Absolute imports
83    pub absolute_imports: usize,
84
85    /// Standard library imports
86    pub stdlib_imports: usize,
87
88    /// Third-party imports
89    pub third_party_imports: usize,
90}
91
92/// Metadata about centrality-heuristics integration
93#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
94pub struct IntegrationMetadata {
95    /// When the analysis was performed
96    pub timestamp: chrono::DateTime<chrono::Utc>,
97
98    /// Total computation time
99    pub computation_time_ms: u64,
100
101    /// Whether centrality was successfully integrated
102    pub integration_successful: bool,
103
104    /// Centrality weight used in integration
105    pub centrality_weight: f64,
106
107    /// Number of files with centrality scores
108    pub files_with_centrality: usize,
109
110    /// Configuration used
111    pub config: CentralityConfig,
112}
113
114/// Configuration for centrality calculation
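///
/// A minimal construction sketch (ignored doctest; uses only the defaults defined in
/// this module):
/// ```ignore
/// let config = CentralityConfig {
///     // Skip the more expensive structural analysis for very large graphs.
///     analyze_graph_structure: false,
///     ..CentralityConfig::default()
/// };
/// let calculator = CentralityCalculator::with_config(config)?;
/// ```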
115#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
116pub struct CentralityConfig {
117    /// PageRank algorithm configuration
118    pub pagerank_config: PageRankConfig,
119
120    /// Whether to perform expensive graph analysis
121    pub analyze_graph_structure: bool,
122
123    /// Import resolution configuration
124    pub import_resolution: ImportResolutionConfig,
125
126    /// Integration parameters
127    pub integration: IntegrationConfig,
128}
129
130/// Configuration for import resolution
131#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
132pub struct ImportResolutionConfig {
133    /// Maximum search depth for import resolution
134    pub max_search_depth: usize,
135
136    /// Whether to resolve relative imports
137    pub resolve_relative_imports: bool,
138
139    /// Whether to resolve absolute imports
140    pub resolve_absolute_imports: bool,
141
142    /// Whether to exclude standard library imports
143    pub exclude_stdlib_imports: bool,
144
145    /// Custom import path mappings
146    pub path_mappings: HashMap<String, String>,
147}
148
149/// Configuration for heuristics integration
150#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
151pub struct IntegrationConfig {
152    /// Weight for centrality in final score
153    pub centrality_weight: f64,
154
155    /// Normalization method for centrality scores
156    pub normalization_method: NormalizationMethod,
157
158    /// Minimum centrality score threshold
159    pub min_centrality_threshold: f64,
160
161    /// Whether to boost entrypoint centrality
162    pub boost_entrypoints: bool,
163
164    /// Entrypoint boost factor
165    pub entrypoint_boost_factor: f64,
166}
167
168/// Methods for normalizing centrality scores
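///
/// Mappings as implemented by the calculator below:
/// ```text
/// MinMax: (score - min) / (max - min) * max_heuristic
/// ZScore: ((score - mean) / std_dev + 3.0) / 6.0
/// Rank:   1.0 - rank / n   (rank 0 = highest score)
/// None:   score unchanged
/// ```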
169#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
170pub enum NormalizationMethod {
171    /// Normalize to \[0,1\] range
172    MinMax,
173    /// Z-score normalization
174    ZScore,
175    /// Rank-based normalization
176    Rank,
177    /// No normalization
178    None,
179}
180
181impl Default for CentralityConfig {
182    fn default() -> Self {
183        Self {
184            pagerank_config: PageRankConfig::for_code_analysis(),
185            analyze_graph_structure: true,
186            import_resolution: ImportResolutionConfig::default(),
187            integration: IntegrationConfig::default(),
188        }
189    }
190}
191
192impl Default for ImportResolutionConfig {
193    fn default() -> Self {
194        Self {
195            max_search_depth: 3,
196            resolve_relative_imports: true,
197            resolve_absolute_imports: true,
198            exclude_stdlib_imports: true,
199            path_mappings: HashMap::new(),
200        }
201    }
202}
203
204impl Default for IntegrationConfig {
205    fn default() -> Self {
206        Self {
207            centrality_weight: 0.15, // 15% weight in V2 scoring
208            normalization_method: NormalizationMethod::MinMax,
209            min_centrality_threshold: 1e-6,
210            boost_entrypoints: true,
211            entrypoint_boost_factor: 1.5,
212        }
213    }
214}
215
216/// Main centrality calculator with heuristics integration
217#[derive(Debug)]
218pub struct CentralityCalculator {
219    /// Configuration
220    config: CentralityConfig,
221
222    /// PageRank computer
223    pagerank_computer: PageRankComputer,
224
225    /// Graph statistics analyzer
226    stats_analyzer: GraphStatisticsAnalyzer,
227
228    /// Import detector
229    import_detector: ImportDetector,
230}
231
232impl CentralityCalculator {
233    /// Create a new centrality calculator with default configuration
234    pub fn new() -> Result<Self> {
235        let config = CentralityConfig::default();
236        Self::with_config(config)
237    }
238
239    /// Create with custom configuration
240    pub fn with_config(config: CentralityConfig) -> Result<Self> {
241        let pagerank_computer = PageRankComputer::with_config(config.pagerank_config.clone())?;
242
243        let stats_analyzer = if config.analyze_graph_structure {
244            GraphStatisticsAnalyzer::new()
245        } else {
246            GraphStatisticsAnalyzer::for_large_graphs()
247        };
248
249        let import_detector = ImportDetector::with_config(config.import_resolution.clone());
250
251        Ok(Self {
252            config,
253            pagerank_computer,
254            stats_analyzer,
255            import_detector,
256        })
257    }
258
259    /// Create optimized for large codebases
260    pub fn for_large_codebases() -> Result<Self> {
261        let config = CentralityConfig {
262            pagerank_config: PageRankConfig::for_large_codebases(),
263            analyze_graph_structure: false,
264            ..CentralityConfig::default()
265        };
266        Self::with_config(config)
267    }
268
269    /// Calculate centrality scores for a collection of scan results
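    ///
    /// A usage sketch (ignored doctest; assumes `my_scan_results` is a slice of values
    /// implementing `ScanResult + Sync`):
    /// ```ignore
    /// let calculator = CentralityCalculator::new()?;
    /// let results = calculator.calculate_centrality(&my_scan_results)?;
    /// for (file, score) in results.top_files_by_centrality(10) {
    ///     println!("{file}: {score:.6}");
    /// }
    /// ```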
270    pub fn calculate_centrality<T>(&self, scan_results: &[T]) -> Result<CentralityResults>
271    where
272        T: ScanResult + Sync,
273    {
274        let start_time = std::time::Instant::now();
275
276        // Build dependency graph from scan results
277        let (graph, import_stats) = self.build_dependency_graph(scan_results)?;
278
279        // Compute PageRank centrality
280        let pagerank_results = self.pagerank_computer.compute(&graph)?;
281
282        // Perform graph analysis if enabled
283        let graph_analysis = if self.config.analyze_graph_structure {
284            self.stats_analyzer.analyze(&graph)?
285        } else {
286            // Create minimal analysis for large graphs
287            self.create_minimal_analysis(&graph)?
288        };
289
290        // Create integration metadata
291        let computation_time = start_time.elapsed().as_millis() as u64;
292        let integration_metadata = IntegrationMetadata {
293            timestamp: chrono::Utc::now(),
294            computation_time_ms: computation_time,
295            integration_successful: true,
296            centrality_weight: self.config.integration.centrality_weight,
297            files_with_centrality: pagerank_results.scores.len(),
298            config: self.config.clone(),
299        };
300
301        Ok(CentralityResults {
302            pagerank_scores: pagerank_results.scores.clone(),
303            graph_analysis,
304            pagerank_details: pagerank_results,
305            import_stats,
306            integration_metadata,
307        })
308    }
309
310    /// Integrate centrality scores with existing heuristic scores
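    ///
    /// A blending sketch (ignored doctest; `centrality_results` comes from
    /// `calculate_centrality`, and `heuristic_scores` maps file paths to heuristic
    /// scores):
    /// ```ignore
    /// let mut heuristic_scores = std::collections::HashMap::new();
    /// heuristic_scores.insert("src/main.rs".to_string(), 0.8);
    /// let blended =
    ///     calculator.integrate_with_heuristics(&centrality_results, &heuristic_scores)?;
    /// assert!(blended.contains_key("src/main.rs"));
    /// ```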
311    pub fn integrate_with_heuristics(
312        &self,
313        centrality_results: &CentralityResults,
314        heuristic_scores: &HashMap<String, f64>,
315    ) -> Result<HashMap<String, f64>> {
316        let normalized_centrality = self
317            .normalize_centrality_scores(&centrality_results.pagerank_scores, heuristic_scores)?;
318
319        let mut integrated_scores = HashMap::new();
320        let centrality_weight = self.config.integration.centrality_weight;
321        let heuristic_weight = 1.0 - centrality_weight;
322
323        // Combine heuristic and centrality scores
324        for (file_path, heuristic_score) in heuristic_scores {
325            let centrality_score = normalized_centrality.get(file_path).copied().unwrap_or(0.0);
326
327            // Apply entrypoint boost if configured
328            let boosted_centrality = if self.config.integration.boost_entrypoints
329                && self.is_entrypoint_file(file_path)
330            {
331                centrality_score * self.config.integration.entrypoint_boost_factor
332            } else {
333                centrality_score
334            };
335
336            let integrated_score =
337                heuristic_weight * heuristic_score + centrality_weight * boosted_centrality;
338
339            integrated_scores.insert(file_path.clone(), integrated_score);
340        }
341
342        // Add centrality-only files (not in heuristic scores)
343        for (file_path, centrality_score) in &normalized_centrality {
344            if !integrated_scores.contains_key(file_path) {
345                let boosted_centrality = if self.config.integration.boost_entrypoints
346                    && self.is_entrypoint_file(file_path)
347                {
348                    centrality_score * self.config.integration.entrypoint_boost_factor
349                } else {
350                    *centrality_score
351                };
352
353                integrated_scores.insert(file_path.clone(), centrality_weight * boosted_centrality);
354            }
355        }
356
357        Ok(integrated_scores)
358    }
359
360    /// Build dependency graph from scan results
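    ///
    /// Shape of the resulting graph (illustrative file names): each scanned file becomes
    /// a node, and every resolved import adds a directed edge from the importing file to
    /// the imported file:
    /// ```text
    /// main.py  -> utils.py
    /// main.py  -> config.py
    /// utils.py -> config.py
    /// ```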
361    fn build_dependency_graph<T>(
362        &self,
363        scan_results: &[T],
364    ) -> Result<(DependencyGraph, ImportDetectionStats)>
365    where
366        T: ScanResult + Sync,
367    {
368        let mut graph = DependencyGraph::with_capacity(scan_results.len());
369
        // Create an import detector with pre-computed lookup maps for this file set
        let optimized_detector =
            ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
373
374        // Add all files as nodes first
375        for result in scan_results {
376            graph.add_node(result.path().to_string())?;
377        }
378
379        // Detect imports and build edges using optimized detector
380        let import_stats = if self.config.pagerank_config.use_parallel {
381            self.build_edges_parallel_optimized(&mut graph, scan_results, &optimized_detector)?
382        } else {
383            self.build_edges_sequential_optimized(&mut graph, scan_results, &optimized_detector)?
384        };
385
386        Ok((graph, import_stats))
387    }
388
    /// Build graph edges sequentially using the pre-indexed import detector
390    fn build_edges_sequential_optimized<T>(
391        &self,
392        graph: &mut DependencyGraph,
393        scan_results: &[T],
394        optimized_detector: &ImportDetector,
395    ) -> Result<ImportDetectionStats>
396    where
397        T: ScanResult,
398    {
399        let mut stats = ImportDetectionStats {
400            files_processed: 0,
401            imports_detected: 0,
402            imports_resolved: 0,
403            resolution_rate: 0.0,
404            language_breakdown: HashMap::new(),
405            import_patterns: HashMap::new(),
406        };
407
408        // Create file path lookup for resolution
409        let file_path_map: HashMap<&str, &T> = scan_results
410            .iter()
411            .map(|result| (result.path(), result))
412            .collect();
413
414        for result in scan_results {
415            stats.files_processed += 1;
416
417            // Track language
418            if let Some(lang) = optimized_detector.detect_language(result.path()) {
419                *stats.language_breakdown.entry(lang.clone()).or_insert(0) += 1;
420            }
421
422            // Extract and resolve imports using optimized detector
423            if let Some(imports) = result.imports() {
424                stats.imports_detected += imports.len();
425
426                for import_str in imports {
427                    if let Some(resolved_path) =
428                        optimized_detector.resolve_import(import_str, result.path(), &file_path_map)
429                    {
430                        graph.add_edge(result.path().to_string(), resolved_path)?;
431                        stats.imports_resolved += 1;
432                    }
433                }
434            }
435        }
436
437        stats.resolution_rate = if stats.imports_detected > 0 {
438            stats.imports_resolved as f64 / stats.imports_detected as f64
439        } else {
440            0.0
441        };
442
443        Ok(stats)
444    }
445
    /// Build graph edges sequentially (legacy path that builds its own indexed detector)
    #[allow(dead_code)]
    fn build_edges_sequential<T>(
448        &self,
449        graph: &mut DependencyGraph,
450        scan_results: &[T],
451    ) -> Result<ImportDetectionStats>
452    where
453        T: ScanResult,
454    {
455        let optimized_detector =
456            ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
457        self.build_edges_sequential_optimized(graph, scan_results, &optimized_detector)
458    }
459
    /// Build graph edges in parallel using the pre-indexed import detector
461    fn build_edges_parallel_optimized<T>(
462        &self,
463        graph: &mut DependencyGraph,
464        scan_results: &[T],
465        optimized_detector: &ImportDetector,
466    ) -> Result<ImportDetectionStats>
467    where
468        T: ScanResult + Sync,
469    {
470        // Create file path lookup
471        let file_path_map: HashMap<&str, &T> = scan_results
472            .iter()
473            .map(|result| (result.path(), result))
474            .collect();
475
476        // Process imports in parallel using optimized detector
477        let import_edges: Vec<_> = scan_results
478            .par_iter()
479            .flat_map(|result| {
480                let mut edges = Vec::new();
481
482                if let Some(imports) = result.imports() {
483                    for import_str in imports {
484                        if let Some(resolved_path) = optimized_detector.resolve_import(
485                            import_str,
486                            result.path(),
487                            &file_path_map,
488                        ) {
489                            edges.push((result.path().to_string(), resolved_path));
490                        }
491                    }
492                }
493
494                edges
495            })
496            .collect();
497
498        // Add edges to graph
499        for (from, to) in &import_edges {
500            graph.add_edge(from.clone(), to.clone())?;
501        }
502
503        // Calculate statistics
504        let total_imports: usize = scan_results
505            .iter()
506            .map(|result| result.imports().map_or(0, |imports| imports.len()))
507            .sum();
508
509        let language_breakdown: HashMap<String, usize> = scan_results
510            .iter()
511            .filter_map(|result| {
512                optimized_detector
513                    .detect_language(result.path())
514                    .map(|lang| (lang, 1))
515            })
516            .fold(HashMap::new(), |mut acc, (lang, count)| {
517                *acc.entry(lang).or_insert(0) += count;
518                acc
519            });
520
521        let stats = ImportDetectionStats {
522            files_processed: scan_results.len(),
523            imports_detected: total_imports,
524            imports_resolved: import_edges.len(),
525            resolution_rate: if total_imports > 0 {
526                import_edges.len() as f64 / total_imports as f64
527            } else {
528                0.0
529            },
530            language_breakdown,
531            import_patterns: HashMap::new(), // TODO: Implement detailed pattern analysis
532        };
533
534        Ok(stats)
535    }
536
    /// Build graph edges in parallel (legacy path that builds its own indexed detector)
    #[allow(dead_code)]
    fn build_edges_parallel<T>(
539        &self,
540        graph: &mut DependencyGraph,
541        scan_results: &[T],
542    ) -> Result<ImportDetectionStats>
543    where
544        T: ScanResult + Sync,
545    {
546        let optimized_detector =
547            ImportDetector::with_file_index(self.import_detector.config.clone(), scan_results);
548        self.build_edges_parallel_optimized(graph, scan_results, &optimized_detector)
549    }
550
551    /// Normalize centrality scores for integration with heuristics
552    fn normalize_centrality_scores(
553        &self,
554        centrality_scores: &HashMap<String, f64>,
555        heuristic_scores: &HashMap<String, f64>,
556    ) -> Result<HashMap<String, f64>> {
557        if centrality_scores.is_empty() {
558            return Ok(HashMap::new());
559        }
560
561        match self.config.integration.normalization_method {
562            NormalizationMethod::MinMax => {
563                self.normalize_min_max(centrality_scores, heuristic_scores)
564            }
565            NormalizationMethod::ZScore => self.normalize_z_score(centrality_scores),
566            NormalizationMethod::Rank => self.normalize_rank(centrality_scores),
567            NormalizationMethod::None => Ok(centrality_scores.clone()),
568        }
569    }
570
571    /// Min-max normalization to match heuristic score range
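    ///
    /// Illustrative numbers: with raw centralities {0.1, 0.3, 1.0} and a maximum
    /// heuristic score of 1.2, the value 0.3 maps to:
    /// ```text
    /// ((0.3 - 0.1) / (1.0 - 0.1)) * 1.2 ≈ 0.267
    /// ```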
572    fn normalize_min_max(
573        &self,
574        centrality_scores: &HashMap<String, f64>,
575        heuristic_scores: &HashMap<String, f64>,
576    ) -> Result<HashMap<String, f64>> {
577        let centrality_values: Vec<f64> = centrality_scores.values().copied().collect();
578        let min_centrality = centrality_values
579            .iter()
580            .fold(f64::INFINITY, |a, &b| a.min(b));
581        let max_centrality = centrality_values
582            .iter()
583            .fold(f64::NEG_INFINITY, |a, &b| a.max(b));
584
585        // Target range based on heuristic scores
586        let heuristic_values: Vec<f64> = heuristic_scores.values().copied().collect();
587        let max_heuristic = if heuristic_values.is_empty() {
588            1.0
589        } else {
590            heuristic_values
591                .iter()
592                .fold(f64::NEG_INFINITY, |a, &b| a.max(b))
593        };
594
595        let mut normalized = HashMap::new();
596
597        if (max_centrality - min_centrality).abs() < f64::EPSILON {
598            // All scores are the same
599            for (path, _) in centrality_scores {
600                normalized.insert(path.clone(), max_heuristic * 0.5); // Use half of max heuristic
601            }
602        } else {
603            for (path, &score) in centrality_scores {
604                let normalized_score =
605                    ((score - min_centrality) / (max_centrality - min_centrality)) * max_heuristic;
606                if normalized_score >= self.config.integration.min_centrality_threshold {
607                    normalized.insert(path.clone(), normalized_score);
608                }
609            }
610        }
611
612        Ok(normalized)
613    }
614
615    /// Z-score normalization
616    fn normalize_z_score(
617        &self,
618        centrality_scores: &HashMap<String, f64>,
619    ) -> Result<HashMap<String, f64>> {
620        let values: Vec<f64> = centrality_scores.values().copied().collect();
621        let mean = values.iter().sum::<f64>() / values.len() as f64;
622        let variance =
623            values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / values.len() as f64;
624        let std_dev = variance.sqrt();
625
626        let mut normalized = HashMap::new();
627
628        if std_dev > f64::EPSILON {
629            for (path, &score) in centrality_scores {
630                let z_score = (score - mean) / std_dev;
                // Shift and scale so z-scores in [-3, 3] land in [0, 1]
                let normalized_score = (z_score + 3.0) / 6.0;
633                if normalized_score >= self.config.integration.min_centrality_threshold {
634                    normalized.insert(path.clone(), normalized_score);
635                }
636            }
637        } else {
638            // All scores are the same
639            for (path, _) in centrality_scores {
640                normalized.insert(path.clone(), 0.5);
641            }
642        }
643
644        Ok(normalized)
645    }
646
647    /// Rank-based normalization
648    fn normalize_rank(
649        &self,
650        centrality_scores: &HashMap<String, f64>,
651    ) -> Result<HashMap<String, f64>> {
652        let mut scored_files: Vec<_> = centrality_scores
653            .iter()
654            .map(|(path, &score)| (path.clone(), score))
655            .collect();
656
657        scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
658
659        let mut normalized = HashMap::new();
660        let total_files = scored_files.len();
661
662        for (rank, (path, _)) in scored_files.into_iter().enumerate() {
663            let normalized_score = 1.0 - (rank as f64 / total_files as f64);
664            if normalized_score >= self.config.integration.min_centrality_threshold {
665                normalized.insert(path, normalized_score);
666            }
667        }
668
669        Ok(normalized)
670    }
671
672    /// Create minimal analysis for large graphs (performance optimization)
673    fn create_minimal_analysis(&self, graph: &DependencyGraph) -> Result<GraphAnalysisResults> {
674        // Use a simplified analyzer for large graphs
675        let minimal_analyzer = GraphStatisticsAnalyzer::for_large_graphs();
676        minimal_analyzer.analyze(graph)
677    }
678
679    /// Check if a file is an entrypoint
680    fn is_entrypoint_file(&self, file_path: &str) -> bool {
681        let path = Path::new(file_path);
682        let file_name = path
683            .file_name()
684            .and_then(|name| name.to_str())
685            .unwrap_or("")
686            .to_lowercase();
687
688        matches!(
689            file_name.as_str(),
690            "main.py"
691                | "main.rs"
692                | "main.go"
693                | "main.js"
694                | "main.ts"
695                | "index.py"
696                | "index.rs"
697                | "index.go"
698                | "index.js"
699                | "index.ts"
700                | "app.py"
701                | "app.rs"
702                | "app.go"
703                | "app.js"
704                | "app.ts"
705                | "server.py"
706                | "server.rs"
707                | "server.go"
708                | "server.js"
709                | "server.ts"
710                | "lib.rs"
711                | "__init__.py"
712        )
713    }
714}
715
716impl Default for CentralityCalculator {
717    fn default() -> Self {
718        Self::new().expect("Failed to create CentralityCalculator")
719    }
720}
721
722/// Import detection and resolution engine with pre-computed lookup optimization
723#[derive(Debug, Clone)]
724pub struct ImportDetector {
725    config: ImportResolutionConfig,
    /// Pre-computed lookup map: file stem -> full paths (avoids rescanning all files per import)
727    stem_to_paths: HashMap<String, Vec<String>>,
728    /// Pre-computed lookup map: filename -> full paths
729    filename_to_paths: HashMap<String, Vec<String>>,
730    /// Set of all available file paths for quick existence checks
731    available_paths: HashSet<String>,
732}
733
734impl ImportDetector {
735    /// Create with configuration
736    pub fn with_config(config: ImportResolutionConfig) -> Self {
737        Self {
738            config,
739            stem_to_paths: HashMap::new(),
740            filename_to_paths: HashMap::new(),
741            available_paths: HashSet::new(),
742        }
743    }
744
    /// Create with pre-computed lookup maps so imports resolve without a full file scan
746    pub fn with_file_index<T>(config: ImportResolutionConfig, scan_results: &[T]) -> Self
747    where
748        T: ScanResult,
749    {
750        let mut detector = Self::with_config(config);
751        detector.build_lookup_maps(scan_results);
752        detector
753    }
754
755    /// Build inverted index mapping file stems/names to full paths
756    /// This eliminates the O(n) scan-all-files bottleneck
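    ///
    /// For example (illustrative path), indexing `src/utils.py` adds:
    /// ```text
    /// stem_to_paths:     "utils"    -> ["src/utils.py"]
    /// filename_to_paths: "utils.py" -> ["src/utils.py"]
    /// available_paths:   { "src/utils.py", ... }
    /// ```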
757    fn build_lookup_maps<T>(&mut self, scan_results: &[T])
758    where
759        T: ScanResult,
760    {
761        self.stem_to_paths.clear();
762        self.filename_to_paths.clear();
763        self.available_paths.clear();
764
765        for result in scan_results {
766            let full_path = result.path().to_string();
767            self.available_paths.insert(full_path.clone());
768
769            let path = Path::new(result.path());
770
771            // Index by file stem (name without extension)
772            if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
773                let stem_lower = stem.to_lowercase();
774                self.stem_to_paths
775                    .entry(stem_lower)
776                    .or_insert_with(Vec::new)
777                    .push(full_path.clone());
778            }
779
780            // Index by full filename
781            if let Some(filename) = path.file_name().and_then(|s| s.to_str()) {
782                let filename_lower = filename.to_lowercase();
783                self.filename_to_paths
784                    .entry(filename_lower)
785                    .or_insert_with(Vec::new)
786                    .push(full_path);
787            }
788        }
789    }
790
791    /// Detect programming language from file extension
792    pub fn detect_language(&self, file_path: &str) -> Option<String> {
793        let path = Path::new(file_path);
794        let ext = path.extension()?.to_str()?.to_lowercase();
795
796        match ext.as_str() {
797            "py" => Some("python".to_string()),
798            "js" | "jsx" | "mjs" => Some("javascript".to_string()),
799            "ts" | "tsx" => Some("typescript".to_string()),
800            "rs" => Some("rust".to_string()),
801            "go" => Some("go".to_string()),
802            "java" | "kt" => Some("java".to_string()),
803            "cpp" | "cc" | "cxx" | "hpp" | "h" => Some("cpp".to_string()),
804            "c" => Some("c".to_string()),
805            "rb" => Some("ruby".to_string()),
806            "php" => Some("php".to_string()),
807            "cs" => Some("csharp".to_string()),
808            "swift" => Some("swift".to_string()),
809            _ => None,
810        }
811    }
812
813    /// Resolve import string to actual file path
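    ///
    /// Walk-through with illustrative paths: resolving `"utils"` from `src/main.py`
    /// (Python) tries the candidates below in order and returns the first one present in
    /// `file_map`, falling back to fuzzy matching otherwise:
    /// ```text
    /// utils.py
    /// utils/__init__.py
    /// src/utils.py
    /// src/utils/__init__.py
    /// ```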
814    pub fn resolve_import<T>(
815        &self,
816        import_str: &str,
817        current_file: &str,
818        file_map: &HashMap<&str, &T>,
819    ) -> Option<String>
820    where
821        T: ScanResult,
822    {
823        // Check custom path mappings first
824        if let Some(mapped_path) = self.config.path_mappings.get(import_str) {
825            if file_map.contains_key(mapped_path.as_str()) {
826                return Some(mapped_path.clone());
827            }
828        }
829
830        let current_path = Path::new(current_file);
831        let language = self.detect_language(current_file);
832
833        match language.as_deref() {
834            Some("python") => self.resolve_python_import(import_str, current_path, file_map),
835            Some("javascript") | Some("typescript") => {
836                self.resolve_js_import(import_str, current_path, file_map)
837            }
838            Some("rust") => self.resolve_rust_import(import_str, current_path, file_map),
839            Some("go") => self.resolve_go_import(import_str, current_path, file_map),
840            _ => self.resolve_generic_import(import_str, current_path, file_map),
841        }
842    }
843
844    /// Resolve Python import
845    fn resolve_python_import<T>(
846        &self,
847        import_str: &str,
848        current_path: &Path,
849        file_map: &HashMap<&str, &T>,
850    ) -> Option<String>
851    where
852        T: ScanResult,
853    {
854        let cleaned_import = import_str.trim();
855
856        // Skip standard library imports if configured
857        if self.config.exclude_stdlib_imports && self.is_python_stdlib(cleaned_import) {
858            return None;
859        }
860
861        // Convert module path to file path
862        let module_parts: Vec<&str> = cleaned_import.split('.').collect();
863
864        // Try various combinations
865        let mut candidates = Vec::new();
866
867        // Direct module file
868        candidates.push(format!("{}.py", module_parts.join("/")));
869
870        // Module package
871        candidates.push(format!("{}/__init__.py", module_parts.join("/")));
872
873        // Relative to current file directory
874        if let Some(parent) = current_path.parent() {
875            let parent_str = parent.to_string_lossy();
876            let relative_candidates: Vec<String> = candidates
877                .iter()
878                .map(|candidate| format!("{}/{}", parent_str, candidate))
879                .collect();
880            candidates.extend(relative_candidates);
881        }
882
883        // Find first matching candidate
884        for candidate in &candidates {
885            if file_map.contains_key(candidate.as_str()) {
886                return Some(candidate.clone());
887            }
888        }
889
890        // Fuzzy matching as fallback
891        self.fuzzy_match_import(&module_parts, file_map)
892    }
893
894    /// Resolve JavaScript/TypeScript import
895    fn resolve_js_import<T>(
896        &self,
897        import_str: &str,
898        current_path: &Path,
899        file_map: &HashMap<&str, &T>,
900    ) -> Option<String>
901    where
902        T: ScanResult,
903    {
904        let cleaned_import = import_str.trim();
905
906        // Handle relative imports
907        if cleaned_import.starts_with("./") || cleaned_import.starts_with("../") {
908            if !self.config.resolve_relative_imports {
909                return None;
910            }
911
            if let Some(parent) = current_path.parent() {
                // Strip leading "./" segments and step up one directory per leading
                // "../" so the join stays relative to the importing file.
                let mut base = parent;
                let mut rest = cleaned_import;
                loop {
                    if let Some(stripped) = rest.strip_prefix("./") {
                        rest = stripped;
                    } else if let Some(stripped) = rest.strip_prefix("../") {
                        base = base.parent().unwrap_or(base);
                        rest = stripped;
                    } else {
                        break;
                    }
                }
                let resolved_path = base.join(rest);
                let resolved_str = resolved_path.to_string_lossy();

                // Try source-file extensions and directory index files
917                for ext in &[".js", ".ts", ".jsx", ".tsx", "/index.js", "/index.ts"] {
918                    let candidate = format!("{}{}", resolved_str, ext);
919                    if file_map.contains_key(candidate.as_str()) {
920                        return Some(candidate);
921                    }
922                }
923            }
924        }
925        // Handle absolute imports
926        else if self.config.resolve_absolute_imports {
927            let import_parts: Vec<&str> = cleaned_import.split('/').collect();
928            return self.fuzzy_match_import(&import_parts, file_map);
929        }
930
931        None
932    }
933
934    /// Resolve Rust import (use/mod statements)
935    fn resolve_rust_import<T>(
936        &self,
937        import_str: &str,
938        _current_path: &Path,
939        file_map: &HashMap<&str, &T>,
940    ) -> Option<String>
941    where
942        T: ScanResult,
943    {
944        let cleaned_import = import_str.trim();
945
946        // Skip standard library crates
947        if self.config.exclude_stdlib_imports && self.is_rust_stdlib(cleaned_import) {
948            return None;
949        }
950
951        let parts: Vec<&str> = cleaned_import.split("::").collect();
952
953        // Try to resolve as file path
954        let mut candidates = Vec::new();
955
956        // Direct module file
957        candidates.push(format!("{}.rs", parts.join("/")));
958
959        // Module directory with mod.rs
960        candidates.push(format!("{}/mod.rs", parts.join("/")));
961
962        // lib.rs in crate
963        if parts.len() == 1 {
964            candidates.push(format!("{}/lib.rs", parts[0]));
965            candidates.push(format!("{}/src/lib.rs", parts[0]));
966        }
967
968        // Find first matching candidate
969        for candidate in &candidates {
970            if file_map.contains_key(candidate.as_str()) {
971                return Some(candidate.clone());
972            }
973        }
974
975        // Fuzzy matching
976        self.fuzzy_match_import(&parts, file_map)
977    }
978
979    /// Resolve Go import
980    fn resolve_go_import<T>(
981        &self,
982        import_str: &str,
983        _current_path: &Path,
984        file_map: &HashMap<&str, &T>,
985    ) -> Option<String>
986    where
987        T: ScanResult,
988    {
989        let cleaned_import = import_str.trim().trim_matches('"');
990
        // Skip standard-library packages (their import paths contain no domain, i.e. no '.')
992        if self.config.exclude_stdlib_imports && !cleaned_import.contains('.') {
993            return None;
994        }
995
996        let parts: Vec<&str> = cleaned_import.split('/').collect();
997
998        // Try various Go file patterns
999        let mut candidates = Vec::new();
1000
1001        // Package directory
1002        candidates.push(format!("{}.go", parts.last()?));
1003        candidates.push(format!("{}/main.go", cleaned_import));
1004        candidates.push(format!("{}/{}.go", cleaned_import, parts.last()?));
1005
1006        for candidate in &candidates {
1007            if file_map.contains_key(candidate.as_str()) {
1008                return Some(candidate.clone());
1009            }
1010        }
1011
1012        self.fuzzy_match_import(&parts, file_map)
1013    }
1014
1015    /// Generic import resolution
1016    fn resolve_generic_import<T>(
1017        &self,
1018        import_str: &str,
1019        _current_path: &Path,
1020        file_map: &HashMap<&str, &T>,
1021    ) -> Option<String>
1022    where
1023        T: ScanResult,
1024    {
1025        let cleaned_import = import_str.trim();
1026        let parts: Vec<&str> = cleaned_import.split(&['/', '.', ':']).collect();
1027        self.fuzzy_match_import(&parts, file_map)
1028    }
1029
    /// Fuzzy matching for import resolution, backed by the pre-computed lookup maps
1031    fn fuzzy_match_import<T>(
1032        &self,
1033        import_parts: &[&str],
1034        _file_map: &HashMap<&str, &T>,
1035    ) -> Option<String>
1036    where
1037        T: ScanResult,
1038    {
1039        if import_parts.is_empty() {
1040            return None;
1041        }
1042
1043        let last_part = import_parts.last()?.to_lowercase();
1044
        // Use the pre-computed lookup maps rather than scanning every file (O(1) map
        // lookups instead of an O(n) pass over all paths).
        // 1. Exact stem match first (most common case)
1047        if let Some(paths) = self.stem_to_paths.get(&last_part) {
1048            // Return first match (could be made smarter with scoring)
1049            if let Some(first_path) = paths.first() {
1050                return Some(first_path.clone());
1051            }
1052        }
1053
1054        // 2. Try filename match
1055        if let Some(paths) = self.filename_to_paths.get(&last_part) {
1056            if let Some(first_path) = paths.first() {
1057                return Some(first_path.clone());
1058            }
1059        }
1060
1061        // 3. Try partial matching against stems
1062        for (stem, paths) in &self.stem_to_paths {
1063            if stem.contains(&last_part) || last_part.contains(stem) {
1064                if let Some(first_path) = paths.first() {
1065                    return Some(first_path.clone());
1066                }
1067            }
1068        }
1069
1070        // 4. Fallback: check if path contains all import parts
1071        for path in &self.available_paths {
1072            let path_lower = path.to_lowercase();
1073            if import_parts
1074                .iter()
1075                .all(|&part| path_lower.contains(&part.to_lowercase()))
1076            {
1077                return Some(path.clone());
1078            }
1079        }
1080
1081        None
1082    }
1083
1084    /// Check if import is Python standard library
1085    fn is_python_stdlib(&self, import_str: &str) -> bool {
1086        let stdlib_modules = [
1087            "os",
1088            "sys",
1089            "re",
1090            "json",
1091            "collections",
1092            "itertools",
1093            "functools",
1094            "typing",
1095            "datetime",
1096            "math",
1097            "random",
1098            "string",
1099            "pathlib",
1100            "io",
1101            "csv",
1102            "xml",
1103            "html",
1104            "urllib",
1105            "http",
1106            "email",
1107            "logging",
1108            "unittest",
1109            "asyncio",
1110            "concurrent",
1111            "multiprocessing",
1112            "threading",
1113            "subprocess",
1114        ];
1115
1116        let first_part = import_str.split('.').next().unwrap_or(import_str);
1117        stdlib_modules.contains(&first_part)
1118    }
1119
1120    /// Check if import is Rust standard library
1121    fn is_rust_stdlib(&self, import_str: &str) -> bool {
1122        import_str.starts_with("std::")
1123            || import_str.starts_with("core::")
1124            || import_str.starts_with("alloc::")
1125    }
1126}
1127
1128/// Utility functions for centrality results analysis
1129impl CentralityResults {
1130    /// Get files sorted by centrality score (descending)
1131    pub fn top_files_by_centrality(&self, k: usize) -> Vec<(String, f64)> {
1132        let mut scored_files: Vec<_> = self
1133            .pagerank_scores
1134            .iter()
1135            .map(|(path, &score)| (path.clone(), score))
1136            .collect();
1137
1138        scored_files.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1139        scored_files.into_iter().take(k).collect()
1140    }
1141
1142    /// Get summary statistics about centrality computation
1143    pub fn summary(&self) -> String {
1144        format!(
1145            "Centrality Analysis Summary:\n\
1146             - Files with centrality scores: {}\n\
1147             - PageRank iterations: {} (converged: {})\n\
1148             - Graph: {} nodes, {} edges (density: {:.4})\n\
1149             - Import resolution: {:.1}% ({}/{})\n\
1150             - Top languages: {}\n\
1151             - Computation time: {}ms\n\
1152             - Integration weight: {:.2}",
1153            self.pagerank_scores.len(),
1154            self.pagerank_details.iterations_converged,
1155            self.pagerank_details.converged(),
1156            self.graph_analysis.basic_stats.total_nodes,
1157            self.graph_analysis.basic_stats.total_edges,
1158            self.graph_analysis.basic_stats.graph_density,
1159            self.import_stats.resolution_rate * 100.0,
1160            self.import_stats.imports_resolved,
1161            self.import_stats.imports_detected,
1162            self.import_stats
1163                .language_breakdown
1164                .iter()
1165                .max_by_key(|(_, &count)| count)
1166                .map(|(lang, count)| format!("{} ({})", lang, count))
1167                .unwrap_or_else(|| "None".to_string()),
1168            self.integration_metadata.computation_time_ms,
1169            self.integration_metadata.centrality_weight,
1170        )
1171    }
1172}
1173
1174#[cfg(test)]
1175mod tests {
1176    use super::*;
1177    use scribe_analysis::heuristics::DocumentAnalysis;
1178
1179    // Mock scan result for testing
1180    #[derive(Debug, Clone)]
1181    struct MockScanResult {
1182        path: String,
1183        relative_path: String,
1184        depth: usize,
1185        imports: Option<Vec<String>>,
1186        is_docs: bool,
1187        is_readme: bool,
1188        is_test: bool,
1189        is_entrypoint: bool,
1190        has_examples: bool,
1191        priority_boost: f64,
1192        churn_score: f64,
1193        centrality_in: f64,
1194        doc_analysis: Option<DocumentAnalysis>,
1195    }
1196
1197    impl MockScanResult {
1198        fn new(path: &str) -> Self {
1199            Self {
1200                path: path.to_string(),
1201                relative_path: path.to_string(),
1202                depth: path.matches('/').count(),
1203                imports: None,
1204                is_docs: path.contains("doc") || path.ends_with(".md"),
1205                is_readme: path.to_lowercase().contains("readme"),
1206                is_test: path.contains("test"),
1207                is_entrypoint: path.contains("main") || path.contains("index"),
1208                has_examples: path.contains("example"),
1209                priority_boost: 0.0,
1210                churn_score: 0.5,
1211                centrality_in: 0.0,
1212                doc_analysis: Some(DocumentAnalysis::new()),
1213            }
1214        }
1215
1216        fn with_imports(mut self, imports: Vec<String>) -> Self {
1217            self.imports = Some(imports);
1218            self
1219        }
1220    }
1221
1222    impl ScanResult for MockScanResult {
1223        fn path(&self) -> &str {
1224            &self.path
1225        }
1226        fn relative_path(&self) -> &str {
1227            &self.relative_path
1228        }
1229        fn depth(&self) -> usize {
1230            self.depth
1231        }
1232        fn is_docs(&self) -> bool {
1233            self.is_docs
1234        }
1235        fn is_readme(&self) -> bool {
1236            self.is_readme
1237        }
1238        fn is_test(&self) -> bool {
1239            self.is_test
1240        }
1241        fn is_entrypoint(&self) -> bool {
1242            self.is_entrypoint
1243        }
1244        fn has_examples(&self) -> bool {
1245            self.has_examples
1246        }
1247        fn priority_boost(&self) -> f64 {
1248            self.priority_boost
1249        }
1250        fn churn_score(&self) -> f64 {
1251            self.churn_score
1252        }
1253        fn centrality_in(&self) -> f64 {
1254            self.centrality_in
1255        }
1256        fn imports(&self) -> Option<&[String]> {
1257            self.imports.as_deref()
1258        }
1259        fn doc_analysis(&self) -> Option<&DocumentAnalysis> {
1260            self.doc_analysis.as_ref()
1261        }
1262    }
1263
1264    #[test]
1265    fn test_centrality_calculator_creation() {
1266        let calculator = CentralityCalculator::new();
1267        assert!(calculator.is_ok());
1268
1269        let large_calc = CentralityCalculator::for_large_codebases();
1270        assert!(large_calc.is_ok());
1271    }
1272
1273    #[test]
1274    fn test_import_detection() {
1275        let detector = ImportDetector::with_config(ImportResolutionConfig::default());
1276
1277        // Test language detection
1278        assert_eq!(
1279            detector.detect_language("main.py"),
1280            Some("python".to_string())
1281        );
1282        assert_eq!(
1283            detector.detect_language("app.js"),
1284            Some("javascript".to_string())
1285        );
1286        assert_eq!(detector.detect_language("lib.rs"), Some("rust".to_string()));
1287
1288        // Test Python stdlib detection
1289        assert!(detector.is_python_stdlib("os"));
1290        assert!(detector.is_python_stdlib("sys.path"));
1291        assert!(!detector.is_python_stdlib("custom_module"));
1292
1293        // Test Rust stdlib detection
1294        assert!(detector.is_rust_stdlib("std::collections::HashMap"));
1295        assert!(detector.is_rust_stdlib("core::fmt"));
1296        assert!(!detector.is_rust_stdlib("serde::Deserialize"));
1297    }
1298
1299    #[test]
1300    fn test_centrality_calculation() {
1301        let calculator = CentralityCalculator::new().unwrap();
1302
1303        let scan_results = vec![
1304            MockScanResult::new("main.py")
1305                .with_imports(vec!["utils".to_string(), "config".to_string()]),
1306            MockScanResult::new("utils.py").with_imports(vec!["config".to_string()]),
1307            MockScanResult::new("config.py"),
1308            MockScanResult::new("test.py").with_imports(vec!["main".to_string()]),
1309        ];
1310
1311        let results = calculator.calculate_centrality(&scan_results).unwrap();
1312
1313        // Basic checks
1314        assert!(!results.pagerank_scores.is_empty());
1315        assert!(results.integration_metadata.integration_successful);
1316        assert_eq!(
1317            results.integration_metadata.files_with_centrality,
1318            results.pagerank_scores.len()
1319        );
1320
1321        // config.py should have high centrality (imported by main.py and utils.py)
1322        let config_score = results.pagerank_scores.get("config.py");
1323        assert!(config_score.is_some());
1324
1325        println!("Centrality scores:");
1326        for (file, score) in &results.pagerank_scores {
1327            println!("  {}: {:.6}", file, score);
1328        }
1329
1330        println!("\n{}", results.summary());
1331    }
1332
1333    #[test]
1334    fn test_heuristics_integration() {
1335        let calculator = CentralityCalculator::new().unwrap();
1336
1337        let scan_results = vec![
1338            MockScanResult::new("main.py").with_imports(vec!["utils".to_string()]),
1339            MockScanResult::new("utils.py"),
1340        ];
1341
1342        let centrality_results = calculator.calculate_centrality(&scan_results).unwrap();
1343
1344        // Mock heuristic scores
1345        let mut heuristic_scores = HashMap::new();
1346        heuristic_scores.insert("main.py".to_string(), 0.8);
1347        heuristic_scores.insert("utils.py".to_string(), 0.6);
1348
1349        let integrated_scores = calculator
1350            .integrate_with_heuristics(&centrality_results, &heuristic_scores)
1351            .unwrap();
1352
1353        assert!(!integrated_scores.is_empty());
1354
1355        // Integrated scores should be different from original heuristic scores
1356        for (file, integrated_score) in &integrated_scores {
1357            let original_score = heuristic_scores.get(file).unwrap();
1358            println!(
1359                "File {}: heuristic={:.3}, integrated={:.3}",
1360                file, original_score, integrated_score
1361            );
1362        }
1363    }
1364
1365    #[test]
1366    fn test_normalization_methods() {
1367        let calculator = CentralityCalculator::new().unwrap();
1368
1369        let centrality_scores = vec![
1370            ("file1".to_string(), 0.1),
1371            ("file2".to_string(), 0.3),
1372            ("file3".to_string(), 0.6),
1373            ("file4".to_string(), 1.0),
1374        ]
1375        .into_iter()
1376        .collect();
1377
1378        let heuristic_scores = vec![
1379            ("file1".to_string(), 0.5),
1380            ("file2".to_string(), 0.7),
1381            ("file3".to_string(), 0.9),
1382            ("file4".to_string(), 1.2),
1383        ]
1384        .into_iter()
1385        .collect();
1386
1387        // Test min-max normalization
1388        let normalized = calculator
1389            .normalize_min_max(&centrality_scores, &heuristic_scores)
1390            .unwrap();
1391        assert!(!normalized.is_empty());
1392
1393        // Test z-score normalization
1394        let z_normalized = calculator.normalize_z_score(&centrality_scores).unwrap();
1395        assert!(!z_normalized.is_empty());
1396
1397        // Test rank normalization
1398        let rank_normalized = calculator.normalize_rank(&centrality_scores).unwrap();
1399        assert!(!rank_normalized.is_empty());
1400
1401        println!("Original scores: {:?}", centrality_scores);
1402        println!("Min-max normalized: {:?}", normalized);
1403        println!("Z-score normalized: {:?}", z_normalized);
1404        println!("Rank normalized: {:?}", rank_normalized);
1405    }
1406
1407    #[test]
1408    fn test_import_resolution() {
1409        let detector = ImportDetector::with_config(ImportResolutionConfig::default());
1410
1411        // Create mock file map
1412        let scan_results = vec![
1413            MockScanResult::new("src/main.py"),
1414            MockScanResult::new("src/utils.py"),
1415            MockScanResult::new("src/config.py"),
1416            MockScanResult::new("tests/test_main.py"),
1417        ];
1418
1419        let file_map: HashMap<&str, &MockScanResult> = scan_results
1420            .iter()
1421            .map(|result| (result.path(), result))
1422            .collect();
1423
1424        // Test Python import resolution
1425        let resolved = detector.resolve_import("utils", "src/main.py", &file_map);
1426        assert!(resolved.is_some());
1427
1428        // Test module path resolution
1429        let resolved_config = detector.resolve_import("src.config", "src/main.py", &file_map);
        // Should resolve to src/config.py via the direct module-path candidate
1431        assert!(resolved_config.is_some());
1432
1433        println!("Resolved imports:");
1434        if let Some(path) = resolved {
1435            println!("  utils -> {}", path);
1436        }
1437        if let Some(path) = resolved_config {
1438            println!("  src.config -> {}", path);
1439        }
1440    }
1441
1442    #[test]
1443    fn test_entrypoint_detection() {
1444        let calculator = CentralityCalculator::new().unwrap();
1445
1446        assert!(calculator.is_entrypoint_file("main.py"));
1447        assert!(calculator.is_entrypoint_file("src/main.rs"));
1448        assert!(calculator.is_entrypoint_file("index.js"));
1449        assert!(calculator.is_entrypoint_file("app.py"));
1450        assert!(calculator.is_entrypoint_file("lib.rs"));
1451        assert!(calculator.is_entrypoint_file("__init__.py"));
1452
1453        assert!(!calculator.is_entrypoint_file("utils.py"));
1454        assert!(!calculator.is_entrypoint_file("config.rs"));
1455        assert!(!calculator.is_entrypoint_file("helper.js"));
1456    }
1457
1458    #[test]
1459    fn test_top_files_by_centrality() {
1460        let mut pagerank_scores = HashMap::new();
1461        pagerank_scores.insert("file1.py".to_string(), 0.4);
1462        pagerank_scores.insert("file2.py".to_string(), 0.6);
1463        pagerank_scores.insert("file3.py".to_string(), 0.2);
1464        pagerank_scores.insert("file4.py".to_string(), 0.8);
1465
1466        let results = CentralityResults {
1467            pagerank_scores,
1468            graph_analysis: GraphAnalysisResults {
1469                basic_stats: crate::graph::GraphStatistics::empty(),
1470                degree_distribution: Default::default(),
1471                connectivity: Default::default(),
1472                structural_patterns: Default::default(),
1473                import_insights: Default::default(),
1474                performance_profile: Default::default(),
1475                analysis_metadata: Default::default(),
1476            },
1477            pagerank_details: PageRankResults {
1478                scores: HashMap::new(),
1479                iterations_converged: 10,
1480                convergence_epsilon: 1e-6,
1481                graph_stats: crate::graph::GraphStatistics::empty(),
1482                parameters: PageRankConfig::default(),
1483                performance_metrics: Default::default(),
1484            },
1485            import_stats: ImportDetectionStats {
1486                files_processed: 4,
1487                imports_detected: 0,
1488                imports_resolved: 0,
1489                resolution_rate: 0.0,
1490                language_breakdown: HashMap::new(),
1491                import_patterns: HashMap::new(),
1492            },
1493            integration_metadata: IntegrationMetadata {
1494                timestamp: chrono::Utc::now(),
1495                computation_time_ms: 100,
1496                integration_successful: true,
1497                centrality_weight: 0.15,
1498                files_with_centrality: 4,
1499                config: CentralityConfig::default(),
1500            },
1501        };
1502
1503        let top_files = results.top_files_by_centrality(2);
1504        assert_eq!(top_files.len(), 2);
1505        assert_eq!(top_files[0].0, "file4.py");
1506        assert_eq!(top_files[0].1, 0.8);
1507        assert_eq!(top_files[1].0, "file2.py");
1508        assert_eq!(top_files[1].1, 0.6);
1509    }
1510}