scribe_analysis/heuristics/
enhanced_scoring.rs

1//! # Enhanced Scoring System with Complexity Integration
2//!
3//! This module extends the basic heuristic scoring with comprehensive complexity analysis,
4//! providing deeper insights into code quality and maintainability for better file selection.
5//!
6//! ## Enhanced Features
7//!
8//! - **Complexity-Aware Scoring**: Integrates cyclomatic, cognitive, and maintainability metrics
9//! - **Quality-Based Prioritization**: Considers code quality alongside importance
10//! - **Language-Specific Analysis**: Tailored complexity analysis per programming language
11//! - **Maintainability Assessment**: Factors in long-term code maintenance concerns
12//! - **Adaptive Weights**: Adjusts scoring based on repository characteristics
13
14use super::scoring::RawScoreComponents;
15use super::{HeuristicWeights, ScanResult, ScoreComponents};
16use crate::complexity::{ComplexityAnalyzer, ComplexityConfig, ComplexityMetrics};
17use rayon::prelude::*;
18use scribe_core::{Result, ScribeError};
19use std::collections::HashMap;
20
/// Enhanced score components that include complexity metrics
///
/// Produced by `EnhancedHeuristicScorer`; combines the standard heuristic
/// score with four complexity-derived sub-scores (each nominally in 0-1).
#[derive(Debug, Clone)]
pub struct EnhancedScoreComponents {
    /// Base score components from standard heuristics
    pub base_score: ScoreComponents,

    /// Inverted structural complexity (1.0 = simple); 0.5 neutral default
    /// when complexity analysis is disabled.
    pub complexity_score: f64,
    /// Normalized maintainability index (1.0 = highly maintainable).
    pub maintainability_score: f64,
    /// Inverted cognitive load (1.0 = easy to follow).
    pub cognitive_score: f64,
    /// Composite of cyclomatic complexity, nesting, density and comments.
    pub quality_score: f64,

    /// Combined final score: base score at a 60% share, complexity
    /// sub-scores at 40%, scaled by the adaptive factors and capped at 2.0.
    pub enhanced_final_score: f64,

    /// Detailed complexity metrics; `None` when analysis is disabled or the
    /// file could not be analyzed.
    pub complexity_metrics: Option<ComplexityMetrics>,

    /// Complexity-adjusted weights actually used for this file.
    pub adjusted_weights: EnhancedWeights,
}
42
/// Enhanced weights that include complexity factors
///
/// The four complexity weights scale the corresponding sub-scores inside the
/// 40% "enhanced" share of the final score; they are not required to sum to
/// 1.0.
#[derive(Debug, Clone)]
pub struct EnhancedWeights {
    /// Base heuristic weights
    pub base_weights: HeuristicWeights,

    /// Weight of the inverted structural-complexity sub-score.
    pub complexity_weight: f64,
    /// Weight of the normalized maintainability-index sub-score.
    pub maintainability_weight: f64,
    /// Weight of the inverted cognitive-load sub-score.
    pub cognitive_weight: f64,
    /// Weight of the composite quality sub-score.
    pub quality_weight: f64,

    /// Adaptive weight adjustments
    pub adaptive_factors: AdaptiveFactors,
}
58
/// Adaptive factors that adjust scoring based on repository characteristics
///
/// Each factor is a multiplier applied to the combined score: 1.0 is
/// neutral, below 1.0 penalizes, above 1.0 boosts.
#[derive(Debug, Clone)]
pub struct AdaptiveFactors {
    /// Repository size factor (larger repos may prefer simpler files)
    pub repo_size_factor: f64,

    /// Language complexity factor (some languages naturally more complex)
    pub language_factor: f64,

    /// Project maturity factor (mature projects may prioritize maintainability)
    pub maturity_factor: f64,

    /// Team experience factor (affects complexity tolerance)
    pub experience_factor: f64,
}
74
/// Enhanced heuristic scorer with complexity integration
///
/// Wraps the standard heuristic scorer and augments its output with
/// complexity-derived sub-scores and adaptive weighting.
#[derive(Debug)]
pub struct EnhancedHeuristicScorer {
    /// Base scorer for standard heuristics
    base_scorer: super::scoring::HeuristicScorer,

    /// Complexity analyzer
    complexity_analyzer: ComplexityAnalyzer,

    /// Enhanced weights configuration
    weights: EnhancedWeights,

    /// Repository characteristics for adaptive scoring
    repo_characteristics: RepositoryCharacteristics,

    /// Cache of computed complexity metrics, keyed by file path.
    content_cache: HashMap<String, ComplexityMetrics>,

    /// Whether to run complexity analysis (disabled by default for
    /// performance; toggled via `enable_/disable_complexity_analysis`).
    enable_complexity_analysis: bool,
}
96
/// Repository characteristics for adaptive scoring
#[derive(Debug, Clone)]
pub struct RepositoryCharacteristics {
    /// Total number of files in repository
    pub total_files: usize,

    /// Primary programming languages (lowercase names, e.g. "rust").
    pub primary_languages: Vec<String>,

    /// Repository age in months
    pub age_months: usize,

    /// Average team size
    pub team_size: usize,

    /// Project type (library, application, framework, etc.)
    pub project_type: ProjectType,
}
115
/// Project type classification used to bias adaptive scoring weights.
///
/// Plain unit-variant enum, so it derives `Copy`/`Eq`/`Hash` in addition to
/// `Clone` — this lets callers compare and key on it without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ProjectType {
    Library,
    Application,
    Framework,
    Tool,
    Game,
    WebService,
    EmbeddedSystem,
    /// Fallback when the project type cannot be determined.
    Unknown,
}
128
129impl Default for EnhancedWeights {
130    fn default() -> Self {
131        Self {
132            base_weights: HeuristicWeights::default(),
133            complexity_weight: 0.15,
134            maintainability_weight: 0.20,
135            cognitive_weight: 0.10,
136            quality_weight: 0.15,
137            adaptive_factors: AdaptiveFactors::default(),
138        }
139    }
140}
141
142impl Default for AdaptiveFactors {
143    fn default() -> Self {
144        Self {
145            repo_size_factor: 1.0,
146            language_factor: 1.0,
147            maturity_factor: 1.0,
148            experience_factor: 1.0,
149        }
150    }
151}
152
153impl Default for RepositoryCharacteristics {
154    fn default() -> Self {
155        Self {
156            total_files: 100,
157            primary_languages: vec!["rust".to_string()],
158            age_months: 12,
159            team_size: 3,
160            project_type: ProjectType::Application,
161        }
162    }
163}
164
165impl EnhancedHeuristicScorer {
166    /// Create a new enhanced scorer with default configuration
167    pub fn new() -> Self {
168        let base_weights = HeuristicWeights::default();
169        let base_scorer = super::scoring::HeuristicScorer::new(base_weights.clone());
170
171        Self {
172            base_scorer,
173            complexity_analyzer: ComplexityAnalyzer::new(),
174            weights: EnhancedWeights::default(),
175            repo_characteristics: RepositoryCharacteristics::default(),
176            content_cache: HashMap::new(),
177            enable_complexity_analysis: false, // TEMPORARILY DISABLED to test baseline performance
178        }
179    }
180
181    /// Enable complexity analysis (WARNING: This significantly impacts performance)
182    pub fn enable_complexity_analysis(&mut self) {
183        self.enable_complexity_analysis = true;
184    }
185
186    /// Disable complexity analysis for better performance
187    pub fn disable_complexity_analysis(&mut self) {
188        self.enable_complexity_analysis = false;
189        self.content_cache.clear(); // Clear cache to save memory
190    }
191
192    /// Create enhanced scorer with custom configuration
193    pub fn with_config(
194        weights: EnhancedWeights,
195        complexity_config: ComplexityConfig,
196        repo_characteristics: RepositoryCharacteristics,
197    ) -> Self {
198        let base_scorer = super::scoring::HeuristicScorer::new(weights.base_weights.clone());
199        let complexity_analyzer = ComplexityAnalyzer::with_config(complexity_config);
200
201        Self {
202            base_scorer,
203            complexity_analyzer,
204            weights,
205            repo_characteristics,
206            content_cache: HashMap::new(),
207            enable_complexity_analysis: false, // TEMPORARILY DISABLED to test baseline performance
208        }
209    }
210
211    /// Score a file with enhanced complexity-aware analysis
212    pub fn score_file_enhanced<T>(
213        &mut self,
214        file: &T,
215        file_content: &str,
216        all_files: &[T],
217    ) -> Result<EnhancedScoreComponents>
218    where
219        T: ScanResult + Clone,
220    {
221        // Get base heuristic score
222        let base_score = self.base_scorer.score_file(file, all_files)?;
223
224        // Detect language from file path
225        let language = self.detect_language(file.path());
226
227        // Analyze complexity only if enabled (with caching)
228        let (
229            complexity_metrics,
230            complexity_score,
231            maintainability_score,
232            cognitive_score,
233            quality_score,
234        ) = if self.enable_complexity_analysis {
235            let complexity_metrics = if let Some(cached) = self.content_cache.get(file.path()) {
236                cached.clone()
237            } else {
238                let metrics = self
239                    .complexity_analyzer
240                    .analyze_content(file_content, &language)?;
241                self.content_cache
242                    .insert(file.path().to_string(), metrics.clone());
243                metrics
244            };
245
246            // Calculate complexity-based scores
247            let complexity_score = self.calculate_complexity_score(&complexity_metrics);
248            let maintainability_score = self.calculate_maintainability_score(&complexity_metrics);
249            let cognitive_score = self.calculate_cognitive_score(&complexity_metrics);
250            let quality_score = self.calculate_quality_score(&complexity_metrics);
251
252            (
253                Some(complexity_metrics),
254                complexity_score,
255                maintainability_score,
256                cognitive_score,
257                quality_score,
258            )
259        } else {
260            // Skip expensive complexity analysis - use neutral/default scores
261            (None, 0.5, 0.5, 0.5, 0.5)
262        };
263
264        // Apply adaptive adjustments
265        let adjusted_weights = if let Some(ref metrics) = complexity_metrics {
266            self.calculate_adaptive_weights(file, metrics)
267        } else {
268            // Use default weights when complexity analysis is disabled
269            self.weights.clone()
270        };
271
272        // Calculate enhanced final score
273        let enhanced_final_score = self.calculate_enhanced_final_score(
274            &base_score,
275            complexity_score,
276            maintainability_score,
277            cognitive_score,
278            quality_score,
279            &adjusted_weights,
280        );
281
282        Ok(EnhancedScoreComponents {
283            base_score,
284            complexity_score,
285            maintainability_score,
286            cognitive_score,
287            quality_score,
288            enhanced_final_score,
289            complexity_metrics,
290            adjusted_weights,
291        })
292    }
293
    /// Score all files with enhanced analysis.
    ///
    /// Complexity metrics are computed in parallel first (via rayon), then
    /// each file is scored sequentially against the full file list. Returns
    /// `(original_index, score)` pairs sorted by descending
    /// `enhanced_final_score`.
    pub fn score_all_files_enhanced<T>(
        &mut self,
        files_with_content: &[(T, String)],
    ) -> Result<Vec<(usize, EnhancedScoreComponents)>>
    where
        T: ScanResult + Clone + Sync + Send,
    {
        // Clone the scan results once so the base scorer can see all files.
        let files: Vec<_> = files_with_content.iter().map(|(f, _)| f.clone()).collect();

        // PERFORMANCE OPTIMIZATION: Parallelize complexity analysis first
        let complexity_results: Result<HashMap<usize, Option<ComplexityMetrics>>> =
            if self.enable_complexity_analysis {
                // Create a snapshot of cache for parallel access
                let cache_snapshot: HashMap<String, ComplexityMetrics> = self.content_cache.clone();

                // Compute complexity metrics in parallel for all files
                let results: Result<Vec<(usize, Option<ComplexityMetrics>)>> = files_with_content
                    .par_iter()
                    .enumerate()
                    .map(|(idx, (file, content))| {
                        // Check cache first
                        if let Some(cached) = cache_snapshot.get(file.path()) {
                            return Ok((idx, Some(cached.clone())));
                        }

                        // Detect language and analyze complexity.
                        // NOTE(review): each parallel task builds
                        // `ComplexityAnalyzer::new()`, so a custom config
                        // passed to `with_config` is ignored on this path —
                        // confirm whether the divergence from
                        // `score_file_enhanced` is intended.
                        let language = Self::detect_language_static(file.path());
                        let analyzer = ComplexityAnalyzer::new();

                        match analyzer.analyze_content(content, &language) {
                            Ok(metrics) => Ok((idx, Some(metrics))),
                            Err(_) => Ok((idx, None)), // Skip files that can't be analyzed
                        }
                    })
                    .collect();

                results.map(|vec| vec.into_iter().collect())
            } else {
                Ok(HashMap::new())
            };

        let complexity_results = complexity_results?;

        // Update cache with new results (sequential for cache safety)
        for (idx, metrics_opt) in &complexity_results {
            if let Some(metrics) = metrics_opt {
                let file_path = files_with_content[*idx].0.path().to_string();
                self.content_cache.insert(file_path, metrics.clone());
            }
        }

        // Now score all files sequentially with pre-computed complexity metrics
        let mut scored_files = Vec::new();
        for (idx, (file, content)) in files_with_content.iter().enumerate() {
            let score = self.score_file_enhanced_with_precomputed_complexity(
                file,
                content,
                &files,
                complexity_results.get(&idx).and_then(|opt| opt.as_ref()),
            )?;
            scored_files.push((idx, score));
        }

        // Sort by enhanced final score (descending); NaN-free scores are
        // expected, so ties fall back to `Ordering::Equal` and the stable
        // sort keeps their original relative order.
        scored_files.sort_by(|a, b| {
            b.1.enhanced_final_score
                .partial_cmp(&a.1.enhanced_final_score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        Ok(scored_files)
    }
367
368    /// Score a file with pre-computed complexity metrics (for parallel optimization)
369    fn score_file_enhanced_with_precomputed_complexity<T>(
370        &mut self,
371        file: &T,
372        file_content: &str,
373        all_files: &[T],
374        precomputed_complexity: Option<&ComplexityMetrics>,
375    ) -> Result<EnhancedScoreComponents>
376    where
377        T: ScanResult + Clone,
378    {
379        // Get base heuristic score
380        let base_score = self.base_scorer.score_file(file, all_files)?;
381
382        // Use pre-computed complexity metrics or defaults
383        let (
384            complexity_metrics,
385            complexity_score,
386            maintainability_score,
387            cognitive_score,
388            quality_score,
389        ) = if let Some(metrics) = precomputed_complexity {
390            // Use pre-computed metrics
391            let complexity_score = self.calculate_complexity_score(metrics);
392            let maintainability_score = self.calculate_maintainability_score(metrics);
393            let cognitive_score = self.calculate_cognitive_score(metrics);
394            let quality_score = self.calculate_quality_score(metrics);
395
396            (
397                Some(metrics.clone()),
398                complexity_score,
399                maintainability_score,
400                cognitive_score,
401                quality_score,
402            )
403        } else {
404            // Skip expensive complexity analysis - use neutral/default scores
405            (None, 0.5, 0.5, 0.5, 0.5)
406        };
407
408        // Apply adaptive adjustments
409        let adjusted_weights = if let Some(ref metrics) = complexity_metrics {
410            self.calculate_adaptive_weights(file, metrics)
411        } else {
412            // Use default weights when complexity analysis is disabled
413            self.weights.clone()
414        };
415
416        // Calculate enhanced final score
417        let enhanced_final_score = self.calculate_enhanced_final_score(
418            &base_score,
419            complexity_score,
420            maintainability_score,
421            cognitive_score,
422            quality_score,
423            &adjusted_weights,
424        );
425
426        Ok(EnhancedScoreComponents {
427            base_score,
428            complexity_score,
429            maintainability_score,
430            cognitive_score,
431            quality_score,
432            enhanced_final_score,
433            complexity_metrics,
434            adjusted_weights,
435        })
436    }
437
    /// Detect programming language from file path.
    ///
    /// Thin instance-method wrapper over `detect_language_static`, which
    /// holds the actual extension-to-language mapping.
    fn detect_language(&self, path: &str) -> String {
        Self::detect_language_static(path)
    }
442
443    /// Static version for parallel processing
444    fn detect_language_static(path: &str) -> String {
445        let extension = std::path::Path::new(path)
446            .extension()
447            .and_then(|ext| ext.to_str())
448            .unwrap_or("");
449
450        match extension.to_lowercase().as_str() {
451            "rs" => "rust",
452            "py" => "python",
453            "js" => "javascript",
454            "ts" => "typescript",
455            "java" => "java",
456            "cs" => "c#",
457            "go" => "go",
458            "c" => "c",
459            "cpp" | "cc" | "cxx" => "cpp",
460            "h" | "hpp" => "c",
461            "rb" => "ruby",
462            "php" => "php",
463            "swift" => "swift",
464            "kt" => "kotlin",
465            "scala" => "scala",
466            _ => "unknown",
467        }
468        .to_string()
469    }
470
    /// Calculate complexity-based score (0-1, where 1 is good).
    fn calculate_complexity_score(&self, metrics: &ComplexityMetrics) -> f64 {
        // Invert: lower raw complexity yields a higher score.
        // NOTE(review): assumes `complexity_score()` is already normalized
        // to 0-1; values above 1.0 would push this negative — confirm the
        // contract in the complexity module.
        1.0 - metrics.complexity_score()
    }
476
477    /// Calculate maintainability score (0-1)
478    fn calculate_maintainability_score(&self, metrics: &ComplexityMetrics) -> f64 {
479        // Maintainability index is 0-100, normalize to 0-1
480        metrics.maintainability_index / 100.0
481    }
482
483    /// Calculate cognitive load score (0-1, where 1 is good)
484    fn calculate_cognitive_score(&self, metrics: &ComplexityMetrics) -> f64 {
485        // Lower cognitive complexity is better
486        let cognitive_ratio = metrics.cognitive_complexity as f64 / 20.0; // Normalize to rough 0-1 range
487        (1.0 - cognitive_ratio.min(1.0)).max(0.0)
488    }
489
490    /// Calculate overall quality score (0-1)
491    fn calculate_quality_score(&self, metrics: &ComplexityMetrics) -> f64 {
492        // Composite quality score
493        let complexity_factor = 1.0 - (metrics.cyclomatic_complexity as f64 / 15.0).min(1.0);
494        let nesting_factor = 1.0 - (metrics.max_nesting_depth as f64 / 6.0).min(1.0);
495        let density_factor = metrics.code_density.min(1.0);
496        let comment_factor = (metrics.comment_ratio * 2.0).min(1.0); // Good commenting is valuable
497
498        (complexity_factor * 0.3
499            + nesting_factor * 0.2
500            + density_factor * 0.3
501            + comment_factor * 0.2)
502            .min(1.0)
503    }
504
    /// Calculate adaptive weights based on file and repository characteristics
    ///
    /// Starts from the configured weights and multiplies individual
    /// components up based on repository size, project type, the file's
    /// complexity metrics, and language-specific complexity factors.
    ///
    /// NOTE(review): the multipliers are never renormalized, so repeated
    /// boosts compound — confirm that is intended.
    /// NOTE(review): the `file` parameter is currently unused; only the
    /// metrics and repository characteristics drive the adjustments.
    fn calculate_adaptive_weights<T>(
        &self,
        file: &T,
        metrics: &ComplexityMetrics,
    ) -> EnhancedWeights
    where
        T: ScanResult,
    {
        let mut weights = self.weights.clone();

        // Adjust weights based on repository size
        if self.repo_characteristics.total_files > 1000 {
            // Large repos: prioritize simplicity and maintainability
            weights.maintainability_weight *= 1.3;
            weights.complexity_weight *= 1.2;
        } else if self.repo_characteristics.total_files < 50 {
            // Small repos: focus more on functionality
            weights.base_weights.import_weight *= 1.2;
            weights.base_weights.doc_weight *= 1.1;
        }

        // Adjust based on project type
        match self.repo_characteristics.project_type {
            ProjectType::Library => {
                // Libraries need excellent documentation and maintainability
                weights.base_weights.doc_weight *= 1.4;
                weights.maintainability_weight *= 1.3;
                weights.quality_weight *= 1.2;
            }
            ProjectType::Framework => {
                // Frameworks need clear architecture and examples
                weights.base_weights.entrypoint_weight *= 1.3;
                weights.base_weights.examples_weight *= 1.4;
                weights.quality_weight *= 1.2;
            }
            ProjectType::Tool => {
                // Tools prioritize main functionality and simplicity
                weights.base_weights.entrypoint_weight *= 1.5;
                weights.complexity_weight *= 1.3;
            }
            _ => {
                // Applications and other types keep the configured weights
            }
        }

        // Adjust based on file complexity
        if metrics.cyclomatic_complexity > 10 {
            // High complexity files might be core logic - boost importance
            weights.base_weights.import_weight *= 1.2;
        }

        if metrics.maintainability_index < 30.0 {
            // Low maintainability - might indicate technical debt hotspots
            weights.maintainability_weight *= 1.4;
        }

        // Language-specific adjustments
        let language = &metrics.language_metrics.language;
        match language.as_str() {
            "rust" => {
                // Rust: weigh ownership complexity when the analyzer
                // reports it as a named complexity factor
                if let Some(ownership) = metrics
                    .language_metrics
                    .complexity_factors
                    .get("ownership_complexity")
                {
                    if *ownership > 5.0 {
                        weights.complexity_weight *= 1.2;
                    }
                }
            }
            "python" => {
                // Python: value documentation and simplicity
                weights.base_weights.doc_weight *= 1.1;
                weights.complexity_weight *= 1.1;
            }
            "javascript" | "typescript" => {
                // JS/TS: weigh async complexity via the reported
                // "promise_complexity" factor
                if let Some(async_complexity) = metrics
                    .language_metrics
                    .complexity_factors
                    .get("promise_complexity")
                {
                    if *async_complexity > 3.0 {
                        weights.cognitive_weight *= 1.2;
                    }
                }
            }
            _ => {}
        }

        weights
    }
599
600    /// Calculate the final enhanced score
601    fn calculate_enhanced_final_score(
602        &self,
603        base_score: &ScoreComponents,
604        complexity_score: f64,
605        maintainability_score: f64,
606        cognitive_score: f64,
607        quality_score: f64,
608        weights: &EnhancedWeights,
609    ) -> f64 {
610        // Combine base score with complexity metrics
611        let base_contribution = base_score.final_score * 0.6; // Base heuristics weight
612
613        let complexity_contribution = complexity_score * weights.complexity_weight
614            + maintainability_score * weights.maintainability_weight
615            + cognitive_score * weights.cognitive_weight
616            + quality_score * weights.quality_weight;
617
618        let enhanced_contribution = complexity_contribution * 0.4; // Complexity metrics weight
619
620        // Apply adaptive factors
621        let final_score = (base_contribution + enhanced_contribution)
622            * weights.adaptive_factors.repo_size_factor
623            * weights.adaptive_factors.language_factor
624            * weights.adaptive_factors.maturity_factor
625            * weights.adaptive_factors.experience_factor;
626
627        final_score.min(2.0) // Cap the score to prevent extreme values
628    }
629
    /// Update repository characteristics and refresh the adaptive factors.
    pub fn update_repository_characteristics(
        &mut self,
        characteristics: RepositoryCharacteristics,
    ) {
        self.repo_characteristics = characteristics;

        // Must run after the assignment above: the factors are derived from
        // the newly stored characteristics.
        self.weights.adaptive_factors = self.calculate_adaptive_factors();
    }
640
641    /// Calculate adaptive factors based on repository characteristics
642    fn calculate_adaptive_factors(&self) -> AdaptiveFactors {
643        let repo_size_factor = match self.repo_characteristics.total_files {
644            0..=50 => 1.1,      // Small repos - boost importance
645            51..=500 => 1.0,    // Medium repos - neutral
646            501..=2000 => 0.95, // Large repos - slight penalty
647            _ => 0.9,           // Very large repos - prefer simpler files
648        };
649
650        let language_factor = if self
651            .repo_characteristics
652            .primary_languages
653            .contains(&"rust".to_string())
654        {
655            1.05 // Rust projects tend to have good practices
656        } else if self
657            .repo_characteristics
658            .primary_languages
659            .contains(&"javascript".to_string())
660        {
661            0.95 // JS can be more complex to analyze
662        } else {
663            1.0
664        };
665
666        let maturity_factor = match self.repo_characteristics.age_months {
667            0..=6 => 0.9,   // New projects - focus on functionality
668            7..=24 => 1.0,  // Maturing projects - balanced
669            25..=60 => 1.1, // Mature projects - prioritize maintainability
670            _ => 1.2,       // Very mature projects - heavily prioritize quality
671        };
672
673        let experience_factor = match self.repo_characteristics.team_size {
674            1 => 1.1,       // Solo projects - prefer simpler code
675            2..=5 => 1.0,   // Small teams - balanced
676            6..=15 => 0.95, // Medium teams - can handle complexity
677            _ => 0.9,       // Large teams - prefer well-structured code
678        };
679
680        AdaptiveFactors {
681            repo_size_factor,
682            language_factor,
683            maturity_factor,
684            experience_factor,
685        }
686    }
687
    /// Clear the complexity metrics cache (e.g. after file contents change).
    pub fn clear_cache(&mut self) {
        self.content_cache.clear();
    }

    /// Get cache statistics as `(entries, capacity)`.
    ///
    /// `clear_cache` keeps the allocated capacity, so capacity can exceed
    /// the entry count.
    pub fn cache_stats(&self) -> (usize, usize) {
        (self.content_cache.len(), self.content_cache.capacity())
    }
697}
698
699impl EnhancedScoreComponents {
700    /// Get a breakdown of score contributions
701    pub fn score_breakdown(&self) -> HashMap<String, f64> {
702        let mut breakdown = self.base_score.as_map();
703
704        breakdown.insert("complexity_score".to_string(), self.complexity_score);
705        breakdown.insert(
706            "maintainability_score".to_string(),
707            self.maintainability_score,
708        );
709        breakdown.insert("cognitive_score".to_string(), self.cognitive_score);
710        breakdown.insert("quality_score".to_string(), self.quality_score);
711        breakdown.insert(
712            "enhanced_final_score".to_string(),
713            self.enhanced_final_score,
714        );
715
716        breakdown
717    }
718
719    /// Get the dominant scoring factor
720    pub fn dominant_factor(&self) -> (&'static str, f64) {
721        let factors = [
722            ("base_heuristics", self.base_score.final_score * 0.6),
723            (
724                "complexity",
725                self.complexity_score * self.adjusted_weights.complexity_weight * 0.4,
726            ),
727            (
728                "maintainability",
729                self.maintainability_score * self.adjusted_weights.maintainability_weight * 0.4,
730            ),
731            (
732                "cognitive",
733                self.cognitive_score * self.adjusted_weights.cognitive_weight * 0.4,
734            ),
735            (
736                "quality",
737                self.quality_score * self.adjusted_weights.quality_weight * 0.4,
738            ),
739        ];
740
741        factors
742            .iter()
743            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
744            .map(|(name, score)| (*name, *score))
745            .unwrap_or(("none", 0.0))
746    }
747
748    /// Get a human-readable explanation of the score
749    pub fn explanation(&self) -> String {
750        let (dominant, _) = self.dominant_factor();
751        let complexity_summary = if let Some(metrics) = &self.complexity_metrics {
752            metrics.summary()
753        } else {
754            "No complexity metrics".to_string()
755        };
756
757        format!(
758            "Score: {:.3} (dominated by {}), Base: {:.3}, Quality: {:.3}, {}",
759            self.enhanced_final_score,
760            dominant,
761            self.base_score.final_score,
762            self.quality_score,
763            complexity_summary
764        )
765    }
766}
767
768#[cfg(test)]
769mod tests {
770    use super::*;
771    use crate::heuristics::DocumentAnalysis;
772
    // Mock scan result for testing: a plain data holder whose fields feed
    // the `ScanResult` accessor implementations further down in this module.
    #[derive(Debug, Clone)]
    struct MockScanResult {
        path: String,
        relative_path: String,
        depth: usize,
        is_docs: bool,
        is_readme: bool,
        is_test: bool,
        is_entrypoint: bool,
        has_examples: bool,
        priority_boost: f64,
        churn_score: f64,
        centrality_in: f64,
        imports: Option<Vec<String>>,
        doc_analysis: Option<DocumentAnalysis>,
    }
790
791    impl MockScanResult {
792        fn new(path: &str) -> Self {
793            Self {
794                path: path.to_string(),
795                relative_path: path.to_string(),
796                depth: path.matches('/').count(),
797                is_docs: path.contains("doc") || path.ends_with(".md"),
798                is_readme: path.to_lowercase().contains("readme"),
799                is_test: path.contains("test") || path.contains("spec"),
800                is_entrypoint: path.contains("main") || path.contains("index"),
801                has_examples: path.contains("example") || path.contains("demo"),
802                priority_boost: 0.0,
803                churn_score: 0.5,
804                centrality_in: 0.3,
805                imports: Some(vec!["std::collections::HashMap".to_string()]),
806                doc_analysis: Some(DocumentAnalysis::new()),
807            }
808        }
809    }
810
    // Trivial accessor implementations: each trait method returns the
    // corresponding stored field on the mock.
    impl super::super::ScanResult for MockScanResult {
        fn path(&self) -> &str {
            &self.path
        }
        fn relative_path(&self) -> &str {
            &self.relative_path
        }
        fn depth(&self) -> usize {
            self.depth
        }
        fn is_docs(&self) -> bool {
            self.is_docs
        }
        fn is_readme(&self) -> bool {
            self.is_readme
        }
        fn is_test(&self) -> bool {
            self.is_test
        }
        fn is_entrypoint(&self) -> bool {
            self.is_entrypoint
        }
        fn has_examples(&self) -> bool {
            self.has_examples
        }
        fn priority_boost(&self) -> f64 {
            self.priority_boost
        }
        fn churn_score(&self) -> f64 {
            self.churn_score
        }
        fn centrality_in(&self) -> f64 {
            self.centrality_in
        }
        fn imports(&self) -> Option<&[String]> {
            self.imports.as_deref()
        }
        fn doc_analysis(&self) -> Option<&DocumentAnalysis> {
            self.doc_analysis.as_ref()
        }
    }
852
853    #[test]
854    fn test_enhanced_scorer_creation() {
855        let scorer = EnhancedHeuristicScorer::new();
856        assert!(scorer.weights.complexity_weight > 0.0);
857        assert!(scorer.weights.maintainability_weight > 0.0);
858    }
859
860    #[test]
861    fn test_language_detection() {
862        let scorer = EnhancedHeuristicScorer::new();
863
864        assert_eq!(scorer.detect_language("src/main.rs"), "rust");
865        assert_eq!(scorer.detect_language("app.py"), "python");
866        assert_eq!(scorer.detect_language("script.js"), "javascript");
867        assert_eq!(scorer.detect_language("component.ts"), "typescript");
868        assert_eq!(scorer.detect_language("Main.java"), "java");
869    }
870
871    #[test]
872    fn test_enhanced_file_scoring() {
873        let mut scorer = EnhancedHeuristicScorer::new();
874        scorer.enable_complexity_analysis(); // Enable complexity analysis for testing
875
876        let file = MockScanResult::new("src/main.rs");
877        let content = r#"
878fn main() {
879    if condition() {
880        for i in 0..10 {
881            println!("Hello {}", i);
882        }
883    }
884}
885"#;
886        let files = vec![file.clone()];
887
888        let result = scorer.score_file_enhanced(&file, content, &files);
889        assert!(result.is_ok());
890
891        let score = result.unwrap();
892        assert!(score.enhanced_final_score > 0.0);
893        assert!(score.complexity_score >= 0.0 && score.complexity_score <= 1.0);
894        assert!(score.quality_score >= 0.0 && score.quality_score <= 1.0);
895        assert!(score.complexity_metrics.is_some());
896    }
897
898    #[test]
899    fn test_adaptive_weights() {
900        let weights = EnhancedWeights::default();
901        let complexity_config = ComplexityConfig::default();
902        let mut repo_chars = RepositoryCharacteristics::default();
903        repo_chars.project_type = ProjectType::Library;
904        repo_chars.total_files = 1500; // Large repository
905
906        let mut scorer =
907            EnhancedHeuristicScorer::with_config(weights, complexity_config, repo_chars);
908
909        let file = MockScanResult::new("src/lib.rs");
910        let simple_content = "fn simple() { println!(\"hello\"); }";
911        let files = vec![file.clone()];
912
913        let result = scorer.score_file_enhanced(&file, simple_content, &files);
914        assert!(result.is_ok());
915
916        let score = result.unwrap();
917
918        // For a library, documentation and maintainability should have higher weights
919        assert!(
920            score.adjusted_weights.base_weights.doc_weight
921                >= score.adjusted_weights.base_weights.import_weight
922        );
923    }
924
925    #[test]
926    fn test_complexity_vs_simple_code() {
927        let mut scorer = EnhancedHeuristicScorer::new();
928        scorer.enable_complexity_analysis(); // Enable complexity analysis for testing
929
930        let file1 = MockScanResult::new("simple.rs");
931        let simple_content = "fn simple() { println!(\"hello\"); }";
932
933        let file2 = MockScanResult::new("complex.rs");
934        let complex_content = r#"
935fn complex() {
936    for i in 0..100 {
937        if i % 2 == 0 {
938            while condition() {
939                match value {
940                    1 => { if nested() { deep(); } },
941                    2 => { if more_nested() { deeper(); } },
942                    _ => { if even_more() { deepest(); } },
943                }
944            }
945        }
946    }
947}
948"#;
949
950        let files = vec![file1.clone(), file2.clone()];
951
952        let simple_score = scorer
953            .score_file_enhanced(&file1, simple_content, &files)
954            .unwrap();
955        let complex_score = scorer
956            .score_file_enhanced(&file2, complex_content, &files)
957            .unwrap();
958
959        // Simple code should generally score better on complexity metrics
960        assert!(simple_score.complexity_score > complex_score.complexity_score);
961        assert!(simple_score.cognitive_score > complex_score.cognitive_score);
962    }
963
964    #[test]
965    fn test_score_breakdown() {
966        let mut scorer = EnhancedHeuristicScorer::new();
967
968        let file = MockScanResult::new("test.rs");
969        let content = "fn test() { if x > 0 { return 1; } else { return 0; } }";
970        let files = vec![file.clone()];
971
972        let score = scorer.score_file_enhanced(&file, content, &files).unwrap();
973        let breakdown = score.score_breakdown();
974
975        assert!(breakdown.contains_key("complexity_score"));
976        assert!(breakdown.contains_key("maintainability_score"));
977        assert!(breakdown.contains_key("cognitive_score"));
978        assert!(breakdown.contains_key("quality_score"));
979        assert!(breakdown.contains_key("enhanced_final_score"));
980
981        let explanation = score.explanation();
982        assert!(explanation.contains("Score:"));
983        assert!(explanation.contains("dominated by"));
984    }
985
986    #[test]
987    fn test_repository_characteristics_update() {
988        let mut scorer = EnhancedHeuristicScorer::new();
989
990        let initial_factors = scorer.weights.adaptive_factors.clone();
991
992        let mut new_chars = RepositoryCharacteristics::default();
993        new_chars.total_files = 5000; // Much larger
994        new_chars.project_type = ProjectType::Framework;
995        new_chars.age_months = 48; // Mature project
996
997        scorer.update_repository_characteristics(new_chars);
998
999        let new_factors = &scorer.weights.adaptive_factors;
1000
1001        // Large, mature projects should have different factors
1002        assert_ne!(
1003            initial_factors.repo_size_factor,
1004            new_factors.repo_size_factor
1005        );
1006        assert_ne!(initial_factors.maturity_factor, new_factors.maturity_factor);
1007    }
1008}