scribe_analysis/heuristics/scoring/
mod.rs

//! # Core Scoring Algorithms for Heuristic File Prioritization
//!
//! Implements the multi-dimensional scoring system, originally prototyped in Python and now
//! maintained directly inside the Scribe Rust workspace.
//!
//! ## Scoring Formula
//! ```text
//! final_score = Σ(weight_i × normalized_score_i) + priority_boost + template_boost
//! ```
//!
//! Where component scores include:
//! - Documentation importance (doc_score)
//! - README prioritization (readme_score)
//! - Import graph centrality (import_score)
//! - Path depth penalty (path_score)
//! - Test-code relationships (test_link_score)
//! - Git churn recency (churn_score)
//! - PageRank centrality (centrality_score, V2)
//! - Entrypoint detection (entrypoint_score)
//! - Examples detection (examples_score)
21
22use super::{import_analysis::ImportGraph, ScanResult};
23use scribe_core::Result;
24use std::collections::HashMap;
25
26// Public modules
27pub mod final_scoring;
28pub mod normalization;
29pub mod types;
30
31// Re-export main types
32pub use normalization::{NormalizationStats, NormalizedScores};
33pub use types::{HeuristicWeights, ScoreComponents, ScoringFeatures};
34
35/// Main heuristic scorer that coordinates all scoring components
36#[derive(Debug)]
37pub struct HeuristicScorer {
38    weights: HeuristicWeights,
39    import_graph: Option<ImportGraph>,
40    norm_stats: Option<NormalizationStats>,
41}
42
43impl HeuristicScorer {
44    /// Create a new scorer with given weights
45    pub fn new(weights: HeuristicWeights) -> Self {
46        Self {
47            weights,
48            import_graph: None,
49            norm_stats: None,
50        }
51    }
52
53    /// Create a scorer with default weights
54    pub fn default() -> Self {
55        Self::new(HeuristicWeights::default())
56    }
57
58    /// Set the import graph for centrality calculations
59    pub fn set_import_graph(&mut self, graph: ImportGraph) {
60        self.import_graph = Some(graph);
61    }
62
63    /// Score a single file within the context of all files
64    pub fn score_file<T>(&mut self, file: &T, all_files: &[T]) -> Result<ScoreComponents>
65    where
66        T: ScanResult,
67    {
68        // Build normalization statistics if not cached
69        if self.norm_stats.is_none() {
70            self.norm_stats = Some(normalization::build_normalization_stats(all_files));
71        }
72
73        let norm_stats = self.norm_stats.as_ref().unwrap();
74        let normalized_scores =
75            normalization::normalize_scores(file, norm_stats, &self.weights.features);
76
77        // Calculate template boost
78        let template_boost = if self.weights.features.enable_template_boost {
79            super::template_detection::get_template_score_boost(file.path()).unwrap_or(0.0)
80        } else {
81            0.0
82        };
83
84        // Apply weighted formula
85        let final_score = final_scoring::calculate_final_score(
86            &normalized_scores,
87            &self.weights,
88            template_boost,
89            file.priority_boost(),
90        );
91
92        Ok(ScoreComponents {
93            final_score,
94            doc_score: normalized_scores.doc_score,
95            readme_score: normalized_scores.readme_score,
96            import_score: normalized_scores.import_score,
97            path_score: normalized_scores.path_score,
98            test_link_score: normalized_scores.test_link_score,
99            churn_score: normalized_scores.churn_score,
100            centrality_score: normalized_scores.centrality_score,
101            entrypoint_score: normalized_scores.entrypoint_score,
102            examples_score: normalized_scores.examples_score,
103            priority_boost: file.priority_boost(),
104            template_boost,
105            weights: self.weights.clone(),
106        })
107    }
108
109    /// Score all files and return ranked results
110    pub fn score_all_files<T>(&mut self, files: &[T]) -> Result<Vec<(usize, ScoreComponents)>>
111    where
112        T: ScanResult,
113    {
114        let mut scored_files = Vec::new();
115
116        for (idx, file) in files.iter().enumerate() {
117            let score = self.score_file(file, files)?;
118            scored_files.push((idx, score));
119        }
120
121        // Sort by final score (descending)
122        scored_files.sort_by(|a, b| {
123            b.1.final_score
124                .partial_cmp(&a.1.final_score)
125                .unwrap_or(std::cmp::Ordering::Equal)
126        });
127
128        Ok(scored_files)
129    }
130
131    /// Score files with custom weights for specific use cases
132    pub fn score_with_preset<T>(
133        &mut self,
134        files: &[T],
135        preset: WeightPreset,
136    ) -> Result<Vec<(usize, ScoreComponents)>>
137    where
138        T: ScanResult,
139    {
140        // Temporarily change weights
141        let original_weights = self.weights.clone();
142        self.weights = match preset {
143            WeightPreset::Documentation => HeuristicWeights::for_documentation(),
144            WeightPreset::CoreCode => HeuristicWeights::for_core_code(),
145            WeightPreset::Tests => HeuristicWeights::for_tests(),
146            WeightPreset::Balanced => HeuristicWeights::balanced(),
147        };
148
149        // Clear cached normalization stats since weights changed
150        self.norm_stats = None;
151
152        let result = self.score_all_files(files);
153
154        // Restore original weights
155        self.weights = original_weights;
156        self.norm_stats = None;
157
158        result
159    }
160
161    /// Get current weights
162    pub fn weights(&self) -> &HeuristicWeights {
163        &self.weights
164    }
165
166    /// Update weights
167    pub fn set_weights(&mut self, weights: HeuristicWeights) {
168        self.weights = weights;
169        self.norm_stats = None; // Clear cache
170    }
171}
172
/// Preset weight configurations for common use cases.
///
/// A preset is resolved to a concrete `HeuristicWeights` value by
/// `HeuristicScorer::score_with_preset`. The enum is a plain, fieldless value type, so it
/// can be copied, compared, and used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum WeightPreset {
    /// Resolved via `HeuristicWeights::for_documentation()`.
    Documentation,
    /// Resolved via `HeuristicWeights::for_core_code()`.
    CoreCode,
    /// Resolved via `HeuristicWeights::for_tests()`.
    Tests,
    /// Resolved via `HeuristicWeights::balanced()`.
    Balanced,
}
181
182impl Default for HeuristicScorer {
183    fn default() -> Self {
184        Self::new(HeuristicWeights::default())
185    }
186}
187
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal `ScanResult` implementation for exercising the scorer.
    ///
    /// Only the path, docs/README flags, depth and priority boost are configurable;
    /// every other trait method returns a fixed neutral value.
    struct MockFile {
        path: String,
        is_docs: bool,
        is_readme: bool,
        depth: usize,
        priority_boost: f64,
    }

    impl MockFile {
        /// Top-level `README.md` fixture with the given priority boost.
        fn readme(priority_boost: f64) -> Self {
            MockFile {
                path: "README.md".to_string(),
                is_docs: false,
                is_readme: true,
                depth: 1,
                priority_boost,
            }
        }
    }

    impl ScanResult for MockFile {
        fn path(&self) -> &str {
            &self.path
        }
        fn relative_path(&self) -> &str {
            &self.path
        }
        fn depth(&self) -> usize {
            self.depth
        }
        fn is_docs(&self) -> bool {
            self.is_docs
        }
        fn is_readme(&self) -> bool {
            self.is_readme
        }
        fn is_test(&self) -> bool {
            false
        }
        fn is_entrypoint(&self) -> bool {
            false
        }
        fn has_examples(&self) -> bool {
            false
        }
        fn priority_boost(&self) -> f64 {
            self.priority_boost
        }
        fn churn_score(&self) -> f64 {
            0.0
        }
        fn centrality_in(&self) -> f64 {
            0.0
        }
        fn imports(&self) -> Option<&[String]> {
            None
        }
        fn doc_analysis(&self) -> Option<&crate::heuristics::DocumentAnalysis> {
            None
        }
    }

    #[test]
    fn test_basic_scoring() {
        let mut scorer = HeuristicScorer::default();

        let files = vec![
            MockFile::readme(0.0),
            MockFile {
                path: "src/lib.rs".to_string(),
                is_docs: false,
                is_readme: false,
                depth: 2,
                priority_boost: 0.0,
            },
        ];

        let scores = scorer.score_all_files(&files).unwrap();
        assert_eq!(scores.len(), 2);

        // The top-ranked entry must carry a positive score.
        assert!(scores[0].1.final_score > 0.0);
    }

    #[test]
    fn test_weight_presets() {
        let mut scorer = HeuristicScorer::default();

        let files = vec![MockFile::readme(0.0)];

        let doc_scores = scorer
            .score_with_preset(&files, WeightPreset::Documentation)
            .unwrap();
        let core_scores = scorer
            .score_with_preset(&files, WeightPreset::CoreCode)
            .unwrap();

        // The documentation preset must not rank the README below the core-code preset.
        assert!(doc_scores[0].1.final_score >= core_scores[0].1.final_score);
    }

    #[test]
    fn test_score_explanation() {
        let mut scorer = HeuristicScorer::default();

        let files = vec![MockFile::readme(1.0)];

        let score = scorer.score_file(&files[0], &files).unwrap();
        let explanation = score.explanation();

        assert!(explanation.contains("Score:"));
        assert!(!score.primary_importance_reason().is_empty());
    }
}