scribe_analysis/heuristics/scoring/
normalization.rs1use super::types::{RawScoreComponents, HeuristicWeights};
4use super::super::ScanResult;
5
/// Per-file heuristic scores after each raw component has been divided by the
/// corresponding maximum in [`NormalizationStats`].
#[derive(Clone, Debug)]
pub struct NormalizedScores {
    /// Raw documentation score relative to the largest documentation score.
    pub doc_score: f64,
    /// Raw README score relative to the largest README score.
    pub readme_score: f64,
    /// Weighted blend of the relative inbound and outbound import degrees.
    pub import_score: f64,
    /// Inverted relative path depth: shallower files score higher.
    pub path_score: f64,
    /// Number of test links found relative to the largest such count.
    pub test_link_score: f64,
    /// Churn commit count relative to the largest churn commit count.
    pub churn_score: f64,
    /// Raw centrality relative to the largest raw centrality.
    pub centrality_score: f64,
    /// `1.0` when the file is an entrypoint, `0.0` otherwise.
    pub entrypoint_score: f64,
    /// Examples count relative to the largest examples count.
    pub examples_score: f64,
}
19
/// Corpus-wide maxima used as divisors when turning raw score components into
/// relative scores.
#[derive(Clone, Debug)]
pub struct NormalizationStats {
    /// Divisor for the raw documentation score.
    pub max_doc_raw: f64,
    /// Divisor for the raw README score.
    pub max_readme_raw: f64,
    /// Divisor for the inbound import degree.
    pub max_import_degree_in: usize,
    /// Divisor for the outbound import degree.
    pub max_import_degree_out: usize,
    /// Divisor for the path depth.
    pub max_path_depth: usize,
    /// Divisor for the number of test links found.
    pub max_test_links: usize,
    /// Divisor for the churn commit count.
    pub max_churn_commits: usize,
    /// Divisor for the raw centrality value.
    pub max_centrality_raw: f64,
    /// Divisor for the examples count.
    pub max_examples_count: usize,
}
33
34pub fn build_normalization_stats<T>(files: &[T]) -> NormalizationStats
36where
37 T: ScanResult,
38{
39 let mut stats = NormalizationStats {
40 max_doc_raw: 0.0,
41 max_readme_raw: 0.0,
42 max_import_degree_in: 0,
43 max_import_degree_out: 0,
44 max_path_depth: 0,
45 max_test_links: 0,
46 max_churn_commits: 0,
47 max_centrality_raw: 0.0,
48 max_examples_count: 0,
49 };
50
51 for file in files {
52 if file.is_docs() {
54 stats.max_doc_raw = stats.max_doc_raw.max(1.0);
55 if let Some(doc_analysis) = file.doc_analysis() {
56 stats.max_doc_raw = stats.max_doc_raw.max(doc_analysis.structure_score());
57 }
58 }
59
60 if file.is_readme() {
62 let readme_score = if file.depth() <= 1 { 1.5 } else { 1.0 };
63 stats.max_readme_raw = stats.max_readme_raw.max(readme_score);
64 }
65
66 stats.max_path_depth = stats.max_path_depth.max(file.depth());
68
69 if file.is_test() {
71 stats.max_test_links = stats.max_test_links.max(1);
72 }
73
74 let churn_score = file.churn_score() as usize;
76 stats.max_churn_commits = stats.max_churn_commits.max(churn_score);
77
78 let examples_count = count_examples_in_file(file);
80 stats.max_examples_count = stats.max_examples_count.max(examples_count);
81 }
82
83 stats.max_doc_raw = stats.max_doc_raw.max(1.0);
85 stats.max_readme_raw = stats.max_readme_raw.max(1.0);
86 stats.max_import_degree_in = stats.max_import_degree_in.max(1);
87 stats.max_import_degree_out = stats.max_import_degree_out.max(1);
88 stats.max_path_depth = stats.max_path_depth.max(1);
89 stats.max_test_links = stats.max_test_links.max(1);
90 stats.max_churn_commits = stats.max_churn_commits.max(1);
91 stats.max_centrality_raw = stats.max_centrality_raw.max(0.1);
92 stats.max_examples_count = stats.max_examples_count.max(1);
93
94 stats
95}
96
97pub fn normalize_scores(
99 raw_scores: &RawScoreComponents,
100 stats: &NormalizationStats
101) -> NormalizedScores {
102 NormalizedScores {
103 doc_score: raw_scores.doc_raw / stats.max_doc_raw,
104 readme_score: raw_scores.readme_raw / stats.max_readme_raw,
105 import_score: calculate_import_score(raw_scores, stats),
106 path_score: calculate_path_score(raw_scores, stats),
107 test_link_score: raw_scores.test_links_found as f64 / stats.max_test_links as f64,
108 churn_score: raw_scores.churn_commits as f64 / stats.max_churn_commits as f64,
109 centrality_score: raw_scores.centrality_raw / stats.max_centrality_raw,
110 entrypoint_score: if raw_scores.is_entrypoint { 1.0 } else { 0.0 },
111 examples_score: raw_scores.examples_count as f64 / stats.max_examples_count as f64,
112 }
113}
114
115fn calculate_import_score(raw_scores: &RawScoreComponents, stats: &NormalizationStats) -> f64 {
117 let in_score = raw_scores.import_degree_in as f64 / stats.max_import_degree_in as f64;
118 let out_score = raw_scores.import_degree_out as f64 / stats.max_import_degree_out as f64;
119
120 0.7 * in_score + 0.3 * out_score
122}
123
124fn calculate_path_score(raw_scores: &RawScoreComponents, stats: &NormalizationStats) -> f64 {
126 1.0 - (raw_scores.path_depth as f64 / stats.max_path_depth as f64)
128}
129
130fn count_examples_in_file<T: ScanResult>(file: &T) -> usize {
132 if file.has_examples() { 1 } else { 0 }
134}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal `ScanResult` implementation: only the path, docs/README flags
    /// and depth vary; every other trait method returns a fixed neutral value.
    struct MockFile {
        path: String,
        is_docs: bool,
        is_readme: bool,
        depth: usize,
    }

    impl ScanResult for MockFile {
        fn path(&self) -> &str { &self.path }
        fn relative_path(&self) -> &str { &self.path }
        fn depth(&self) -> usize { self.depth }
        fn is_docs(&self) -> bool { self.is_docs }
        fn is_readme(&self) -> bool { self.is_readme }
        fn is_test(&self) -> bool { false }
        fn is_entrypoint(&self) -> bool { false }
        fn has_examples(&self) -> bool { false }
        fn priority_boost(&self) -> f64 { 0.0 }
        fn churn_score(&self) -> f64 { 0.0 }
        fn centrality_in(&self) -> f64 { 0.0 }
        fn imports(&self) -> Option<&[String]> { None }
        fn doc_analysis(&self) -> Option<&crate::heuristics::DocumentAnalysis> { None }
    }

    /// Builds raw components that are all zero except for the given path depth.
    fn raw_with_depth(path_depth: usize) -> RawScoreComponents {
        RawScoreComponents {
            doc_raw: 0.0,
            readme_raw: 0.0,
            import_degree_in: 0,
            import_degree_out: 0,
            path_depth,
            test_links_found: 0,
            churn_commits: 0,
            centrality_raw: 0.0,
            is_entrypoint: false,
            examples_count: 0,
        }
    }

    #[test]
    fn test_normalization_stats() {
        let files = vec![
            MockFile {
                path: "README.md".to_string(),
                is_docs: false,
                is_readme: true,
                depth: 1,
            },
            MockFile {
                path: "docs/guide.md".to_string(),
                is_docs: true,
                is_readme: false,
                depth: 2,
            },
        ];

        let stats = build_normalization_stats(&files);

        // A README at depth 1 is weighted 1.5; a docs file without analysis
        // counts as 1.0. Exact values are asserted because the floors alone
        // would already make a `> 0.0` check pass.
        assert!((stats.max_readme_raw - 1.5).abs() < f64::EPSILON);
        assert!((stats.max_doc_raw - 1.0).abs() < f64::EPSILON);
        assert_eq!(stats.max_path_depth, 2);
    }

    #[test]
    fn test_normalization_stats_floor_on_empty_input() {
        let files: Vec<MockFile> = Vec::new();

        let stats = build_normalization_stats(&files);

        // With no files, every maximum falls back to its non-zero floor.
        assert!((stats.max_doc_raw - 1.0).abs() < f64::EPSILON);
        assert!((stats.max_readme_raw - 1.0).abs() < f64::EPSILON);
        assert_eq!(stats.max_import_degree_in, 1);
        assert_eq!(stats.max_import_degree_out, 1);
        assert_eq!(stats.max_path_depth, 1);
        assert_eq!(stats.max_test_links, 1);
        assert_eq!(stats.max_churn_commits, 1);
        assert!((stats.max_centrality_raw - 0.1).abs() < f64::EPSILON);
        assert_eq!(stats.max_examples_count, 1);
    }

    #[test]
    fn test_path_score_inversion() {
        let stats = NormalizationStats {
            max_doc_raw: 1.0,
            max_readme_raw: 1.0,
            max_import_degree_in: 1,
            max_import_degree_out: 1,
            max_path_depth: 5,
            max_test_links: 1,
            max_churn_commits: 1,
            max_centrality_raw: 1.0,
            max_examples_count: 1,
        };

        // Depth 3 of 5 -> 1.0 - 0.6 = 0.4.
        let path_score = calculate_path_score(&raw_with_depth(3), &stats);
        assert!((path_score - 0.4).abs() < 0.01);

        // The shallowest possible file gets the full score.
        let root_score = calculate_path_score(&raw_with_depth(0), &stats);
        assert!((root_score - 1.0).abs() < 0.01);
    }
}