1use std::collections::HashMap;
22use scribe_core::Result;
23use super::{ScanResult, import_analysis::ImportGraph};
24
/// Complete scoring breakdown for a single file.
///
/// Bundles the final score with every normalized component, the additive
/// boosts, the raw inputs the components were derived from, and a copy of the
/// weights that were in effect when the score was computed.
#[derive(Debug, Clone)]
pub struct ScoreComponents {
    /// Overall score: the weighted sum of the normalized components plus
    /// `template_boost` and `priority_boost`.
    pub final_score: f64,

    /// Normalized documentation component (capped at 1.0).
    pub doc_score: f64,
    /// Normalized README component (capped at 1.0).
    pub readme_score: f64,
    /// Normalized import-graph component (capped at 1.0).
    pub import_score: f64,
    /// Path-depth component; shallower files score higher.
    pub path_score: f64,
    /// Normalized test-link component (capped at 1.0).
    pub test_link_score: f64,
    /// Normalized churn component (capped at 1.0).
    pub churn_score: f64,
    /// Normalized centrality component; 0.0 when centrality is disabled.
    pub centrality_score: f64,
    /// 1.0 when the file is an entrypoint, 0.0 otherwise.
    pub entrypoint_score: f64,
    /// Normalized examples component (capped at 1.0).
    pub examples_score: f64,

    /// Additive boost reported by the scanned file's `priority_boost()`.
    pub priority_boost: f64,
    /// Additive boost from template detection; 0.0 when that feature is off.
    pub template_boost: f64,

    /// Raw, un-normalized inputs behind the component scores.
    pub raw_scores: RawScoreComponents,

    /// Copy of the weights used to compute `final_score`.
    pub weights: HeuristicWeights,
}
52
/// Raw, un-normalized measurements collected for a file before they are
/// scaled into component scores.
#[derive(Debug, Clone)]
pub struct RawScoreComponents {
    /// Raw documentation signal.
    pub doc_raw: f64,
    /// Raw README signal; 0.0 for files that are not READMEs.
    pub readme_raw: f64,
    /// Incoming import degree of the file.
    pub import_degree_in: usize,
    /// Outgoing import degree of the file.
    pub import_degree_out: usize,
    /// Directory depth of the file as reported by the scan result.
    pub path_depth: usize,
    /// Number of test links attributed to the file.
    pub test_links_found: usize,
    /// Churn measure expressed as an integer count.
    pub churn_commits: usize,
    /// Raw centrality value; 0.0 when centrality is disabled.
    pub centrality_raw: f64,
    /// Whether the scan result flags the file as an entrypoint.
    pub is_entrypoint: bool,
    /// Examples signal expressed as an integer count.
    pub examples_count: usize,
}
67
/// Relative weight of each normalized component in the final score, together
/// with the feature toggles that control which signals are computed.
#[derive(Debug, Clone)]
pub struct HeuristicWeights {
    /// Weight of the documentation component.
    pub doc_weight: f64,
    /// Weight of the README component.
    pub readme_weight: f64,
    /// Weight of the import-graph component.
    pub import_weight: f64,
    /// Weight of the path-depth component.
    pub path_weight: f64,
    /// Weight of the test-link component.
    pub test_link_weight: f64,
    /// Weight of the churn component.
    pub churn_weight: f64,
    /// Weight of the centrality component.
    pub centrality_weight: f64,
    /// Weight of the entrypoint component.
    pub entrypoint_weight: f64,
    /// Weight of the examples component.
    pub examples_weight: f64,

    /// Feature toggles associated with this weight profile.
    pub features: ScoringFeatures,
}
84
/// Switches that enable or disable individual scoring signals.
#[derive(Debug, Clone)]
pub struct ScoringFeatures {
    /// Toggle for the centrality signal.
    pub enable_centrality: bool,
    /// Toggle for the additive template-detection boost.
    pub enable_template_boost: bool,
    /// Toggle intended for documentation analysis.
    pub enable_doc_analysis: bool,
    /// Toggle for the test-link signal.
    pub enable_test_linking: bool,
    /// Toggle for the churn signal.
    pub enable_churn_analysis: bool,
    /// Toggle for the examples signal.
    pub enable_examples_detection: bool,
}
101
102impl Default for HeuristicWeights {
103 fn default() -> Self {
105 Self {
106 doc_weight: 0.15, readme_weight: 0.20, import_weight: 0.20, path_weight: 0.10, test_link_weight: 0.10, churn_weight: 0.15, centrality_weight: 0.0, entrypoint_weight: 0.05, examples_weight: 0.05, features: ScoringFeatures::v1(),
117 }
118 }
119}
120
121impl HeuristicWeights {
122 pub fn with_v2_features() -> Self {
124 Self {
125 doc_weight: 0.12,
126 readme_weight: 0.18,
127 import_weight: 0.15,
128 path_weight: 0.08,
129 test_link_weight: 0.08,
130 churn_weight: 0.12,
131 centrality_weight: 0.12, entrypoint_weight: 0.08,
133 examples_weight: 0.07,
134
135 features: ScoringFeatures::v2(),
136 }
137 }
138
139 pub fn normalize(&mut self) {
141 let total = self.doc_weight + self.readme_weight + self.import_weight +
142 self.path_weight + self.test_link_weight + self.churn_weight +
143 self.centrality_weight + self.entrypoint_weight + self.examples_weight;
144
145 if total > 0.0 {
146 self.doc_weight /= total;
147 self.readme_weight /= total;
148 self.import_weight /= total;
149 self.path_weight /= total;
150 self.test_link_weight /= total;
151 self.churn_weight /= total;
152 self.centrality_weight /= total;
153 self.entrypoint_weight /= total;
154 self.examples_weight /= total;
155 }
156 }
157
158 pub fn active_weight_sum(&self) -> f64 {
160 let mut sum = self.doc_weight + self.readme_weight + self.import_weight +
161 self.path_weight + self.test_link_weight + self.churn_weight +
162 self.entrypoint_weight + self.examples_weight;
163
164 if self.features.enable_centrality {
165 sum += self.centrality_weight;
166 }
167
168 sum
169 }
170}
171
172impl Default for ScoringFeatures {
173 fn default() -> Self {
174 Self::v1()
175 }
176}
177
178impl ScoringFeatures {
179 pub fn v1() -> Self {
181 Self {
182 enable_centrality: false,
183 enable_template_boost: true,
184 enable_doc_analysis: true,
185 enable_test_linking: true,
186 enable_churn_analysis: true,
187 enable_examples_detection: true,
188 }
189 }
190
191 pub fn v2() -> Self {
193 Self {
194 enable_centrality: true,
195 enable_template_boost: true,
196 enable_doc_analysis: true,
197 enable_test_linking: true,
198 enable_churn_analysis: true,
199 enable_examples_detection: true,
200 }
201 }
202
203 pub fn minimal() -> Self {
205 Self {
206 enable_centrality: false,
207 enable_template_boost: false,
208 enable_doc_analysis: false,
209 enable_test_linking: false,
210 enable_churn_analysis: false,
211 enable_examples_detection: false,
212 }
213 }
214}
215
/// Scores scanned files by combining weighted heuristic signals.
#[derive(Debug)]
pub struct HeuristicScorer {
    /// Weights and feature toggles applied to every file.
    weights: HeuristicWeights,
    /// Optional import graph; when present it supplies import degrees.
    import_graph: Option<ImportGraph>,
    /// Normalization maxima, built lazily on the first scoring call and then
    /// reused.
    norm_stats: Option<NormalizationStats>,
}
226
/// Per-signal maxima observed across a file set, used as divisors when
/// normalizing raw scores.
///
/// `build_normalization_stats` floors every value at 1.0, so dividing by any
/// of these fields never divides by zero.
#[derive(Debug, Clone)]
struct NormalizationStats {
    /// Largest raw documentation signal.
    doc_max: f64,
    /// Largest incoming import degree.
    import_in_max: f64,
    /// Largest outgoing import degree.
    import_out_max: f64,
    /// Largest path depth.
    path_max: f64,
    /// Largest test-link count.
    test_links_max: f64,
    /// Largest churn count.
    churn_max: f64,
    /// Largest raw centrality value.
    centrality_max: f64,
    /// Largest examples count.
    examples_max: f64,
}
239
240impl HeuristicScorer {
241 pub fn new(weights: HeuristicWeights) -> Self {
243 Self {
244 weights,
245 import_graph: None,
246 norm_stats: None,
247 }
248 }
249
250 pub fn set_import_graph(&mut self, graph: ImportGraph) {
252 self.import_graph = Some(graph);
253 }
254
255 pub fn score_file<T>(&mut self, file: &T, all_files: &[T]) -> Result<ScoreComponents>
257 where
258 T: ScanResult,
259 {
260 if self.norm_stats.is_none() {
262 self.norm_stats = Some(self.build_normalization_stats(all_files));
263 }
264
265 let norm_stats = self.norm_stats.as_ref().unwrap();
266 let raw_scores = self.calculate_raw_scores(file);
267 let normalized_scores = self.normalize_scores(&raw_scores, norm_stats);
268
269 let template_boost = if self.weights.features.enable_template_boost {
271 super::template_detection::get_template_score_boost(file.path()).unwrap_or(0.0)
272 } else {
273 0.0
274 };
275
276 let final_score = self.calculate_final_score(&normalized_scores, template_boost, file.priority_boost());
278
279 Ok(ScoreComponents {
280 final_score,
281 doc_score: normalized_scores.doc_score,
282 readme_score: normalized_scores.readme_score,
283 import_score: normalized_scores.import_score,
284 path_score: normalized_scores.path_score,
285 test_link_score: normalized_scores.test_link_score,
286 churn_score: normalized_scores.churn_score,
287 centrality_score: normalized_scores.centrality_score,
288 entrypoint_score: normalized_scores.entrypoint_score,
289 examples_score: normalized_scores.examples_score,
290 priority_boost: file.priority_boost(),
291 template_boost,
292 raw_scores,
293 weights: self.weights.clone(),
294 })
295 }
296
297 pub fn score_all_files<T>(&mut self, files: &[T]) -> Result<Vec<(usize, ScoreComponents)>>
299 where
300 T: ScanResult,
301 {
302 let mut scored_files = Vec::new();
303
304 for (idx, file) in files.iter().enumerate() {
305 let score = self.score_file(file, files)?;
306 scored_files.push((idx, score));
307 }
308
309 scored_files.sort_by(|a, b| b.1.final_score.partial_cmp(&a.1.final_score).unwrap_or(std::cmp::Ordering::Equal));
311
312 Ok(scored_files)
313 }
314
315 fn calculate_raw_scores<T>(&self, file: &T) -> RawScoreComponents
317 where
318 T: ScanResult,
319 {
320 let doc_raw = if file.is_docs() { 1.0 } else { 0.0 } +
322 if let Some(doc_analysis) = file.doc_analysis() {
323 doc_analysis.structure_score()
324 } else {
325 0.0
326 };
327
328 let readme_raw = if file.is_readme() {
330 if file.depth() <= 1 { 1.5 } else { 1.0 }
332 } else {
333 0.0
334 };
335
336 let (import_degree_in, import_degree_out) = if let Some(graph) = &self.import_graph {
338 graph.get_node_degrees(file.path()).unwrap_or((0, 0))
339 } else {
340 let import_count = file.imports().map(|imports| imports.len()).unwrap_or(0);
342 (0, import_count) };
344
345 let path_depth = file.depth();
347
348 let test_links_found = if self.weights.features.enable_test_linking {
350 self.count_test_links(file)
351 } else {
352 0
353 };
354
355 let churn_commits = if self.weights.features.enable_churn_analysis {
357 (file.churn_score() * 10.0) as usize } else {
359 0
360 };
361
362 let centrality_raw = if self.weights.features.enable_centrality {
364 file.centrality_in()
365 } else {
366 0.0
367 };
368
369 let is_entrypoint = file.is_entrypoint();
371
372 let examples_count = if self.weights.features.enable_examples_detection {
374 self.count_examples(file)
375 } else {
376 0
377 };
378
379 RawScoreComponents {
380 doc_raw,
381 readme_raw,
382 import_degree_in,
383 import_degree_out,
384 path_depth,
385 test_links_found,
386 churn_commits,
387 centrality_raw,
388 is_entrypoint,
389 examples_count,
390 }
391 }
392
393 fn build_normalization_stats<T>(&self, files: &[T]) -> NormalizationStats
395 where
396 T: ScanResult,
397 {
398 let mut doc_max: f64 = 0.0;
399 let mut import_in_max: f64 = 0.0;
400 let mut import_out_max: f64 = 0.0;
401 let mut path_max: f64 = 0.0;
402 let mut test_links_max: f64 = 0.0;
403 let mut churn_max: f64 = 0.0;
404 let mut centrality_max: f64 = 0.0;
405 let mut examples_max: f64 = 0.0;
406
407 for file in files {
408 let raw = self.calculate_raw_scores(file);
409
410 doc_max = doc_max.max(raw.doc_raw);
411 import_in_max = import_in_max.max(raw.import_degree_in as f64);
412 import_out_max = import_out_max.max(raw.import_degree_out as f64);
413 path_max = path_max.max(raw.path_depth as f64);
414 test_links_max = test_links_max.max(raw.test_links_found as f64);
415 churn_max = churn_max.max(raw.churn_commits as f64);
416 centrality_max = centrality_max.max(raw.centrality_raw);
417 examples_max = examples_max.max(raw.examples_count as f64);
418 }
419
420 NormalizationStats {
422 doc_max: doc_max.max(1.0),
423 import_in_max: import_in_max.max(1.0),
424 import_out_max: import_out_max.max(1.0),
425 path_max: path_max.max(1.0),
426 test_links_max: test_links_max.max(1.0),
427 churn_max: churn_max.max(1.0),
428 centrality_max: centrality_max.max(1.0),
429 examples_max: examples_max.max(1.0),
430 }
431 }
432
433 fn normalize_scores(&self, raw: &RawScoreComponents, stats: &NormalizationStats) -> NormalizedScores {
435 let doc_score = (raw.doc_raw / stats.doc_max).min(1.0);
437
438 let readme_score = raw.readme_raw.min(1.0);
440
441 let import_in_norm = raw.import_degree_in as f64 / stats.import_in_max;
443 let import_out_norm = raw.import_degree_out as f64 / stats.import_out_max;
444 let import_score = (0.7 * import_in_norm + 0.3 * import_out_norm).min(1.0);
445
446 let path_score = if raw.path_depth == 0 {
448 1.0
449 } else {
450 (1.0 / (raw.path_depth as f64)).min(1.0)
451 };
452
453 let test_link_score = (raw.test_links_found as f64 / stats.test_links_max).min(1.0);
455
456 let churn_score = (raw.churn_commits as f64 / stats.churn_max).min(1.0);
458
459 let centrality_score = if self.weights.features.enable_centrality {
461 (raw.centrality_raw / stats.centrality_max).min(1.0)
462 } else {
463 0.0
464 };
465
466 let entrypoint_score = if raw.is_entrypoint { 1.0 } else { 0.0 };
468
469 let examples_score = (raw.examples_count as f64 / stats.examples_max).min(1.0);
471
472 NormalizedScores {
473 doc_score,
474 readme_score,
475 import_score,
476 path_score,
477 test_link_score,
478 churn_score,
479 centrality_score,
480 entrypoint_score,
481 examples_score,
482 }
483 }
484
485 fn calculate_final_score(&self, scores: &NormalizedScores, template_boost: f64, priority_boost: f64) -> f64 {
487 let weighted_sum =
488 self.weights.doc_weight * scores.doc_score +
489 self.weights.readme_weight * scores.readme_score +
490 self.weights.import_weight * scores.import_score +
491 self.weights.path_weight * scores.path_score +
492 self.weights.test_link_weight * scores.test_link_score +
493 self.weights.churn_weight * scores.churn_score +
494 self.weights.centrality_weight * scores.centrality_score +
495 self.weights.entrypoint_weight * scores.entrypoint_score +
496 self.weights.examples_weight * scores.examples_score;
497
498 weighted_sum + template_boost + priority_boost
499 }
500
501 fn count_test_links<T>(&self, file: &T) -> usize
503 where
504 T: ScanResult,
505 {
506 if file.is_test() {
507 return 0; }
509
510 let path = file.path();
511 let mut links = 0;
512
513 let base_name = std::path::Path::new(path)
515 .file_stem()
516 .and_then(|s| s.to_str())
517 .unwrap_or("");
518
519 let test_patterns = [
521 format!("{}_test", base_name),
522 format!("test_{}", base_name),
523 format!("{}.test", base_name),
524 format!("{}_spec", base_name),
525 format!("spec_{}", base_name),
526 ];
527
528 for _pattern in &test_patterns {
530 if base_name.len() > 5 && !base_name.starts_with("test") {
533 links += 1;
534 break;
535 }
536 }
537
538 links
539 }
540
541 fn count_examples<T>(&self, file: &T) -> usize
543 where
544 T: ScanResult,
545 {
546 if !file.has_examples() {
547 return 0;
548 }
549
550 let path = file.path().to_lowercase();
552 let mut count = 0;
553
554 if path.contains("example") || path.contains("demo") || path.contains("sample") {
555 count += 2;
556 }
557
558 if path.contains("tutorial") || path.contains("guide") {
559 count += 1;
560 }
561
562 count
563 }
564}
565
/// Component scores after normalization, ready to be weighted and summed.
#[derive(Debug)]
struct NormalizedScores {
    /// Normalized documentation component.
    pub doc_score: f64,
    /// Normalized README component.
    pub readme_score: f64,
    /// Normalized import-graph component.
    pub import_score: f64,
    /// Path-depth component.
    pub path_score: f64,
    /// Normalized test-link component.
    pub test_link_score: f64,
    /// Normalized churn component.
    pub churn_score: f64,
    /// Normalized centrality component.
    pub centrality_score: f64,
    /// Entrypoint flag expressed as 0.0 or 1.0.
    pub entrypoint_score: f64,
    /// Normalized examples component.
    pub examples_score: f64,
}
579
580impl ScoreComponents {
581 pub fn total_score(&self) -> f64 {
583 self.final_score
584 }
585
586 pub fn as_map(&self) -> HashMap<String, f64> {
588 let mut map = HashMap::new();
589 map.insert("doc_score".to_string(), self.doc_score);
590 map.insert("readme_score".to_string(), self.readme_score);
591 map.insert("import_score".to_string(), self.import_score);
592 map.insert("path_score".to_string(), self.path_score);
593 map.insert("test_link_score".to_string(), self.test_link_score);
594 map.insert("churn_score".to_string(), self.churn_score);
595 map.insert("centrality_score".to_string(), self.centrality_score);
596 map.insert("entrypoint_score".to_string(), self.entrypoint_score);
597 map.insert("examples_score".to_string(), self.examples_score);
598 map.insert("priority_boost".to_string(), self.priority_boost);
599 map.insert("template_boost".to_string(), self.template_boost);
600 map.insert("final_score".to_string(), self.final_score);
601 map
602 }
603
604 pub fn dominant_component(&self) -> (&'static str, f64) {
606 let components = [
607 ("doc", self.doc_score),
608 ("readme", self.readme_score),
609 ("import", self.import_score),
610 ("path", self.path_score),
611 ("test_link", self.test_link_score),
612 ("churn", self.churn_score),
613 ("centrality", self.centrality_score),
614 ("entrypoint", self.entrypoint_score),
615 ("examples", self.examples_score),
616 ];
617
618 components.iter()
619 .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
620 .map(|(name, score)| (*name, *score))
621 .unwrap_or(("none", 0.0))
622 }
623}
624
// Unit tests for the heuristic scorer, its weight/feature presets and the
// `ScoreComponents` helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use super::super::DocumentAnalysis;

    /// In-memory `ScanResult` whose flags are derived from the path string.
    #[derive(Debug)]
    struct MockScanResult {
        path: String,
        relative_path: String,
        depth: usize,
        is_docs: bool,
        is_readme: bool,
        is_test: bool,
        is_entrypoint: bool,
        has_examples: bool,
        priority_boost: f64,
        churn_score: f64,
        centrality_in: f64,
        imports: Option<Vec<String>>,
        doc_analysis: Option<DocumentAnalysis>,
    }

    impl MockScanResult {
        /// Builds a mock for `path`: depth is the number of `/` separators and
        /// the boolean flags come from simple substring checks on the path.
        fn new(path: &str) -> Self {
            Self {
                path: path.to_string(),
                relative_path: path.to_string(),
                depth: path.matches('/').count(),
                is_docs: path.contains("doc") || path.ends_with(".md"),
                is_readme: path.to_lowercase().contains("readme"),
                is_test: path.contains("test") || path.contains("spec"),
                is_entrypoint: path.contains("main") || path.contains("index"),
                has_examples: path.contains("example") || path.contains("demo"),
                priority_boost: 0.0,
                churn_score: 0.5,
                centrality_in: 0.3,
                imports: Some(vec!["std::collections::HashMap".to_string()]),
                doc_analysis: Some(DocumentAnalysis::new()),
            }
        }
    }

    // Every accessor simply returns the corresponding stored field.
    impl ScanResult for MockScanResult {
        fn path(&self) -> &str { &self.path }
        fn relative_path(&self) -> &str { &self.relative_path }
        fn depth(&self) -> usize { self.depth }
        fn is_docs(&self) -> bool { self.is_docs }
        fn is_readme(&self) -> bool { self.is_readme }
        fn is_test(&self) -> bool { self.is_test }
        fn is_entrypoint(&self) -> bool { self.is_entrypoint }
        fn has_examples(&self) -> bool { self.has_examples }
        fn priority_boost(&self) -> f64 { self.priority_boost }
        fn churn_score(&self) -> f64 { self.churn_score }
        fn centrality_in(&self) -> f64 { self.centrality_in }
        fn imports(&self) -> Option<&[String]> { self.imports.as_deref() }
        fn doc_analysis(&self) -> Option<&DocumentAnalysis> { self.doc_analysis.as_ref() }
    }

    /// A scorer built from the default weights keeps positive doc and README
    /// weights.
    #[test]
    fn test_scorer_creation() {
        let weights = HeuristicWeights::default();
        let scorer = HeuristicScorer::new(weights);

        assert!(scorer.weights.doc_weight > 0.0);
        assert!(scorer.weights.readme_weight > 0.0);
    }

    /// The default (V1) profile has centrality off; the V2 profile has it on.
    #[test]
    fn test_v1_vs_v2_weights() {
        let v1 = HeuristicWeights::default();
        let v2 = HeuristicWeights::with_v2_features();

        // V1: no centrality weight and the feature is disabled.
        assert_eq!(v1.centrality_weight, 0.0);
        assert!(!v1.features.enable_centrality);

        // V2: centrality carries weight and the feature is enabled.
        assert!(v2.centrality_weight > 0.0);
        assert!(v2.features.enable_centrality);
    }

    /// After `normalize`, the nine weights sum to 1.0.
    #[test]
    fn test_weight_normalization() {
        let mut weights = HeuristicWeights {
            doc_weight: 2.0,
            readme_weight: 3.0,
            import_weight: 1.0,
            path_weight: 1.0,
            test_link_weight: 1.0,
            churn_weight: 1.0,
            centrality_weight: 1.0,
            entrypoint_weight: 1.0,
            examples_weight: 1.0,
            features: ScoringFeatures::v2(),
        };

        weights.normalize();

        let total = weights.doc_weight + weights.readme_weight + weights.import_weight +
            weights.path_weight + weights.test_link_weight + weights.churn_weight +
            weights.centrality_weight + weights.entrypoint_weight + weights.examples_weight;

        assert!((total - 1.0).abs() < 0.001);
    }

    /// Scoring a root README yields a positive final score and README score.
    #[test]
    fn test_file_scoring() {
        let weights = HeuristicWeights::default();
        let mut scorer = HeuristicScorer::new(weights);

        let files = vec![
            MockScanResult::new("README.md"),
            MockScanResult::new("src/main.rs"),
            MockScanResult::new("src/lib/deep/nested.rs"),
            MockScanResult::new("examples/demo.rs"),
            MockScanResult::new("tests/unit_test.rs"),
        ];

        let result = scorer.score_file(&files[0], &files);
        assert!(result.is_ok());

        let score = result.unwrap();
        assert!(score.final_score > 0.0);
        assert!(score.readme_score > 0.0);
    }

    /// `score_all_files` returns one entry per file, best score first.
    #[test]
    fn test_score_all_files() {
        let weights = HeuristicWeights::default();
        let mut scorer = HeuristicScorer::new(weights);

        let files = vec![
            MockScanResult::new("README.md"),
            MockScanResult::new("src/main.rs"),
            MockScanResult::new("src/lib/utils.rs"),
        ];

        let result = scorer.score_all_files(&files);
        assert!(result.is_ok());

        let scored = result.unwrap();
        assert_eq!(scored.len(), 3);

        // Results must be in descending order of final score.
        if scored.len() > 1 {
            assert!(scored[0].1.final_score >= scored[1].1.final_score);
        }
    }

    /// `as_map` exposes the stored values and `dominant_component` picks the
    /// largest normalized component.
    #[test]
    fn test_score_components_map() {
        let score = ScoreComponents {
            final_score: 0.85,
            doc_score: 0.1,
            readme_score: 0.8,
            import_score: 0.3,
            path_score: 0.5,
            test_link_score: 0.2,
            churn_score: 0.4,
            centrality_score: 0.0,
            entrypoint_score: 0.0,
            examples_score: 0.0,
            priority_boost: 0.0,
            template_boost: 0.05,
            raw_scores: RawScoreComponents {
                doc_raw: 1.0,
                readme_raw: 1.0,
                import_degree_in: 3,
                import_degree_out: 5,
                path_depth: 1,
                test_links_found: 2,
                churn_commits: 10,
                centrality_raw: 0.0,
                is_entrypoint: false,
                examples_count: 0,
            },
            weights: HeuristicWeights::default(),
        };

        let map = score.as_map();
        assert_eq!(map["final_score"], 0.85);
        assert_eq!(map["readme_score"], 0.8);

        let (dominant, _) = score.dominant_component();
        assert_eq!(dominant, "readme");
    }

    /// Checks the centrality and template-boost flags of each feature preset.
    #[test]
    fn test_scoring_features() {
        let v1_features = ScoringFeatures::v1();
        assert!(!v1_features.enable_centrality);
        assert!(v1_features.enable_template_boost);

        let v2_features = ScoringFeatures::v2();
        assert!(v2_features.enable_centrality);
        assert!(v2_features.enable_template_boost);

        let minimal_features = ScoringFeatures::minimal();
        assert!(!minimal_features.enable_centrality);
        assert!(!minimal_features.enable_template_boost);
    }
}