scribe_analysis/heuristics/
mod.rs1pub mod enhanced_scoring;
24pub mod import_analysis;
25pub mod scoring;
26pub mod template_detection;
27
28pub use scoring::{
29 HeuristicScorer, HeuristicWeights, RawScoreComponents, ScoreComponents, ScoringFeatures,
30 WeightPreset,
31};
32
33pub use template_detection::{
34 get_template_score_boost, is_template_file, TemplateDetectionMethod, TemplateDetector,
35 TemplateEngine,
36};
37
38pub use import_analysis::{
39 import_matches_file, CentralityCalculator, ImportGraph, ImportGraphBuilder,
40};
41
42pub use enhanced_scoring::{
43 AdaptiveFactors, EnhancedHeuristicScorer, EnhancedScoreComponents, EnhancedWeights,
44 ProjectType, RepositoryCharacteristics,
45};
46
47use scribe_core::Result;
48use std::collections::HashMap;
49
50#[derive(Debug)]
52pub struct HeuristicSystem {
53 scorer: HeuristicScorer,
55 template_detector: TemplateDetector,
57 import_builder: ImportGraphBuilder,
59}
60
61impl HeuristicSystem {
62 pub fn new() -> Result<Self> {
64 Ok(Self {
65 scorer: HeuristicScorer::new(HeuristicWeights::default()),
66 template_detector: TemplateDetector::new()?,
67 import_builder: ImportGraphBuilder::new()?,
68 })
69 }
70
71 pub fn with_weights(weights: HeuristicWeights) -> Result<Self> {
73 Ok(Self {
74 scorer: HeuristicScorer::new(weights),
75 template_detector: TemplateDetector::new()?,
76 import_builder: ImportGraphBuilder::new()?,
77 })
78 }
79
80 pub fn with_v2_features() -> Result<Self> {
82 Ok(Self {
83 scorer: HeuristicScorer::new(HeuristicWeights::with_v2_features()),
84 template_detector: TemplateDetector::new()?,
85 import_builder: ImportGraphBuilder::new()?,
86 })
87 }
88
89 pub fn score_file<T>(&mut self, file: &T, all_files: &[T]) -> Result<ScoreComponents>
91 where
92 T: ScanResult,
93 {
94 self.scorer.score_file(file, all_files)
95 }
96
97 pub fn score_all_files<T>(&mut self, files: &[T]) -> Result<Vec<(usize, ScoreComponents)>>
99 where
100 T: ScanResult,
101 {
102 self.scorer.score_all_files(files)
103 }
104
105 pub fn get_top_files<T>(
107 &mut self,
108 files: &[T],
109 top_k: usize,
110 ) -> Result<Vec<(usize, ScoreComponents)>>
111 where
112 T: ScanResult,
113 {
114 Ok(self
115 .score_all_files(files)?
116 .into_iter()
117 .take(top_k)
118 .collect())
119 }
120
121 pub fn get_template_boost(&self, file_path: &str) -> Result<f64> {
123 self.template_detector.get_score_boost(file_path)
124 }
125
126 pub fn import_matches(&self, import_name: &str, file_path: &str) -> bool {
128 import_analysis::import_matches_file(import_name, file_path)
129 }
130}
131
132impl Default for HeuristicSystem {
133 fn default() -> Self {
134 Self::new().expect("Failed to create HeuristicSystem")
135 }
136}
137
138pub trait ScanResult {
140 fn path(&self) -> &str;
142
143 fn relative_path(&self) -> &str;
145
146 fn depth(&self) -> usize;
148
149 fn is_docs(&self) -> bool;
151
152 fn is_readme(&self) -> bool;
154
155 fn is_test(&self) -> bool;
157
158 fn is_entrypoint(&self) -> bool;
160
161 fn has_examples(&self) -> bool;
163
164 fn priority_boost(&self) -> f64;
166
167 fn churn_score(&self) -> f64;
169
170 fn centrality_in(&self) -> f64;
172
173 fn imports(&self) -> Option<&[String]>;
175
176 fn doc_analysis(&self) -> Option<&DocumentAnalysis>;
178}
179
/// Structural counters collected for a document, plus a score derived from
/// them.
///
/// The default value has every counter at zero and `is_well_structured`
/// set to `false`.
#[derive(Debug, Clone, Default)]
pub struct DocumentAnalysis {
    /// Number of headings counted in the document.
    pub heading_count: usize,
    /// Number of table-of-contents indicators counted in the document.
    pub toc_indicators: usize,
    /// Number of links counted in the document.
    pub link_count: usize,
    /// Number of code blocks counted in the document.
    pub code_block_count: usize,
    /// Flag marking the document as well structured; it is not consulted by
    /// [`DocumentAnalysis::structure_score`].
    pub is_well_structured: bool,
}

impl DocumentAnalysis {
    /// Creates an analysis with all counters at zero and
    /// `is_well_structured` set to `false`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Computes a structure score from the counters.
    ///
    /// The score is the sum of four independent contributions:
    ///
    /// * headings: `heading_count / 10`, capped at `0.5`
    /// * table of contents: a flat `0.3` when `toc_indicators > 0`
    /// * links: `link_count / 20`, capped at `0.3`
    /// * code blocks: `code_block_count / 10`, capped at `0.2`
    ///
    /// The result therefore lies in `0.0..=1.3`.
    pub fn structure_score(&self) -> f64 {
        // A zero count yields a zero contribution, so no explicit guard is needed.
        let capped = |count: usize, divisor: f64, cap: f64| -> f64 {
            (count as f64 / divisor).min(cap)
        };

        let headings = capped(self.heading_count, 10.0, 0.5);
        let toc = if self.toc_indicators > 0 { 0.3 } else { 0.0 };
        let links = capped(self.link_count, 20.0, 0.3);
        let code_blocks = capped(self.code_block_count, 10.0, 0.2);

        // Summed left to right: headings, then toc, then links, then code blocks.
        headings + toc + links + code_blocks
    }
}
239
/// Timing and throughput counters for a heuristic run.
///
/// The default value has every counter at zero and an empty
/// `cache_hit_rates` map.
#[derive(Debug, Clone, Default)]
pub struct HeuristicMetrics {
    /// Number of files processed.
    pub files_processed: usize,
    /// Total processing time, in milliseconds.
    pub processing_time_ms: u64,
    /// Time attributed to the import graph, in milliseconds.
    pub import_graph_time_ms: u64,
    /// Time attributed to template detection, in milliseconds.
    pub template_detection_time_ms: u64,
    /// Average processing time per file, in milliseconds; recomputed by
    /// [`HeuristicMetrics::finalize`].
    pub avg_time_per_file_ms: f64,
    /// Cache hit rates keyed by name.
    pub cache_hit_rates: HashMap<String, f64>,
}

impl HeuristicMetrics {
    /// Creates metrics with every counter at zero and no cache hit rates.
    pub fn new() -> Self {
        Self::default()
    }

    /// Recomputes the derived `avg_time_per_file_ms` field from
    /// `processing_time_ms` and `files_processed`.
    ///
    /// When no files were processed the average is set to `0.0`, so a value
    /// left over from an earlier `finalize` call cannot survive a reset of
    /// the counters.
    pub fn finalize(&mut self) {
        self.avg_time_per_file_ms = if self.files_processed == 0 {
            // Avoid dividing by zero and drop any stale average.
            0.0
        } else {
            self.processing_time_ms as f64 / self.files_processed as f64
        };
    }
}
282
#[cfg(test)]
mod tests {
    use super::*;

    /// Both the default and the v2 constructors must succeed.
    #[test]
    fn test_heuristic_system_creation() {
        assert!(HeuristicSystem::new().is_ok());
        assert!(HeuristicSystem::with_v2_features().is_ok());
    }

    /// A document with some headings, links and code blocks gets a positive
    /// score that stays below 2.0.
    #[test]
    fn test_document_analysis() {
        let doc = DocumentAnalysis {
            heading_count: 5,
            link_count: 10,
            code_block_count: 3,
            ..DocumentAnalysis::new()
        };

        let score = doc.structure_score();
        assert!(score > 0.0);
        assert!(score < 2.0);
    }

    /// `finalize` derives the per-file average from the totals.
    #[test]
    fn test_metrics() {
        let mut metrics = HeuristicMetrics {
            files_processed: 100,
            processing_time_ms: 500,
            ..HeuristicMetrics::new()
        };
        metrics.finalize();

        assert_eq!(metrics.avg_time_per_file_ms, 5.0);
    }
}