scribe_analysis/heuristics/
mod.rs1pub mod template_detection;
24pub mod scoring;
25pub mod import_analysis;
26
27pub use scoring::{
28 HeuristicScorer,
29 ScoreComponents,
30 HeuristicWeights,
31 ScoringFeatures,
32};
33
34pub use template_detection::{
35 TemplateDetector,
36 TemplateEngine,
37 TemplateDetectionMethod,
38 is_template_file,
39 get_template_score_boost,
40};
41
42pub use import_analysis::{
43 ImportGraphBuilder,
44 ImportGraph,
45 CentralityCalculator,
46 import_matches_file,
47};
48
49use scribe_core::Result;
50use std::collections::HashMap;
51
52#[derive(Debug)]
54pub struct HeuristicSystem {
55 scorer: HeuristicScorer,
57 template_detector: TemplateDetector,
59 import_builder: ImportGraphBuilder,
61}
62
63impl HeuristicSystem {
64 pub fn new() -> Result<Self> {
66 Ok(Self {
67 scorer: HeuristicScorer::new(HeuristicWeights::default()),
68 template_detector: TemplateDetector::new()?,
69 import_builder: ImportGraphBuilder::new()?,
70 })
71 }
72
73 pub fn with_weights(weights: HeuristicWeights) -> Result<Self> {
75 Ok(Self {
76 scorer: HeuristicScorer::new(weights),
77 template_detector: TemplateDetector::new()?,
78 import_builder: ImportGraphBuilder::new()?,
79 })
80 }
81
82 pub fn with_v2_features() -> Result<Self> {
84 Ok(Self {
85 scorer: HeuristicScorer::new(HeuristicWeights::with_v2_features()),
86 template_detector: TemplateDetector::new()?,
87 import_builder: ImportGraphBuilder::new()?,
88 })
89 }
90
91 pub fn score_file<T>(&mut self, file: &T, all_files: &[T]) -> Result<ScoreComponents>
93 where
94 T: ScanResult,
95 {
96 self.scorer.score_file(file, all_files)
97 }
98
99 pub fn score_all_files<T>(&mut self, files: &[T]) -> Result<Vec<(usize, ScoreComponents)>>
101 where
102 T: ScanResult,
103 {
104 self.scorer.score_all_files(files)
105 }
106
107 pub fn get_top_files<T>(&mut self, files: &[T], top_k: usize) -> Result<Vec<(usize, ScoreComponents)>>
109 where
110 T: ScanResult,
111 {
112 Ok(self.score_all_files(files)?.into_iter().take(top_k).collect())
113 }
114
115 pub fn get_template_boost(&self, file_path: &str) -> Result<f64> {
117 self.template_detector.get_score_boost(file_path)
118 }
119
120 pub fn import_matches(&self, import_name: &str, file_path: &str) -> bool {
122 import_analysis::import_matches_file(import_name, file_path)
123 }
124}
125
126impl Default for HeuristicSystem {
127 fn default() -> Self {
128 Self::new().expect("Failed to create HeuristicSystem")
129 }
130}
131
132pub trait ScanResult {
134 fn path(&self) -> &str;
136
137 fn relative_path(&self) -> &str;
139
140 fn depth(&self) -> usize;
142
143 fn is_docs(&self) -> bool;
145
146 fn is_readme(&self) -> bool;
148
149 fn is_test(&self) -> bool;
151
152 fn is_entrypoint(&self) -> bool;
154
155 fn has_examples(&self) -> bool;
157
158 fn priority_boost(&self) -> f64;
160
161 fn churn_score(&self) -> f64;
163
164 fn centrality_in(&self) -> f64;
166
167 fn imports(&self) -> Option<&[String]>;
169
170 fn doc_analysis(&self) -> Option<&DocumentAnalysis>;
172}
173
/// Structural counters for a document, together with a score derived from them.
///
/// The default value has every counter at zero and `is_well_structured` set
/// to `false`.
#[derive(Debug, Clone, Default)]
pub struct DocumentAnalysis {
    /// Heading count; contributes `heading_count / 10`, capped at 0.5, to
    /// [`DocumentAnalysis::structure_score`].
    pub heading_count: usize,
    /// Table-of-contents indicator count; any non-zero value contributes a
    /// flat 0.3 to the score.
    pub toc_indicators: usize,
    /// Link count; contributes `link_count / 20`, capped at 0.3, to the score.
    pub link_count: usize,
    /// Code-block count; contributes `code_block_count / 10`, capped at 0.2,
    /// to the score.
    pub code_block_count: usize,
    /// Flag stored with the analysis. `structure_score` does not read it.
    pub is_well_structured: bool,
}

impl DocumentAnalysis {
    /// Creates an analysis with all counters at zero and `is_well_structured`
    /// set to `false` (identical to `DocumentAnalysis::default()`).
    pub fn new() -> Self {
        Self::default()
    }

    /// Computes a structure score from the four counters.
    ///
    /// The result is the sum of four independently capped contributions:
    /// headings (up to 0.5), table-of-contents indicators (0.3 if any),
    /// links (up to 0.3) and code blocks (up to 0.2). It therefore lies in
    /// `0.0..=1.3`.
    pub fn structure_score(&self) -> f64 {
        // A zero count contributes exactly 0.0, so no `> 0` guards are needed.
        let headings = (self.heading_count as f64 / 10.0).min(0.5);
        let toc = if self.toc_indicators > 0 { 0.3 } else { 0.0 };
        let links = (self.link_count as f64 / 20.0).min(0.3);
        let code_blocks = (self.code_block_count as f64 / 10.0).min(0.2);

        headings + toc + links + code_blocks
    }
}
233
/// Counters and timings describing one heuristic run.
///
/// All fields are public and filled in by the caller;
/// [`HeuristicMetrics::finalize`] derives `avg_time_per_file_ms` from
/// `processing_time_ms` and `files_processed`.
#[derive(Debug, Clone, Default)]
pub struct HeuristicMetrics {
    /// Number of files processed; the divisor used by `finalize`.
    pub files_processed: usize,
    /// Total processing time in milliseconds; the dividend used by `finalize`.
    pub processing_time_ms: u64,
    /// Time attributed to the import graph, in milliseconds. Not read by any
    /// code in this block.
    pub import_graph_time_ms: u64,
    /// Time attributed to template detection, in milliseconds. Not read by
    /// any code in this block.
    pub template_detection_time_ms: u64,
    /// Average processing time per file in milliseconds, as last computed by
    /// `finalize` (0.0 until then).
    pub avg_time_per_file_ms: f64,
    /// Hit rates keyed by name — presumably cache name to hit rate; confirm
    /// against the code that populates it.
    pub cache_hit_rates: HashMap<String, f64>,
}

impl HeuristicMetrics {
    /// Creates metrics with every counter at zero and no cache hit rates
    /// (identical to `HeuristicMetrics::default()`).
    pub fn new() -> Self {
        Self::default()
    }

    /// Recomputes `avg_time_per_file_ms` from the current totals.
    ///
    /// When `files_processed` is zero the average is set to `0.0`, so a value
    /// left over from an earlier `finalize` call (or set by hand) never
    /// survives alongside an empty file count.
    pub fn finalize(&mut self) {
        self.avg_time_per_file_ms = if self.files_processed == 0 {
            0.0
        } else {
            self.processing_time_ms as f64 / self.files_processed as f64
        };
    }
}
275
#[cfg(test)]
mod tests {
    use super::*;

    /// Both the default and the v2 constructors are expected to succeed.
    #[test]
    fn test_heuristic_system_creation() {
        assert!(HeuristicSystem::new().is_ok());
        assert!(HeuristicSystem::with_v2_features().is_ok());
    }

    /// Pins the exact contribution of each counter instead of a loose upper
    /// bound (the score can never exceed 1.3, so `< 2.0` could not fail).
    #[test]
    fn test_document_analysis() {
        let doc = DocumentAnalysis {
            heading_count: 5,
            link_count: 10,
            code_block_count: 3,
            ..DocumentAnalysis::new()
        };

        // 0.5 (headings) + 0.3 (links, capped) + 0.2 (code blocks, capped)
        let score = doc.structure_score();
        assert!((score - 1.0).abs() < 1e-9, "unexpected structure score: {score}");

        // A document with no structure at all scores exactly zero.
        assert_eq!(DocumentAnalysis::new().structure_score(), 0.0);
    }

    /// `finalize` derives the per-file average from the two totals.
    #[test]
    fn test_metrics() {
        let mut metrics = HeuristicMetrics {
            files_processed: 100,
            processing_time_ms: 500,
            ..HeuristicMetrics::new()
        };
        metrics.finalize();
        assert_eq!(metrics.avg_time_per_file_ms, 5.0);

        // Freshly created metrics stay at a zero average when finalized.
        let mut empty = HeuristicMetrics::new();
        empty.finalize();
        assert_eq!(empty.avg_time_per_file_ms, 0.0);
    }
}