scribe_analysis/heuristics/
mod.rs1pub mod import_analysis;
24pub mod scoring;
25pub mod template_detection;
26
27pub use scoring::{
28 HeuristicScorer, HeuristicWeights, ScoreComponents, ScoringFeatures, WeightPreset,
29};
30
31pub use template_detection::{
32 get_template_score_boost, is_template_file, TemplateDetectionMethod, TemplateDetector,
33 TemplateEngine,
34};
35
36pub use import_analysis::{
37 import_matches_file, CentralityCalculator, ImportGraph, ImportGraphBuilder,
38};
39
40use scribe_core::Result;
41use std::collections::HashMap;
42
43#[derive(Debug)]
45pub struct HeuristicSystem {
46 scorer: HeuristicScorer,
48 template_detector: TemplateDetector,
50}
51
52impl HeuristicSystem {
53 pub fn new() -> Result<Self> {
55 Ok(Self {
56 scorer: HeuristicScorer::new(HeuristicWeights::default()),
57 template_detector: TemplateDetector::new()?,
58 })
59 }
60
61 pub fn with_weights(weights: HeuristicWeights) -> Result<Self> {
63 Ok(Self {
64 scorer: HeuristicScorer::new(weights),
65 template_detector: TemplateDetector::new()?,
66 })
67 }
68
69 pub fn with_v2_features() -> Result<Self> {
71 Ok(Self {
72 scorer: HeuristicScorer::new(HeuristicWeights::with_v2_features()),
73 template_detector: TemplateDetector::new()?,
74 })
75 }
76
77 pub fn score_file<T>(&mut self, file: &T, all_files: &[T]) -> Result<ScoreComponents>
79 where
80 T: ScanResult,
81 {
82 self.scorer.score_file(file, all_files)
83 }
84
85 pub fn score_all_files<T>(&mut self, files: &[T]) -> Result<Vec<(usize, ScoreComponents)>>
87 where
88 T: ScanResult,
89 {
90 self.scorer.score_all_files(files)
91 }
92
93 pub fn get_top_files<T>(
95 &mut self,
96 files: &[T],
97 top_k: usize,
98 ) -> Result<Vec<(usize, ScoreComponents)>>
99 where
100 T: ScanResult,
101 {
102 Ok(self
103 .score_all_files(files)?
104 .into_iter()
105 .take(top_k)
106 .collect())
107 }
108
109 pub fn get_template_boost(&self, file_path: &str) -> Result<f64> {
111 self.template_detector.get_score_boost(file_path)
112 }
113
114 pub fn import_matches(&self, import_name: &str, file_path: &str) -> bool {
116 import_analysis::import_matches_file(import_name, file_path)
117 }
118}
119
120impl Default for HeuristicSystem {
121 fn default() -> Self {
122 Self::new().expect("Failed to create HeuristicSystem")
123 }
124}
125
126pub trait ScanResult {
128 fn path(&self) -> &str;
130
131 fn relative_path(&self) -> &str;
133
134 fn depth(&self) -> usize;
136
137 fn is_docs(&self) -> bool;
139
140 fn is_readme(&self) -> bool;
142
143 fn is_test(&self) -> bool;
145
146 fn is_entrypoint(&self) -> bool;
148
149 fn has_examples(&self) -> bool;
151
152 fn priority_boost(&self) -> f64;
154
155 fn churn_score(&self) -> f64;
157
158 fn centrality_in(&self) -> f64;
160
161 fn imports(&self) -> Option<&[String]>;
163
164 fn doc_analysis(&self) -> Option<&DocumentAnalysis>;
166}
167
/// Structural counts gathered from a document, plus a derived score.
///
/// All counters start at zero and `is_well_structured` starts as `false`,
/// whether the value is built with [`DocumentAnalysis::new`] or `default()`.
#[derive(Debug, Clone, Default)]
pub struct DocumentAnalysis {
    /// Number of headings counted in the document.
    pub heading_count: usize,
    /// Number of table-of-contents indicators counted in the document.
    pub toc_indicators: usize,
    /// Number of links counted in the document.
    pub link_count: usize,
    /// Number of code blocks counted in the document.
    pub code_block_count: usize,
    /// Caller-maintained flag; it is not read by `structure_score`.
    pub is_well_structured: bool,
}

impl DocumentAnalysis {
    /// Creates an analysis with every counter at zero and
    /// `is_well_structured` set to `false`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Combines the counters into a single non-negative score.
    ///
    /// The result is the sum of four independent contributions:
    ///
    /// * headings: `heading_count / 10`, capped at `0.5`
    /// * table of contents: a flat `0.3` when `toc_indicators > 0`
    /// * links: `link_count / 20`, capped at `0.3`
    /// * code blocks: `code_block_count / 10`, capped at `0.2`
    ///
    /// The maximum is therefore `1.3`; the value is not normalised to `1.0`.
    pub fn structure_score(&self) -> f64 {
        // Linear ramp over `count` that saturates at `cap`. A zero count
        // yields exactly 0.0, so no separate "is it present" guard is needed.
        let ramp = |count: usize, divisor: f64, cap: f64| (count as f64 / divisor).min(cap);

        let headings = ramp(self.heading_count, 10.0, 0.5);
        let toc = if self.toc_indicators > 0 { 0.3 } else { 0.0 };
        let links = ramp(self.link_count, 20.0, 0.3);
        let code_blocks = ramp(self.code_block_count, 10.0, 0.2);

        // Summed in the same order as the contributions are listed above.
        headings + toc + links + code_blocks
    }
}
227
/// Counters and timings describing one heuristic-analysis run.
///
/// The fields are plain data filled in by the caller; the only derived value
/// is `avg_time_per_file_ms`, which [`HeuristicMetrics::finalize`] recomputes
/// from `processing_time_ms` and `files_processed`.
#[derive(Debug, Clone, Default)]
pub struct HeuristicMetrics {
    /// Number of files processed.
    pub files_processed: usize,
    /// Total processing time, in milliseconds.
    pub processing_time_ms: u64,
    /// Time recorded for the import graph, in milliseconds. Not read or
    /// written by any code in this block.
    pub import_graph_time_ms: u64,
    /// Time recorded for template detection, in milliseconds. Not read or
    /// written by any code in this block.
    pub template_detection_time_ms: u64,
    /// Average milliseconds per file; overwritten by `finalize`.
    pub avg_time_per_file_ms: f64,
    /// Hit rates keyed by name. NOTE(review): presumably keyed by cache name
    /// with rates in `0.0..=1.0`; confirm against the code that fills it.
    pub cache_hit_rates: HashMap<String, f64>,
}

impl HeuristicMetrics {
    /// Creates metrics with every counter at zero and no cache entries.
    pub fn new() -> Self {
        Self::default()
    }

    /// Recomputes `avg_time_per_file_ms` from the current counters.
    ///
    /// The average is `processing_time_ms / files_processed`. When no files
    /// have been processed the average is set to `0.0`, so a reused or reset
    /// metrics value never keeps a stale average that contradicts its
    /// counters (and no division by zero takes place).
    pub fn finalize(&mut self) {
        self.avg_time_per_file_ms = if self.files_processed == 0 {
            0.0
        } else {
            self.processing_time_ms as f64 / self.files_processed as f64
        };
    }
}
270
#[cfg(test)]
mod tests {
    use super::*;

    /// Both the default and the V2-feature constructors must succeed.
    #[test]
    fn test_heuristic_system_creation() {
        assert!(HeuristicSystem::new().is_ok());
        assert!(HeuristicSystem::with_v2_features().is_ok());
    }

    /// A document with some headings, links and code blocks gets a positive
    /// score that stays below a loose upper bound.
    #[test]
    fn test_document_analysis() {
        let doc = DocumentAnalysis {
            heading_count: 5,
            link_count: 10,
            code_block_count: 3,
            ..DocumentAnalysis::new()
        };

        let score = doc.structure_score();
        assert!(score > 0.0);
        assert!(score < 2.0);
    }

    /// `finalize` derives the per-file average from the totals.
    #[test]
    fn test_metrics() {
        let mut metrics = HeuristicMetrics {
            files_processed: 100,
            processing_time_ms: 500,
            ..HeuristicMetrics::new()
        };
        metrics.finalize();

        assert_eq!(metrics.avg_time_per_file_ms, 5.0);
    }
}