scribe_analysis/heuristics/
mod.rs1pub mod template_detection;
24pub mod scoring;
25pub mod import_analysis;
26pub mod enhanced_scoring;
27
28pub use scoring::{
29 HeuristicScorer,
30 ScoreComponents,
31 RawScoreComponents,
32 HeuristicWeights,
33 ScoringFeatures,
34 WeightPreset,
35};
36
37pub use template_detection::{
38 TemplateDetector,
39 TemplateEngine,
40 TemplateDetectionMethod,
41 is_template_file,
42 get_template_score_boost,
43};
44
45pub use import_analysis::{
46 ImportGraphBuilder,
47 ImportGraph,
48 CentralityCalculator,
49 import_matches_file,
50};
51
52pub use enhanced_scoring::{
53 EnhancedHeuristicScorer,
54 EnhancedScoreComponents,
55 EnhancedWeights,
56 AdaptiveFactors,
57 RepositoryCharacteristics,
58 ProjectType,
59};
60
61use scribe_core::Result;
62use std::collections::HashMap;
63
64#[derive(Debug)]
66pub struct HeuristicSystem {
67 scorer: HeuristicScorer,
69 template_detector: TemplateDetector,
71 import_builder: ImportGraphBuilder,
73}
74
75impl HeuristicSystem {
76 pub fn new() -> Result<Self> {
78 Ok(Self {
79 scorer: HeuristicScorer::new(HeuristicWeights::default()),
80 template_detector: TemplateDetector::new()?,
81 import_builder: ImportGraphBuilder::new()?,
82 })
83 }
84
85 pub fn with_weights(weights: HeuristicWeights) -> Result<Self> {
87 Ok(Self {
88 scorer: HeuristicScorer::new(weights),
89 template_detector: TemplateDetector::new()?,
90 import_builder: ImportGraphBuilder::new()?,
91 })
92 }
93
94 pub fn with_v2_features() -> Result<Self> {
96 Ok(Self {
97 scorer: HeuristicScorer::new(HeuristicWeights::with_v2_features()),
98 template_detector: TemplateDetector::new()?,
99 import_builder: ImportGraphBuilder::new()?,
100 })
101 }
102
103 pub fn score_file<T>(&mut self, file: &T, all_files: &[T]) -> Result<ScoreComponents>
105 where
106 T: ScanResult,
107 {
108 self.scorer.score_file(file, all_files)
109 }
110
111 pub fn score_all_files<T>(&mut self, files: &[T]) -> Result<Vec<(usize, ScoreComponents)>>
113 where
114 T: ScanResult,
115 {
116 self.scorer.score_all_files(files)
117 }
118
119 pub fn get_top_files<T>(&mut self, files: &[T], top_k: usize) -> Result<Vec<(usize, ScoreComponents)>>
121 where
122 T: ScanResult,
123 {
124 Ok(self.score_all_files(files)?.into_iter().take(top_k).collect())
125 }
126
127 pub fn get_template_boost(&self, file_path: &str) -> Result<f64> {
129 self.template_detector.get_score_boost(file_path)
130 }
131
132 pub fn import_matches(&self, import_name: &str, file_path: &str) -> bool {
134 import_analysis::import_matches_file(import_name, file_path)
135 }
136}
137
138impl Default for HeuristicSystem {
139 fn default() -> Self {
140 Self::new().expect("Failed to create HeuristicSystem")
141 }
142}
143
144pub trait ScanResult {
146 fn path(&self) -> &str;
148
149 fn relative_path(&self) -> &str;
151
152 fn depth(&self) -> usize;
154
155 fn is_docs(&self) -> bool;
157
158 fn is_readme(&self) -> bool;
160
161 fn is_test(&self) -> bool;
163
164 fn is_entrypoint(&self) -> bool;
166
167 fn has_examples(&self) -> bool;
169
170 fn priority_boost(&self) -> f64;
172
173 fn churn_score(&self) -> f64;
175
176 fn centrality_in(&self) -> f64;
178
179 fn imports(&self) -> Option<&[String]>;
181
182 fn doc_analysis(&self) -> Option<&DocumentAnalysis>;
184}
185
/// Structural statistics describing a document.
///
/// A freshly created value has every counter at zero and
/// `is_well_structured` set to `false`.
#[derive(Debug, Clone, Default)]
pub struct DocumentAnalysis {
    /// Number of headings counted.
    pub heading_count: usize,
    /// Number of table-of-contents indicators counted.
    pub toc_indicators: usize,
    /// Number of links counted.
    pub link_count: usize,
    /// Number of code blocks counted.
    pub code_block_count: usize,
    /// Caller-maintained flag; it is not consulted by `structure_score`.
    pub is_well_structured: bool,
}

impl DocumentAnalysis {
    /// Creates an analysis with all counters at zero and the flag cleared.
    pub fn new() -> Self {
        Self::default()
    }

    /// Combines the counters into a single structure score.
    ///
    /// Each signal contributes a capped amount, so the result lies in
    /// `0.0..=1.3`:
    ///
    /// * headings: `heading_count / 10`, at most `0.5`
    /// * table of contents: flat `0.3` when any indicator is present
    /// * links: `link_count / 20`, at most `0.3`
    /// * code blocks: `code_block_count / 10`, at most `0.2`
    pub fn structure_score(&self) -> f64 {
        // A zero counter yields a 0.0 term, so no explicit guards are needed.
        let heading_part = (self.heading_count as f64 / 10.0).min(0.5);
        let toc_part = if self.toc_indicators > 0 { 0.3 } else { 0.0 };
        let link_part = (self.link_count as f64 / 20.0).min(0.3);
        let code_part = (self.code_block_count as f64 / 10.0).min(0.2);

        heading_part + toc_part + link_part + code_part
    }
}
245
/// Timing and throughput counters for a heuristic run.
///
/// The raw counters are filled in by the caller; call
/// [`HeuristicMetrics::finalize`] afterwards to derive
/// `avg_time_per_file_ms`.
#[derive(Debug, Clone, Default)]
pub struct HeuristicMetrics {
    /// Number of files processed.
    pub files_processed: usize,
    /// Total processing time in milliseconds.
    pub processing_time_ms: u64,
    /// Time attributed to the import graph, in milliseconds.
    pub import_graph_time_ms: u64,
    /// Time attributed to template detection, in milliseconds.
    pub template_detection_time_ms: u64,
    /// Derived value: `processing_time_ms / files_processed`, written by
    /// `finalize`.
    pub avg_time_per_file_ms: f64,
    /// Cache hit rates keyed by name.
    pub cache_hit_rates: HashMap<String, f64>,
}

impl HeuristicMetrics {
    /// Creates metrics with every counter at zero and no cache entries.
    pub fn new() -> Self {
        Self::default()
    }

    /// Recomputes `avg_time_per_file_ms` from the current counters.
    ///
    /// When no files were processed the average is set to `0.0`, so a
    /// previously computed average can never linger after the counters have
    /// been reset, and no division by zero takes place.
    pub fn finalize(&mut self) {
        self.avg_time_per_file_ms = if self.files_processed == 0 {
            0.0
        } else {
            self.processing_time_ms as f64 / self.files_processed as f64
        };
    }
}
287
#[cfg(test)]
mod tests {
    use super::*;

    /// Both the default and the V2 constructors must succeed.
    #[test]
    fn test_heuristic_system_creation() {
        assert!(HeuristicSystem::new().is_ok());
        assert!(HeuristicSystem::with_v2_features().is_ok());
    }

    /// A document with some structure scores strictly between 0 and 2.
    #[test]
    fn test_document_analysis() {
        let doc = DocumentAnalysis {
            heading_count: 5,
            link_count: 10,
            code_block_count: 3,
            ..DocumentAnalysis::new()
        };

        let score = doc.structure_score();
        assert!(score > 0.0 && score < 2.0);
    }

    /// 500 ms spread over 100 files averages to 5 ms per file.
    #[test]
    fn test_metrics() {
        let mut metrics = HeuristicMetrics {
            files_processed: 100,
            processing_time_ms: 500,
            ..HeuristicMetrics::new()
        };
        metrics.finalize();

        assert_eq!(metrics.avg_time_per_file_ms, 5.0);
    }
}