1use std::collections::{HashMap, HashSet};
7use std::path::Path;
8use serde::{Deserialize, Serialize};
9use scribe_core::{Result, ScribeError};
10
/// Tunable parameters for [`TwoPassSelector`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TwoPassConfig {
    /// Fraction of the total token budget handed to the speculative pass;
    /// the remainder goes to the rule-based pass.
    pub speculation_ratio: f64,
    /// Minimum confidence a file needs to be picked by the speculative pass.
    pub speculation_threshold: f64,
    /// NOTE(review): not read by any code visible in this file — confirm
    /// whether it is consumed elsewhere or is a leftover.
    pub max_iterations: usize,
    /// When `true`, the rule-based pass also runs coverage-gap analysis.
    pub enable_gap_analysis: bool,
}
23
24impl Default for TwoPassConfig {
25 fn default() -> Self {
26 Self {
27 speculation_ratio: 0.75, speculation_threshold: 0.5, max_iterations: 3,
30 enable_gap_analysis: true,
31 }
32 }
33}
34
/// Outcome of [`TwoPassSelector::select_files`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TwoPassResult {
    /// Paths chosen by the speculative (first) pass.
    pub speculative_files: Vec<String>,
    /// Additional paths chosen by the rule-based (second) pass.
    pub rule_based_files: Vec<String>,
    /// Gaps reported by coverage analysis; empty when gap analysis is disabled.
    pub coverage_gaps: Vec<CoverageGap>,
    /// Mean `importance` of the selected files.
    pub selection_score: f64,
    /// Token total of all selected files divided by the total budget.
    pub budget_utilization: f64,
    /// Timing and counting statistics for the run.
    pub metrics: SelectionMetrics,
}
51
/// A weakness detected in the current selection by coverage-gap analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoverageGap {
    /// Category tag. The analysis in this file emits `"missing_dependency"`,
    /// `"missing_interface_implementation"` and `"orphaned_test"`.
    pub gap_type: String,
    /// Relative seriousness; the analysis in this file uses 0.8, 0.6 and 0.4
    /// for the three gap types respectively.
    pub severity: f64,
    /// Paths that could be added to the selection to close the gap.
    pub candidate_files: Vec<String>,
    /// Human-readable explanation of the gap.
    pub reason: String,
}
64
/// Statistics gathered while running a two-pass selection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SelectionMetrics {
    /// Wall-clock duration of the speculative pass, in milliseconds.
    pub speculation_time_ms: u64,
    /// Wall-clock duration of the rule-based pass, in milliseconds.
    pub rule_based_time_ms: u64,
    /// Number of rules configured on the selector.
    pub rules_evaluated: usize,
    /// Number of coverage gaps reported.
    pub gaps_found: usize,
    /// Number of entries in the `available_files` map given to the selector.
    pub files_considered: usize,
}
79
/// A weighted scoring rule used by the rule-based pass.
#[derive(Debug, Clone)]
pub struct SelectionRule {
    /// Short identifier of the rule.
    pub name: String,
    /// Multiplier applied to the evaluator's score before scores are summed.
    pub weight: f64,
    /// Scoring function: receives the selection context and a candidate file
    /// path and returns that candidate's score for this rule.
    pub evaluator: fn(&SelectionContext, &str) -> f64,
    /// Human-readable summary of what the rule favours.
    pub description: String,
}
92
/// Read-only view of the selection state handed to rule evaluators.
#[derive(Debug)]
pub struct SelectionContext<'a> {
    /// Paths already selected before the rule-based pass started.
    pub selected_files: &'a HashSet<String>,
    /// Every candidate file, keyed by path.
    pub available_files: &'a HashMap<String, FileInfo>,
    /// Dependency map supplied by the caller (path to list of paths).
    pub dependencies: &'a HashMap<String, Vec<String>>,
    /// Interface map supplied by the caller (interface name to implementer paths).
    pub interfaces: &'a HashMap<String, Vec<String>>,
    /// Token budget of the rule-based pass at the time the context was built;
    /// it is not updated as that pass adds files.
    pub remaining_budget: usize,
}
107
/// Metadata about one candidate file.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Path of the file.
    pub path: String,
    /// Token cost charged against the budget when the file is selected.
    pub token_count: usize,
    /// Category string; this file matches on `"source"`, `"interface"`,
    /// `"config"` and `"test"`.
    pub file_type: String,
    /// Intrinsic importance score supplied by the caller.
    pub importance: f64,
    /// Paths this file depends on.
    pub dependencies: Vec<String>,
    /// Paths that depend on this file.
    pub dependents: Vec<String>,
    /// Names of interfaces this file exposes.
    pub exposed_interfaces: Vec<String>,
    /// Names of interfaces this file consumes.
    pub consumed_interfaces: Vec<String>,
}
128
/// Selects files within a token budget using a speculative pass followed by
/// a rule-based pass.
pub struct TwoPassSelector {
    /// Parameters controlling budget split, threshold and gap analysis.
    config: TwoPassConfig,
    /// Weighted rules evaluated during the rule-based pass.
    rules: Vec<SelectionRule>,
}
134
135impl TwoPassSelector {
136 pub fn new() -> Self {
138 Self {
139 config: TwoPassConfig::default(),
140 rules: Self::create_default_rules(),
141 }
142 }
143
144 pub fn with_config(config: TwoPassConfig) -> Self {
146 Self {
147 config,
148 rules: Self::create_default_rules(),
149 }
150 }
151
152 pub fn select_files(
154 &self,
155 available_files: &HashMap<String, FileInfo>,
156 dependencies: &HashMap<String, Vec<String>>,
157 interfaces: &HashMap<String, Vec<String>>,
158 total_budget: usize,
159 ) -> Result<TwoPassResult> {
160 let start_time = std::time::Instant::now();
161
162 let speculation_budget = (total_budget as f64 * self.config.speculation_ratio) as usize;
164 let speculation_start = std::time::Instant::now();
165
166 let speculative_files = self.speculative_pass(
167 available_files,
168 dependencies,
169 speculation_budget,
170 )?;
171
172 let speculation_time = speculation_start.elapsed().as_millis() as u64;
173
174 let rule_budget = total_budget - speculation_budget;
176 let rule_start = std::time::Instant::now();
177
178 let mut selected_files: HashSet<String> = speculative_files.iter().cloned().collect();
179
180 let (rule_based_files, coverage_gaps) = self.rule_based_pass(
181 &selected_files,
182 available_files,
183 dependencies,
184 interfaces,
185 rule_budget,
186 )?;
187
188 let rule_time = rule_start.elapsed().as_millis() as u64;
189
190 selected_files.extend(rule_based_files.iter().cloned());
192
193 let total_tokens: usize = selected_files.iter()
195 .filter_map(|f| available_files.get(f))
196 .map(|info| info.token_count)
197 .sum();
198
199 let budget_utilization = total_tokens as f64 / total_budget as f64;
200 let selection_score = self.calculate_selection_score(&selected_files, available_files)?;
201
202 let gaps_count = coverage_gaps.len();
203
204 Ok(TwoPassResult {
205 speculative_files,
206 rule_based_files,
207 coverage_gaps,
208 selection_score,
209 budget_utilization,
210 metrics: SelectionMetrics {
211 speculation_time_ms: speculation_time,
212 rule_based_time_ms: rule_time,
213 rules_evaluated: self.rules.len(),
214 gaps_found: gaps_count,
215 files_considered: available_files.len(),
216 },
217 })
218 }
219
220 fn speculative_pass(
222 &self,
223 available_files: &HashMap<String, FileInfo>,
224 dependencies: &HashMap<String, Vec<String>>,
225 budget: usize,
226 ) -> Result<Vec<String>> {
227 let mut selected = Vec::new();
228 let mut remaining_budget = budget;
229
230 let mut candidates: Vec<(&String, &FileInfo)> = available_files.iter().collect();
232 candidates.sort_by(|a, b| {
233 let score_a = a.1.importance * self.calculate_confidence(a.1, dependencies);
234 let score_b = b.1.importance * self.calculate_confidence(b.1, dependencies);
235 score_b.partial_cmp(&score_a).unwrap_or(std::cmp::Ordering::Equal)
236 });
237
238 for (file_path, file_info) in candidates {
240 let confidence = self.calculate_confidence(file_info, dependencies);
241
242 if confidence >= self.config.speculation_threshold
243 && file_info.token_count <= remaining_budget {
244 selected.push(file_path.clone());
245 remaining_budget -= file_info.token_count;
246 }
247 }
248
249 Ok(selected)
250 }
251
252 fn rule_based_pass(
254 &self,
255 selected_files: &HashSet<String>,
256 available_files: &HashMap<String, FileInfo>,
257 dependencies: &HashMap<String, Vec<String>>,
258 interfaces: &HashMap<String, Vec<String>>,
259 budget: usize,
260 ) -> Result<(Vec<String>, Vec<CoverageGap>)> {
261 let mut additional_files = Vec::new();
262 let mut coverage_gaps = Vec::new();
263 let mut remaining_budget = budget;
264
265 if self.config.enable_gap_analysis {
267 coverage_gaps = self.analyze_coverage_gaps(
268 selected_files,
269 available_files,
270 dependencies,
271 interfaces,
272 )?;
273 }
274
275 let context = SelectionContext {
277 selected_files,
278 available_files,
279 dependencies,
280 interfaces,
281 remaining_budget,
282 };
283
284 let mut rule_scores: HashMap<String, f64> = HashMap::new();
286
287 for (file_path, file_info) in available_files {
288 if !selected_files.contains(file_path) && file_info.token_count <= remaining_budget {
289 let mut total_score = 0.0;
290
291 for rule in &self.rules {
292 let rule_score = (rule.evaluator)(&context, file_path);
293 total_score += rule_score * rule.weight;
294 }
295
296 rule_scores.insert(file_path.clone(), total_score);
297 }
298 }
299
300 let mut sorted_scores: Vec<(&String, &f64)> = rule_scores.iter().collect();
302 sorted_scores.sort_by(|a, b| b.1.partial_cmp(a.1).unwrap_or(std::cmp::Ordering::Equal));
303
304 for (file_path, _score) in sorted_scores {
305 if let Some(file_info) = available_files.get(file_path) {
306 if file_info.token_count <= remaining_budget {
307 additional_files.push(file_path.clone());
308 remaining_budget -= file_info.token_count;
309 }
310 }
311 }
312
313 Ok((additional_files, coverage_gaps))
314 }
315
316 fn calculate_confidence(&self, file_info: &FileInfo, dependencies: &HashMap<String, Vec<String>>) -> f64 {
318 let mut confidence = 0.5; confidence += (file_info.dependents.len() as f64 * 0.1).min(0.3);
322
323 if !file_info.exposed_interfaces.is_empty() {
325 confidence += 0.2;
326 }
327
328 match file_info.file_type.as_str() {
330 "source" => confidence += 0.1,
331 "interface" => confidence += 0.2,
332 "config" => confidence += 0.05,
333 _ => {}
334 }
335
336 confidence.min(1.0)
337 }
338
339 fn analyze_coverage_gaps(
341 &self,
342 selected_files: &HashSet<String>,
343 available_files: &HashMap<String, FileInfo>,
344 dependencies: &HashMap<String, Vec<String>>,
345 interfaces: &HashMap<String, Vec<String>>,
346 ) -> Result<Vec<CoverageGap>> {
347 let mut gaps = Vec::new();
348
349 for selected_file in selected_files {
351 if let Some(file_info) = available_files.get(selected_file) {
352 for dep in &file_info.dependencies {
353 if !selected_files.contains(dep) && available_files.contains_key(dep) {
354 gaps.push(CoverageGap {
355 gap_type: "missing_dependency".to_string(),
356 severity: 0.8,
357 candidate_files: vec![dep.clone()],
358 reason: format!("{} depends on {}", selected_file, dep),
359 });
360 }
361 }
362 }
363 }
364
365 for (interface, implementers) in interfaces {
367 let has_implementation = implementers.iter().any(|imp| selected_files.contains(imp));
368 if !has_implementation && !implementers.is_empty() {
369 gaps.push(CoverageGap {
370 gap_type: "missing_interface_implementation".to_string(),
371 severity: 0.6,
372 candidate_files: implementers.clone(),
373 reason: format!("Interface {} has no selected implementations", interface),
374 });
375 }
376 }
377
378 let test_files: Vec<_> = selected_files.iter()
380 .filter(|f| available_files.get(*f).map_or(false, |info| info.file_type == "test"))
381 .collect();
382
383 for test_file in test_files {
384 if let Some(test_info) = available_files.get(test_file) {
385 let has_source = test_info.dependencies.iter()
386 .any(|dep| selected_files.contains(dep) &&
387 available_files.get(dep).map_or(false, |info| info.file_type == "source"));
388
389 if !has_source {
390 gaps.push(CoverageGap {
391 gap_type: "orphaned_test".to_string(),
392 severity: 0.4,
393 candidate_files: test_info.dependencies.clone(),
394 reason: format!("Test file {} has no corresponding source files selected", test_file),
395 });
396 }
397 }
398 }
399
400 Ok(gaps)
401 }
402
403 fn calculate_selection_score(
405 &self,
406 selected_files: &HashSet<String>,
407 available_files: &HashMap<String, FileInfo>,
408 ) -> Result<f64> {
409 if selected_files.is_empty() {
410 return Ok(0.0);
411 }
412
413 let mut total_importance = 0.0;
414 let mut total_files = 0.0;
415
416 for file_path in selected_files {
417 if let Some(file_info) = available_files.get(file_path) {
418 total_importance += file_info.importance;
419 total_files += 1.0;
420 }
421 }
422
423 Ok(total_importance / total_files)
424 }
425
426 fn create_default_rules() -> Vec<SelectionRule> {
428 vec![
429 SelectionRule {
430 name: "dependency_completeness".to_string(),
431 weight: 0.25,
432 evaluator: |context, file_path| {
433 if let Some(file_info) = context.available_files.get(file_path) {
434 let satisfies_dependencies = context.selected_files.iter()
436 .filter_map(|selected| context.available_files.get(selected))
437 .filter(|selected_info| selected_info.dependencies.contains(&file_path.to_string()))
438 .count();
439
440 let missing_deps = file_info.dependencies.iter()
442 .filter(|dep| !context.selected_files.contains(*dep))
443 .count();
444
445 let dependency_satisfaction_score = if satisfies_dependencies > 0 {
446 0.8 + (satisfies_dependencies as f64 * 0.1).min(0.2)
447 } else {
448 0.3
449 };
450
451 let completeness_score = if file_info.dependencies.is_empty() {
452 1.0 } else {
454 1.0 - (missing_deps as f64 / file_info.dependencies.len() as f64)
455 };
456
457 (dependency_satisfaction_score + completeness_score) / 2.0
458 } else {
459 0.0
460 }
461 },
462 description: "Prefer files that complete dependency chains".to_string(),
463 },
464 SelectionRule {
465 name: "interface_coverage".to_string(),
466 weight: 0.2,
467 evaluator: |context, file_path| {
468 if let Some(file_info) = context.available_files.get(file_path) {
469 let interface_score = file_info.exposed_interfaces.len() as f64 * 0.3;
470 let implementation_score = file_info.consumed_interfaces.len() as f64 * 0.1;
471 (interface_score + implementation_score).min(1.0)
472 } else {
473 0.0
474 }
475 },
476 description: "Prefer files that expose or implement important interfaces".to_string(),
477 },
478 SelectionRule {
479 name: "test_source_pairing".to_string(),
480 weight: 0.15,
481 evaluator: |context, file_path| {
482 if let Some(file_info) = context.available_files.get(file_path) {
483 if file_info.file_type == "test" {
484 let has_source = file_info.dependencies.iter()
486 .any(|dep| context.selected_files.contains(dep) &&
487 context.available_files.get(dep).map_or(false, |info| info.file_type == "source"));
488 if has_source { 1.0 } else { 0.2 }
489 } else if file_info.file_type == "source" {
490 let has_tests = file_info.dependents.iter()
492 .any(|dep| context.available_files.get(dep).map_or(false, |info| info.file_type == "test"));
493 if has_tests { 0.8 } else { 0.5 }
494 } else {
495 0.5
496 }
497 } else {
498 0.0
499 }
500 },
501 description: "Prefer test-source file pairings".to_string(),
502 },
503 SelectionRule {
504 name: "centrality_score".to_string(),
505 weight: 0.15,
506 evaluator: |context, file_path| {
507 if let Some(file_info) = context.available_files.get(file_path) {
508 let in_degree = file_info.dependents.len() as f64;
509 let out_degree = file_info.dependencies.len() as f64;
510 let centrality = (in_degree * 0.7 + out_degree * 0.3) / 10.0; centrality.min(1.0)
512 } else {
513 0.0
514 }
515 },
516 description: "Prefer files with high connectivity in dependency graph".to_string(),
517 },
518 SelectionRule {
519 name: "importance_alignment".to_string(),
520 weight: 0.1,
521 evaluator: |_context, file_path| {
522 if let Some(file_info) = _context.available_files.get(file_path) {
523 file_info.importance
524 } else {
525 0.0
526 }
527 },
528 description: "Prefer files with high intrinsic importance scores".to_string(),
529 },
530 SelectionRule {
531 name: "token_efficiency".to_string(),
532 weight: 0.08,
533 evaluator: |context, file_path| {
534 if let Some(file_info) = context.available_files.get(file_path) {
535 let efficiency = file_info.importance / (file_info.token_count as f64 / 1000.0).max(0.1);
536 efficiency.min(1.0)
537 } else {
538 0.0
539 }
540 },
541 description: "Prefer files with high importance-to-token ratio".to_string(),
542 },
543 SelectionRule {
544 name: "gap_filling".to_string(),
545 weight: 0.05,
546 evaluator: |context, file_path| {
547 if let Some(file_info) = context.available_files.get(file_path) {
548 let fills_dependency_gap = file_info.dependents.iter()
550 .any(|dep| context.selected_files.contains(dep));
551
552 let fills_interface_gap = !file_info.exposed_interfaces.is_empty() &&
553 file_info.exposed_interfaces.iter().any(|iface| {
554 context.interfaces.get(iface)
555 .map_or(false, |impls| impls.iter().any(|imp| context.selected_files.contains(imp)))
556 });
557
558 if fills_dependency_gap || fills_interface_gap {
559 0.8
560 } else {
561 0.3
562 }
563 } else {
564 0.0
565 }
566 },
567 description: "Prefer files that fill critical coverage gaps".to_string(),
568 },
569 SelectionRule {
570 name: "configuration_completeness".to_string(),
571 weight: 0.02,
572 evaluator: |context, file_path| {
573 if let Some(file_info) = context.available_files.get(file_path) {
574 if file_info.file_type == "config" {
575 let related_sources = context.selected_files.iter()
576 .filter(|f| context.available_files.get(*f).map_or(false, |info| info.file_type == "source"))
577 .count();
578
579 if related_sources > 0 {
580 0.7 } else {
582 0.2
583 }
584 } else {
585 0.5 }
587 } else {
588 0.0
589 }
590 },
591 description: "Include configuration files when relevant source code is selected".to_string(),
592 },
593 ]
594 }
595}
596
597impl Default for TwoPassSelector {
598 fn default() -> Self {
599 Self::new()
600 }
601}
602
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts a slice of string literals into owned strings.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Fixture: two source files, one integration test and one config file,
    /// keyed by their own path.
    fn create_test_files() -> HashMap<String, FileInfo> {
        let fixtures = vec![
            FileInfo {
                path: "src/main.rs".to_string(),
                token_count: 500,
                file_type: "source".to_string(),
                importance: 0.9,
                dependencies: strings(&["src/lib.rs"]),
                dependents: strings(&[]),
                exposed_interfaces: strings(&["Main"]),
                consumed_interfaces: strings(&["Library"]),
            },
            FileInfo {
                path: "src/lib.rs".to_string(),
                token_count: 800,
                file_type: "source".to_string(),
                importance: 0.8,
                dependencies: strings(&[]),
                dependents: strings(&["src/main.rs"]),
                exposed_interfaces: strings(&["Library"]),
                consumed_interfaces: strings(&[]),
            },
            FileInfo {
                path: "tests/integration_test.rs".to_string(),
                token_count: 300,
                file_type: "test".to_string(),
                importance: 0.6,
                dependencies: strings(&["src/lib.rs"]),
                dependents: strings(&[]),
                exposed_interfaces: strings(&[]),
                consumed_interfaces: strings(&["Library"]),
            },
            FileInfo {
                path: "config/settings.toml".to_string(),
                token_count: 100,
                file_type: "config".to_string(),
                importance: 0.3,
                dependencies: strings(&[]),
                dependents: strings(&[]),
                exposed_interfaces: strings(&[]),
                consumed_interfaces: strings(&[]),
            },
        ];

        fixtures
            .into_iter()
            .map(|info| (info.path.clone(), info))
            .collect()
    }

    /// Fixture: both `main.rs` and the integration test depend on `lib.rs`.
    fn create_test_dependencies() -> HashMap<String, Vec<String>> {
        [
            ("src/main.rs", "src/lib.rs"),
            ("tests/integration_test.rs", "src/lib.rs"),
        ]
        .iter()
        .map(|&(file, dep)| (file.to_string(), vec![dep.to_string()]))
        .collect()
    }

    /// Fixture: each interface is implemented by exactly one source file.
    fn create_test_interfaces() -> HashMap<String, Vec<String>> {
        [("Library", "src/lib.rs"), ("Main", "src/main.rs")]
            .iter()
            .map(|&(interface, implementer)| {
                (interface.to_string(), vec![implementer.to_string()])
            })
            .collect()
    }

    #[test]
    fn test_two_pass_selector_creation() {
        let selector = TwoPassSelector::new();

        assert_eq!(selector.config.speculation_ratio, 0.75);
        assert_eq!(selector.rules.len(), 8);
    }

    #[test]
    fn test_speculative_pass() {
        let selector = TwoPassSelector::new();
        let files = create_test_files();
        let dependencies = create_test_dependencies();

        let result = selector
            .speculative_pass(&files, &dependencies, 1000)
            .unwrap();

        assert!(!result.is_empty());

        // Print what was picked to make failures easier to diagnose.
        let picked = result
            .iter()
            .filter_map(|file_path| files.get(file_path).map(|file_info| (file_path, file_info)));
        for (file_path, file_info) in picked {
            let confidence = selector.calculate_confidence(file_info, &dependencies);
            println!("Selected: {} (importance: {}, confidence: {})", file_path, file_info.importance, confidence);
        }

        let has_high_importance_file = result
            .iter()
            .filter_map(|f| files.get(f))
            .any(|info| info.importance >= 0.8);
        assert!(has_high_importance_file, "Should select at least one high-importance file");
    }

    #[test]
    fn test_full_two_pass_selection() {
        let selector = TwoPassSelector::new();
        let files = create_test_files();
        let dependencies = create_test_dependencies();
        let interfaces = create_test_interfaces();

        let result = selector
            .select_files(&files, &dependencies, &interfaces, 1500)
            .unwrap();

        assert!(!result.speculative_files.is_empty());
        assert!(result.budget_utilization <= 1.0);
        assert!(result.selection_score > 0.0);
        assert!(result.metrics.files_considered > 0);
    }

    #[test]
    fn test_coverage_gap_analysis() {
        let selector = TwoPassSelector::new();
        let files = create_test_files();
        let dependencies = create_test_dependencies();
        let interfaces = create_test_interfaces();

        // Only main.rs is selected, so its dependency on lib.rs is a gap.
        let selected: HashSet<String> = std::iter::once("src/main.rs".to_string()).collect();
        let gaps = selector
            .analyze_coverage_gaps(&selected, &files, &dependencies, &interfaces)
            .unwrap();

        assert!(!gaps.is_empty());
        assert!(gaps.iter().any(|gap| gap.gap_type == "missing_dependency"));
    }

    #[test]
    fn test_rule_evaluation() {
        let selector = TwoPassSelector::new();
        let files = create_test_files();
        let dependencies = create_test_dependencies();
        let interfaces = create_test_interfaces();

        let selected: HashSet<String> = std::iter::once("src/main.rs".to_string()).collect();

        let context = SelectionContext {
            selected_files: &selected,
            available_files: &files,
            dependencies: &dependencies,
            interfaces: &interfaces,
            remaining_budget: 1000,
        };

        // Rule 0 is the dependency-completeness rule.
        let dep_rule = &selector.rules[0];
        let score = (dep_rule.evaluator)(&context, "src/lib.rs");
        println!("Dependency rule score for src/lib.rs: {}", score);
        assert!(score >= 0.5, "src/lib.rs should score well as it fills a dependency gap (score: {})", score);

        // Rule 1 is the interface-coverage rule.
        let interface_rule = &selector.rules[1];
        let interface_score = (interface_rule.evaluator)(&context, "src/lib.rs");
        println!("Interface rule score for src/lib.rs: {}", interface_score);
        assert!(interface_score > 0.0, "src/lib.rs should have some interface score");
    }
}