1use rayon::prelude::*;
7use scribe_core::{Result, ScribeError};
8use serde::{Deserialize, Serialize};
9use std::collections::{HashMap, HashSet};
10use std::path::Path;
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct TwoPassConfig {
15 pub speculation_ratio: f64,
17 pub speculation_threshold: f64,
19 pub max_iterations: usize,
21 pub enable_gap_analysis: bool,
23}
24
25impl Default for TwoPassConfig {
26 fn default() -> Self {
27 Self {
28 speculation_ratio: 0.75, speculation_threshold: 0.5, max_iterations: 3,
31 enable_gap_analysis: true,
32 }
33 }
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct TwoPassResult {
39 pub speculative_files: Vec<String>,
41 pub rule_based_files: Vec<String>,
43 pub coverage_gaps: Vec<CoverageGap>,
45 pub selection_score: f64,
47 pub budget_utilization: f64,
49 pub metrics: SelectionMetrics,
51}
52
53#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct CoverageGap {
56 pub gap_type: String,
58 pub severity: f64,
60 pub candidate_files: Vec<String>,
62 pub reason: String,
64}
65
66#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct SelectionMetrics {
69 pub speculation_time_ms: u64,
71 pub rule_based_time_ms: u64,
73 pub rules_evaluated: usize,
75 pub gaps_found: usize,
77 pub files_considered: usize,
79}
80
81#[derive(Debug, Clone)]
83pub struct SelectionRule {
84 pub name: String,
86 pub weight: f64,
88 pub evaluator: fn(&SelectionContext, &str) -> f64,
90 pub description: String,
92}
93
94#[derive(Debug)]
96pub struct SelectionContext<'a> {
97 pub selected_files: &'a HashSet<String>,
99 pub available_files: &'a HashMap<String, FileInfo>,
101 pub dependencies: &'a HashMap<String, Vec<String>>,
103 pub interfaces: &'a HashMap<String, Vec<String>>,
105 pub remaining_budget: usize,
107 pub dependents_map: &'a HashMap<String, Vec<String>>,
109 pub selected_source_count: usize,
111}
112
/// Metadata describing one candidate file.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Path of the file.
    pub path: String,
    /// Token cost charged against the budget when the file is selected.
    pub token_count: usize,
    /// Category tag; this file distinguishes `"source"`, `"interface"`,
    /// `"config"` and `"test"`.
    pub file_type: String,
    /// Intrinsic importance score of the file.
    pub importance: f64,
    /// Paths this file depends on.
    pub dependencies: Vec<String>,
    /// Paths that depend on this file.
    pub dependents: Vec<String>,
    /// Names of the interfaces this file exposes.
    pub exposed_interfaces: Vec<String>,
    /// Names of the interfaces this file consumes.
    pub consumed_interfaces: Vec<String>,
}
133
134pub struct TwoPassSelector {
136 config: TwoPassConfig,
137 rules: Vec<SelectionRule>,
138}
139
140impl TwoPassSelector {
141 pub fn new() -> Self {
143 Self {
144 config: TwoPassConfig::default(),
145 rules: Self::create_default_rules(),
146 }
147 }
148
149 pub fn with_config(config: TwoPassConfig) -> Self {
151 Self {
152 config,
153 rules: Self::create_default_rules(),
154 }
155 }
156
157 pub fn select_files(
159 &self,
160 available_files: &HashMap<String, FileInfo>,
161 dependencies: &HashMap<String, Vec<String>>,
162 interfaces: &HashMap<String, Vec<String>>,
163 total_budget: usize,
164 ) -> Result<TwoPassResult> {
165 let start_time = std::time::Instant::now();
166
167 let speculation_budget = (total_budget as f64 * self.config.speculation_ratio) as usize;
169 let speculation_start = std::time::Instant::now();
170
171 let speculative_files =
172 self.speculative_pass(available_files, dependencies, speculation_budget)?;
173
174 let speculation_time = speculation_start.elapsed().as_millis() as u64;
175
176 let rule_budget = total_budget - speculation_budget;
178 let rule_start = std::time::Instant::now();
179
180 let mut selected_files: HashSet<String> = speculative_files.iter().cloned().collect();
181
182 let (rule_based_files, coverage_gaps) = self.rule_based_pass(
183 &selected_files,
184 available_files,
185 dependencies,
186 interfaces,
187 rule_budget,
188 )?;
189
190 let rule_time = rule_start.elapsed().as_millis() as u64;
191
192 selected_files.extend(rule_based_files.iter().cloned());
194
195 let total_tokens: usize = selected_files
197 .iter()
198 .filter_map(|f| available_files.get(f))
199 .map(|info| info.token_count)
200 .sum();
201
202 let budget_utilization = total_tokens as f64 / total_budget as f64;
203 let selection_score = self.calculate_selection_score(&selected_files, available_files)?;
204
205 let gaps_count = coverage_gaps.len();
206
207 Ok(TwoPassResult {
208 speculative_files,
209 rule_based_files,
210 coverage_gaps,
211 selection_score,
212 budget_utilization,
213 metrics: SelectionMetrics {
214 speculation_time_ms: speculation_time,
215 rule_based_time_ms: rule_time,
216 rules_evaluated: self.rules.len(),
217 gaps_found: gaps_count,
218 files_considered: available_files.len(),
219 },
220 })
221 }
222
223 fn speculative_pass(
225 &self,
226 available_files: &HashMap<String, FileInfo>,
227 dependencies: &HashMap<String, Vec<String>>,
228 budget: usize,
229 ) -> Result<Vec<String>> {
230 let mut selected = Vec::new();
231 let mut remaining_budget = budget;
232
233 let mut candidates: Vec<(&String, &FileInfo, f64)> = available_files
235 .par_iter()
236 .map(|(file_path, file_info)| {
237 let confidence = self.calculate_confidence(file_info, dependencies);
238 (file_path, file_info, confidence)
239 })
240 .collect();
241
242 candidates.sort_by(|a, b| {
243 let score_a = a.1.importance * a.2; let score_b = b.1.importance * b.2; score_b
246 .partial_cmp(&score_a)
247 .unwrap_or(std::cmp::Ordering::Equal)
248 });
249
250 for (file_path, file_info, confidence) in candidates {
252 if confidence >= self.config.speculation_threshold
253 && file_info.token_count <= remaining_budget
254 {
255 selected.push(file_path.clone());
256 remaining_budget -= file_info.token_count;
257 }
258 }
259
260 Ok(selected)
261 }
262
263 fn rule_based_pass(
265 &self,
266 selected_files: &HashSet<String>,
267 available_files: &HashMap<String, FileInfo>,
268 dependencies: &HashMap<String, Vec<String>>,
269 interfaces: &HashMap<String, Vec<String>>,
270 budget: usize,
271 ) -> Result<(Vec<String>, Vec<CoverageGap>)> {
272 let mut additional_files = Vec::new();
273 let mut coverage_gaps = Vec::new();
274 let mut remaining_budget = budget;
275
276 if self.config.enable_gap_analysis {
278 coverage_gaps = self.analyze_coverage_gaps(
279 selected_files,
280 available_files,
281 dependencies,
282 interfaces,
283 )?;
284 }
285
286 let mut dependents_map: HashMap<String, Vec<String>> = HashMap::new();
288 for (file_path, file_info) in available_files {
289 for dep in &file_info.dependencies {
290 dependents_map
291 .entry(dep.clone())
292 .or_default()
293 .push(file_path.clone());
294 }
295 }
296
297 let selected_source_count = selected_files
299 .iter()
300 .filter(|f| {
301 available_files
302 .get(*f)
303 .map_or(false, |info| info.file_type == "source")
304 })
305 .count();
306
307 let context = SelectionContext {
309 selected_files,
310 available_files,
311 dependencies,
312 interfaces,
313 remaining_budget,
314 dependents_map: &dependents_map,
315 selected_source_count,
316 };
317
318 let rule_scores: HashMap<String, f64> = available_files
320 .par_iter()
321 .filter(|(file_path, file_info)| {
322 !selected_files.contains(*file_path) && file_info.token_count <= remaining_budget
323 })
324 .map(|(file_path, _file_info)| {
325 let total_score = self
326 .rules
327 .iter()
328 .map(|rule| (rule.evaluator)(&context, file_path) * rule.weight)
329 .sum();
330 (file_path.clone(), total_score)
331 })
332 .collect();
333
334 let mut sorted_scores: Vec<(&String, &f64)> = rule_scores.iter().collect();
336 sorted_scores.sort_by(|a, b| b.1.partial_cmp(a.1).unwrap_or(std::cmp::Ordering::Equal));
337
338 for (file_path, _score) in sorted_scores {
339 if let Some(file_info) = available_files.get(file_path) {
340 if file_info.token_count <= remaining_budget {
341 additional_files.push(file_path.clone());
342 remaining_budget -= file_info.token_count;
343 }
344 }
345 }
346
347 Ok((additional_files, coverage_gaps))
348 }
349
350 fn calculate_confidence(
352 &self,
353 file_info: &FileInfo,
354 dependencies: &HashMap<String, Vec<String>>,
355 ) -> f64 {
356 let mut confidence = 0.5; confidence += (file_info.dependents.len() as f64 * 0.1).min(0.3);
360
361 if !file_info.exposed_interfaces.is_empty() {
363 confidence += 0.2;
364 }
365
366 match file_info.file_type.as_str() {
368 "source" => confidence += 0.1,
369 "interface" => confidence += 0.2,
370 "config" => confidence += 0.05,
371 _ => {}
372 }
373
374 confidence.min(1.0)
375 }
376
377 fn analyze_coverage_gaps(
379 &self,
380 selected_files: &HashSet<String>,
381 available_files: &HashMap<String, FileInfo>,
382 dependencies: &HashMap<String, Vec<String>>,
383 interfaces: &HashMap<String, Vec<String>>,
384 ) -> Result<Vec<CoverageGap>> {
385 let mut gaps = Vec::new();
386
387 for selected_file in selected_files {
389 if let Some(file_info) = available_files.get(selected_file) {
390 for dep in &file_info.dependencies {
391 if !selected_files.contains(dep) && available_files.contains_key(dep) {
392 gaps.push(CoverageGap {
393 gap_type: "missing_dependency".to_string(),
394 severity: 0.8,
395 candidate_files: vec![dep.clone()],
396 reason: format!("{} depends on {}", selected_file, dep),
397 });
398 }
399 }
400 }
401 }
402
403 for (interface, implementers) in interfaces {
405 let has_implementation = implementers.iter().any(|imp| selected_files.contains(imp));
406 if !has_implementation && !implementers.is_empty() {
407 gaps.push(CoverageGap {
408 gap_type: "missing_interface_implementation".to_string(),
409 severity: 0.6,
410 candidate_files: implementers.clone(),
411 reason: format!("Interface {} has no selected implementations", interface),
412 });
413 }
414 }
415
416 let test_files: Vec<_> = selected_files
418 .iter()
419 .filter(|f| {
420 available_files
421 .get(*f)
422 .map_or(false, |info| info.file_type == "test")
423 })
424 .collect();
425
426 for test_file in test_files {
427 if let Some(test_info) = available_files.get(test_file) {
428 let has_source = test_info.dependencies.iter().any(|dep| {
429 selected_files.contains(dep)
430 && available_files
431 .get(dep)
432 .map_or(false, |info| info.file_type == "source")
433 });
434
435 if !has_source {
436 gaps.push(CoverageGap {
437 gap_type: "orphaned_test".to_string(),
438 severity: 0.4,
439 candidate_files: test_info.dependencies.clone(),
440 reason: format!(
441 "Test file {} has no corresponding source files selected",
442 test_file
443 ),
444 });
445 }
446 }
447 }
448
449 Ok(gaps)
450 }
451
452 fn calculate_selection_score(
454 &self,
455 selected_files: &HashSet<String>,
456 available_files: &HashMap<String, FileInfo>,
457 ) -> Result<f64> {
458 if selected_files.is_empty() {
459 return Ok(0.0);
460 }
461
462 let mut total_importance = 0.0;
463 let mut total_files = 0.0;
464
465 for file_path in selected_files {
466 if let Some(file_info) = available_files.get(file_path) {
467 total_importance += file_info.importance;
468 total_files += 1.0;
469 }
470 }
471
472 Ok(total_importance / total_files)
473 }
474
475 fn create_default_rules() -> Vec<SelectionRule> {
477 vec![
478 SelectionRule {
479 name: "dependency_completeness".to_string(),
480 weight: 0.25,
481 evaluator: |context, file_path| {
482 if let Some(file_info) = context.available_files.get(file_path) {
483 let satisfies_dependencies = context
485 .dependents_map
486 .get(file_path)
487 .map(|dependents| {
488 dependents
489 .iter()
490 .filter(|dependent| context.selected_files.contains(*dependent))
491 .count()
492 })
493 .unwrap_or(0);
494
495 let missing_deps = file_info
497 .dependencies
498 .iter()
499 .filter(|dep| !context.selected_files.contains(*dep))
500 .count();
501
502 let dependency_satisfaction_score = if satisfies_dependencies > 0 {
503 0.8 + (satisfies_dependencies as f64 * 0.1).min(0.2)
504 } else {
505 0.3
506 };
507
508 let completeness_score = if file_info.dependencies.is_empty() {
509 1.0 } else {
511 1.0 - (missing_deps as f64 / file_info.dependencies.len() as f64)
512 };
513
514 (dependency_satisfaction_score + completeness_score) / 2.0
515 } else {
516 0.0
517 }
518 },
519 description: "Prefer files that complete dependency chains".to_string(),
520 },
521 SelectionRule {
522 name: "interface_coverage".to_string(),
523 weight: 0.2,
524 evaluator: |context, file_path| {
525 if let Some(file_info) = context.available_files.get(file_path) {
526 let interface_score = file_info.exposed_interfaces.len() as f64 * 0.3;
527 let implementation_score = file_info.consumed_interfaces.len() as f64 * 0.1;
528 (interface_score + implementation_score).min(1.0)
529 } else {
530 0.0
531 }
532 },
533 description: "Prefer files that expose or implement important interfaces"
534 .to_string(),
535 },
536 SelectionRule {
537 name: "test_source_pairing".to_string(),
538 weight: 0.15,
539 evaluator: |context, file_path| {
540 if let Some(file_info) = context.available_files.get(file_path) {
541 if file_info.file_type == "test" {
542 let has_source = file_info.dependencies.iter().any(|dep| {
544 context.selected_files.contains(dep)
545 && context
546 .available_files
547 .get(dep)
548 .map_or(false, |info| info.file_type == "source")
549 });
550 if has_source {
551 1.0
552 } else {
553 0.2
554 }
555 } else if file_info.file_type == "source" {
556 let has_tests = file_info.dependents.iter().any(|dep| {
558 context
559 .available_files
560 .get(dep)
561 .map_or(false, |info| info.file_type == "test")
562 });
563 if has_tests {
564 0.8
565 } else {
566 0.5
567 }
568 } else {
569 0.5
570 }
571 } else {
572 0.0
573 }
574 },
575 description: "Prefer test-source file pairings".to_string(),
576 },
577 SelectionRule {
578 name: "centrality_score".to_string(),
579 weight: 0.15,
580 evaluator: |context, file_path| {
581 if let Some(file_info) = context.available_files.get(file_path) {
582 let in_degree = file_info.dependents.len() as f64;
583 let out_degree = file_info.dependencies.len() as f64;
584 let centrality = (in_degree * 0.7 + out_degree * 0.3) / 10.0; centrality.min(1.0)
586 } else {
587 0.0
588 }
589 },
590 description: "Prefer files with high connectivity in dependency graph".to_string(),
591 },
592 SelectionRule {
593 name: "importance_alignment".to_string(),
594 weight: 0.1,
595 evaluator: |_context, file_path| {
596 if let Some(file_info) = _context.available_files.get(file_path) {
597 file_info.importance
598 } else {
599 0.0
600 }
601 },
602 description: "Prefer files with high intrinsic importance scores".to_string(),
603 },
604 SelectionRule {
605 name: "token_efficiency".to_string(),
606 weight: 0.08,
607 evaluator: |context, file_path| {
608 if let Some(file_info) = context.available_files.get(file_path) {
609 let efficiency =
610 file_info.importance / (file_info.token_count as f64 / 1000.0).max(0.1);
611 efficiency.min(1.0)
612 } else {
613 0.0
614 }
615 },
616 description: "Prefer files with high importance-to-token ratio".to_string(),
617 },
618 SelectionRule {
619 name: "gap_filling".to_string(),
620 weight: 0.05,
621 evaluator: |context, file_path| {
622 if let Some(file_info) = context.available_files.get(file_path) {
623 let fills_dependency_gap = file_info
625 .dependents
626 .iter()
627 .any(|dep| context.selected_files.contains(dep));
628
629 let fills_interface_gap = !file_info.exposed_interfaces.is_empty()
630 && file_info.exposed_interfaces.iter().any(|iface| {
631 context.interfaces.get(iface).map_or(false, |impls| {
632 impls.iter().any(|imp| context.selected_files.contains(imp))
633 })
634 });
635
636 if fills_dependency_gap || fills_interface_gap {
637 0.8
638 } else {
639 0.3
640 }
641 } else {
642 0.0
643 }
644 },
645 description: "Prefer files that fill critical coverage gaps".to_string(),
646 },
647 SelectionRule {
648 name: "configuration_completeness".to_string(),
649 weight: 0.02,
650 evaluator: |context, file_path| {
651 if let Some(file_info) = context.available_files.get(file_path) {
652 if file_info.file_type == "config" {
653 if context.selected_source_count > 0 {
655 0.7 } else {
657 0.2
658 }
659 } else {
660 0.5 }
662 } else {
663 0.0
664 }
665 },
666 description: "Include configuration files when relevant source code is selected"
667 .to_string(),
668 },
669 ]
670 }
671}
672
673impl Default for TwoPassSelector {
674 fn default() -> Self {
675 Self::new()
676 }
677}
678
679#[cfg(test)]
680mod tests {
681 use super::*;
682
683 fn create_test_files() -> HashMap<String, FileInfo> {
684 let mut files = HashMap::new();
685
686 files.insert(
687 "src/main.rs".to_string(),
688 FileInfo {
689 path: "src/main.rs".to_string(),
690 token_count: 500,
691 file_type: "source".to_string(),
692 importance: 0.9,
693 dependencies: vec!["src/lib.rs".to_string()],
694 dependents: vec![],
695 exposed_interfaces: vec!["Main".to_string()],
696 consumed_interfaces: vec!["Library".to_string()],
697 },
698 );
699
700 files.insert(
701 "src/lib.rs".to_string(),
702 FileInfo {
703 path: "src/lib.rs".to_string(),
704 token_count: 800,
705 file_type: "source".to_string(),
706 importance: 0.8,
707 dependencies: vec![],
708 dependents: vec!["src/main.rs".to_string()],
709 exposed_interfaces: vec!["Library".to_string()],
710 consumed_interfaces: vec![],
711 },
712 );
713
714 files.insert(
715 "tests/integration_test.rs".to_string(),
716 FileInfo {
717 path: "tests/integration_test.rs".to_string(),
718 token_count: 300,
719 file_type: "test".to_string(),
720 importance: 0.6,
721 dependencies: vec!["src/lib.rs".to_string()],
722 dependents: vec![],
723 exposed_interfaces: vec![],
724 consumed_interfaces: vec!["Library".to_string()],
725 },
726 );
727
728 files.insert(
729 "config/settings.toml".to_string(),
730 FileInfo {
731 path: "config/settings.toml".to_string(),
732 token_count: 100,
733 file_type: "config".to_string(),
734 importance: 0.3,
735 dependencies: vec![],
736 dependents: vec![],
737 exposed_interfaces: vec![],
738 consumed_interfaces: vec![],
739 },
740 );
741
742 files
743 }
744
/// Fixture: both `src/main.rs` and the integration test depend on `src/lib.rs`.
fn create_test_dependencies() -> HashMap<String, Vec<String>> {
    ["src/main.rs", "tests/integration_test.rs"]
        .iter()
        .map(|dependent| (dependent.to_string(), vec!["src/lib.rs".to_string()]))
        .collect()
}
754
/// Fixture: `Library` is provided by `src/lib.rs` and `Main` by `src/main.rs`.
fn create_test_interfaces() -> HashMap<String, Vec<String>> {
    [("Library", "src/lib.rs"), ("Main", "src/main.rs")]
        .iter()
        .map(|(interface, provider)| (interface.to_string(), vec![provider.to_string()]))
        .collect()
}
761
// A fresh selector carries the default ratio and all eight built-in rules.
#[test]
fn test_two_pass_selector_creation() {
    let TwoPassSelector { config, rules } = TwoPassSelector::new();

    assert_eq!(config.speculation_ratio, 0.75);
    assert_eq!(rules.len(), 8);
}
768
// The speculative pass picks something, including at least one high-importance file.
#[test]
fn test_speculative_pass() {
    let selector = TwoPassSelector::new();
    let files = create_test_files();
    let dependencies = create_test_dependencies();

    let picked = selector
        .speculative_pass(&files, &dependencies, 1000)
        .unwrap();

    assert!(!picked.is_empty());

    // Print every pick together with its inputs to ease debugging.
    let picked_infos = picked
        .iter()
        .filter_map(|path| files.get(path).map(|info| (path, info)));
    for (file_path, file_info) in picked_infos {
        let confidence = selector.calculate_confidence(file_info, &dependencies);
        println!(
            "Selected: {} (importance: {}, confidence: {})",
            file_path, file_info.importance, confidence
        );
    }

    let has_high_importance_file = picked
        .iter()
        .filter_map(|path| files.get(path))
        .any(|info| info.importance >= 0.8);
    assert!(
        has_high_importance_file,
        "Should select at least one high-importance file"
    );
}
801
// End-to-end run: the result stays within budget and reports sane metrics.
#[test]
fn test_full_two_pass_selection() {
    let files = create_test_files();
    let dependencies = create_test_dependencies();
    let interfaces = create_test_interfaces();
    let total_budget = 1500;

    let outcome = TwoPassSelector::new()
        .select_files(&files, &dependencies, &interfaces, total_budget)
        .unwrap();

    assert!(!outcome.speculative_files.is_empty());
    assert!(outcome.budget_utilization <= 1.0);
    assert!(outcome.selection_score > 0.0);
    assert!(outcome.metrics.files_considered > 0);
}
818
// Selecting only `src/main.rs` must surface its unselected dependency as a gap.
#[test]
fn test_coverage_gap_analysis() {
    let selector = TwoPassSelector::new();
    let files = create_test_files();
    let dependencies = create_test_dependencies();
    let interfaces = create_test_interfaces();

    let selected: HashSet<String> = std::iter::once("src/main.rs".to_string()).collect();

    let gaps = selector
        .analyze_coverage_gaps(&selected, &files, &dependencies, &interfaces)
        .unwrap();

    assert!(!gaps.is_empty());
    let reports_missing_dependency = gaps
        .iter()
        .any(|gap| gap.gap_type == "missing_dependency");
    assert!(reports_missing_dependency);
}
838
// With `src/main.rs` selected, the first two rules must rate `src/lib.rs` favourably.
#[test]
fn test_rule_evaluation() {
    let selector = TwoPassSelector::new();
    let files = create_test_files();
    let dependencies = create_test_dependencies();
    let interfaces = create_test_interfaces();

    let selected: HashSet<String> = std::iter::once("src/main.rs".to_string()).collect();

    // Same reverse index the rule-based pass builds: dependency -> dependents.
    let mut dependents_map: HashMap<String, Vec<String>> = HashMap::new();
    for (dependent, info) in &files {
        for dependency in &info.dependencies {
            dependents_map
                .entry(dependency.clone())
                .or_default()
                .push(dependent.clone());
        }
    }

    let selected_source_count = selected
        .iter()
        .filter_map(|path| files.get(path))
        .filter(|info| info.file_type == "source")
        .count();

    let context = SelectionContext {
        selected_files: &selected,
        available_files: &files,
        dependencies: &dependencies,
        interfaces: &interfaces,
        remaining_budget: 1000,
        dependents_map: &dependents_map,
        selected_source_count,
    };

    // Rule 0 is `dependency_completeness`.
    let dep_rule = &selector.rules[0];
    let score = (dep_rule.evaluator)(&context, "src/lib.rs");
    println!("Dependency rule score for src/lib.rs: {}", score);
    assert!(
        score >= 0.5,
        "src/lib.rs should score well as it fills a dependency gap (score: {})",
        score
    );

    // Rule 1 is `interface_coverage`.
    let interface_rule = &selector.rules[1];
    let interface_score = (interface_rule.evaluator)(&context, "src/lib.rs");
    println!("Interface rule score for src/lib.rs: {}", interface_score);
    assert!(
        interface_score > 0.0,
        "src/lib.rs should have some interface score"
    );
}
901}