1use crate::ml_features::InferredOwnership;
33use serde::{Deserialize, Serialize};
34
/// One labelled validation example: the known-correct classification together
/// with what the rule-based classifier and the ML classifier each predicted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationSample {
    /// The correct ownership classification that predictions are compared against.
    pub ground_truth: InferredOwnership,
    /// The classification produced by the rule-based approach.
    pub rule_prediction: InferredOwnership,
    /// The classification produced by the ML model.
    pub ml_prediction: InferredOwnership,
    /// The ML model's confidence in `ml_prediction`.
    ///
    /// `ValidationSample::new` clamps this into `[0.0, 1.0]`; because the field
    /// is public, values set directly are not clamped.
    pub ml_confidence: f64,
}
47
48impl ValidationSample {
49 pub fn new(
51 ground_truth: InferredOwnership,
52 rule_prediction: InferredOwnership,
53 ml_prediction: InferredOwnership,
54 ml_confidence: f64,
55 ) -> Self {
56 Self {
57 ground_truth,
58 rule_prediction,
59 ml_prediction,
60 ml_confidence: ml_confidence.clamp(0.0, 1.0),
61 }
62 }
63
64 pub fn rule_correct(&self) -> bool {
66 self.rule_prediction == self.ground_truth
67 }
68
69 pub fn ml_correct(&self) -> bool {
71 self.ml_prediction == self.ground_truth
72 }
73
74 pub fn hybrid_prediction(&self, threshold: f64) -> InferredOwnership {
76 if self.ml_confidence >= threshold {
77 self.ml_prediction
78 } else {
79 self.rule_prediction
80 }
81 }
82
83 pub fn hybrid_correct(&self, threshold: f64) -> bool {
85 self.hybrid_prediction(threshold) == self.ground_truth
86 }
87}
88
/// Quality metrics of the hybrid (rules + ML) prediction at one confidence
/// threshold, as produced by `ThresholdMetrics::calculate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThresholdMetrics {
    /// The confidence threshold these metrics were computed for.
    pub threshold: f64,
    /// Number of samples evaluated.
    pub sample_count: usize,
    /// Fraction of samples whose hybrid prediction matched the ground truth.
    pub accuracy: f64,
    /// Precision as computed by `calculate`: each correct sample counts as a
    /// true positive and each incorrect sample as a false positive, so the
    /// value coincides with `accuracy`.
    pub precision: f64,
    /// Recall as computed by `calculate`: each incorrect sample also counts as
    /// a false negative, so the value coincides with `accuracy`.
    pub recall: f64,
    /// Harmonic mean of `precision` and `recall` (0.0 when both are 0.0).
    pub f1_score: f64,
    /// Fraction of samples for which the rule-based prediction was used
    /// (1.0 when there were no samples).
    pub fallback_rate: f64,
    /// Fraction of samples for which the ML prediction was used
    /// (0.0 when there were no samples).
    pub ml_usage_rate: f64,
}
109
110impl ThresholdMetrics {
111 pub fn calculate(samples: &[ValidationSample], threshold: f64) -> Self {
113 if samples.is_empty() {
114 return Self {
115 threshold,
116 sample_count: 0,
117 accuracy: 0.0,
118 precision: 0.0,
119 recall: 0.0,
120 f1_score: 0.0,
121 fallback_rate: 1.0,
122 ml_usage_rate: 0.0,
123 };
124 }
125
126 let sample_count = samples.len();
127 let mut correct = 0;
128 let mut using_ml = 0;
129 let mut using_rules = 0;
130
131 let mut true_positives = 0;
133 let mut false_positives = 0;
134 let mut false_negatives = 0;
135
136 for sample in samples {
137 let prediction = sample.hybrid_prediction(threshold);
138 let is_correct = prediction == sample.ground_truth;
139
140 if is_correct {
141 correct += 1;
142 true_positives += 1;
143 } else {
144 false_positives += 1;
146 false_negatives += 1;
147 }
148
149 if sample.ml_confidence >= threshold {
150 using_ml += 1;
151 } else {
152 using_rules += 1;
153 }
154 }
155
156 let accuracy = correct as f64 / sample_count as f64;
157 let fallback_rate = using_rules as f64 / sample_count as f64;
158 let ml_usage_rate = using_ml as f64 / sample_count as f64;
159
160 let precision = if true_positives + false_positives > 0 {
162 true_positives as f64 / (true_positives + false_positives) as f64
163 } else {
164 0.0
165 };
166
167 let recall = if true_positives + false_negatives > 0 {
168 true_positives as f64 / (true_positives + false_negatives) as f64
169 } else {
170 0.0
171 };
172
173 let f1_score = if precision + recall > 0.0 {
174 2.0 * precision * recall / (precision + recall)
175 } else {
176 0.0
177 };
178
179 Self {
180 threshold,
181 sample_count,
182 accuracy,
183 precision,
184 recall,
185 f1_score,
186 fallback_rate,
187 ml_usage_rate,
188 }
189 }
190}
191
/// Strategy `ThresholdTuner` uses to pick the optimal threshold among its
/// candidates.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SelectionCriteria {
    /// Pick the candidate with the highest accuracy.
    MaxAccuracy,
    /// Pick the candidate with the highest F1 score.
    MaxF1,
    /// Pick the candidate maximising `0.7 * accuracy + 0.3 * ml_usage_rate`.
    BalancedAccuracyFallback,
    /// Among candidates whose accuracy is at least the rules-only baseline,
    /// pick the one with the lowest fallback rate; if none qualifies, pick the
    /// candidate with the highest accuracy.
    MinFallbackAboveBaseline,
}
205
206impl std::fmt::Display for SelectionCriteria {
207 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
208 match self {
209 SelectionCriteria::MaxAccuracy => write!(f, "max-accuracy"),
210 SelectionCriteria::MaxF1 => write!(f, "max-f1"),
211 SelectionCriteria::BalancedAccuracyFallback => write!(f, "balanced"),
212 SelectionCriteria::MinFallbackAboveBaseline => write!(f, "min-fallback"),
213 }
214 }
215}
216
/// Outcome of a `ThresholdTuner::tune` run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TuningResult {
    /// The threshold of the selected candidate.
    pub optimal_threshold: f64,
    /// Display label of the `SelectionCriteria` that was applied.
    pub criteria: String,
    /// Metrics of the selected candidate.
    pub optimal_metrics: ThresholdMetrics,
    /// Metrics for every candidate threshold that was evaluated
    /// (empty when tuning was run without samples).
    pub all_thresholds: Vec<ThresholdMetrics>,
    /// Accuracy of the rule-based predictions alone.
    pub baseline_accuracy: f64,
    /// Accuracy of the ML predictions alone.
    pub ml_only_accuracy: f64,
    /// `optimal_metrics.accuracy - baseline_accuracy`; a fraction, not a percentage.
    pub improvement_over_baseline: f64,
}
235
236impl TuningResult {
237 pub fn to_markdown(&self) -> String {
239 let mut threshold_table = String::from(
240 "| Threshold | Accuracy | F1 | Fallback Rate | ML Usage |\n|-----------|----------|----|--------------|---------|\n",
241 );
242
243 for m in &self.all_thresholds {
244 threshold_table.push_str(&format!(
245 "| {:.2} | {:.1}% | {:.3} | {:.1}% | {:.1}% |\n",
246 m.threshold,
247 m.accuracy * 100.0,
248 m.f1_score,
249 m.fallback_rate * 100.0,
250 m.ml_usage_rate * 100.0,
251 ));
252 }
253
254 format!(
255 r#"## Threshold Tuning Report
256
257### Optimal Configuration
258
259| Parameter | Value |
260|-----------|-------|
261| **Optimal Threshold** | {:.2} |
262| **Selection Criteria** | {} |
263| **Accuracy** | {:.1}% |
264| **F1 Score** | {:.3} |
265| **Fallback Rate** | {:.1}% |
266
267### Comparison to Baselines
268
269| Method | Accuracy |
270|--------|----------|
271| Rules Only (baseline) | {:.1}% |
272| ML Only (threshold=0) | {:.1}% |
273| **Hybrid (optimal)** | **{:.1}%** |
274| Improvement | {:+.1}% |
275
276### All Thresholds
277
278{}
279
280### Recommendation
281
282{}
283"#,
284 self.optimal_threshold,
285 self.criteria,
286 self.optimal_metrics.accuracy * 100.0,
287 self.optimal_metrics.f1_score,
288 self.optimal_metrics.fallback_rate * 100.0,
289 self.baseline_accuracy * 100.0,
290 self.ml_only_accuracy * 100.0,
291 self.optimal_metrics.accuracy * 100.0,
292 self.improvement_over_baseline * 100.0,
293 threshold_table,
294 if self.improvement_over_baseline > 0.0 {
295 format!(
296 "✅ **ADOPT HYBRID**: {:.1}% accuracy improvement at threshold {:.2}",
297 self.improvement_over_baseline * 100.0,
298 self.optimal_threshold
299 )
300 } else {
301 "❌ **KEEP RULES ONLY**: No improvement from ML enhancement".to_string()
302 }
303 )
304 }
305}
306
/// Evaluates a list of candidate confidence thresholds against validation
/// samples and selects one according to a `SelectionCriteria`.
#[derive(Debug, Clone)]
pub struct ThresholdTuner {
    /// Candidate thresholds to evaluate.
    candidates: Vec<f64>,
    /// Rule used to choose among the evaluated candidates.
    criteria: SelectionCriteria,
}
315
316impl Default for ThresholdTuner {
317 fn default() -> Self {
318 Self::new()
319 }
320}
321
322impl ThresholdTuner {
323 pub fn new() -> Self {
325 Self {
326 candidates: vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.65, 0.7, 0.8, 0.9],
327 criteria: SelectionCriteria::MaxAccuracy,
328 }
329 }
330
331 pub fn with_candidates(candidates: Vec<f64>) -> Self {
333 Self {
334 candidates: candidates.into_iter().map(|t| t.clamp(0.0, 1.0)).collect(),
335 criteria: SelectionCriteria::MaxAccuracy,
336 }
337 }
338
339 pub fn with_criteria(mut self, criteria: SelectionCriteria) -> Self {
341 self.criteria = criteria;
342 self
343 }
344
345 pub fn add_candidate(&mut self, threshold: f64) {
347 let t = threshold.clamp(0.0, 1.0);
348 if !self.candidates.contains(&t) {
349 self.candidates.push(t);
350 self.candidates.sort_by(|a, b| a.partial_cmp(b).unwrap());
351 }
352 }
353
354 pub fn tune(&self, samples: &[ValidationSample]) -> TuningResult {
356 if samples.is_empty() {
357 return TuningResult {
358 optimal_threshold: 0.65,
359 criteria: self.criteria.to_string(),
360 optimal_metrics: ThresholdMetrics::calculate(&[], 0.65),
361 all_thresholds: vec![],
362 baseline_accuracy: 0.0,
363 ml_only_accuracy: 0.0,
364 improvement_over_baseline: 0.0,
365 };
366 }
367
368 let baseline_correct = samples.iter().filter(|s| s.rule_correct()).count();
370 let baseline_accuracy = baseline_correct as f64 / samples.len() as f64;
371
372 let ml_only_correct = samples.iter().filter(|s| s.ml_correct()).count();
374 let ml_only_accuracy = ml_only_correct as f64 / samples.len() as f64;
375
376 let all_thresholds: Vec<ThresholdMetrics> = self
378 .candidates
379 .iter()
380 .map(|&t| ThresholdMetrics::calculate(samples, t))
381 .collect();
382
383 let optimal = self.select_optimal(&all_thresholds, baseline_accuracy);
385
386 let improvement = optimal.accuracy - baseline_accuracy;
387
388 TuningResult {
389 optimal_threshold: optimal.threshold,
390 criteria: self.criteria.to_string(),
391 optimal_metrics: optimal.clone(),
392 all_thresholds,
393 baseline_accuracy,
394 ml_only_accuracy,
395 improvement_over_baseline: improvement,
396 }
397 }
398
399 fn select_optimal(
400 &self,
401 metrics: &[ThresholdMetrics],
402 baseline_accuracy: f64,
403 ) -> ThresholdMetrics {
404 if metrics.is_empty() {
405 return ThresholdMetrics {
406 threshold: 0.65,
407 sample_count: 0,
408 accuracy: 0.0,
409 precision: 0.0,
410 recall: 0.0,
411 f1_score: 0.0,
412 fallback_rate: 1.0,
413 ml_usage_rate: 0.0,
414 };
415 }
416
417 match self.criteria {
418 SelectionCriteria::MaxAccuracy => metrics
419 .iter()
420 .max_by(|a, b| a.accuracy.partial_cmp(&b.accuracy).unwrap())
421 .cloned()
422 .unwrap(),
423
424 SelectionCriteria::MaxF1 => metrics
425 .iter()
426 .max_by(|a, b| a.f1_score.partial_cmp(&b.f1_score).unwrap())
427 .cloned()
428 .unwrap(),
429
430 SelectionCriteria::BalancedAccuracyFallback => {
431 metrics
433 .iter()
434 .max_by(|a, b| {
435 let score_a = 0.7 * a.accuracy + 0.3 * a.ml_usage_rate;
436 let score_b = 0.7 * b.accuracy + 0.3 * b.ml_usage_rate;
437 score_a.partial_cmp(&score_b).unwrap()
438 })
439 .cloned()
440 .unwrap()
441 }
442
443 SelectionCriteria::MinFallbackAboveBaseline => {
444 let above_baseline: Vec<_> = metrics
446 .iter()
447 .filter(|m| m.accuracy >= baseline_accuracy)
448 .collect();
449
450 if above_baseline.is_empty() {
451 metrics
453 .iter()
454 .max_by(|a, b| a.accuracy.partial_cmp(&b.accuracy).unwrap())
455 .cloned()
456 .unwrap()
457 } else {
458 above_baseline
460 .into_iter()
461 .min_by(|a, b| a.fallback_rate.partial_cmp(&b.fallback_rate).unwrap())
462 .cloned()
463 .unwrap()
464 }
465 }
466 }
467 }
468}
469
470pub fn find_optimal_threshold(samples: &[ValidationSample]) -> f64 {
472 ThresholdTuner::new().tune(samples).optimal_threshold
473}
474
#[cfg(test)]
mod tests {
    use super::*;

    const OWNED: InferredOwnership = InferredOwnership::Owned;
    const BORROWED: InferredOwnership = InferredOwnership::Borrowed;

    /// Shorthand for `ValidationSample::new`; argument order is
    /// (ground truth, rule prediction, ML prediction, ML confidence).
    fn sample(
        truth: InferredOwnership,
        rule: InferredOwnership,
        ml: InferredOwnership,
        confidence: f64,
    ) -> ValidationSample {
        ValidationSample::new(truth, rule, ml, confidence)
    }

    /// Asserts that two floats differ by less than 0.001.
    #[track_caller]
    fn assert_close(actual: f64, expected: f64) {
        assert!((actual - expected).abs() < 0.001);
    }

    // ---- ValidationSample ----

    #[test]
    fn validation_sample_new() {
        let s = sample(OWNED, OWNED, BORROWED, 0.8);

        assert_eq!(s.ground_truth, InferredOwnership::Owned);
        assert!(s.rule_correct());
        assert!(!s.ml_correct());
    }

    #[test]
    fn validation_sample_clamps_confidence() {
        // Above the upper bound.
        let too_high = sample(OWNED, OWNED, OWNED, 1.5);
        assert_close(too_high.ml_confidence, 1.0);

        // Below the lower bound.
        let too_low = sample(OWNED, OWNED, OWNED, -0.5);
        assert_close(too_low.ml_confidence, 0.0);
    }

    #[test]
    fn validation_sample_hybrid_prediction() {
        // Rules are wrong, ML is right with confidence 0.7.
        let s = sample(OWNED, BORROWED, OWNED, 0.7);

        // Confidence clears the threshold: ML prediction is used.
        assert_eq!(s.hybrid_prediction(0.5), InferredOwnership::Owned);

        // Confidence is below the threshold: rule prediction is used.
        assert_eq!(s.hybrid_prediction(0.8), InferredOwnership::Borrowed);
    }

    #[test]
    fn validation_sample_hybrid_correct() {
        let s = sample(OWNED, BORROWED, OWNED, 0.7);

        assert!(s.hybrid_correct(0.5));

        assert!(!s.hybrid_correct(0.8));
    }

    // ---- ThresholdMetrics ----

    #[test]
    fn threshold_metrics_empty() {
        let metrics = ThresholdMetrics::calculate(&[], 0.5);
        assert_eq!(metrics.sample_count, 0);
        assert_eq!(metrics.accuracy, 0.0);
    }

    #[test]
    fn threshold_metrics_all_correct() {
        let samples = vec![
            sample(OWNED, OWNED, OWNED, 0.9),
            sample(BORROWED, BORROWED, BORROWED, 0.8),
        ];

        let metrics = ThresholdMetrics::calculate(&samples, 0.5);
        assert_eq!(metrics.sample_count, 2);
        assert_close(metrics.accuracy, 1.0);
    }

    #[test]
    fn threshold_metrics_fallback_rate() {
        let samples = vec![
            // Confident enough for ML at threshold 0.5.
            sample(OWNED, OWNED, OWNED, 0.9),
            // Falls back to the rules at threshold 0.5.
            sample(BORROWED, BORROWED, BORROWED, 0.3),
        ];

        let metrics = ThresholdMetrics::calculate(&samples, 0.5);
        assert_close(metrics.fallback_rate, 0.5);
        assert_close(metrics.ml_usage_rate, 0.5);
    }

    // ---- ThresholdTuner basics ----

    #[test]
    fn threshold_tuner_default() {
        let tuner = ThresholdTuner::new();
        assert_eq!(tuner.candidates.len(), 10);
        assert!(tuner.candidates.contains(&0.65));
    }

    #[test]
    fn threshold_tuner_add_candidate() {
        let mut tuner = ThresholdTuner::new();
        tuner.add_candidate(0.55);
        assert!(tuner.candidates.contains(&0.55));
    }

    #[test]
    fn threshold_tuner_tune_empty() {
        let result = ThresholdTuner::new().tune(&[]);
        assert_close(result.optimal_threshold, 0.65);
    }

    #[test]
    fn threshold_tuner_finds_optimal() {
        let samples = vec![
            // High-confidence ML is right where the rules are wrong.
            sample(OWNED, BORROWED, OWNED, 0.9),
            sample(BORROWED, OWNED, BORROWED, 0.85),
            // Low-confidence ML is wrong where the rules are right.
            sample(OWNED, OWNED, BORROWED, 0.4),
            sample(BORROWED, BORROWED, OWNED, 0.3),
        ];

        let tuner = ThresholdTuner::new().with_criteria(SelectionCriteria::MaxAccuracy);
        let result = tuner.tune(&samples);

        assert!(result.optimal_threshold >= 0.5);
        assert!(result.optimal_metrics.accuracy > 0.5);
    }

    #[test]
    fn threshold_tuner_selection_criteria() {
        let samples = vec![
            sample(OWNED, OWNED, OWNED, 0.9),
            sample(BORROWED, BORROWED, BORROWED, 0.8),
        ];

        let max_acc = ThresholdTuner::new()
            .with_criteria(SelectionCriteria::MaxAccuracy)
            .tune(&samples);

        let max_f1 = ThresholdTuner::new()
            .with_criteria(SelectionCriteria::MaxF1)
            .tune(&samples);

        assert_close(max_acc.optimal_metrics.accuracy, 1.0);
        assert_close(max_f1.optimal_metrics.accuracy, 1.0);
    }

    #[test]
    fn threshold_tuner_balanced_criteria() {
        // 80 samples where confident ML beats the rules, then 20 samples where
        // the rules beat unconfident ML.
        let samples: Vec<ValidationSample> = (0..80)
            .map(|_| sample(OWNED, BORROWED, OWNED, 0.9))
            .chain((0..20).map(|_| sample(BORROWED, BORROWED, OWNED, 0.3)))
            .collect();

        let balanced = ThresholdTuner::new()
            .with_criteria(SelectionCriteria::BalancedAccuracyFallback)
            .tune(&samples);

        assert!(balanced.optimal_metrics.accuracy > 0.7);
    }

    // ---- TuningResult ----

    #[test]
    fn tuning_result_to_markdown() {
        let samples = vec![sample(OWNED, OWNED, OWNED, 0.9)];

        let md = ThresholdTuner::new().tune(&samples).to_markdown();

        assert!(md.contains("Threshold Tuning Report"));
        assert!(md.contains("Optimal Threshold"));
        assert!(md.contains("Accuracy"));
    }

    #[test]
    fn tuning_result_improvement() {
        let samples = vec![
            // Rules wrong, ML right.
            sample(OWNED, BORROWED, OWNED, 0.9),
            // Both right.
            sample(BORROWED, BORROWED, BORROWED, 0.8),
        ];

        let result = ThresholdTuner::new().tune(&samples);

        assert_close(result.baseline_accuracy, 0.5);
        assert_close(result.optimal_metrics.accuracy, 1.0);
        assert!(result.improvement_over_baseline > 0.0);
    }

    // ---- find_optimal_threshold ----

    #[test]
    fn find_optimal_threshold_function() {
        let samples = vec![sample(OWNED, OWNED, OWNED, 0.9)];

        let threshold = find_optimal_threshold(&samples);
        assert!((0.0..=1.0).contains(&threshold));
    }

    // ---- SelectionCriteria ----

    #[test]
    fn selection_criteria_display() {
        assert_eq!(SelectionCriteria::MaxAccuracy.to_string(), "max-accuracy");
        assert_eq!(SelectionCriteria::MaxF1.to_string(), "max-f1");
        assert_eq!(
            SelectionCriteria::BalancedAccuracyFallback.to_string(),
            "balanced"
        );
        assert_eq!(
            SelectionCriteria::MinFallbackAboveBaseline.to_string(),
            "min-fallback"
        );
    }

    // ---- ThresholdMetrics::calculate, further cases ----

    #[test]
    fn threshold_metrics_calculate_mixed_correct_incorrect() {
        let samples = vec![
            // Two samples where both classifiers are right.
            sample(OWNED, OWNED, OWNED, 0.9),
            sample(BORROWED, BORROWED, BORROWED, 0.8),
            // Two samples where both classifiers are wrong.
            sample(OWNED, BORROWED, BORROWED, 0.7),
            sample(BORROWED, OWNED, OWNED, 0.3),
        ];

        let metrics = ThresholdMetrics::calculate(&samples, 0.5);
        assert_eq!(metrics.sample_count, 4);
        assert_close(metrics.accuracy, 0.5);
        assert_close(metrics.precision, 0.5);
        assert_close(metrics.recall, 0.5);
        assert_close(metrics.f1_score, 0.5);
    }

    #[test]
    fn threshold_metrics_calculate_all_incorrect() {
        let samples = vec![
            sample(OWNED, BORROWED, BORROWED, 0.9),
            sample(BORROWED, OWNED, OWNED, 0.8),
        ];

        let metrics = ThresholdMetrics::calculate(&samples, 0.5);
        assert_eq!(metrics.sample_count, 2);
        assert_close(metrics.accuracy, 0.0);
        assert_close(metrics.precision, 0.0);
        assert_close(metrics.recall, 0.0);
        assert_close(metrics.f1_score, 0.0);
    }

    #[test]
    fn threshold_metrics_calculate_high_threshold_all_fallback() {
        let samples = vec![
            // Rules right, ML wrong; neither confidence reaches 0.99.
            sample(OWNED, OWNED, BORROWED, 0.5),
            sample(BORROWED, BORROWED, OWNED, 0.4),
        ];

        let metrics = ThresholdMetrics::calculate(&samples, 0.99);
        assert_close(metrics.accuracy, 1.0);
        assert_close(metrics.fallback_rate, 1.0);
        assert_close(metrics.ml_usage_rate, 0.0);
    }

    #[test]
    fn threshold_metrics_calculate_low_threshold_all_ml() {
        let samples = vec![
            // Rules wrong, ML right; both confidences reach 0.1.
            sample(OWNED, BORROWED, OWNED, 0.2),
            sample(BORROWED, OWNED, BORROWED, 0.15),
        ];

        let metrics = ThresholdMetrics::calculate(&samples, 0.1);
        assert_close(metrics.accuracy, 1.0);
        assert_close(metrics.fallback_rate, 0.0);
        assert_close(metrics.ml_usage_rate, 1.0);
    }

    #[test]
    fn threshold_metrics_precision_recall_edge_zero_tp() {
        // A single sample that both classifiers get wrong.
        let samples = vec![sample(OWNED, BORROWED, BORROWED, 0.9)];

        let metrics = ThresholdMetrics::calculate(&samples, 0.5);
        assert_close(metrics.precision, 0.0);
        assert_close(metrics.recall, 0.0);
        assert_close(metrics.f1_score, 0.0);
    }

    // ---- select_optimal, exercised through tune ----

    #[test]
    fn select_optimal_empty_metrics() {
        // No candidates at all: the default threshold is reported.
        let tuner = ThresholdTuner::with_candidates(vec![]);
        let result = tuner.tune(&[sample(OWNED, OWNED, OWNED, 0.9)]);
        assert_close(result.optimal_threshold, 0.65);
    }

    #[test]
    fn select_optimal_max_f1_selects_highest_f1() {
        let samples = vec![
            sample(OWNED, BORROWED, OWNED, 0.9),
            sample(BORROWED, BORROWED, OWNED, 0.3),
        ];

        let tuner = ThresholdTuner::new().with_criteria(SelectionCriteria::MaxF1);
        let result = tuner.tune(&samples);

        assert!(result.optimal_metrics.f1_score >= 0.0);
        assert!(result.optimal_metrics.f1_score <= 1.0);
    }

    #[test]
    fn select_optimal_balanced_accuracy_fallback() {
        let samples: Vec<ValidationSample> = (0..100)
            .map(|_| sample(OWNED, BORROWED, OWNED, 0.85))
            .chain((0..50).map(|_| sample(BORROWED, BORROWED, OWNED, 0.3)))
            .collect();

        let tuner =
            ThresholdTuner::new().with_criteria(SelectionCriteria::BalancedAccuracyFallback);
        let result = tuner.tune(&samples);

        assert!(result.optimal_metrics.accuracy > 0.0);
        assert!((0.1..=0.9).contains(&result.optimal_threshold));
    }

    #[test]
    fn select_optimal_min_fallback_above_baseline_found() {
        let samples: Vec<ValidationSample> = (0..80)
            // Confident ML is right where the rules are wrong.
            .map(|_| sample(OWNED, BORROWED, OWNED, 0.9))
            // Unconfident ML is wrong where the rules are right.
            .chain((0..20).map(|_| sample(BORROWED, BORROWED, OWNED, 0.3)))
            .collect();

        let tuner =
            ThresholdTuner::new().with_criteria(SelectionCriteria::MinFallbackAboveBaseline);
        let result = tuner.tune(&samples);

        assert!(result.optimal_metrics.accuracy >= result.baseline_accuracy);
        assert!(result.optimal_metrics.fallback_rate < 1.0);
    }

    #[test]
    fn select_optimal_min_fallback_no_above_baseline() {
        // Rules are always right and confident ML is always wrong.
        let samples = vec![
            sample(OWNED, OWNED, BORROWED, 0.9),
            sample(BORROWED, BORROWED, OWNED, 0.8),
        ];

        let tuner =
            ThresholdTuner::new().with_criteria(SelectionCriteria::MinFallbackAboveBaseline);
        let result = tuner.tune(&samples);

        assert!(result.optimal_threshold >= 0.1);
    }

    #[test]
    fn select_optimal_min_fallback_all_below_baseline_fallback_to_max_accuracy() {
        // Rules are always right; ML is always wrong, and two of the samples
        // have a confidence (0.99) above every candidate threshold.
        let samples = vec![
            sample(OWNED, OWNED, BORROWED, 0.99),
            sample(BORROWED, BORROWED, OWNED, 0.99),
            sample(OWNED, OWNED, BORROWED, 0.5),
            sample(BORROWED, BORROWED, OWNED, 0.5),
        ];

        let tuner = ThresholdTuner::with_candidates(vec![0.1, 0.3, 0.5, 0.7, 0.95])
            .with_criteria(SelectionCriteria::MinFallbackAboveBaseline);
        let result = tuner.tune(&samples);

        assert!(result.optimal_threshold > 0.0);
    }

    // ---- candidate management ----

    #[test]
    fn threshold_tuner_with_candidates_clamps() {
        let tuner = ThresholdTuner::with_candidates(vec![-0.5, 1.5, 0.5]);
        let result = tuner.tune(&[sample(OWNED, OWNED, OWNED, 0.9)]);
        assert_eq!(result.all_thresholds.len(), 3);
    }

    #[test]
    fn threshold_tuner_add_candidate_no_duplicate() {
        let mut tuner = ThresholdTuner::new();
        let original_len = tuner.candidates.len();
        // 0.65 is already one of the built-in candidates.
        tuner.add_candidate(0.65);
        assert_eq!(tuner.candidates.len(), original_len);
    }

    #[test]
    fn threshold_tuner_add_candidate_sorted() {
        let mut tuner = ThresholdTuner::with_candidates(vec![0.3, 0.7]);
        tuner.add_candidate(0.5);
        assert_eq!(tuner.candidates, vec![0.3, 0.5, 0.7]);
    }

    // ---- to_markdown, further cases ----

    #[test]
    fn tuning_result_to_markdown_no_improvement() {
        // Rules are always right, ML is always wrong.
        let samples = vec![
            sample(OWNED, OWNED, BORROWED, 0.9),
            sample(BORROWED, BORROWED, OWNED, 0.8),
        ];

        let md = ThresholdTuner::new().tune(&samples).to_markdown();
        assert!(md.contains("Threshold Tuning Report"));
        assert!(md.contains("Comparison to Baselines"));
        assert!(md.contains("All Thresholds"));
        assert!(md.contains("Recommendation"));
    }

    #[test]
    fn tuning_result_to_markdown_with_improvement() {
        let samples = vec![
            // Rules wrong, ML right.
            sample(OWNED, BORROWED, OWNED, 0.9),
            // Both right.
            sample(BORROWED, BORROWED, BORROWED, 0.8),
        ];

        let md = ThresholdTuner::new().tune(&samples).to_markdown();
        assert!(md.contains("ADOPT HYBRID"));
    }

    #[test]
    fn tuning_result_to_markdown_threshold_table_rows() {
        let samples = vec![sample(OWNED, OWNED, OWNED, 0.9)];

        let md = ThresholdTuner::new().tune(&samples).to_markdown();

        // Every row of the threshold table starts with "| 0.".
        let table_rows = md.matches("| 0.").count();
        assert!(table_rows >= 10, "Should have at least 10 threshold rows, got {}", table_rows);
    }

    // ---- find_optimal_threshold, further cases ----

    #[test]
    fn find_optimal_threshold_empty_samples() {
        let threshold = find_optimal_threshold(&[]);
        assert_close(threshold, 0.65);
    }

    #[test]
    fn find_optimal_threshold_uses_max_accuracy() {
        let samples = vec![
            sample(OWNED, BORROWED, OWNED, 0.9),
            sample(BORROWED, BORROWED, OWNED, 0.3),
        ];

        let threshold = find_optimal_threshold(&samples);
        assert!((0.1..=0.9).contains(&threshold));
    }
}