1use serde::{Deserialize, Serialize};
25
26use crate::generator::GeneratedCode;
27use crate::ml::{ActiveLearner, CommitFeatures, DefectPredictor, QualityGate, RichLabel};
28
/// Tunable settings for a [`CodexPipeline`].
///
/// `Default` provides the baseline values; [`PipelineConfig::strict`] and
/// [`PipelineConfig::fast`] are presets layered on top of it, and
/// [`PipelineConfig::validate`] reports out-of-range values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineConfig {
    /// Threshold handed to the quality gate when the pipeline is built.
    /// `validate` expects it to lie in `[0.0, 1.0]`.
    pub quality_threshold: f32,
    /// Cluster count handed to the active learner. `validate` rejects `0`.
    pub num_clusters: usize,
    /// Upper bound on how many samples the defect-priority and
    /// active-learning stages keep. `validate` rejects `0`.
    pub batch_size: usize,
    /// Oracle-call budget.
    /// NOTE(review): not read by any code visible in this module — confirm
    /// where (or whether) it is enforced.
    pub max_oracle_calls: usize,
    /// Desired oracle-reduction factor. `validate` requires it to be `> 0`.
    /// NOTE(review): presumably meant to be compared through
    /// `PipelineResult::met_oracle_target` — verify against callers.
    pub target_reduction: f32,
    /// When `false`, the active-learning stage passes its input through unchanged.
    pub enable_active_learning: bool,
    /// When `false`, the defect-priority stage passes its input through unchanged.
    pub enable_defect_priority: bool,
}
47
48impl Default for PipelineConfig {
49 fn default() -> Self {
50 Self {
51 quality_threshold: 0.5,
52 num_clusters: 5,
53 batch_size: 100,
54 max_oracle_calls: 1000,
55 target_reduction: 10.0,
56 enable_active_learning: true,
57 enable_defect_priority: true,
58 }
59 }
60}
61
62impl PipelineConfig {
63 #[must_use]
65 pub fn strict() -> Self {
66 Self {
67 quality_threshold: 0.7,
68 ..Default::default()
69 }
70 }
71
72 #[must_use]
74 pub fn fast() -> Self {
75 Self {
76 quality_threshold: 0.3,
77 enable_active_learning: false,
78 enable_defect_priority: false,
79 ..Default::default()
80 }
81 }
82
83 #[must_use]
85 pub fn validate(&self) -> Vec<String> {
86 let mut errors = Vec::new();
87
88 if self.quality_threshold < 0.0 || self.quality_threshold > 1.0 {
89 errors.push("quality_threshold must be in [0.0, 1.0]".to_string());
90 }
91
92 if self.num_clusters == 0 {
93 errors.push("num_clusters must be > 0".to_string());
94 }
95
96 if self.batch_size == 0 {
97 errors.push("batch_size must be > 0".to_string());
98 }
99
100 if self.target_reduction <= 0.0 {
101 errors.push("target_reduction must be > 0".to_string());
102 }
103
104 errors
105 }
106}
107
/// Summary scores describing a labelled batch.
///
/// `Default` yields all zeros. `CodexPipeline::compute_quality` in this file
/// is the producer of non-default values; the notes below describe what that
/// function stores in each field.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DataQualityMetrics {
    /// Novelty score. `compute_quality` currently stores the constant `0.5`.
    pub novelty: f32,
    /// Active learner's silhouette score, floored at `0.0`.
    pub diversity: f32,
    /// Summed error-category severity divided by the bug count, capped at
    /// `1.0`; `0.3` when the batch has no bugs.
    pub difficulty: f32,
    /// `1.0` minus the mean structural similarity of the labels.
    pub coverage: f32,
    /// Fraction of labels that are not marked correct.
    pub bug_rate: f32,
}
122
123impl DataQualityMetrics {
124 #[must_use]
126 pub fn overall(&self) -> f32 {
127 let weights = [0.2, 0.25, 0.2, 0.2, 0.15]; let values = [
129 self.novelty,
130 self.diversity,
131 self.difficulty,
132 self.coverage,
133 self.bug_rate,
134 ];
135
136 let weighted_sum: f32 = values.iter().zip(&weights).map(|(v, w)| v * w).sum();
137 let total_weight: f32 = weights.iter().sum();
138
139 weighted_sum / total_weight
140 }
141
142 #[must_use]
144 pub fn meets_targets(&self) -> bool {
145 self.diversity >= 0.6 && self.bug_rate >= 0.15 && self.coverage >= 0.7
146 }
147}
148
/// Bookkeeping record for one pipeline stage.
#[derive(Debug, Clone)]
pub struct StageResult {
    /// Stage identifier. The stages in this file use `"quality_gate"`,
    /// `"defect_priority"` and `"active_learning"`.
    pub stage: String,
    /// Number of samples handed to the stage.
    pub input_count: usize,
    /// Number of samples the stage returned.
    pub output_count: usize,
    /// Elapsed time of the stage in milliseconds.
    pub time_ms: u64,
}
161
162impl StageResult {
163 #[must_use]
165 pub fn reduction_factor(&self) -> f32 {
166 if self.output_count == 0 {
167 f32::INFINITY
168 } else {
169 self.input_count as f32 / self.output_count as f32
170 }
171 }
172
173 #[must_use]
175 pub fn pass_rate(&self) -> f32 {
176 if self.input_count == 0 {
177 0.0
178 } else {
179 self.output_count as f32 / self.input_count as f32
180 }
181 }
182}
183
/// Outcome of one pipeline run.
#[derive(Debug, Clone, Default)]
pub struct PipelineResult {
    /// Per-stage records, in the order the stages ran.
    pub stages: Vec<StageResult>,
    /// Labels collected for the run. `CodexPipeline::run_dry` leaves this empty.
    pub labels: Vec<RichLabel>,
    /// Quality summary of the labels. `CodexPipeline::run_dry` leaves this at
    /// its default.
    pub quality: DataQualityMetrics,
    /// Number of code samples handed to the pipeline.
    pub total_generated: usize,
    /// Number of samples that would be sent to the oracle.
    pub oracle_calls: usize,
    /// `total_generated / oracle_calls`; `f32::INFINITY` when `run_dry`
    /// ends up with zero oracle calls.
    pub oracle_reduction: f32,
}
200
201impl PipelineResult {
202 #[must_use]
204 pub fn stage(&self, name: &str) -> Option<&StageResult> {
205 self.stages.iter().find(|s| s.stage == name)
206 }
207
208 #[must_use]
210 pub fn total_time_ms(&self) -> u64 {
211 self.stages.iter().map(|s| s.time_ms).sum()
212 }
213
214 #[must_use]
216 pub fn met_oracle_target(&self, target: f32) -> bool {
217 self.oracle_reduction >= target
218 }
219}
220
/// A code sample that survived every preparation stage, annotated with the
/// scores computed for it by `CodexPipeline::prepare`.
#[derive(Debug, Clone)]
pub struct PreparedSample {
    /// The generated code itself.
    pub code: GeneratedCode,
    /// Score returned by the quality gate for this sample.
    pub quality_score: f32,
    /// `base_probability` of the defect predictor's prediction for this sample.
    pub defect_probability: f32,
    /// Cluster reported by the active learner, if it returned one.
    pub cluster: Option<usize>,
    /// Position of the sample in the final selection order (0 is first).
    pub priority: usize,
}
235
/// Sample-selection pipeline that chains a quality gate, defect
/// prioritisation and active-learning sampling, and keeps running statistics
/// across runs.
#[derive(Debug)]
pub struct CodexPipeline {
    /// Settings the pipeline was built with.
    config: PipelineConfig,
    /// Stage 1: filters and scores samples.
    quality_gate: QualityGate,
    /// Stage 2: orders samples and predicts defect probability.
    defect_predictor: DefectPredictor,
    /// Stage 3: clusters samples and selects a batch; also receives feedback.
    active_learner: ActiveLearner,
    /// Counters accumulated across runs.
    stats: PipelineStats,
}
250
/// Counters a [`CodexPipeline`] accumulates over its lifetime.
#[derive(Debug, Clone, Default)]
pub struct PipelineStats {
    /// Number of completed `run_dry` calls.
    pub runs: usize,
    /// Total number of input samples seen across runs.
    pub samples_processed: usize,
    /// Total number of oracle calls across runs.
    pub oracle_calls: usize,
    /// Number of `update_feedback` calls that reported a bug.
    pub bugs_found: usize,
    /// Running mean of the per-run oracle reduction.
    pub avg_oracle_reduction: f32,
}
265
266impl Default for CodexPipeline {
267 fn default() -> Self {
268 Self::new(PipelineConfig::default())
269 }
270}
271
272impl CodexPipeline {
273 #[must_use]
275 pub fn new(config: PipelineConfig) -> Self {
276 Self {
277 quality_gate: QualityGate::new(config.quality_threshold),
278 defect_predictor: DefectPredictor::new(),
279 active_learner: ActiveLearner::new(config.num_clusters),
280 config,
281 stats: PipelineStats::default(),
282 }
283 }
284
285 #[must_use]
287 pub fn config(&self) -> &PipelineConfig {
288 &self.config
289 }
290
291 #[must_use]
293 pub fn stats(&self) -> &PipelineStats {
294 &self.stats
295 }
296
297 pub fn filter_quality<'a>(
299 &mut self,
300 codes: &'a [GeneratedCode],
301 ) -> (Vec<&'a GeneratedCode>, StageResult) {
302 let start = std::time::Instant::now();
303 let input_count = codes.len();
304
305 let passed = self.quality_gate.filter_batch(codes);
306
307 let result = StageResult {
308 stage: "quality_gate".to_string(),
309 input_count,
310 output_count: passed.len(),
311 time_ms: start.elapsed().as_millis() as u64,
312 };
313
314 (passed, result)
315 }
316
317 pub fn prioritize_defects<'a>(
319 &self,
320 codes: &'a [&GeneratedCode],
321 ) -> (Vec<&'a GeneratedCode>, StageResult) {
322 let start = std::time::Instant::now();
323 let input_count = codes.len();
324
325 if !self.config.enable_defect_priority {
326 return (
327 codes.to_vec(),
328 StageResult {
329 stage: "defect_priority".to_string(),
330 input_count,
331 output_count: input_count,
332 time_ms: start.elapsed().as_millis() as u64,
333 },
334 );
335 }
336
337 let pairs: Vec<(CommitFeatures, String)> = codes
339 .iter()
340 .map(|c| (CommitFeatures::default(), c.code.clone()))
341 .collect();
342
343 let order = self.defect_predictor.prioritize(&pairs);
344
345 let output_count = order.len().min(self.config.batch_size);
347 let prioritized: Vec<&GeneratedCode> = order
348 .iter()
349 .take(output_count)
350 .filter_map(|&i| codes.get(i).copied())
351 .collect();
352
353 let result = StageResult {
354 stage: "defect_priority".to_string(),
355 input_count,
356 output_count: prioritized.len(),
357 time_ms: start.elapsed().as_millis() as u64,
358 };
359
360 (prioritized, result)
361 }
362
363 pub fn sample_active<'a>(
365 &mut self,
366 codes: &'a [&GeneratedCode],
367 ) -> (Vec<&'a GeneratedCode>, StageResult) {
368 let start = std::time::Instant::now();
369 let input_count = codes.len();
370
371 if !self.config.enable_active_learning || codes.is_empty() {
372 return (
373 codes.to_vec(),
374 StageResult {
375 stage: "active_learning".to_string(),
376 input_count,
377 output_count: input_count,
378 time_ms: start.elapsed().as_millis() as u64,
379 },
380 );
381 }
382
383 let code_strings: Vec<&str> = codes.iter().map(|c| c.code.as_str()).collect();
385 self.active_learner.fit(&code_strings);
386
387 let batch_size = self.config.batch_size.min(codes.len());
389 let selected_indices = self.active_learner.select_batch(&code_strings, batch_size);
390
391 let selected: Vec<&GeneratedCode> = selected_indices
392 .iter()
393 .filter_map(|&i| codes.get(i).copied())
394 .collect();
395
396 let result = StageResult {
397 stage: "active_learning".to_string(),
398 input_count,
399 output_count: selected.len(),
400 time_ms: start.elapsed().as_millis() as u64,
401 };
402
403 (selected, result)
404 }
405
406 pub fn prepare(&mut self, codes: &[GeneratedCode]) -> (Vec<PreparedSample>, Vec<StageResult>) {
408 let mut stages = Vec::new();
409
410 let (quality_passed_refs, stage1) = self.filter_quality(codes);
412 let quality_passed: Vec<GeneratedCode> = quality_passed_refs.into_iter().cloned().collect();
413 stages.push(stage1);
414
415 if quality_passed.is_empty() {
416 return (vec![], stages);
417 }
418
419 let quality_refs: Vec<&GeneratedCode> = quality_passed.iter().collect();
421 let (prioritized_refs, stage2) = self.prioritize_defects(&quality_refs);
422 let prioritized: Vec<GeneratedCode> = prioritized_refs.into_iter().cloned().collect();
423 stages.push(stage2);
424
425 let prioritized_refs: Vec<&GeneratedCode> = prioritized.iter().collect();
427 let (sampled_refs, stage3) = self.sample_active(&prioritized_refs);
428 let sampled: Vec<GeneratedCode> = sampled_refs.into_iter().cloned().collect();
429 stages.push(stage3);
430
431 let prepared: Vec<PreparedSample> = sampled
433 .into_iter()
434 .enumerate()
435 .map(|(i, code)| {
436 let quality_score = self.quality_gate.score(
437 &crate::ml::QualityFeatureExtractor::new().extract_from_generated(&code),
438 );
439
440 let defect_pred = self
441 .defect_predictor
442 .predict(&CommitFeatures::default(), &code.code);
443
444 let cluster = self.active_learner.get_cluster(&code.code);
445
446 PreparedSample {
447 code,
448 quality_score,
449 defect_probability: defect_pred.base_probability,
450 cluster,
451 priority: i,
452 }
453 })
454 .collect();
455
456 (prepared, stages)
457 }
458
459 pub fn update_feedback(&mut self, code: &str, revealed_bug: bool) {
461 self.active_learner.update_feedback(code, revealed_bug);
462
463 if revealed_bug {
464 self.stats.bugs_found += 1;
465 }
466 }
467
468 #[must_use]
470 pub fn compute_quality(&self, labels: &[RichLabel]) -> DataQualityMetrics {
471 if labels.is_empty() {
472 return DataQualityMetrics::default();
473 }
474
475 let bugs = labels.iter().filter(|l| !l.is_correct).count();
477 let bug_rate = bugs as f32 / labels.len() as f32;
478
479 let diversity = self.active_learner.silhouette_score().max(0.0);
481
482 let total_severity: f32 = labels
484 .iter()
485 .filter_map(|l| l.error_category)
486 .map(|c| c.severity())
487 .sum();
488 let difficulty = if bugs > 0 {
489 (total_severity / bugs as f32).min(1.0)
490 } else {
491 0.3
492 };
493
494 let avg_structural_sim: f32 = labels
496 .iter()
497 .map(|l| l.soft_labels.structural_similarity)
498 .sum::<f32>()
499 / labels.len() as f32;
500 let coverage = 1.0 - avg_structural_sim; let novelty = 0.5;
504
505 DataQualityMetrics {
506 novelty,
507 diversity,
508 difficulty,
509 coverage,
510 bug_rate,
511 }
512 }
513
514 pub fn run_dry(&mut self, codes: &[GeneratedCode]) -> PipelineResult {
516 let total_generated = codes.len();
517
518 let (prepared, stages) = self.prepare(codes);
519
520 let oracle_calls = prepared.len();
521 let oracle_reduction = if oracle_calls > 0 {
522 total_generated as f32 / oracle_calls as f32
523 } else {
524 f32::INFINITY
525 };
526
527 self.stats.runs += 1;
529 self.stats.samples_processed += total_generated;
530 self.stats.oracle_calls += oracle_calls;
531
532 if self.stats.runs > 1 {
533 self.stats.avg_oracle_reduction =
534 (self.stats.avg_oracle_reduction * (self.stats.runs - 1) as f32 + oracle_reduction)
535 / self.stats.runs as f32;
536 } else {
537 self.stats.avg_oracle_reduction = oracle_reduction;
538 }
539
540 PipelineResult {
541 stages,
542 labels: vec![], quality: DataQualityMetrics::default(),
544 total_generated,
545 oracle_calls,
546 oracle_reduction,
547 }
548 }
549
550 pub fn reset(&mut self) {
552 self.quality_gate.reset_stats();
553 self.active_learner = ActiveLearner::new(self.config.num_clusters);
554 }
555}
556
// Unit tests for the configuration, metric and result types and for the
// individual `CodexPipeline` stages.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ml::ErrorCategory;
    use crate::Language;

    /// Four small Python snippets of increasing `ast_depth`, shared by the
    /// pipeline tests below.
    fn sample_codes() -> Vec<GeneratedCode> {
        vec![
            GeneratedCode {
                code: "x = 1".to_string(),
                language: Language::Python,
                ast_depth: 1,
                features: vec![],
            },
            GeneratedCode {
                code: "def add(a, b):\n return a + b".to_string(),
                language: Language::Python,
                ast_depth: 3,
                features: vec!["function".to_string()],
            },
            GeneratedCode {
                code: "for i in range(10):\n if i % 2 == 0:\n print(i)".to_string(),
                language: Language::Python,
                ast_depth: 5,
                features: vec!["loop".to_string(), "conditional".to_string()],
            },
            GeneratedCode {
                code: "class Foo:\n def __init__(self):\n self.x = 0\n def get(self):\n return self.x".to_string(),
                language: Language::Python,
                ast_depth: 6,
                features: vec!["class".to_string(), "method".to_string()],
            },
        ]
    }

    // ---- PipelineConfig ----

    /// The default config uses a 0.5 threshold and 5 clusters.
    #[test]
    fn test_pipeline_config_default() {
        let config = PipelineConfig::default();
        assert!((config.quality_threshold - 0.5).abs() < f32::EPSILON);
        assert_eq!(config.num_clusters, 5);
    }

    /// The strict preset raises the threshold to 0.7.
    #[test]
    fn test_pipeline_config_strict() {
        let config = PipelineConfig::strict();
        assert!((config.quality_threshold - 0.7).abs() < f32::EPSILON);
    }

    /// The fast preset disables both optional stages.
    #[test]
    fn test_pipeline_config_fast() {
        let config = PipelineConfig::fast();
        assert!(!config.enable_active_learning);
        assert!(!config.enable_defect_priority);
    }

    /// Defaults validate cleanly; an out-of-range threshold plus zero
    /// clusters produces at least one error.
    #[test]
    fn test_pipeline_config_validate() {
        let valid = PipelineConfig::default();
        assert!(valid.validate().is_empty());

        let invalid = PipelineConfig {
            quality_threshold: 1.5,
            num_clusters: 0,
            ..Default::default()
        };
        assert!(!invalid.validate().is_empty());
    }

    // ---- DataQualityMetrics ----

    /// The overall score of in-range metrics lies in (0, 1].
    #[test]
    fn test_data_quality_overall() {
        let metrics = DataQualityMetrics {
            novelty: 0.8,
            diversity: 0.7,
            difficulty: 0.6,
            coverage: 0.8,
            bug_rate: 0.2,
        };

        let score = metrics.overall();
        assert!(score > 0.0);
        assert!(score <= 1.0);
    }

    /// Metrics above every target pass; all-zero metrics do not.
    #[test]
    fn test_data_quality_meets_targets() {
        let good = DataQualityMetrics {
            diversity: 0.7,
            bug_rate: 0.2,
            coverage: 0.8,
            ..Default::default()
        };
        assert!(good.meets_targets());

        let bad = DataQualityMetrics::default();
        assert!(!bad.meets_targets());
    }

    // ---- StageResult ----

    /// 100 in / 10 out gives a 10x reduction and a 10% pass rate.
    #[test]
    fn test_stage_result_reduction() {
        let result = StageResult {
            stage: "test".to_string(),
            input_count: 100,
            output_count: 10,
            time_ms: 50,
        };

        assert!((result.reduction_factor() - 10.0).abs() < 0.001);
        assert!((result.pass_rate() - 0.1).abs() < 0.001);
    }

    /// Zero output yields an infinite reduction; zero input yields a 0 pass rate.
    #[test]
    fn test_stage_result_edge_cases() {
        let zero_output = StageResult {
            stage: "test".to_string(),
            input_count: 100,
            output_count: 0,
            time_ms: 0,
        };
        assert!(zero_output.reduction_factor().is_infinite());

        let zero_input = StageResult {
            stage: "test".to_string(),
            input_count: 0,
            output_count: 0,
            time_ms: 0,
        };
        assert!((zero_input.pass_rate() - 0.0).abs() < 0.001);
    }

    // ---- PipelineResult ----

    /// Stages can be looked up by name; unknown names give `None`.
    #[test]
    fn test_pipeline_result_stage_lookup() {
        let result = PipelineResult {
            stages: vec![
                StageResult {
                    stage: "quality_gate".to_string(),
                    input_count: 100,
                    output_count: 50,
                    time_ms: 10,
                },
                StageResult {
                    stage: "defect_priority".to_string(),
                    input_count: 50,
                    output_count: 20,
                    time_ms: 5,
                },
            ],
            ..Default::default()
        };

        assert!(result.stage("quality_gate").is_some());
        assert!(result.stage("nonexistent").is_none());
    }

    /// Total time is the sum of the per-stage times.
    #[test]
    fn test_pipeline_result_total_time() {
        let result = PipelineResult {
            stages: vec![
                StageResult {
                    stage: "a".to_string(),
                    input_count: 0,
                    output_count: 0,
                    time_ms: 100,
                },
                StageResult {
                    stage: "b".to_string(),
                    input_count: 0,
                    output_count: 0,
                    time_ms: 200,
                },
            ],
            ..Default::default()
        };

        assert_eq!(result.total_time_ms(), 300);
    }

    // ---- CodexPipeline ----

    /// A fresh pipeline has recorded no runs.
    #[test]
    fn test_codex_pipeline_new() {
        let pipeline = CodexPipeline::default();
        assert_eq!(pipeline.stats().runs, 0);
    }

    /// The quality stage never returns more than it received and reports
    /// itself as "quality_gate".
    #[test]
    fn test_codex_pipeline_filter_quality() {
        let mut pipeline = CodexPipeline::new(PipelineConfig {
            quality_threshold: 0.3,
            ..Default::default()
        });

        let codes = sample_codes();
        let (passed, stage) = pipeline.filter_quality(&codes);

        assert!(passed.len() <= codes.len());
        assert_eq!(stage.stage, "quality_gate");
        assert_eq!(stage.input_count, codes.len());
    }

    /// With defaults, prioritisation returns a non-empty list and reports
    /// itself as "defect_priority".
    #[test]
    fn test_codex_pipeline_prioritize_defects() {
        let pipeline = CodexPipeline::default();
        let codes = sample_codes();
        let refs: Vec<&GeneratedCode> = codes.iter().collect();

        let (prioritized, stage) = pipeline.prioritize_defects(&refs);

        assert!(!prioritized.is_empty());
        assert_eq!(stage.stage, "defect_priority");
    }

    /// Active sampling respects `batch_size` and reports itself as
    /// "active_learning".
    #[test]
    fn test_codex_pipeline_sample_active() {
        let mut pipeline = CodexPipeline::new(PipelineConfig {
            batch_size: 2,
            ..Default::default()
        });

        let codes = sample_codes();
        let refs: Vec<&GeneratedCode> = codes.iter().collect();

        let (sampled, stage) = pipeline.sample_active(&refs);

        assert!(sampled.len() <= 2);
        assert_eq!(stage.stage, "active_learning");
    }

    /// With a lenient threshold, `prepare` yields samples and records all
    /// three stages.
    #[test]
    fn test_codex_pipeline_prepare() {
        let mut pipeline = CodexPipeline::new(PipelineConfig {
            quality_threshold: 0.2,
            batch_size: 10,
            ..Default::default()
        });

        let codes = sample_codes();
        let (prepared, stages) = pipeline.prepare(&codes);

        assert!(!prepared.is_empty());
        assert_eq!(stages.len(), 3);
    }

    /// A dry run never selects more samples than were generated, so the
    /// reduction is at least 1.
    #[test]
    fn test_codex_pipeline_run_dry() {
        let mut pipeline = CodexPipeline::new(PipelineConfig {
            quality_threshold: 0.2,
            ..Default::default()
        });

        let codes = sample_codes();
        let result = pipeline.run_dry(&codes);

        assert_eq!(result.total_generated, codes.len());
        assert!(result.oracle_calls <= codes.len());
        assert!(result.oracle_reduction >= 1.0);
    }

    /// Only feedback that reports a bug increments `bugs_found`.
    #[test]
    fn test_codex_pipeline_update_feedback() {
        let mut pipeline = CodexPipeline::default();

        let codes = sample_codes();
        // Run the sampling stage first so the active learner has been fitted
        // before it receives feedback.
        let refs: Vec<&GeneratedCode> = codes.iter().collect();
        let _ = pipeline.sample_active(&refs);

        pipeline.update_feedback("def add(a, b): return a + b", true);
        assert_eq!(pipeline.stats().bugs_found, 1);

        pipeline.update_feedback("x = 1", false);
        assert_eq!(pipeline.stats().bugs_found, 1);
    }

    /// One correct and one incorrect label give a bug rate of 0.5.
    #[test]
    fn test_codex_pipeline_compute_quality() {
        let pipeline = CodexPipeline::default();

        let labels = vec![
            RichLabel::correct(crate::ml::SoftLabels::default()),
            RichLabel::incorrect(
                ErrorCategory::TypeMismatch,
                "error".to_string(),
                crate::ml::SoftLabels::default(),
            ),
        ];

        let quality = pipeline.compute_quality(&labels);
        assert!((quality.bug_rate - 0.5).abs() < 0.001);
    }

    /// `reset` does not clear the accumulated statistics: the run recorded
    /// before the reset is still counted.
    #[test]
    fn test_codex_pipeline_reset() {
        let mut pipeline = CodexPipeline::default();

        let codes = sample_codes();
        let _ = pipeline.run_dry(&codes);

        pipeline.reset();
        assert_eq!(pipeline.stats().runs, 1);
    }

    // ---- Debug formatting ----

    /// `Debug` output names the config type.
    #[test]
    fn test_pipeline_config_debug() {
        let config = PipelineConfig::default();
        let debug = format!("{config:?}");
        assert!(debug.contains("PipelineConfig"));
    }

    /// `Debug` output names the metrics type.
    #[test]
    fn test_data_quality_metrics_debug() {
        let metrics = DataQualityMetrics::default();
        let debug = format!("{metrics:?}");
        assert!(debug.contains("DataQualityMetrics"));
    }

    /// `Debug` output names the pipeline type.
    #[test]
    fn test_codex_pipeline_debug() {
        let pipeline = CodexPipeline::default();
        let debug = format!("{pipeline:?}");
        assert!(debug.contains("CodexPipeline"));
    }

    // ---- Serialization round-trips ----

    /// The config survives a JSON round-trip.
    #[test]
    fn test_pipeline_config_serialize() {
        let config = PipelineConfig::default();
        let json = serde_json::to_string(&config).unwrap();
        let restored: PipelineConfig = serde_json::from_str(&json).unwrap();
        assert!((config.quality_threshold - restored.quality_threshold).abs() < f32::EPSILON);
    }

    /// The metrics survive a JSON round-trip.
    #[test]
    fn test_data_quality_metrics_serialize() {
        let metrics = DataQualityMetrics {
            novelty: 0.5,
            diversity: 0.6,
            difficulty: 0.7,
            coverage: 0.8,
            bug_rate: 0.15,
        };
        let json = serde_json::to_string(&metrics).unwrap();
        let restored: DataQualityMetrics = serde_json::from_str(&json).unwrap();
        assert!((metrics.diversity - restored.diversity).abs() < 0.001);
    }

    // ---- End-to-end ----

    /// A dry run with a lenient threshold records all three named stages and
    /// a reduction of at least 1.
    #[test]
    fn test_full_pipeline_flow() {
        let mut pipeline = CodexPipeline::new(PipelineConfig {
            quality_threshold: 0.2,
            batch_size: 10,
            ..Default::default()
        });

        let codes = sample_codes();

        let result = pipeline.run_dry(&codes);

        assert_eq!(result.stages.len(), 3);
        assert!(result.stage("quality_gate").is_some());
        assert!(result.stage("defect_priority").is_some());
        assert!(result.stage("active_learning").is_some());

        assert!(result.oracle_reduction >= 1.0);
    }

    /// 100 trivial samples with `batch_size = 2` must shrink to a small
    /// oracle workload.
    #[test]
    fn test_pipeline_oracle_reduction() {
        let mut pipeline = CodexPipeline::new(PipelineConfig {
            quality_threshold: 0.6,
            batch_size: 2,
            ..Default::default()
        });

        let mut codes = Vec::new();
        for i in 0..100 {
            codes.push(GeneratedCode {
                code: format!("x_{i} = {i}"),
                language: Language::Python,
                ast_depth: 1,
                features: vec![],
            });
        }

        let result = pipeline.run_dry(&codes);

        // Deliberately loose bound; the enabled defect-priority stage already
        // truncates its output to `batch_size`.
        assert!(result.oracle_calls <= 20);
    }
}
965
// Property-based tests for the configuration and metric invariants.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        // A threshold clamped into [0, 1] stays in range and is accepted by
        // `PipelineConfig::validate`.
        #[test]
        fn prop_quality_threshold_valid(threshold in -0.5f32..1.5) {
            let config = PipelineConfig {
                quality_threshold: threshold.clamp(0.0, 1.0),
                ..Default::default()
            };
            prop_assert!(config.quality_threshold >= 0.0);
            prop_assert!(config.quality_threshold <= 1.0);
            prop_assert!(config.validate().is_empty());
        }

        // `StageResult::reduction_factor` is infinite without output,
        // otherwise finite and positive, and at least 1 whenever the output
        // does not exceed the input.
        #[test]
        fn prop_oracle_reduction_bounded(total in 1usize..1000, calls in 0usize..1000) {
            let stage = StageResult {
                stage: "prop".to_string(),
                input_count: total,
                output_count: calls,
                time_ms: 0,
            };
            let reduction = stage.reduction_factor();

            if calls == 0 {
                prop_assert!(reduction.is_infinite());
            } else {
                prop_assert!(reduction.is_finite());
                prop_assert!(reduction > 0.0);
                if calls <= total {
                    prop_assert!(reduction >= 1.0);
                }
            }
        }

        // The overall score of metrics drawn from [0, 1) stays within [0, 1].
        #[test]
        fn prop_quality_overall_bounded(
            novelty in 0.0f32..1.0,
            diversity in 0.0f32..1.0,
            difficulty in 0.0f32..1.0,
            coverage in 0.0f32..1.0,
            bug_rate in 0.0f32..1.0,
        ) {
            let metrics = DataQualityMetrics {
                novelty,
                diversity,
                difficulty,
                coverage,
                bug_rate,
            };

            let overall = metrics.overall();
            prop_assert!(overall >= 0.0);
            prop_assert!(overall <= 1.0);
        }
    }
}