use crate::benchmarks::{MissingPattern, MissingPatternGenerator};
use crate::core::{ImputationError, ImputationResult, Imputer};
use crate::simple::SimpleImputer;
use crate::validation::ImputationMetrics;
use rayon::prelude::*;
use scirs2_core::ndarray::Array2;
use scirs2_core::random::{Random, RngExt};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs::{create_dir_all, File};
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, RwLock};
use std::time::{Duration, Instant, SystemTime};
24
/// Configuration for a full automated imputation test run: which datasets,
/// missingness patterns and imputers to cross, plus quality thresholds,
/// repetition count and execution policy.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestPipelineConfig {
    /// Human-readable suite name recorded in results and reports.
    pub suite_name: String,
    /// Directory where `test_results.json` and `summary_report.txt` are written.
    pub output_dir: PathBuf,
    /// Datasets each test case draws from.
    pub test_datasets: Vec<TestDataset>,
    /// Missingness mechanisms applied to each dataset.
    pub missing_patterns: Vec<MissingPattern>,
    /// Names of the imputer implementations to exercise.
    pub imputers_to_test: Vec<String>,
    /// Pass/fail limits applied to every test case.
    pub quality_thresholds: QualityThresholds,
    /// Targets for the performance comparison against the baseline method.
    pub performance_benchmarks: PerformanceBenchmarks,
    /// Whether to run statistical significance testing
    /// (not read in this module's visible code).
    pub statistical_testing: bool,
    /// Confidence level for statistical tests, e.g. 0.95
    /// (not read in this module's visible code).
    pub confidence_level: f64,
    /// Repetitions per (dataset, pattern, imputer) combination.
    pub n_repetitions: usize,
    /// CI-mode flag (reserved; not read in this module's visible code).
    pub ci_mode: bool,
    /// Run test cases on the rayon pool instead of sequentially.
    pub parallel_execution: bool,
    /// Overall time budget for the run (not enforced in the visible code).
    pub max_test_duration: Duration,
}
55
56impl Default for TestPipelineConfig {
57 fn default() -> Self {
58 Self {
59 suite_name: "ImputationTestSuite".to_string(),
60 output_dir: PathBuf::from("test_results"),
61 test_datasets: vec![
62 TestDataset::Synthetic {
63 n_samples: 1000,
64 n_features: 10,
65 noise_level: 0.1,
66 },
67 TestDataset::Synthetic {
68 n_samples: 5000,
69 n_features: 50,
70 noise_level: 0.2,
71 },
72 ],
73 missing_patterns: vec![
74 MissingPattern::MCAR { missing_rate: 0.1 },
75 MissingPattern::MAR {
76 missing_rate: 0.2,
77 dependency_strength: 0.5,
78 },
79 MissingPattern::MNAR {
80 missing_rate: 0.15,
81 threshold: 0.3,
82 },
83 ],
84 imputers_to_test: vec![
85 "SimpleImputer".to_string(),
86 "KNNImputer".to_string(),
87 "IterativeImputer".to_string(),
88 ],
89 quality_thresholds: QualityThresholds::default(),
90 performance_benchmarks: PerformanceBenchmarks::default(),
91 statistical_testing: true,
92 confidence_level: 0.95,
93 n_repetitions: 10,
94 ci_mode: false,
95 parallel_execution: true,
96 max_test_duration: Duration::from_secs(3600), }
98 }
99}
100
/// Source of the ground-truth data a test case runs against.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TestDataset {
    /// Autocorrelated Gaussian data generated on the fly
    /// (see `generate_synthetic_data`).
    Synthetic {
        n_samples: usize,
        n_features: usize,
        noise_level: f64,
    },
    /// Data loaded from disk (loading is currently unimplemented).
    File { path: PathBuf, name: String },
    /// Named benchmark dataset (loading is currently unimplemented).
    Benchmark { name: String, source: String },
}
115
/// Pass/fail limits for a single test case.
///
/// NOTE(review): despite the `min_` prefix, `min_rmse` is enforced as an
/// *upper* bound on RMSE in `meets_quality_thresholds`; consider renaming
/// to `max_rmse`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityThresholds {
    /// Maximum acceptable RMSE (field name is historical).
    pub min_rmse: f64,
    /// Minimum acceptable R².
    pub min_r_squared: f64,
    /// Maximum acceptable absolute bias.
    pub max_bias: f64,
    /// Minimum acceptable coverage (not checked in the visible code).
    pub min_coverage: f64,
    /// Maximum processing time per case, in seconds.
    pub max_processing_time: f64,
    /// Maximum memory usage per case, in megabytes.
    pub max_memory_usage: f64,
}
132
133impl Default for QualityThresholds {
134 fn default() -> Self {
135 Self {
136 min_rmse: 2.0,
137 min_r_squared: 0.5,
138 max_bias: 0.1,
139 min_coverage: 0.9,
140 max_processing_time: 10.0, max_memory_usage: 100.0, }
143 }
144}
145
/// Targets for comparing each imputer against a baseline method.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceBenchmarks {
    /// Desired speedup factor relative to the baseline.
    pub target_speedup: f64,
    /// Desired memory-usage reduction factor relative to the baseline.
    pub target_memory_reduction: f64,
    /// Name of the method the comparisons are measured against.
    pub baseline_method: String,
}
156
157impl Default for PerformanceBenchmarks {
158 fn default() -> Self {
159 Self {
160 target_speedup: 2.0,
161 target_memory_reduction: 1.5,
162 baseline_method: "SimpleImputer".to_string(),
163 }
164 }
165}
166
/// Orchestrates the whole run: generates test cases from the config,
/// executes them (optionally in parallel) and aggregates the results.
#[derive(Debug)]
pub struct AutomatedTestPipeline {
    /// Immutable run configuration.
    config: TestPipelineConfig,
    /// Shared, lock-protected accumulator for results across worker threads.
    test_results: Arc<RwLock<TestResults>>,
    /// Execution bookkeeping (queue and in-flight tests).
    test_runner: TestRunner,
}
174
/// Bookkeeping for test execution.
///
/// NOTE(review): constructed by `AutomatedTestPipeline::new`, but none of
/// its fields are read in this module's visible code — the queue and
/// active-test tracking appear unfinished; confirm before extending.
#[derive(Debug)]
pub struct TestRunner {
    /// Mirrors `TestPipelineConfig::parallel_execution`.
    parallel_execution: bool,
    /// Pending test cases (always empty in the visible code).
    test_queue: Arc<Mutex<Vec<TestCase>>>,
    /// Tests currently in flight, keyed by test id.
    active_tests: Arc<RwLock<HashMap<String, TestExecution>>>,
}
182
/// One concrete (dataset × missing pattern × imputer × repetition) test.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestCase {
    /// Sequential identifier, e.g. `test_0001`.
    pub id: String,
    /// Descriptive name combining dataset, pattern, imputer and repetition.
    pub name: String,
    /// Data source for this case.
    pub dataset: TestDataset,
    /// Missingness mechanism applied to the data.
    pub missing_pattern: MissingPattern,
    /// Registered name of the imputer under test.
    pub imputer_name: String,
    /// Imputer hyper-parameters as string key/value pairs.
    pub parameters: HashMap<String, String>,
    /// Optional per-case pass criteria derived from the global thresholds.
    pub expected_results: Option<TestExpectations>,
    /// Scheduling priority (higher-priority cases run first).
    pub priority: TestPriority,
}
203
/// Live state of a test case while it is running.
#[derive(Debug, Clone)]
pub struct TestExecution {
    /// The case being executed.
    pub test_case: TestCase,
    /// When execution began (monotonic clock).
    pub start_time: Instant,
    /// Current lifecycle state.
    pub status: TestStatus,
    /// Completion fraction in [0, 1].
    pub progress: f64,
    /// Metrics emitted while the test is still in flight.
    pub intermediate_results: Vec<IntermediateResult>,
}
218
/// Lifecycle state of a test case.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum TestStatus {
    /// Waiting to be scheduled.
    Queued,
    /// Currently executing.
    Running,
    /// Finished and met all quality thresholds.
    Completed,
    /// Finished unsuccessfully; the payload describes why.
    Failed(String),
    /// Exceeded its time budget.
    Timeout,
    /// Cancelled before completion.
    Cancelled,
}
235
/// Scheduling priority; `Critical` cases are sorted to run first.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum TestPriority {
    Critical,
    High,
    Medium,
    Low,
}
248
/// Per-case pass criteria, populated from the global quality thresholds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestExpectations {
    /// Minimum acceptable quality score (seeded from `min_r_squared`).
    pub min_quality_score: f64,
    /// Wall-clock budget for the case.
    pub max_processing_time: Duration,
    /// Memory budget in bytes.
    pub max_memory_usage: usize,
    /// Whether the imputer is expected to converge.
    pub expected_convergence: bool,
}
261
/// A single metric sample emitted while a test is running.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntermediateResult {
    /// When the sample was taken (wall clock, serializable).
    pub timestamp: SystemTime,
    /// Name of the sampled metric.
    pub metric_name: String,
    /// Sampled value.
    pub value: f64,
    /// Free-form context for the sample.
    pub metadata: HashMap<String, String>,
}
274
/// Aggregated outcome of an entire pipeline run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestResults {
    /// Suite name copied from the config.
    pub suite_name: String,
    /// When the run started.
    pub start_time: SystemTime,
    /// When the run finished; `None` while still in progress.
    pub end_time: Option<SystemTime>,
    /// Number of generated test cases.
    pub total_tests: usize,
    /// Cases that completed and met the thresholds.
    pub passed_tests: usize,
    /// Cases that failed, timed out or were cancelled.
    pub failed_tests: usize,
    /// Per-case outcomes in completion order.
    pub test_cases: Vec<CompletedTestCase>,
    /// Aggregate metrics over the passing cases.
    pub summary_statistics: SummaryStatistics,
    /// Per-method comparison against the configured baseline.
    pub performance_comparison: PerformanceComparison,
}
297
/// Outcome of one finished test case.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompletedTestCase {
    /// The case that was executed.
    pub test_case: TestCase,
    /// Final status (`Completed`, `Failed(...)`, …).
    pub status: TestStatus,
    /// Wall-clock duration of the run (includes data generation).
    pub execution_time: Duration,
    /// Measured memory delta in bytes (0 when imputation errored out).
    pub memory_usage: usize,
    /// Quality metrics computed over the imputed cells.
    pub quality_metrics: ImputationMetrics,
    /// Expanded per-metric breakdown.
    pub detailed_results: DetailedResults,
    /// Error description when the imputation itself failed.
    pub error_message: Option<String>,
}
316
/// Per-test error metrics plus optional convergence / significance info.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetailedResults {
    /// Root-mean-square error over the imputed cells.
    pub rmse: f64,
    /// Mean absolute error over the imputed cells.
    pub mae: f64,
    /// Coefficient of determination over the imputed cells.
    pub r_squared: f64,
    /// Mean signed error (not computed by the current evaluator).
    pub bias: f64,
    /// Interval coverage rate.
    pub coverage: f64,
    /// Convergence details for iterative imputers, when available.
    pub convergence_info: Option<ConvergenceInfo>,
    /// Statistical-test outcome, when significance testing ran.
    pub statistical_significance: Option<StatisticalSignificance>,
}
335
/// Convergence trace for an iterative imputation method.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConvergenceInfo {
    /// Whether the method converged within its iteration budget.
    pub converged: bool,
    /// Iterations actually performed.
    pub n_iterations: usize,
    /// Magnitude of the last update step.
    pub final_change: f64,
    /// Change magnitude per iteration.
    pub convergence_history: Vec<f64>,
}
348
/// Result of a statistical hypothesis test comparing two methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatisticalSignificance {
    /// Name of the hypothesis test used.
    pub test_name: String,
    /// Observed p-value.
    pub p_value: f64,
    /// Whether the p-value clears the configured confidence level.
    pub is_significant: bool,
    /// Confidence interval for the measured effect.
    pub confidence_interval: (f64, f64),
    /// Effect-size estimate.
    pub effect_size: f64,
}
363
/// Aggregates computed over the *passing* test cases only
/// (see `generate_final_report`).
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct SummaryStatistics {
    /// Mean RMSE across passing cases.
    pub average_rmse: f64,
    /// Mean R² across passing cases.
    pub average_r_squared: f64,
    /// Mean wall-clock time per passing case.
    pub average_execution_time: Duration,
    /// Sum of measured memory deltas across passing cases, in bytes.
    pub total_memory_usage: usize,
    /// passed / total over all executed cases.
    pub success_rate: f64,
    /// Distribution of quality scores (not populated in the visible code).
    pub quality_score_distribution: Vec<f64>,
}
380
/// Container for per-method comparisons against a baseline imputer.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct PerformanceComparison {
    /// Name of the baseline method (from `PerformanceBenchmarks`).
    pub baseline_method: String,
    /// Comparison results keyed by method name
    /// (not populated in the visible code).
    pub comparison_results: HashMap<String, MethodComparison>,
}
389
/// How one imputation method fares against the baseline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MethodComparison {
    /// Name of the compared method.
    pub method_name: String,
    /// Speed relative to baseline (>1 means faster).
    pub speedup_factor: f64,
    /// Memory usage relative to baseline (>1 means less memory).
    pub memory_reduction_factor: f64,
    /// Quality-metric delta versus baseline.
    pub quality_difference: f64,
    /// Significance of the quality difference, when tested.
    pub statistical_significance: Option<StatisticalSignificance>,
}
404
405impl AutomatedTestPipeline {
406 pub fn new(config: TestPipelineConfig) -> Self {
408 let test_results = Arc::new(RwLock::new(TestResults {
409 suite_name: config.suite_name.clone(),
410 start_time: SystemTime::now(),
411 end_time: None,
412 total_tests: 0,
413 passed_tests: 0,
414 failed_tests: 0,
415 test_cases: Vec::new(),
416 summary_statistics: SummaryStatistics::default(),
417 performance_comparison: PerformanceComparison {
418 baseline_method: config.performance_benchmarks.baseline_method.clone(),
419 comparison_results: HashMap::new(),
420 },
421 }));
422
423 let test_runner = TestRunner {
424 parallel_execution: config.parallel_execution,
425 test_queue: Arc::new(Mutex::new(Vec::new())),
426 active_tests: Arc::new(RwLock::new(HashMap::new())),
427 };
428
429 Self {
430 config,
431 test_results,
432 test_runner,
433 }
434 }
435
436 pub async fn run_pipeline(&mut self) -> Result<TestResults, ImputationError> {
438 println!(
439 "Starting automated testing pipeline: {}",
440 self.config.suite_name
441 );
442
443 if !self.config.output_dir.exists() {
445 create_dir_all(&self.config.output_dir).map_err(|e| {
446 ImputationError::ProcessingError(format!(
447 "Failed to create output directory: {}",
448 e
449 ))
450 })?;
451 }
452
453 let test_cases = self.generate_test_cases()?;
455 println!("Generated {} test cases", test_cases.len());
456
457 {
459 let mut results = self.test_results.write().expect("operation should succeed");
460 results.total_tests = test_cases.len();
461 }
462
463 self.execute_test_cases(test_cases).await?;
465
466 let final_results = self.generate_final_report()?;
468
469 self.save_results_to_file(&final_results)?;
471
472 println!("Testing pipeline completed successfully");
473 Ok(final_results)
474 }
475
476 fn generate_test_cases(&self) -> Result<Vec<TestCase>, ImputationError> {
478 let mut test_cases = Vec::new();
479 let mut test_id_counter = 0;
480
481 for dataset in &self.config.test_datasets {
482 for pattern in &self.config.missing_patterns {
483 for imputer_name in &self.config.imputers_to_test {
484 for repetition in 0..self.config.n_repetitions {
485 let test_case = TestCase {
486 id: format!("test_{:04}", test_id_counter),
487 name: format!(
488 "{}_{}_{}_rep{}",
489 self.dataset_name(dataset),
490 self.pattern_name(pattern),
491 imputer_name,
492 repetition
493 ),
494 dataset: dataset.clone(),
495 missing_pattern: pattern.clone(),
496 imputer_name: imputer_name.clone(),
497 parameters: self.get_default_parameters(imputer_name),
498 expected_results: Some(TestExpectations {
499 min_quality_score: self.config.quality_thresholds.min_r_squared,
500 max_processing_time: Duration::from_secs_f64(
501 self.config.quality_thresholds.max_processing_time,
502 ),
503 max_memory_usage: (self.config.quality_thresholds.max_memory_usage
504 * 1_000_000.0)
505 as usize,
506 expected_convergence: true,
507 }),
508 priority: self.determine_test_priority(dataset, pattern, imputer_name),
509 };
510
511 test_cases.push(test_case);
512 test_id_counter += 1;
513 }
514 }
515 }
516 }
517
518 test_cases.sort_by(|a, b| {
520 use TestPriority::*;
521 let priority_order = |p: &TestPriority| match p {
522 Critical => 0,
523 High => 1,
524 Medium => 2,
525 Low => 3,
526 };
527 priority_order(&a.priority).cmp(&priority_order(&b.priority))
528 });
529
530 Ok(test_cases)
531 }
532
    /// Dispatches the generated cases to the parallel or sequential
    /// executor, depending on `config.parallel_execution`.
    async fn execute_test_cases(
        &mut self,
        test_cases: Vec<TestCase>,
    ) -> Result<(), ImputationError> {
        if self.config.parallel_execution {
            self.execute_tests_parallel(test_cases).await
        } else {
            self.execute_tests_sequential(test_cases).await
        }
    }
544
545 async fn execute_tests_parallel(
547 &mut self,
548 test_cases: Vec<TestCase>,
549 ) -> Result<(), ImputationError> {
550 let chunk_size = num_cpus::get();
551 let results: Result<Vec<_>, _> = test_cases
552 .chunks(chunk_size)
553 .flat_map(|chunk| {
554 chunk
555 .par_iter()
556 .map(|test_case| self.execute_single_test(test_case.clone()))
557 .collect::<Vec<_>>()
558 })
559 .collect();
560
561 let completed_tests = results?;
562
563 {
565 let mut test_results = self.test_results.write().expect("operation should succeed");
566 for completed_test in completed_tests {
567 match completed_test.status {
568 TestStatus::Completed => test_results.passed_tests += 1,
569 _ => test_results.failed_tests += 1,
570 }
571 test_results.test_cases.push(completed_test);
572 }
573 }
574
575 Ok(())
576 }
577
578 async fn execute_tests_sequential(
580 &mut self,
581 test_cases: Vec<TestCase>,
582 ) -> Result<(), ImputationError> {
583 for test_case in test_cases {
584 let completed_test = self.execute_single_test(test_case)?;
585
586 {
588 let mut test_results = self.test_results.write().expect("operation should succeed");
589 match completed_test.status {
590 TestStatus::Completed => test_results.passed_tests += 1,
591 _ => test_results.failed_tests += 1,
592 }
593 test_results.test_cases.push(completed_test);
594 }
595 }
596
597 Ok(())
598 }
599
    /// Runs one test case end to end: builds the data, constructs the
    /// imputer, runs the imputation with time/memory measurement, and scores
    /// the result against the quality thresholds.
    ///
    /// Imputation failures are reported as a `Failed` `CompletedTestCase`
    /// inside `Ok(...)`; `Err` is reserved for setup failures (data
    /// generation or imputer construction).
    fn execute_single_test(
        &self,
        test_case: TestCase,
    ) -> Result<CompletedTestCase, ImputationError> {
        // Note: the timer starts here, so `execution_time` includes data
        // generation and imputer construction, not just the imputation.
        let start_time = Instant::now();

        println!("Executing test: {}", test_case.name);

        let (X_true, X_missing) = self.generate_test_data(&test_case)?;

        let mut imputer = self.create_imputer(&test_case.imputer_name, &test_case.parameters)?;

        // Crude before/after delta around the imputation call; see
        // `measure_memory_usage` for the (placeholder) probe.
        let memory_before = self.measure_memory_usage();

        let result = match self.execute_imputation(&mut *imputer, &X_missing, &X_true) {
            Ok(result) => result,
            Err(error) => {
                // Imputation error: report a failed case with worst-case
                // sentinel metrics rather than aborting the whole run.
                return Ok(CompletedTestCase {
                    test_case,
                    status: TestStatus::Failed(error.to_string()),
                    execution_time: start_time.elapsed(),
                    memory_usage: 0,
                    quality_metrics: ImputationMetrics::default(),
                    detailed_results: DetailedResults {
                        rmse: f64::INFINITY,
                        mae: f64::INFINITY,
                        r_squared: -f64::INFINITY,
                        bias: f64::INFINITY,
                        coverage: 0.0,
                        convergence_info: None,
                        statistical_significance: None,
                    },
                    error_message: Some(error.to_string()),
                });
            }
        };

        let execution_time = start_time.elapsed();
        let memory_after = self.measure_memory_usage();
        // `saturating_sub` guards against the probe reporting a shrink.
        let memory_usage = memory_after.saturating_sub(memory_before);

        let quality_metrics = self.evaluate_imputation_quality(&X_true, &result, &X_missing)?;

        let status =
            if self.meets_quality_thresholds(&quality_metrics, execution_time, memory_usage) {
                TestStatus::Completed
            } else {
                TestStatus::Failed("Quality thresholds not met".to_string())
            };

        Ok(CompletedTestCase {
            test_case,
            status,
            execution_time,
            memory_usage,
            quality_metrics: quality_metrics.clone(),
            detailed_results: DetailedResults {
                rmse: quality_metrics.rmse,
                mae: quality_metrics.mae,
                r_squared: quality_metrics.r2,
                bias: quality_metrics.bias,
                coverage: quality_metrics.coverage,
                convergence_info: None, statistical_significance: None, },
            error_message: None,
        })
    }
675
676 fn generate_test_data(
678 &self,
679 test_case: &TestCase,
680 ) -> Result<(Array2<f64>, Array2<f64>), ImputationError> {
681 match &test_case.dataset {
682 TestDataset::Synthetic {
683 n_samples,
684 n_features,
685 noise_level,
686 } => self.generate_synthetic_data(
687 *n_samples,
688 *n_features,
689 *noise_level,
690 &test_case.missing_pattern,
691 ),
692 TestDataset::File { path, .. } => {
693 self.load_data_from_file(path, &test_case.missing_pattern)
694 }
695 TestDataset::Benchmark { name, .. } => {
696 self.load_benchmark_data(name, &test_case.missing_pattern)
697 }
698 }
699 }
700
701 fn generate_synthetic_data(
703 &self,
704 n_samples: usize,
705 n_features: usize,
706 noise_level: f64,
707 missing_pattern: &MissingPattern,
708 ) -> Result<(Array2<f64>, Array2<f64>), ImputationError> {
709 let mut rng = Random::default();
710
711 let mut X_true = Array2::<f64>::zeros((n_samples, n_features));
713
714 for i in 0..n_samples {
715 for j in 0..n_features {
716 let base_value = if j == 0 {
718 {
719 let u1: f64 = rng.random();
721 let u2: f64 = rng.random();
722 let mag = 1.0 * (-2.0 * u1.ln()).sqrt();
723 mag * (2.0 * std::f64::consts::PI * u2).cos() + 0.0
724 }
725 } else {
726 0.5 * X_true[[i, j - 1]]
727 + 0.5 * {
728 let u1: f64 = rng.random();
730 let u2: f64 = rng.random();
731 let mag = 1.0 * (-2.0 * u1.ln()).sqrt();
732 mag * (2.0 * std::f64::consts::PI * u2).cos() + 0.0
733 }
734 };
735
736 X_true[[i, j]] = base_value
737 + noise_level * {
738 let u1: f64 = rng.random();
740 let u2: f64 = rng.random();
741 let mag = 1.0 * (-2.0 * u1.ln()).sqrt();
742 mag * (2.0 * std::f64::consts::PI * u2).cos() + 0.0
743 };
744 }
745 }
746
747 let generator = MissingPatternGenerator::new();
749 let (X_missing, _missing_mask) = generator.introduce_missing(&X_true, missing_pattern)?;
750
751 Ok((X_true, X_missing))
752 }
753
    /// Loads a dataset from disk and applies the missing pattern.
    ///
    /// Currently a stub: always returns `ProcessingError`.
    fn load_data_from_file(
        &self,
        _path: &Path,
        _missing_pattern: &MissingPattern,
    ) -> Result<(Array2<f64>, Array2<f64>), ImputationError> {
        Err(ImputationError::ProcessingError(
            "File loading not implemented".to_string(),
        ))
    }
765
    /// Loads a named benchmark dataset and applies the missing pattern.
    ///
    /// Currently a stub: always returns `ProcessingError`.
    fn load_benchmark_data(
        &self,
        _name: &str,
        _missing_pattern: &MissingPattern,
    ) -> Result<(Array2<f64>, Array2<f64>), ImputationError> {
        Err(ImputationError::ProcessingError(
            "Benchmark loading not implemented".to_string(),
        ))
    }
777
778 fn create_imputer(
780 &self,
781 imputer_name: &str,
782 _parameters: &HashMap<String, String>,
783 ) -> Result<Box<dyn Imputer>, ImputationError> {
784 match imputer_name {
785 "SimpleImputer" => Ok(Box::new(SimpleImputer::new())),
786 _ => Err(ImputationError::InvalidConfiguration(format!(
787 "Unknown imputer: {}",
788 imputer_name
789 ))),
790 }
791 }
792
    /// Runs the imputer over the masked data.
    ///
    /// NOTE(review): placeholder implementation — it ignores both the
    /// imputer and the masked matrix and returns a clone of the ground
    /// truth, so every downstream quality metric will look perfect. Replace
    /// with a real fit/transform call before trusting pipeline results.
    fn execute_imputation(
        &self,
        _imputer: &mut dyn Imputer,
        _X_missing: &Array2<f64>,
        X_true: &Array2<f64>,
    ) -> ImputationResult<Array2<f64>> {
        Ok(X_true.clone())
    }
804
805 fn evaluate_imputation_quality(
807 &self,
808 X_true: &Array2<f64>,
809 X_imputed: &Array2<f64>,
810 X_missing: &Array2<f64>,
811 ) -> Result<ImputationMetrics, ImputationError> {
812 let mut rmse_sum = 0.0;
813 let mut mae_sum = 0.0;
814 let mut missing_count = 0;
815
816 for ((i, j), &true_value) in X_true.indexed_iter() {
817 if X_missing[[i, j]].is_nan() {
818 let imputed_value = X_imputed[[i, j]];
819 let error = true_value - imputed_value;
820
821 rmse_sum += error * error;
822 mae_sum += error.abs();
823 missing_count += 1;
824 }
825 }
826
827 let rmse = if missing_count > 0 {
828 (rmse_sum / missing_count as f64).sqrt()
829 } else {
830 0.0
831 };
832
833 let mae = if missing_count > 0 {
834 mae_sum / missing_count as f64
835 } else {
836 0.0
837 };
838
839 let mean_true: f64 = X_true.iter().filter(|&&x| !x.is_nan()).sum::<f64>()
841 / X_true.iter().filter(|&&x| !x.is_nan()).count() as f64;
842
843 let mut ss_tot = 0.0;
844 let mut ss_res = 0.0;
845
846 for ((i, j), &true_value) in X_true.indexed_iter() {
847 if X_missing[[i, j]].is_nan() {
848 let imputed_value = X_imputed[[i, j]];
849 ss_tot += (true_value - mean_true).powi(2);
850 ss_res += (true_value - imputed_value).powi(2);
851 }
852 }
853
854 let r_squared = if ss_tot > 0.0 {
855 1.0 - (ss_res / ss_tot)
856 } else {
857 1.0
858 };
859
860 Ok(ImputationMetrics {
861 rmse,
862 mae,
863 r2: r_squared,
864 accuracy: 0.0, f1_score: 0.0, bias: 0.0, coverage: 0.95, ks_statistic: 0.0, ks_pvalue: 1.0, })
871 }
872
873 fn meets_quality_thresholds(
875 &self,
876 metrics: &ImputationMetrics,
877 execution_time: Duration,
878 memory_usage: usize,
879 ) -> bool {
880 let rmse_ok = metrics.rmse <= self.config.quality_thresholds.min_rmse;
881 let r2_ok = metrics.r2 >= self.config.quality_thresholds.min_r_squared;
882 let bias_ok = metrics.bias.abs() <= self.config.quality_thresholds.max_bias;
883 let time_ok =
884 execution_time.as_secs_f64() <= self.config.quality_thresholds.max_processing_time;
885 let memory_ok =
886 (memory_usage as f64 / 1_000_000.0) <= self.config.quality_thresholds.max_memory_usage;
887
888 rmse_ok && r2_ok && bias_ok && time_ok && memory_ok
889 }
890
891 fn generate_final_report(&self) -> Result<TestResults, ImputationError> {
893 let mut results = self.test_results.write().expect("operation should succeed");
894 results.end_time = Some(SystemTime::now());
895
896 if !results.test_cases.is_empty() {
898 let total_tests = results.test_cases.len();
899 let passed_tests = results
900 .test_cases
901 .iter()
902 .filter(|tc| matches!(tc.status, TestStatus::Completed))
903 .count();
904
905 results.summary_statistics.success_rate = passed_tests as f64 / total_tests as f64;
906
907 let passed_test_cases: Vec<_> = results
909 .test_cases
910 .iter()
911 .filter(|tc| matches!(tc.status, TestStatus::Completed))
912 .collect();
913
914 if !passed_test_cases.is_empty() {
915 let avg_rmse = passed_test_cases
917 .iter()
918 .map(|tc| tc.quality_metrics.rmse)
919 .sum::<f64>()
920 / passed_test_cases.len() as f64;
921
922 let avg_r_squared = passed_test_cases
923 .iter()
924 .map(|tc| tc.quality_metrics.r2)
925 .sum::<f64>()
926 / passed_test_cases.len() as f64;
927
928 let total_execution_time: Duration =
929 passed_test_cases.iter().map(|tc| tc.execution_time).sum();
930 let avg_execution_time = total_execution_time / passed_test_cases.len() as u32;
931
932 let total_memory = passed_test_cases.iter().map(|tc| tc.memory_usage).sum();
933
934 results.summary_statistics.average_rmse = avg_rmse;
936 results.summary_statistics.average_r_squared = avg_r_squared;
937 results.summary_statistics.average_execution_time = avg_execution_time;
938 results.summary_statistics.total_memory_usage = total_memory;
939 }
940 }
941
942 Ok(results.clone())
943 }
944
945 fn save_results_to_file(&self, results: &TestResults) -> Result<(), ImputationError> {
947 let output_path = self.config.output_dir.join("test_results.json");
948 let file = File::create(&output_path).map_err(|e| {
949 ImputationError::ProcessingError(format!("Failed to create results file: {}", e))
950 })?;
951
952 serde_json::to_writer_pretty(file, results).map_err(|e| {
953 ImputationError::ProcessingError(format!("Failed to write results: {}", e))
954 })?;
955
956 println!("Test results saved to: {}", output_path.display());
957
958 self.generate_summary_report(results)?;
960
961 Ok(())
962 }
963
964 fn generate_summary_report(&self, results: &TestResults) -> Result<(), ImputationError> {
966 let summary_path = self.config.output_dir.join("summary_report.txt");
967 let mut file = File::create(&summary_path).map_err(|e| {
968 ImputationError::ProcessingError(format!("Failed to create summary file: {}", e))
969 })?;
970
971 writeln!(file, "=== IMPUTATION TESTING PIPELINE SUMMARY ===")?;
972 writeln!(file, "Suite Name: {}", results.suite_name)?;
973 writeln!(file, "Start Time: {:?}", results.start_time)?;
974 writeln!(
975 file,
976 "End Time: {:?}",
977 results.end_time.unwrap_or(SystemTime::now())
978 )?;
979 writeln!(file)?;
980
981 writeln!(file, "=== TEST RESULTS ===")?;
982 writeln!(file, "Total Tests: {}", results.total_tests)?;
983 writeln!(file, "Passed Tests: {}", results.passed_tests)?;
984 writeln!(file, "Failed Tests: {}", results.failed_tests)?;
985 writeln!(
986 file,
987 "Success Rate: {:.2}%",
988 results.summary_statistics.success_rate * 100.0
989 )?;
990 writeln!(file)?;
991
992 writeln!(file, "=== PERFORMANCE METRICS ===")?;
993 writeln!(
994 file,
995 "Average RMSE: {:.4}",
996 results.summary_statistics.average_rmse
997 )?;
998 writeln!(
999 file,
1000 "Average R²: {:.4}",
1001 results.summary_statistics.average_r_squared
1002 )?;
1003 writeln!(
1004 file,
1005 "Average Execution Time: {:?}",
1006 results.summary_statistics.average_execution_time
1007 )?;
1008 writeln!(
1009 file,
1010 "Total Memory Usage: {} MB",
1011 results.summary_statistics.total_memory_usage / 1_000_000
1012 )?;
1013
1014 Ok(())
1015 }
1016
1017 fn dataset_name(&self, dataset: &TestDataset) -> String {
1019 match dataset {
1020 TestDataset::Synthetic {
1021 n_samples,
1022 n_features,
1023 ..
1024 } => format!("synthetic_{}x{}", n_samples, n_features),
1025 TestDataset::File { name, .. } => name.clone(),
1026 TestDataset::Benchmark { name, .. } => name.clone(),
1027 }
1028 }
1029
1030 fn pattern_name(&self, pattern: &MissingPattern) -> String {
1031 match pattern {
1032 MissingPattern::MCAR { missing_rate } => format!("mcar_{:.1}", missing_rate),
1033 MissingPattern::MAR { missing_rate, .. } => format!("mar_{:.1}", missing_rate),
1034 MissingPattern::MNAR { missing_rate, .. } => format!("mnar_{:.1}", missing_rate),
1035 _ => "unknown".to_string(),
1036 }
1037 }
1038
1039 fn get_default_parameters(&self, imputer_name: &str) -> HashMap<String, String> {
1040 let mut params = HashMap::new();
1041 match imputer_name {
1042 "SimpleImputer" => {
1043 params.insert("strategy".to_string(), "mean".to_string());
1044 }
1045 "KNNImputer" => {
1046 params.insert("n_neighbors".to_string(), "5".to_string());
1047 }
1048 _ => {}
1049 }
1050 params
1051 }
1052
    /// Assigns a scheduling priority to a test case.
    ///
    /// Currently a stub: every case is `Medium` regardless of its inputs,
    /// so the priority sort in `generate_test_cases` is a no-op for now.
    fn determine_test_priority(
        &self,
        _dataset: &TestDataset,
        _pattern: &MissingPattern,
        _imputer: &str,
    ) -> TestPriority {
        TestPriority::Medium
    }
1062
    /// Samples the process memory footprint, in bytes.
    ///
    /// NOTE(review): placeholder — always returns the constant 1000, so the
    /// memory numbers in reports are meaningless until a real probe (e.g. a
    /// /proc read or platform API) is wired in.
    fn measure_memory_usage(&self) -> usize {
        1000
    }
1067}
1068
// Worst-case defaults used when a test fails before metrics can be computed:
// infinite errors, minus-infinite R², zero coverage.
impl Default for ImputationMetrics {
    fn default() -> Self {
        Self {
            rmse: f64::INFINITY,
            mae: f64::INFINITY,
            r2: -f64::INFINITY,
            accuracy: 0.0,
            f1_score: 0.0,
            bias: f64::INFINITY,
            coverage: 0.0,
            ks_statistic: 0.0,
            ks_pvalue: 1.0,
        }
    }
}
1084
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;

    // Struct-update syntax keeps every field not listed at its default.
    #[test]
    fn test_pipeline_config_creation() {
        let config = TestPipelineConfig {
            suite_name: "TestSuite".to_string(),
            n_repetitions: 5,
            ..Default::default()
        };

        assert_eq!(config.suite_name, "TestSuite");
        assert_eq!(config.n_repetitions, 5);
        assert!(config.parallel_execution);
    }

    // 1 dataset × 1 pattern × 1 imputer × 2 repetitions = 2 cases.
    #[test]
    fn test_test_case_generation() {
        let config = TestPipelineConfig {
            test_datasets: vec![TestDataset::Synthetic {
                n_samples: 100,
                n_features: 5,
                noise_level: 0.1,
            }],
            missing_patterns: vec![MissingPattern::MCAR { missing_rate: 0.1 }],
            imputers_to_test: vec!["SimpleImputer".to_string()],
            n_repetitions: 2,
            ..Default::default()
        };

        let pipeline = AutomatedTestPipeline::new(config);
        let test_cases = pipeline
            .generate_test_cases()
            .expect("operation should succeed");

        assert_eq!(test_cases.len(), 2); assert!(test_cases
            .iter()
            .all(|tc| tc.imputer_name == "SimpleImputer"));
    }

    #[test]
    fn test_quality_thresholds() {
        let thresholds = QualityThresholds {
            min_rmse: 1.0,
            min_r_squared: 0.8,
            max_bias: 0.05,
            ..Default::default()
        };

        assert_eq!(thresholds.min_rmse, 1.0);
        assert_eq!(thresholds.min_r_squared, 0.8);
        assert_eq!(thresholds.max_bias, 0.05);
    }

    // Checks shape preservation and that the MCAR mask removed some — but
    // not all — cells.
    #[test]
    fn test_synthetic_data_generation() {
        let config = TestPipelineConfig::default();
        let pipeline = AutomatedTestPipeline::new(config);

        let test_case = TestCase {
            id: "test_001".to_string(),
            name: "test".to_string(),
            dataset: TestDataset::Synthetic {
                n_samples: 100,
                n_features: 5,
                noise_level: 0.1,
            },
            missing_pattern: MissingPattern::MCAR { missing_rate: 0.2 },
            imputer_name: "SimpleImputer".to_string(),
            parameters: HashMap::new(),
            expected_results: None,
            priority: TestPriority::Medium,
        };

        let result = pipeline.generate_test_data(&test_case);
        assert!(result.is_ok());

        let (X_true, X_missing) = result.expect("operation should succeed");
        assert_eq!(X_true.shape(), &[100, 5]);
        assert_eq!(X_missing.shape(), &[100, 5]);

        // Missing cells are encoded as NaN.
        let missing_count = X_missing.iter().filter(|&&x| x.is_nan()).count();
        assert!(missing_count > 0);
        assert!(missing_count < X_missing.len()); }

    #[test]
    fn test_performance_benchmarks() {
        let benchmarks = PerformanceBenchmarks {
            target_speedup: 3.0,
            target_memory_reduction: 2.0,
            baseline_method: "SimpleImputer".to_string(),
        };

        assert_eq!(benchmarks.target_speedup, 3.0);
        assert_eq!(benchmarks.target_memory_reduction, 2.0);
        assert_eq!(benchmarks.baseline_method, "SimpleImputer");
    }
}