sklears_compose/
property_testing.rs

1//! Property-based testing utilities for pipeline composition
2//!
3//! This module provides comprehensive property-based testing frameworks
4//! for validating pipeline properties, invariants, and correctness.
5
6use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
7use sklears_core::traits::Transform;
8
/// Property-based test generator for pipeline data
///
/// Stores the seed and the size/value bounds used when producing random
/// matrices and target vectors. The type is `Debug`, `Clone` and `PartialEq`
/// so a configuration can be logged, reused across testers and compared in
/// assertions.
#[derive(Debug, Clone, PartialEq)]
pub struct PropertyTestGenerator {
    /// Random seed for reproducible tests
    seed: u64,
    /// Minimum sample size for generated data
    min_samples: usize,
    /// Maximum sample size for generated data
    max_samples: usize,
    /// Minimum feature count
    min_features: usize,
    /// Maximum feature count
    max_features: usize,
    /// Value range for generated features, as `(lower, upper)`
    value_range: (f64, f64),
}
24
25impl PropertyTestGenerator {
26    /// Create a new property test generator
27    #[must_use]
28    pub fn new() -> Self {
29        Self {
30            seed: 42,
31            min_samples: 10,
32            max_samples: 1000,
33            min_features: 1,
34            max_features: 20,
35            value_range: (-10.0, 10.0),
36        }
37    }
38
39    /// Set the random seed
40    #[must_use]
41    pub fn seed(mut self, seed: u64) -> Self {
42        self.seed = seed;
43        self
44    }
45
46    /// Set sample size range
47    #[must_use]
48    pub fn sample_range(mut self, min: usize, max: usize) -> Self {
49        self.min_samples = min;
50        self.max_samples = max;
51        self
52    }
53
54    /// Set feature count range
55    #[must_use]
56    pub fn feature_range(mut self, min: usize, max: usize) -> Self {
57        self.min_features = min;
58        self.max_features = max;
59        self
60    }
61
62    /// Set value range for generated features
63    #[must_use]
64    pub fn value_range(mut self, range: (f64, f64)) -> Self {
65        self.value_range = range;
66        self
67    }
68
69    /// Generate random matrix for testing
70    #[must_use]
71    pub fn generate_matrix(&self, n_samples: usize, n_features: usize) -> Array2<f64> {
72        use scirs2_core::random::rngs::StdRng;
73        use scirs2_core::random::{Rng, SeedableRng};
74
75        let mut rng = StdRng::seed_from_u64(self.seed);
76        let (min_val, max_val) = self.value_range;
77
78        Array2::from_shape_fn((n_samples, n_features), |_| {
79            rng.gen_range(min_val..max_val + 1.0)
80        })
81    }
82
83    /// Generate random target vector for testing
84    #[must_use]
85    pub fn generate_targets(&self, n_samples: usize) -> Array1<f64> {
86        use scirs2_core::random::rngs::StdRng;
87        use scirs2_core::random::{Rng, SeedableRng};
88
89        let mut rng = StdRng::seed_from_u64(self.seed + 1);
90        let (min_val, max_val) = self.value_range;
91
92        Array1::from_shape_fn(n_samples, |_| rng.gen_range(min_val..max_val + 1.0))
93    }
94
95    /// Generate classification targets
96    #[must_use]
97    pub fn generate_classification_targets(
98        &self,
99        n_samples: usize,
100        n_classes: usize,
101    ) -> Array1<usize> {
102        use scirs2_core::random::rngs::StdRng;
103        use scirs2_core::random::{Rng, SeedableRng};
104
105        let mut rng = StdRng::seed_from_u64(self.seed + 2);
106
107        Array1::from_shape_fn(n_samples, |_| rng.gen_range(0..n_classes))
108    }
109}
110
111impl Default for PropertyTestGenerator {
112    fn default() -> Self {
113        Self::new()
114    }
115}
116
117/// Property-based test suite for pipeline invariants
118pub struct PipelinePropertyTester {
119    generator: PropertyTestGenerator,
120}
121
122impl PipelinePropertyTester {
123    /// Create a new pipeline property tester
124    #[must_use]
125    pub fn new() -> Self {
126        Self {
127            generator: PropertyTestGenerator::new(),
128        }
129    }
130
131    /// Set the test generator
132    #[must_use]
133    pub fn generator(mut self, generator: PropertyTestGenerator) -> Self {
134        self.generator = generator;
135        self
136    }
137
138    /// Test that pipeline preserves sample count
139    pub fn test_sample_preservation<P>(&self, pipeline: &P, n_tests: usize) -> PropertyTestResult
140    where
141        P: for<'a> Transform<ArrayView2<'a, f64>, Array2<f64>>,
142    {
143        let mut results = Vec::new();
144
145        for i in 0..n_tests {
146            let n_samples = self.generator.min_samples
147                + (i % (self.generator.max_samples - self.generator.min_samples));
148            let n_features = self.generator.min_features
149                + (i % (self.generator.max_features - self.generator.min_features));
150
151            let data = self.generator.generate_matrix(n_samples, n_features);
152
153            match pipeline.transform(&data.view()) {
154                Ok(transformed) => {
155                    let property_holds = transformed.nrows() == n_samples;
156                    results.push(PropertyTestCase {
157                        test_name: "sample_preservation".to_string(),
158                        input_shape: (n_samples, n_features),
159                        output_shape: (transformed.nrows(), transformed.ncols()),
160                        property_holds,
161                        error: None,
162                    });
163                }
164                Err(e) => {
165                    results.push(PropertyTestCase {
166                        test_name: "sample_preservation".to_string(),
167                        input_shape: (n_samples, n_features),
168                        output_shape: (0, 0),
169                        property_holds: false,
170                        error: Some(format!("{e:?}")),
171                    });
172                }
173            }
174        }
175
176        PropertyTestResult::new("sample_preservation", results)
177    }
178
179    /// Test that pipeline transformations are consistent
180    pub fn test_transformation_consistency<P>(
181        &self,
182        pipeline: &P,
183        n_tests: usize,
184    ) -> PropertyTestResult
185    where
186        P: for<'a> Transform<ArrayView2<'a, f64>, Array2<f64>>,
187    {
188        let mut results = Vec::new();
189
190        for i in 0..n_tests {
191            let n_samples = 50;
192            let n_features = 5;
193
194            let data = self.generator.generate_matrix(n_samples, n_features);
195
196            match (
197                pipeline.transform(&data.view()),
198                pipeline.transform(&data.view()),
199            ) {
200                (Ok(result1), Ok(result2)) => {
201                    let property_holds = result1.abs_diff_eq(&result2, 1e-10);
202                    results.push(PropertyTestCase {
203                        test_name: "transformation_consistency".to_string(),
204                        input_shape: (n_samples, n_features),
205                        output_shape: (result1.nrows(), result1.ncols()),
206                        property_holds,
207                        error: None,
208                    });
209                }
210                (Err(e), _) | (_, Err(e)) => {
211                    results.push(PropertyTestCase {
212                        test_name: "transformation_consistency".to_string(),
213                        input_shape: (n_samples, n_features),
214                        output_shape: (0, 0),
215                        property_holds: false,
216                        error: Some(format!("{e:?}")),
217                    });
218                }
219            }
220        }
221
222        PropertyTestResult::new("transformation_consistency", results)
223    }
224
225    /// Test pipeline composition properties
226    pub fn test_composition_associativity<P1, P2, P3>(
227        &self,
228        p1: &P1,
229        p2: &P2,
230        p3: &P3,
231    ) -> PropertyTestResult
232    where
233        P1: for<'a> Transform<ArrayView2<'a, f64>, Array2<f64>>,
234        P2: for<'a> Transform<ArrayView2<'a, f64>, Array2<f64>>,
235        P3: for<'a> Transform<ArrayView2<'a, f64>, Array2<f64>>,
236    {
237        let mut results = Vec::new();
238
239        let n_samples = 50;
240        let n_features = 5;
241        let data = self.generator.generate_matrix(n_samples, n_features);
242
243        // Test (p1 ∘ p2) ∘ p3 = p1 ∘ (p2 ∘ p3)
244        let result = match (
245            p1.transform(&data.view())
246                .and_then(|r| p2.transform(&r.view()))
247                .and_then(|r| p3.transform(&r.view())),
248            p2.transform(&data.view())
249                .and_then(|r| p3.transform(&r.view()))
250                .and_then(|r| p1.transform(&r.view())),
251        ) {
252            (Ok(left), Ok(right)) => PropertyTestCase {
253                test_name: "composition_associativity".to_string(),
254                input_shape: (n_samples, n_features),
255                output_shape: (left.nrows(), left.ncols()),
256                property_holds: left.shape() == right.shape(),
257                error: None,
258            },
259            (Err(e), _) | (_, Err(e)) => PropertyTestCase {
260                test_name: "composition_associativity".to_string(),
261                input_shape: (n_samples, n_features),
262                output_shape: (0, 0),
263                property_holds: false,
264                error: Some(format!("{e:?}")),
265            },
266        };
267
268        results.push(result);
269        PropertyTestResult::new("composition_associativity", results)
270    }
271
272    /// Test that feature union preserves all input features
273    pub fn test_feature_union_completeness<T1, T2>(&self, t1: &T1, t2: &T2) -> PropertyTestResult
274    where
275        T1: for<'a> Transform<ArrayView2<'a, f64>, Array2<f64>>,
276        T2: for<'a> Transform<ArrayView2<'a, f64>, Array2<f64>>,
277    {
278        let mut results = Vec::new();
279
280        let n_samples = 50;
281        let n_features = 5;
282        let data = self.generator.generate_matrix(n_samples, n_features);
283
284        match (t1.transform(&data.view()), t2.transform(&data.view())) {
285            (Ok(result1), Ok(result2)) => {
286                let total_features = result1.ncols() + result2.ncols();
287                let property_holds = total_features >= n_features;
288
289                results.push(PropertyTestCase {
290                    test_name: "feature_union_completeness".to_string(),
291                    input_shape: (n_samples, n_features),
292                    output_shape: (n_samples, total_features),
293                    property_holds,
294                    error: None,
295                });
296            }
297            (Err(e), _) | (_, Err(e)) => {
298                results.push(PropertyTestCase {
299                    test_name: "feature_union_completeness".to_string(),
300                    input_shape: (n_samples, n_features),
301                    output_shape: (0, 0),
302                    property_holds: false,
303                    error: Some(format!("{e:?}")),
304                });
305            }
306        }
307
308        PropertyTestResult::new("feature_union_completeness", results)
309    }
310}
311
312impl Default for PipelinePropertyTester {
313    fn default() -> Self {
314        Self::new()
315    }
316}
317
/// Result of a single property test case
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PropertyTestCase {
    /// Name of the test
    pub test_name: String,
    /// Input data shape
    pub input_shape: (usize, usize),
    /// Output data shape
    pub output_shape: (usize, usize),
    /// Whether the property holds
    pub property_holds: bool,
    /// Error message if any
    pub error: Option<String>,
}

/// Result of a property test suite
#[derive(Debug, Clone, PartialEq)]
pub struct PropertyTestResult {
    /// Name of the property being tested
    pub property_name: String,
    /// Individual test cases
    pub cases: Vec<PropertyTestCase>,
    /// Success rate (0.0 to 1.0)
    pub success_rate: f64,
    /// Total number of tests
    pub total_tests: usize,
    /// Number of passing tests
    pub passing_tests: usize,
}

impl PropertyTestResult {
    /// Create a new property test result
    ///
    /// Counts the cases whose property held and derives the success rate
    /// from them. A result without any case gets a success rate of `0.0`.
    #[must_use]
    pub fn new(property_name: &str, cases: Vec<PropertyTestCase>) -> Self {
        let total_tests = cases.len();
        let passing_tests = cases.iter().filter(|case| case.property_holds).count();
        // Guard the division so an empty result reports 0.0 instead of NaN.
        let success_rate = if total_tests == 0 {
            0.0
        } else {
            passing_tests as f64 / total_tests as f64
        };

        Self {
            property_name: property_name.to_string(),
            cases,
            success_rate,
            total_tests,
            passing_tests,
        }
    }

    /// Check if all tests passed
    ///
    /// Returns `true` only when at least one test ran and every test
    /// passed. The decision uses the integer counters rather than an
    /// exact comparison on the floating-point success rate.
    #[must_use]
    pub fn all_passed(&self) -> bool {
        self.total_tests > 0 && self.passing_tests == self.total_tests
    }

    /// Get failing test cases
    #[must_use]
    pub fn failing_cases(&self) -> Vec<&PropertyTestCase> {
        self.cases
            .iter()
            .filter(|case| !case.property_holds)
            .collect()
    }

    /// Generate a summary report
    ///
    /// One line with the property name, the pass count, the total count
    /// and the success rate as a percentage with one decimal.
    #[must_use]
    pub fn summary(&self) -> String {
        format!(
            "Property '{}': {}/{} tests passed ({:.1}%)",
            self.property_name,
            self.passing_tests,
            self.total_tests,
            self.success_rate * 100.0
        )
    }
}
393
/// Statistical validation utilities
///
/// Holds the thresholds used when validating predictions and
/// transformation outputs. The type is `Debug`, `Clone` and `PartialEq` so
/// a configuration can be logged, reused and compared in assertions.
#[derive(Debug, Clone, PartialEq)]
pub struct StatisticalValidator {
    /// Confidence level for statistical tests, kept within `0.0..=1.0` by
    /// the builder method
    confidence_level: f64,
    /// Smallest number of predictions accepted without reporting an issue
    min_sample_size: usize,
}
399
400impl StatisticalValidator {
401    /// Create a new statistical validator
402    #[must_use]
403    pub fn new() -> Self {
404        Self {
405            confidence_level: 0.95,
406            min_sample_size: 30,
407        }
408    }
409
410    /// Set confidence level for statistical tests
411    #[must_use]
412    pub fn confidence_level(mut self, level: f64) -> Self {
413        self.confidence_level = level.clamp(0.0, 1.0);
414        self
415    }
416
417    /// Set minimum sample size for tests
418    #[must_use]
419    pub fn min_sample_size(mut self, size: usize) -> Self {
420        self.min_sample_size = size;
421        self
422    }
423
424    /// Validate that pipeline predictions have reasonable statistical properties
425    #[must_use]
426    pub fn validate_prediction_distribution(&self, predictions: &Array1<f64>) -> ValidationResult {
427        let mut issues = Vec::new();
428
429        if predictions.len() < self.min_sample_size {
430            issues.push(format!(
431                "Sample size {} is below minimum {}",
432                predictions.len(),
433                self.min_sample_size
434            ));
435        }
436
437        // Check for NaN or infinite values
438        let nan_count = predictions.iter().filter(|&&x| x.is_nan()).count();
439        let inf_count = predictions.iter().filter(|&&x| x.is_infinite()).count();
440
441        if nan_count > 0 {
442            issues.push(format!("Found {nan_count} NaN values in predictions"));
443        }
444
445        if inf_count > 0 {
446            issues.push(format!("Found {inf_count} infinite values in predictions"));
447        }
448
449        // Basic statistical checks
450        let mean = predictions.mean().unwrap_or(0.0);
451        let variance = predictions.var(0.0);
452
453        if variance.is_nan() || variance.is_infinite() {
454            issues.push("Prediction variance is invalid".to_string());
455        }
456
457        ValidationResult {
458            is_valid: issues.is_empty(),
459            issues,
460            statistics: Some(ValidationStatistics {
461                mean,
462                variance,
463                sample_size: predictions.len(),
464            }),
465        }
466    }
467
468    /// Validate pipeline transformation properties
469    #[must_use]
470    pub fn validate_transformation(
471        &self,
472        input: &Array2<f64>,
473        output: &Array2<f64>,
474    ) -> ValidationResult {
475        let mut issues = Vec::new();
476
477        // Check shape consistency
478        if input.nrows() != output.nrows() {
479            issues.push(format!(
480                "Row count mismatch: input {} vs output {}",
481                input.nrows(),
482                output.nrows()
483            ));
484        }
485
486        // Check for valid values in output
487        let nan_count = output.iter().filter(|&&x| x.is_nan()).count();
488        let inf_count = output.iter().filter(|&&x| x.is_infinite()).count();
489
490        if nan_count > 0 {
491            issues.push(format!(
492                "Found {nan_count} NaN values in transformation output"
493            ));
494        }
495
496        if inf_count > 0 {
497            issues.push(format!(
498                "Found {inf_count} infinite values in transformation output"
499            ));
500        }
501
502        ValidationResult {
503            is_valid: issues.is_empty(),
504            issues,
505            statistics: None,
506        }
507    }
508}
509
510impl Default for StatisticalValidator {
511    fn default() -> Self {
512        Self::new()
513    }
514}
515
/// Result of statistical validation
///
/// Comparable with `==` so tests can assert on a whole result at once.
#[derive(Debug, Clone, PartialEq)]
pub struct ValidationResult {
    /// Whether the validation passed
    pub is_valid: bool,
    /// List of validation issues
    pub issues: Vec<String>,
    /// Optional statistical summary
    pub statistics: Option<ValidationStatistics>,
}

/// Statistical summary for validation
///
/// Comparison follows `f64` semantics, so a summary containing NaN is not
/// equal to itself.
#[derive(Debug, Clone, PartialEq)]
pub struct ValidationStatistics {
    /// Mean value
    pub mean: f64,
    /// Variance
    pub variance: f64,
    /// Sample size
    pub sample_size: usize,
}
537
538/// Comprehensive test suite runner
539pub struct TestSuiteRunner {
540    property_tester: PipelinePropertyTester,
541    statistical_validator: StatisticalValidator,
542}
543
544impl TestSuiteRunner {
545    /// Create a new test suite runner
546    #[must_use]
547    pub fn new() -> Self {
548        Self {
549            property_tester: PipelinePropertyTester::new(),
550            statistical_validator: StatisticalValidator::new(),
551        }
552    }
553
554    /// Run comprehensive tests on a pipeline
555    pub fn run_comprehensive_tests<P>(&self, pipeline: &P) -> TestSuiteResult
556    where
557        P: for<'a> Transform<ArrayView2<'a, f64>, Array2<f64>>,
558    {
559        let mut results = Vec::new();
560
561        // Property-based tests
562        results.push(self.property_tester.test_sample_preservation(pipeline, 100));
563        results.push(
564            self.property_tester
565                .test_transformation_consistency(pipeline, 50),
566        );
567
568        // Statistical validation
569        let test_data = self.property_tester.generator.generate_matrix(100, 5);
570        if let Ok(transformed) = pipeline.transform(&test_data.view()) {
571            let validation = self
572                .statistical_validator
573                .validate_transformation(&test_data, &transformed);
574            if !validation.is_valid {
575                // Convert validation issues to property test format
576                let failing_case = PropertyTestCase {
577                    test_name: "statistical_validation".to_string(),
578                    input_shape: test_data.dim(),
579                    output_shape: transformed.dim(),
580                    property_holds: false,
581                    error: Some(validation.issues.join("; ")),
582                };
583                results.push(PropertyTestResult::new(
584                    "statistical_validation",
585                    vec![failing_case],
586                ));
587            }
588        }
589
590        TestSuiteResult::new(results)
591    }
592}
593
594impl Default for TestSuiteRunner {
595    fn default() -> Self {
596        Self::new()
597    }
598}
599
600/// Result of running a complete test suite
601#[derive(Debug, Clone)]
602pub struct TestSuiteResult {
603    /// Individual property test results
604    pub property_results: Vec<PropertyTestResult>,
605    /// Overall success rate
606    pub overall_success_rate: f64,
607    /// Total number of tests across all properties
608    pub total_tests: usize,
609    /// Total number of passing tests
610    pub total_passing: usize,
611}
612
613impl TestSuiteResult {
614    /// Create a new test suite result
615    #[must_use]
616    pub fn new(property_results: Vec<PropertyTestResult>) -> Self {
617        let total_tests: usize = property_results.iter().map(|r| r.total_tests).sum();
618        let total_passing: usize = property_results.iter().map(|r| r.passing_tests).sum();
619        let overall_success_rate = if total_tests > 0 {
620            total_passing as f64 / total_tests as f64
621        } else {
622            0.0
623        };
624
625        Self {
626            property_results,
627            overall_success_rate,
628            total_tests,
629            total_passing,
630        }
631    }
632
633    /// Check if all tests passed
634    #[must_use]
635    pub fn all_passed(&self) -> bool {
636        self.overall_success_rate == 1.0
637    }
638
639    /// Generate a detailed report
640    #[must_use]
641    pub fn detailed_report(&self) -> String {
642        let mut report = String::new();
643        report.push_str(&format!(
644            "Test Suite Summary: {}/{} tests passed ({:.1}%)\n\n",
645            self.total_passing,
646            self.total_tests,
647            self.overall_success_rate * 100.0
648        ));
649
650        for result in &self.property_results {
651            report.push_str(&format!("  {}\n", result.summary()));
652
653            if !result.all_passed() {
654                for failing_case in result.failing_cases() {
655                    report.push_str(&format!(
656                        "    FAIL: {} - {:?}\n",
657                        failing_case.test_name, failing_case.error
658                    ));
659                }
660            }
661        }
662
663        report
664    }
665}
666
#[cfg(test)]
mod tests {
    use super::*;
    use crate::mock::MockTransformer;

    /// Generated data has the requested shape and is reproducible.
    #[test]
    fn test_property_test_generator() {
        let generator = PropertyTestGenerator::new();
        let matrix = generator.generate_matrix(10, 5);
        assert_eq!(matrix.shape(), &[10, 5]);
        // The generator re-seeds on every call, so a second call with the
        // same shape must reproduce the same matrix.
        assert_eq!(matrix, generator.generate_matrix(10, 5));

        let targets = generator.generate_targets(10);
        assert_eq!(targets.len(), 10);
        assert_eq!(targets, generator.generate_targets(10));
    }

    /// One case is recorded per requested test.
    #[test]
    fn test_pipeline_property_tester() {
        let tester = PipelinePropertyTester::new();
        let transformer = MockTransformer::new();

        let result = tester.test_sample_preservation(&transformer, 10);
        assert_eq!(result.property_name, "sample_preservation");
        assert_eq!(result.total_tests, 10);
        assert_eq!(result.cases.len(), 10);
    }

    /// Five predictions are below the default minimum sample size of 30.
    #[test]
    fn test_statistical_validator() {
        let validator = StatisticalValidator::new();
        let predictions = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);

        let result = validator.validate_prediction_distribution(&predictions);
        // Should fail due to small sample size
        assert!(!result.is_valid);
        assert_eq!(result.issues.len(), 1);
        assert!(result.issues[0].contains("Sample size 5 is below minimum 30"));

        let statistics = result.statistics.expect("statistics are always attached");
        assert_eq!(statistics.sample_size, 5);
        assert!((statistics.mean - 3.0).abs() < 1e-12);
        assert!((statistics.variance - 2.0).abs() < 1e-12);
    }

    /// The runner always records the two property results (100 + 50 cases).
    #[test]
    fn test_test_suite_runner() {
        let runner = TestSuiteRunner::new();
        let transformer = MockTransformer::new();

        let result = runner.run_comprehensive_tests(&transformer);
        assert!(result.property_results.len() >= 2);
        assert!(result.total_tests >= 150);
    }

    /// Counters, success rate, failing cases and summary agree.
    #[test]
    fn test_property_test_result() {
        let cases = vec![
            PropertyTestCase {
                test_name: "test1".to_string(),
                input_shape: (10, 5),
                output_shape: (10, 5),
                property_holds: true,
                error: None,
            },
            PropertyTestCase {
                test_name: "test2".to_string(),
                input_shape: (10, 5),
                output_shape: (10, 5),
                property_holds: false,
                error: Some("Test error".to_string()),
            },
        ];

        let result = PropertyTestResult::new("test_property", cases);
        assert_eq!(result.total_tests, 2);
        assert_eq!(result.passing_tests, 1);
        assert!((result.success_rate - 0.5).abs() < 1e-12);
        assert!(!result.all_passed());
        assert_eq!(result.failing_cases().len(), 1);
        assert_eq!(result.failing_cases()[0].test_name, "test2");
        assert_eq!(
            result.summary(),
            "Property 'test_property': 1/2 tests passed (50.0%)"
        );
    }

    /// A suite without any test is not reported as passed.
    #[test]
    fn test_empty_test_suite_result() {
        let result = TestSuiteResult::new(Vec::new());
        assert_eq!(result.total_tests, 0);
        assert_eq!(result.total_passing, 0);
        assert!(!result.all_passed());
    }
}