Skip to main content

scirs2_core/validation/data/
mod.rs

//! Comprehensive Data Validation System
//!
//! Production-grade data validation system for `SciRS2` Core providing schema
//! validation, constraint enforcement, and data integrity checks for scientific
//! computing applications in regulated environments.
//!
//! ## Features
//!
//! - JSON Schema validation with scientific extensions
//! - Constraint-based validation (range, format, pattern)
//! - Composite constraints with logical operators (AND, OR, NOT, IF-THEN)
//! - Data integrity verification with checksums
//! - Type safety validation for numeric data
//! - Custom validation rules and plugins
//! - Performance-optimized validation pipelines
//! - Integration with ndarray for array validation
//! - Support for complex nested data structures
//! - Validation caching for repeated validations
//! - Detailed error reporting with context
//! - `ConstraintBuilder` for fluent constraint composition
//!
//! ## Example
//!
//! ```rust
//! use scirs2_core::validation::data::{Validator, ValidationSchema, ValidationConfig, DataType, Constraint};
//! use ::ndarray::Array2;
//!
//! // Create a validation schema
//! let schema = ValidationSchema::new()
//!     .require_field("name", DataType::String)
//!     .require_field("age", DataType::Integer)
//!     .add_constraint("age", Constraint::Range { min: 0.0, max: 150.0 })
//!     .require_field("data", DataType::Array(Box::new(DataType::Float64)));
//!
//! let config = ValidationConfig::default();
//! let validator = Validator::new(config)?;
//!
//! // For JSON validation (when serde feature is enabled)
//!
//! {
//!     let data = serde_json::json!({
//!         "name": "Test Dataset",
//!         "age": 25,
//!         "data": [[1.0, 2.0], [3.0, 4.0]]
//!     });
//!
//!     let result = validator.validate(&data, &schema)?;
//!     if result.is_valid() {
//!         println!("Data is valid!");
//!     } else {
//!         println!("Validation errors: {:#?}", result.errors());
//!     }
//! }
//!
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```
//!
//! ## Using Composite Constraints
//!
//! The validation system supports composite constraints built from logical operators:
//!
//! ```rust
//! use scirs2_core::validation::data::{Constraint, ConstraintBuilder, ValidationSchema, DataType};
//!
//! // Create complex constraints using the builder
//! let age_constraint = ConstraintBuilder::new()
//!     .range(18.0, 65.0)
//!     .not_null()
//!     .and();
//!
//! // Use logical operators for conditional validation
//! let email_or_phone = Constraint::Or(vec![
//!     Constraint::Pattern(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$".to_string()),
//!     Constraint::Pattern(r"^\+?[1-9]\d{1,14}$".to_string()),
//! ]);
//!
//! // Conditional constraints: if age > 18, require consent field
//! let consent_constraint = Constraint::if_then(
//!     Constraint::Range { min: 18.0, max: f64::INFINITY },
//!     Constraint::NotNull,
//!     None
//! );
//!
//! let schema = ValidationSchema::new()
//!     .require_field("age", DataType::Integer)
//!     .add_constraint("age", age_constraint)
//!     .require_field("contact", DataType::String)
//!     .add_constraint("contact", email_or_phone);
//! ```
//!
//! ## Performance Features
//!
//! The validation system includes several performance optimizations:
//!
//! - **Validation Caching**: Results are cached for repeated validations with configurable TTL
//! - **Parallel Validation**: Array elements can be validated in parallel when enabled
//! - **Early Exit**: Validation stops at first error when configured for fail-fast mode
//! - **Lazy Evaluation**: Composite constraints evaluate only as needed
//! - **Memory Efficiency**: Streaming validation for large datasets
//!
//! ```rust
//! use scirs2_core::validation::data::ValidationConfig;
//!
//! let mut config = ValidationConfig::default();
//! config.strict_mode = true; // Fail fast on first error
//! config.enable_caching = true; // Enable result caching
//! config.cache_size_limit = 1000; // Cache up to 1000 results
//! config.enable_parallel_validation = true; // Parallel array validation
//! config.performance_mode = true; // Optimize for speed
//! ```

112// Core modules
113pub mod array_validation;
114pub mod config;
115pub mod constraints;
116pub mod errors;
117pub mod quality;
118pub mod schema;
119pub mod validator;
120
121// Re-export main types and functions for backward compatibility
122
123// Configuration and types
124pub use config::{ErrorSeverity, QualityIssueType, ValidationConfig, ValidationErrorType};
125
126// Schema and constraints
127pub use schema::{DataType, FieldDefinition, ValidationSchema};
128
129pub use constraints::{
130    ArrayValidationConstraints, Constraint, ConstraintBuilder, ElementValidatorFn,
131    ShapeConstraints, SparseFormat, StatisticalConstraints, TimeConstraints,
132};
133
134// Errors and results
135pub use errors::{ValidationError, ValidationResult, ValidationStats};
136
137// Quality assessment
138pub use quality::{
139    DataQualityReport, QualityAnalyzer, QualityIssue, QualityMetrics, StatisticalSummary,
140};
141
142// Array validation
143pub use array_validation::ArrayValidator;
144
145// Main validator
146pub use validator::{ValidationRule, Validator};
147
148// Type aliases for convenience
149pub type Array1<T> = crate::ndarray::Array1<T>;
150pub type Array2<T> = crate::ndarray::Array2<T>;
151
/// Smoke tests confirming that the validation API is fully usable through this
/// module's re-exports alone, without naming any submodule.
#[cfg(test)]
mod tests {
    use super::*;
    use ::ndarray::Array2;

    /// Array validation and quality reporting are both reachable via the re-exports.
    #[test]
    fn test_module_integration() {
        // Test that all major functionality is accessible through the module
        let data = Array2::from_shape_vec(
            (6, 2),
            vec![
                1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
            ],
        )
        .expect("shape (6, 2) matches the 12 supplied elements");

        // Test array validation
        let constraints = ArrayValidationConstraints::new()
            .withshape(vec![6, 2])
            .with_fieldname("test_data")
            .check_numeric_quality();

        // `config` is cloned because it is passed again to `validate_ndarray` below.
        let config = ValidationConfig::default();
        let validator = Validator::new(config.clone())
            .expect("validator construction with the default config should succeed");

        let result = validator
            .validate_ndarray(&data, &constraints, &config)
            .expect("array validation should complete without an internal error");
        assert!(result.is_valid());

        // Test quality report generation
        let report = validator
            .generate_quality_report(&data, "test_data")
            .expect("quality report generation should succeed");
        assert!(report.quality_score > 0.9);
    }

    /// The fully qualified glob import used by pre-refactor callers still exposes the API.
    #[test]
    fn test_backward_compatibility() {
        // Test that the old API still works after refactoring
        use crate::validation::data::*;

        let config = ValidationConfig::default();
        let validator = Validator::new(config.clone())
            .expect("validator construction with the default config should succeed");

        let data = Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
            .expect("shape (4, 2) matches the 8 supplied elements");

        // These should all work exactly as they did before refactoring
        let constraints = ArrayValidationConstraints::new()
            .withshape(vec![4, 2])
            .check_numeric_quality();

        let result = validator
            .validate_ndarray(&data, &constraints, &config)
            .expect("array validation should complete without an internal error");
        assert!(result.is_valid());
    }

    /// A JSON document satisfying the schema's required fields and range constraint validates.
    #[test]
    fn test_json_validation_integration() {
        // Test JSON validation functionality
        let schema = ValidationSchema::new()
            .name("test_schema")
            .require_field("name", DataType::String)
            .require_field("age", DataType::Integer)
            .add_constraint(
                "age",
                Constraint::Range {
                    min: 0.0,
                    max: 150.0,
                },
            );

        let config = ValidationConfig::default();
        let validator = Validator::new(config)
            .expect("validator construction with the default config should succeed");

        let data = serde_json::json!({
            "name": "Test User",
            "age": 25
        });

        let result = validator
            .validate(&data, &schema)
            .expect("schema validation should complete without an internal error");
        assert!(result.is_valid());
    }

    /// The basic constraint variants can be constructed and destructured via the re-export.
    #[test]
    fn test_constraint_types() {
        // Test various constraint types
        let range_constraint = Constraint::Range {
            min: 0.0,
            max: 100.0,
        };
        let length_constraint = Constraint::Length { min: 1, max: 50 };
        let not_null_constraint = Constraint::NotNull;
        let unique_constraint = Constraint::Unique;

        // Test that constraints can be created and used
        match range_constraint {
            Constraint::Range { min, max } => {
                assert_eq!(min, 0.0);
                assert_eq!(max, 100.0);
            }
            _ => panic!("Expected Range constraint"),
        }

        match length_constraint {
            Constraint::Length { min, max } => {
                assert_eq!(min, 1);
                assert_eq!(max, 50);
            }
            _ => panic!("Expected Length constraint"),
        }

        // Previously these two values were bound but never inspected, which only
        // produced unused-variable warnings; check the variants explicitly.
        assert!(matches!(not_null_constraint, Constraint::NotNull));
        assert!(matches!(unique_constraint, Constraint::Unique));
    }

    /// Scalar and nested `DataType` values compare and destructure as expected.
    #[test]
    fn test_datatypes() {
        // Test data type definitions
        let string_type = DataType::String;
        let integer_type = DataType::Integer;
        let array_type = DataType::Array(Box::new(DataType::Float64));
        let matrix_type = DataType::Matrix(Box::new(DataType::Float32));

        assert_eq!(string_type, DataType::String);
        assert_eq!(integer_type, DataType::Integer);

        match array_type {
            DataType::Array(inner) => assert_eq!(*inner, DataType::Float64),
            _ => panic!("Expected Array type"),
        }

        match matrix_type {
            DataType::Matrix(inner) => assert_eq!(*inner, DataType::Float32),
            _ => panic!("Expected Matrix type"),
        }
    }

    /// The `StatisticalConstraints` builder stores each bound in the matching field.
    #[test]
    fn test_statistical_constraints() {
        // Test statistical constraints
        let constraints = StatisticalConstraints::new()
            .with_mean_range(0.0, 10.0)
            .with_std_range(1.0, 5.0)
            .with_distribution("normal");

        assert_eq!(constraints.min_mean, Some(0.0));
        assert_eq!(constraints.max_mean, Some(10.0));
        assert_eq!(constraints.min_std, Some(1.0));
        assert_eq!(constraints.max_std, Some(5.0));
        assert_eq!(
            constraints.expected_distribution,
            Some("normal".to_string())
        );
    }

    /// A `ValidationError` keeps its constructor arguments and mentions them when formatted.
    #[test]
    fn test_validationerror_creation() {
        // Test validation error creation and formatting
        let error = ValidationError::new(
            ValidationErrorType::TypeMismatch,
            "test_field",
            "Type mismatch error",
        )
        .with_expected("string")
        .with_actual("integer")
        .with_constraint("type_check")
        .with_severity(ErrorSeverity::Error);

        assert_eq!(error.errortype, ValidationErrorType::TypeMismatch);
        assert_eq!(error.fieldpath, "test_field");
        assert_eq!(error.message, "Type mismatch error");

        let formatted = error.formatted_message();
        assert!(formatted.contains("test_field"));
        assert!(formatted.contains("Type mismatch error"));
    }

    /// The `ValidationSchema` builder records name, version, fields, flags and metadata.
    #[test]
    fn test_schema_builder() {
        // Test schema builder pattern
        let schema = ValidationSchema::new()
            .name("test_schema")
            .version("1.0.0")
            .require_field("name", DataType::String)
            .optional_field("description", DataType::String)
            .add_constraint("name", Constraint::Length { min: 1, max: 100 })
            .allow_additional()
            .with_metadata("author", "test");

        assert_eq!(schema.name, "test_schema");
        assert_eq!(schema.version, "1.0.0");
        // One required plus one optional field were registered above.
        assert_eq!(schema.fields.len(), 2);
        assert!(schema.allow_additional_fields);
        assert_eq!(schema.metadata.get("author"), Some(&"test".to_string()));
    }
}