scirs2_core/validation/data/mod.rs
1//! Comprehensive Data Validation System
2//!
3//! Production-grade data validation system for `SciRS2` Core providing schema
4//! validation, constraint enforcement, and data integrity checks for scientific
5//! computing applications in regulated environments.
6//!
7//! ## Features
8//!
9//! - JSON Schema validation with scientific extensions
10//! - Constraint-based validation (range, format, pattern)
11//! - Composite constraints with logical operators (AND, OR, NOT, IF-THEN)
12//! - Data integrity verification with checksums
13//! - Type safety validation for numeric data
14//! - Custom validation rules and plugins
15//! - Performance-optimized validation pipelines
16//! - Integration with ndarray for array validation
17//! - Support for complex nested data structures
18//! - Validation caching for repeated validations
19//! - Detailed error reporting with context
20//! - ConstraintBuilder for fluent constraint composition
21//!
22//! ## Example
23//!
24//! ```rust
25//! use scirs2_core::validation::data::{Validator, ValidationSchema, ValidationConfig, DataType, Constraint};
26//! use ::ndarray::Array2;
27//!
28//! // Create a validation schema
29//! let schema = ValidationSchema::new()
30//! .require_field("name", DataType::String)
31//! .require_field("age", DataType::Integer)
32//! .add_constraint("age", Constraint::Range { min: 0.0, max: 150.0 })
33//! .require_field("data", DataType::Array(Box::new(DataType::Float64)));
34//!
35//! let config = ValidationConfig::default();
36//! let validator = Validator::new(config)?;
37//!
38//! // For JSON validation (when serde feature is enabled)
39//!
40//! {
41//! let data = serde_json::json!({
42//! "name": "Test Dataset",
43//! "age": 25,
44//! "data": [[1.0, 2.0], [3.0, 4.0]]
45//! });
46//!
47//! let result = validator.validate(&data, &schema)?;
48//! if result.is_valid() {
49//! println!("Data is valid!");
50//! } else {
51//! println!("Validation errors: {:#?}", result.errors());
52//! }
53//! }
54//!
55//! # Ok::<(), Box<dyn std::error::Error>>(())
56//! ```
57//!
58//! ## Using Composite Constraints
59//!
60//! The validation system now supports composite constraints using logical operators:
61//!
62//! ```rust
63//! use scirs2_core::validation::data::{Constraint, ConstraintBuilder, ValidationSchema, DataType};
64//!
65//! // Create complex constraints using the builder
66//! let age_constraint = ConstraintBuilder::new()
67//! .range(18.0, 65.0)
68//! .not_null()
69//! .and();
70//!
71//! // Use logical operators for conditional validation
72//! let email_or_phone = Constraint::Or(vec![
73//! Constraint::Pattern(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$".to_string()),
74//! Constraint::Pattern(r"^\+?[1-9]\d{1,14}$".to_string()),
75//! ]);
76//!
77//! // Conditional constraints: if age > 18, require consent field
78//! let consent_constraint = Constraint::if_then(
79//! Constraint::Range { min: 18.0, max: f64::INFINITY },
80//! Constraint::NotNull,
81//! None
82//! );
83//!
84//! let schema = ValidationSchema::new()
85//! .require_field("age", DataType::Integer)
86//! .add_constraint("age", age_constraint)
87//! .require_field("contact", DataType::String)
88//! .add_constraint("contact", email_or_phone);
89//! ```
90//!
91//! ## Performance Features
92//!
93//! The validation system includes several performance optimizations:
94//!
95//! - **Validation Caching**: Results are cached for repeated validations with configurable TTL
96//! - **Parallel Validation**: Array elements can be validated in parallel when enabled
97//! - **Early Exit**: Validation stops at first error when configured for fail-fast mode
98//! - **Lazy Evaluation**: Composite constraints evaluate only as needed
99//! - **Memory Efficiency**: Streaming validation for large datasets
100//!
101//! ```rust
102//! use scirs2_core::validation::data::ValidationConfig;
103//!
104//! let mut config = ValidationConfig::default();
105//! config.strict_mode = true; // Fail fast on first error
106//! config.enable_caching = true; // Enable result caching
107//! config.cache_size_limit = 1000; // Cache up to 1000 results
108//! config.enable_parallel_validation = true; // Parallel array validation
109//! config.performance_mode = true; // Optimize for speed
110//! ```
111
112// Core modules
113pub mod array_validation;
114pub mod config;
115pub mod constraints;
116pub mod errors;
117pub mod quality;
118pub mod schema;
119pub mod validator;
120
121// Re-export main types and functions for backward compatibility
122
123// Configuration and types
124pub use config::{ErrorSeverity, QualityIssueType, ValidationConfig, ValidationErrorType};
125
126// Schema and constraints
127pub use schema::{DataType, FieldDefinition, ValidationSchema};
128
129pub use constraints::{
130 ArrayValidationConstraints, Constraint, ConstraintBuilder, ElementValidatorFn,
131 ShapeConstraints, SparseFormat, StatisticalConstraints, TimeConstraints,
132};
133
134// Errors and results
135pub use errors::{ValidationError, ValidationResult, ValidationStats};
136
137// Quality assessment
138pub use quality::{
139 DataQualityReport, QualityAnalyzer, QualityIssue, QualityMetrics, StatisticalSummary,
140};
141
142// Array validation
143pub use array_validation::ArrayValidator;
144
145// Main validator
146pub use validator::{ValidationRule, Validator};
147
148// Type aliases for convenience
/// Alias for `crate::ndarray::Array1<T>`, exposed so callers can name the type
/// through this module. Presumably the one-dimensional array type; confirm
/// against `crate::ndarray`.
149pub type Array1<T> = crate::ndarray::Array1<T>;
/// Alias for `crate::ndarray::Array2<T>`, exposed so callers can name the type
/// through this module. Presumably the two-dimensional array type; confirm
/// against `crate::ndarray`.
150pub type Array2<T> = crate::ndarray::Array2<T>;
151
#[cfg(test)]
mod tests {
    // Smoke tests for the public surface re-exported by this module. Each test
    // only touches items reachable through `super::*`, so a missing re-export
    // shows up as a compile error here.
    use super::*;
    use ::ndarray::Array2;

    /// Array validation and quality reporting are both reachable through the
    /// re-exported `Validator`.
    #[test]
    fn test_module_integration() {
        // Twelve values arranged as a 6x2 matrix.
        let data = Array2::from_shape_vec(
            (6, 2),
            vec![
                1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
            ],
        )
        .expect("12 elements must fit a 6x2 array");

        // Test array validation
        let constraints = ArrayValidationConstraints::new()
            .withshape(vec![6, 2])
            .with_fieldname("test_data")
            .check_numeric_quality();

        let config = ValidationConfig::default();
        let validator =
            Validator::new(config.clone()).expect("default config should construct a validator");

        let result = validator
            .validate_ndarray(&data, &constraints, &config)
            .expect("ndarray validation should complete without an internal error");
        assert!(result.is_valid());

        // Test quality report generation
        let report = validator
            .generate_quality_report(&data, "test_data")
            .expect("quality report generation should complete without an internal error");
        assert!(report.quality_score > 0.9);
    }

    /// The old API still works after refactoring when everything is pulled in
    /// through the absolute module path.
    #[test]
    fn test_backward_compatibility() {
        use crate::validation::data::*;

        let config = ValidationConfig::default();
        let validator =
            Validator::new(config.clone()).expect("default config should construct a validator");

        let data = Array2::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
            .expect("8 elements must fit a 4x2 array");

        // These should all work exactly as they did before refactoring
        let constraints = ArrayValidationConstraints::new()
            .withshape(vec![4, 2])
            .check_numeric_quality();

        let result = validator
            .validate_ndarray(&data, &constraints, &config)
            .expect("ndarray validation should complete without an internal error");
        assert!(result.is_valid());
    }

    /// A JSON document matching a small schema validates successfully.
    #[test]
    fn test_json_validation_integration() {
        let schema = ValidationSchema::new()
            .name("test_schema")
            .require_field("name", DataType::String)
            .require_field("age", DataType::Integer)
            .add_constraint(
                "age",
                Constraint::Range {
                    min: 0.0,
                    max: 150.0,
                },
            );

        let config = ValidationConfig::default();
        let validator =
            Validator::new(config).expect("default config should construct a validator");

        let data = serde_json::json!({
            "name": "Test User",
            "age": 25
        });

        let result = validator
            .validate(&data, &schema)
            .expect("JSON validation should complete without an internal error");
        assert!(result.is_valid());
    }

    /// The basic constraint variants can be constructed and matched on.
    #[test]
    fn test_constraint_types() {
        let range_constraint = Constraint::Range {
            min: 0.0,
            max: 100.0,
        };
        let length_constraint = Constraint::Length { min: 1, max: 50 };
        let not_null_constraint = Constraint::NotNull;
        let unique_constraint = Constraint::Unique;

        // Test that constraints can be created and used
        match range_constraint {
            Constraint::Range { min, max } => {
                assert_eq!(min, 0.0);
                assert_eq!(max, 100.0);
            }
            _ => panic!("Expected Range constraint"),
        }

        match length_constraint {
            Constraint::Length { min, max } => {
                assert_eq!(min, 1);
                assert_eq!(max, 50);
            }
            _ => panic!("Expected Length constraint"),
        }

        // The unit variants carry no payload, so checking the variant itself is
        // all there is to verify; this also keeps the bindings from being unused.
        assert!(matches!(not_null_constraint, Constraint::NotNull));
        assert!(matches!(unique_constraint, Constraint::Unique));
    }

    /// Data type values compare equal to themselves and expose their inner type.
    #[test]
    fn test_datatypes() {
        let string_type = DataType::String;
        let integer_type = DataType::Integer;
        let array_type = DataType::Array(Box::new(DataType::Float64));
        let matrix_type = DataType::Matrix(Box::new(DataType::Float32));

        assert_eq!(string_type, DataType::String);
        assert_eq!(integer_type, DataType::Integer);

        match array_type {
            DataType::Array(inner) => assert_eq!(*inner, DataType::Float64),
            _ => panic!("Expected Array type"),
        }

        match matrix_type {
            DataType::Matrix(inner) => assert_eq!(*inner, DataType::Float32),
            _ => panic!("Expected Matrix type"),
        }
    }

    /// The statistical-constraint builder stores exactly the bounds it was given.
    #[test]
    fn test_statistical_constraints() {
        let constraints = StatisticalConstraints::new()
            .with_mean_range(0.0, 10.0)
            .with_std_range(1.0, 5.0)
            .with_distribution("normal");

        assert_eq!(constraints.min_mean, Some(0.0));
        assert_eq!(constraints.max_mean, Some(10.0));
        assert_eq!(constraints.min_std, Some(1.0));
        assert_eq!(constraints.max_std, Some(5.0));
        assert_eq!(
            constraints.expected_distribution,
            Some("normal".to_string())
        );
    }

    /// A validation error keeps its type, field path and message, and the
    /// formatted message mentions both the field and the message text.
    #[test]
    fn test_validationerror_creation() {
        let error = ValidationError::new(
            ValidationErrorType::TypeMismatch,
            "test_field",
            "Type mismatch error",
        )
        .with_expected("string")
        .with_actual("integer")
        .with_constraint("type_check")
        .with_severity(ErrorSeverity::Error);

        assert_eq!(error.errortype, ValidationErrorType::TypeMismatch);
        assert_eq!(error.fieldpath, "test_field");
        assert_eq!(error.message, "Type mismatch error");

        let formatted = error.formatted_message();
        assert!(formatted.contains("test_field"));
        assert!(formatted.contains("Type mismatch error"));
    }

    /// The schema builder records name, version, fields, the additional-fields
    /// flag and metadata.
    #[test]
    fn test_schema_builder() {
        let schema = ValidationSchema::new()
            .name("test_schema")
            .version("1.0.0")
            .require_field("name", DataType::String)
            .optional_field("description", DataType::String)
            .add_constraint("name", Constraint::Length { min: 1, max: 100 })
            .allow_additional()
            .with_metadata("author", "test");

        assert_eq!(schema.name, "test_schema");
        assert_eq!(schema.version, "1.0.0");
        assert_eq!(schema.fields.len(), 2);
        assert!(schema.allow_additional_fields);
        assert_eq!(schema.metadata.get("author"), Some(&"test".to_string()));
    }
}
349}