scirs2_core/validation/data/
schema.rs

//! Schema definition and types for data validation
//!
//! This module provides schema structures for defining the expected structure
//! and constraints of data to be validated.
5
6use super::constraints::Constraint;
7use std::collections::HashMap;
8
9use serde::{Deserialize, Serialize};
10
11/// Data types supported by the validation system
12#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
13pub enum DataType {
14    /// Boolean value
15    Boolean,
16    /// Integer number
17    Integer,
18    /// Floating point number
19    Float32,
20    /// Double precision floating point
21    Float64,
22    /// UTF-8 string
23    String,
24    /// Array of elements
25    Array(Box<DataType>),
26    /// Object with fields
27    Object,
28    /// Null value
29    Null,
30    /// Binary data
31    Binary,
32    /// Date/time value
33    DateTime,
34    /// UUID value
35    Uuid,
36    /// Geographic coordinate
37    GeoCoordinate,
38    /// Complex number
39    Complex,
40    /// Matrix (2D array)
41    Matrix(Box<DataType>),
42    /// Tensor (N-dimensional array)
43    Tensor {
44        element_type: Box<DataType>,
45        dimensions: Option<Vec<usize>>,
46    },
47    /// Time series data
48    TimeSeries(Box<DataType>),
49    /// Sparse matrix
50    SparseMatrix {
51        element_type: Box<DataType>,
52        format: super::constraints::SparseFormat,
53    },
54}
55
56/// Field definition in a validation schema
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct FieldDefinition {
59    /// Field data type
60    pub datatype: DataType,
61    /// Whether field is required
62    pub required: bool,
63    /// Constraints applied to this field
64    pub constraints: Vec<Constraint>,
65    /// Field description
66    pub description: Option<String>,
67    /// Default value if not provided
68    pub defaultvalue: Option<String>,
69    /// Validation rule references
70    pub validation_rules: Vec<String>,
71}
72
73impl FieldDefinition {
74    /// Create a new field definition
75    pub fn new(datatype: DataType) -> Self {
76        Self {
77            datatype,
78            required: false,
79            constraints: Vec::new(),
80            description: None,
81            defaultvalue: None,
82            validation_rules: Vec::new(),
83        }
84    }
85
86    /// Mark field as required
87    pub fn required(mut self) -> Self {
88        self.required = true;
89        self
90    }
91
92    /// Add a constraint
93    pub fn with_constraint(mut self, constraint: Constraint) -> Self {
94        self.constraints.push(constraint);
95        self
96    }
97
98    /// Set description
99    pub fn with_description(mut self, description: &str) -> Self {
100        self.description = Some(description.to_string());
101        self
102    }
103
104    /// Set default value
105    pub fn with_default(mut self, default: &str) -> Self {
106        self.defaultvalue = Some(default.to_string());
107        self
108    }
109
110    /// Add validation rule
111    pub fn with_validation_rule(mut self, rule: &str) -> Self {
112        self.validation_rules.push(rule.to_string());
113        self
114    }
115}
116
117/// Validation schema definition
118#[derive(Debug, Clone, Serialize, Deserialize)]
119pub struct ValidationSchema {
120    /// Schema name
121    pub name: String,
122    /// Schema version
123    pub version: String,
124    /// Field definitions
125    pub fields: HashMap<String, FieldDefinition>,
126    /// Global constraints
127    pub global_constraints: Vec<Constraint>,
128    /// Schema metadata
129    pub metadata: HashMap<String, String>,
130    /// Allow additional fields not in schema
131    pub allow_additional_fields: bool,
132}
133
134impl ValidationSchema {
135    /// Create a new validation schema
136    pub fn new() -> Self {
137        Self {
138            name: "unnamed".to_string(),
139            version: "1.0.0".to_string(),
140            fields: HashMap::new(),
141            global_constraints: Vec::new(),
142            metadata: HashMap::new(),
143            allow_additional_fields: false,
144        }
145    }
146
147    /// Set schema name
148    pub fn name(mut self, name: &str) -> Self {
149        self.name = name.to_string();
150        self
151    }
152
153    /// Set schema version
154    pub fn version(mut self, version: &str) -> Self {
155        self.version = version.to_string();
156        self
157    }
158
159    /// Add a required field
160    pub fn require_field(mut self, name: &str, datatype: DataType) -> Self {
161        let field = FieldDefinition::new(datatype).required();
162        self.fields.insert(name.to_string(), field);
163        self
164    }
165
166    /// Add an optional field
167    pub fn optional_field(mut self, name: &str, datatype: DataType) -> Self {
168        let field = FieldDefinition::new(datatype);
169        self.fields.insert(name.to_string(), field);
170        self
171    }
172
173    /// Add a field with custom definition
174    pub fn add_field(mut self, name: &str, field: FieldDefinition) -> Self {
175        self.fields.insert(name.to_string(), field);
176        self
177    }
178
179    /// Add a constraint to a field
180    pub fn add_constraint(mut self, fieldname: &str, constraint: Constraint) -> Self {
181        if let Some(field) = self.fields.get_mut(fieldname) {
182            field.constraints.push(constraint);
183        }
184        self
185    }
186
187    /// Add a global constraint
188    pub fn add_global_constraint(mut self, constraint: Constraint) -> Self {
189        self.global_constraints.push(constraint);
190        self
191    }
192
193    /// Allow additional fields
194    pub fn allow_additional(mut self) -> Self {
195        self.allow_additional_fields = true;
196        self
197    }
198
199    /// Add metadata
200    pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
201        self.metadata.insert(key.to_string(), value.to_string());
202        self
203    }
204
205    /// Get field definition
206    pub fn get_field(&self, name: &str) -> Option<&FieldDefinition> {
207        self.fields.get(name)
208    }
209
210    /// Check if field is required
211    pub fn is_field_required(&self, name: &str) -> bool {
212        self.fields.get(name).is_some_and(|f| f.required)
213    }
214
215    /// Get all required field names
216    pub fn get_required_fields(&self) -> Vec<&String> {
217        self.fields
218            .iter()
219            .filter(|(_, field)| field.required)
220            .map(|(name, _)| name)
221            .collect()
222    }
223
224    /// Validate schema consistency
225    pub fn validate_schema(&self) -> Result<(), String> {
226        // Check for empty schema name
227        if self.name.is_empty() {
228            return Err("Schema name cannot be empty".to_string());
229        }
230
231        // Check for empty version
232        if self.version.is_empty() {
233            return Err("Schema version cannot be empty".to_string());
234        }
235
236        // Check for circular references in array/matrix types
237        for (fieldname, field) in &self.fields {
238            self.check_circular_references(&field.datatype, fieldname)?;
239        }
240
241        Ok(())
242    }
243
244    /// Check for circular references in data types
245    #[allow(clippy::only_used_in_recursion)]
246    fn check_circular_references(
247        &self,
248        datatype: &DataType,
249        fieldname: &str,
250    ) -> Result<(), String> {
251        match datatype {
252            DataType::Array(inner) => self.check_circular_references(inner, fieldname),
253            DataType::Matrix(inner) => self.check_circular_references(inner, fieldname),
254            DataType::Tensor { element_type, .. } => {
255                self.check_circular_references(element_type, fieldname)
256            }
257            DataType::TimeSeries(inner) => self.check_circular_references(inner, fieldname),
258            DataType::SparseMatrix { element_type, .. } => {
259                self.check_circular_references(element_type, fieldname)
260            }
261            _ => Ok(()),
262        }
263    }
264}
265
266impl Default for ValidationSchema {
267    fn default() -> Self {
268        Self::new()
269    }
270}
271
#[cfg(test)]
mod tests {
    use super::super::constraints::{Constraint, SparseFormat};
    use super::*;

    /// Each `FieldDefinition` builder call must show up in the built value.
    #[test]
    fn test_field_definition() {
        let field = FieldDefinition::new(DataType::String)
            .required()
            .with_description("Test field")
            .with_default("defaultvalue")
            .with_constraint(Constraint::NotNull)
            .with_validation_rule("custom_rule");

        assert_eq!(field.datatype, DataType::String);
        assert!(field.required);
        assert_eq!(field.description, Some("Test field".to_string()));
        assert_eq!(field.defaultvalue, Some("defaultvalue".to_string()));
        assert_eq!(field.constraints.len(), 1);
        assert_eq!(field.validation_rules.len(), 1);
    }

    /// Building a schema through the fluent API and reading it back.
    #[test]
    fn test_validation_schema() {
        let schema = ValidationSchema::new()
            .name("test_schema")
            .version("1.0.0")
            .require_field("name", DataType::String)
            .optional_field("age", DataType::Integer)
            .add_constraint(
                "age",
                Constraint::Range {
                    min: 0.0,
                    max: 150.0,
                },
            )
            .allow_additional()
            .with_metadata("author", "test_author");

        assert_eq!(schema.name, "test_schema");
        assert_eq!(schema.version, "1.0.0");
        assert_eq!(schema.fields.len(), 2);
        assert!(schema.allow_additional_fields);
        assert_eq!(
            schema.metadata.get("author"),
            Some(&"test_author".to_string())
        );

        // Test field access
        assert!(schema.is_field_required("name"));
        assert!(!schema.is_field_required("age"));

        let required_fields = schema.get_required_fields();
        assert_eq!(required_fields.len(), 1);
        assert!(required_fields.contains(&&"name".to_string()));
    }

    /// Nested data types keep their element type and extra parameters.
    #[test]
    fn test_complex_datatypes() {
        // Test array type
        let array_type = DataType::Array(Box::new(DataType::Float64));
        let DataType::Array(inner) = array_type else {
            panic!("Expected Array type");
        };
        assert_eq!(*inner, DataType::Float64);

        // Test tensor type
        let tensor_type = DataType::Tensor {
            element_type: Box::new(DataType::Float32),
            dimensions: Some(vec![10, 20, 30]),
        };
        let DataType::Tensor {
            element_type,
            dimensions,
        } = tensor_type
        else {
            panic!("Expected Tensor type");
        };
        assert_eq!(*element_type, DataType::Float32);
        assert_eq!(dimensions, Some(vec![10, 20, 30]));

        // Test sparse matrix type
        let sparse_type = DataType::SparseMatrix {
            element_type: Box::new(DataType::Float64),
            format: SparseFormat::CSR,
        };
        let DataType::SparseMatrix {
            element_type,
            format,
        } = sparse_type
        else {
            panic!("Expected SparseMatrix type");
        };
        assert_eq!(*element_type, DataType::Float64);
        assert_eq!(format, SparseFormat::CSR);
    }

    /// `validate_schema` accepts a named schema and rejects an empty name.
    #[test]
    fn test_schema_validation() {
        let valid_schema = ValidationSchema::new()
            .name("valid_schema")
            .version("1.0.0");

        assert!(valid_schema.validate_schema().is_ok());

        // Empty name should be invalid
        let invalid_schema = ValidationSchema::new().name("").version("1.0.0");

        assert!(invalid_schema.validate_schema().is_err());
    }

    /// `DataType` equality is structural, including the boxed element type.
    #[test]
    fn test_datatype_equality() {
        assert_eq!(DataType::String, DataType::String);
        assert_eq!(DataType::Float64, DataType::Float64);
        assert_ne!(DataType::Float32, DataType::Float64);

        let array1 = DataType::Array(Box::new(DataType::Integer));
        let array2 = DataType::Array(Box::new(DataType::Integer));
        let array3 = DataType::Array(Box::new(DataType::Float64));

        assert_eq!(array1, array2);
        assert_ne!(array1, array3);
    }
}