Skip to main content

aegis_document/
validation.rs

1//! Aegis Document Validation
2//!
3//! Schema validation for documents.
4//!
5//! @version 0.1.0
6//! @author AutomataNexus Development Team
7
8use crate::types::{Document, Value};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11
12// =============================================================================
13// Schema
14// =============================================================================
15
16/// Schema definition for document validation.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct Schema {
19    pub name: String,
20    pub fields: HashMap<String, FieldSchema>,
21    pub required: Vec<String>,
22    pub additional_properties: bool,
23}
24
25impl Schema {
26    /// Create a new schema.
27    pub fn new(name: impl Into<String>) -> Self {
28        Self {
29            name: name.into(),
30            fields: HashMap::new(),
31            required: Vec::new(),
32            additional_properties: true,
33        }
34    }
35
36    /// Add a field to the schema.
37    pub fn field(mut self, name: impl Into<String>, schema: FieldSchema) -> Self {
38        self.fields.insert(name.into(), schema);
39        self
40    }
41
42    /// Add a required field.
43    pub fn require(mut self, name: impl Into<String>) -> Self {
44        self.required.push(name.into());
45        self
46    }
47
48    /// Set whether additional properties are allowed.
49    pub fn additional_properties(mut self, allow: bool) -> Self {
50        self.additional_properties = allow;
51        self
52    }
53
54    /// Validate a document against this schema.
55    pub fn validate(&self, doc: &Document) -> ValidationResult {
56        let mut errors = Vec::new();
57
58        for required in &self.required {
59            if !doc.contains(required) {
60                errors.push(format!("Missing required field: {}", required));
61            }
62        }
63
64        for (field_name, field_schema) in &self.fields {
65            if let Some(value) = doc.get(field_name) {
66                if let Err(err) = field_schema.validate(value) {
67                    errors.push(format!("Field '{}': {}", field_name, err));
68                }
69            }
70        }
71
72        if !self.additional_properties {
73            for key in doc.keys() {
74                if !self.fields.contains_key(key) {
75                    errors.push(format!("Unknown field: {}", key));
76                }
77            }
78        }
79
80        ValidationResult {
81            is_valid: errors.is_empty(),
82            errors,
83        }
84    }
85}
86
87// =============================================================================
88// Field Schema
89// =============================================================================
90
91/// Schema for a single field.
92#[derive(Debug, Clone, Serialize, Deserialize)]
93pub struct FieldSchema {
94    pub field_type: FieldType,
95    pub nullable: bool,
96    pub min: Option<f64>,
97    pub max: Option<f64>,
98    pub min_length: Option<usize>,
99    pub max_length: Option<usize>,
100    pub pattern: Option<String>,
101    pub enum_values: Option<Vec<Value>>,
102    pub items: Option<Box<FieldSchema>>,
103    pub properties: Option<HashMap<String, FieldSchema>>,
104}
105
106impl FieldSchema {
107    pub fn new(field_type: FieldType) -> Self {
108        Self {
109            field_type,
110            nullable: false,
111            min: None,
112            max: None,
113            min_length: None,
114            max_length: None,
115            pattern: None,
116            enum_values: None,
117            items: None,
118            properties: None,
119        }
120    }
121
122    pub fn string() -> Self {
123        Self::new(FieldType::String)
124    }
125
126    pub fn int() -> Self {
127        Self::new(FieldType::Int)
128    }
129
130    pub fn float() -> Self {
131        Self::new(FieldType::Float)
132    }
133
134    pub fn bool() -> Self {
135        Self::new(FieldType::Bool)
136    }
137
138    pub fn array(items: FieldSchema) -> Self {
139        let mut schema = Self::new(FieldType::Array);
140        schema.items = Some(Box::new(items));
141        schema
142    }
143
144    pub fn object() -> Self {
145        Self::new(FieldType::Object)
146    }
147
148    pub fn nullable(mut self) -> Self {
149        self.nullable = true;
150        self
151    }
152
153    pub fn min(mut self, min: f64) -> Self {
154        self.min = Some(min);
155        self
156    }
157
158    pub fn max(mut self, max: f64) -> Self {
159        self.max = Some(max);
160        self
161    }
162
163    pub fn min_length(mut self, len: usize) -> Self {
164        self.min_length = Some(len);
165        self
166    }
167
168    pub fn max_length(mut self, len: usize) -> Self {
169        self.max_length = Some(len);
170        self
171    }
172
173    pub fn pattern(mut self, pattern: impl Into<String>) -> Self {
174        self.pattern = Some(pattern.into());
175        self
176    }
177
178    pub fn enum_values(mut self, values: Vec<Value>) -> Self {
179        self.enum_values = Some(values);
180        self
181    }
182
183    /// Validate a value against this field schema.
184    pub fn validate(&self, value: &Value) -> Result<(), String> {
185        if value.is_null() {
186            if self.nullable {
187                return Ok(());
188            }
189            return Err("Value cannot be null".to_string());
190        }
191
192        if !self.field_type.matches(value) {
193            return Err(format!(
194                "Expected type {:?}, got {:?}",
195                self.field_type,
196                value_type(value)
197            ));
198        }
199
200        if let Some(ref enum_values) = self.enum_values {
201            if !enum_values.contains(value) {
202                return Err("Value not in allowed enum values".to_string());
203            }
204        }
205
206        match value {
207            Value::Int(n) => {
208                if let Some(min) = self.min {
209                    if (*n as f64) < min {
210                        return Err(format!("Value {} is less than minimum {}", n, min));
211                    }
212                }
213                if let Some(max) = self.max {
214                    if (*n as f64) > max {
215                        return Err(format!("Value {} is greater than maximum {}", n, max));
216                    }
217                }
218            }
219            Value::Float(f) => {
220                if let Some(min) = self.min {
221                    if *f < min {
222                        return Err(format!("Value {} is less than minimum {}", f, min));
223                    }
224                }
225                if let Some(max) = self.max {
226                    if *f > max {
227                        return Err(format!("Value {} is greater than maximum {}", f, max));
228                    }
229                }
230            }
231            Value::String(s) => {
232                if let Some(min_len) = self.min_length {
233                    if s.len() < min_len {
234                        return Err(format!(
235                            "String length {} is less than minimum {}",
236                            s.len(),
237                            min_len
238                        ));
239                    }
240                }
241                if let Some(max_len) = self.max_length {
242                    if s.len() > max_len {
243                        return Err(format!(
244                            "String length {} is greater than maximum {}",
245                            s.len(),
246                            max_len
247                        ));
248                    }
249                }
250                if let Some(ref pattern) = self.pattern {
251                    // Use RegexBuilder with size_limit to prevent ReDoS attacks
252                    // from catastrophic backtracking on malicious patterns.
253                    // 1MB compiled size limit is sufficient for legitimate patterns
254                    // while blocking pathological cases.
255                    let re = regex::RegexBuilder::new(pattern)
256                        .size_limit(1024 * 1024) // 1MB compiled size limit
257                        .build()
258                        .map_err(|e| format!("Invalid regex pattern: {}", e))?;
259                    if !re.is_match(s) {
260                        return Err(format!("String does not match pattern: {}", pattern));
261                    }
262                }
263            }
264            Value::Array(arr) => {
265                if let Some(min_len) = self.min_length {
266                    if arr.len() < min_len {
267                        return Err(format!(
268                            "Array length {} is less than minimum {}",
269                            arr.len(),
270                            min_len
271                        ));
272                    }
273                }
274                if let Some(max_len) = self.max_length {
275                    if arr.len() > max_len {
276                        return Err(format!(
277                            "Array length {} is greater than maximum {}",
278                            arr.len(),
279                            max_len
280                        ));
281                    }
282                }
283                if let Some(ref items_schema) = self.items {
284                    for (i, item) in arr.iter().enumerate() {
285                        if let Err(e) = items_schema.validate(item) {
286                            return Err(format!("Array item {}: {}", i, e));
287                        }
288                    }
289                }
290            }
291            Value::Object(obj) => {
292                if let Some(ref props) = self.properties {
293                    for (key, prop_schema) in props {
294                        if let Some(value) = obj.get(key) {
295                            if let Err(e) = prop_schema.validate(value) {
296                                return Err(format!("Property '{}': {}", key, e));
297                            }
298                        }
299                    }
300                }
301            }
302            _ => {}
303        }
304
305        Ok(())
306    }
307}
308
309fn value_type(value: &Value) -> &'static str {
310    match value {
311        Value::Null => "null",
312        Value::Bool(_) => "bool",
313        Value::Int(_) => "int",
314        Value::Float(_) => "float",
315        Value::String(_) => "string",
316        Value::Array(_) => "array",
317        Value::Object(_) => "object",
318    }
319}
320
321// =============================================================================
322// Field Type
323// =============================================================================
324
325/// Type of a field value.
326#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
327pub enum FieldType {
328    String,
329    Int,
330    Float,
331    Number,
332    Bool,
333    Array,
334    Object,
335    Any,
336}
337
338impl FieldType {
339    fn matches(&self, value: &Value) -> bool {
340        match (self, value) {
341            (Self::Any, _) => true,
342            (Self::String, Value::String(_)) => true,
343            (Self::Int, Value::Int(_)) => true,
344            (Self::Float, Value::Float(_)) => true,
345            (Self::Number, Value::Int(_) | Value::Float(_)) => true,
346            (Self::Bool, Value::Bool(_)) => true,
347            (Self::Array, Value::Array(_)) => true,
348            (Self::Object, Value::Object(_)) => true,
349            _ => false,
350        }
351    }
352}
353
354// =============================================================================
355// Validation Result
356// =============================================================================
357
/// Outcome of validating a document against a schema.
#[derive(Debug, Clone)]
pub struct ValidationResult {
    /// Whether validation passed.
    pub is_valid: bool,
    /// Messages describing each problem that was found.
    pub errors: Vec<String>,
}

impl ValidationResult {
    /// Build a passing result with an empty error list.
    pub fn valid() -> Self {
        let errors = Vec::new();
        Self {
            errors,
            is_valid: true,
        }
    }

    /// Build a failing result carrying `errors`.
    ///
    /// The result is marked invalid even when `errors` is empty.
    pub fn invalid(errors: Vec<String>) -> Self {
        Self {
            errors,
            is_valid: false,
        }
    }
}
380
381// =============================================================================
382// Schema Builder
383// =============================================================================
384
385/// Builder for creating schemas.
386pub struct SchemaBuilder {
387    schema: Schema,
388}
389
390impl SchemaBuilder {
391    pub fn new(name: impl Into<String>) -> Self {
392        Self {
393            schema: Schema::new(name),
394        }
395    }
396
397    pub fn field(mut self, name: impl Into<String>, schema: FieldSchema) -> Self {
398        self.schema.fields.insert(name.into(), schema);
399        self
400    }
401
402    pub fn required_field(mut self, name: impl Into<String>, schema: FieldSchema) -> Self {
403        let name = name.into();
404        self.schema.fields.insert(name.clone(), schema);
405        self.schema.required.push(name);
406        self
407    }
408
409    pub fn additional_properties(mut self, allow: bool) -> Self {
410        self.schema.additional_properties = allow;
411        self
412    }
413
414    pub fn build(self) -> Schema {
415        self.schema
416    }
417}
418
419// =============================================================================
420// Tests
421// =============================================================================
422
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a string value.
    fn text(s: &str) -> Value {
        Value::String(s.to_string())
    }

    /// Shorthand for building an array of integer values.
    fn ints(items: &[i64]) -> Value {
        Value::Array(items.iter().map(|&n| Value::Int(n)).collect())
    }

    #[test]
    fn test_type_validation() {
        let string_schema = FieldSchema::string();
        assert!(string_schema.validate(&text("hello")).is_ok());
        assert!(string_schema.validate(&Value::Int(42)).is_err());

        let int_schema = FieldSchema::int();
        assert!(int_schema.validate(&Value::Int(42)).is_ok());
        assert!(int_schema.validate(&text("42")).is_err());
    }

    #[test]
    fn test_nullable() {
        let strict = FieldSchema::string();
        assert!(strict.validate(&Value::Null).is_err());

        let lenient = FieldSchema::string().nullable();
        assert!(lenient.validate(&Value::Null).is_ok());
    }

    #[test]
    fn test_range_validation() {
        let bounded = FieldSchema::int().min(0.0).max(100.0);

        assert!(bounded.validate(&Value::Int(50)).is_ok());
        for out_of_range in [-1, 101] {
            assert!(bounded.validate(&Value::Int(out_of_range)).is_err());
        }
    }

    #[test]
    fn test_string_length() {
        let bounded = FieldSchema::string().min_length(3).max_length(10);

        assert!(bounded.validate(&text("hello")).is_ok());
        assert!(bounded.validate(&text("hi")).is_err());
        assert!(bounded.validate(&text("hello world!")).is_err());
    }

    #[test]
    fn test_pattern_validation() {
        let phone = FieldSchema::string().pattern(r"^\d{3}-\d{4}$");

        assert!(phone.validate(&text("123-4567")).is_ok());
        assert!(phone.validate(&text("invalid")).is_err());
    }

    #[test]
    fn test_schema_validation() {
        let user_schema = SchemaBuilder::new("User")
            .required_field("name", FieldSchema::string().min_length(1))
            .required_field("age", FieldSchema::int().min(0.0))
            .field("email", FieldSchema::string().nullable())
            .build();

        // A document carrying both required fields passes.
        let mut complete = Document::new();
        complete.set("name", "Alice");
        complete.set("age", 30i64);

        let outcome = user_schema.validate(&complete);
        assert!(outcome.is_valid);

        // Leaving out "age" must fail and mention the missing field.
        let mut missing_age = Document::new();
        missing_age.set("name", "Bob");

        let outcome = user_schema.validate(&missing_age);
        assert!(!outcome.is_valid);
        assert!(outcome.errors.iter().any(|e| e.contains("age")));
    }

    #[test]
    fn test_enum_validation() {
        let status = FieldSchema::string().enum_values(vec![text("active"), text("inactive")]);

        assert!(status.validate(&text("active")).is_ok());
        assert!(status.validate(&text("unknown")).is_err());
    }

    #[test]
    fn test_array_validation() {
        let int_list = FieldSchema::array(FieldSchema::int())
            .min_length(1)
            .max_length(5);

        assert!(int_list.validate(&ints(&[1, 2])).is_ok());
        assert!(int_list.validate(&Value::Array(vec![])).is_err());
        assert!(int_list
            .validate(&Value::Array(vec![text("not an int")]))
            .is_err());
    }
}
531}