// rust_yaml/schema.rs

//! YAML schema validation system

use crate::{Error, Position, Result, Value};
use regex::Regex;
use std::collections::HashMap;
use std::fmt;

8/// Schema validation error with detailed context
9#[derive(Debug, Clone)]
10pub struct ValidationError {
11    /// Path to the invalid value (e.g., "config.database.port")
12    pub path: String,
13    /// The validation rule that failed
14    pub rule: String,
15    /// Human-readable error message
16    pub message: String,
17    /// The invalid value
18    pub value: Value,
19    /// Position in the YAML document (if available)
20    pub position: Option<Position>,
21}
22
23impl fmt::Display for ValidationError {
24    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25        write!(f, "Validation error at '{}': {}", self.path, self.message)
26    }
27}
28
29impl std::error::Error for ValidationError {}
30
31/// Result type for schema validation
32pub type ValidationResult<T> = std::result::Result<T, Vec<ValidationError>>;
33
34/// Custom validator function type
35pub type ValidatorFn = Box<dyn Fn(&Value, &str) -> Result<()> + Send + Sync>;
36
37/// Schema validation rules
38#[derive(Debug, Clone)]
39pub enum SchemaRule {
40    /// Validate type (string, number, boolean, array, object, null)
41    Type(ValueType),
42    /// String must match regex pattern
43    Pattern(Regex),
44    /// String/Array length constraints
45    Length {
46        /// Minimum length (inclusive)
47        min: Option<usize>,
48        /// Maximum length (inclusive)
49        max: Option<usize>,
50    },
51    /// Number range constraints
52    Range {
53        /// Minimum value (inclusive)
54        min: Option<f64>,
55        /// Maximum value (inclusive)
56        max: Option<f64>,
57    },
58    /// Value must be one of the specified values
59    Enum(Vec<Value>),
60    /// Object property validation
61    Properties(HashMap<String, Schema>),
62    /// Array item validation
63    Items(Box<Schema>),
64    /// Required properties for objects
65    Required(Vec<String>),
66    /// Additional properties allowed for objects
67    AdditionalProperties(bool),
68    /// Custom validation function  
69    Custom(String),
70    /// Conditional validation (if-then-else)
71    Conditional {
72        /// Condition to check
73        if_schema: Box<Schema>,
74        /// Schema to apply if condition matches
75        then_schema: Option<Box<Schema>>,
76        /// Schema to apply if condition doesn't match
77        else_schema: Option<Box<Schema>>,
78    },
79    /// Value must not match this schema (negation)
80    Not(Box<Schema>),
81    /// Value must match any of these schemas (OR)
82    AnyOf(Vec<Schema>),
83    /// Value must match all of these schemas (AND)
84    AllOf(Vec<Schema>),
85    /// Value must match exactly one of these schemas (XOR)
86    OneOf(Vec<Schema>),
87}
88
/// Supported value types for validation
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValueType {
    /// String type
    String,
    /// Number type (float or integer)
    Number,
    /// Integer type
    Integer,
    /// Boolean type
    Boolean,
    /// Array/Sequence type
    Array,
    /// Object/Mapping type
    Object,
    /// Null type
    Null,
}

impl fmt::Display for ValueType {
    /// Writes the lowercase type name (`"string"`, `"number"`, ...).
    ///
    /// Uses [`fmt::Formatter::pad`] so width, fill and alignment flags such as
    /// `{:>10}` are honoured; plain `{}` output is just the name.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            ValueType::String => "string",
            ValueType::Number => "number",
            ValueType::Integer => "integer",
            ValueType::Boolean => "boolean",
            ValueType::Array => "array",
            ValueType::Object => "object",
            ValueType::Null => "null",
        };
        f.pad(name)
    }
}

122/// A complete schema definition
123#[derive(Debug, Clone)]
124pub struct Schema {
125    /// Schema title/description
126    pub title: Option<String>,
127    /// Schema description
128    pub description: Option<String>,
129    /// Validation rules for this schema
130    pub rules: Vec<SchemaRule>,
131    /// Whether this schema is optional
132    pub optional: bool,
133    /// Default value if not provided
134    pub default: Option<Value>,
135}
136
137impl Schema {
138    /// Create a new empty schema
139    pub fn new() -> Self {
140        Self {
141            title: None,
142            description: None,
143            rules: Vec::new(),
144            optional: false,
145            default: None,
146        }
147    }
148
149    /// Create a schema with a specific type
150    pub fn with_type(value_type: ValueType) -> Self {
151        Self {
152            title: None,
153            description: None,
154            rules: vec![SchemaRule::Type(value_type)],
155            optional: false,
156            default: None,
157        }
158    }
159
160    /// Add a validation rule
161    pub fn rule(mut self, rule: SchemaRule) -> Self {
162        self.rules.push(rule);
163        self
164    }
165
166    /// Add multiple validation rules
167    pub fn rules(mut self, rules: Vec<SchemaRule>) -> Self {
168        self.rules.extend(rules);
169        self
170    }
171
172    /// Set title and description
173    pub fn info(mut self, title: &str, description: &str) -> Self {
174        self.title = Some(title.to_string());
175        self.description = Some(description.to_string());
176        self
177    }
178
179    /// Mark schema as optional
180    pub fn optional(mut self) -> Self {
181        self.optional = true;
182        self
183    }
184
185    /// Set default value
186    pub fn default_value(mut self, value: Value) -> Self {
187        self.default = Some(value);
188        self
189    }
190
191    /// Validate a value against this schema
192    pub fn validate(&self, value: &Value, path: &str) -> ValidationResult<()> {
193        let mut errors = Vec::new();
194
195        // Apply each validation rule
196        for rule in &self.rules {
197            if let Err(rule_errors) = self.apply_rule(rule, value, path) {
198                errors.extend(rule_errors);
199            }
200        }
201
202        if errors.is_empty() {
203            Ok(())
204        } else {
205            Err(errors)
206        }
207    }
208
209    /// Apply a single validation rule
210    fn apply_rule(&self, rule: &SchemaRule, value: &Value, path: &str) -> ValidationResult<()> {
211        match rule {
212            SchemaRule::Type(expected_type) => self.validate_type(expected_type, value, path),
213            SchemaRule::Pattern(regex) => self.validate_pattern(regex, value, path),
214            SchemaRule::Length { min, max } => self.validate_length(*min, *max, value, path),
215            SchemaRule::Range { min, max } => self.validate_range(*min, *max, value, path),
216            SchemaRule::Enum(allowed_values) => self.validate_enum(allowed_values, value, path),
217            SchemaRule::Properties(properties) => self.validate_properties(properties, value, path),
218            SchemaRule::Items(item_schema) => self.validate_items(item_schema, value, path),
219            SchemaRule::Required(required_props) => {
220                self.validate_required(required_props, value, path)
221            }
222            SchemaRule::AdditionalProperties(allowed) => {
223                self.validate_additional_properties(*allowed, value, path)
224            }
225            SchemaRule::Custom(name) => self.validate_custom(name, value, path),
226            SchemaRule::Conditional {
227                if_schema,
228                then_schema,
229                else_schema,
230            } => self.validate_conditional(
231                if_schema,
232                then_schema.as_ref().map(|v| &**v),
233                else_schema.as_ref().map(|v| &**v),
234                value,
235                path,
236            ),
237            SchemaRule::Not(schema) => self.validate_not(schema, value, path),
238            SchemaRule::AnyOf(schemas) => self.validate_any_of(schemas, value, path),
239            SchemaRule::AllOf(schemas) => self.validate_all_of(schemas, value, path),
240            SchemaRule::OneOf(schemas) => self.validate_one_of(schemas, value, path),
241        }
242    }
243
244    /// Validate value type
245    fn validate_type(
246        &self,
247        expected_type: &ValueType,
248        value: &Value,
249        path: &str,
250    ) -> ValidationResult<()> {
251        let actual_type = match value {
252            Value::String(_) => ValueType::String,
253            Value::Int(_) => ValueType::Integer,
254            Value::Float(_) => ValueType::Number,
255            Value::Bool(_) => ValueType::Boolean,
256            Value::Sequence(_) => ValueType::Array,
257            Value::Mapping(_) => ValueType::Object,
258            Value::Null => ValueType::Null,
259        };
260
261        // Allow integer to be considered as number
262        let type_matches = match (expected_type, &actual_type) {
263            (ValueType::Number, ValueType::Integer) => true,
264            _ => expected_type == &actual_type,
265        };
266
267        if type_matches {
268            Ok(())
269        } else {
270            Err(vec![ValidationError {
271                path: path.to_string(),
272                rule: "type".to_string(),
273                message: format!("Expected {}, got {}", expected_type, actual_type),
274                value: value.clone(),
275                position: None,
276            }])
277        }
278    }
279
280    /// Validate regex pattern
281    fn validate_pattern(&self, regex: &Regex, value: &Value, path: &str) -> ValidationResult<()> {
282        if let Value::String(s) = value {
283            if regex.is_match(s) {
284                Ok(())
285            } else {
286                Err(vec![ValidationError {
287                    path: path.to_string(),
288                    rule: "pattern".to_string(),
289                    message: format!("String '{}' does not match required pattern", s),
290                    value: value.clone(),
291                    position: None,
292                }])
293            }
294        } else {
295            Err(vec![ValidationError {
296                path: path.to_string(),
297                rule: "pattern".to_string(),
298                message: "Pattern validation can only be applied to strings".to_string(),
299                value: value.clone(),
300                position: None,
301            }])
302        }
303    }
304
305    /// Validate length constraints
306    fn validate_length(
307        &self,
308        min: Option<usize>,
309        max: Option<usize>,
310        value: &Value,
311        path: &str,
312    ) -> ValidationResult<()> {
313        let length = match value {
314            Value::String(s) => s.len(),
315            Value::Sequence(seq) => seq.len(),
316            _ => {
317                return Err(vec![ValidationError {
318                    path: path.to_string(),
319                    rule: "length".to_string(),
320                    message: "Length validation can only be applied to strings or arrays"
321                        .to_string(),
322                    value: value.clone(),
323                    position: None,
324                }]);
325            }
326        };
327
328        let mut errors = Vec::new();
329
330        if let Some(min_len) = min {
331            if length < min_len {
332                errors.push(ValidationError {
333                    path: path.to_string(),
334                    rule: "minLength".to_string(),
335                    message: format!("Length {} is less than minimum {}", length, min_len),
336                    value: value.clone(),
337                    position: None,
338                });
339            }
340        }
341
342        if let Some(max_len) = max {
343            if length > max_len {
344                errors.push(ValidationError {
345                    path: path.to_string(),
346                    rule: "maxLength".to_string(),
347                    message: format!("Length {} is greater than maximum {}", length, max_len),
348                    value: value.clone(),
349                    position: None,
350                });
351            }
352        }
353
354        if errors.is_empty() {
355            Ok(())
356        } else {
357            Err(errors)
358        }
359    }
360
361    /// Validate numeric range
362    fn validate_range(
363        &self,
364        min: Option<f64>,
365        max: Option<f64>,
366        value: &Value,
367        path: &str,
368    ) -> ValidationResult<()> {
369        let number = match value {
370            Value::Int(i) => *i as f64,
371            Value::Float(f) => *f,
372            _ => {
373                return Err(vec![ValidationError {
374                    path: path.to_string(),
375                    rule: "range".to_string(),
376                    message: "Range validation can only be applied to numbers".to_string(),
377                    value: value.clone(),
378                    position: None,
379                }]);
380            }
381        };
382
383        let mut errors = Vec::new();
384
385        if let Some(min_val) = min {
386            if number < min_val {
387                errors.push(ValidationError {
388                    path: path.to_string(),
389                    rule: "minimum".to_string(),
390                    message: format!("Value {} is less than minimum {}", number, min_val),
391                    value: value.clone(),
392                    position: None,
393                });
394            }
395        }
396
397        if let Some(max_val) = max {
398            if number > max_val {
399                errors.push(ValidationError {
400                    path: path.to_string(),
401                    rule: "maximum".to_string(),
402                    message: format!("Value {} is greater than maximum {}", number, max_val),
403                    value: value.clone(),
404                    position: None,
405                });
406            }
407        }
408
409        if errors.is_empty() {
410            Ok(())
411        } else {
412            Err(errors)
413        }
414    }
415
416    /// Validate enum values
417    fn validate_enum(
418        &self,
419        allowed_values: &[Value],
420        value: &Value,
421        path: &str,
422    ) -> ValidationResult<()> {
423        if allowed_values.contains(value) {
424            Ok(())
425        } else {
426            Err(vec![ValidationError {
427                path: path.to_string(),
428                rule: "enum".to_string(),
429                message: format!(
430                    "Value is not one of the allowed values: {:?}",
431                    allowed_values
432                ),
433                value: value.clone(),
434                position: None,
435            }])
436        }
437    }
438
439    /// Validate object properties
440    fn validate_properties(
441        &self,
442        properties: &HashMap<String, Schema>,
443        value: &Value,
444        path: &str,
445    ) -> ValidationResult<()> {
446        if let Value::Mapping(map) = value {
447            let mut errors = Vec::new();
448
449            for (prop_name, prop_schema) in properties {
450                let prop_path = if path.is_empty() {
451                    prop_name.clone()
452                } else {
453                    format!("{}.{}", path, prop_name)
454                };
455
456                // Find the property in the mapping
457                let prop_value = map
458                    .iter()
459                    .find(|(k, _)| {
460                        if let Value::String(key_str) = k {
461                            key_str == prop_name
462                        } else {
463                            false
464                        }
465                    })
466                    .map(|(_, v)| v);
467
468                match prop_value {
469                    Some(value) => {
470                        // Validate the property
471                        if let Err(prop_errors) = prop_schema.validate(value, &prop_path) {
472                            errors.extend(prop_errors);
473                        }
474                    }
475                    None => {
476                        // Property is missing
477                        if !prop_schema.optional {
478                            errors.push(ValidationError {
479                                path: prop_path,
480                                rule: "required".to_string(),
481                                message: format!("Required property '{}' is missing", prop_name),
482                                value: Value::Null,
483                                position: None,
484                            });
485                        }
486                    }
487                }
488            }
489
490            if errors.is_empty() {
491                Ok(())
492            } else {
493                Err(errors)
494            }
495        } else {
496            Err(vec![ValidationError {
497                path: path.to_string(),
498                rule: "properties".to_string(),
499                message: "Properties validation can only be applied to objects".to_string(),
500                value: value.clone(),
501                position: None,
502            }])
503        }
504    }
505
506    /// Validate array items
507    fn validate_items(
508        &self,
509        item_schema: &Schema,
510        value: &Value,
511        path: &str,
512    ) -> ValidationResult<()> {
513        if let Value::Sequence(seq) = value {
514            let mut errors = Vec::new();
515
516            for (index, item) in seq.iter().enumerate() {
517                let item_path = format!("{}[{}]", path, index);
518                if let Err(item_errors) = item_schema.validate(item, &item_path) {
519                    errors.extend(item_errors);
520                }
521            }
522
523            if errors.is_empty() {
524                Ok(())
525            } else {
526                Err(errors)
527            }
528        } else {
529            Err(vec![ValidationError {
530                path: path.to_string(),
531                rule: "items".to_string(),
532                message: "Items validation can only be applied to arrays".to_string(),
533                value: value.clone(),
534                position: None,
535            }])
536        }
537    }
538
539    /// Validate required properties
540    fn validate_required(
541        &self,
542        required_props: &[String],
543        value: &Value,
544        path: &str,
545    ) -> ValidationResult<()> {
546        if let Value::Mapping(map) = value {
547            let mut errors = Vec::new();
548
549            for required_prop in required_props {
550                let has_property = map.keys().any(|k| {
551                    if let Value::String(key_str) = k {
552                        key_str == required_prop
553                    } else {
554                        false
555                    }
556                });
557
558                if !has_property {
559                    let prop_path = if path.is_empty() {
560                        required_prop.clone()
561                    } else {
562                        format!("{}.{}", path, required_prop)
563                    };
564
565                    errors.push(ValidationError {
566                        path: prop_path,
567                        rule: "required".to_string(),
568                        message: format!("Required property '{}' is missing", required_prop),
569                        value: Value::Null,
570                        position: None,
571                    });
572                }
573            }
574
575            if errors.is_empty() {
576                Ok(())
577            } else {
578                Err(errors)
579            }
580        } else {
581            Err(vec![ValidationError {
582                path: path.to_string(),
583                rule: "required".to_string(),
584                message: "Required validation can only be applied to objects".to_string(),
585                value: value.clone(),
586                position: None,
587            }])
588        }
589    }
590
591    /// Validate additional properties
592    fn validate_additional_properties(
593        &self,
594        allowed: bool,
595        value: &Value,
596        _path: &str,
597    ) -> ValidationResult<()> {
598        // This would be implemented in conjunction with Properties validation
599        // For now, always allow additional properties
600        if !allowed {
601            // TODO: Check for additional properties not defined in schema
602        }
603        Ok(())
604    }
605
606    /// Validate using custom function
607    fn validate_custom(&self, name: &str, value: &Value, path: &str) -> ValidationResult<()> {
608        // For now, custom validation always passes
609        // In a full implementation, this would call user-provided functions
610        Ok(())
611    }
612
613    /// Validate conditional logic
614    fn validate_conditional(
615        &self,
616        if_schema: &Schema,
617        then_schema: Option<&Schema>,
618        else_schema: Option<&Schema>,
619        value: &Value,
620        path: &str,
621    ) -> ValidationResult<()> {
622        // Check if the "if" condition matches
623        let if_matches = if_schema.validate(value, path).is_ok();
624
625        if if_matches {
626            if let Some(then_schema) = then_schema {
627                then_schema.validate(value, path)
628            } else {
629                Ok(())
630            }
631        } else if let Some(else_schema) = else_schema {
632            else_schema.validate(value, path)
633        } else {
634            Ok(())
635        }
636    }
637
638    /// Validate negation (NOT)
639    fn validate_not(&self, schema: &Schema, value: &Value, path: &str) -> ValidationResult<()> {
640        if schema.validate(value, path).is_ok() {
641            Err(vec![ValidationError {
642                path: path.to_string(),
643                rule: "not".to_string(),
644                message: "Value must not match the specified schema".to_string(),
645                value: value.clone(),
646                position: None,
647            }])
648        } else {
649            Ok(())
650        }
651    }
652
653    /// Validate any of (OR)
654    fn validate_any_of(
655        &self,
656        schemas: &[Schema],
657        value: &Value,
658        path: &str,
659    ) -> ValidationResult<()> {
660        for schema in schemas {
661            if schema.validate(value, path).is_ok() {
662                return Ok(());
663            }
664        }
665
666        Err(vec![ValidationError {
667            path: path.to_string(),
668            rule: "anyOf".to_string(),
669            message: "Value must match at least one of the specified schemas".to_string(),
670            value: value.clone(),
671            position: None,
672        }])
673    }
674
675    /// Validate all of (AND)
676    fn validate_all_of(
677        &self,
678        schemas: &[Schema],
679        value: &Value,
680        path: &str,
681    ) -> ValidationResult<()> {
682        let mut all_errors = Vec::new();
683
684        for schema in schemas {
685            if let Err(errors) = schema.validate(value, path) {
686                all_errors.extend(errors);
687            }
688        }
689
690        if all_errors.is_empty() {
691            Ok(())
692        } else {
693            Err(all_errors)
694        }
695    }
696
697    /// Validate one of (XOR)
698    fn validate_one_of(
699        &self,
700        schemas: &[Schema],
701        value: &Value,
702        path: &str,
703    ) -> ValidationResult<()> {
704        let mut valid_count = 0;
705
706        for schema in schemas {
707            if schema.validate(value, path).is_ok() {
708                valid_count += 1;
709            }
710        }
711
712        if valid_count == 1 {
713            Ok(())
714        } else if valid_count == 0 {
715            Err(vec![ValidationError {
716                path: path.to_string(),
717                rule: "oneOf".to_string(),
718                message: "Value must match exactly one of the specified schemas (matched none)"
719                    .to_string(),
720                value: value.clone(),
721                position: None,
722            }])
723        } else {
724            Err(vec![ValidationError {
725                path: path.to_string(),
726                rule: "oneOf".to_string(),
727                message: format!(
728                    "Value must match exactly one of the specified schemas (matched {})",
729                    valid_count
730                ),
731                value: value.clone(),
732                position: None,
733            }])
734        }
735    }
736}
737
738impl Default for Schema {
739    fn default() -> Self {
740        Self::new()
741    }
742}
743
744/// Schema validator for YAML documents
745#[derive(Debug)]
746pub struct SchemaValidator {
747    /// Root schema for validation
748    pub schema: Schema,
749    /// Whether to collect all errors or stop at first error
750    pub collect_all_errors: bool,
751}
752
753impl SchemaValidator {
754    /// Create a new schema validator
755    pub fn new(schema: Schema) -> Self {
756        Self {
757            schema,
758            collect_all_errors: true,
759        }
760    }
761
762    /// Create a validator that stops at first error
763    pub fn fail_fast(schema: Schema) -> Self {
764        Self {
765            schema,
766            collect_all_errors: false,
767        }
768    }
769
770    /// Validate a YAML value against the schema
771    pub fn validate(&self, value: &Value) -> ValidationResult<()> {
772        self.schema.validate(value, "")
773    }
774
775    /// Validate and return a formatted error report
776    pub fn validate_with_report(&self, value: &Value) -> Result<()> {
777        match self.validate(value) {
778            Ok(()) => Ok(()),
779            Err(errors) => {
780                let error_messages: Vec<String> =
781                    errors.iter().map(|e| format!("  - {}", e)).collect();
782
783                let message = format!(
784                    "Schema validation failed with {} error(s):\n{}",
785                    errors.len(),
786                    error_messages.join("\n")
787                );
788
789                Err(Error::parse(Position::start(), message))
790            }
791        }
792    }
793}
794
#[cfg(test)]
mod tests {
    use super::*;
    use indexmap::IndexMap;

    /// A string-typed schema accepts strings and rejects integers.
    #[test]
    fn test_type_validation() {
        let schema = Schema::with_type(ValueType::String);

        // Valid case
        assert!(schema
            .validate(&Value::String("hello".to_string()), "test")
            .is_ok());

        // Invalid case
        assert!(schema.validate(&Value::Int(42), "test").is_err());
    }

    /// A `Number` schema with a 0..=100 range accepts ints and floats inside
    /// the range and rejects values outside it.
    #[test]
    fn test_range_validation() {
        let schema = Schema::new()
            .rule(SchemaRule::Type(ValueType::Number))
            .rule(SchemaRule::Range {
                min: Some(0.0),
                max: Some(100.0),
            });

        // Valid cases
        assert!(schema.validate(&Value::Int(50), "test").is_ok());
        assert!(schema.validate(&Value::Float(75.5), "test").is_ok());

        // Invalid cases
        assert!(schema.validate(&Value::Int(-1), "test").is_err());
        assert!(schema.validate(&Value::Int(101), "test").is_err());
    }

    /// An object schema validates declared properties and reports a missing
    /// required property.
    #[test]
    fn test_object_validation() {
        let mut properties = HashMap::new();
        properties.insert("name".to_string(), Schema::with_type(ValueType::String));
        properties.insert(
            "age".to_string(),
            Schema::with_type(ValueType::Integer).rule(SchemaRule::Range {
                min: Some(0.0),
                max: Some(150.0),
            }),
        );

        let schema = Schema::with_type(ValueType::Object)
            .rule(SchemaRule::Properties(properties))
            .rule(SchemaRule::Required(vec!["name".to_string()]));

        // Valid case
        let mut map = IndexMap::new();
        map.insert(
            Value::String("name".to_string()),
            Value::String("Alice".to_string()),
        );
        map.insert(Value::String("age".to_string()), Value::Int(30));

        assert!(schema.validate(&Value::Mapping(map), "test").is_ok());

        // Invalid case - missing required property
        let mut invalid_map = IndexMap::new();
        invalid_map.insert(Value::String("age".to_string()), Value::Int(30));

        assert!(schema
            .validate(&Value::Mapping(invalid_map), "test")
            .is_err());
    }
}