Skip to main content

eure_schema/
validate.rs

1//! Document schema validation
2//!
3//! # Architecture
4//!
5//! Validation is built on `DocumentParser` composition:
6//! - `SchemaValidator`: Dispatches to type-specific validators based on `SchemaNodeContent`
7//! - Type validators: Implement `DocumentParser<Output = (), Error = ValidatorError>`
8//! - `ValidationContext`: Manages shared state (errors, warnings, path)
9//!
10//! # Error Handling
11//!
12//! Two categories of errors:
13//! - `ValidationError`: Type mismatches accumulated in `ValidationContext` (non-fatal)
14//! - `ValidatorError`: Internal validator errors causing fail-fast (e.g., undefined references)
15//!
16//! # Hole Values
17//!
18//! The hole value (`!`) represents an unfilled placeholder:
19//! - Type checking: Holes match any schema (always pass)
20//! - Completeness: Documents containing holes are valid but not complete
21
22mod compound;
23mod context;
24mod error;
25mod primitive;
26mod record;
27mod reference;
28mod union;
29
30pub use context::{ValidationContext, ValidationOutput, ValidationState};
31pub use error::{ValidationError, ValidationWarning, ValidatorError};
32
33// Re-export UnionTagMode for convenience
34pub use eure_document::parse::UnionTagMode;
35
36use eure_document::document::node::NodeValue;
37use eure_document::document::{EureDocument, NodeId};
38use eure_document::parse::{DocumentParser, ParseContext};
39
40use crate::{SchemaDocument, SchemaNodeContent, SchemaNodeId, identifiers};
41
42use compound::{ArrayValidator, MapValidator, TupleValidator};
43use primitive::{
44    AnyValidator, BooleanValidator, FloatValidator, IntegerValidator, LiteralValidator,
45    NullValidator, TextValidator,
46};
47use record::RecordValidator;
48use reference::ReferenceValidator;
49use union::UnionValidator;
50
51// =============================================================================
52// Public API
53// =============================================================================
54
55/// Validate a document against a schema.
56///
57/// Uses the default `Eure` union tag mode.
58///
59/// # Example
60///
61/// ```ignore
62/// let output = validate(&document, &schema);
63/// if output.is_valid {
64///     println!("Document is valid!");
65/// } else {
66///     for error in &output.errors {
67///         println!("Error: {}", error);
68///     }
69/// }
70/// ```
71pub fn validate(document: &EureDocument, schema: &SchemaDocument) -> ValidationOutput {
72    validate_with_mode(document, schema, UnionTagMode::default())
73}
74
75/// Validate a document against a schema with the specified union tag mode.
76///
77/// # Arguments
78///
79/// * `document` - The document to validate
80/// * `schema` - The schema to validate against
81/// * `mode` - The union tag mode to use:
82///   - `UnionTagMode::Eure`: Use `$variant` extension or untagged matching (native Eure documents)
83///   - `UnionTagMode::Repr`: Use only `VariantRepr` patterns (JSON/YAML imports)
84pub fn validate_with_mode(
85    document: &EureDocument,
86    schema: &SchemaDocument,
87    mode: UnionTagMode,
88) -> ValidationOutput {
89    let root_id = document.get_root_id();
90    validate_node_with_mode(document, schema, root_id, schema.root, mode)
91}
92
93/// Validate a specific node against a schema node.
94///
95/// Uses the default `Eure` union tag mode.
96pub fn validate_node(
97    document: &EureDocument,
98    schema: &SchemaDocument,
99    node_id: NodeId,
100    schema_id: SchemaNodeId,
101) -> ValidationOutput {
102    validate_node_with_mode(
103        document,
104        schema,
105        node_id,
106        schema_id,
107        UnionTagMode::default(),
108    )
109}
110
111/// Validate a specific node against a schema node with the specified union tag mode.
112pub fn validate_node_with_mode(
113    document: &EureDocument,
114    schema: &SchemaDocument,
115    node_id: NodeId,
116    schema_id: SchemaNodeId,
117    mode: UnionTagMode,
118) -> ValidationOutput {
119    let ctx = ValidationContext::with_mode(document, schema, mode);
120    let parse_ctx = ctx.parse_context(node_id);
121
122    let validator = SchemaValidator {
123        ctx: &ctx,
124        schema_node_id: schema_id,
125    };
126
127    // Errors are accumulated in ctx, result is always Ok unless internal error
128    let _ = parse_ctx.parse_with(validator);
129
130    ctx.finish()
131}
132
133// =============================================================================
134// SchemaValidator (main dispatcher)
135// =============================================================================
136
/// Main validator that dispatches to type-specific validators.
///
/// Implements `DocumentParser` to enable composition with other parsers.
/// One instance checks one document node against one schema node; nested
/// extension validation creates further instances that share the same context.
pub struct SchemaValidator<'a, 'doc> {
    /// Shared validation context; errors, warnings and the hole flag are
    /// recorded through it.
    pub ctx: &'a ValidationContext<'doc>,
    /// Schema node the visited document node is validated against.
    pub schema_node_id: SchemaNodeId,
}
144
145impl<'a, 'doc> DocumentParser<'doc> for SchemaValidator<'a, 'doc> {
146    type Output = ();
147    type Error = ValidatorError;
148
149    fn parse(&mut self, parse_ctx: &ParseContext<'doc>) -> Result<(), ValidatorError> {
150        let node = parse_ctx.node();
151
152        if node.get_extension(&identifiers::TYPE).is_some() {
153            // Inline schema validation are performed on other path.
154            return Ok(());
155        }
156
157        // Check for hole - holes match any schema
158        if matches!(&node.content, NodeValue::Hole(_)) {
159            self.ctx.mark_has_holes();
160            return Ok(());
161        }
162
163        let schema_node = self.ctx.schema.node(self.schema_node_id);
164
165        // Create a flattened context so extensions and content validation share AccessedSet
166        let parse_ctx = parse_ctx.flatten();
167
168        // Validate extensions (accesses tracked in flatten context)
169        self.validate_extensions(&parse_ctx)?;
170
171        // Dispatch to type-specific validator
172        match &schema_node.content {
173            SchemaNodeContent::Any => {
174                self.warn_unknown_extensions(&parse_ctx);
175                let mut v = AnyValidator;
176                v.parse(&parse_ctx)
177            }
178            SchemaNodeContent::Text(s) => {
179                self.warn_unknown_extensions(&parse_ctx);
180                let mut v = TextValidator {
181                    ctx: self.ctx,
182                    schema: s,
183                    schema_node_id: self.schema_node_id,
184                };
185                v.parse(&parse_ctx)
186            }
187            SchemaNodeContent::Integer(s) => {
188                self.warn_unknown_extensions(&parse_ctx);
189                let mut v = IntegerValidator {
190                    ctx: self.ctx,
191                    schema: s,
192                    schema_node_id: self.schema_node_id,
193                };
194                v.parse(&parse_ctx)
195            }
196            SchemaNodeContent::Float(s) => {
197                self.warn_unknown_extensions(&parse_ctx);
198                let mut v = FloatValidator {
199                    ctx: self.ctx,
200                    schema: s,
201                    schema_node_id: self.schema_node_id,
202                };
203                v.parse(&parse_ctx)
204            }
205            SchemaNodeContent::Boolean => {
206                self.warn_unknown_extensions(&parse_ctx);
207                let mut v = BooleanValidator {
208                    ctx: self.ctx,
209                    schema_node_id: self.schema_node_id,
210                };
211                v.parse(&parse_ctx)
212            }
213            SchemaNodeContent::Null => {
214                self.warn_unknown_extensions(&parse_ctx);
215                let mut v = NullValidator {
216                    ctx: self.ctx,
217                    schema_node_id: self.schema_node_id,
218                };
219                v.parse(&parse_ctx)
220            }
221            SchemaNodeContent::Literal(expected) => {
222                self.warn_unknown_extensions(&parse_ctx);
223                let mut v = LiteralValidator {
224                    ctx: self.ctx,
225                    expected,
226                    schema_node_id: self.schema_node_id,
227                };
228                v.parse(&parse_ctx)
229            }
230            SchemaNodeContent::Array(s) => {
231                self.warn_unknown_extensions(&parse_ctx);
232                let mut v = ArrayValidator {
233                    ctx: self.ctx,
234                    schema: s,
235                    schema_node_id: self.schema_node_id,
236                };
237                v.parse(&parse_ctx)
238            }
239            SchemaNodeContent::Map(s) => {
240                self.warn_unknown_extensions(&parse_ctx);
241                let mut v = MapValidator {
242                    ctx: self.ctx,
243                    schema: s,
244                    schema_node_id: self.schema_node_id,
245                };
246                v.parse(&parse_ctx)
247            }
248            SchemaNodeContent::Record(s) => {
249                self.warn_unknown_extensions(&parse_ctx);
250                let mut v = RecordValidator {
251                    ctx: self.ctx,
252                    schema: s,
253                    schema_node_id: self.schema_node_id,
254                };
255                v.parse(&parse_ctx)
256            }
257            SchemaNodeContent::Tuple(s) => {
258                self.warn_unknown_extensions(&parse_ctx);
259                let mut v = TupleValidator {
260                    ctx: self.ctx,
261                    schema: s,
262                    schema_node_id: self.schema_node_id,
263                };
264                v.parse(&parse_ctx)
265            }
266            SchemaNodeContent::Union(s) => {
267                self.warn_unknown_extensions(&parse_ctx);
268                let mut v = UnionValidator {
269                    ctx: self.ctx,
270                    schema: s,
271                    schema_node_id: self.schema_node_id,
272                };
273                v.parse(&parse_ctx)
274            }
275            SchemaNodeContent::Reference(r) => {
276                // Reference: recurse with resolved schema using the same flattened context
277                // This ensures extension tracking is shared through Reference indirection
278                let mut child_validator = ReferenceValidator {
279                    ctx: self.ctx,
280                    type_ref: r,
281                    schema_node_id: self.schema_node_id,
282                };
283                child_validator.parse(&parse_ctx)
284            }
285        }
286    }
287}
288
289impl<'a, 'doc> SchemaValidator<'a, 'doc> {
290    /// Validate extensions on the current node.
291    ///
292    /// This validates required and present extensions. Accesses are tracked
293    /// in the flatten context's AccessedSet.
294    fn validate_extensions(&self, parse_ctx: &ParseContext<'doc>) -> Result<(), ValidatorError> {
295        let schema_node = self.ctx.schema.node(self.schema_node_id);
296        let ext_types = &schema_node.ext_types;
297        let node = parse_ctx.node();
298        let node_id = parse_ctx.node_id();
299
300        // Check for missing required extensions
301        for (ext_ident, ext_schema) in ext_types {
302            if !ext_schema.optional && !node.extensions.contains_key(ext_ident) {
303                self.ctx
304                    .record_error(ValidationError::MissingRequiredExtension {
305                        extension: ext_ident.to_string(),
306                        path: self.ctx.path(),
307                        node_id,
308                        schema_node_id: self.schema_node_id,
309                    });
310            }
311        }
312
313        // Validate present extensions - accesses are tracked in the shared flatten context
314        for (ext_ident, ext_schema) in ext_types {
315            if let Some(ext_ctx) = parse_ctx.ext_optional(ext_ident.as_ref()) {
316                self.ctx.push_path_extension(ext_ident.clone());
317
318                let child_validator = SchemaValidator {
319                    ctx: self.ctx,
320                    schema_node_id: ext_schema.schema,
321                };
322                let _ = ext_ctx.parse_with(child_validator);
323
324                self.ctx.pop_path();
325            }
326        }
327
328        Ok(())
329    }
330
331    /// Warn about unknown extensions at terminal types.
332    ///
333    /// Extensions that are:
334    /// - Not accessed (not in schema's ext_types)
335    /// - Not built-in ($variant, $schema, $ext-type, etc.)
336    ///
337    /// Uses the shared AccessedSet from the flatten context to determine
338    /// which extensions have been accessed.
339    fn warn_unknown_extensions(&self, parse_ctx: &ParseContext<'doc>) {
340        for (ext_ident, _) in parse_ctx.unknown_extensions() {
341            // Skip built-in extensions used by the schema system
342            if Self::is_builtin_extension(ext_ident) {
343                continue;
344            }
345            self.ctx
346                .record_warning(ValidationWarning::UnknownExtension {
347                    name: ext_ident.to_string(),
348                    path: self.ctx.path(),
349                });
350        }
351    }
352
353    /// Check if an extension is a built-in schema system extension.
354    ///
355    /// Built-in extensions are always allowed and not warned about:
356    /// - $variant: used by union types
357    /// - $schema: used to specify the schema for a document
358    /// - $ext-type: used to define extension types in schemas
359    /// - $codegen: used for code generation hints
360    /// - $codegen-defaults: used for default codegen settings
361    /// - $flatten: used for record field flattening
362    fn is_builtin_extension(ident: &eure_document::identifier::Identifier) -> bool {
363        // Core schema extensions
364        ident == &identifiers::VARIANT
365            || ident == &identifiers::SCHEMA
366            || ident == &identifiers::EXT_TYPE
367            || ident == &identifiers::TYPE
368            // Codegen extensions
369            || ident.as_ref() == "codegen"
370            || ident.as_ref() == "codegen-defaults"
371            // FIXME: This seems not builtin so must be properly handled.
372            || ident.as_ref() == "flatten"
373    }
374}
375
376// =============================================================================
377// Tests
378// =============================================================================
379
380#[cfg(test)]
381mod tests {
382    use super::*;
383    use crate::{
384        ArraySchema, Bound, IntegerSchema, RecordFieldSchema, RecordSchema, TextSchema,
385        UnionSchema, UnknownFieldsPolicy,
386    };
387    use eure_document::data_model::VariantRepr;
388    use eure_document::text::Text;
389    use eure_document::value::{ObjectKey, PrimitiveValue};
390    use indexmap::{IndexMap, IndexSet};
391    use num_bigint::BigInt;
392
393    fn create_simple_schema(content: SchemaNodeContent) -> (SchemaDocument, SchemaNodeId) {
394        let mut schema = SchemaDocument {
395            nodes: Vec::new(),
396            root: SchemaNodeId(0),
397            types: IndexMap::new(),
398        };
399        let id = schema.create_node(content);
400        schema.root = id;
401        (schema, id)
402    }
403
404    fn create_doc_with_primitive(value: PrimitiveValue) -> EureDocument {
405        let mut doc = EureDocument::new();
406        let root_id = doc.get_root_id();
407        doc.node_mut(root_id).content = NodeValue::Primitive(value);
408        doc
409    }
410
411    #[test]
412    fn test_validate_text_basic() {
413        let (schema, _) = create_simple_schema(SchemaNodeContent::Text(TextSchema::default()));
414        let doc =
415            create_doc_with_primitive(PrimitiveValue::Text(Text::plaintext("hello".to_string())));
416        let result = validate(&doc, &schema);
417        assert!(result.is_valid);
418    }
419
420    #[test]
421    fn test_validate_text_pattern() {
422        let (schema, _) = create_simple_schema(SchemaNodeContent::Text(TextSchema {
423            pattern: Some(regex::Regex::new("^[a-z]+$").unwrap()),
424            ..Default::default()
425        }));
426
427        let doc =
428            create_doc_with_primitive(PrimitiveValue::Text(Text::plaintext("hello".to_string())));
429        let result = validate(&doc, &schema);
430        assert!(result.is_valid);
431
432        let doc = create_doc_with_primitive(PrimitiveValue::Text(Text::plaintext(
433            "Hello123".to_string(),
434        )));
435        let result = validate(&doc, &schema);
436        assert!(!result.is_valid);
437    }
438
439    #[test]
440    fn test_validate_integer() {
441        let (schema, _) = create_simple_schema(SchemaNodeContent::Integer(IntegerSchema {
442            min: Bound::Inclusive(BigInt::from(0)),
443            max: Bound::Inclusive(BigInt::from(100)),
444            multiple_of: None,
445        }));
446
447        let doc = create_doc_with_primitive(PrimitiveValue::Integer(BigInt::from(50)));
448        let result = validate(&doc, &schema);
449        assert!(result.is_valid);
450
451        let doc = create_doc_with_primitive(PrimitiveValue::Integer(BigInt::from(150)));
452        let result = validate(&doc, &schema);
453        assert!(!result.is_valid);
454    }
455
456    #[test]
457    fn test_validate_boolean() {
458        let (schema, _) = create_simple_schema(SchemaNodeContent::Boolean);
459
460        let doc = create_doc_with_primitive(PrimitiveValue::Bool(true));
461        let result = validate(&doc, &schema);
462        assert!(result.is_valid);
463
464        let doc = create_doc_with_primitive(PrimitiveValue::Integer(BigInt::from(1)));
465        let result = validate(&doc, &schema);
466        assert!(!result.is_valid);
467    }
468
469    #[test]
470    fn test_validate_array() {
471        let (mut schema, _) = create_simple_schema(SchemaNodeContent::Any);
472        let item_schema_id =
473            schema.create_node(SchemaNodeContent::Integer(IntegerSchema::default()));
474        schema.node_mut(schema.root).content = SchemaNodeContent::Array(ArraySchema {
475            item: item_schema_id,
476            min_length: Some(1),
477            max_length: Some(3),
478            unique: false,
479            contains: None,
480            binding_style: None,
481        });
482
483        let mut doc = EureDocument::new();
484        let root_id = doc.get_root_id();
485        doc.node_mut(root_id).content = NodeValue::Array(Default::default());
486        let child1 = doc.add_array_element(None, root_id).unwrap().node_id;
487        doc.node_mut(child1).content =
488            NodeValue::Primitive(PrimitiveValue::Integer(BigInt::from(1)));
489        let child2 = doc.add_array_element(None, root_id).unwrap().node_id;
490        doc.node_mut(child2).content =
491            NodeValue::Primitive(PrimitiveValue::Integer(BigInt::from(2)));
492
493        let result = validate(&doc, &schema);
494        assert!(result.is_valid);
495    }
496
497    #[test]
498    fn test_validate_record() {
499        let (mut schema, _) = create_simple_schema(SchemaNodeContent::Any);
500        let name_schema_id = schema.create_node(SchemaNodeContent::Text(TextSchema::default()));
501        let age_schema_id =
502            schema.create_node(SchemaNodeContent::Integer(IntegerSchema::default()));
503
504        let mut properties = IndexMap::new();
505        properties.insert(
506            "name".to_string(),
507            RecordFieldSchema {
508                schema: name_schema_id,
509                optional: false,
510                binding_style: None,
511            },
512        );
513        properties.insert(
514            "age".to_string(),
515            RecordFieldSchema {
516                schema: age_schema_id,
517                optional: true,
518                binding_style: None,
519            },
520        );
521
522        schema.node_mut(schema.root).content = SchemaNodeContent::Record(RecordSchema {
523            properties,
524            flatten: vec![],
525            unknown_fields: UnknownFieldsPolicy::Deny,
526        });
527
528        let mut doc = EureDocument::new();
529        let root_id = doc.get_root_id();
530        let name_id = doc
531            .add_map_child(ObjectKey::String("name".to_string()), root_id)
532            .unwrap()
533            .node_id;
534        doc.node_mut(name_id).content =
535            NodeValue::Primitive(PrimitiveValue::Text(Text::plaintext("Alice".to_string())));
536
537        let result = validate(&doc, &schema);
538        assert!(result.is_valid);
539    }
540
541    #[test]
542    fn test_validate_hole() {
543        let (schema, _) =
544            create_simple_schema(SchemaNodeContent::Integer(IntegerSchema::default()));
545
546        let mut doc = EureDocument::new();
547        let root_id = doc.get_root_id();
548        doc.node_mut(root_id).content = NodeValue::Hole(None);
549
550        let result = validate(&doc, &schema);
551        assert!(result.is_valid);
552        assert!(!result.is_complete);
553    }
554
555    /// Helper to create a literal schema from an EureDocument
556    fn create_literal_schema(
557        schema: &mut SchemaDocument,
558        literal_doc: EureDocument,
559    ) -> SchemaNodeId {
560        schema.create_node(SchemaNodeContent::Literal(literal_doc))
561    }
562
563    #[test]
564    fn test_validate_union_deny_untagged_without_tag() {
565        use eure_document::eure;
566
567        // Create a union with a literal variant that has deny_untagged = true
568        let (mut schema, _) = create_simple_schema(SchemaNodeContent::Any);
569
570        // Create literal schema for "active"
571        let literal_schema_id = create_literal_schema(&mut schema, eure!({ = "active" }));
572
573        // Create union with literal variant that requires explicit tagging
574        let mut variants = IndexMap::new();
575        variants.insert("literal".to_string(), literal_schema_id);
576
577        let mut deny_untagged = IndexSet::new();
578        deny_untagged.insert("literal".to_string());
579
580        schema.node_mut(schema.root).content = SchemaNodeContent::Union(UnionSchema {
581            variants,
582            unambiguous: IndexSet::new(),
583            repr: VariantRepr::Untagged,
584            deny_untagged,
585        });
586
587        // Create document with literal value but NO $variant tag
588        let doc = eure!({ = "active" });
589
590        // Validation should fail with RequiresExplicitVariant error
591        let result = validate(&doc, &schema);
592        assert!(!result.is_valid);
593        assert!(result.errors.iter().any(|e| matches!(
594            e,
595            ValidationError::RequiresExplicitVariant { variant, .. } if variant == "literal"
596        )));
597    }
598
599    #[test]
600    fn test_validate_union_deny_untagged_with_tag() {
601        use eure_document::eure;
602
603        // Create a union with a literal variant that has deny_untagged = true
604        let (mut schema, _) = create_simple_schema(SchemaNodeContent::Any);
605
606        // Create literal schema for "active"
607        let literal_schema_id = create_literal_schema(&mut schema, eure!({ = "active" }));
608
609        // Create union with literal variant that requires explicit tagging
610        let mut variants = IndexMap::new();
611        variants.insert("literal".to_string(), literal_schema_id);
612
613        let mut deny_untagged = IndexSet::new();
614        deny_untagged.insert("literal".to_string());
615
616        schema.node_mut(schema.root).content = SchemaNodeContent::Union(UnionSchema {
617            variants,
618            unambiguous: IndexSet::new(),
619            repr: VariantRepr::Untagged,
620            deny_untagged,
621        });
622
623        // Create document with literal value WITH $variant tag
624        let doc = eure!({
625            = "active"
626            %variant = "literal"
627        });
628
629        // Validation should succeed
630        let result = validate(&doc, &schema);
631        assert!(
632            result.is_valid,
633            "Expected valid, got errors: {:?}",
634            result.errors
635        );
636    }
637
638    #[test]
639    fn test_validate_union_mixed_deny_untagged() {
640        use eure_document::eure;
641
642        // Test that non-deny-untagged variants can still match via untagged
643        let (mut schema, _) = create_simple_schema(SchemaNodeContent::Any);
644
645        // Create literal schema for "active" (deny_untagged)
646        let literal_active_id = create_literal_schema(&mut schema, eure!({ = "active" }));
647
648        // Create text schema (not deny_untagged)
649        let text_schema_id = schema.create_node(SchemaNodeContent::Text(TextSchema::default()));
650
651        // Create union where literal requires explicit tag but text doesn't
652        let mut variants = IndexMap::new();
653        variants.insert("literal".to_string(), literal_active_id);
654        variants.insert("text".to_string(), text_schema_id);
655
656        let mut deny_untagged = IndexSet::new();
657        deny_untagged.insert("literal".to_string());
658
659        schema.node_mut(schema.root).content = SchemaNodeContent::Union(UnionSchema {
660            variants,
661            unambiguous: IndexSet::new(),
662            repr: VariantRepr::Untagged,
663            deny_untagged,
664        });
665
666        // Create document with value "active" but no tag
667        // This should fail because "literal" matches but requires explicit tag
668        let doc = eure!({ = "active" });
669
670        let result = validate(&doc, &schema);
671        assert!(!result.is_valid);
672        assert!(result.errors.iter().any(|e| matches!(
673            e,
674            ValidationError::RequiresExplicitVariant { variant, .. } if variant == "literal"
675        )));
676
677        // Create document with value "other text" - should match text variant via untagged
678        let doc2 = eure!({ = "other text" });
679
680        let result2 = validate(&doc2, &schema);
681        assert!(
682            result2.is_valid,
683            "Expected valid for text match, got errors: {:?}",
684            result2.errors
685        );
686    }
687
688    #[test]
689    fn test_validate_literal_with_inline_code() {
690        use eure_document::eure;
691
692        // Test that Literal comparison works correctly with inline code (Language::Implicit)
693        let mut schema = SchemaDocument::new();
694
695        // Create literal schema using inline code (like meta-schema does)
696        let literal_doc = eure!({ = @code("boolean") });
697
698        schema.node_mut(schema.root).content = SchemaNodeContent::Literal(literal_doc);
699
700        // Create document with inline code "boolean"
701        let doc = eure!({ = @code("boolean") });
702
703        // Validation should succeed
704        let result = validate(&doc, &schema);
705        assert!(
706            result.is_valid,
707            "Expected valid, got errors: {:?}",
708            result.errors
709        );
710    }
711}