eure_schema/
validate.rs

1//! Document schema validation
2//!
3//! # Architecture
4//!
5//! Validation is built on `DocumentParser` composition:
6//! - `SchemaValidator`: Dispatches to type-specific validators based on `SchemaNodeContent`
7//! - Type validators: Implement `DocumentParser<Output = (), Error = ValidatorError>`
8//! - `ValidationContext`: Manages shared state (errors, warnings, path)
9//!
10//! # Error Handling
11//!
12//! Two categories of errors:
13//! - `ValidationError`: Type mismatches accumulated in `ValidationContext` (non-fatal)
14//! - `ValidatorError`: Internal validator errors causing fail-fast (e.g., undefined references)
15//!
16//! # Hole Values
17//!
18//! The hole value (`!`) represents an unfilled placeholder:
19//! - Type checking: Holes match any schema (always pass)
20//! - Completeness: Documents containing holes are valid but not complete
21
22mod compound;
23mod context;
24mod error;
25mod primitive;
26mod record;
27mod reference;
28mod union;
29
30pub use context::{ValidationContext, ValidationOutput, ValidationState};
31pub use error::{ValidationError, ValidationWarning, ValidatorError};
32
33// Re-export UnionTagMode for convenience
34pub use eure_document::parse::UnionTagMode;
35
36use eure_document::document::node::NodeValue;
37use eure_document::document::{EureDocument, NodeId};
38use eure_document::parse::{DocumentParser, ParseContext};
39
40use crate::{SchemaDocument, SchemaNodeContent, SchemaNodeId};
41
42use compound::{ArrayValidator, MapValidator, TupleValidator};
43use primitive::{
44    AnyValidator, BooleanValidator, FloatValidator, IntegerValidator, LiteralValidator,
45    NullValidator, TextValidator,
46};
47use record::RecordValidator;
48use reference::ReferenceValidator;
49use union::UnionValidator;
50
51// =============================================================================
52// Public API
53// =============================================================================
54
55/// Validate a document against a schema.
56///
57/// Uses the default `Eure` union tag mode.
58///
59/// # Example
60///
61/// ```ignore
62/// let output = validate(&document, &schema);
63/// if output.is_valid {
64///     println!("Document is valid!");
65/// } else {
66///     for error in &output.errors {
67///         println!("Error: {}", error);
68///     }
69/// }
70/// ```
71pub fn validate(document: &EureDocument, schema: &SchemaDocument) -> ValidationOutput {
72    validate_with_mode(document, schema, UnionTagMode::default())
73}
74
75/// Validate a document against a schema with the specified union tag mode.
76///
77/// # Arguments
78///
79/// * `document` - The document to validate
80/// * `schema` - The schema to validate against
81/// * `mode` - The union tag mode to use:
82///   - `UnionTagMode::Eure`: Use `$variant` extension or untagged matching (native Eure documents)
83///   - `UnionTagMode::Repr`: Use only `VariantRepr` patterns (JSON/YAML imports)
84pub fn validate_with_mode(
85    document: &EureDocument,
86    schema: &SchemaDocument,
87    mode: UnionTagMode,
88) -> ValidationOutput {
89    let root_id = document.get_root_id();
90    validate_node_with_mode(document, schema, root_id, schema.root, mode)
91}
92
93/// Validate a specific node against a schema node.
94///
95/// Uses the default `Eure` union tag mode.
96pub fn validate_node(
97    document: &EureDocument,
98    schema: &SchemaDocument,
99    node_id: NodeId,
100    schema_id: SchemaNodeId,
101) -> ValidationOutput {
102    validate_node_with_mode(
103        document,
104        schema,
105        node_id,
106        schema_id,
107        UnionTagMode::default(),
108    )
109}
110
111/// Validate a specific node against a schema node with the specified union tag mode.
112pub fn validate_node_with_mode(
113    document: &EureDocument,
114    schema: &SchemaDocument,
115    node_id: NodeId,
116    schema_id: SchemaNodeId,
117    mode: UnionTagMode,
118) -> ValidationOutput {
119    let ctx = ValidationContext::with_mode(document, schema, mode);
120    let parse_ctx = ctx.parse_context(node_id);
121
122    let validator = SchemaValidator {
123        ctx: &ctx,
124        schema_node_id: schema_id,
125    };
126
127    // Errors are accumulated in ctx, result is always Ok unless internal error
128    let _ = parse_ctx.parse_with(validator);
129
130    ctx.finish()
131}
132
133// =============================================================================
134// SchemaValidator (main dispatcher)
135// =============================================================================
136
137/// Main validator that dispatches to type-specific validators.
138///
139/// Implements `DocumentParser` to enable composition with other parsers.
140pub struct SchemaValidator<'a, 'doc> {
141    pub ctx: &'a ValidationContext<'doc>,
142    pub schema_node_id: SchemaNodeId,
143}
144
145impl<'a, 'doc> DocumentParser<'doc> for SchemaValidator<'a, 'doc> {
146    type Output = ();
147    type Error = ValidatorError;
148
149    fn parse(&mut self, parse_ctx: &ParseContext<'doc>) -> Result<(), ValidatorError> {
150        let node = parse_ctx.node();
151
152        // Check for hole - holes match any schema
153        if matches!(&node.content, NodeValue::Hole(_)) {
154            self.ctx.mark_has_holes();
155            return Ok(());
156        }
157
158        let schema_node = self.ctx.schema.node(self.schema_node_id);
159
160        // Create a flattened context so extensions and content validation share AccessedSet
161        let parse_ctx = parse_ctx.flatten();
162
163        // Validate extensions (accesses tracked in flatten context)
164        self.validate_extensions(&parse_ctx)?;
165
166        // Dispatch to type-specific validator
167        match &schema_node.content {
168            SchemaNodeContent::Any => {
169                self.warn_unknown_extensions(&parse_ctx);
170                let mut v = AnyValidator;
171                v.parse(&parse_ctx)
172            }
173            SchemaNodeContent::Text(s) => {
174                self.warn_unknown_extensions(&parse_ctx);
175                let mut v = TextValidator {
176                    ctx: self.ctx,
177                    schema: s,
178                    schema_node_id: self.schema_node_id,
179                };
180                v.parse(&parse_ctx)
181            }
182            SchemaNodeContent::Integer(s) => {
183                self.warn_unknown_extensions(&parse_ctx);
184                let mut v = IntegerValidator {
185                    ctx: self.ctx,
186                    schema: s,
187                    schema_node_id: self.schema_node_id,
188                };
189                v.parse(&parse_ctx)
190            }
191            SchemaNodeContent::Float(s) => {
192                self.warn_unknown_extensions(&parse_ctx);
193                let mut v = FloatValidator {
194                    ctx: self.ctx,
195                    schema: s,
196                    schema_node_id: self.schema_node_id,
197                };
198                v.parse(&parse_ctx)
199            }
200            SchemaNodeContent::Boolean => {
201                self.warn_unknown_extensions(&parse_ctx);
202                let mut v = BooleanValidator {
203                    ctx: self.ctx,
204                    schema_node_id: self.schema_node_id,
205                };
206                v.parse(&parse_ctx)
207            }
208            SchemaNodeContent::Null => {
209                self.warn_unknown_extensions(&parse_ctx);
210                let mut v = NullValidator {
211                    ctx: self.ctx,
212                    schema_node_id: self.schema_node_id,
213                };
214                v.parse(&parse_ctx)
215            }
216            SchemaNodeContent::Literal(expected) => {
217                self.warn_unknown_extensions(&parse_ctx);
218                let mut v = LiteralValidator {
219                    ctx: self.ctx,
220                    expected,
221                    schema_node_id: self.schema_node_id,
222                };
223                v.parse(&parse_ctx)
224            }
225            SchemaNodeContent::Array(s) => {
226                self.warn_unknown_extensions(&parse_ctx);
227                let mut v = ArrayValidator {
228                    ctx: self.ctx,
229                    schema: s,
230                    schema_node_id: self.schema_node_id,
231                };
232                v.parse(&parse_ctx)
233            }
234            SchemaNodeContent::Map(s) => {
235                self.warn_unknown_extensions(&parse_ctx);
236                let mut v = MapValidator {
237                    ctx: self.ctx,
238                    schema: s,
239                    schema_node_id: self.schema_node_id,
240                };
241                v.parse(&parse_ctx)
242            }
243            SchemaNodeContent::Record(s) => {
244                self.warn_unknown_extensions(&parse_ctx);
245                let mut v = RecordValidator {
246                    ctx: self.ctx,
247                    schema: s,
248                    schema_node_id: self.schema_node_id,
249                };
250                v.parse(&parse_ctx)
251            }
252            SchemaNodeContent::Tuple(s) => {
253                self.warn_unknown_extensions(&parse_ctx);
254                let mut v = TupleValidator {
255                    ctx: self.ctx,
256                    schema: s,
257                    schema_node_id: self.schema_node_id,
258                };
259                v.parse(&parse_ctx)
260            }
261            SchemaNodeContent::Union(s) => {
262                self.warn_unknown_extensions(&parse_ctx);
263                let mut v = UnionValidator {
264                    ctx: self.ctx,
265                    schema: s,
266                    schema_node_id: self.schema_node_id,
267                };
268                v.parse(&parse_ctx)
269            }
270            SchemaNodeContent::Reference(r) => {
271                // Reference: recurse with resolved schema using the same flattened context
272                // This ensures extension tracking is shared through Reference indirection
273                let mut child_validator = ReferenceValidator {
274                    ctx: self.ctx,
275                    type_ref: r,
276                    schema_node_id: self.schema_node_id,
277                };
278                child_validator.parse(&parse_ctx)
279            }
280        }
281    }
282}
283
284impl<'a, 'doc> SchemaValidator<'a, 'doc> {
285    /// Validate extensions on the current node.
286    ///
287    /// This validates required and present extensions. Accesses are tracked
288    /// in the flatten context's AccessedSet.
289    fn validate_extensions(&self, parse_ctx: &ParseContext<'doc>) -> Result<(), ValidatorError> {
290        let schema_node = self.ctx.schema.node(self.schema_node_id);
291        let ext_types = &schema_node.ext_types;
292        let node = parse_ctx.node();
293        let node_id = parse_ctx.node_id();
294
295        // Check for missing required extensions
296        for (ext_ident, ext_schema) in ext_types {
297            if !ext_schema.optional && !node.extensions.contains_key(ext_ident) {
298                self.ctx
299                    .record_error(ValidationError::MissingRequiredExtension {
300                        extension: ext_ident.to_string(),
301                        path: self.ctx.path(),
302                        node_id,
303                        schema_node_id: self.schema_node_id,
304                    });
305            }
306        }
307
308        // Validate present extensions - accesses are tracked in the shared flatten context
309        for (ext_ident, ext_schema) in ext_types {
310            if let Some(ext_ctx) = parse_ctx.ext_optional(ext_ident.as_ref()) {
311                self.ctx.push_path_extension(ext_ident.clone());
312
313                let child_validator = SchemaValidator {
314                    ctx: self.ctx,
315                    schema_node_id: ext_schema.schema,
316                };
317                let _ = ext_ctx.parse_with(child_validator);
318
319                self.ctx.pop_path();
320            }
321        }
322
323        Ok(())
324    }
325
326    /// Warn about unknown extensions at terminal types.
327    ///
328    /// Extensions that are:
329    /// - Not accessed (not in schema's ext_types)
330    /// - Not built-in ($variant, $schema, $ext-type, etc.)
331    ///
332    /// Uses the shared AccessedSet from the flatten context to determine
333    /// which extensions have been accessed.
334    fn warn_unknown_extensions(&self, parse_ctx: &ParseContext<'doc>) {
335        for (ext_ident, _) in parse_ctx.unknown_extensions() {
336            // Skip built-in extensions used by the schema system
337            if Self::is_builtin_extension(ext_ident) {
338                continue;
339            }
340            self.ctx
341                .record_warning(ValidationWarning::UnknownExtension {
342                    name: ext_ident.to_string(),
343                    path: self.ctx.path(),
344                });
345        }
346    }
347
348    /// Check if an extension is a built-in schema system extension.
349    ///
350    /// Built-in extensions are always allowed and not warned about:
351    /// - $variant: used by union types
352    /// - $schema: used to specify the schema for a document
353    /// - $ext-type: used to define extension types in schemas
354    /// - $codegen: used for code generation hints
355    /// - $codegen-defaults: used for default codegen settings
356    /// - $flatten: used for record field flattening
357    fn is_builtin_extension(ident: &eure_document::identifier::Identifier) -> bool {
358        use crate::identifiers;
359
360        // Core schema extensions
361        ident == &identifiers::VARIANT
362            || ident == &identifiers::SCHEMA
363            || ident == &identifiers::EXT_TYPE
364            // Codegen extensions
365            || ident.as_ref() == "codegen"
366            || ident.as_ref() == "codegen-defaults"
367            // FIXME: This seems not builtin so must be properly handled.
368            || ident.as_ref() == "flatten"
369    }
370}
371
372// =============================================================================
373// Tests
374// =============================================================================
375
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        ArraySchema, Bound, IntegerSchema, RecordFieldSchema, RecordSchema, TextSchema,
        UnionSchema, UnknownFieldsPolicy,
    };
    use eure_document::data_model::VariantRepr;
    use eure_document::eure;
    use eure_document::text::Text;
    use eure_document::value::{ObjectKey, PrimitiveValue};
    use indexmap::{IndexMap, IndexSet};
    use num_bigint::BigInt;

    // -------------------------------------------------------------------------
    // Fixtures
    // -------------------------------------------------------------------------

    /// Builds a schema whose only node holds `content` and is the root.
    fn create_simple_schema(content: SchemaNodeContent) -> (SchemaDocument, SchemaNodeId) {
        let mut schema = SchemaDocument {
            nodes: Vec::new(),
            root: SchemaNodeId(0),
            types: IndexMap::new(),
        };
        let root = schema.create_node(content);
        schema.root = root;
        (schema, root)
    }

    /// Builds a document whose root node is the given primitive value.
    fn create_doc_with_primitive(value: PrimitiveValue) -> EureDocument {
        let mut doc = EureDocument::new();
        let root_id = doc.get_root_id();
        doc.node_mut(root_id).content = NodeValue::Primitive(value);
        doc
    }

    /// Builds a document whose root node is the plaintext `s`.
    fn text_doc(s: &str) -> EureDocument {
        create_doc_with_primitive(PrimitiveValue::Text(Text::plaintext(s.to_string())))
    }

    /// Builds a document whose root node is the integer `n`.
    fn integer_doc(n: i32) -> EureDocument {
        create_doc_with_primitive(PrimitiveValue::Integer(BigInt::from(n)))
    }

    /// Helper to create a literal schema from an EureDocument
    fn create_literal_schema(
        schema: &mut SchemaDocument,
        literal_doc: EureDocument,
    ) -> SchemaNodeId {
        schema.create_node(SchemaNodeContent::Literal(literal_doc))
    }

    /// Builds a schema whose root is an untagged union with a `literal`
    /// variant (the literal "active") that is listed in `deny_untagged`, i.e.
    /// requires explicit tagging. When `include_text_variant` is set, a plain
    /// `text` variant that is not in `deny_untagged` is added after it.
    fn deny_untagged_union_schema(include_text_variant: bool) -> SchemaDocument {
        let (mut schema, _) = create_simple_schema(SchemaNodeContent::Any);

        let mut variants = IndexMap::new();
        let literal_id = create_literal_schema(&mut schema, eure!({ = "active" }));
        variants.insert("literal".to_string(), literal_id);
        if include_text_variant {
            let text_id = schema.create_node(SchemaNodeContent::Text(TextSchema::default()));
            variants.insert("text".to_string(), text_id);
        }

        let mut deny_untagged = IndexSet::new();
        deny_untagged.insert("literal".to_string());

        schema.node_mut(schema.root).content = SchemaNodeContent::Union(UnionSchema {
            variants,
            unambiguous: IndexSet::new(),
            repr: VariantRepr::Untagged,
            deny_untagged,
        });
        schema
    }

    /// Returns `true` if `output` contains a `RequiresExplicitVariant` error
    /// for the variant called `name`.
    fn has_requires_explicit_variant(output: &ValidationOutput, name: &str) -> bool {
        output.errors.iter().any(|e| {
            matches!(
                e,
                ValidationError::RequiresExplicitVariant { variant, .. } if variant == name
            )
        })
    }

    // -------------------------------------------------------------------------
    // Primitive types
    // -------------------------------------------------------------------------

    #[test]
    fn test_validate_text_basic() {
        let (schema, _) = create_simple_schema(SchemaNodeContent::Text(TextSchema::default()));

        assert!(validate(&text_doc("hello"), &schema).is_valid);
    }

    #[test]
    fn test_validate_text_pattern() {
        let (schema, _) = create_simple_schema(SchemaNodeContent::Text(TextSchema {
            pattern: Some(regex::Regex::new("^[a-z]+$").unwrap()),
            ..Default::default()
        }));

        // Lowercase-only text satisfies the pattern.
        assert!(validate(&text_doc("hello"), &schema).is_valid);

        // Uppercase letters and digits do not.
        assert!(!validate(&text_doc("Hello123"), &schema).is_valid);
    }

    #[test]
    fn test_validate_integer() {
        let (schema, _) = create_simple_schema(SchemaNodeContent::Integer(IntegerSchema {
            min: Bound::Inclusive(BigInt::from(0)),
            max: Bound::Inclusive(BigInt::from(100)),
            multiple_of: None,
        }));

        // Inside the inclusive range.
        assert!(validate(&integer_doc(50), &schema).is_valid);

        // Above the upper bound.
        assert!(!validate(&integer_doc(150), &schema).is_valid);
    }

    #[test]
    fn test_validate_boolean() {
        let (schema, _) = create_simple_schema(SchemaNodeContent::Boolean);

        let bool_doc = create_doc_with_primitive(PrimitiveValue::Bool(true));
        assert!(validate(&bool_doc, &schema).is_valid);

        // An integer is not accepted where a boolean is expected.
        assert!(!validate(&integer_doc(1), &schema).is_valid);
    }

    // -------------------------------------------------------------------------
    // Compound types
    // -------------------------------------------------------------------------

    #[test]
    fn test_validate_array() {
        let (mut schema, _) = create_simple_schema(SchemaNodeContent::Any);
        let item_schema_id =
            schema.create_node(SchemaNodeContent::Integer(IntegerSchema::default()));
        schema.node_mut(schema.root).content = SchemaNodeContent::Array(ArraySchema {
            item: item_schema_id,
            min_length: Some(1),
            max_length: Some(3),
            unique: false,
            contains: None,
            binding_style: None,
        });

        // Root array with the two integer elements 1 and 2.
        let mut doc = EureDocument::new();
        let root_id = doc.get_root_id();
        doc.node_mut(root_id).content = NodeValue::Array(Default::default());
        for n in [1, 2] {
            let element_id = doc.add_array_element(None, root_id).unwrap().node_id;
            doc.node_mut(element_id).content =
                NodeValue::Primitive(PrimitiveValue::Integer(BigInt::from(n)));
        }

        assert!(validate(&doc, &schema).is_valid);
    }

    #[test]
    fn test_validate_record() {
        let (mut schema, _) = create_simple_schema(SchemaNodeContent::Any);
        let name_schema_id = schema.create_node(SchemaNodeContent::Text(TextSchema::default()));
        let age_schema_id =
            schema.create_node(SchemaNodeContent::Integer(IntegerSchema::default()));

        // `name` is required, `age` is optional.
        let mut properties = IndexMap::new();
        for (field, field_schema, optional) in
            [("name", name_schema_id, false), ("age", age_schema_id, true)]
        {
            properties.insert(
                field.to_string(),
                RecordFieldSchema {
                    schema: field_schema,
                    optional,
                    binding_style: None,
                },
            );
        }

        schema.node_mut(schema.root).content = SchemaNodeContent::Record(RecordSchema {
            properties,
            unknown_fields: UnknownFieldsPolicy::Deny,
        });

        // The document only provides the required `name` field.
        let mut doc = EureDocument::new();
        let root_id = doc.get_root_id();
        let name_id = doc
            .add_map_child(ObjectKey::String("name".to_string()), root_id)
            .unwrap()
            .node_id;
        doc.node_mut(name_id).content =
            NodeValue::Primitive(PrimitiveValue::Text(Text::plaintext("Alice".to_string())));

        assert!(validate(&doc, &schema).is_valid);
    }

    // -------------------------------------------------------------------------
    // Holes
    // -------------------------------------------------------------------------

    #[test]
    fn test_validate_hole() {
        let (schema, _) =
            create_simple_schema(SchemaNodeContent::Integer(IntegerSchema::default()));

        let mut doc = EureDocument::new();
        let root_id = doc.get_root_id();
        doc.node_mut(root_id).content = NodeValue::Hole(None);

        // A hole is valid against any schema but leaves the document incomplete.
        let result = validate(&doc, &schema);
        assert!(result.is_valid);
        assert!(!result.is_complete);
    }

    // -------------------------------------------------------------------------
    // Unions with deny_untagged variants
    // -------------------------------------------------------------------------

    #[test]
    fn test_validate_union_deny_untagged_without_tag() {
        let schema = deny_untagged_union_schema(false);

        // Create document with literal value but NO $variant tag
        let doc = eure!({ = "active" });

        // Validation should fail with RequiresExplicitVariant error
        let result = validate(&doc, &schema);
        assert!(!result.is_valid);
        assert!(has_requires_explicit_variant(&result, "literal"));
    }

    #[test]
    fn test_validate_union_deny_untagged_with_tag() {
        let schema = deny_untagged_union_schema(false);

        // Create document with literal value WITH $variant tag
        let doc = eure!({
            = "active"
            %variant = "literal"
        });

        // Validation should succeed
        let result = validate(&doc, &schema);
        assert!(
            result.is_valid,
            "Expected valid, got errors: {:?}",
            result.errors
        );
    }

    #[test]
    fn test_validate_union_mixed_deny_untagged() {
        // Test that non-deny-untagged variants can still match via untagged:
        // `literal` requires an explicit tag, `text` does not.
        let schema = deny_untagged_union_schema(true);

        // Value "active" without a tag: this should fail because "literal"
        // matches but requires explicit tag.
        let doc = eure!({ = "active" });

        let result = validate(&doc, &schema);
        assert!(!result.is_valid);
        assert!(has_requires_explicit_variant(&result, "literal"));

        // Value "other text" should match the text variant via untagged.
        let doc2 = eure!({ = "other text" });

        let result2 = validate(&doc2, &schema);
        assert!(
            result2.is_valid,
            "Expected valid for text match, got errors: {:?}",
            result2.errors
        );
    }

    // -------------------------------------------------------------------------
    // Literals
    // -------------------------------------------------------------------------

    #[test]
    fn test_validate_literal_with_inline_code() {
        // Test that Literal comparison works correctly with inline code (Language::Implicit)
        let mut schema = SchemaDocument::new();

        // Literal schema built from inline code (like meta-schema does)
        schema.node_mut(schema.root).content =
            SchemaNodeContent::Literal(eure!({ = @code("boolean") }));

        // Create document with inline code "boolean"
        let doc = eure!({ = @code("boolean") });

        // Validation should succeed
        let result = validate(&doc, &schema);
        assert!(
            result.is_valid,
            "Expected valid, got errors: {:?}",
            result.errors
        );
    }
}