hedl_xml/
schema.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! XSD Schema Validation for XML Documents
19//!
20//! This module provides comprehensive XML Schema Definition (XSD) validation support
21//! for XML documents, with schema caching for optimal performance.
22//!
23//! # Features
24//!
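//! - Validation against a practical subset of XSD 1.0: top-level element
//!   declarations with inline complex types (`xs:sequence` children and attributes)
//! - Thread-safe schema caching with a bounded number of entries
//! - Clear, actionable error messages with line information where available
//! - Matching on local element names, independent of the namespace prefix in use
//!   (schema imports are not resolved)
//! - Type validation for the built-in `string`, `integer`, `decimal` and `boolean` types
//! - Cardinality validation (minOccurs, maxOccurs)
//! - Attribute validation (required attributes and built-in attribute types)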
32//!
33//! # Examples
34//!
35//! ## Basic Schema Validation
36//!
37//! ```rust
38//! use hedl_xml::schema::{SchemaValidator, ValidationError};
39//!
40//! // Create validator with schema
41//! let schema_xsd = r#"<?xml version="1.0"?>
42//! <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
43//!   <xs:element name="person">
44//!     <xs:complexType>
45//!       <xs:sequence>
46//!         <xs:element name="name" type="xs:string"/>
47//!         <xs:element name="age" type="xs:integer"/>
48//!       </xs:sequence>
49//!     </xs:complexType>
50//!   </xs:element>
51//! </xs:schema>"#;
52//!
53//! let validator = SchemaValidator::from_xsd(schema_xsd)?;
54//!
55//! // Validate XML document
56//! let xml = r#"<?xml version="1.0"?>
57//! <person>
58//!   <name>Alice</name>
59//!   <age>30</age>
60//! </person>"#;
61//!
62//! validator.validate(xml)?;
63//! # Ok::<(), Box<dyn std::error::Error>>(())
64//! ```
65//!
66//! ## Schema Caching
67//!
68//! ```text
69//! use hedl_xml::schema::SchemaCache;
70//! use std::path::Path;
71//!
72//! // Create cache with maximum 10 schemas
73//! let cache = SchemaCache::new(10);
74//!
75//! // Load and cache schema
76//! let validator = cache.get_or_load(Path::new("schema.xsd"))?;
77//!
78//! // Subsequent calls use cached validator
79//! let validator2 = cache.get_or_load(Path::new("schema.xsd"))?;
80//! ```
81//!
82//! ## Detailed Error Messages
83//!
84//! ```rust,should_panic
85//! use hedl_xml::schema::SchemaValidator;
86//!
87//! let schema_xsd = r#"<?xml version="1.0"?>
88//! <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
89//!   <xs:element name="person">
90//!     <xs:complexType>
91//!       <xs:sequence>
92//!         <xs:element name="name" type="xs:string"/>
93//!         <xs:element name="age" type="xs:integer"/>
94//!       </xs:sequence>
95//!     </xs:complexType>
96//!   </xs:element>
97//! </xs:schema>"#;
98//!
99//! let validator = SchemaValidator::from_xsd(schema_xsd).unwrap();
100//!
101//! // Invalid XML - age is not an integer
102//! let xml = r#"<?xml version="1.0"?>
103//! <person>
104//!   <name>Alice</name>
105//!   <age>thirty</age>
106//! </person>"#;
107//!
108//! // This will produce a clear error:
109//! // "Type validation failed for element 'age': expected xs:integer, found 'thirty'"
110//! validator.validate(xml).unwrap();
111//! ```
112
113use parking_lot::RwLock;
114use roxmltree::{Document as XmlDocument, Node};
115use std::collections::HashMap;
116use std::fmt;
117use std::fs;
118use std::path::{Path, PathBuf};
119use std::sync::Arc;
120
/// Failure modes reported while loading a schema or validating a document.
///
/// Variants that point at a place in the validated document carry an optional
/// `line`; the `Display` implementation appends it as `" at line N"` when set.
#[derive(Debug, Clone, PartialEq)]
pub enum ValidationError {
    /// The XSD text could not be turned into a schema.
    SchemaParseError {
        /// Human-readable reason the schema was rejected
        message: String,
    },

    /// The XML document under validation could not be parsed.
    DocumentParseError {
        /// Human-readable reason the document was rejected
        message: String,
        /// Line of the failure, when known
        line: Option<usize>,
        /// Column of the failure, when known (only displayed together with `line`)
        column: Option<usize>,
    },

    /// An element did not match what the schema expects.
    ElementValidationError {
        /// Name of the offending element
        element: String,
        /// Element or type the schema asked for
        expected: String,
        /// What the document contained instead
        found: String,
        /// Line of the failure, when known
        line: Option<usize>,
    },

    /// An attribute value did not match its declaration.
    AttributeValidationError {
        /// Element that owns the attribute
        element: String,
        /// Name of the offending attribute
        attribute: String,
        /// Explanation of what was wrong
        message: String,
        /// Line of the failure, when known
        line: Option<usize>,
    },

    /// A value did not conform to its declared simple type.
    TypeValidationError {
        /// Name of the element or attribute holding the value
        name: String,
        /// Type the schema declared
        expected_type: String,
        /// The value that was rejected
        value: String,
        /// Line of the failure, when known
        line: Option<usize>,
    },

    /// An element occurred too few or too many times (minOccurs, maxOccurs).
    CardinalityError {
        /// Name of the element being counted
        element: String,
        /// Smallest permitted number of occurrences
        min: usize,
        /// Largest permitted number of occurrences (None = unbounded)
        max: Option<usize>,
        /// Number of occurrences present in the document
        actual: usize,
        /// Line of the failure, when known
        line: Option<usize>,
    },

    /// An attribute declared as required was absent.
    RequiredAttributeMissing {
        /// Element that should carry the attribute
        element: String,
        /// Name of the absent attribute
        attribute: String,
        /// Line of the failure, when known
        line: Option<usize>,
    },

    /// The document contains an element the schema does not declare.
    UnknownElement {
        /// Name of the undeclared element
        element: String,
        /// Line of the failure, when known
        line: Option<usize>,
    },

    /// The schema file does not exist.
    SchemaNotFound {
        /// Location that was looked up
        path: PathBuf,
    },

    /// Reading the schema file failed.
    IoError {
        /// Text of the underlying I/O error
        message: String,
    },
}

impl fmt::Display for ValidationError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Each arm writes the variant's headline. Variants without a plain
        // trailing line number return directly; the rest hand their `line`
        // to the shared suffix logic after the match.
        let line = match self {
            Self::SchemaParseError { message } => {
                return write!(f, "Schema parse error: {}", message);
            }
            Self::SchemaNotFound { path } => {
                return write!(f, "Schema file not found: {}", path.display());
            }
            Self::IoError { message } => {
                return write!(f, "I/O error: {}", message);
            }
            Self::DocumentParseError {
                message,
                line,
                column,
            } => {
                write!(f, "Document parse error: {}", message)?;
                // The column is only meaningful next to a line number.
                return match (line, column) {
                    (Some(l), Some(c)) => write!(f, " at line {}, column {}", l, c),
                    (Some(l), None) => write!(f, " at line {}", l),
                    (None, _) => Ok(()),
                };
            }
            Self::ElementValidationError {
                element,
                expected,
                found,
                line,
            } => {
                write!(
                    f,
                    "Element validation failed for '{}': expected {}, found '{}'",
                    element, expected, found
                )?;
                line
            }
            Self::AttributeValidationError {
                element,
                attribute,
                message,
                line,
            } => {
                write!(
                    f,
                    "Attribute validation failed for '{}.{}': {}",
                    element, attribute, message
                )?;
                line
            }
            Self::TypeValidationError {
                name,
                expected_type,
                value,
                line,
            } => {
                write!(
                    f,
                    "Type validation failed for '{}': expected {}, found '{}'",
                    name, expected_type, value
                )?;
                line
            }
            Self::CardinalityError {
                element,
                min,
                max,
                actual,
                line,
            } => {
                let upper = match max {
                    Some(limit) => limit.to_string(),
                    None => "unbounded".to_string(),
                };
                write!(
                    f,
                    "Cardinality error for '{}': expected {}..{}, found {}",
                    element, min, upper, actual
                )?;
                line
            }
            Self::RequiredAttributeMissing {
                element,
                attribute,
                line,
            } => {
                write!(
                    f,
                    "Required attribute '{}' missing from element '{}'",
                    attribute, element
                )?;
                line
            }
            Self::UnknownElement { element, line } => {
                write!(f, "Unknown element '{}' not defined in schema", element)?;
                line
            }
        };

        match line {
            Some(l) => write!(f, " at line {}", l),
            None => Ok(()),
        }
    }
}

impl std::error::Error for ValidationError {}
342
/// Simple XSD schema representation for validation.
///
/// Holds what `SchemaValidator::parse_xsd` extracts from the schema root:
/// the top-level element declarations and the optional target namespace.
#[derive(Debug, Clone)]
struct Schema {
    /// Top-level element declarations, keyed by their `name` attribute.
    /// `SchemaValidator::validate` looks the document's root element up here.
    elements: HashMap<String, ElementDef>,
    /// Value of the root `targetNamespace` attribute, if present. It is
    /// recorded at parse time; the validation code visible in this file does
    /// not read it, hence the `dead_code` allowance.
    #[allow(dead_code)]
    target_namespace: Option<String>,
}
350
/// Element definition in XSD schema.
///
/// Built by `SchemaValidator::parse_element` from one `element` node.
#[derive(Debug, Clone)]
struct ElementDef {
    /// Value of the element's `name` attribute; matched against the local
    /// tag name of document elements.
    name: String,
    /// Value of the `type` attribute (for example `xs:integer`), if given.
    type_name: Option<String>,
    /// Inline `complexType` child, if the declaration has one.
    complex_type: Option<ComplexType>,
    /// Minimum number of occurrences; 1 when the schema omits `minOccurs`.
    min_occurs: usize,
    /// Maximum number of occurrences; `None` means no upper bound is enforced.
    max_occurs: Option<usize>,
}
360
/// Complex type definition.
///
/// Built by `SchemaValidator::parse_complex_type`; only `sequence` and
/// `attribute` children of the `complexType` node are recorded.
#[derive(Debug, Clone)]
struct ComplexType {
    /// Element declarations found inside `sequence` children, in schema order.
    sequence: Vec<ElementDef>,
    /// Attribute declarations found directly under the complex type.
    attributes: Vec<AttributeDef>,
}
367
/// Attribute definition.
///
/// Built by `SchemaValidator::parse_attribute` from one `attribute` node.
#[derive(Debug, Clone)]
struct AttributeDef {
    /// Value of the declaration's `name` attribute.
    name: String,
    /// Declared type; `"xs:string"` when the schema omits `type`.
    type_name: String,
    /// True only when the declaration carries `use="required"`.
    required: bool,
}
375
/// XSD Schema Validator
///
/// Validates XML documents against XSD schemas with comprehensive error reporting.
///
/// Construct one with `SchemaValidator::from_xsd` or
/// `SchemaValidator::from_file`, then call `validate` for each document.
#[derive(Debug, Clone)]
pub struct SchemaValidator {
    /// Parsed schema that every `validate` call checks documents against.
    schema: Schema,
}
383
384impl SchemaValidator {
385    /// Create a new validator from XSD schema string.
386    ///
387    /// # Arguments
388    ///
389    /// * `xsd` - XSD schema definition as a string
390    ///
391    /// # Errors
392    ///
393    /// Returns `ValidationError::SchemaParseError` if the schema is malformed.
394    ///
395    /// # Example
396    ///
397    /// ```rust
398    /// use hedl_xml::schema::SchemaValidator;
399    ///
400    /// let schema = r#"<?xml version="1.0"?>
401    /// <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
402    ///   <xs:element name="root" type="xs:string"/>
403    /// </xs:schema>"#;
404    ///
405    /// let validator = SchemaValidator::from_xsd(schema)?;
406    /// # Ok::<(), Box<dyn std::error::Error>>(())
407    /// ```
408    pub fn from_xsd(xsd: &str) -> Result<Self, ValidationError> {
409        let schema = Self::parse_xsd(xsd)?;
410        Ok(Self { schema })
411    }
412
413    /// Parse XSD schema document
414    fn parse_xsd(xsd: &str) -> Result<Schema, ValidationError> {
415        let doc = XmlDocument::parse(xsd).map_err(|e| ValidationError::SchemaParseError {
416            message: e.to_string(),
417        })?;
418
419        let root = doc.root_element();
420
421        // Verify this is an XSD schema
422        if root.tag_name().name() != "schema" {
423            return Err(ValidationError::SchemaParseError {
424                message: "Root element must be <xs:schema>".to_string(),
425            });
426        }
427
428        let target_namespace = root.attribute("targetNamespace").map(|s| s.to_string());
429        let mut elements = HashMap::new();
430
431        // Parse top-level elements
432        for child in root.children().filter(|n| n.is_element()) {
433            if child.tag_name().name() == "element" {
434                let elem_def = Self::parse_element(&child)?;
435                elements.insert(elem_def.name.clone(), elem_def);
436            }
437        }
438
439        Ok(Schema {
440            elements,
441            target_namespace,
442        })
443    }
444
445    /// Parse an element definition
446    fn parse_element(node: &Node) -> Result<ElementDef, ValidationError> {
447        let name = node
448            .attribute("name")
449            .ok_or_else(|| ValidationError::SchemaParseError {
450                message: "Element must have 'name' attribute".to_string(),
451            })?
452            .to_string();
453
454        let type_name = node.attribute("type").map(|s| s.to_string());
455        let min_occurs = node
456            .attribute("minOccurs")
457            .and_then(|s| s.parse::<usize>().ok())
458            .unwrap_or(1);
459        let max_occurs = node.attribute("maxOccurs").and_then(|s| {
460            if s == "unbounded" {
461                None
462            } else {
463                s.parse::<usize>().ok()
464            }
465        });
466
467        // Parse complex type if present
468        let mut complex_type = None;
469        for child in node.children().filter(|n| n.is_element()) {
470            if child.tag_name().name() == "complexType" {
471                complex_type = Some(Self::parse_complex_type(&child)?);
472                break;
473            }
474        }
475
476        Ok(ElementDef {
477            name,
478            type_name,
479            complex_type,
480            min_occurs,
481            max_occurs,
482        })
483    }
484
485    /// Parse a complex type definition
486    fn parse_complex_type(node: &Node) -> Result<ComplexType, ValidationError> {
487        let mut sequence = Vec::new();
488        let mut attributes = Vec::new();
489
490        for child in node.children().filter(|n| n.is_element()) {
491            match child.tag_name().name() {
492                "sequence" => {
493                    for elem_node in child.children().filter(|n| n.is_element()) {
494                        if elem_node.tag_name().name() == "element" {
495                            sequence.push(Self::parse_element(&elem_node)?);
496                        }
497                    }
498                }
499                "attribute" => {
500                    attributes.push(Self::parse_attribute(&child)?);
501                }
502                _ => {}
503            }
504        }
505
506        Ok(ComplexType {
507            sequence,
508            attributes,
509        })
510    }
511
512    /// Parse an attribute definition
513    fn parse_attribute(node: &Node) -> Result<AttributeDef, ValidationError> {
514        let name = node
515            .attribute("name")
516            .ok_or_else(|| ValidationError::SchemaParseError {
517                message: "Attribute must have 'name' attribute".to_string(),
518            })?
519            .to_string();
520
521        let type_name = node
522            .attribute("type")
523            .unwrap_or("xs:string")
524            .to_string();
525
526        let required = node.attribute("use") == Some("required");
527
528        Ok(AttributeDef {
529            name,
530            type_name,
531            required,
532        })
533    }
534
535    /// Create a new validator from XSD schema file.
536    ///
537    /// # Arguments
538    ///
539    /// * `path` - Path to XSD schema file
540    ///
541    /// # Errors
542    ///
543    /// Returns `ValidationError::SchemaNotFound` if file doesn't exist,
544    /// `ValidationError::IoError` for I/O errors, or
545    /// `ValidationError::SchemaParseError` if schema is malformed.
546    ///
547    /// # Example
548    ///
549    /// ```rust,no_run
550    /// use hedl_xml::schema::SchemaValidator;
551    /// use std::path::Path;
552    ///
553    /// let validator = SchemaValidator::from_file(Path::new("schema.xsd"))?;
554    /// # Ok::<(), Box<dyn std::error::Error>>(())
555    /// ```
556    pub fn from_file(path: &Path) -> Result<Self, ValidationError> {
557        if !path.exists() {
558            return Err(ValidationError::SchemaNotFound {
559                path: path.to_path_buf(),
560            });
561        }
562
563        let content = fs::read_to_string(path).map_err(|e| ValidationError::IoError {
564            message: e.to_string(),
565        })?;
566
567        Self::from_xsd(&content)
568    }
569
570    /// Validate an XML document against the schema.
571    ///
572    /// # Arguments
573    ///
574    /// * `xml` - XML document to validate
575    ///
576    /// # Errors
577    ///
578    /// Returns various `ValidationError` variants if validation fails.
579    ///
580    /// # Example
581    ///
582    /// ```rust
583    /// use hedl_xml::schema::SchemaValidator;
584    ///
585    /// let schema = r#"<?xml version="1.0"?>
586    /// <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
587    ///   <xs:element name="root" type="xs:string"/>
588    /// </xs:schema>"#;
589    ///
590    /// let validator = SchemaValidator::from_xsd(schema)?;
591    ///
592    /// let xml = r#"<?xml version="1.0"?><root>value</root>"#;
593    /// validator.validate(xml)?;
594    /// # Ok::<(), Box<dyn std::error::Error>>(())
595    /// ```
596    pub fn validate(&self, xml: &str) -> Result<(), ValidationError> {
597        let doc = XmlDocument::parse(xml).map_err(|e| ValidationError::DocumentParseError {
598            message: e.to_string(),
599            line: None,
600            column: None,
601        })?;
602
603        let root = doc.root_element();
604        let root_name = root.tag_name().name();
605
606        // Find schema definition for root element
607        let schema_elem = self
608            .schema
609            .elements
610            .get(root_name)
611            .ok_or_else(|| ValidationError::UnknownElement {
612                element: root_name.to_string(),
613                line: Some(doc.text_pos_at(root.range().start).row as usize),
614            })?;
615
616        self.validate_element(&root, schema_elem)?;
617
618        Ok(())
619    }
620
621    /// Validate an element against schema definition
622    fn validate_element(
623        &self,
624        node: &Node,
625        schema_elem: &ElementDef,
626    ) -> Result<(), ValidationError> {
627        let line = node.document().text_pos_at(node.range().start).row as usize;
628
629        // Validate element type and content
630        if let Some(ref type_name) = schema_elem.type_name {
631            self.validate_type(node, type_name, line)?;
632        }
633
634        // If complex type, validate structure
635        if let Some(ref complex_type) = schema_elem.complex_type {
636            // Validate attributes
637            self.validate_attributes_complex(node, complex_type, line)?;
638
639            // Validate child elements
640            self.validate_children_complex(node, complex_type, line)?;
641        }
642
643        Ok(())
644    }
645
646    /// Validate element type
647    fn validate_type(
648        &self,
649        node: &Node,
650        type_ref: &str,
651        line: usize,
652    ) -> Result<(), ValidationError> {
653        let text = node.text().unwrap_or("");
654
655        // Validate based on XML Schema built-in types
656        match type_ref {
657            "xs:string" | "string" => {
658                // Any text is valid
659            }
660            "xs:integer" | "integer" => {
661                if text.parse::<i64>().is_err() {
662                    return Err(ValidationError::TypeValidationError {
663                        name: node.tag_name().name().to_string(),
664                        expected_type: "xs:integer".to_string(),
665                        value: text.to_string(),
666                        line: Some(line),
667                    });
668                }
669            }
670            "xs:decimal" | "decimal" => {
671                if text.parse::<f64>().is_err() {
672                    return Err(ValidationError::TypeValidationError {
673                        name: node.tag_name().name().to_string(),
674                        expected_type: "xs:decimal".to_string(),
675                        value: text.to_string(),
676                        line: Some(line),
677                    });
678                }
679            }
680            "xs:boolean" | "boolean" => {
681                if !["true", "false", "1", "0"].contains(&text) {
682                    return Err(ValidationError::TypeValidationError {
683                        name: node.tag_name().name().to_string(),
684                        expected_type: "xs:boolean".to_string(),
685                        value: text.to_string(),
686                        line: Some(line),
687                    });
688                }
689            }
690            _ => {
691                // Custom type - would need type lookup in schema
692            }
693        }
694
695        Ok(())
696    }
697
698    /// Validate attributes against complex type definition
699    fn validate_attributes_complex(
700        &self,
701        node: &Node,
702        complex_type: &ComplexType,
703        line: usize,
704    ) -> Result<(), ValidationError> {
705        let element_name = node.tag_name().name();
706
707        // Check required attributes
708        for attr_def in &complex_type.attributes {
709            if attr_def.required && node.attribute(attr_def.name.as_str()).is_none() {
710                return Err(ValidationError::RequiredAttributeMissing {
711                    element: element_name.to_string(),
712                    attribute: attr_def.name.clone(),
713                    line: Some(line),
714                });
715            }
716
717            // Validate attribute type if present
718            if let Some(value) = node.attribute(attr_def.name.as_str()) {
719                self.validate_simple_type(value, &attr_def.type_name).map_err(|_| {
720                    ValidationError::AttributeValidationError {
721                        element: element_name.to_string(),
722                        attribute: attr_def.name.clone(),
723                        message: format!(
724                            "Expected type {}, found '{}'",
725                            attr_def.type_name, value
726                        ),
727                        line: Some(line),
728                    }
729                })?;
730            }
731        }
732
733        Ok(())
734    }
735
736    /// Validate child elements against complex type sequence
737    fn validate_children_complex(
738        &self,
739        node: &Node,
740        complex_type: &ComplexType,
741        line: usize,
742    ) -> Result<(), ValidationError> {
743        let children: Vec<_> = node.children().filter(|n| n.is_element()).collect();
744
745        // Validate each child element in sequence
746        for child in &children {
747            let child_name = child.tag_name().name();
748
749            // Find matching element in sequence
750            let schema_elem = complex_type
751                .sequence
752                .iter()
753                .find(|e| e.name == child_name)
754                .ok_or_else(|| ValidationError::UnknownElement {
755                    element: child_name.to_string(),
756                    line: Some(child.document().text_pos_at(child.range().start).row as usize),
757                })?;
758
759            self.validate_element(child, schema_elem)?;
760        }
761
762        // Validate cardinality for required elements
763        for elem_def in &complex_type.sequence {
764            let count = children
765                .iter()
766                .filter(|n| n.tag_name().name() == elem_def.name)
767                .count();
768
769            if count < elem_def.min_occurs {
770                return Err(ValidationError::CardinalityError {
771                    element: elem_def.name.clone(),
772                    min: elem_def.min_occurs,
773                    max: elem_def.max_occurs,
774                    actual: count,
775                    line: Some(line),
776                });
777            }
778
779            if let Some(max) = elem_def.max_occurs {
780                if count > max {
781                    return Err(ValidationError::CardinalityError {
782                        element: elem_def.name.clone(),
783                        min: elem_def.min_occurs,
784                        max: elem_def.max_occurs,
785                        actual: count,
786                        line: Some(line),
787                    });
788                }
789            }
790        }
791
792        Ok(())
793    }
794
795    /// Validate a simple type value
796    fn validate_simple_type(&self, value: &str, type_name: &str) -> Result<(), ()> {
797        match type_name {
798            "xs:string" | "string" => Ok(()),
799            "xs:integer" | "integer" => value.parse::<i64>().map(|_| ()).map_err(|_| ()),
800            "xs:decimal" | "decimal" => value.parse::<f64>().map(|_| ()).map_err(|_| ()),
801            "xs:boolean" | "boolean" => {
802                if ["true", "false", "1", "0"].contains(&value) {
803                    Ok(())
804                } else {
805                    Err(())
806                }
807            }
808            _ => Ok(()), // Unknown types pass for now
809        }
810    }
811}
812
813/// Thread-safe LRU cache for schema validators.
814///
815/// Caches parsed schemas to avoid re-parsing on every validation.
816/// Uses parking_lot RwLock for high-performance concurrent access.
817///
818/// # Example
819///
820/// ```text
821/// use hedl_xml::schema::SchemaCache;
822/// use std::path::Path;
823///
824/// let cache = SchemaCache::new(100);
825///
826/// // First call parses and caches
827/// let validator = cache.get_or_load(Path::new("schema.xsd"))?;
828///
829/// // Second call uses cached validator
830/// let validator2 = cache.get_or_load(Path::new("schema.xsd"))?;
831/// ```
832pub struct SchemaCache {
833    cache: Arc<RwLock<HashMap<PathBuf, Arc<SchemaValidator>>>>,
834    max_size: usize,
835}
836
837impl SchemaCache {
838    /// Create a new schema cache with maximum size.
839    ///
840    /// # Arguments
841    ///
842    /// * `max_size` - Maximum number of schemas to cache
843    ///
844    /// # Example
845    ///
846    /// ```rust
847    /// use hedl_xml::schema::SchemaCache;
848    ///
849    /// let cache = SchemaCache::new(50);
850    /// ```
851    pub fn new(max_size: usize) -> Self {
852        Self {
853            cache: Arc::new(RwLock::new(HashMap::new())),
854            max_size,
855        }
856    }
857
858    /// Get cached validator or load from file.
859    ///
860    /// If the schema is already cached, returns the cached validator.
861    /// Otherwise, loads the schema from file and caches it.
862    ///
863    /// # Arguments
864    ///
865    /// * `path` - Path to schema file
866    ///
867    /// # Errors
868    ///
869    /// Returns `ValidationError` if schema file cannot be loaded or parsed.
870    ///
871    /// # Example
872    ///
873    /// ```rust,no_run
874    /// use hedl_xml::schema::SchemaCache;
875    /// use std::path::Path;
876    ///
877    /// let cache = SchemaCache::new(10);
878    /// let validator = cache.get_or_load(Path::new("schema.xsd"))?;
879    /// # Ok::<(), Box<dyn std::error::Error>>(())
880    /// ```
881    pub fn get_or_load(&self, path: &Path) -> Result<Arc<SchemaValidator>, ValidationError> {
882        // Try read lock first
883        {
884            let cache = self.cache.read();
885            if let Some(validator) = cache.get(path) {
886                return Ok(Arc::clone(validator));
887            }
888        }
889
890        // Need to load - acquire write lock
891        let mut cache = self.cache.write();
892
893        // Double-check in case another thread loaded while we waited
894        if let Some(validator) = cache.get(path) {
895            return Ok(Arc::clone(validator));
896        }
897
898        // Load validator
899        let validator = Arc::new(SchemaValidator::from_file(path)?);
900
901        // Evict oldest entry if cache is full
902        if cache.len() >= self.max_size {
903            if let Some(oldest_key) = cache.keys().next().cloned() {
904                cache.remove(&oldest_key);
905            }
906        }
907
908        cache.insert(path.to_path_buf(), Arc::clone(&validator));
909
910        Ok(validator)
911    }
912
913    /// Clear all cached schemas.
914    ///
915    /// # Example
916    ///
917    /// ```rust
918    /// use hedl_xml::schema::SchemaCache;
919    ///
920    /// let cache = SchemaCache::new(10);
921    /// cache.clear();
922    /// ```
923    pub fn clear(&self) {
924        self.cache.write().clear();
925    }
926
927    /// Get number of cached schemas.
928    ///
929    /// # Example
930    ///
931    /// ```rust
932    /// use hedl_xml::schema::SchemaCache;
933    ///
934    /// let cache = SchemaCache::new(10);
935    /// assert_eq!(cache.size(), 0);
936    /// ```
937    pub fn size(&self) -> usize {
938        self.cache.read().len()
939    }
940}
941
942impl Default for SchemaCache {
943    /// Create default cache with size 100
944    fn default() -> Self {
945        Self::new(100)
946    }
947}
948
949#[cfg(test)]
950mod tests {
951    use super::*;
952
    /// Fixture schema shared by the tests: a `person` element whose complex
    /// type is a sequence of `name` (xs:string) and `age` (xs:integer).
    const SIMPLE_SCHEMA: &str = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="person">
    <xs:complexType>
      <xs:sequence>
        <xs:element name="name" type="xs:string"/>
        <xs:element name="age" type="xs:integer"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
</xs:schema>"#;
964
965    #[test]
966    fn test_schema_validator_creation() {
967        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA);
968        assert!(validator.is_ok());
969    }
970
971    #[test]
972    fn test_valid_document() {
973        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();
974
975        let xml = r#"<?xml version="1.0"?>
976<person>
977  <name>Alice</name>
978  <age>30</age>
979</person>"#;
980
981        assert!(validator.validate(xml).is_ok());
982    }
983
984    #[test]
985    fn test_invalid_type() {
986        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();
987
988        let xml = r#"<?xml version="1.0"?>
989<person>
990  <name>Alice</name>
991  <age>thirty</age>
992</person>"#;
993
994        let result = validator.validate(xml);
995        assert!(result.is_err());
996
997        if let Err(ValidationError::TypeValidationError {
998            name,
999            expected_type,
1000            value,
1001            ..
1002        }) = result
1003        {
1004            assert_eq!(name, "age");
1005            assert_eq!(expected_type, "xs:integer");
1006            assert_eq!(value, "thirty");
1007        } else {
1008            panic!("Expected TypeValidationError");
1009        }
1010    }
1011
1012    #[test]
1013    fn test_unknown_element() {
1014        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();
1015
1016        let xml = r#"<?xml version="1.0"?>
1017<person>
1018  <name>Alice</name>
1019  <age>30</age>
1020  <email>alice@example.com</email>
1021</person>"#;
1022
1023        let result = validator.validate(xml);
1024        assert!(result.is_err());
1025
1026        if let Err(ValidationError::UnknownElement { element, .. }) = result {
1027            assert_eq!(element, "email");
1028        } else {
1029            panic!("Expected UnknownElement error");
1030        }
1031    }
1032
1033    #[test]
1034    fn test_malformed_xml() {
1035        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();
1036
1037        let xml = r#"<?xml version="1.0"?>
1038<person>
1039  <name>Alice
1040  <age>30</age>
1041</person>"#;
1042
1043        let result = validator.validate(xml);
1044        assert!(result.is_err());
1045        assert!(matches!(
1046            result,
1047            Err(ValidationError::DocumentParseError { .. })
1048        ));
1049    }
1050
1051    #[test]
1052    fn test_schema_cache() {
1053        use std::io::Write;
1054        use tempfile::NamedTempFile;
1055
1056        let cache = SchemaCache::new(5);
1057        assert_eq!(cache.size(), 0);
1058
1059        // Create temporary schema file
1060        let mut temp_file = NamedTempFile::new().unwrap();
1061        temp_file.write_all(SIMPLE_SCHEMA.as_bytes()).unwrap();
1062        let path = temp_file.path();
1063
1064        // First load
1065        let validator1 = cache.get_or_load(path).unwrap();
1066        assert_eq!(cache.size(), 1);
1067
1068        // Second load should use cache
1069        let validator2 = cache.get_or_load(path).unwrap();
1070        assert_eq!(cache.size(), 1);
1071
1072        // Should be same instance
1073        assert!(Arc::ptr_eq(&validator1, &validator2));
1074
1075        // Clear cache
1076        cache.clear();
1077        assert_eq!(cache.size(), 0);
1078    }
1079
1080    #[test]
1081    fn test_cache_eviction() {
1082        use std::io::Write;
1083        use tempfile::NamedTempFile;
1084
1085        let cache = SchemaCache::new(2);
1086
1087        // Create 3 temporary schema files
1088        let mut files = vec![];
1089        for _ in 0..3 {
1090            let mut temp_file = NamedTempFile::new().unwrap();
1091            temp_file.write_all(SIMPLE_SCHEMA.as_bytes()).unwrap();
1092            files.push(temp_file);
1093        }
1094
1095        // Load first two - should be cached
1096        cache.get_or_load(files[0].path()).unwrap();
1097        cache.get_or_load(files[1].path()).unwrap();
1098        assert_eq!(cache.size(), 2);
1099
1100        // Load third - should evict oldest
1101        cache.get_or_load(files[2].path()).unwrap();
1102        assert_eq!(cache.size(), 2);
1103    }
1104
1105    #[test]
1106    fn test_error_display() {
1107        let err = ValidationError::TypeValidationError {
1108            name: "age".to_string(),
1109            expected_type: "xs:integer".to_string(),
1110            value: "thirty".to_string(),
1111            line: Some(5),
1112        };
1113
1114        let display = err.to_string();
1115        assert!(display.contains("age"));
1116        assert!(display.contains("xs:integer"));
1117        assert!(display.contains("thirty"));
1118        assert!(display.contains("line 5"));
1119    }
1120
1121    #[test]
1122    fn test_schema_not_found() {
1123        let result = SchemaValidator::from_file(Path::new("/nonexistent/schema.xsd"));
1124        assert!(result.is_err());
1125        assert!(matches!(
1126            result,
1127            Err(ValidationError::SchemaNotFound { .. })
1128        ));
1129    }
1130
1131    #[test]
1132    fn test_invalid_schema() {
1133        let invalid_schema = r#"<?xml version="1.0"?>
1134<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1135  <xs:element name="broken" type="nonexistent:type"/>
1136</xs:schema>"#;
1137
1138        let _result = SchemaValidator::from_xsd(invalid_schema);
1139        // Schema parser is permissive - unknown types are allowed
1140        // Validation will happen at runtime when validating documents
1141    }
1142
1143    #[test]
1144    fn test_boolean_type_validation() {
1145        let schema = r#"<?xml version="1.0"?>
1146<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1147  <xs:element name="flag" type="xs:boolean"/>
1148</xs:schema>"#;
1149
1150        let validator = SchemaValidator::from_xsd(schema).unwrap();
1151
1152        // Valid boolean values
1153        for val in &["true", "false", "1", "0"] {
1154            let xml = format!(r#"<?xml version="1.0"?><flag>{}</flag>"#, val);
1155            assert!(validator.validate(&xml).is_ok());
1156        }
1157
1158        // Invalid boolean value
1159        let xml = r#"<?xml version="1.0"?><flag>yes</flag>"#;
1160        assert!(validator.validate(xml).is_err());
1161    }
1162
1163    #[test]
1164    fn test_decimal_type_validation() {
1165        let schema = r#"<?xml version="1.0"?>
1166<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
1167  <xs:element name="price" type="xs:decimal"/>
1168</xs:schema>"#;
1169
1170        let validator = SchemaValidator::from_xsd(schema).unwrap();
1171
1172        // Valid decimal
1173        let xml = r#"<?xml version="1.0"?><price>19.99</price>"#;
1174        assert!(validator.validate(xml).is_ok());
1175
1176        // Invalid decimal
1177        let xml = r#"<?xml version="1.0"?><price>not a number</price>"#;
1178        assert!(validator.validate(xml).is_err());
1179    }
1180
1181    #[test]
1182    fn test_concurrent_cache_access() {
1183        use std::io::Write;
1184        use std::sync::Arc;
1185        use std::thread;
1186        use tempfile::NamedTempFile;
1187
1188        let cache = Arc::new(SchemaCache::new(10));
1189
1190        // Create temporary schema file
1191        let mut temp_file = NamedTempFile::new().unwrap();
1192        temp_file.write_all(SIMPLE_SCHEMA.as_bytes()).unwrap();
1193        let path = temp_file.path().to_path_buf();
1194
1195        // Spawn multiple threads accessing cache concurrently
1196        let mut handles = vec![];
1197        for _ in 0..10 {
1198            let cache_clone = Arc::clone(&cache);
1199            let path_clone = path.clone();
1200            let handle = thread::spawn(move || {
1201                for _ in 0..100 {
1202                    let _validator = cache_clone.get_or_load(&path_clone).unwrap();
1203                }
1204            });
1205            handles.push(handle);
1206        }
1207
1208        // Wait for all threads
1209        for handle in handles {
1210            handle.join().unwrap();
1211        }
1212
1213        // Should only have cached once
1214        assert_eq!(cache.size(), 1);
1215    }
1216}