hedl_xml/
schema.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! XSD Schema Validation for XML Documents
19//!
20//! This module provides comprehensive XML Schema Definition (XSD) validation support
21//! for XML documents, with schema caching for optimal performance.
22//!
23//! # Features
24//!
//! - Validation against a practical subset of XSD 1.0
//! - Schema caching with thread-safe, size-bounded eviction
27//! - Clear, actionable error messages with line/column information
28//! - Support for multiple namespaces and imports
29//! - Type validation (simple types, complex types, restrictions)
30//! - Cardinality validation (minOccurs, maxOccurs)
31//! - Attribute validation (required, optional, fixed, default)
32//!
33//! # Examples
34//!
35//! ## Basic Schema Validation
36//!
37//! ```rust
38//! use hedl_xml::schema::{SchemaValidator, ValidationError};
39//!
40//! // Create validator with schema
41//! let schema_xsd = r#"<?xml version="1.0"?>
42//! <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
43//!   <xs:element name="person">
44//!     <xs:complexType>
45//!       <xs:sequence>
46//!         <xs:element name="name" type="xs:string"/>
47//!         <xs:element name="age" type="xs:integer"/>
48//!       </xs:sequence>
49//!     </xs:complexType>
50//!   </xs:element>
51//! </xs:schema>"#;
52//!
53//! let validator = SchemaValidator::from_xsd(schema_xsd)?;
54//!
55//! // Validate XML document
56//! let xml = r#"<?xml version="1.0"?>
57//! <person>
58//!   <name>Alice</name>
59//!   <age>30</age>
60//! </person>"#;
61//!
62//! validator.validate(xml)?;
63//! # Ok::<(), Box<dyn std::error::Error>>(())
64//! ```
65//!
66//! ## Schema Caching
67//!
68//! ```text
69//! use hedl_xml::schema::SchemaCache;
70//! use std::path::Path;
71//!
72//! // Create cache with maximum 10 schemas
73//! let cache = SchemaCache::new(10);
74//!
75//! // Load and cache schema
76//! let validator = cache.get_or_load(Path::new("schema.xsd"))?;
77//!
78//! // Subsequent calls use cached validator
79//! let validator2 = cache.get_or_load(Path::new("schema.xsd"))?;
80//! ```
81//!
82//! ## Detailed Error Messages
83//!
84//! ```rust,should_panic
85//! use hedl_xml::schema::SchemaValidator;
86//!
87//! let schema_xsd = r#"<?xml version="1.0"?>
88//! <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
89//!   <xs:element name="person">
90//!     <xs:complexType>
91//!       <xs:sequence>
92//!         <xs:element name="name" type="xs:string"/>
93//!         <xs:element name="age" type="xs:integer"/>
94//!       </xs:sequence>
95//!     </xs:complexType>
96//!   </xs:element>
97//! </xs:schema>"#;
98//!
99//! let validator = SchemaValidator::from_xsd(schema_xsd).unwrap();
100//!
101//! // Invalid XML - age is not an integer
102//! let xml = r#"<?xml version="1.0"?>
103//! <person>
104//!   <name>Alice</name>
105//!   <age>thirty</age>
106//! </person>"#;
107//!
//! // This will produce a clear error:
//! // "Type validation failed for 'age': expected xs:integer, found 'thirty' at line 4"
110//! validator.validate(xml).unwrap();
111//! ```
112
113use parking_lot::RwLock;
114use roxmltree::{Document as XmlDocument, Node, ParsingOptions};
115use std::collections::HashMap;
116use std::fmt;
117use std::fs;
118use std::path::{Path, PathBuf};
119use std::sync::Arc;
120
/// Everything that can go wrong while loading a schema or validating a document.
#[derive(Debug, Clone, PartialEq)]
pub enum ValidationError {
    /// The XSD schema itself could not be parsed.
    SchemaParseError {
        /// Human-readable reason the schema was rejected.
        message: String,
    },

    /// The XML document under validation could not be parsed.
    DocumentParseError {
        /// Human-readable reason the document was rejected.
        message: String,
        /// Line of the failure, when known.
        line: Option<usize>,
        /// Column of the failure, when known (only shown together with `line`).
        column: Option<usize>,
    },

    /// An element did not match what the schema expects.
    ElementValidationError {
        /// Name of the offending element.
        element: String,
        /// Element or type the schema expected.
        expected: String,
        /// What the document contained instead.
        found: String,
        /// Line of the failure, when known.
        line: Option<usize>,
    },

    /// An attribute value did not match its declaration.
    AttributeValidationError {
        /// Element that carries the attribute.
        element: String,
        /// Name of the offending attribute.
        attribute: String,
        /// Human-readable description of the mismatch.
        message: String,
        /// Line of the failure, when known.
        line: Option<usize>,
    },

    /// A value did not conform to its declared type.
    TypeValidationError {
        /// Name of the element or attribute holding the value.
        name: String,
        /// Type the schema declared.
        expected_type: String,
        /// The rejected value.
        value: String,
        /// Line of the failure, when known.
        line: Option<usize>,
    },

    /// An element occurred too few or too many times (minOccurs, maxOccurs).
    CardinalityError {
        /// Name of the element being counted.
        element: String,
        /// Smallest permitted number of occurrences.
        min: usize,
        /// Largest permitted number of occurrences (`None` = unbounded).
        max: Option<usize>,
        /// Number of occurrences actually present.
        actual: usize,
        /// Line of the failure, when known.
        line: Option<usize>,
    },

    /// An attribute declared as required is absent.
    RequiredAttributeMissing {
        /// Element that should carry the attribute.
        element: String,
        /// Name of the absent attribute.
        attribute: String,
        /// Line of the failure, when known.
        line: Option<usize>,
    },

    /// The document contains an element the schema does not declare.
    UnknownElement {
        /// Name of the undeclared element.
        element: String,
        /// Line of the failure, when known.
        line: Option<usize>,
    },

    /// The schema file does not exist.
    SchemaNotFound {
        /// Location that was looked up.
        path: PathBuf,
    },

    /// Reading the schema failed at the I/O level.
    IoError {
        /// Human-readable description of the I/O failure.
        message: String,
    },
}

impl fmt::Display for ValidationError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Variants without the shared " at line N" suffix return directly from
        // their arm; every other arm writes its message and yields its line.
        let line = match self {
            Self::SchemaParseError { message } => {
                return write!(f, "Schema parse error: {}", message);
            }
            Self::SchemaNotFound { path } => {
                return write!(f, "Schema file not found: {}", path.display());
            }
            Self::IoError { message } => {
                return write!(f, "I/O error: {}", message);
            }
            Self::DocumentParseError {
                message,
                line,
                column,
            } => {
                write!(f, "Document parse error: {}", message)?;
                // A column is only reported when a line is available as well.
                return match (line, column) {
                    (Some(l), Some(c)) => write!(f, " at line {}, column {}", l, c),
                    (Some(l), None) => write!(f, " at line {}", l),
                    (None, _) => Ok(()),
                };
            }
            Self::ElementValidationError {
                element,
                expected,
                found,
                line,
            } => {
                write!(
                    f,
                    "Element validation failed for '{}': expected {}, found '{}'",
                    element, expected, found
                )?;
                line
            }
            Self::AttributeValidationError {
                element,
                attribute,
                message,
                line,
            } => {
                write!(
                    f,
                    "Attribute validation failed for '{}.{}': {}",
                    element, attribute, message
                )?;
                line
            }
            Self::TypeValidationError {
                name,
                expected_type,
                value,
                line,
            } => {
                write!(
                    f,
                    "Type validation failed for '{}': expected {}, found '{}'",
                    name, expected_type, value
                )?;
                line
            }
            Self::CardinalityError {
                element,
                min,
                max,
                actual,
                line,
            } => {
                let upper = match max {
                    Some(m) => m.to_string(),
                    None => "unbounded".to_string(),
                };
                write!(
                    f,
                    "Cardinality error for '{}': expected {}..{}, found {}",
                    element, min, upper, actual
                )?;
                line
            }
            Self::RequiredAttributeMissing {
                element,
                attribute,
                line,
            } => {
                write!(
                    f,
                    "Required attribute '{}' missing from element '{}'",
                    attribute, element
                )?;
                line
            }
            Self::UnknownElement { element, line } => {
                write!(f, "Unknown element '{}' not defined in schema", element)?;
                line
            }
        };

        match line {
            Some(l) => write!(f, " at line {}", l),
            None => Ok(()),
        }
    }
}

impl std::error::Error for ValidationError {}
342
/// Simple XSD schema representation for validation.
///
/// Holds only the pieces of a schema document that the validator consults.
#[derive(Debug, Clone)]
struct Schema {
    /// Top-level element declarations, keyed by element name.
    elements: HashMap<String, ElementDef>,
    /// Value of the schema root's `targetNamespace` attribute, if present.
    // Stored at parse time; nothing in this part of the file reads it back,
    // hence the lint allowance.
    #[allow(dead_code)]
    target_namespace: Option<String>,
}
350
/// Element definition in XSD schema
#[derive(Debug, Clone)]
struct ElementDef {
    /// Value of the declaration's `name` attribute.
    name: String,
    /// Value of the declaration's `type` attribute, if it has one.
    type_name: Option<String>,
    /// Inline `complexType` child of the declaration, if it has one.
    complex_type: Option<ComplexType>,
    /// Fewest occurrences accepted inside a parent sequence.
    min_occurs: usize,
    /// Most occurrences accepted inside a parent sequence; `None` means no
    /// upper bound is enforced.
    max_occurs: Option<usize>,
}
360
/// Complex type definition
#[derive(Debug, Clone)]
struct ComplexType {
    /// Element declarations collected from the type's `sequence` children.
    sequence: Vec<ElementDef>,
    /// Attribute declarations that are direct children of the type.
    attributes: Vec<AttributeDef>,
}
367
/// Attribute definition
#[derive(Debug, Clone)]
struct AttributeDef {
    /// Value of the declaration's `name` attribute.
    name: String,
    /// Declared type name; the parser substitutes `xs:string` when the
    /// declaration has no `type` attribute.
    type_name: String,
    /// `true` when the declaration says `use="required"`.
    required: bool,
}
375
/// XSD Schema Validator
///
/// Validates XML documents against XSD schemas with comprehensive error reporting.
///
/// Build one with `from_xsd` or `from_file`, then call `validate` for each
/// document to check.
#[derive(Debug, Clone)]
pub struct SchemaValidator {
    /// Parsed form of the schema this validator checks documents against.
    schema: Schema,
}
383
384impl SchemaValidator {
385    /// Create a new validator from XSD schema string.
386    ///
387    /// # Arguments
388    ///
389    /// * `xsd` - XSD schema definition as a string
390    ///
391    /// # Errors
392    ///
393    /// Returns `ValidationError::SchemaParseError` if the schema is malformed.
394    ///
395    /// # Example
396    ///
397    /// ```rust
398    /// use hedl_xml::schema::SchemaValidator;
399    ///
400    /// let schema = r#"<?xml version="1.0"?>
401    /// <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
402    ///   <xs:element name="root" type="xs:string"/>
403    /// </xs:schema>"#;
404    ///
405    /// let validator = SchemaValidator::from_xsd(schema)?;
406    /// # Ok::<(), Box<dyn std::error::Error>>(())
407    /// ```
408    pub fn from_xsd(xsd: &str) -> Result<Self, ValidationError> {
409        let schema = Self::parse_xsd(xsd)?;
410        Ok(Self { schema })
411    }
412
413    /// Parse XSD schema document
414    fn parse_xsd(xsd: &str) -> Result<Schema, ValidationError> {
415        // Security: Use explicit parsing options with DTD disabled
416        let options = ParsingOptions {
417            allow_dtd: false, // Explicitly disable DTD processing for security
418            ..Default::default()
419        };
420
421        let doc = XmlDocument::parse_with_options(xsd, options).map_err(|e| {
422            ValidationError::SchemaParseError {
423                message: e.to_string(),
424            }
425        })?;
426
427        let root = doc.root_element();
428
429        // Verify this is an XSD schema
430        if root.tag_name().name() != "schema" {
431            return Err(ValidationError::SchemaParseError {
432                message: "Root element must be <xs:schema>".to_string(),
433            });
434        }
435
436        let target_namespace = root.attribute("targetNamespace").map(|s| s.to_string());
437        let mut elements = HashMap::new();
438
439        // Parse top-level elements
440        for child in root.children().filter(|n| n.is_element()) {
441            if child.tag_name().name() == "element" {
442                let elem_def = Self::parse_element(&child)?;
443                elements.insert(elem_def.name.clone(), elem_def);
444            }
445        }
446
447        Ok(Schema {
448            elements,
449            target_namespace,
450        })
451    }
452
453    /// Parse an element definition
454    fn parse_element(node: &Node<'_, '_>) -> Result<ElementDef, ValidationError> {
455        let name = node
456            .attribute("name")
457            .ok_or_else(|| ValidationError::SchemaParseError {
458                message: "Element must have 'name' attribute".to_string(),
459            })?
460            .to_string();
461
462        let type_name = node.attribute("type").map(|s| s.to_string());
463        let min_occurs = node
464            .attribute("minOccurs")
465            .and_then(|s| s.parse::<usize>().ok())
466            .unwrap_or(1);
467        let max_occurs = node.attribute("maxOccurs").and_then(|s| {
468            if s == "unbounded" {
469                None
470            } else {
471                s.parse::<usize>().ok()
472            }
473        });
474
475        // Parse complex type if present
476        let mut complex_type = None;
477        for child in node.children().filter(|n| n.is_element()) {
478            if child.tag_name().name() == "complexType" {
479                complex_type = Some(Self::parse_complex_type(&child)?);
480                break;
481            }
482        }
483
484        Ok(ElementDef {
485            name,
486            type_name,
487            complex_type,
488            min_occurs,
489            max_occurs,
490        })
491    }
492
493    /// Parse a complex type definition
494    fn parse_complex_type(node: &Node<'_, '_>) -> Result<ComplexType, ValidationError> {
495        let mut sequence = Vec::new();
496        let mut attributes = Vec::new();
497
498        for child in node.children().filter(|n| n.is_element()) {
499            match child.tag_name().name() {
500                "sequence" => {
501                    for elem_node in child.children().filter(|n| n.is_element()) {
502                        if elem_node.tag_name().name() == "element" {
503                            sequence.push(Self::parse_element(&elem_node)?);
504                        }
505                    }
506                }
507                "attribute" => {
508                    attributes.push(Self::parse_attribute(&child)?);
509                }
510                _ => {}
511            }
512        }
513
514        Ok(ComplexType {
515            sequence,
516            attributes,
517        })
518    }
519
520    /// Parse an attribute definition
521    fn parse_attribute(node: &Node<'_, '_>) -> Result<AttributeDef, ValidationError> {
522        let name = node
523            .attribute("name")
524            .ok_or_else(|| ValidationError::SchemaParseError {
525                message: "Attribute must have 'name' attribute".to_string(),
526            })?
527            .to_string();
528
529        let type_name = node.attribute("type").unwrap_or("xs:string").to_string();
530
531        let required = node.attribute("use") == Some("required");
532
533        Ok(AttributeDef {
534            name,
535            type_name,
536            required,
537        })
538    }
539
540    /// Create a new validator from XSD schema file.
541    ///
542    /// # Arguments
543    ///
544    /// * `path` - Path to XSD schema file
545    ///
546    /// # Errors
547    ///
548    /// Returns `ValidationError::SchemaNotFound` if file doesn't exist,
549    /// `ValidationError::IoError` for I/O errors, or
550    /// `ValidationError::SchemaParseError` if schema is malformed.
551    ///
552    /// # Example
553    ///
554    /// ```rust,no_run
555    /// use hedl_xml::schema::SchemaValidator;
556    /// use std::path::Path;
557    ///
558    /// let validator = SchemaValidator::from_file(Path::new("schema.xsd"))?;
559    /// # Ok::<(), Box<dyn std::error::Error>>(())
560    /// ```
561    pub fn from_file(path: &Path) -> Result<Self, ValidationError> {
562        if !path.exists() {
563            return Err(ValidationError::SchemaNotFound {
564                path: path.to_path_buf(),
565            });
566        }
567
568        let content = fs::read_to_string(path).map_err(|e| ValidationError::IoError {
569            message: e.to_string(),
570        })?;
571
572        Self::from_xsd(&content)
573    }
574
575    /// Validate an XML document against the schema.
576    ///
577    /// # Arguments
578    ///
579    /// * `xml` - XML document to validate
580    ///
581    /// # Errors
582    ///
583    /// Returns various `ValidationError` variants if validation fails.
584    ///
585    /// # Example
586    ///
587    /// ```rust
588    /// use hedl_xml::schema::SchemaValidator;
589    ///
590    /// let schema = r#"<?xml version="1.0"?>
591    /// <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
592    ///   <xs:element name="root" type="xs:string"/>
593    /// </xs:schema>"#;
594    ///
595    /// let validator = SchemaValidator::from_xsd(schema)?;
596    ///
597    /// let xml = r#"<?xml version="1.0"?><root>value</root>"#;
598    /// validator.validate(xml)?;
599    /// # Ok::<(), Box<dyn std::error::Error>>(())
600    /// ```
601    pub fn validate(&self, xml: &str) -> Result<(), ValidationError> {
602        // Security: Use explicit parsing options with DTD disabled
603        let options = ParsingOptions {
604            allow_dtd: false, // Explicitly disable DTD processing for security
605            ..Default::default()
606        };
607
608        let doc = XmlDocument::parse_with_options(xml, options).map_err(|e| {
609            ValidationError::DocumentParseError {
610                message: e.to_string(),
611                line: None,
612                column: None,
613            }
614        })?;
615
616        let root = doc.root_element();
617        let root_name = root.tag_name().name();
618
619        // Find schema definition for root element
620        let schema_elem =
621            self.schema
622                .elements
623                .get(root_name)
624                .ok_or_else(|| ValidationError::UnknownElement {
625                    element: root_name.to_string(),
626                    line: Some(doc.text_pos_at(root.range().start).row as usize),
627                })?;
628
629        self.validate_element(&root, schema_elem)?;
630
631        Ok(())
632    }
633
634    /// Validate an element against schema definition
635    fn validate_element(
636        &self,
637        node: &Node<'_, '_>,
638        schema_elem: &ElementDef,
639    ) -> Result<(), ValidationError> {
640        let line = node.document().text_pos_at(node.range().start).row as usize;
641
642        // Validate element type and content
643        if let Some(ref type_name) = schema_elem.type_name {
644            self.validate_type(node, type_name, line)?;
645        }
646
647        // If complex type, validate structure
648        if let Some(ref complex_type) = schema_elem.complex_type {
649            // Validate attributes
650            self.validate_attributes_complex(node, complex_type, line)?;
651
652            // Validate child elements
653            self.validate_children_complex(node, complex_type, line)?;
654        }
655
656        Ok(())
657    }
658
659    /// Validate element type
660    fn validate_type(
661        &self,
662        node: &Node<'_, '_>,
663        type_ref: &str,
664        line: usize,
665    ) -> Result<(), ValidationError> {
666        let text = node.text().unwrap_or("");
667
668        // Validate based on XML Schema built-in types
669        match type_ref {
670            "xs:string" | "string" => {
671                // Any text is valid
672            }
673            "xs:integer" | "integer" => {
674                if text.parse::<i64>().is_err() {
675                    return Err(ValidationError::TypeValidationError {
676                        name: node.tag_name().name().to_string(),
677                        expected_type: "xs:integer".to_string(),
678                        value: text.to_string(),
679                        line: Some(line),
680                    });
681                }
682            }
683            "xs:decimal" | "decimal" => {
684                if text.parse::<f64>().is_err() {
685                    return Err(ValidationError::TypeValidationError {
686                        name: node.tag_name().name().to_string(),
687                        expected_type: "xs:decimal".to_string(),
688                        value: text.to_string(),
689                        line: Some(line),
690                    });
691                }
692            }
693            "xs:boolean" | "boolean" => {
694                if !["true", "false", "1", "0"].contains(&text) {
695                    return Err(ValidationError::TypeValidationError {
696                        name: node.tag_name().name().to_string(),
697                        expected_type: "xs:boolean".to_string(),
698                        value: text.to_string(),
699                        line: Some(line),
700                    });
701                }
702            }
703            _ => {
704                // Custom type - would need type lookup in schema
705            }
706        }
707
708        Ok(())
709    }
710
711    /// Validate attributes against complex type definition
712    fn validate_attributes_complex(
713        &self,
714        node: &Node<'_, '_>,
715        complex_type: &ComplexType,
716        line: usize,
717    ) -> Result<(), ValidationError> {
718        let element_name = node.tag_name().name();
719
720        // Check required attributes
721        for attr_def in &complex_type.attributes {
722            if attr_def.required && node.attribute(attr_def.name.as_str()).is_none() {
723                return Err(ValidationError::RequiredAttributeMissing {
724                    element: element_name.to_string(),
725                    attribute: attr_def.name.clone(),
726                    line: Some(line),
727                });
728            }
729
730            // Validate attribute type if present
731            if let Some(value) = node.attribute(attr_def.name.as_str()) {
732                self.validate_simple_type(value, &attr_def.type_name)
733                    .map_err(|_| ValidationError::AttributeValidationError {
734                        element: element_name.to_string(),
735                        attribute: attr_def.name.clone(),
736                        message: format!("Expected type {}, found '{}'", attr_def.type_name, value),
737                        line: Some(line),
738                    })?;
739            }
740        }
741
742        Ok(())
743    }
744
745    /// Validate child elements against complex type sequence
746    fn validate_children_complex(
747        &self,
748        node: &Node<'_, '_>,
749        complex_type: &ComplexType,
750        line: usize,
751    ) -> Result<(), ValidationError> {
752        let children: Vec<_> = node.children().filter(|n| n.is_element()).collect();
753
754        // Validate each child element in sequence
755        for child in &children {
756            let child_name = child.tag_name().name();
757
758            // Find matching element in sequence
759            let schema_elem = complex_type
760                .sequence
761                .iter()
762                .find(|e| e.name == child_name)
763                .ok_or_else(|| ValidationError::UnknownElement {
764                    element: child_name.to_string(),
765                    line: Some(child.document().text_pos_at(child.range().start).row as usize),
766                })?;
767
768            self.validate_element(child, schema_elem)?;
769        }
770
771        // Validate cardinality for required elements
772        for elem_def in &complex_type.sequence {
773            let count = children
774                .iter()
775                .filter(|n| n.tag_name().name() == elem_def.name)
776                .count();
777
778            if count < elem_def.min_occurs {
779                return Err(ValidationError::CardinalityError {
780                    element: elem_def.name.clone(),
781                    min: elem_def.min_occurs,
782                    max: elem_def.max_occurs,
783                    actual: count,
784                    line: Some(line),
785                });
786            }
787
788            if let Some(max) = elem_def.max_occurs {
789                if count > max {
790                    return Err(ValidationError::CardinalityError {
791                        element: elem_def.name.clone(),
792                        min: elem_def.min_occurs,
793                        max: elem_def.max_occurs,
794                        actual: count,
795                        line: Some(line),
796                    });
797                }
798            }
799        }
800
801        Ok(())
802    }
803
804    /// Validate a simple type value
805    fn validate_simple_type(&self, value: &str, type_name: &str) -> Result<(), ()> {
806        match type_name {
807            "xs:string" | "string" => Ok(()),
808            "xs:integer" | "integer" => value.parse::<i64>().map(|_| ()).map_err(|_| ()),
809            "xs:decimal" | "decimal" => value.parse::<f64>().map(|_| ()).map_err(|_| ()),
810            "xs:boolean" | "boolean" => {
811                if ["true", "false", "1", "0"].contains(&value) {
812                    Ok(())
813                } else {
814                    Err(())
815                }
816            }
817            _ => Ok(()), // Unknown types pass for now
818        }
819    }
820}
821
/// Thread-safe, size-bounded cache for schema validators.
///
/// Caches parsed schemas to avoid re-parsing on every validation.
/// Uses parking_lot RwLock for high-performance concurrent access.
///
/// NOTE(review): eviction is not recency-based. When the cache is full,
/// `get_or_load` removes whichever key the underlying `HashMap` iterator
/// yields first.
///
/// # Example
///
/// ```text
/// use hedl_xml::schema::SchemaCache;
/// use std::path::Path;
///
/// let cache = SchemaCache::new(100);
///
/// // First call parses and caches
/// let validator = cache.get_or_load(Path::new("schema.xsd"))?;
///
/// // Second call uses cached validator
/// let validator2 = cache.get_or_load(Path::new("schema.xsd"))?;
/// ```
pub struct SchemaCache {
    /// Loaded validators, keyed by the path they were requested with.
    cache: Arc<RwLock<HashMap<PathBuf, Arc<SchemaValidator>>>>,
    /// Entry count at which `get_or_load` evicts before inserting.
    max_size: usize,
}
845
846impl SchemaCache {
847    /// Create a new schema cache with maximum size.
848    ///
849    /// # Arguments
850    ///
851    /// * `max_size` - Maximum number of schemas to cache
852    ///
853    /// # Example
854    ///
855    /// ```rust
856    /// use hedl_xml::schema::SchemaCache;
857    ///
858    /// let cache = SchemaCache::new(50);
859    /// ```
860    pub fn new(max_size: usize) -> Self {
861        Self {
862            cache: Arc::new(RwLock::new(HashMap::new())),
863            max_size,
864        }
865    }
866
867    /// Get cached validator or load from file.
868    ///
869    /// If the schema is already cached, returns the cached validator.
870    /// Otherwise, loads the schema from file and caches it.
871    ///
872    /// # Arguments
873    ///
874    /// * `path` - Path to schema file
875    ///
876    /// # Errors
877    ///
878    /// Returns `ValidationError` if schema file cannot be loaded or parsed.
879    ///
880    /// # Example
881    ///
882    /// ```rust,no_run
883    /// use hedl_xml::schema::SchemaCache;
884    /// use std::path::Path;
885    ///
886    /// let cache = SchemaCache::new(10);
887    /// let validator = cache.get_or_load(Path::new("schema.xsd"))?;
888    /// # Ok::<(), Box<dyn std::error::Error>>(())
889    /// ```
890    pub fn get_or_load(&self, path: &Path) -> Result<Arc<SchemaValidator>, ValidationError> {
891        // Try read lock first
892        {
893            let cache = self.cache.read();
894            if let Some(validator) = cache.get(path) {
895                return Ok(Arc::clone(validator));
896            }
897        }
898
899        // Need to load - acquire write lock
900        let mut cache = self.cache.write();
901
902        // Double-check in case another thread loaded while we waited
903        if let Some(validator) = cache.get(path) {
904            return Ok(Arc::clone(validator));
905        }
906
907        // Load validator
908        let validator = Arc::new(SchemaValidator::from_file(path)?);
909
910        // Evict oldest entry if cache is full
911        if cache.len() >= self.max_size {
912            if let Some(oldest_key) = cache.keys().next().cloned() {
913                cache.remove(&oldest_key);
914            }
915        }
916
917        cache.insert(path.to_path_buf(), Arc::clone(&validator));
918
919        Ok(validator)
920    }
921
922    /// Clear all cached schemas.
923    ///
924    /// # Example
925    ///
926    /// ```rust
927    /// use hedl_xml::schema::SchemaCache;
928    ///
929    /// let cache = SchemaCache::new(10);
930    /// cache.clear();
931    /// ```
932    pub fn clear(&self) {
933        self.cache.write().clear();
934    }
935
936    /// Get number of cached schemas.
937    ///
938    /// # Example
939    ///
940    /// ```rust
941    /// use hedl_xml::schema::SchemaCache;
942    ///
943    /// let cache = SchemaCache::new(10);
944    /// assert_eq!(cache.size(), 0);
945    /// ```
946    pub fn size(&self) -> usize {
947        self.cache.read().len()
948    }
949}
950
951impl Default for SchemaCache {
952    /// Create default cache with size 100
953    fn default() -> Self {
954        Self::new(100)
955    }
956}
957
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal schema shared by most tests: a `person` element whose content is
    /// a string `name` followed by an integer `age`.
    const SIMPLE_SCHEMA: &str = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="person">
    <xs:complexType>
      <xs:sequence>
        <xs:element name="name" type="xs:string"/>
        <xs:element name="age" type="xs:integer"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
</xs:schema>"#;

    /// Write `SIMPLE_SCHEMA` into a fresh temporary file and return its handle.
    ///
    /// The caller must keep the returned handle alive for as long as the path
    /// is needed, because the file is removed when the handle is dropped.
    fn write_schema_file() -> tempfile::NamedTempFile {
        use std::io::Write;

        let mut file = tempfile::NamedTempFile::new().unwrap();
        file.write_all(SIMPLE_SCHEMA.as_bytes()).unwrap();
        file
    }

    #[test]
    fn test_schema_validator_creation() {
        // `expect` surfaces the parse error in the failure output.
        let _validator =
            SchemaValidator::from_xsd(SIMPLE_SCHEMA).expect("SIMPLE_SCHEMA should parse");
    }

    #[test]
    fn test_valid_document() {
        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();

        let xml = r#"<?xml version="1.0"?>
<person>
  <name>Alice</name>
  <age>30</age>
</person>"#;

        let result = validator.validate(xml);
        assert!(
            result.is_ok(),
            "valid document was rejected: {:?}",
            result.err()
        );
    }

    #[test]
    fn test_invalid_type() {
        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();

        let xml = r#"<?xml version="1.0"?>
<person>
  <name>Alice</name>
  <age>thirty</age>
</person>"#;

        match validator.validate(xml) {
            Err(ValidationError::TypeValidationError {
                name,
                expected_type,
                value,
                ..
            }) => {
                assert_eq!(name, "age");
                assert_eq!(expected_type, "xs:integer");
                assert_eq!(value, "thirty");
            }
            other => panic!("Expected TypeValidationError, got {:?}", other.err()),
        }
    }

    #[test]
    fn test_unknown_element() {
        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();

        let xml = r#"<?xml version="1.0"?>
<person>
  <name>Alice</name>
  <age>30</age>
  <email>alice@example.com</email>
</person>"#;

        match validator.validate(xml) {
            Err(ValidationError::UnknownElement { element, .. }) => {
                assert_eq!(element, "email");
            }
            other => panic!("Expected UnknownElement error, got {:?}", other.err()),
        }
    }

    #[test]
    fn test_malformed_xml() {
        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();

        // `<name>` is never closed, so the document is not well-formed.
        let xml = r#"<?xml version="1.0"?>
<person>
  <name>Alice
  <age>30</age>
</person>"#;

        let result = validator.validate(xml);
        assert!(
            matches!(result, Err(ValidationError::DocumentParseError { .. })),
            "expected DocumentParseError, got {:?}",
            result.err()
        );
    }

    #[test]
    fn test_schema_cache() {
        let cache = SchemaCache::new(5);
        assert_eq!(cache.size(), 0);

        let schema_file = write_schema_file();
        let path = schema_file.path();

        // First request loads the schema from disk.
        let first = cache.get_or_load(path).unwrap();
        assert_eq!(cache.size(), 1);

        // Second request must be answered from the cache with the same instance.
        let second = cache.get_or_load(path).unwrap();
        assert_eq!(cache.size(), 1);
        assert!(Arc::ptr_eq(&first, &second));

        cache.clear();
        assert_eq!(cache.size(), 0);
    }

    #[test]
    fn test_cache_eviction() {
        let cache = SchemaCache::new(2);

        // Three distinct schema files for a cache that holds two.
        let files: Vec<_> = (0..3).map(|_| write_schema_file()).collect();

        cache.get_or_load(files[0].path()).unwrap();
        cache.get_or_load(files[1].path()).unwrap();
        assert_eq!(cache.size(), 2);

        // Loading a third schema must evict one entry instead of growing.
        let third = cache.get_or_load(files[2].path()).unwrap();
        assert_eq!(cache.size(), 2);

        // The entry that was just loaded must be the one that stayed cached:
        // asking again returns the same instance and changes nothing.
        let third_again = cache.get_or_load(files[2].path()).unwrap();
        assert!(Arc::ptr_eq(&third, &third_again));
        assert_eq!(cache.size(), 2);
    }

    #[test]
    fn test_error_display() {
        let err = ValidationError::TypeValidationError {
            name: "age".to_string(),
            expected_type: "xs:integer".to_string(),
            value: "thirty".to_string(),
            line: Some(5),
        };

        let display = err.to_string();
        for needle in &["age", "xs:integer", "thirty", "line 5"] {
            assert!(
                display.contains(needle),
                "error message {:?} does not mention {:?}",
                display,
                needle
            );
        }
    }

    #[test]
    fn test_schema_not_found() {
        let result = SchemaValidator::from_file(Path::new("/nonexistent/schema.xsd"));
        assert!(
            matches!(result, Err(ValidationError::SchemaNotFound { .. })),
            "expected SchemaNotFound, got {:?}",
            result.err()
        );
    }

    #[test]
    fn test_invalid_schema() {
        let invalid_schema = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="broken" type="nonexistent:type"/>
</xs:schema>"#;

        // Smoke test only: the call must not panic. The outcome is deliberately
        // not asserted - per the original author's note the schema parser is
        // permissive about unknown types and defers checking to document
        // validation.
        let _result = SchemaValidator::from_xsd(invalid_schema);
    }

    #[test]
    fn test_boolean_type_validation() {
        let schema = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="flag" type="xs:boolean"/>
</xs:schema>"#;

        let validator = SchemaValidator::from_xsd(schema).unwrap();

        // Every accepted lexical form; the message names the value that failed.
        for val in &["true", "false", "1", "0"] {
            let xml = format!(r#"<?xml version="1.0"?><flag>{}</flag>"#, val);
            let result = validator.validate(&xml);
            assert!(
                result.is_ok(),
                "boolean value {:?} was rejected: {:?}",
                val,
                result.err()
            );
        }

        let xml = r#"<?xml version="1.0"?><flag>yes</flag>"#;
        assert!(
            validator.validate(xml).is_err(),
            "\"yes\" must not be accepted as xs:boolean"
        );
    }

    #[test]
    fn test_decimal_type_validation() {
        let schema = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="price" type="xs:decimal"/>
</xs:schema>"#;

        let validator = SchemaValidator::from_xsd(schema).unwrap();

        let xml = r#"<?xml version="1.0"?><price>19.99</price>"#;
        let result = validator.validate(xml);
        assert!(
            result.is_ok(),
            "valid decimal was rejected: {:?}",
            result.err()
        );

        let xml = r#"<?xml version="1.0"?><price>not a number</price>"#;
        assert!(
            validator.validate(xml).is_err(),
            "\"not a number\" must not be accepted as xs:decimal"
        );
    }

    #[test]
    fn test_concurrent_cache_access() {
        use std::thread;

        let cache = Arc::new(SchemaCache::new(10));

        let schema_file = write_schema_file();
        let path = schema_file.path().to_path_buf();

        // Ten threads each request the same path 100 times. `collect` forces
        // every thread to be spawned before any of them is joined.
        let handles: Vec<_> = (0..10)
            .map(|_| {
                let cache = Arc::clone(&cache);
                let path = path.clone();
                thread::spawn(move || {
                    let first = cache.get_or_load(&path).unwrap();
                    for _ in 1..100 {
                        let again = cache.get_or_load(&path).unwrap();
                        assert!(Arc::ptr_eq(&first, &again));
                    }
                    first
                })
            })
            .collect();

        // A panic inside a worker (failed load or assertion) surfaces here.
        let validators: Vec<_> = handles
            .into_iter()
            .map(|handle| handle.join().unwrap())
            .collect();

        // The schema must have been loaded exactly once: a single cache entry,
        // and every thread observed the same validator instance.
        assert_eq!(cache.size(), 1);
        assert!(validators
            .iter()
            .all(|validator| Arc::ptr_eq(&validators[0], validator)));
    }
}