Skip to main content

hedl_xml/
schema.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! XSD Schema Validation for XML Documents
19//!
20//! This module provides comprehensive XML Schema Definition (XSD) validation support
21//! for XML documents, with schema caching for optimal performance.
22//!
23//! # Features
24//!
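//! - Validation against a core subset of XSD 1.0 (top-level elements, inline
//!   `xs:complexType` with `xs:sequence` content, and attributes)
//! - Schema caching with a thread-safe, size-bounded cache
//! - Clear, actionable error messages with line information
//! - Namespace-prefixed schemas and documents are matched by local element name
//!   (namespaces and imports are not resolved)
//! - Type validation for the built-in simple types `xs:string`, `xs:integer`,
//!   `xs:decimal` and `xs:boolean` (other type names are accepted unchecked)
//! - Cardinality validation (minOccurs, maxOccurs)
//! - Attribute validation (required and optional attributes, with type checks;
//!   `fixed` and `default` are not enforced)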
32//!
33//! # Examples
34//!
35//! ## Basic Schema Validation
36//!
37//! ```rust
38//! use hedl_xml::schema::{SchemaValidator, ValidationError};
39//!
40//! // Create validator with schema
41//! let schema_xsd = r#"<?xml version="1.0"?>
42//! <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
43//!   <xs:element name="person">
44//!     <xs:complexType>
45//!       <xs:sequence>
46//!         <xs:element name="name" type="xs:string"/>
47//!         <xs:element name="age" type="xs:integer"/>
48//!       </xs:sequence>
49//!     </xs:complexType>
50//!   </xs:element>
51//! </xs:schema>"#;
52//!
53//! let validator = SchemaValidator::from_xsd(schema_xsd)?;
54//!
55//! // Validate XML document
56//! let xml = r#"<?xml version="1.0"?>
57//! <person>
58//!   <name>Alice</name>
59//!   <age>30</age>
60//! </person>"#;
61//!
62//! validator.validate(xml)?;
63//! # Ok::<(), Box<dyn std::error::Error>>(())
64//! ```
65//!
66//! ## Schema Caching
67//!
68//! ```text
69//! use hedl_xml::schema::SchemaCache;
70//! use std::path::Path;
71//!
72//! // Create cache with maximum 10 schemas
73//! let cache = SchemaCache::new(10);
74//!
75//! // Load and cache schema
76//! let validator = cache.get_or_load(Path::new("schema.xsd"))?;
77//!
78//! // Subsequent calls use cached validator
79//! let validator2 = cache.get_or_load(Path::new("schema.xsd"))?;
80//! ```
81//!
82//! ## Detailed Error Messages
83//!
84//! ```rust,should_panic
85//! use hedl_xml::schema::SchemaValidator;
86//!
87//! let schema_xsd = r#"<?xml version="1.0"?>
88//! <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
89//!   <xs:element name="person">
90//!     <xs:complexType>
91//!       <xs:sequence>
92//!         <xs:element name="name" type="xs:string"/>
93//!         <xs:element name="age" type="xs:integer"/>
94//!       </xs:sequence>
95//!     </xs:complexType>
96//!   </xs:element>
97//! </xs:schema>"#;
98//!
99//! let validator = SchemaValidator::from_xsd(schema_xsd).unwrap();
100//!
101//! // Invalid XML - age is not an integer
102//! let xml = r#"<?xml version="1.0"?>
103//! <person>
104//!   <name>Alice</name>
105//!   <age>thirty</age>
106//! </person>"#;
107//!
//! // This fails with a `ValidationError::TypeValidationError` whose message starts with:
//! // "Type validation failed for 'age': expected xs:integer, found 'thirty'"
110//! validator.validate(xml).unwrap();
111//! ```
112
113use parking_lot::RwLock;
114use roxmltree::{Document as XmlDocument, Node, ParsingOptions};
115use std::collections::HashMap;
116use std::fmt;
117use std::fs;
118use std::path::{Path, PathBuf};
119use std::sync::Arc;
120
/// Errors that can occur while loading a schema or validating a document.
///
/// Each variant carries the data needed to render a human-readable message via
/// its `Display` implementation. Variants that refer to a location in the XML
/// document carry an optional `line`; `None` means no position was recorded.
#[derive(Debug, Clone, PartialEq)]
pub enum ValidationError {
    /// The XSD schema text could not be parsed or is structurally invalid.
    SchemaParseError {
        /// Description of the schema parsing error
        message: String,
    },

    /// The XML document under validation could not be parsed.
    DocumentParseError {
        /// Description of the document parsing error
        message: String,
        /// Line number where error occurred (if available)
        line: Option<usize>,
        /// Column number where error occurred (if available)
        column: Option<usize>,
    },

    /// An element did not match what the schema expects.
    ElementValidationError {
        /// Element name that failed validation
        element: String,
        /// Expected element or type
        expected: String,
        /// What was actually found
        found: String,
        /// Line number where error occurred (if available)
        line: Option<usize>,
    },

    /// An attribute value did not satisfy its declaration.
    AttributeValidationError {
        /// Element containing the attribute
        element: String,
        /// Attribute name that failed validation
        attribute: String,
        /// Description of the validation failure
        message: String,
        /// Line number where error occurred (if available)
        line: Option<usize>,
    },

    /// Element text did not conform to the declared simple type.
    TypeValidationError {
        /// Element or attribute name
        name: String,
        /// Expected type
        expected_type: String,
        /// Value that failed validation
        value: String,
        /// Line number where error occurred (if available)
        line: Option<usize>,
    },

    /// An element occurred fewer or more times than allowed (minOccurs, maxOccurs).
    CardinalityError {
        /// Element name
        element: String,
        /// Minimum occurrences allowed
        min: usize,
        /// Maximum occurrences allowed (None = unbounded)
        max: Option<usize>,
        /// Actual occurrences found
        actual: usize,
        /// Line number where error occurred (if available)
        line: Option<usize>,
    },

    /// An attribute declared as required is absent from the element.
    RequiredAttributeMissing {
        /// Element name
        element: String,
        /// Missing attribute name
        attribute: String,
        /// Line number where error occurred (if available)
        line: Option<usize>,
    },

    /// An element was found that the schema does not declare at that position.
    UnknownElement {
        /// Element name that is not in schema
        element: String,
        /// Line number where error occurred (if available)
        line: Option<usize>,
    },

    /// The schema file does not exist.
    SchemaNotFound {
        /// Path to schema file
        path: PathBuf,
    },

    /// An I/O error occurred while reading the schema file.
    IoError {
        /// Description of I/O error
        message: String,
    },
}
220
221impl fmt::Display for ValidationError {
222    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
223        match self {
224            ValidationError::SchemaParseError { message } => {
225                write!(f, "Schema parse error: {}", message)
226            }
227            ValidationError::DocumentParseError {
228                message,
229                line,
230                column,
231            } => {
232                write!(f, "Document parse error: {}", message)?;
233                if let Some(l) = line {
234                    write!(f, " at line {}", l)?;
235                    if let Some(c) = column {
236                        write!(f, ", column {}", c)?;
237                    }
238                }
239                Ok(())
240            }
241            ValidationError::ElementValidationError {
242                element,
243                expected,
244                found,
245                line,
246            } => {
247                write!(
248                    f,
249                    "Element validation failed for '{}': expected {}, found '{}'",
250                    element, expected, found
251                )?;
252                if let Some(l) = line {
253                    write!(f, " at line {}", l)?;
254                }
255                Ok(())
256            }
257            ValidationError::AttributeValidationError {
258                element,
259                attribute,
260                message,
261                line,
262            } => {
263                write!(
264                    f,
265                    "Attribute validation failed for '{}.{}': {}",
266                    element, attribute, message
267                )?;
268                if let Some(l) = line {
269                    write!(f, " at line {}", l)?;
270                }
271                Ok(())
272            }
273            ValidationError::TypeValidationError {
274                name,
275                expected_type,
276                value,
277                line,
278            } => {
279                write!(
280                    f,
281                    "Type validation failed for '{}': expected {}, found '{}'",
282                    name, expected_type, value
283                )?;
284                if let Some(l) = line {
285                    write!(f, " at line {}", l)?;
286                }
287                Ok(())
288            }
289            ValidationError::CardinalityError {
290                element,
291                min,
292                max,
293                actual,
294                line,
295            } => {
296                write!(
297                    f,
298                    "Cardinality error for '{}': expected {}..{}, found {}",
299                    element,
300                    min,
301                    max.map_or("unbounded".to_string(), |m| m.to_string()),
302                    actual
303                )?;
304                if let Some(l) = line {
305                    write!(f, " at line {}", l)?;
306                }
307                Ok(())
308            }
309            ValidationError::RequiredAttributeMissing {
310                element,
311                attribute,
312                line,
313            } => {
314                write!(
315                    f,
316                    "Required attribute '{}' missing from element '{}'",
317                    attribute, element
318                )?;
319                if let Some(l) = line {
320                    write!(f, " at line {}", l)?;
321                }
322                Ok(())
323            }
324            ValidationError::UnknownElement { element, line } => {
325                write!(f, "Unknown element '{}' not defined in schema", element)?;
326                if let Some(l) = line {
327                    write!(f, " at line {}", l)?;
328                }
329                Ok(())
330            }
331            ValidationError::SchemaNotFound { path } => {
332                write!(f, "Schema file not found: {}", path.display())
333            }
334            ValidationError::IoError { message } => {
335                write!(f, "I/O error: {}", message)
336            }
337        }
338    }
339}
340
// Empty impl: the trait's provided methods are used as-is; the required
// `Debug` and `Display` implementations are supplied elsewhere in this file.
impl std::error::Error for ValidationError {}
342
/// Simple XSD schema representation for validation.
///
/// Holds only what the validator consults: the element declarations found
/// directly under the schema root.
#[derive(Debug, Clone)]
struct Schema {
    /// Top-level element declarations, keyed by element name.
    elements: HashMap<String, ElementDef>,
}
348
/// Element definition in XSD schema
#[derive(Debug, Clone)]
struct ElementDef {
    /// Value of the declaration's `name` attribute.
    name: String,
    /// Value of the declaration's `type` attribute, if it has one.
    type_name: Option<String>,
    /// Inline `complexType` child of the declaration, if it has one.
    complex_type: Option<ComplexType>,
    /// Lower bound on how often the element must occur within its parent.
    min_occurs: usize,
    /// Upper bound on occurrences; `None` means no upper bound is enforced.
    max_occurs: Option<usize>,
}
358
/// Complex type definition
#[derive(Debug, Clone)]
struct ComplexType {
    /// Child element declarations collected from `sequence` children.
    sequence: Vec<ElementDef>,
    /// Attribute declarations found directly under the complex type.
    attributes: Vec<AttributeDef>,
}
365
/// Attribute definition
#[derive(Debug, Clone)]
struct AttributeDef {
    /// Value of the declaration's `name` attribute.
    name: String,
    /// Declared type name; the parser falls back to `xs:string` when the
    /// declaration has no `type` attribute.
    type_name: String,
    /// `true` only when the declaration says `use="required"`.
    required: bool,
}
373
/// XSD Schema Validator
///
/// Validates XML documents against a parsed XSD schema and reports the first
/// violation found as a [`ValidationError`].
#[derive(Debug, Clone)]
pub struct SchemaValidator {
    /// Parsed schema that every `validate` call checks documents against.
    schema: Schema,
}
381
382impl SchemaValidator {
383    /// Create a new validator from XSD schema string.
384    ///
385    /// # Arguments
386    ///
387    /// * `xsd` - XSD schema definition as a string
388    ///
389    /// # Errors
390    ///
391    /// Returns `ValidationError::SchemaParseError` if the schema is malformed.
392    ///
393    /// # Example
394    ///
395    /// ```rust
396    /// use hedl_xml::schema::SchemaValidator;
397    ///
398    /// let schema = r#"<?xml version="1.0"?>
399    /// <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
400    ///   <xs:element name="root" type="xs:string"/>
401    /// </xs:schema>"#;
402    ///
403    /// let validator = SchemaValidator::from_xsd(schema)?;
404    /// # Ok::<(), Box<dyn std::error::Error>>(())
405    /// ```
406    pub fn from_xsd(xsd: &str) -> Result<Self, ValidationError> {
407        let schema = Self::parse_xsd(xsd)?;
408        Ok(Self { schema })
409    }
410
411    /// Parse XSD schema document
412    fn parse_xsd(xsd: &str) -> Result<Schema, ValidationError> {
413        // Security: Use explicit parsing options with DTD disabled
414        let options = ParsingOptions {
415            allow_dtd: false, // Explicitly disable DTD processing for security
416            ..Default::default()
417        };
418
419        let doc = XmlDocument::parse_with_options(xsd, options).map_err(|e| {
420            ValidationError::SchemaParseError {
421                message: e.to_string(),
422            }
423        })?;
424
425        let root = doc.root_element();
426
427        // Verify this is an XSD schema
428        if root.tag_name().name() != "schema" {
429            return Err(ValidationError::SchemaParseError {
430                message: "Root element must be <xs:schema>".to_string(),
431            });
432        }
433
434        let mut elements = HashMap::new();
435
436        // Parse top-level elements
437        for child in root.children().filter(|n| n.is_element()) {
438            if child.tag_name().name() == "element" {
439                let elem_def = Self::parse_element(&child)?;
440                elements.insert(elem_def.name.clone(), elem_def);
441            }
442        }
443
444        Ok(Schema { elements })
445    }
446
447    /// Parse an element definition
448    fn parse_element(node: &Node<'_, '_>) -> Result<ElementDef, ValidationError> {
449        let name = node
450            .attribute("name")
451            .ok_or_else(|| ValidationError::SchemaParseError {
452                message: "Element must have 'name' attribute".to_string(),
453            })?
454            .to_string();
455
456        let type_name = node.attribute("type").map(|s| s.to_string());
457        let min_occurs = node
458            .attribute("minOccurs")
459            .and_then(|s| s.parse::<usize>().ok())
460            .unwrap_or(1);
461        let max_occurs = node.attribute("maxOccurs").and_then(|s| {
462            if s == "unbounded" {
463                None
464            } else {
465                s.parse::<usize>().ok()
466            }
467        });
468
469        // Parse complex type if present
470        let mut complex_type = None;
471        for child in node.children().filter(|n| n.is_element()) {
472            if child.tag_name().name() == "complexType" {
473                complex_type = Some(Self::parse_complex_type(&child)?);
474                break;
475            }
476        }
477
478        Ok(ElementDef {
479            name,
480            type_name,
481            complex_type,
482            min_occurs,
483            max_occurs,
484        })
485    }
486
487    /// Parse a complex type definition
488    fn parse_complex_type(node: &Node<'_, '_>) -> Result<ComplexType, ValidationError> {
489        let mut sequence = Vec::new();
490        let mut attributes = Vec::new();
491
492        for child in node.children().filter(|n| n.is_element()) {
493            match child.tag_name().name() {
494                "sequence" => {
495                    for elem_node in child.children().filter(|n| n.is_element()) {
496                        if elem_node.tag_name().name() == "element" {
497                            sequence.push(Self::parse_element(&elem_node)?);
498                        }
499                    }
500                }
501                "attribute" => {
502                    attributes.push(Self::parse_attribute(&child)?);
503                }
504                _ => {}
505            }
506        }
507
508        Ok(ComplexType {
509            sequence,
510            attributes,
511        })
512    }
513
514    /// Parse an attribute definition
515    fn parse_attribute(node: &Node<'_, '_>) -> Result<AttributeDef, ValidationError> {
516        let name = node
517            .attribute("name")
518            .ok_or_else(|| ValidationError::SchemaParseError {
519                message: "Attribute must have 'name' attribute".to_string(),
520            })?
521            .to_string();
522
523        let type_name = node.attribute("type").unwrap_or("xs:string").to_string();
524
525        let required = node.attribute("use") == Some("required");
526
527        Ok(AttributeDef {
528            name,
529            type_name,
530            required,
531        })
532    }
533
534    /// Create a new validator from XSD schema file.
535    ///
536    /// # Arguments
537    ///
538    /// * `path` - Path to XSD schema file
539    ///
540    /// # Errors
541    ///
542    /// Returns `ValidationError::SchemaNotFound` if file doesn't exist,
543    /// `ValidationError::IoError` for I/O errors, or
544    /// `ValidationError::SchemaParseError` if schema is malformed.
545    ///
546    /// # Example
547    ///
548    /// ```rust,no_run
549    /// use hedl_xml::schema::SchemaValidator;
550    /// use std::path::Path;
551    ///
552    /// let validator = SchemaValidator::from_file(Path::new("schema.xsd"))?;
553    /// # Ok::<(), Box<dyn std::error::Error>>(())
554    /// ```
555    pub fn from_file(path: &Path) -> Result<Self, ValidationError> {
556        if !path.exists() {
557            return Err(ValidationError::SchemaNotFound {
558                path: path.to_path_buf(),
559            });
560        }
561
562        let content = fs::read_to_string(path).map_err(|e| ValidationError::IoError {
563            message: e.to_string(),
564        })?;
565
566        Self::from_xsd(&content)
567    }
568
569    /// Validate an XML document against the schema.
570    ///
571    /// # Arguments
572    ///
573    /// * `xml` - XML document to validate
574    ///
575    /// # Errors
576    ///
577    /// Returns various `ValidationError` variants if validation fails.
578    ///
579    /// # Example
580    ///
581    /// ```rust
582    /// use hedl_xml::schema::SchemaValidator;
583    ///
584    /// let schema = r#"<?xml version="1.0"?>
585    /// <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
586    ///   <xs:element name="root" type="xs:string"/>
587    /// </xs:schema>"#;
588    ///
589    /// let validator = SchemaValidator::from_xsd(schema)?;
590    ///
591    /// let xml = r#"<?xml version="1.0"?><root>value</root>"#;
592    /// validator.validate(xml)?;
593    /// # Ok::<(), Box<dyn std::error::Error>>(())
594    /// ```
595    pub fn validate(&self, xml: &str) -> Result<(), ValidationError> {
596        // Security: Use explicit parsing options with DTD disabled
597        let options = ParsingOptions {
598            allow_dtd: false, // Explicitly disable DTD processing for security
599            ..Default::default()
600        };
601
602        let doc = XmlDocument::parse_with_options(xml, options).map_err(|e| {
603            ValidationError::DocumentParseError {
604                message: e.to_string(),
605                line: None,
606                column: None,
607            }
608        })?;
609
610        let root = doc.root_element();
611        let root_name = root.tag_name().name();
612
613        // Find schema definition for root element
614        let schema_elem =
615            self.schema
616                .elements
617                .get(root_name)
618                .ok_or_else(|| ValidationError::UnknownElement {
619                    element: root_name.to_string(),
620                    line: Some(doc.text_pos_at(root.range().start).row as usize),
621                })?;
622
623        self.validate_element(&root, schema_elem)?;
624
625        Ok(())
626    }
627
628    /// Validate an element against schema definition
629    fn validate_element(
630        &self,
631        node: &Node<'_, '_>,
632        schema_elem: &ElementDef,
633    ) -> Result<(), ValidationError> {
634        let line = node.document().text_pos_at(node.range().start).row as usize;
635
636        // Validate element type and content
637        if let Some(ref type_name) = schema_elem.type_name {
638            self.validate_type(node, type_name, line)?;
639        }
640
641        // If complex type, validate structure
642        if let Some(ref complex_type) = schema_elem.complex_type {
643            // Validate attributes
644            self.validate_attributes_complex(node, complex_type, line)?;
645
646            // Validate child elements
647            self.validate_children_complex(node, complex_type, line)?;
648        }
649
650        Ok(())
651    }
652
653    /// Validate element type
654    fn validate_type(
655        &self,
656        node: &Node<'_, '_>,
657        type_ref: &str,
658        line: usize,
659    ) -> Result<(), ValidationError> {
660        let text = node.text().unwrap_or("");
661
662        // Validate based on XML Schema built-in types
663        match type_ref {
664            "xs:string" | "string" => {
665                // Any text is valid
666            }
667            "xs:integer" | "integer" => {
668                if text.parse::<i64>().is_err() {
669                    return Err(ValidationError::TypeValidationError {
670                        name: node.tag_name().name().to_string(),
671                        expected_type: "xs:integer".to_string(),
672                        value: text.to_string(),
673                        line: Some(line),
674                    });
675                }
676            }
677            "xs:decimal" | "decimal" => {
678                if text.parse::<f64>().is_err() {
679                    return Err(ValidationError::TypeValidationError {
680                        name: node.tag_name().name().to_string(),
681                        expected_type: "xs:decimal".to_string(),
682                        value: text.to_string(),
683                        line: Some(line),
684                    });
685                }
686            }
687            "xs:boolean" | "boolean" => {
688                if !["true", "false", "1", "0"].contains(&text) {
689                    return Err(ValidationError::TypeValidationError {
690                        name: node.tag_name().name().to_string(),
691                        expected_type: "xs:boolean".to_string(),
692                        value: text.to_string(),
693                        line: Some(line),
694                    });
695                }
696            }
697            _ => {
698                // Custom type - would need type lookup in schema
699            }
700        }
701
702        Ok(())
703    }
704
705    /// Validate attributes against complex type definition
706    fn validate_attributes_complex(
707        &self,
708        node: &Node<'_, '_>,
709        complex_type: &ComplexType,
710        line: usize,
711    ) -> Result<(), ValidationError> {
712        let element_name = node.tag_name().name();
713
714        // Check required attributes
715        for attr_def in &complex_type.attributes {
716            if attr_def.required && node.attribute(attr_def.name.as_str()).is_none() {
717                return Err(ValidationError::RequiredAttributeMissing {
718                    element: element_name.to_string(),
719                    attribute: attr_def.name.clone(),
720                    line: Some(line),
721                });
722            }
723
724            // Validate attribute type if present
725            if let Some(value) = node.attribute(attr_def.name.as_str()) {
726                self.validate_simple_type(value, &attr_def.type_name)
727                    .map_err(|_| ValidationError::AttributeValidationError {
728                        element: element_name.to_string(),
729                        attribute: attr_def.name.clone(),
730                        message: format!("Expected type {}, found '{}'", attr_def.type_name, value),
731                        line: Some(line),
732                    })?;
733            }
734        }
735
736        Ok(())
737    }
738
739    /// Validate child elements against complex type sequence
740    fn validate_children_complex(
741        &self,
742        node: &Node<'_, '_>,
743        complex_type: &ComplexType,
744        line: usize,
745    ) -> Result<(), ValidationError> {
746        let children: Vec<_> = node.children().filter(|n| n.is_element()).collect();
747
748        // Validate each child element in sequence
749        for child in &children {
750            let child_name = child.tag_name().name();
751
752            // Find matching element in sequence
753            let schema_elem = complex_type
754                .sequence
755                .iter()
756                .find(|e| e.name == child_name)
757                .ok_or_else(|| ValidationError::UnknownElement {
758                    element: child_name.to_string(),
759                    line: Some(child.document().text_pos_at(child.range().start).row as usize),
760                })?;
761
762            self.validate_element(child, schema_elem)?;
763        }
764
765        // Validate cardinality for required elements
766        for elem_def in &complex_type.sequence {
767            let count = children
768                .iter()
769                .filter(|n| n.tag_name().name() == elem_def.name)
770                .count();
771
772            if count < elem_def.min_occurs {
773                return Err(ValidationError::CardinalityError {
774                    element: elem_def.name.clone(),
775                    min: elem_def.min_occurs,
776                    max: elem_def.max_occurs,
777                    actual: count,
778                    line: Some(line),
779                });
780            }
781
782            if let Some(max) = elem_def.max_occurs {
783                if count > max {
784                    return Err(ValidationError::CardinalityError {
785                        element: elem_def.name.clone(),
786                        min: elem_def.min_occurs,
787                        max: elem_def.max_occurs,
788                        actual: count,
789                        line: Some(line),
790                    });
791                }
792            }
793        }
794
795        Ok(())
796    }
797
798    /// Validate a simple type value
799    fn validate_simple_type(&self, value: &str, type_name: &str) -> Result<(), ()> {
800        match type_name {
801            "xs:string" | "string" => Ok(()),
802            "xs:integer" | "integer" => value.parse::<i64>().map(|_| ()).map_err(|_| ()),
803            "xs:decimal" | "decimal" => value.parse::<f64>().map(|_| ()).map_err(|_| ()),
804            "xs:boolean" | "boolean" => {
805                if ["true", "false", "1", "0"].contains(&value) {
806                    Ok(())
807                } else {
808                    Err(())
809                }
810            }
811            _ => Ok(()), // Unknown types pass for now
812        }
813    }
814}
815
/// Thread-safe, size-bounded cache for schema validators.
///
/// Caches parsed schemas by file path to avoid re-parsing on every validation.
/// Uses parking_lot RwLock so lookups of already-cached schemas only need a
/// shared lock.
///
/// Eviction is not least-recently-used: when the cache is full, `get_or_load`
/// removes whichever key the underlying `HashMap` iterator yields first, which
/// is an arbitrary entry.
///
/// # Example
///
/// ```text
/// use hedl_xml::schema::SchemaCache;
/// use std::path::Path;
///
/// let cache = SchemaCache::new(100);
///
/// // First call parses and caches
/// let validator = cache.get_or_load(Path::new("schema.xsd"))?;
///
/// // Second call uses cached validator
/// let validator2 = cache.get_or_load(Path::new("schema.xsd"))?;
/// ```
pub struct SchemaCache {
    /// Cached validators keyed by the path they were loaded from.
    cache: Arc<RwLock<HashMap<PathBuf, Arc<SchemaValidator>>>>,
    /// Entry count at which an existing entry is evicted before inserting.
    max_size: usize,
}
839
840impl SchemaCache {
841    /// Create a new schema cache with maximum size.
842    ///
843    /// # Arguments
844    ///
845    /// * `max_size` - Maximum number of schemas to cache
846    ///
847    /// # Example
848    ///
849    /// ```rust
850    /// use hedl_xml::schema::SchemaCache;
851    ///
852    /// let cache = SchemaCache::new(50);
853    /// ```
854    pub fn new(max_size: usize) -> Self {
855        Self {
856            cache: Arc::new(RwLock::new(HashMap::new())),
857            max_size,
858        }
859    }
860
861    /// Get cached validator or load from file.
862    ///
863    /// If the schema is already cached, returns the cached validator.
864    /// Otherwise, loads the schema from file and caches it.
865    ///
866    /// # Arguments
867    ///
868    /// * `path` - Path to schema file
869    ///
870    /// # Errors
871    ///
872    /// Returns `ValidationError` if schema file cannot be loaded or parsed.
873    ///
874    /// # Example
875    ///
876    /// ```rust,no_run
877    /// use hedl_xml::schema::SchemaCache;
878    /// use std::path::Path;
879    ///
880    /// let cache = SchemaCache::new(10);
881    /// let validator = cache.get_or_load(Path::new("schema.xsd"))?;
882    /// # Ok::<(), Box<dyn std::error::Error>>(())
883    /// ```
884    pub fn get_or_load(&self, path: &Path) -> Result<Arc<SchemaValidator>, ValidationError> {
885        // Try read lock first
886        {
887            let cache = self.cache.read();
888            if let Some(validator) = cache.get(path) {
889                return Ok(Arc::clone(validator));
890            }
891        }
892
893        // Need to load - acquire write lock
894        let mut cache = self.cache.write();
895
896        // Double-check in case another thread loaded while we waited
897        if let Some(validator) = cache.get(path) {
898            return Ok(Arc::clone(validator));
899        }
900
901        // Load validator
902        let validator = Arc::new(SchemaValidator::from_file(path)?);
903
904        // Evict oldest entry if cache is full
905        if cache.len() >= self.max_size {
906            if let Some(oldest_key) = cache.keys().next().cloned() {
907                cache.remove(&oldest_key);
908            }
909        }
910
911        cache.insert(path.to_path_buf(), Arc::clone(&validator));
912
913        Ok(validator)
914    }
915
916    /// Clear all cached schemas.
917    ///
918    /// # Example
919    ///
920    /// ```rust
921    /// use hedl_xml::schema::SchemaCache;
922    ///
923    /// let cache = SchemaCache::new(10);
924    /// cache.clear();
925    /// ```
926    pub fn clear(&self) {
927        self.cache.write().clear();
928    }
929
930    /// Get number of cached schemas.
931    ///
932    /// # Example
933    ///
934    /// ```rust
935    /// use hedl_xml::schema::SchemaCache;
936    ///
937    /// let cache = SchemaCache::new(10);
938    /// assert_eq!(cache.size(), 0);
939    /// ```
940    pub fn size(&self) -> usize {
941        self.cache.read().len()
942    }
943}
944
945impl Default for SchemaCache {
946    /// Create default cache with size 100
947    fn default() -> Self {
948        Self::new(100)
949    }
950}
951
#[cfg(test)]
mod tests {
    use super::*;

    /// Schema shared by most tests: a `person` element containing a string
    /// `name` followed by an integer `age`.
    const SIMPLE_SCHEMA: &str = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="person">
    <xs:complexType>
      <xs:sequence>
        <xs:element name="name" type="xs:string"/>
        <xs:element name="age" type="xs:integer"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
</xs:schema>"#;

    /// Writes `SIMPLE_SCHEMA` into a fresh temporary file and returns the
    /// handle; the caller must keep it alive for as long as the path is used.
    fn simple_schema_file() -> tempfile::NamedTempFile {
        use std::io::Write;

        let mut file = tempfile::NamedTempFile::new().unwrap();
        file.write_all(SIMPLE_SCHEMA.as_bytes()).unwrap();
        file
    }

    #[test]
    fn test_schema_validator_creation() {
        let built = SchemaValidator::from_xsd(SIMPLE_SCHEMA);
        assert!(built.is_ok());
    }

    #[test]
    fn test_valid_document() {
        let document = r#"<?xml version="1.0"?>
<person>
  <name>Alice</name>
  <age>30</age>
</person>"#;

        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();
        let outcome = validator.validate(document);

        assert!(outcome.is_ok());
    }

    #[test]
    fn test_invalid_type() {
        // `age` is declared as xs:integer, so a word must be rejected.
        let document = r#"<?xml version="1.0"?>
<person>
  <name>Alice</name>
  <age>thirty</age>
</person>"#;

        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();
        let outcome = validator.validate(document);
        assert!(outcome.is_err());

        match outcome {
            Err(ValidationError::TypeValidationError {
                name,
                expected_type,
                value,
                ..
            }) => {
                assert_eq!(name, "age");
                assert_eq!(expected_type, "xs:integer");
                assert_eq!(value, "thirty");
            }
            _ => panic!("Expected TypeValidationError"),
        }
    }

    #[test]
    fn test_unknown_element() {
        // `email` is not part of the schema's sequence.
        let document = r#"<?xml version="1.0"?>
<person>
  <name>Alice</name>
  <age>30</age>
  <email>alice@example.com</email>
</person>"#;

        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();
        let outcome = validator.validate(document);
        assert!(outcome.is_err());

        match outcome {
            Err(ValidationError::UnknownElement { element, .. }) => {
                assert_eq!(element, "email");
            }
            _ => panic!("Expected UnknownElement error"),
        }
    }

    #[test]
    fn test_malformed_xml() {
        // The `name` element is never closed.
        let document = r#"<?xml version="1.0"?>
<person>
  <name>Alice
  <age>30</age>
</person>"#;

        let validator = SchemaValidator::from_xsd(SIMPLE_SCHEMA).unwrap();
        let outcome = validator.validate(document);

        assert!(outcome.is_err());
        assert!(matches!(
            outcome,
            Err(ValidationError::DocumentParseError { .. })
        ));
    }

    #[test]
    fn test_schema_cache() {
        let cache = SchemaCache::new(5);
        assert_eq!(cache.size(), 0);

        // Schema on disk for the cache to load
        let schema_file = simple_schema_file();
        let schema_path = schema_file.path();

        // Initial request populates the cache
        let first = cache.get_or_load(schema_path).unwrap();
        assert_eq!(cache.size(), 1);

        // Repeat request must not add another entry
        let second = cache.get_or_load(schema_path).unwrap();
        assert_eq!(cache.size(), 1);

        // Both requests hand out the very same allocation
        assert!(Arc::ptr_eq(&first, &second));

        // Clearing empties the cache
        cache.clear();
        assert_eq!(cache.size(), 0);
    }

    #[test]
    fn test_cache_eviction() {
        let cache = SchemaCache::new(2);

        // Three distinct schema files, one more than the cache can hold
        let schema_files: Vec<_> = (0..3).map(|_| simple_schema_file()).collect();

        // The first two fit
        cache.get_or_load(schema_files[0].path()).unwrap();
        cache.get_or_load(schema_files[1].path()).unwrap();
        assert_eq!(cache.size(), 2);

        // The third forces an eviction, so the count stays at the limit
        cache.get_or_load(schema_files[2].path()).unwrap();
        assert_eq!(cache.size(), 2);
    }

    #[test]
    fn test_error_display() {
        let rendered = ValidationError::TypeValidationError {
            name: "age".to_string(),
            expected_type: "xs:integer".to_string(),
            value: "thirty".to_string(),
            line: Some(5),
        }
        .to_string();

        assert!(rendered.contains("age"));
        assert!(rendered.contains("xs:integer"));
        assert!(rendered.contains("thirty"));
        assert!(rendered.contains("line 5"));
    }

    #[test]
    fn test_schema_not_found() {
        let missing = Path::new("/nonexistent/schema.xsd");
        let outcome = SchemaValidator::from_file(missing);

        assert!(outcome.is_err());
        assert!(matches!(
            outcome,
            Err(ValidationError::SchemaNotFound { .. })
        ));
    }

    #[test]
    fn test_invalid_schema() {
        let schema_with_unknown_type = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="broken" type="nonexistent:type"/>
</xs:schema>"#;

        // No assertion on purpose: the schema parser is permissive and lets
        // unknown types through; they are only checked once a document is
        // validated. This test just makes sure parsing does not panic.
        let _outcome = SchemaValidator::from_xsd(schema_with_unknown_type);
    }

    #[test]
    fn test_boolean_type_validation() {
        let flag_schema = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="flag" type="xs:boolean"/>
</xs:schema>"#;

        let validator = SchemaValidator::from_xsd(flag_schema).unwrap();

        // Every accepted spelling of a boolean
        for literal in ["true", "false", "1", "0"].iter() {
            let document = format!(r#"<?xml version="1.0"?><flag>{}</flag>"#, literal);
            assert!(validator.validate(&document).is_ok());
        }

        // Anything else is rejected
        let rejected = r#"<?xml version="1.0"?><flag>yes</flag>"#;
        assert!(validator.validate(rejected).is_err());
    }

    #[test]
    fn test_decimal_type_validation() {
        let price_schema = r#"<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="price" type="xs:decimal"/>
</xs:schema>"#;

        let validator = SchemaValidator::from_xsd(price_schema).unwrap();

        // A well-formed decimal passes
        let accepted = r#"<?xml version="1.0"?><price>19.99</price>"#;
        assert!(validator.validate(accepted).is_ok());

        // Free text does not
        let rejected = r#"<?xml version="1.0"?><price>not a number</price>"#;
        assert!(validator.validate(rejected).is_err());
    }

    #[test]
    fn test_concurrent_cache_access() {
        use std::sync::Arc;
        use std::thread;

        let cache = Arc::new(SchemaCache::new(10));

        // One schema file that every thread requests
        let schema_file = simple_schema_file();
        let path = schema_file.path().to_path_buf();

        // Ten threads, each asking the cache for the same path 100 times
        let workers: Vec<_> = (0..10)
            .map(|_| {
                let shared_cache = Arc::clone(&cache);
                let schema_path = path.clone();
                thread::spawn(move || {
                    for _ in 0..100 {
                        let _validator = shared_cache.get_or_load(&schema_path).unwrap();
                    }
                })
            })
            .collect();

        // Block until every thread is done
        for worker in workers {
            worker.join().unwrap();
        }

        // All requests together must have produced exactly one entry
        assert_eq!(cache.size(), 1);
    }
}