fastxml 0.8.1

A fast, memory-efficient XML library with XPath and XSD validation support
Documentation
//! One-pass streaming schema validator implementation.

mod content;
mod event_handler;
mod lookup;
mod occurrence;

use std::io::BufRead;
use std::sync::Arc;

use crate::error::{Result, StructuredError, ValidationErrorType};
use crate::event::StreamingParser;
use crate::schema::types::CompiledSchema;
use crate::schema::xsd::constraints::ConstraintValidator;

use super::ValidationMode;
use super::state::ValidationState;

/// Switches that turn individual validation checks off.
///
/// Every flag defaults to `false`, meaning the corresponding check runs.
/// Setting a flag to `true` skips that check, which can significantly
/// improve performance for large documents at the cost of coverage.
#[derive(Clone, Debug, Default)]
pub struct ValidationOptions {
    /// When `true`, minOccurs validation (required child element checks) is skipped.
    ///
    /// Skipping can improve performance, but missing required elements may
    /// go unreported.
    pub skip_min_occurs: bool,

    /// When `true`, maxOccurs validation (element count limit checks) is skipped.
    ///
    /// Skipping can significantly improve performance (~50%), but element
    /// count violations may go unreported.
    pub skip_max_occurs: bool,

    /// When `true`, substitution groups are not resolved during occurs validation.
    ///
    /// Skipping can improve performance, but may cause false positives/negatives
    /// for elements that use substitution groups.
    pub skip_substitution_groups: bool,
}

/// One-pass streaming schema validator.
///
/// Validates XML documents against an XSD schema during streaming parsing
/// in a single pass. Best for memory-constrained environments or non-seekable streams.
pub struct OnePassSchemaValidator {
    /// Compiled schema to validate against, shared through an `Arc`.
    pub(crate) schema: Arc<CompiledSchema>,
    /// Running validation state; supplies the element path attached to errors.
    pub(crate) state: ValidationState,
    /// Collected diagnostics (both errors and warnings), in insertion order.
    pub(crate) errors: Vec<StructuredError>,
    /// Line attached to newly created errors, when known.
    pub(crate) current_line: Option<usize>,
    /// Column attached to newly created errors, when known.
    pub(crate) current_column: Option<usize>,
    /// Constraint validator for identity constraints (unique, key, keyref)
    pub(crate) constraint_validator: ConstraintValidator,
    /// Validation mode (strict or lenient)
    pub(crate) mode: ValidationMode,
    /// Maximum number of errors to collect (0 = unlimited)
    pub(crate) max_errors: usize,
    /// Options for controlling which validations are performed
    pub(crate) options: ValidationOptions,
}

impl OnePassSchemaValidator {
    /// Creates a new one-pass validator in strict mode.
    ///
    /// The validator starts with an empty error list, no error limit
    /// (`max_errors == 0`), default [`ValidationOptions`], and no known
    /// source position.
    pub fn new(schema: Arc<CompiledSchema>) -> Self {
        Self {
            schema,
            state: ValidationState::new(),
            errors: Vec::new(),
            // No position is known before parsing starts. Both coordinates
            // begin as `None` so that an early error never carries a
            // made-up column without a matching line.
            current_line: None,
            current_column: None,
            constraint_validator: ConstraintValidator::new(),
            mode: ValidationMode::Strict,
            max_errors: 0,
            options: ValidationOptions::default(),
        }
    }

    /// Builds a streaming validator that uses the given validation mode.
    ///
    /// Everything other than `mode` is initialised exactly as in [`Self::new`].
    #[deprecated(
        since = "0.3.0",
        note = "use OnePassSchemaValidator::new(schema).set_mode(mode) instead"
    )]
    pub fn with_mode(schema: Arc<CompiledSchema>, mode: ValidationMode) -> Self {
        let mut validator = Self::new(schema);
        validator.mode = mode;
        validator
    }

    /// Creates a new streaming validator with specified options.
    ///
    /// Everything other than `options` is initialised exactly as in
    /// [`Self::new`].
    // `set_options` takes `&mut self` and returns `()`, so it cannot be
    // chained onto `new(schema)`; the note spells out the two-step form.
    #[deprecated(
        since = "0.3.0",
        note = "create the validator with OnePassSchemaValidator::new(schema), then call set_options(options) on it"
    )]
    pub fn with_options(schema: Arc<CompiledSchema>, options: ValidationOptions) -> Self {
        Self {
            options,
            ..Self::new(schema)
        }
    }

    /// Sets the validation mode (builder pattern).
    pub fn set_mode(mut self, mode: ValidationMode) -> Self {
        self.mode = mode;
        self
    }

    /// Replaces the validation options in place.
    pub fn set_options(&mut self, options: ValidationOptions) {
        // The previous options are discarded.
        drop(std::mem::replace(&mut self.options, options));
    }

    /// Returns the current validation options.
    pub fn options(&self) -> &ValidationOptions {
        &self.options
    }

    /// Sets the maximum number of errors to collect (builder pattern).
    ///
    /// Set to 0 for unlimited errors (default).
    pub fn with_max_errors(mut self, max: usize) -> Self {
        self.max_errors = max;
        self
    }

    /// Sets the maximum number of errors to collect (setter pattern).
    ///
    /// Set to 0 for unlimited errors (default).
    pub fn set_max_errors(&mut self, max: usize) {
        self.max_errors = max;
    }

    /// Validates an XML document read from `reader` and returns the collected
    /// validation errors.
    ///
    /// This convenience method wraps the reader in a `StreamingParser`,
    /// registers the validator as a handler, runs the parse, and then
    /// recovers the validator from the parser to extract its errors.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the parser's `parse` call.
    ///
    /// # Example
    ///
    /// ```ignore
    /// use std::fs::File;
    /// use std::io::BufReader;
    /// use std::sync::Arc;
    /// use fastxml::schema::validator::OnePassSchemaValidator;
    ///
    /// let file = File::open("document.xml")?;
    /// let reader = BufReader::new(file);
    ///
    /// let errors = OnePassSchemaValidator::new(schema)
    ///     .with_max_errors(100)
    ///     .validate(reader)?;
    /// ```
    pub fn validate<R: BufRead>(self, reader: R) -> Result<Vec<StructuredError>> {
        let mut parser = StreamingParser::new(reader);
        parser.add_handler(Box::new(self));
        parser.parse()?;

        // Recover the validator from the parser's handlers: the first handler
        // that downcasts to this type supplies the errors. If none does
        // (should not happen), an empty list is returned.
        let collected = parser
            .into_handlers()
            .into_iter()
            .find_map(|handler| handler.as_any().downcast::<OnePassSchemaValidator>().ok())
            .map(|validator| validator.into_errors())
            .unwrap_or_default();

        Ok(collected)
    }

    /// Borrows every diagnostic collected so far.
    pub fn errors(&self) -> &[StructuredError] {
        self.errors.as_slice()
    }

    /// Collects references to the entries for which `is_error()` holds,
    /// leaving warnings out.
    pub fn errors_only(&self) -> Vec<&StructuredError> {
        let mut selected = Vec::new();
        for entry in &self.errors {
            if entry.is_error() {
                selected.push(entry);
            }
        }
        selected
    }

    /// Collects references to the entries for which `is_warning()` holds.
    pub fn warnings(&self) -> Vec<&StructuredError> {
        let mut selected = Vec::new();
        for entry in &self.errors {
            if entry.is_warning() {
                selected.push(entry);
            }
        }
        selected
    }

    /// Takes ownership of collected errors.
    pub fn into_errors(self) -> Vec<StructuredError> {
        self.errors
    }

    /// Reports whether validation passed: `true` when no collected entry is
    /// an error (warnings do not count against validity).
    pub fn is_valid(&self) -> bool {
        self.errors.iter().all(|entry| !entry.is_error())
    }

    /// Reports whether nothing at all was collected: no errors and no warnings.
    pub fn is_clean(&self) -> bool {
        self.errors.first().is_none()
    }

    /// Counts the collected entries for which `is_error()` holds (warnings
    /// are not counted).
    pub fn error_count(&self) -> usize {
        self.errors
            .iter()
            .map(|entry| usize::from(entry.is_error()))
            .sum()
    }

    /// Counts the collected entries for which `is_warning()` holds.
    pub fn warning_count(&self) -> usize {
        self.errors
            .iter()
            .map(|entry| usize::from(entry.is_warning()))
            .sum()
    }

    /// Reports whether another diagnostic may still be recorded.
    ///
    /// A limit of 0 means unlimited; otherwise collection stops once the
    /// number of stored entries reaches `max_errors`.
    pub(crate) fn should_collect_more(&self) -> bool {
        match self.max_errors {
            0 => true,
            limit => self.errors.len() < limit,
        }
    }

    /// Records `error`, unless the configured error limit has already been
    /// reached, in which case it is silently dropped.
    pub(crate) fn add_error(&mut self, error: StructuredError) {
        if !self.should_collect_more() {
            return;
        }
        self.errors.push(error);
    }

    /// Builds a `StructuredError` of the given type and message, annotated
    /// with the current line and column (each only when known) and the
    /// current element path taken from the validation state.
    ///
    /// The error is only constructed here; it is not added to the list.
    pub(crate) fn make_error(
        &self,
        error_type: ValidationErrorType,
        message: impl Into<String>,
    ) -> StructuredError {
        let base = StructuredError::new(message, error_type);

        let with_line = match self.current_line {
            Some(line) => base.with_line(line),
            None => base,
        };
        let with_position = match self.current_column {
            Some(column) => with_line.with_column(column),
            None => with_line,
        };

        with_position.with_element_path(self.state.element_path())
    }
}

/// Alias for [`OnePassSchemaValidator`].
///
/// This provides a convenient name for the recommended streaming validator.
/// Being a plain type alias, it refers to the very same type, so every
/// constructor and method of [`OnePassSchemaValidator`] is available
/// through it.
pub type StreamValidator = OnePassSchemaValidator;

#[cfg(test)]
mod tests;