// xdoc-rs 0.1.1
//
// Declarative XML engine for Rust
// Documentation
//! Security limits and safe defaults shared by higher-level modules.
//!
//! Defaults are intentionally conservative for an in-memory XML engine:
//! external entities, network access, and filesystem access are disabled unless
//! a future caller opts into a different policy explicitly.

use crate::core::{ErrorKind, XmlError, XmlResult};

/// Default ceiling for `SecurityLimits::max_document_bytes` (10 MiB).
pub const DEFAULT_MAX_DOCUMENT_BYTES: usize = 10 << 20;
/// Default ceiling for `SecurityLimits::max_text_bytes` (1 MiB).
pub const DEFAULT_MAX_TEXT_BYTES: usize = 1 << 20;
/// Default ceiling for `SecurityLimits::max_depth`.
pub const DEFAULT_MAX_DEPTH: usize = 128;
/// Default ceiling for `SecurityLimits::max_nodes`.
pub const DEFAULT_MAX_NODES: usize = 100_000;
/// Default ceiling for `SecurityLimits::max_query_steps`.
pub const DEFAULT_MAX_QUERY_STEPS: usize = 100_000;
/// Default ceiling for `SecurityLimits::max_transform_expansion`.
pub const DEFAULT_MAX_TRANSFORM_EXPANSION: usize = 100_000;

/// Upper bounds shared by the parser, query, transform, and signature modules.
///
/// Every bound is inclusive: the `check_*` methods accept a value equal to the
/// configured maximum and reject only values strictly greater than it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SecurityLimits {
    /// Largest accepted document size, in bytes.
    max_document_bytes: usize,
    /// Largest accepted text size, in bytes.
    max_text_bytes: usize,
    /// Deepest accepted nesting level.
    max_depth: usize,
    /// Largest accepted node count.
    max_nodes: usize,
    /// Largest accepted number of query steps.
    max_query_steps: usize,
    /// Largest accepted transform expansion count.
    max_transform_expansion: usize,
}

impl SecurityLimits {
    /// Returns the limits produced by [`Default::default`].
    pub fn new() -> Self {
        Default::default()
    }

    /// Consumes `self` and returns it with the document-size bound set to `limit`.
    pub fn with_max_document_bytes(self, limit: usize) -> Self {
        Self {
            max_document_bytes: limit,
            ..self
        }
    }

    /// Consumes `self` and returns it with the text-size bound set to `limit`.
    pub fn with_max_text_bytes(self, limit: usize) -> Self {
        Self {
            max_text_bytes: limit,
            ..self
        }
    }

    /// Consumes `self` and returns it with the depth bound set to `limit`.
    pub fn with_max_depth(self, limit: usize) -> Self {
        Self {
            max_depth: limit,
            ..self
        }
    }

    /// Consumes `self` and returns it with the node-count bound set to `limit`.
    pub fn with_max_nodes(self, limit: usize) -> Self {
        Self {
            max_nodes: limit,
            ..self
        }
    }

    /// Consumes `self` and returns it with the query-step bound set to `limit`.
    pub fn with_max_query_steps(self, limit: usize) -> Self {
        Self {
            max_query_steps: limit,
            ..self
        }
    }

    /// Consumes `self` and returns it with the transform-expansion bound set to `limit`.
    pub fn with_max_transform_expansion(self, limit: usize) -> Self {
        Self {
            max_transform_expansion: limit,
            ..self
        }
    }

    /// Configured document-size bound, in bytes.
    pub fn max_document_bytes(&self) -> usize {
        self.max_document_bytes
    }

    /// Configured text-size bound, in bytes.
    pub fn max_text_bytes(&self) -> usize {
        self.max_text_bytes
    }

    /// Configured depth bound.
    pub fn max_depth(&self) -> usize {
        self.max_depth
    }

    /// Configured node-count bound.
    pub fn max_nodes(&self) -> usize {
        self.max_nodes
    }

    /// Configured query-step bound.
    pub fn max_query_steps(&self) -> usize {
        self.max_query_steps
    }

    /// Configured transform-expansion bound.
    pub fn max_transform_expansion(&self) -> usize {
        self.max_transform_expansion
    }

    /// Accepts `bytes` up to and including the document-size bound.
    ///
    /// # Errors
    ///
    /// Returns the error built by `limit_error` when `bytes` is larger than
    /// the bound.
    pub fn check_document_size(&self, bytes: usize) -> XmlResult<()> {
        if bytes <= self.max_document_bytes {
            Ok(())
        } else {
            Err(limit_error(format!(
                "XML document exceeds maximum size of {} bytes",
                self.max_document_bytes
            )))
        }
    }

    /// Accepts `bytes` up to and including the text-size bound.
    ///
    /// # Errors
    ///
    /// Returns the error built by `limit_error` when `bytes` is larger than
    /// the bound.
    pub fn check_text_size(&self, bytes: usize) -> XmlResult<()> {
        if bytes <= self.max_text_bytes {
            Ok(())
        } else {
            Err(limit_error(format!(
                "XML text exceeds maximum of {} bytes",
                self.max_text_bytes
            )))
        }
    }

    /// Accepts `depth` up to and including the depth bound.
    ///
    /// # Errors
    ///
    /// Returns the error built by `limit_error` when `depth` is larger than
    /// the bound.
    pub fn check_depth(&self, depth: usize) -> XmlResult<()> {
        if depth <= self.max_depth {
            Ok(())
        } else {
            Err(limit_error(format!(
                "XML depth exceeds maximum of {}",
                self.max_depth
            )))
        }
    }

    /// Accepts `nodes` up to and including the node-count bound.
    ///
    /// # Errors
    ///
    /// Returns the error built by `limit_error` when `nodes` is larger than
    /// the bound.
    pub fn check_nodes(&self, nodes: usize) -> XmlResult<()> {
        if nodes <= self.max_nodes {
            Ok(())
        } else {
            Err(limit_error(format!(
                "XML node count exceeds maximum of {}",
                self.max_nodes
            )))
        }
    }

    /// Accepts `steps` up to and including the query-step bound.
    ///
    /// # Errors
    ///
    /// Returns the error built by `limit_error` when `steps` is larger than
    /// the bound.
    pub fn check_query_steps(&self, steps: usize) -> XmlResult<()> {
        if steps <= self.max_query_steps {
            Ok(())
        } else {
            Err(limit_error(format!(
                "query step count exceeds maximum of {}",
                self.max_query_steps
            )))
        }
    }

    /// Accepts `expansions` up to and including the transform-expansion bound.
    ///
    /// # Errors
    ///
    /// Returns the error built by `limit_error` when `expansions` is larger
    /// than the bound.
    pub fn check_transform_expansion(&self, expansions: usize) -> XmlResult<()> {
        if expansions <= self.max_transform_expansion {
            Ok(())
        } else {
            Err(limit_error(format!(
                "transform expansion exceeds maximum of {}",
                self.max_transform_expansion
            )))
        }
    }
}

impl Default for SecurityLimits {
    /// Fills every bound from its matching `DEFAULT_MAX_*` constant.
    fn default() -> Self {
        Self {
            max_document_bytes: DEFAULT_MAX_DOCUMENT_BYTES,
            max_text_bytes: DEFAULT_MAX_TEXT_BYTES,
            max_depth: DEFAULT_MAX_DEPTH,
            max_nodes: DEFAULT_MAX_NODES,
            max_query_steps: DEFAULT_MAX_QUERY_STEPS,
            max_transform_expansion: DEFAULT_MAX_TRANSFORM_EXPANSION,
        }
    }
}

/// Switches that govern entity handling and access to external resources.
///
/// The derived `Default` leaves every switch `false`, which is the same policy
/// [`EntityPolicy::secure`] returns.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct EntityPolicy {
    /// Gate consulted by `reject_external_entity` and `reject_doctype`.
    allow_external_entities: bool,
    /// Network switch; this type only stores and reports it.
    allow_network: bool,
    /// Filesystem switch; this type only stores and reports it.
    allow_filesystem: bool,
}

impl EntityPolicy {
    /// Returns the policy with every switch turned off.
    pub fn secure() -> Self {
        Self {
            allow_external_entities: false,
            allow_network: false,
            allow_filesystem: false,
        }
    }

    /// Consumes `self` and returns it with the external-entity switch set to `allow`.
    pub fn with_external_entities(self, allow: bool) -> Self {
        Self {
            allow_external_entities: allow,
            ..self
        }
    }

    /// Consumes `self` and returns it with the network switch set to `allow`.
    pub fn with_network(self, allow: bool) -> Self {
        Self {
            allow_network: allow,
            ..self
        }
    }

    /// Consumes `self` and returns it with the filesystem switch set to `allow`.
    pub fn with_filesystem(self, allow: bool) -> Self {
        Self {
            allow_filesystem: allow,
            ..self
        }
    }

    /// Reports the external-entity switch.
    pub fn external_entities_allowed(&self) -> bool {
        self.allow_external_entities
    }

    /// Reports the network switch.
    pub fn network_allowed(&self) -> bool {
        self.allow_network
    }

    /// Reports the filesystem switch.
    pub fn filesystem_allowed(&self) -> bool {
        self.allow_filesystem
    }

    /// Succeeds only when external entities have been enabled.
    ///
    /// # Errors
    ///
    /// Returns an `ErrorKind::Parse` error naming `entity` when the
    /// external-entity switch is off.
    pub fn reject_external_entity(&self, entity: &str) -> XmlResult<()> {
        if !self.allow_external_entities {
            return Err(XmlError::new(
                ErrorKind::Parse,
                format!("entity reference `&{entity};` is disabled by default"),
            ));
        }
        Ok(())
    }

    /// Succeeds only when external entities have been enabled.
    ///
    /// DOCTYPE handling is gated by the same switch as external entities.
    ///
    /// # Errors
    ///
    /// Returns an `ErrorKind::Parse` error when the external-entity switch is
    /// off.
    pub fn reject_doctype(&self) -> XmlResult<()> {
        if !self.allow_external_entities {
            return Err(XmlError::new(
                ErrorKind::Parse,
                "DOCTYPE declarations are disabled by default",
            ));
        }
        Ok(())
    }
}

#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ParserSecurityConfig {
    limits: SecurityLimits,
    entity_policy: EntityPolicy,
}

impl ParserSecurityConfig {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn with_limits(mut self, limits: SecurityLimits) -> Self {
        self.limits = limits;
        self
    }

    pub fn with_entity_policy(mut self, policy: EntityPolicy) -> Self {
        self.entity_policy = policy;
        self
    }

    pub fn limits(&self) -> &SecurityLimits {
        &self.limits
    }

    pub fn entity_policy(&self) -> &EntityPolicy {
        &self.entity_policy
    }
}

#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct QuerySecurityConfig {
    limits: SecurityLimits,
}

impl QuerySecurityConfig {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn with_limits(mut self, limits: SecurityLimits) -> Self {
        self.limits = limits;
        self
    }

    pub fn limits(&self) -> &SecurityLimits {
        &self.limits
    }

    pub fn check_steps(&self, steps: usize) -> XmlResult<()> {
        self.limits.check_query_steps(steps)
    }
}

#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct TransformSecurityConfig {
    limits: SecurityLimits,
}

impl TransformSecurityConfig {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn with_limits(mut self, limits: SecurityLimits) -> Self {
        self.limits = limits;
        self
    }

    pub fn limits(&self) -> &SecurityLimits {
        &self.limits
    }

    pub fn check_expansion(&self, expansions: usize) -> XmlResult<()> {
        self.limits.check_transform_expansion(expansions)
    }
}

/// Wraps `message` in the error returned by the `SecurityLimits::check_*` methods.
///
/// NOTE(review): every limit violation, including query-step and
/// transform-expansion overruns, is reported as `ErrorKind::Parse`; confirm
/// whether the crate offers a more specific kind.
fn limit_error(message: String) -> XmlError {
    let kind = ErrorKind::Parse;
    XmlError::new(kind, message)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The default parser configuration denies every external resource and
    /// carries each `DEFAULT_MAX_*` limit.
    #[test]
    fn security_defaults_are_safe() {
        let parser = ParserSecurityConfig::default();

        assert!(!parser.entity_policy().external_entities_allowed());
        assert!(!parser.entity_policy().network_allowed());
        assert!(!parser.entity_policy().filesystem_allowed());
        assert_eq!(EntityPolicy::secure(), EntityPolicy::default());

        assert_eq!(parser.limits().max_depth(), DEFAULT_MAX_DEPTH);
        assert_eq!(
            parser.limits().max_document_bytes(),
            DEFAULT_MAX_DOCUMENT_BYTES
        );
        assert_eq!(parser.limits().max_text_bytes(), DEFAULT_MAX_TEXT_BYTES);
        assert_eq!(parser.limits().max_nodes(), DEFAULT_MAX_NODES);
        assert_eq!(parser.limits().max_query_steps(), DEFAULT_MAX_QUERY_STEPS);
        assert_eq!(
            parser.limits().max_transform_expansion(),
            DEFAULT_MAX_TRANSFORM_EXPANSION
        );
    }

    /// Limits are inclusive: a value equal to the bound passes, one above it
    /// fails. Checking both sides guards against an always-failing check and
    /// against an accidental `>=` comparison.
    #[test]
    fn security_limits_reject_depth_size_and_nodes() {
        let limits = SecurityLimits::default()
            .with_max_depth(1)
            .with_max_document_bytes(4)
            .with_max_text_bytes(2)
            .with_max_nodes(1);

        assert!(limits.check_depth(1).is_ok());
        assert!(limits.check_depth(2).is_err());
        assert!(limits.check_document_size(4).is_ok());
        assert!(limits.check_document_size(5).is_err());
        assert!(limits.check_text_size(2).is_ok());
        assert!(limits.check_text_size(3).is_err());
        assert!(limits.check_nodes(1).is_ok());
        assert!(limits.check_nodes(2).is_err());
    }

    /// External entities and DOCTYPE declarations are rejected by default and
    /// accepted only after an explicit opt-in.
    #[test]
    fn security_entity_policy_blocks_external_entities_by_default() {
        let policy = EntityPolicy::default();

        assert_eq!(
            policy.reject_external_entity("xxe").unwrap_err().kind(),
            &ErrorKind::Parse
        );
        assert_eq!(
            policy.reject_doctype().unwrap_err().kind(),
            &ErrorKind::Parse
        );

        // Network and filesystem switches alone must not unlock entities.
        let still_closed = EntityPolicy::secure()
            .with_network(true)
            .with_filesystem(true);
        assert!(still_closed.reject_external_entity("xxe").is_err());
        assert!(still_closed.reject_doctype().is_err());

        let opted_in = EntityPolicy::secure().with_external_entities(true);
        assert!(opted_in.reject_external_entity("xxe").is_ok());
        assert!(opted_in.reject_doctype().is_ok());
    }

    /// The query configuration enforces its step bound inclusively.
    #[test]
    fn security_query_has_step_limit() {
        let config = QuerySecurityConfig::default()
            .with_limits(SecurityLimits::default().with_max_query_steps(1));

        assert!(config.check_steps(1).is_ok());
        assert!(config.check_steps(2).is_err());
    }

    /// The transform configuration enforces its expansion bound inclusively.
    #[test]
    fn security_transform_rejects_expansion_excess() {
        let config = TransformSecurityConfig::default()
            .with_limits(SecurityLimits::default().with_max_transform_expansion(1));

        assert!(config.check_expansion(1).is_ok());
        assert!(config.check_expansion(2).is_err());
    }
}