index-security 1.0.0

Security policy checks for hostile Index inputs.
Documentation
//! Security policy checks for hostile inputs.
//!
//! This crate contains deterministic guards shared by fetchers, CLI entry
//! points, and future decoders. It performs no network IO and parses no HTML.

use std::collections::BTreeSet;
use std::fmt::{Display, Formatter};

use index_core::IndexUrl;

/// Number of bytes in one mebibyte; keeps the size defaults below readable.
const BYTES_PER_MIB: usize = 1 << 20;

/// Default cap on an input body accepted by local entry points (2 MiB).
pub const DEFAULT_MAX_CONTENT_BYTES: usize = 2 * BYTES_PER_MIB;

/// Default cap on a body after decompression, for future decoders (8 MiB).
pub const DEFAULT_MAX_DECOMPRESSED_BYTES: usize = 8 * BYTES_PER_MIB;

/// Default cap on the decompression expansion ratio.
pub const DEFAULT_MAX_DECOMPRESSION_RATIO: usize = 20;

/// Default cap on the number of redirect hops.
pub const DEFAULT_MAX_REDIRECTS: usize = 10;

/// Security limits for hostile inputs.
///
/// A small `Copy` bundle of thresholds that the check functions in this
/// crate take by value. Every field is public, so a value can also be built
/// or adjusted with plain struct syntax.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ContentLimits {
    /// Maximum fetched or local body size, in bytes.
    pub max_content_bytes: usize,
    /// Maximum decoded body size after decompression, in bytes.
    pub max_decompressed_bytes: usize,
    /// Maximum decompression expansion ratio (decompressed size relative to
    /// compressed size).
    pub max_decompression_ratio: usize,
    /// Maximum redirect hops.
    pub max_redirects: usize,
}

impl ContentLimits {
    /// Creates limits with explicit values.
    ///
    /// Each argument is stored unchanged in the field of the same name; no
    /// validation or clamping is performed. Because this is a `const fn`, it
    /// can also be used to initialise constants and statics.
    #[must_use]
    pub const fn new(
        max_content_bytes: usize,
        max_decompressed_bytes: usize,
        max_decompression_ratio: usize,
        max_redirects: usize,
    ) -> Self {
        Self {
            max_content_bytes,
            max_decompressed_bytes,
            max_decompression_ratio,
            max_redirects,
        }
    }
}

impl Default for ContentLimits {
    /// Builds limits from the crate's `DEFAULT_MAX_*` constants.
    fn default() -> Self {
        // Argument order follows `ContentLimits::new`: content bytes,
        // decompressed bytes, decompression ratio, redirect hops.
        Self::new(
            DEFAULT_MAX_CONTENT_BYTES,
            DEFAULT_MAX_DECOMPRESSED_BYTES,
            DEFAULT_MAX_DECOMPRESSION_RATIO,
            DEFAULT_MAX_REDIRECTS,
        )
    }
}

/// Security policy errors.
///
/// Each variant carries the values that triggered the rejection so callers
/// can report them. The `Display` implementation renders every variant as a
/// single line of text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SecurityError {
    /// Input body exceeded the configured size limit.
    ContentTooLarge {
        /// Actual bytes observed.
        actual_bytes: usize,
        /// Limit in bytes.
        limit_bytes: usize,
    },
    /// Decoded body exceeded decompression limits.
    ///
    /// Produced when either the absolute decompressed-size limit or the
    /// expansion-ratio limit is exceeded; only the ratio limit is recorded.
    DecompressionBomb {
        /// Compressed byte count.
        compressed_bytes: usize,
        /// Decompressed byte count.
        decompressed_bytes: usize,
        /// Maximum permitted ratio.
        ratio_limit: usize,
    },
    /// Redirect chain contains a repeated URL.
    RedirectLoop {
        /// Repeated URL.
        url: IndexUrl,
    },
    /// Redirect chain exceeded the configured hop limit.
    TooManyRedirects {
        /// Redirect hop count.
        redirects: usize,
        /// Redirect hop limit.
        limit: usize,
    },
}

impl Display for SecurityError {
    /// Writes a single-line, human-readable description of the violation.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // One arm per variant; each arm hands its finished message straight
        // to the formatter.
        match self {
            Self::TooManyRedirects { redirects, limit } => f.write_fmt(format_args!(
                "too many redirects: {redirects} exceeds limit {limit}"
            )),
            Self::RedirectLoop { url } => {
                f.write_fmt(format_args!("redirect loop detected at {url}"))
            }
            Self::DecompressionBomb {
                compressed_bytes,
                decompressed_bytes,
                ratio_limit,
            } => f.write_fmt(format_args!(
                "decompression limit exceeded: {compressed_bytes} bytes expanded to {decompressed_bytes} bytes over ratio {ratio_limit}"
            )),
            Self::ContentTooLarge {
                actual_bytes,
                limit_bytes,
            } => f.write_fmt(format_args!(
                "content size limit exceeded: {actual_bytes} bytes is greater than {limit_bytes} bytes"
            )),
        }
    }
}

/// Marks `SecurityError` as a standard error type; it wraps no underlying source.
impl std::error::Error for SecurityError {}

/// Rejects input whose byte length exceeds `limits.max_content_bytes`.
///
/// The length is measured in bytes of the `&str`, not in characters. An
/// input exactly at the limit is accepted.
///
/// # Errors
///
/// Returns [`SecurityError::ContentTooLarge`] with the observed length and
/// the configured limit when the input is over the limit.
pub fn check_content_size(input: &str, limits: ContentLimits) -> Result<(), SecurityError> {
    let limit_bytes = limits.max_content_bytes;
    let actual_bytes = input.len();

    // Happy path first: anything at or under the limit passes.
    if actual_bytes <= limit_bytes {
        return Ok(());
    }

    Err(SecurityError::ContentTooLarge {
        actual_bytes,
        limit_bytes,
    })
}

/// Validates decompression expansion before decoded content is accepted.
///
/// Two independent limits are applied:
///
/// * `decompressed_bytes` must not exceed `limits.max_decompressed_bytes`;
/// * when `compressed_bytes` is non-zero, `decompressed_bytes` must not exceed
///   `compressed_bytes * limits.max_decompression_ratio`.
///
/// A `compressed_bytes` of zero skips the ratio check (the ratio is
/// undefined), so only the absolute size limit applies in that case.
///
/// # Errors
///
/// Returns [`SecurityError::DecompressionBomb`] when either limit is
/// exceeded. The error records both byte counts and the ratio limit.
pub fn check_decompression_size(
    compressed_bytes: usize,
    decompressed_bytes: usize,
    limits: ContentLimits,
) -> Result<(), SecurityError> {
    let expanded_too_large = decompressed_bytes > limits.max_decompressed_bytes;

    // Compare against `compressed * ratio` instead of dividing: integer
    // division truncates, which would let e.g. a 2.9x expansion slip past a
    // limit of 2. The saturating multiply keeps huge inputs from overflowing;
    // a saturated product can never be exceeded, which is the correct answer
    // because the true product is even larger.
    let ratio_too_large = compressed_bytes > 0
        && decompressed_bytes > compressed_bytes.saturating_mul(limits.max_decompression_ratio);

    if expanded_too_large || ratio_too_large {
        Err(SecurityError::DecompressionBomb {
            compressed_bytes,
            decompressed_bytes,
            ratio_limit: limits.max_decompression_ratio,
        })
    } else {
        Ok(())
    }
}

/// Validates a redirect chain for loops and hop count.
///
/// `redirects` holds the intermediate hops between `requested_url` and
/// `final_url`. Checks run in this order:
///
/// 1. the number of hops must not exceed `limits.max_redirects`;
/// 2. `requested_url` and the hops must all be distinct;
/// 3. if there was at least one hop, `final_url` must differ from the
///    requested URL and from every hop.
///
/// With no hops, `final_url` is not checked, so a non-redirected fetch whose
/// final URL equals the requested URL is accepted.
///
/// # Errors
///
/// Returns [`SecurityError::TooManyRedirects`] when the hop limit is
/// exceeded, or [`SecurityError::RedirectLoop`] carrying the first repeated
/// URL when a repeat is found.
pub fn validate_redirect_chain(
    requested_url: &IndexUrl,
    redirects: &[IndexUrl],
    final_url: &IndexUrl,
    limits: ContentLimits,
) -> Result<(), SecurityError> {
    if redirects.len() > limits.max_redirects {
        return Err(SecurityError::TooManyRedirects {
            redirects: redirects.len(),
            limit: limits.max_redirects,
        });
    }

    // The set only lives for this call, so it can borrow each URL's text
    // instead of allocating an owned copy per hop.
    let mut seen = BTreeSet::new();
    for url in std::iter::once(requested_url).chain(redirects) {
        // `insert` returns false when the value was already present.
        if !seen.insert(url.as_str()) {
            return Err(SecurityError::RedirectLoop { url: url.clone() });
        }
    }

    if !redirects.is_empty() && seen.contains(final_url.as_str()) {
        return Err(SecurityError::RedirectLoop {
            url: final_url.clone(),
        });
    }

    Ok(())
}

// Unit tests for the size, decompression, and redirect guards, plus the
// stability of the error messages.
#[cfg(test)]
mod tests {
    use index_core::{IndexUrl, UrlError};

    use super::{
        ContentLimits, SecurityError, check_content_size, check_decompression_size,
        validate_redirect_chain,
    };

    // A 5-byte input against a 4-byte limit must be rejected, and the error
    // must report both numbers.
    #[test]
    fn content_size_limit_rejects_large_input() {
        let limits = ContentLimits::new(4, 100, 20, 10);
        assert_eq!(
            check_content_size("12345", limits),
            Err(SecurityError::ContentTooLarge {
                actual_bytes: 5,
                limit_bytes: 4
            })
        );
    }

    // The limit is inclusive: input exactly at the limit passes, as does
    // anything shorter.
    #[test]
    fn content_size_limit_allows_input_at_or_below_limit() {
        let limits = ContentLimits::new(5, 100, 20, 10);
        assert_eq!(check_content_size("12345", limits), Ok(()));
        assert_eq!(check_content_size("1234", limits), Ok(()));
    }

    // 101 decompressed bytes exceeds the absolute limit of 100.
    #[test]
    fn decompression_limit_rejects_large_expansion() {
        let limits = ContentLimits::new(100, 100, 5, 10);
        assert_eq!(
            check_decompression_size(10, 101, limits),
            Err(SecurityError::DecompressionBomb {
                compressed_bytes: 10,
                decompressed_bytes: 101,
                ratio_limit: 5
            })
        );
    }

    // 10 -> 30 bytes is a 3x expansion against a ratio limit of 2, while the
    // absolute limit (1000) is not hit. Zero compressed bytes skips the ratio
    // check entirely.
    #[test]
    fn decompression_limit_rejects_high_ratio_and_allows_zero_compressed_bytes() {
        let limits = ContentLimits::new(100, 1_000, 2, 10);
        assert_eq!(
            check_decompression_size(10, 30, limits),
            Err(SecurityError::DecompressionBomb {
                compressed_bytes: 10,
                decompressed_bytes: 30,
                ratio_limit: 2
            })
        );
        assert_eq!(check_decompression_size(0, 999, limits), Ok(()));
    }

    // A final URL equal to one of the hops counts as a loop.
    #[test]
    fn redirect_chain_detects_loop() -> Result<(), Box<dyn std::error::Error>> {
        let requested = IndexUrl::parse("https://example.com/start")?;
        let hop = IndexUrl::parse("https://example.com/hop")?;
        let result = validate_redirect_chain(
            &requested,
            std::slice::from_ref(&hop),
            &hop,
            ContentLimits::default(),
        );

        assert_eq!(result, Err(SecurityError::RedirectLoop { url: hop }));
        Ok(())
    }

    // With no hops, the final URL may equal the requested URL.
    #[test]
    fn redirect_chain_allows_non_redirected_final_url() -> Result<(), Box<dyn std::error::Error>> {
        let requested = IndexUrl::parse("https://example.com/start")?;
        assert_eq!(
            validate_redirect_chain(&requested, &[], &requested, ContentLimits::default()),
            Ok(())
        );
        Ok(())
    }

    // Two hops against a limit of one hop must be rejected.
    #[test]
    fn redirect_chain_rejects_too_many_hops() -> Result<(), Box<dyn std::error::Error>> {
        let requested = IndexUrl::parse("https://example.com/start")?;
        let final_url = IndexUrl::parse("https://example.com/final")?;
        let redirects = vec![
            IndexUrl::parse("https://example.com/1")?,
            IndexUrl::parse("https://example.com/2")?,
        ];
        let limits = ContentLimits::new(100, 100, 5, 1);

        assert_eq!(
            validate_redirect_chain(&requested, &redirects, &final_url, limits),
            Err(SecurityError::TooManyRedirects {
                redirects: 2,
                limit: 1
            })
        );
        Ok(())
    }

    // The same URL appearing twice among the hops is a loop even when the
    // final URL is distinct.
    #[test]
    fn redirect_chain_rejects_duplicate_url_in_hops() -> Result<(), Box<dyn std::error::Error>> {
        let requested = IndexUrl::parse("https://example.com/start")?;
        let repeated = IndexUrl::parse("https://example.com/hop")?;
        let final_url = IndexUrl::parse("https://example.com/final")?;
        let redirects = vec![repeated.clone(), repeated.clone()];

        assert_eq!(
            validate_redirect_chain(&requested, &redirects, &final_url, ContentLimits::default()),
            Err(SecurityError::RedirectLoop { url: repeated })
        );
        Ok(())
    }

    // Pins the exact `Display` text of every variant so message changes are
    // caught by the test suite.
    #[test]
    fn security_error_display_messages_are_stable() -> Result<(), Box<dyn std::error::Error>> {
        let url = IndexUrl::parse("https://example.com/loop")?;
        let cases = vec![
            (
                SecurityError::ContentTooLarge {
                    actual_bytes: 9,
                    limit_bytes: 4,
                },
                "content size limit exceeded: 9 bytes is greater than 4 bytes",
            ),
            (
                SecurityError::DecompressionBomb {
                    compressed_bytes: 10,
                    decompressed_bytes: 101,
                    ratio_limit: 5,
                },
                "decompression limit exceeded: 10 bytes expanded to 101 bytes over ratio 5",
            ),
            (
                SecurityError::RedirectLoop { url },
                "redirect loop detected at https://example.com/loop",
            ),
            (
                SecurityError::TooManyRedirects {
                    redirects: 11,
                    limit: 10,
                },
                "too many redirects: 11 exceeds limit 10",
            ),
        ];

        for (error, expected) in cases {
            assert_eq!(error.to_string(), expected);
        }

        Ok(())
    }

    // Scheme filtering is done by `IndexUrl::parse`, not by this crate; this
    // test checks that a `data:` URL is still rejected there.
    #[test]
    fn unsafe_scheme_rejection_remains_centralized() {
        assert_eq!(
            IndexUrl::parse("data:text/html,hello"),
            Err(UrlError::DisallowedScheme("data".to_owned()))
        );
    }
}