Skip to main content

index_security/
lib.rs

//! Security policy checks for hostile inputs.
//!
//! This crate contains deterministic guards shared by fetchers, CLI entry
//! points, and future decoders. It performs no network I/O and parses no HTML.
5
6use std::collections::BTreeSet;
7use std::fmt::{Display, Formatter};
8
9use index_core::IndexUrl;
10
/// Default ceiling, in bytes, on an input body accepted by local entry points (2 MiB).
pub const DEFAULT_MAX_CONTENT_BYTES: usize = 2 * (1 << 20);

/// Default ceiling, in bytes, on a decompressed body accepted by future decoders (8 MiB).
pub const DEFAULT_MAX_DECOMPRESSED_BYTES: usize = 8 * (1 << 20);

/// Default ceiling on the decompression expansion ratio.
pub const DEFAULT_MAX_DECOMPRESSION_RATIO: usize = 20;

/// Default ceiling on the number of redirect hops.
pub const DEFAULT_MAX_REDIRECTS: usize = 10;
22
/// Size and redirect ceilings applied to hostile inputs.
///
/// The type is `Copy`, so it can be passed by value to each check.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ContentLimits {
    /// Ceiling, in bytes, on a fetched or local body.
    pub max_content_bytes: usize,
    /// Ceiling, in bytes, on a body after decompression.
    pub max_decompressed_bytes: usize,
    /// Ceiling on the decompression expansion ratio.
    pub max_decompression_ratio: usize,
    /// Ceiling on the number of redirect hops.
    pub max_redirects: usize,
}
35
36impl ContentLimits {
37    /// Creates limits with explicit values.
38    #[must_use]
39    pub const fn new(
40        max_content_bytes: usize,
41        max_decompressed_bytes: usize,
42        max_decompression_ratio: usize,
43        max_redirects: usize,
44    ) -> Self {
45        Self {
46            max_content_bytes,
47            max_decompressed_bytes,
48            max_decompression_ratio,
49            max_redirects,
50        }
51    }
52}
53
54impl Default for ContentLimits {
55    fn default() -> Self {
56        Self {
57            max_content_bytes: DEFAULT_MAX_CONTENT_BYTES,
58            max_decompressed_bytes: DEFAULT_MAX_DECOMPRESSED_BYTES,
59            max_decompression_ratio: DEFAULT_MAX_DECOMPRESSION_RATIO,
60            max_redirects: DEFAULT_MAX_REDIRECTS,
61        }
62    }
63}
64
65/// Security policy errors.
66#[derive(Debug, Clone, PartialEq, Eq)]
67pub enum SecurityError {
68    /// Input body exceeded the configured size limit.
69    ContentTooLarge {
70        /// Actual bytes observed.
71        actual_bytes: usize,
72        /// Limit in bytes.
73        limit_bytes: usize,
74    },
75    /// Decoded body exceeded decompression limits.
76    DecompressionBomb {
77        /// Compressed byte count.
78        compressed_bytes: usize,
79        /// Decompressed byte count.
80        decompressed_bytes: usize,
81        /// Maximum permitted ratio.
82        ratio_limit: usize,
83    },
84    /// Redirect chain contains a repeated URL.
85    RedirectLoop {
86        /// Repeated URL.
87        url: IndexUrl,
88    },
89    /// Redirect chain exceeded the configured hop limit.
90    TooManyRedirects {
91        /// Redirect hop count.
92        redirects: usize,
93        /// Redirect hop limit.
94        limit: usize,
95    },
96}
97
98impl Display for SecurityError {
99    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
100        match self {
101            Self::ContentTooLarge {
102                actual_bytes,
103                limit_bytes,
104            } => write!(
105                f,
106                "content size limit exceeded: {actual_bytes} bytes is greater than {limit_bytes} bytes"
107            ),
108            Self::DecompressionBomb {
109                compressed_bytes,
110                decompressed_bytes,
111                ratio_limit,
112            } => write!(
113                f,
114                "decompression limit exceeded: {compressed_bytes} bytes expanded to {decompressed_bytes} bytes over ratio {ratio_limit}"
115            ),
116            Self::RedirectLoop { url } => write!(f, "redirect loop detected at {url}"),
117            Self::TooManyRedirects { redirects, limit } => {
118                write!(f, "too many redirects: {redirects} exceeds limit {limit}")
119            }
120        }
121    }
122}
123
124impl std::error::Error for SecurityError {}
125
126/// Validates raw content size.
127pub fn check_content_size(input: &str, limits: ContentLimits) -> Result<(), SecurityError> {
128    let actual_bytes = input.len();
129    if actual_bytes > limits.max_content_bytes {
130        Err(SecurityError::ContentTooLarge {
131            actual_bytes,
132            limit_bytes: limits.max_content_bytes,
133        })
134    } else {
135        Ok(())
136    }
137}
138
139/// Validates decompression expansion before decoded content is accepted.
140pub fn check_decompression_size(
141    compressed_bytes: usize,
142    decompressed_bytes: usize,
143    limits: ContentLimits,
144) -> Result<(), SecurityError> {
145    let expanded_too_large = decompressed_bytes > limits.max_decompressed_bytes;
146    let ratio_too_large = compressed_bytes > 0
147        && decompressed_bytes / compressed_bytes > limits.max_decompression_ratio;
148
149    if expanded_too_large || ratio_too_large {
150        Err(SecurityError::DecompressionBomb {
151            compressed_bytes,
152            decompressed_bytes,
153            ratio_limit: limits.max_decompression_ratio,
154        })
155    } else {
156        Ok(())
157    }
158}
159
160/// Validates a redirect chain for loops and hop count.
161pub fn validate_redirect_chain(
162    requested_url: &IndexUrl,
163    redirects: &[IndexUrl],
164    final_url: &IndexUrl,
165    limits: ContentLimits,
166) -> Result<(), SecurityError> {
167    if redirects.len() > limits.max_redirects {
168        return Err(SecurityError::TooManyRedirects {
169            redirects: redirects.len(),
170            limit: limits.max_redirects,
171        });
172    }
173
174    let mut seen = BTreeSet::new();
175    for url in std::iter::once(requested_url).chain(redirects.iter()) {
176        if !seen.insert(url.as_str().to_owned()) {
177            return Err(SecurityError::RedirectLoop { url: url.clone() });
178        }
179    }
180    if !redirects.is_empty() && seen.contains(final_url.as_str()) {
181        return Err(SecurityError::RedirectLoop {
182            url: final_url.clone(),
183        });
184    }
185
186    Ok(())
187}
188
#[cfg(test)]
mod tests {
    use index_core::{IndexUrl, UrlError};

    use super::{
        ContentLimits, SecurityError, check_content_size, check_decompression_size,
        validate_redirect_chain,
    };

    /// Return type for tests that propagate URL parse failures with `?`.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// A five-byte body against a four-byte limit reports both sizes.
    #[test]
    fn content_size_limit_rejects_large_input() {
        let outcome = check_content_size("12345", ContentLimits::new(4, 100, 20, 10));
        let expected = SecurityError::ContentTooLarge {
            actual_bytes: 5,
            limit_bytes: 4,
        };

        assert_eq!(outcome, Err(expected));
    }

    /// Bodies exactly at the limit and below it both pass.
    #[test]
    fn content_size_limit_allows_input_at_or_below_limit() {
        let limits = ContentLimits::new(5, 100, 20, 10);

        for body in ["12345", "1234"] {
            assert_eq!(check_content_size(body, limits), Ok(()));
        }
    }

    /// Output larger than the absolute decompressed-size limit is rejected.
    #[test]
    fn decompression_limit_rejects_large_expansion() {
        let outcome = check_decompression_size(10, 101, ContentLimits::new(100, 100, 5, 10));
        let expected = SecurityError::DecompressionBomb {
            compressed_bytes: 10,
            decompressed_bytes: 101,
            ratio_limit: 5,
        };

        assert_eq!(outcome, Err(expected));
    }

    /// A 3x expansion breaks a 2x ratio limit; zero compressed bytes skips the ratio check.
    #[test]
    fn decompression_limit_rejects_high_ratio_and_allows_zero_compressed_bytes() {
        let limits = ContentLimits::new(100, 1_000, 2, 10);

        let over_ratio = check_decompression_size(10, 30, limits);
        assert_eq!(
            over_ratio,
            Err(SecurityError::DecompressionBomb {
                compressed_bytes: 10,
                decompressed_bytes: 30,
                ratio_limit: 2
            })
        );

        let nothing_compressed = check_decompression_size(0, 999, limits);
        assert_eq!(nothing_compressed, Ok(()));
    }

    /// A final URL that equals a redirect hop is reported as a loop.
    #[test]
    fn redirect_chain_detects_loop() -> TestResult {
        let start = IndexUrl::parse("https://example.com/start")?;
        let hop = IndexUrl::parse("https://example.com/hop")?;

        let outcome = validate_redirect_chain(
            &start,
            std::slice::from_ref(&hop),
            &hop,
            ContentLimits::default(),
        );

        assert_eq!(outcome, Err(SecurityError::RedirectLoop { url: hop }));
        Ok(())
    }

    /// Without redirects the final URL may equal the requested URL.
    #[test]
    fn redirect_chain_allows_non_redirected_final_url() -> TestResult {
        let start = IndexUrl::parse("https://example.com/start")?;

        let outcome = validate_redirect_chain(&start, &[], &start, ContentLimits::default());

        assert_eq!(outcome, Ok(()));
        Ok(())
    }

    /// Two hops against a limit of one is rejected with both counts.
    #[test]
    fn redirect_chain_rejects_too_many_hops() -> TestResult {
        let start = IndexUrl::parse("https://example.com/start")?;
        let last = IndexUrl::parse("https://example.com/final")?;
        let hops = [
            IndexUrl::parse("https://example.com/1")?,
            IndexUrl::parse("https://example.com/2")?,
        ];

        let outcome =
            validate_redirect_chain(&start, &hops, &last, ContentLimits::new(100, 100, 5, 1));

        assert_eq!(
            outcome,
            Err(SecurityError::TooManyRedirects {
                redirects: 2,
                limit: 1
            })
        );
        Ok(())
    }

    /// The same URL twice among the hops is reported as a loop.
    #[test]
    fn redirect_chain_rejects_duplicate_url_in_hops() -> TestResult {
        let start = IndexUrl::parse("https://example.com/start")?;
        let repeated = IndexUrl::parse("https://example.com/hop")?;
        let last = IndexUrl::parse("https://example.com/final")?;
        let hops = [repeated.clone(), repeated.clone()];

        let outcome = validate_redirect_chain(&start, &hops, &last, ContentLimits::default());

        assert_eq!(outcome, Err(SecurityError::RedirectLoop { url: repeated }));
        Ok(())
    }

    /// Pins the exact `Display` text of every error variant.
    #[test]
    fn security_error_display_messages_are_stable() -> TestResult {
        let url = IndexUrl::parse("https://example.com/loop")?;
        let cases = [
            (
                SecurityError::ContentTooLarge {
                    actual_bytes: 9,
                    limit_bytes: 4,
                },
                "content size limit exceeded: 9 bytes is greater than 4 bytes",
            ),
            (
                SecurityError::DecompressionBomb {
                    compressed_bytes: 10,
                    decompressed_bytes: 101,
                    ratio_limit: 5,
                },
                "decompression limit exceeded: 10 bytes expanded to 101 bytes over ratio 5",
            ),
            (
                SecurityError::RedirectLoop { url },
                "redirect loop detected at https://example.com/loop",
            ),
            (
                SecurityError::TooManyRedirects {
                    redirects: 11,
                    limit: 10,
                },
                "too many redirects: 11 exceeds limit 10",
            ),
        ];

        for (error, expected) in &cases {
            assert_eq!(error.to_string(), *expected);
        }

        Ok(())
    }

    /// Scheme filtering is done by `IndexUrl::parse`, not by this crate.
    #[test]
    fn unsafe_scheme_rejection_remains_centralized() {
        let parsed = IndexUrl::parse("data:text/html,hello");

        assert_eq!(parsed, Err(UrlError::DisallowedScheme("data".to_owned())));
    }
}
345            IndexUrl::parse("data:text/html,hello"),
346            Err(UrlError::DisallowedScheme("data".to_owned()))
347        );
348    }
349}