infiniloom_engine/embedding/
limits.rs

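//! Resource limits to prevent DoS attacks
//!
//! This module provides configurable limits for:
//! - AST recursion depth (prevent stack overflow)
//! - File sizes (prevent memory exhaustion)
//! - Total chunks (prevent output explosion)
//! - Total files (bound how much input is processed)
//! - Concurrent operations (prevent resource exhaustion)
//! - Line lengths (guard against single-line minified files)
//! - Chunk sizes (prevent extremely large chunks)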
8
9use serde::{Deserialize, Serialize};
10
11/// Resource limits to prevent DoS attacks and resource exhaustion
12#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
13pub struct ResourceLimits {
14    /// Maximum recursion depth for AST traversal
15    /// Default: 500 (handles deeply nested code)
16    pub max_recursion_depth: u32,
17
18    /// Maximum file size to process (bytes)
19    /// Default: 10 MB (larger files are skipped with warning)
20    pub max_file_size: u64,
21
22    /// Maximum total chunks to generate
23    /// Default: 1,000,000 (enterprise scale)
24    pub max_total_chunks: usize,
25
26    /// Maximum files to process
27    /// Default: 500,000 (large monorepo scale)
28    pub max_files: usize,
29
30    /// Maximum concurrent file operations
31    /// Default: 32 (reasonable for most systems)
32    pub max_concurrent_loads: usize,
33
34    /// Maximum line length to process (bytes)
35    /// Default: 10,000 (prevents single-line minified files)
36    pub max_line_length: usize,
37
38    /// Maximum content size per chunk (bytes)
39    /// Default: 1 MB (prevents extremely large chunks)
40    pub max_chunk_size: usize,
41}
42
43impl Default for ResourceLimits {
44    fn default() -> Self {
45        Self {
46            max_recursion_depth: 500,
47            max_file_size: 10 * 1024 * 1024,     // 10 MB
48            max_total_chunks: 1_000_000,         // 1 million chunks
49            max_files: 500_000,                  // 500k files
50            max_concurrent_loads: 32,            // 32 concurrent operations
51            max_line_length: 10_000,             // 10k chars per line
52            max_chunk_size: 1024 * 1024,         // 1 MB per chunk
53        }
54    }
55}
56
57impl ResourceLimits {
58    /// Create limits suitable for trusted input (development)
59    pub fn relaxed() -> Self {
60        Self {
61            max_recursion_depth: 1000,
62            max_file_size: 50 * 1024 * 1024,    // 50 MB
63            max_total_chunks: 10_000_000,        // 10 million
64            max_files: 1_000_000,                // 1 million files
65            max_concurrent_loads: 64,            // More concurrency
66            max_line_length: 100_000,            // 100k chars
67            max_chunk_size: 5 * 1024 * 1024,     // 5 MB
68        }
69    }
70
71    /// Strict limits for untrusted input (CI/CD, public APIs)
72    pub fn strict() -> Self {
73        Self {
74            max_recursion_depth: 100,
75            max_file_size: 1024 * 1024,          // 1 MB
76            max_total_chunks: 100_000,           // 100k chunks
77            max_files: 50_000,                   // 50k files
78            max_concurrent_loads: 8,             // Limited concurrency
79            max_line_length: 1000,               // 1k chars
80            max_chunk_size: 100 * 1024,          // 100 KB
81        }
82    }
83
84    /// Create limits suitable for a quick scan or test
85    pub fn minimal() -> Self {
86        Self {
87            max_recursion_depth: 50,
88            max_file_size: 100 * 1024,           // 100 KB
89            max_total_chunks: 1000,              // 1k chunks
90            max_files: 100,                      // 100 files
91            max_concurrent_loads: 4,             // Minimal concurrency
92            max_line_length: 500,                // 500 chars
93            max_chunk_size: 10 * 1024,           // 10 KB
94        }
95    }
96
97    /// Check if a file size is within limits
98    #[inline]
99    pub fn check_file_size(&self, size: u64) -> bool {
100        size <= self.max_file_size
101    }
102
103    /// Check if recursion depth is within limits
104    #[inline]
105    pub fn check_recursion_depth(&self, depth: u32) -> bool {
106        depth <= self.max_recursion_depth
107    }
108
109    /// Check if chunk count is within limits
110    #[inline]
111    pub fn check_chunk_count(&self, count: usize) -> bool {
112        count <= self.max_total_chunks
113    }
114
115    /// Check if file count is within limits
116    #[inline]
117    pub fn check_file_count(&self, count: usize) -> bool {
118        count <= self.max_files
119    }
120
121    /// Check if line length is within limits
122    #[inline]
123    pub fn check_line_length(&self, length: usize) -> bool {
124        length <= self.max_line_length
125    }
126
127    /// Check if chunk size is within limits
128    #[inline]
129    pub fn check_chunk_size(&self, size: usize) -> bool {
130        size <= self.max_chunk_size
131    }
132
133    /// Builder-style: set max recursion depth
134    pub fn with_max_recursion_depth(mut self, depth: u32) -> Self {
135        self.max_recursion_depth = depth;
136        self
137    }
138
139    /// Builder-style: set max file size
140    pub fn with_max_file_size(mut self, size: u64) -> Self {
141        self.max_file_size = size;
142        self
143    }
144
145    /// Builder-style: set max total chunks
146    pub fn with_max_total_chunks(mut self, count: usize) -> Self {
147        self.max_total_chunks = count;
148        self
149    }
150
151    /// Builder-style: set max files
152    pub fn with_max_files(mut self, count: usize) -> Self {
153        self.max_files = count;
154        self
155    }
156
157    /// Builder-style: set max concurrent loads
158    pub fn with_max_concurrent_loads(mut self, count: usize) -> Self {
159        self.max_concurrent_loads = count;
160        self
161    }
162
163    /// Builder-style: set max line length
164    pub fn with_max_line_length(mut self, length: usize) -> Self {
165        self.max_line_length = length;
166        self
167    }
168
169    /// Builder-style: set max chunk size
170    pub fn with_max_chunk_size(mut self, size: usize) -> Self {
171        self.max_chunk_size = size;
172        self
173    }
174}
175
#[cfg(test)]
mod tests {
    //! Unit tests for the presets, the inclusive `check_*` helpers, the
    //! `with_*` builders and the serde round trip of `ResourceLimits`.

    use super::*;

    #[test]
    fn test_default_limits() {
        let limits = ResourceLimits::default();
        assert_eq!(limits.max_recursion_depth, 500);
        assert_eq!(limits.max_file_size, 10 * 1024 * 1024);
        assert_eq!(limits.max_total_chunks, 1_000_000);
        assert_eq!(limits.max_files, 500_000);
        assert_eq!(limits.max_concurrent_loads, 32);
        assert_eq!(limits.max_line_length, 10_000);
        assert_eq!(limits.max_chunk_size, 1024 * 1024);
    }

    #[test]
    fn test_strict_limits() {
        let limits = ResourceLimits::strict();
        assert_eq!(limits.max_recursion_depth, 100);
        assert_eq!(limits.max_file_size, 1024 * 1024);
        assert!(limits.max_total_chunks < ResourceLimits::default().max_total_chunks);
    }

    #[test]
    fn test_relaxed_limits() {
        let limits = ResourceLimits::relaxed();
        assert!(limits.max_recursion_depth > ResourceLimits::default().max_recursion_depth);
        assert!(limits.max_file_size > ResourceLimits::default().max_file_size);
    }

    #[test]
    fn test_minimal_limits() {
        let limits = ResourceLimits::minimal();
        assert!(limits.max_files <= 100);
        assert!(limits.max_total_chunks <= 1000);
    }

    /// The presets form a strict ladder: minimal < strict < default < relaxed
    /// on every field.
    #[test]
    fn test_preset_ordering() {
        let ladder = [
            ResourceLimits::minimal(),
            ResourceLimits::strict(),
            ResourceLimits::default(),
            ResourceLimits::relaxed(),
        ];
        for pair in ladder.windows(2) {
            let (lo, hi) = (&pair[0], &pair[1]);
            assert!(lo.max_recursion_depth < hi.max_recursion_depth);
            assert!(lo.max_file_size < hi.max_file_size);
            assert!(lo.max_total_chunks < hi.max_total_chunks);
            assert!(lo.max_files < hi.max_files);
            assert!(lo.max_concurrent_loads < hi.max_concurrent_loads);
            assert!(lo.max_line_length < hi.max_line_length);
            assert!(lo.max_chunk_size < hi.max_chunk_size);
        }
    }

    #[test]
    fn test_check_file_size() {
        let limits = ResourceLimits::default();
        assert!(limits.check_file_size(1024)); // 1 KB
        assert!(limits.check_file_size(10 * 1024 * 1024)); // Exactly 10 MB
        assert!(!limits.check_file_size(11 * 1024 * 1024)); // 11 MB
    }

    #[test]
    fn test_check_recursion_depth() {
        let limits = ResourceLimits::default();
        assert!(limits.check_recursion_depth(100));
        assert!(limits.check_recursion_depth(500)); // Exactly at limit
        assert!(!limits.check_recursion_depth(501)); // Over limit
    }

    #[test]
    fn test_check_chunk_count() {
        let limits = ResourceLimits::default();
        assert!(limits.check_chunk_count(0));
        assert!(limits.check_chunk_count(1_000_000)); // Exactly at limit
        assert!(!limits.check_chunk_count(1_000_001)); // Over limit
    }

    #[test]
    fn test_check_file_count() {
        let limits = ResourceLimits::default();
        assert!(limits.check_file_count(0));
        assert!(limits.check_file_count(500_000)); // Exactly at limit
        assert!(!limits.check_file_count(500_001)); // Over limit
    }

    #[test]
    fn test_check_line_length() {
        let limits = ResourceLimits::default();
        assert!(limits.check_line_length(80));
        assert!(limits.check_line_length(10_000)); // Exactly at limit
        assert!(!limits.check_line_length(10_001)); // Over limit
    }

    #[test]
    fn test_check_chunk_size() {
        let limits = ResourceLimits::default();
        assert!(limits.check_chunk_size(512));
        assert!(limits.check_chunk_size(1024 * 1024)); // Exactly at limit
        assert!(!limits.check_chunk_size(1024 * 1024 + 1)); // Over limit
    }

    /// A limit of zero accepts only zero.
    #[test]
    fn test_zero_limit_boundary() {
        let limits = ResourceLimits::default().with_max_files(0);
        assert!(limits.check_file_count(0));
        assert!(!limits.check_file_count(1));
    }

    #[test]
    fn test_builder_pattern() {
        let limits = ResourceLimits::default()
            .with_max_file_size(5 * 1024 * 1024)
            .with_max_recursion_depth(200)
            .with_max_total_chunks(50_000);

        assert_eq!(limits.max_file_size, 5 * 1024 * 1024);
        assert_eq!(limits.max_recursion_depth, 200);
        assert_eq!(limits.max_total_chunks, 50_000);
    }

    #[test]
    fn test_builder_remaining_setters() {
        let limits = ResourceLimits::default()
            .with_max_files(10)
            .with_max_concurrent_loads(2)
            .with_max_line_length(120)
            .with_max_chunk_size(4096);

        assert_eq!(limits.max_files, 10);
        assert_eq!(limits.max_concurrent_loads, 2);
        assert_eq!(limits.max_line_length, 120);
        assert_eq!(limits.max_chunk_size, 4096);

        // Fields without a builder call keep their default values.
        let defaults = ResourceLimits::default();
        assert_eq!(limits.max_recursion_depth, defaults.max_recursion_depth);
        assert_eq!(limits.max_file_size, defaults.max_file_size);
        assert_eq!(limits.max_total_chunks, defaults.max_total_chunks);
    }

    #[test]
    fn test_serialization() {
        let limits = ResourceLimits::default();
        let json = serde_json::to_string(&limits).unwrap();
        let deserialized: ResourceLimits = serde_json::from_str(&json).unwrap();
        assert_eq!(limits, deserialized);
    }
}