fast_yaml_parallel/
config.rs

1//! Configuration for parallel processing behavior.
2
/// Hard ceiling on the number of worker threads (security limit).
const MAX_THREADS: usize = 128;

/// Default cap on total input size, in bytes (100MB).
const DEFAULT_MAX_INPUT_SIZE: usize = 100 * (1 << 20);

/// Default cap on the number of documents in one input (denial-of-service protection).
const DEFAULT_MAX_DOCUMENTS: usize = 100 * 1_000;
11
/// Settings that control how work is split up and bounded during parallel processing.
///
/// Covers the worker thread count, the chunk-size thresholds, and the resource
/// limits applied to incoming input.
///
/// Values compare field-by-field (`PartialEq`/`Eq`), so two configurations can be
/// checked for equality directly, for example in tests.
///
/// # Security Limits
///
/// To prevent denial-of-service attacks and resource exhaustion, the following limits are enforced:
/// - Maximum threads: 128
/// - Maximum input size: 100MB by default (configurable)
/// - Maximum document count: 100,000 by default (configurable)
///
/// # Examples
///
/// ```
/// use fast_yaml_parallel::ParallelConfig;
///
/// let config = ParallelConfig::new()
///     .with_thread_count(Some(8))
///     .with_min_chunk_size(2048);
///
/// // Configurations built the same way compare equal.
/// let same = ParallelConfig::new()
///     .with_thread_count(Some(8))
///     .with_min_chunk_size(2048);
/// assert_eq!(config, same);
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParallelConfig {
    /// Thread pool size (`None` = CPU count, `Some(0)` = sequential).
    pub(crate) thread_count: Option<usize>,

    /// Minimum bytes per chunk (prevents over-chunking small files).
    pub(crate) min_chunk_size: usize,

    /// Maximum bytes per chunk (prevents memory spikes).
    pub(crate) max_chunk_size: usize,

    /// Maximum total input size in bytes (denial-of-service protection).
    pub(crate) max_input_size: usize,

    /// Maximum number of documents allowed (denial-of-service protection).
    pub(crate) max_documents: usize,
}
50
51impl ParallelConfig {
52    /// Creates default configuration (auto thread count).
53    ///
54    /// # Examples
55    ///
56    /// ```
57    /// use fast_yaml_parallel::ParallelConfig;
58    ///
59    /// let config = ParallelConfig::new();
60    /// ```
61    #[inline]
62    #[must_use]
63    pub fn new() -> Self {
64        Self::default()
65    }
66
67    /// Sets thread pool size.
68    ///
69    /// - `None`: Use all available CPU cores (default, capped at 128)
70    /// - `Some(0)`: Sequential processing (no parallelism)
71    /// - `Some(n)`: Use exactly `n` threads (max 128)
72    ///
73    /// # Security
74    ///
75    /// Thread count is capped at 128 to prevent resource exhaustion.
76    /// Values exceeding this limit will be clamped at runtime.
77    ///
78    /// # Examples
79    ///
80    /// ```
81    /// use fast_yaml_parallel::ParallelConfig;
82    ///
83    /// let config = ParallelConfig::new().with_thread_count(Some(4));
84    /// ```
85    #[must_use]
86    pub const fn with_thread_count(mut self, count: Option<usize>) -> Self {
87        self.thread_count = count;
88        self
89    }
90
91    /// Sets maximum total input size in bytes.
92    ///
93    /// Input exceeding this size will be rejected with `ConfigError`.
94    /// Default: 100MB
95    ///
96    /// # Security
97    ///
98    /// This limit prevents denial-of-service attacks via extremely large inputs.
99    ///
100    /// # Examples
101    ///
102    /// ```
103    /// use fast_yaml_parallel::ParallelConfig;
104    ///
105    /// // Allow up to 200MB
106    /// let config = ParallelConfig::new()
107    ///     .with_max_input_size(200 * 1024 * 1024);
108    /// ```
109    #[must_use]
110    pub const fn with_max_input_size(mut self, size: usize) -> Self {
111        self.max_input_size = size;
112        self
113    }
114
115    /// Sets maximum number of documents allowed.
116    ///
117    /// Input with more documents than this will be rejected with `ConfigError`.
118    /// Default: 100,000
119    ///
120    /// # Security
121    ///
122    /// This limit prevents denial-of-service attacks via excessive document counts.
123    ///
124    /// # Examples
125    ///
126    /// ```
127    /// use fast_yaml_parallel::ParallelConfig;
128    ///
129    /// // Allow up to 1 million documents
130    /// let config = ParallelConfig::new()
131    ///     .with_max_documents(1_000_000);
132    /// ```
133    #[must_use]
134    pub const fn with_max_documents(mut self, count: usize) -> Self {
135        self.max_documents = count;
136        self
137    }
138
139    /// Sets minimum total size in bytes for parallel processing.
140    ///
141    /// If total input size is below this threshold AND fewer than 4 documents,
142    /// sequential processing will be used to avoid parallelism overhead.
143    /// Default: 4KB
144    ///
145    /// # Examples
146    ///
147    /// ```
148    /// use fast_yaml_parallel::ParallelConfig;
149    ///
150    /// let config = ParallelConfig::new().with_min_chunk_size(2048);
151    /// ```
152    #[must_use]
153    pub const fn with_min_chunk_size(mut self, size: usize) -> Self {
154        self.min_chunk_size = size;
155        self
156    }
157
158    /// Sets maximum chunk size in bytes.
159    ///
160    /// Large documents exceeding this will be processed sequentially.
161    /// Default: 10MB (prevents memory spikes)
162    ///
163    /// # Examples
164    ///
165    /// ```
166    /// use fast_yaml_parallel::ParallelConfig;
167    ///
168    /// let config = ParallelConfig::new().with_max_chunk_size(5 * 1024 * 1024);
169    /// ```
170    #[must_use]
171    pub const fn with_max_chunk_size(mut self, size: usize) -> Self {
172        self.max_chunk_size = size;
173        self
174    }
175}
176
177impl Default for ParallelConfig {
178    fn default() -> Self {
179        Self {
180            thread_count: None,                     // Auto-detect CPU count
181            min_chunk_size: 4096,                   // 4KB minimum total size
182            max_chunk_size: 10 * 1024 * 1024,       // 10MB maximum
183            max_input_size: DEFAULT_MAX_INPUT_SIZE, // 100MB maximum input
184            max_documents: DEFAULT_MAX_DOCUMENTS,   // 100k maximum documents
185        }
186    }
187}
188
189impl ParallelConfig {
190    /// Returns the effective thread count, capped at security limit.
191    ///
192    /// # Security
193    ///
194    /// Thread count is capped at 128 to prevent resource exhaustion,
195    /// even if user requests more or CPU count exceeds this.
196    pub(crate) fn effective_thread_count(&self) -> usize {
197        let count = self.thread_count.unwrap_or_else(num_cpus::get);
198        count.min(MAX_THREADS)
199    }
200
201    /// Returns maximum input size limit.
202    pub(crate) const fn max_input_size(&self) -> usize {
203        self.max_input_size
204    }
205
206    /// Returns maximum document count limit.
207    pub(crate) const fn max_documents(&self) -> usize {
208        self.max_documents
209    }
210}
211
#[cfg(test)]
mod tests {
    use super::*;

    /// One megabyte (1024 * 1024 bytes), used to spell out size expectations.
    const MB: usize = 1024 * 1024;

    /// The default configuration carries the documented default values.
    #[test]
    fn test_default_config() {
        // Exhaustive destructuring: adding a field forces this test to be revisited.
        let ParallelConfig {
            thread_count,
            min_chunk_size,
            max_chunk_size,
            max_input_size,
            max_documents,
        } = ParallelConfig::default();

        assert_eq!(thread_count, None);
        assert_eq!(min_chunk_size, 4096);
        assert_eq!(max_chunk_size, 10 * MB);
        assert_eq!(max_input_size, 100 * MB);
        assert_eq!(max_documents, 100_000);
    }

    /// Every builder method stores the value it was given.
    #[test]
    fn test_config_builder() {
        let built = ParallelConfig::new()
            .with_thread_count(Some(4))
            .with_min_chunk_size(2048)
            .with_max_chunk_size(5 * MB)
            .with_max_input_size(50 * MB)
            .with_max_documents(50_000);

        let ParallelConfig {
            thread_count,
            min_chunk_size,
            max_chunk_size,
            max_input_size,
            max_documents,
        } = built;

        assert_eq!(thread_count, Some(4));
        assert_eq!(min_chunk_size, 2048);
        assert_eq!(max_chunk_size, 5 * MB);
        assert_eq!(max_input_size, 50 * MB);
        assert_eq!(max_documents, 50_000);
    }

    /// `Some(0)` (sequential mode) is stored verbatim.
    #[test]
    fn test_sequential_mode() {
        let sequential = ParallelConfig::new().with_thread_count(Some(0));
        assert_eq!(sequential.thread_count, Some(0));
    }

    /// The effective thread count honours explicit requests up to the cap.
    #[test]
    fn test_effective_thread_count_capping() {
        // (requested, expected): normal case, excessive request, sequential mode.
        let explicit_cases = [(4, 4), (10_000, MAX_THREADS), (0, 0)];
        for (requested, expected) in explicit_cases {
            let config = ParallelConfig::new().with_thread_count(Some(requested));
            assert_eq!(config.effective_thread_count(), expected);
        }

        // Auto-detect (should be capped if CPU count > MAX_THREADS)
        let auto = ParallelConfig::new();
        assert!(auto.effective_thread_count() <= MAX_THREADS);
    }
}