fast_yaml_parallel/
config.rs

//! Configuration for parallel processing behavior.
2
/// Hard upper bound on the number of worker threads (security limit).
///
/// `Config::effective_workers` never reports more workers than this, whether
/// the count was requested explicitly or auto-detected from the CPU count.
const MAX_THREADS: usize = 128;
5
/// Configuration for parallel processing behavior.
///
/// Simplified configuration with essential fields for both document-level
/// and file-level parallelism. Settings are applied with the `with_*` builder
/// methods and read back through the getters named after each field.
///
/// Two configurations compare equal when all four settings are equal.
///
/// # Security Limits
///
/// To prevent denial-of-service attacks and resource exhaustion:
/// - Maximum threads: 128
/// - Maximum input size: 100MB (configurable via `max_input_size`)
///
/// # Examples
///
/// ```
/// use fast_yaml_parallel::Config;
///
/// let config = Config::new()
///     .with_workers(Some(8))
///     .with_sequential_threshold(2048);
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Config {
    /// Worker count: `None` = auto (CPU count), `Some(0)` = sequential,
    /// `Some(n)` = `n` threads.
    pub(crate) workers: Option<usize>,

    /// Mmap threshold for large file reading, in bytes (default: 512KB).
    pub(crate) mmap_threshold: usize,

    /// Maximum input size in bytes (`DoS` protection, default: 100MB).
    pub(crate) max_input_size: usize,

    /// Sequential threshold in bytes: use sequential processing for small
    /// inputs (default: 4KB).
    pub(crate) sequential_threshold: usize,
}
40
41impl Config {
42    /// Creates default configuration.
43    ///
44    /// # Examples
45    ///
46    /// ```
47    /// use fast_yaml_parallel::Config;
48    ///
49    /// let config = Config::new();
50    /// ```
51    #[inline]
52    #[must_use]
53    pub fn new() -> Self {
54        Self::default()
55    }
56
57    /// Sets worker count.
58    ///
59    /// - `None`: Auto-detect CPU count (default, capped at 128)
60    /// - `Some(0)`: Sequential processing (no parallelism)
61    /// - `Some(n)`: Use exactly `n` threads (capped at 128)
62    ///
63    /// # Security
64    ///
65    /// Thread count is capped at 128 to prevent resource exhaustion.
66    ///
67    /// # Examples
68    ///
69    /// ```
70    /// use fast_yaml_parallel::Config;
71    ///
72    /// let config = Config::new().with_workers(Some(4));
73    /// ```
74    #[must_use]
75    pub const fn with_workers(mut self, workers: Option<usize>) -> Self {
76        self.workers = workers;
77        self
78    }
79
80    /// Sets memory-map threshold for file reading.
81    ///
82    /// Files larger than this threshold will use memory-mapped I/O.
83    /// Default: 512KB
84    ///
85    /// # Tuning Guidance
86    ///
87    /// The optimal threshold depends on your workload:
88    ///
89    /// - **Lower (256KB-512KB)**: Better for many medium files (100KB-1MB)
90    ///   - Pros: Less virtual memory pressure, faster for small-to-medium files
91    ///   - Cons: More heap allocations for files just above threshold
92    ///
93    /// - **Higher (1MB-2MB)**: Better for fewer large files (>2MB)
94    ///   - Pros: Fewer mmaps, better for very large files
95    ///   - Cons: More heap usage for medium files
96    ///
97    /// Consider your typical file size distribution and available memory.
98    /// Profile with real data before changing the default.
99    ///
100    /// # Examples
101    ///
102    /// ```
103    /// use fast_yaml_parallel::Config;
104    ///
105    /// let config = Config::new()
106    ///     .with_mmap_threshold(1024 * 1024); // 1MB
107    /// ```
108    #[must_use]
109    pub const fn with_mmap_threshold(mut self, threshold: usize) -> Self {
110        self.mmap_threshold = threshold;
111        self
112    }
113
114    /// Sets maximum input size in bytes.
115    ///
116    /// Input exceeding this size will be rejected.
117    /// Default: 100MB
118    ///
119    /// # Security
120    ///
121    /// This limit prevents denial-of-service attacks via extremely large inputs.
122    ///
123    /// # Examples
124    ///
125    /// ```
126    /// use fast_yaml_parallel::Config;
127    ///
128    /// let config = Config::new()
129    ///     .with_max_input_size(200 * 1024 * 1024); // 200MB
130    /// ```
131    #[must_use]
132    pub const fn with_max_input_size(mut self, size: usize) -> Self {
133        self.max_input_size = size;
134        self
135    }
136
137    /// Sets sequential processing threshold.
138    ///
139    /// Inputs smaller than this threshold will use sequential processing
140    /// to avoid parallelism overhead. Default: 4KB
141    ///
142    /// # Examples
143    ///
144    /// ```
145    /// use fast_yaml_parallel::Config;
146    ///
147    /// let config = Config::new()
148    ///     .with_sequential_threshold(2048);
149    /// ```
150    #[must_use]
151    pub const fn with_sequential_threshold(mut self, threshold: usize) -> Self {
152        self.sequential_threshold = threshold;
153        self
154    }
155
156    /// Returns worker count setting.
157    #[must_use]
158    pub const fn workers(&self) -> Option<usize> {
159        self.workers
160    }
161
162    /// Returns mmap threshold.
163    #[must_use]
164    pub const fn mmap_threshold(&self) -> usize {
165        self.mmap_threshold
166    }
167
168    /// Returns maximum input size.
169    #[must_use]
170    pub const fn max_input_size(&self) -> usize {
171        self.max_input_size
172    }
173
174    /// Returns sequential threshold.
175    #[must_use]
176    pub const fn sequential_threshold(&self) -> usize {
177        self.sequential_threshold
178    }
179}
180
181impl Default for Config {
182    fn default() -> Self {
183        Self {
184            workers: None,                     // Auto-detect CPU count
185            mmap_threshold: 512 * 1024,        // 512KB
186            max_input_size: 100 * 1024 * 1024, // 100MB
187            sequential_threshold: 4096,        // 4KB
188        }
189    }
190}
191
192impl Config {
193    /// Returns the effective worker count, capped at security limit.
194    ///
195    /// # Security
196    ///
197    /// Worker count is capped at 128 to prevent resource exhaustion.
198    pub(crate) fn effective_workers(&self) -> usize {
199        let count = self.workers.unwrap_or_else(num_cpus::get);
200        count.min(MAX_THREADS)
201    }
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration carries the documented default values.
    #[test]
    fn test_default_config() {
        let Config {
            workers,
            mmap_threshold,
            max_input_size,
            sequential_threshold,
        } = Config::default();

        assert_eq!(workers, None);
        assert_eq!(mmap_threshold, 512 * 1024);
        assert_eq!(max_input_size, 100 * 1024 * 1024);
        assert_eq!(sequential_threshold, 4096);
    }

    /// Every builder method stores the value it was given.
    #[test]
    fn test_config_builder() {
        let Config {
            workers,
            mmap_threshold,
            max_input_size,
            sequential_threshold,
        } = Config::new()
            .with_workers(Some(4))
            .with_mmap_threshold(1024 * 1024)
            .with_max_input_size(50 * 1024 * 1024)
            .with_sequential_threshold(2048);

        assert_eq!(workers, Some(4));
        assert_eq!(mmap_threshold, 1024 * 1024);
        assert_eq!(max_input_size, 50 * 1024 * 1024);
        assert_eq!(sequential_threshold, 2048);
    }

    /// `Some(0)` (sequential mode) is stored as-is.
    #[test]
    fn test_sequential_mode() {
        let sequential = Config::new().with_workers(Some(0));
        assert_eq!(sequential.workers, Some(0));
    }

    /// Explicit and auto-detected worker counts never exceed `MAX_THREADS`.
    #[test]
    fn test_effective_workers_capping() {
        // (requested, expected): normal case, excessive count, sequential mode.
        let explicit_cases = [(4, 4), (10_000, MAX_THREADS), (0, 0)];
        for (requested, expected) in explicit_cases {
            let config = Config::new().with_workers(Some(requested));
            assert_eq!(
                config.effective_workers(),
                expected,
                "requested {requested} workers"
            );
        }

        // Auto-detect (should be capped if CPU count > MAX_THREADS)
        let auto = Config::new();
        assert!(auto.effective_workers() <= MAX_THREADS);
    }

    /// Getters report exactly what the builder methods stored.
    #[test]
    fn test_getters() {
        let config = Config::new()
            .with_workers(Some(8))
            .with_mmap_threshold(2048)
            .with_max_input_size(50_000_000)
            .with_sequential_threshold(8192);

        let observed = (
            config.workers(),
            config.mmap_threshold(),
            config.max_input_size(),
            config.sequential_threshold(),
        );
        assert_eq!(observed, (Some(8), 2048, 50_000_000, 8192));
    }

    /// `Config::new()` and `Config::default()` agree on every field.
    #[test]
    fn test_new_equals_default() {
        let fields = |config: &Config| {
            (
                config.workers,
                config.mmap_threshold,
                config.max_input_size,
                config.sequential_threshold,
            )
        };

        assert_eq!(fields(&Config::new()), fields(&Config::default()));
    }
}