fast_yaml_parallel/config.rs
1//! Configuration for parallel processing behavior.
2
/// Hard upper bound on the number of worker threads (security limit).
///
/// Both explicitly requested and auto-detected worker counts are clamped to
/// this value when the effective worker count is computed.
const MAX_THREADS: usize = 128;
5
/// Configuration for parallel processing behavior.
///
/// Holds the small set of tunables shared by document-level and file-level
/// parallelism. Build one with [`Config::new`] and refine it with the
/// `with_*` builder methods.
///
/// Two configurations compare equal when all four settings are equal.
///
/// # Security Limits
///
/// To prevent denial-of-service attacks and resource exhaustion:
/// - Maximum threads: 128
/// - Maximum input size: 100MB by default (configurable via
///   [`Config::with_max_input_size`])
///
/// # Examples
///
/// ```
/// use fast_yaml_parallel::Config;
///
/// let config = Config::new()
///     .with_workers(Some(8))
///     .with_sequential_threshold(2048);
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Config {
    /// Worker count: `None` = auto (CPU count), `Some(0)` = sequential,
    /// `Some(n)` = `n` threads.
    pub(crate) workers: Option<usize>,

    /// Mmap threshold in bytes for large file reading (default: 512KB).
    pub(crate) mmap_threshold: usize,

    /// Maximum input size in bytes (`DoS` protection, default: 100MB).
    pub(crate) max_input_size: usize,

    /// Sequential threshold in bytes: inputs below it are processed
    /// sequentially (default: 4KB).
    pub(crate) sequential_threshold: usize,
}
40
41impl Config {
42 /// Creates default configuration.
43 ///
44 /// # Examples
45 ///
46 /// ```
47 /// use fast_yaml_parallel::Config;
48 ///
49 /// let config = Config::new();
50 /// ```
51 #[inline]
52 #[must_use]
53 pub fn new() -> Self {
54 Self::default()
55 }
56
57 /// Sets worker count.
58 ///
59 /// - `None`: Auto-detect CPU count (default, capped at 128)
60 /// - `Some(0)`: Sequential processing (no parallelism)
61 /// - `Some(n)`: Use exactly `n` threads (capped at 128)
62 ///
63 /// # Security
64 ///
65 /// Thread count is capped at 128 to prevent resource exhaustion.
66 ///
67 /// # Examples
68 ///
69 /// ```
70 /// use fast_yaml_parallel::Config;
71 ///
72 /// let config = Config::new().with_workers(Some(4));
73 /// ```
74 #[must_use]
75 pub const fn with_workers(mut self, workers: Option<usize>) -> Self {
76 self.workers = workers;
77 self
78 }
79
80 /// Sets memory-map threshold for file reading.
81 ///
82 /// Files larger than this threshold will use memory-mapped I/O.
83 /// Default: 512KB
84 ///
85 /// # Tuning Guidance
86 ///
87 /// The optimal threshold depends on your workload:
88 ///
89 /// - **Lower (256KB-512KB)**: Better for many medium files (100KB-1MB)
90 /// - Pros: Less virtual memory pressure, faster for small-to-medium files
91 /// - Cons: More heap allocations for files just above threshold
92 ///
93 /// - **Higher (1MB-2MB)**: Better for fewer large files (>2MB)
94 /// - Pros: Fewer mmaps, better for very large files
95 /// - Cons: More heap usage for medium files
96 ///
97 /// Consider your typical file size distribution and available memory.
98 /// Profile with real data before changing the default.
99 ///
100 /// # Examples
101 ///
102 /// ```
103 /// use fast_yaml_parallel::Config;
104 ///
105 /// let config = Config::new()
106 /// .with_mmap_threshold(1024 * 1024); // 1MB
107 /// ```
108 #[must_use]
109 pub const fn with_mmap_threshold(mut self, threshold: usize) -> Self {
110 self.mmap_threshold = threshold;
111 self
112 }
113
114 /// Sets maximum input size in bytes.
115 ///
116 /// Input exceeding this size will be rejected.
117 /// Default: 100MB
118 ///
119 /// # Security
120 ///
121 /// This limit prevents denial-of-service attacks via extremely large inputs.
122 ///
123 /// # Examples
124 ///
125 /// ```
126 /// use fast_yaml_parallel::Config;
127 ///
128 /// let config = Config::new()
129 /// .with_max_input_size(200 * 1024 * 1024); // 200MB
130 /// ```
131 #[must_use]
132 pub const fn with_max_input_size(mut self, size: usize) -> Self {
133 self.max_input_size = size;
134 self
135 }
136
137 /// Sets sequential processing threshold.
138 ///
139 /// Inputs smaller than this threshold will use sequential processing
140 /// to avoid parallelism overhead. Default: 4KB
141 ///
142 /// # Examples
143 ///
144 /// ```
145 /// use fast_yaml_parallel::Config;
146 ///
147 /// let config = Config::new()
148 /// .with_sequential_threshold(2048);
149 /// ```
150 #[must_use]
151 pub const fn with_sequential_threshold(mut self, threshold: usize) -> Self {
152 self.sequential_threshold = threshold;
153 self
154 }
155
156 /// Returns worker count setting.
157 #[must_use]
158 pub const fn workers(&self) -> Option<usize> {
159 self.workers
160 }
161
162 /// Returns mmap threshold.
163 #[must_use]
164 pub const fn mmap_threshold(&self) -> usize {
165 self.mmap_threshold
166 }
167
168 /// Returns maximum input size.
169 #[must_use]
170 pub const fn max_input_size(&self) -> usize {
171 self.max_input_size
172 }
173
174 /// Returns sequential threshold.
175 #[must_use]
176 pub const fn sequential_threshold(&self) -> usize {
177 self.sequential_threshold
178 }
179}
180
181impl Default for Config {
182 fn default() -> Self {
183 Self {
184 workers: None, // Auto-detect CPU count
185 mmap_threshold: 512 * 1024, // 512KB
186 max_input_size: 100 * 1024 * 1024, // 100MB
187 sequential_threshold: 4096, // 4KB
188 }
189 }
190}
191
192impl Config {
193 /// Returns the effective worker count, capped at security limit.
194 ///
195 /// # Security
196 ///
197 /// Worker count is capped at 128 to prevent resource exhaustion.
198 pub(crate) fn effective_workers(&self) -> usize {
199 let count = self.workers.unwrap_or_else(num_cpus::get);
200 count.min(MAX_THREADS)
201 }
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// The `Default` impl yields the documented default values.
    #[test]
    fn test_default_config() {
        let Config {
            workers,
            mmap_threshold,
            max_input_size,
            sequential_threshold,
        } = Config::default();

        assert_eq!(workers, None);
        assert_eq!(mmap_threshold, 512 * 1024);
        assert_eq!(max_input_size, 100 * 1024 * 1024);
        assert_eq!(sequential_threshold, 4096);
    }

    /// Every builder method stores the value it was given.
    #[test]
    fn test_config_builder() {
        let Config {
            workers,
            mmap_threshold,
            max_input_size,
            sequential_threshold,
        } = Config::new()
            .with_workers(Some(4))
            .with_mmap_threshold(1024 * 1024)
            .with_max_input_size(50 * 1024 * 1024)
            .with_sequential_threshold(2048);

        assert_eq!(workers, Some(4));
        assert_eq!(mmap_threshold, 1024 * 1024);
        assert_eq!(max_input_size, 50 * 1024 * 1024);
        assert_eq!(sequential_threshold, 2048);
    }

    /// `Some(0)` (sequential mode) is stored verbatim.
    #[test]
    fn test_sequential_mode() {
        let sequential = Config::new().with_workers(Some(0));
        assert_eq!(sequential.workers, Some(0));
    }

    /// Explicit worker counts pass through up to the cap; auto-detection is
    /// bounded by the same cap.
    #[test]
    fn test_effective_workers_capping() {
        // (requested workers, expected effective workers)
        let cases = [
            (Some(4), 4),                // normal case
            (Some(10_000), MAX_THREADS), // excessive count is capped
            (Some(0), 0),                // sequential mode
        ];
        for (requested, expected) in cases {
            let resolved = Config::new().with_workers(requested).effective_workers();
            assert_eq!(resolved, expected);
        }

        // Auto-detect (capped if CPU count > MAX_THREADS)
        let auto = Config::new();
        assert!(auto.effective_workers() <= MAX_THREADS);
    }

    /// Getters return what the builders stored.
    #[test]
    fn test_getters() {
        let built = Config::new()
            .with_workers(Some(8))
            .with_mmap_threshold(2048)
            .with_max_input_size(50_000_000)
            .with_sequential_threshold(8192);

        assert_eq!(built.workers(), Some(8));
        assert_eq!(built.mmap_threshold(), 2048);
        assert_eq!(built.max_input_size(), 50_000_000);
        assert_eq!(built.sequential_threshold(), 8192);
    }

    /// `Config::new()` and `Config::default()` agree on every field.
    #[test]
    fn test_new_equals_default() {
        let from_new = Config::new();
        let from_default = Config::default();

        assert_eq!(from_new.workers, from_default.workers);
        assert_eq!(from_new.mmap_threshold, from_default.mmap_threshold);
        assert_eq!(from_new.max_input_size, from_default.max_input_size);
        assert_eq!(
            from_new.sequential_threshold,
            from_default.sequential_threshold
        );
    }
}