hedl_stream/buffer_config.rs
1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Buffer sizing configuration.
19//!
20//! Provides size classes and hints for optimizing buffer allocation
21//! based on workload characteristics.
22
/// Named buffer-size presets covering common workload profiles.
///
/// Every variant stands for one fixed buffer size. The presets range from
/// memory-constrained embedded targets up to high-throughput processing of
/// very large inputs.
///
/// # Size Classes
///
/// - **Small (8KB)**: small config files, embedded systems, tight memory
/// - **Medium (64KB)**: the default; balances memory use and performance
/// - **Large (256KB)**: large files, server workloads, high throughput
/// - **Huge (1MB)**: multi-GB files, fewest system calls, maximum performance
///
/// # Performance Characteristics
///
/// A larger buffer costs more memory but reduces system call overhead.
/// Which size is best depends on:
/// - File size (bigger files gain more from bigger buffers)
/// - Available memory (constrained systems need smaller buffers)
/// - I/O characteristics (fast storage gains more from bigger buffers)
/// - Access pattern (sequential vs. random)
///
/// # Examples
///
/// ## Selecting a Preset
///
/// ```rust
/// use hedl_stream::{StreamingParserConfig, BufferSizeHint};
///
/// let config = StreamingParserConfig::default()
///     .with_buffer_hint(BufferSizeHint::Large);
///
/// assert_eq!(config.buffer_size, 256 * 1024);
/// ```
///
/// ## Custom Configuration
///
/// ```rust
/// use hedl_stream::{StreamingParserConfig, BufferSizeHint};
///
/// // Small embedded device
/// let embedded_config = StreamingParserConfig::default()
///     .with_buffer_hint(BufferSizeHint::Small);
///
/// // High-throughput server
/// let server_config = StreamingParserConfig::default()
///     .with_buffer_hint(BufferSizeHint::Huge);
/// ```
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum BufferSizeHint {
    /// 8KB buffer, intended for small files and embedded systems.
    ///
    /// **Use when:**
    /// - Parsing config files (<1MB)
    /// - Running on embedded systems
    /// - Memory is very limited (<10MB available)
    /// - Many small files are processed concurrently
    ///
    /// **Trade-offs:**
    /// - Smallest memory footprint
    /// - Large files need more system calls
    /// - Throughput on fast storage is lower
    Small,

    /// 64KB buffer, the default choice for general use.
    ///
    /// **Use when:**
    /// - Parsing is general-purpose
    /// - There are no specific performance requirements
    /// - File sizes are mixed
    /// - Working in a standard development environment
    ///
    /// **Trade-offs:**
    /// - Balances memory against performance
    /// - Fits most workloads
    /// - May not be optimal at either extreme
    #[default]
    Medium,

    /// 256KB buffer, intended for large files and high throughput.
    ///
    /// **Use when:**
    /// - Parsing large files (>100MB)
    /// - Running high-throughput ETL pipelines
    /// - Storage is fast (`NVMe` SSD)
    /// - Running in server environments with memory to spare
    ///
    /// **Trade-offs:**
    /// - Less syscall overhead
    /// - Better throughput on large files
    /// - More memory used per parser
    Large,

    /// 1MB buffer, intended for maximum performance on huge files.
    ///
    /// **Use when:**
    /// - Parsing multi-GB files
    /// - Maximum throughput is required
    /// - Memory is abundant
    /// - Processing is single-threaded
    ///
    /// **Trade-offs:**
    /// - Least syscall overhead
    /// - Highest throughput
    /// - Significant memory per parser (limits concurrency)
    Huge,
}
130
131impl BufferSizeHint {
132 /// Get the buffer size in bytes for this hint.
133 ///
134 /// # Examples
135 ///
136 /// ```rust
137 /// use hedl_stream::BufferSizeHint;
138 ///
139 /// assert_eq!(BufferSizeHint::Small.size(), 8 * 1024);
140 /// assert_eq!(BufferSizeHint::Medium.size(), 64 * 1024);
141 /// assert_eq!(BufferSizeHint::Large.size(), 256 * 1024);
142 /// assert_eq!(BufferSizeHint::Huge.size(), 1024 * 1024);
143 /// ```
144 #[inline]
145 #[must_use]
146 pub const fn size(self) -> usize {
147 match self {
148 Self::Small => 8 * 1024,
149 Self::Medium => 64 * 1024,
150 Self::Large => 256 * 1024,
151 Self::Huge => 1024 * 1024,
152 }
153 }
154
155 /// Get a buffer size hint based on file size.
156 ///
157 /// Automatically selects an appropriate buffer size based on the
158 /// total size of the file being parsed.
159 ///
160 /// # Heuristics
161 ///
162 /// - Files <1MB: Small (8KB)
163 /// - Files 1-100MB: Medium (64KB)
164 /// - Files 100MB-1GB: Large (256KB)
165 /// - Files >1GB: Huge (1MB)
166 ///
167 /// # Examples
168 ///
169 /// ```rust
170 /// use hedl_stream::BufferSizeHint;
171 ///
172 /// let hint = BufferSizeHint::for_file_size(500 * 1024); // 500KB
173 /// assert_eq!(hint, BufferSizeHint::Small);
174 ///
175 /// let hint = BufferSizeHint::for_file_size(50 * 1024 * 1024); // 50MB
176 /// assert_eq!(hint, BufferSizeHint::Medium);
177 ///
178 /// let hint = BufferSizeHint::for_file_size(500 * 1024 * 1024); // 500MB
179 /// assert_eq!(hint, BufferSizeHint::Large);
180 ///
181 /// let hint = BufferSizeHint::for_file_size(2 * 1024 * 1024 * 1024); // 2GB
182 /// assert_eq!(hint, BufferSizeHint::Huge);
183 /// ```
184 #[must_use]
185 pub fn for_file_size(size_bytes: u64) -> Self {
186 const MB: u64 = 1024 * 1024;
187 const GB: u64 = 1024 * MB;
188
189 if size_bytes < MB {
190 Self::Small
191 } else if size_bytes < 100 * MB {
192 Self::Medium
193 } else if size_bytes < GB {
194 Self::Large
195 } else {
196 Self::Huge
197 }
198 }
199
200 /// Get a buffer size hint for memory-constrained environments.
201 ///
202 /// Recommends a buffer size that won't exceed the given memory budget
203 /// when running `concurrent_parsers` simultaneously.
204 ///
205 /// # Examples
206 ///
207 /// ```rust
208 /// use hedl_stream::BufferSizeHint;
209 ///
210 /// // 10MB available, running 10 parsers concurrently
211 /// let hint = BufferSizeHint::for_memory_budget(10 * 1024 * 1024, 10);
212 /// // Should suggest Small (8KB) since 10 * 64KB = 640KB is reasonable
213 /// ```
214 #[must_use]
215 pub fn for_memory_budget(available_memory: usize, concurrent_parsers: usize) -> Self {
216 if concurrent_parsers == 0 {
217 return Self::Medium;
218 }
219
220 let budget_per_parser = available_memory / concurrent_parsers;
221
222 // Reserve 2x buffer size for other allocations (line buffers, etc.)
223 let effective_budget = budget_per_parser / 2;
224
225 if effective_budget >= Self::Huge.size() {
226 Self::Huge
227 } else if effective_budget >= Self::Large.size() {
228 Self::Large
229 } else if effective_budget >= Self::Medium.size() {
230 Self::Medium
231 } else {
232 Self::Small
233 }
234 }
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    // Binary size units for file-size arguments (`u64`).
    const KB: u64 = 1024;
    const MB: u64 = 1024 * KB;
    const GB: u64 = 1024 * MB;

    // Binary size units for memory-budget arguments (`usize`).
    const MEM_KB: usize = 1024;
    const MEM_MB: usize = 1024 * MEM_KB;

    /// Checks each `(file size in bytes, expected hint)` pair against
    /// `BufferSizeHint::for_file_size`.
    fn assert_file_hints(cases: &[(u64, BufferSizeHint)]) {
        for &(bytes, expected) in cases {
            assert_eq!(
                BufferSizeHint::for_file_size(bytes),
                expected,
                "file size: {bytes} bytes"
            );
        }
    }

    /// Checks each `(available memory, parser count, expected hint)` triple
    /// against `BufferSizeHint::for_memory_budget`.
    fn assert_budget_hints(cases: &[(usize, usize, BufferSizeHint)]) {
        for &(memory, parsers, expected) in cases {
            assert_eq!(
                BufferSizeHint::for_memory_budget(memory, parsers),
                expected,
                "memory: {memory} bytes, parsers: {parsers}"
            );
        }
    }

    // -------------------- BufferSizeHint::size --------------------

    #[test]
    fn test_buffer_size_hint_sizes() {
        let expected = [
            (BufferSizeHint::Small, 8 * 1024),
            (BufferSizeHint::Medium, 64 * 1024),
            (BufferSizeHint::Large, 256 * 1024),
            (BufferSizeHint::Huge, 1024 * 1024),
        ];
        for (hint, bytes) in expected {
            assert_eq!(hint.size(), bytes);
        }
    }

    #[test]
    fn test_buffer_size_hint_ordering() {
        // Sizes must be strictly increasing from Small through Huge.
        let ascending = [
            BufferSizeHint::Small.size(),
            BufferSizeHint::Medium.size(),
            BufferSizeHint::Large.size(),
            BufferSizeHint::Huge.size(),
        ];
        for pair in ascending.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }

    // -------------------- BufferSizeHint::for_file_size --------------------

    #[test]
    fn test_for_file_size_tiny() {
        // 1KB
        assert_file_hints(&[(KB, BufferSizeHint::Small)]);
    }

    #[test]
    fn test_for_file_size_small() {
        // 500KB
        assert_file_hints(&[(500 * KB, BufferSizeHint::Small)]);
    }

    #[test]
    fn test_for_file_size_boundary_1mb() {
        // Just under, exactly at, and just over 1MB.
        assert_file_hints(&[
            (MB - 1, BufferSizeHint::Small),
            (MB, BufferSizeHint::Medium),
            (MB + 1, BufferSizeHint::Medium),
        ]);
    }

    #[test]
    fn test_for_file_size_medium() {
        // 10MB and 50MB
        assert_file_hints(&[
            (10 * MB, BufferSizeHint::Medium),
            (50 * MB, BufferSizeHint::Medium),
        ]);
    }

    #[test]
    fn test_for_file_size_boundary_100mb() {
        // Just under, exactly at, and just over 100MB.
        assert_file_hints(&[
            (100 * MB - 1, BufferSizeHint::Medium),
            (100 * MB, BufferSizeHint::Large),
            (100 * MB + 1, BufferSizeHint::Large),
        ]);
    }

    #[test]
    fn test_for_file_size_large() {
        // 500MB
        assert_file_hints(&[(500 * MB, BufferSizeHint::Large)]);
    }

    #[test]
    fn test_for_file_size_boundary_1gb() {
        // Just under, exactly at, and just over 1GB.
        assert_file_hints(&[
            (GB - 1, BufferSizeHint::Large),
            (GB, BufferSizeHint::Huge),
            (GB + 1, BufferSizeHint::Huge),
        ]);
    }

    #[test]
    fn test_for_file_size_huge() {
        // 10GB
        assert_file_hints(&[(10 * GB, BufferSizeHint::Huge)]);
    }

    #[test]
    fn test_for_file_size_zero() {
        assert_file_hints(&[(0, BufferSizeHint::Small)]);
    }

    // -------------------- BufferSizeHint::for_memory_budget --------------------

    #[test]
    fn test_for_memory_budget_abundant() {
        // 100MB for a single parser -> Huge
        assert_budget_hints(&[(100 * MEM_MB, 1, BufferSizeHint::Huge)]);
    }

    #[test]
    fn test_for_memory_budget_comfortable() {
        // 50MB / 10 parsers = 5MB each, 2.5MB effective -> Huge
        assert_budget_hints(&[(50 * MEM_MB, 10, BufferSizeHint::Huge)]);
    }

    #[test]
    fn test_for_memory_budget_moderate() {
        // 10MB / 10 parsers = 1MB each, 512KB effective -> Large
        assert_budget_hints(&[(10 * MEM_MB, 10, BufferSizeHint::Large)]);
    }

    #[test]
    fn test_for_memory_budget_constrained() {
        // 2MB / 10 parsers = ~200KB each, ~100KB effective -> Medium
        assert_budget_hints(&[(2 * MEM_MB, 10, BufferSizeHint::Medium)]);
    }

    #[test]
    fn test_for_memory_budget_very_constrained() {
        // 500KB / 10 parsers = 50KB each, 25KB effective -> Small
        assert_budget_hints(&[(500 * MEM_KB, 10, BufferSizeHint::Small)]);
    }

    #[test]
    fn test_for_memory_budget_zero_parsers() {
        // Edge case: zero parsers falls back to Medium.
        assert_budget_hints(&[(100 * MEM_MB, 0, BufferSizeHint::Medium)]);
    }

    #[test]
    fn test_for_memory_budget_one_parser() {
        assert_budget_hints(&[(10 * MEM_MB, 1, BufferSizeHint::Huge)]);
    }

    #[test]
    fn test_for_memory_budget_many_parsers() {
        // 10MB / 100 parsers = ~100KB each, ~50KB effective -> Small
        assert_budget_hints(&[(10 * MEM_MB, 100, BufferSizeHint::Small)]);
    }

    #[test]
    fn test_for_memory_budget_boundary_huge_to_large() {
        // Huge needs 1MB of effective budget, i.e. 2MB per parser.
        assert_budget_hints(&[
            (4 * MEM_MB, 1, BufferSizeHint::Huge),  // 4MB per parser
            (4 * MEM_MB, 2, BufferSizeHint::Huge),  // 2MB per parser
            (4 * MEM_MB, 3, BufferSizeHint::Large), // ~1.3MB per parser
        ]);
    }

    // -------------------- Default and derived traits --------------------

    #[test]
    fn test_buffer_size_hint_default() {
        let fallback = BufferSizeHint::default();
        assert_eq!(fallback, BufferSizeHint::Medium);
    }

    #[test]
    fn test_buffer_size_hint_debug() {
        let rendered = format!("{:?}", BufferSizeHint::Small);
        assert!(rendered.contains("Small"));
    }

    #[test]
    fn test_buffer_size_hint_clone() {
        let original = BufferSizeHint::Large;
        let duplicate = original;
        assert_eq!(original, duplicate);
    }

    #[test]
    fn test_buffer_size_hint_equality() {
        assert_eq!(BufferSizeHint::Small, BufferSizeHint::Small);
        assert_ne!(BufferSizeHint::Small, BufferSizeHint::Medium);
    }

    #[test]
    fn test_buffer_size_hint_hash() {
        use std::collections::HashMap;

        let labels: HashMap<BufferSizeHint, &str> = vec![
            (BufferSizeHint::Small, "small"),
            (BufferSizeHint::Medium, "medium"),
        ]
        .into_iter()
        .collect();

        assert_eq!(labels.get(&BufferSizeHint::Small), Some(&"small"));
        assert_eq!(labels.get(&BufferSizeHint::Medium), Some(&"medium"));
    }

    // -------------------- const evaluation --------------------

    #[test]
    fn test_size_is_const() {
        // `size()` must be callable in a const context.
        const SMALL_BYTES: usize = BufferSizeHint::Small.size();
        assert_eq!(SMALL_BYTES, 8 * 1024);
    }
}