// Source: hedl_stream/buffer_config.rs

// Dweve HEDL - Hierarchical Entity Data Language
//
// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the LICENSE file at the
// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Buffer sizing configuration.
//!
//! Provides size classes and hints for optimizing buffer allocation
//! based on workload characteristics.

/// Buffer size hints for different workload profiles.
///
/// These size classes provide pre-configured buffer sizes optimized
/// for common use cases, from embedded systems to high-throughput
/// data processing. The byte count behind each class is returned by
/// [`BufferSizeHint::size`].
///
/// # Size Classes
///
/// - **Small (8KB)**: Embedded systems, small config files, memory-constrained environments
/// - **Medium (64KB)**: Default for general use, good balance of performance and memory
/// - **Large (256KB)**: Large files, high-throughput scenarios, server workloads
/// - **Huge (1MB)**: Multi-GB files, maximum performance, minimal syscall overhead
///
/// [`BufferSizeHint::default()`](Default::default) is [`BufferSizeHint::Medium`].
///
/// # Performance Characteristics
///
/// Larger buffers reduce system call overhead but use more memory.
/// The optimal size depends on:
/// - File size (larger files benefit from larger buffers)
/// - Available memory (constrained systems need smaller buffers)
/// - I/O characteristics (fast storage benefits more from large buffers)
/// - Access pattern (sequential vs. random)
///
/// # Examples
///
/// ## Selecting a Size Class
///
/// ```rust
/// use hedl_stream::{StreamingParserConfig, BufferSizeHint};
///
/// let config = StreamingParserConfig::default()
///     .with_buffer_hint(BufferSizeHint::Large);
///
/// assert_eq!(config.buffer_size, 256 * 1024);
/// ```
///
/// ## Per-Environment Configuration
///
/// ```rust
/// use hedl_stream::{StreamingParserConfig, BufferSizeHint};
///
/// // Small embedded device
/// let embedded_config = StreamingParserConfig::default()
///     .with_buffer_hint(BufferSizeHint::Small);
///
/// // High-throughput server
/// let server_config = StreamingParserConfig::default()
///     .with_buffer_hint(BufferSizeHint::Huge);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum BufferSizeHint {
    /// 8KB buffer - for embedded systems and small files.
    ///
    /// **Use when:**
    /// - Parsing config files (<1MB)
    /// - Running on embedded systems
    /// - Memory is very limited (<10MB available)
    /// - Processing many small files concurrently
    ///
    /// **Trade-offs:**
    /// - Minimal memory footprint
    /// - More system calls for large files
    /// - Lower throughput on fast storage
    Small,

    /// 64KB buffer - default for general use.
    ///
    /// **Use when:**
    /// - General-purpose parsing
    /// - No specific performance requirements
    /// - Mixed file sizes
    /// - Standard development environments
    ///
    /// **Trade-offs:**
    /// - Good balance of memory and performance
    /// - Suitable for most workloads
    /// - May not be optimal for extremes
    #[default]
    Medium,

    /// 256KB buffer - for large files and high throughput.
    ///
    /// **Use when:**
    /// - Parsing large files (>100MB)
    /// - High-throughput ETL pipelines
    /// - Fast storage (`NVMe` SSD)
    /// - Server environments with available memory
    ///
    /// **Trade-offs:**
    /// - Reduced syscall overhead
    /// - Better throughput on large files
    /// - Higher memory usage per parser
    Large,

    /// 1MB buffer - maximum performance for huge files.
    ///
    /// **Use when:**
    /// - Parsing multi-GB files
    /// - Maximum throughput required
    /// - Abundant memory available
    /// - Single-threaded processing
    ///
    /// **Trade-offs:**
    /// - Minimal syscall overhead
    /// - Maximum throughput
    /// - Significant memory per parser (limits concurrency)
    Huge,
}

131impl BufferSizeHint {
132    /// Get the buffer size in bytes for this hint.
133    ///
134    /// # Examples
135    ///
136    /// ```rust
137    /// use hedl_stream::BufferSizeHint;
138    ///
139    /// assert_eq!(BufferSizeHint::Small.size(), 8 * 1024);
140    /// assert_eq!(BufferSizeHint::Medium.size(), 64 * 1024);
141    /// assert_eq!(BufferSizeHint::Large.size(), 256 * 1024);
142    /// assert_eq!(BufferSizeHint::Huge.size(), 1024 * 1024);
143    /// ```
144    #[inline]
145    #[must_use]
146    pub const fn size(self) -> usize {
147        match self {
148            Self::Small => 8 * 1024,
149            Self::Medium => 64 * 1024,
150            Self::Large => 256 * 1024,
151            Self::Huge => 1024 * 1024,
152        }
153    }
154
155    /// Get a buffer size hint based on file size.
156    ///
157    /// Automatically selects an appropriate buffer size based on the
158    /// total size of the file being parsed.
159    ///
160    /// # Heuristics
161    ///
162    /// - Files <1MB: Small (8KB)
163    /// - Files 1-100MB: Medium (64KB)
164    /// - Files 100MB-1GB: Large (256KB)
165    /// - Files >1GB: Huge (1MB)
166    ///
167    /// # Examples
168    ///
169    /// ```rust
170    /// use hedl_stream::BufferSizeHint;
171    ///
172    /// let hint = BufferSizeHint::for_file_size(500 * 1024); // 500KB
173    /// assert_eq!(hint, BufferSizeHint::Small);
174    ///
175    /// let hint = BufferSizeHint::for_file_size(50 * 1024 * 1024); // 50MB
176    /// assert_eq!(hint, BufferSizeHint::Medium);
177    ///
178    /// let hint = BufferSizeHint::for_file_size(500 * 1024 * 1024); // 500MB
179    /// assert_eq!(hint, BufferSizeHint::Large);
180    ///
181    /// let hint = BufferSizeHint::for_file_size(2 * 1024 * 1024 * 1024); // 2GB
182    /// assert_eq!(hint, BufferSizeHint::Huge);
183    /// ```
184    #[must_use]
185    pub fn for_file_size(size_bytes: u64) -> Self {
186        const MB: u64 = 1024 * 1024;
187        const GB: u64 = 1024 * MB;
188
189        if size_bytes < MB {
190            Self::Small
191        } else if size_bytes < 100 * MB {
192            Self::Medium
193        } else if size_bytes < GB {
194            Self::Large
195        } else {
196            Self::Huge
197        }
198    }
199
200    /// Get a buffer size hint for memory-constrained environments.
201    ///
202    /// Recommends a buffer size that won't exceed the given memory budget
203    /// when running `concurrent_parsers` simultaneously.
204    ///
205    /// # Examples
206    ///
207    /// ```rust
208    /// use hedl_stream::BufferSizeHint;
209    ///
210    /// // 10MB available, running 10 parsers concurrently
211    /// let hint = BufferSizeHint::for_memory_budget(10 * 1024 * 1024, 10);
212    /// // Should suggest Small (8KB) since 10 * 64KB = 640KB is reasonable
213    /// ```
214    #[must_use]
215    pub fn for_memory_budget(available_memory: usize, concurrent_parsers: usize) -> Self {
216        if concurrent_parsers == 0 {
217            return Self::Medium;
218        }
219
220        let budget_per_parser = available_memory / concurrent_parsers;
221
222        // Reserve 2x buffer size for other allocations (line buffers, etc.)
223        let effective_budget = budget_per_parser / 2;
224
225        if effective_budget >= Self::Huge.size() {
226            Self::Huge
227        } else if effective_budget >= Self::Large.size() {
228            Self::Large
229        } else if effective_budget >= Self::Medium.size() {
230            Self::Medium
231        } else {
232            Self::Small
233        }
234    }
235}
236
#[cfg(test)]
mod tests {
    //! Unit tests for `BufferSizeHint`: size table, file-size heuristic,
    //! memory-budget heuristic, and the derived traits.

    use super::*;

    // ==================== BufferSizeHint::size tests ====================

    #[test]
    fn test_buffer_size_hint_sizes() {
        assert_eq!(BufferSizeHint::Small.size(), 8 * 1024);
        assert_eq!(BufferSizeHint::Medium.size(), 64 * 1024);
        assert_eq!(BufferSizeHint::Large.size(), 256 * 1024);
        assert_eq!(BufferSizeHint::Huge.size(), 1024 * 1024);
    }

    #[test]
    fn test_buffer_size_hint_ordering() {
        assert!(BufferSizeHint::Small.size() < BufferSizeHint::Medium.size());
        assert!(BufferSizeHint::Medium.size() < BufferSizeHint::Large.size());
        assert!(BufferSizeHint::Large.size() < BufferSizeHint::Huge.size());
    }

    // ==================== BufferSizeHint::for_file_size tests ====================

    #[test]
    fn test_for_file_size_tiny() {
        let hint = BufferSizeHint::for_file_size(1024); // 1KB
        assert_eq!(hint, BufferSizeHint::Small);
    }

    #[test]
    fn test_for_file_size_small() {
        let hint = BufferSizeHint::for_file_size(500 * 1024); // 500KB
        assert_eq!(hint, BufferSizeHint::Small);
    }

    #[test]
    fn test_for_file_size_boundary_1mb() {
        let hint = BufferSizeHint::for_file_size(1024 * 1024 - 1); // Just under 1MB
        assert_eq!(hint, BufferSizeHint::Small);

        let hint = BufferSizeHint::for_file_size(1024 * 1024); // Exactly 1MB
        assert_eq!(hint, BufferSizeHint::Medium);

        let hint = BufferSizeHint::for_file_size(1024 * 1024 + 1); // Just over 1MB
        assert_eq!(hint, BufferSizeHint::Medium);
    }

    #[test]
    fn test_for_file_size_medium() {
        let hint = BufferSizeHint::for_file_size(10 * 1024 * 1024); // 10MB
        assert_eq!(hint, BufferSizeHint::Medium);

        let hint = BufferSizeHint::for_file_size(50 * 1024 * 1024); // 50MB
        assert_eq!(hint, BufferSizeHint::Medium);
    }

    #[test]
    fn test_for_file_size_boundary_100mb() {
        let hint = BufferSizeHint::for_file_size(100 * 1024 * 1024 - 1); // Just under 100MB
        assert_eq!(hint, BufferSizeHint::Medium);

        let hint = BufferSizeHint::for_file_size(100 * 1024 * 1024); // Exactly 100MB
        assert_eq!(hint, BufferSizeHint::Large);

        let hint = BufferSizeHint::for_file_size(100 * 1024 * 1024 + 1); // Just over 100MB
        assert_eq!(hint, BufferSizeHint::Large);
    }

    #[test]
    fn test_for_file_size_large() {
        let hint = BufferSizeHint::for_file_size(500 * 1024 * 1024); // 500MB
        assert_eq!(hint, BufferSizeHint::Large);
    }

    #[test]
    fn test_for_file_size_boundary_1gb() {
        let hint = BufferSizeHint::for_file_size(1024 * 1024 * 1024 - 1); // Just under 1GB
        assert_eq!(hint, BufferSizeHint::Large);

        let hint = BufferSizeHint::for_file_size(1024 * 1024 * 1024); // Exactly 1GB
        assert_eq!(hint, BufferSizeHint::Huge);

        let hint = BufferSizeHint::for_file_size(1024 * 1024 * 1024 + 1); // Just over 1GB
        assert_eq!(hint, BufferSizeHint::Huge);
    }

    #[test]
    fn test_for_file_size_huge() {
        let hint = BufferSizeHint::for_file_size(10 * 1024 * 1024 * 1024); // 10GB
        assert_eq!(hint, BufferSizeHint::Huge);
    }

    #[test]
    fn test_for_file_size_zero() {
        let hint = BufferSizeHint::for_file_size(0);
        assert_eq!(hint, BufferSizeHint::Small);
    }

    #[test]
    fn test_for_file_size_max() {
        // The largest representable size must land in the top class.
        let hint = BufferSizeHint::for_file_size(u64::MAX);
        assert_eq!(hint, BufferSizeHint::Huge);
    }

    // ==================== BufferSizeHint::for_memory_budget tests ====================

    #[test]
    fn test_for_memory_budget_abundant() {
        // 100MB available, 1 parser -> should suggest Huge
        let hint = BufferSizeHint::for_memory_budget(100 * 1024 * 1024, 1);
        assert_eq!(hint, BufferSizeHint::Huge);
    }

    #[test]
    fn test_for_memory_budget_comfortable() {
        // 50MB available, 10 parsers -> 5MB per parser -> 2.5MB effective -> Huge
        let hint = BufferSizeHint::for_memory_budget(50 * 1024 * 1024, 10);
        assert_eq!(hint, BufferSizeHint::Huge);
    }

    #[test]
    fn test_for_memory_budget_moderate() {
        // 10MB available, 10 parsers -> 1MB per parser -> 512KB effective -> Large
        let hint = BufferSizeHint::for_memory_budget(10 * 1024 * 1024, 10);
        assert_eq!(hint, BufferSizeHint::Large);
    }

    #[test]
    fn test_for_memory_budget_constrained() {
        // 2MB available, 10 parsers -> 200KB per parser -> 100KB effective -> Medium
        let hint = BufferSizeHint::for_memory_budget(2 * 1024 * 1024, 10);
        assert_eq!(hint, BufferSizeHint::Medium);
    }

    #[test]
    fn test_for_memory_budget_very_constrained() {
        // 500KB available, 10 parsers -> 50KB per parser -> 25KB effective -> Small
        let hint = BufferSizeHint::for_memory_budget(500 * 1024, 10);
        assert_eq!(hint, BufferSizeHint::Small);
    }

    #[test]
    fn test_for_memory_budget_zero_parsers() {
        // Edge case: 0 parsers should default to Medium
        let hint = BufferSizeHint::for_memory_budget(100 * 1024 * 1024, 0);
        assert_eq!(hint, BufferSizeHint::Medium);
    }

    #[test]
    fn test_for_memory_budget_one_parser() {
        let hint = BufferSizeHint::for_memory_budget(10 * 1024 * 1024, 1);
        assert_eq!(hint, BufferSizeHint::Huge);
    }

    #[test]
    fn test_for_memory_budget_many_parsers() {
        // 10MB available, 100 parsers -> ~100KB per parser -> ~50KB effective -> Small
        let hint = BufferSizeHint::for_memory_budget(10 * 1024 * 1024, 100);
        assert_eq!(hint, BufferSizeHint::Small);
    }

    #[test]
    fn test_for_memory_budget_boundary_huge_to_large() {
        // Boundary between Huge and Large
        // Huge needs 1MB, with 2x overhead = 2MB per parser
        let hint = BufferSizeHint::for_memory_budget(4 * 1024 * 1024, 1); // 4MB total
        assert_eq!(hint, BufferSizeHint::Huge);

        let hint = BufferSizeHint::for_memory_budget(4 * 1024 * 1024, 2); // 2MB per parser
        assert_eq!(hint, BufferSizeHint::Huge);

        let hint = BufferSizeHint::for_memory_budget(4 * 1024 * 1024, 3); // ~1.3MB per parser
        assert_eq!(hint, BufferSizeHint::Large);
    }

    #[test]
    fn test_for_memory_budget_exact_thresholds() {
        // With one parser the effective budget is `available / 2`, so a class
        // is selected at exactly twice its size and missed one byte below.
        let tiers = [
            (BufferSizeHint::Medium, BufferSizeHint::Small),
            (BufferSizeHint::Large, BufferSizeHint::Medium),
            (BufferSizeHint::Huge, BufferSizeHint::Large),
        ];

        for (tier, tier_below) in tiers {
            let threshold = 2 * tier.size();
            assert_eq!(BufferSizeHint::for_memory_budget(threshold, 1), tier);
            assert_eq!(BufferSizeHint::for_memory_budget(threshold - 1, 1), tier_below);
        }
    }

    #[test]
    fn test_for_memory_budget_zero_memory() {
        // No memory at all still yields the Small floor...
        let hint = BufferSizeHint::for_memory_budget(0, 1);
        assert_eq!(hint, BufferSizeHint::Small);

        // ...unless there are no parsers, where the Medium fallback wins.
        let hint = BufferSizeHint::for_memory_budget(0, 0);
        assert_eq!(hint, BufferSizeHint::Medium);
    }

    #[test]
    fn test_for_memory_budget_below_small_floor() {
        // 1KB cannot hold an 8KB buffer, but Small is the minimum class.
        let hint = BufferSizeHint::for_memory_budget(1024, 1);
        assert_eq!(hint, BufferSizeHint::Small);
    }

    #[test]
    fn test_for_memory_budget_max() {
        let hint = BufferSizeHint::for_memory_budget(usize::MAX, 1);
        assert_eq!(hint, BufferSizeHint::Huge);
    }

    // ==================== Default and basic trait tests ====================

    #[test]
    fn test_buffer_size_hint_default() {
        assert_eq!(BufferSizeHint::default(), BufferSizeHint::Medium);
    }

    #[test]
    fn test_buffer_size_hint_debug() {
        let small = BufferSizeHint::Small;
        let debug = format!("{small:?}");
        assert!(debug.contains("Small"));
    }

    #[test]
    // The point of this test is to call `Clone::clone` itself; a plain
    // `let hint2 = hint1;` would only exercise `Copy`.
    #[allow(clippy::clone_on_copy)]
    fn test_buffer_size_hint_clone() {
        let hint1 = BufferSizeHint::Large;
        let hint2 = Clone::clone(&hint1);
        assert_eq!(hint1, hint2);
    }

    #[test]
    fn test_buffer_size_hint_equality() {
        assert_eq!(BufferSizeHint::Small, BufferSizeHint::Small);
        assert_ne!(BufferSizeHint::Small, BufferSizeHint::Medium);
    }

    #[test]
    fn test_buffer_size_hint_hash() {
        use std::collections::HashMap;

        let mut map = HashMap::new();
        map.insert(BufferSizeHint::Small, "small");
        map.insert(BufferSizeHint::Medium, "medium");

        assert_eq!(map.get(&BufferSizeHint::Small), Some(&"small"));
        assert_eq!(map.get(&BufferSizeHint::Medium), Some(&"medium"));
    }

    // ==================== const function tests ====================

    #[test]
    fn test_size_is_const() {
        // Verify that size() can be used in const contexts
        const SMALL_SIZE: usize = BufferSizeHint::Small.size();
        assert_eq!(SMALL_SIZE, 8 * 1024);
    }
}