// hedl_stream/parser/config.rs
// Dweve HEDL - Hierarchical Entity Data Language
//
// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file at the
// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Configuration for the streaming parser.

20use crate::buffer_config::BufferSizeHint;
21use crate::buffer_pool::MemoryLimits;
22use std::time::Duration;
23
24/// Configuration options for the streaming parser.
25///
26/// Controls memory limits, buffer sizes, timeout behavior, and buffer pooling.
27#[derive(Debug, Clone)]
28pub struct StreamingParserConfig {
29 /// Maximum line length in bytes.
30 ///
31 /// Lines exceeding this length will cause a parsing error. This protects against
32 /// malformed input with extremely long lines that could exhaust memory.
33 ///
34 /// Default: 1,000,000 bytes (1MB)
35 pub max_line_length: usize,
36
37 /// Maximum indentation depth.
38 ///
39 /// Indentation levels exceeding this depth will cause a parsing error. This
40 /// protects against deeply nested structures that could cause stack overflow
41 /// or performance issues.
42 ///
43 /// Default: 100 levels
44 pub max_indent_depth: usize,
45
46 /// Buffer size for reading input.
47 ///
48 /// Larger buffers can improve performance for large files by reducing the
49 /// number of system calls, but use more memory.
50 ///
51 /// Default: 64KB
52 pub buffer_size: usize,
53
54 /// Timeout for parsing operations.
55 ///
56 /// If set, the parser will return a `StreamError::Timeout` if parsing takes
57 /// longer than the specified duration. This protects against infinite loops
58 /// from malicious or malformed input.
59 ///
60 /// Set to `None` to disable timeout checking (default for trusted input).
61 ///
62 /// Default: None (no timeout)
63 ///
64 /// # Performance Note
65 ///
66 /// Timeout checking is performed periodically (every 100 operations) to minimize
67 /// overhead. For very fast parsing, the actual timeout may slightly exceed the
68 /// configured limit.
69 pub timeout: Option<Duration>,
70
71 /// Memory limits for buffer management.
72 ///
73 /// Controls maximum buffer sizes, line lengths, and pool configuration.
74 /// See [`MemoryLimits`] for preset configurations.
75 ///
76 /// Default: `MemoryLimits::default()`
77 pub memory_limits: MemoryLimits,
78
79 /// Enable buffer pooling for high-throughput scenarios.
80 ///
81 /// When enabled, the parser reuses string and value buffers across operations,
82 /// reducing allocation overhead. Beneficial for processing many files in sequence
83 /// or high-throughput server workloads.
84 ///
85 /// Default: false (for backward compatibility)
86 pub enable_pooling: bool,
87}
88
89impl Default for StreamingParserConfig {
90 fn default() -> Self {
91 Self {
92 max_line_length: 1_000_000,
93 max_indent_depth: 100,
94 buffer_size: 64 * 1024,
95 timeout: None,
96 memory_limits: MemoryLimits::default(),
97 enable_pooling: false,
98 }
99 }
100}
101
102impl StreamingParserConfig {
103 /// Config with no limits (use for trusted input only).
104 ///
105 /// # Security Warning
106 ///
107 /// This configuration removes the line length limit, which can expose
108 /// your application to denial-of-service attacks if processing untrusted input.
109 /// Only use this for trusted, controlled environments.
110 ///
111 /// # Examples
112 ///
113 /// ```rust
114 /// use hedl_stream::StreamingParserConfig;
115 ///
116 /// // For trusted input where you want to allow arbitrarily long lines
117 /// let config = StreamingParserConfig::unlimited();
118 /// ```
119 #[must_use]
120 pub fn unlimited() -> Self {
121 Self {
122 max_line_length: usize::MAX,
123 ..Default::default()
124 }
125 }
126
127 /// Configure buffer size using a size hint.
128 ///
129 /// # Examples
130 ///
131 /// ```rust
132 /// use hedl_stream::{StreamingParserConfig, BufferSizeHint};
133 ///
134 /// let config = StreamingParserConfig::default()
135 /// .with_buffer_hint(BufferSizeHint::Large);
136 /// assert_eq!(config.buffer_size, 256 * 1024);
137 /// ```
138 #[must_use]
139 pub fn with_buffer_hint(mut self, hint: BufferSizeHint) -> Self {
140 self.buffer_size = hint.size();
141 self
142 }
143
144 /// Enable or disable buffer pooling.
145 ///
146 /// # Examples
147 ///
148 /// ```rust
149 /// use hedl_stream::StreamingParserConfig;
150 ///
151 /// let config = StreamingParserConfig::default()
152 /// .with_buffer_pooling(true);
153 /// assert_eq!(config.enable_pooling, true);
154 /// ```
155 #[must_use]
156 pub fn with_buffer_pooling(mut self, enabled: bool) -> Self {
157 self.enable_pooling = enabled;
158 self
159 }
160
161 /// Configure memory limits.
162 ///
163 /// # Examples
164 ///
165 /// ```rust
166 /// use hedl_stream::{StreamingParserConfig, MemoryLimits};
167 ///
168 /// let config = StreamingParserConfig::default()
169 /// .with_memory_limits(MemoryLimits::high_throughput());
170 /// ```
171 #[must_use]
172 pub fn with_memory_limits(mut self, limits: MemoryLimits) -> Self {
173 self.memory_limits = limits;
174 // Sync max_line_length with memory limits
175 self.max_line_length = limits.max_line_length;
176 self
177 }
178
179 /// Configure buffer pool size (when pooling is enabled).
180 ///
181 /// # Examples
182 ///
183 /// ```rust
184 /// use hedl_stream::StreamingParserConfig;
185 ///
186 /// let config = StreamingParserConfig::default()
187 /// .with_buffer_pooling(true)
188 /// .with_pool_size(50);
189 /// assert_eq!(config.memory_limits.max_pool_size, 50);
190 /// ```
191 #[must_use]
192 pub fn with_pool_size(mut self, size: usize) -> Self {
193 self.memory_limits.max_pool_size = size;
194 self
195 }
196}