hedl_core/parser/options.rs
1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Parser configuration options.
19
20use crate::limits::Limits;
21use crate::reference::ReferenceMode;
22
23/// Parsing options for configuring HEDL document parsing behavior.
24///
25/// ParseOptions provides both direct field access and a fluent builder API
26/// for convenient configuration. All parsing functions accept ParseOptions
27/// to customize limits, security settings, and error handling behavior.
28///
29/// # Creating ParseOptions
30///
31/// ## Using the builder pattern (recommended)
32///
33/// ```text
34/// use hedl_core::ParseOptions;
35///
36/// // Typical strict parsing with custom depth limit
37/// let opts = ParseOptions::builder()
38/// .max_depth(100)
39/// .strict(true)
40/// .build();
41///
42/// // Lenient parsing for large datasets
43/// let opts = ParseOptions::builder()
44/// .max_array_length(50_000)
45/// .strict(false)
46/// .max_block_string_size(50 * 1024 * 1024)
47/// .build();
48///
49/// // Restrictive parsing for security
50/// let opts = ParseOptions::builder()
51/// .max_file_size(10 * 1024 * 1024)
52/// .max_line_length(64 * 1024)
53/// .max_depth(20)
54/// .max_array_length(1000)
55/// .strict(true)
56/// .build();
57/// ```
58///
59/// ## Using defaults
60///
61/// ```text
62/// use hedl_core::{ParseOptions, parse_with_limits};
63///
64/// // Default options: strict refs, normal limits
65/// let opts = ParseOptions::default();
66///
67/// // Parse with defaults
68/// let doc = parse_with_limits(input, opts)?;
69/// ```
70///
71/// ## Direct field access
72///
73/// ```text
/// use hedl_core::{ParseOptions, ReferenceMode};
///
/// let mut opts = ParseOptions::default();
/// opts.reference_mode = ReferenceMode::Lenient;
78/// opts.limits.max_nodes = 5000;
79/// ```
80///
81/// # Security Considerations
82///
83/// ParseOptions includes multiple security limits to prevent denial-of-service attacks:
84///
85/// - `max_file_size`: Prevents loading extremely large files
86/// - `max_line_length`: Prevents regex DOS via extremely long lines
87/// - `max_indent_depth`: Prevents stack overflow via deep nesting
88/// - `max_nodes`: Prevents memory exhaustion via large matrix lists
89/// - `max_object_keys` and `max_total_keys`: Prevent memory exhaustion via many objects
90/// - `max_nest_depth`: Prevents stack overflow via deeply nested NEST hierarchies
91/// - `max_block_string_size`: Prevents memory exhaustion via large block strings
92///
93/// # Fields
94///
95/// - `limits`: Security limits for parser resources
96/// - `reference_mode`: Reference resolution mode (strict or lenient)
97#[derive(Debug, Clone)]
98pub struct ParseOptions {
99 /// Security limits.
100 pub limits: Limits,
101 /// Reference resolution mode (strict or lenient).
102 ///
103 /// Controls how unresolved references are handled:
104 /// - `ReferenceMode::Strict`: Errors on unresolved references (default)
105 /// - `ReferenceMode::Lenient`: Ignores unresolved references
106 ///
107 /// Note: Ambiguous references always error regardless of mode.
108 pub reference_mode: ReferenceMode,
109}
110
111impl Default for ParseOptions {
112 fn default() -> Self {
113 Self {
114 limits: Limits::default(),
115 reference_mode: ReferenceMode::Strict,
116 }
117 }
118}
119
120impl ParseOptions {
121 /// Create a new builder for ParseOptions.
122 ///
123 /// # Examples
124 ///
125 /// ```text
126 /// let opts = ParseOptions::builder()
127 /// .max_depth(100)
128 /// .strict(true)
129 /// .build();
130 /// ```
131 pub fn builder() -> ParseOptionsBuilder {
132 ParseOptionsBuilder::new()
133 }
134}
135
136/// Builder for ergonomic construction of ParseOptions.
137///
138/// Provides a fluent API for configuring parser options with sensible defaults.
139///
140/// # Examples
141///
142/// ```text
143/// // Using builder with custom limits
144/// let opts = ParseOptions::builder()
145/// .max_depth(200)
146/// .max_array_length(5000)
147/// .strict(false)
148/// .build();
149///
150/// // Using builder with defaults
151/// let opts = ParseOptions::builder().build();
152/// ```
153#[derive(Debug, Clone)]
154pub struct ParseOptionsBuilder {
155 limits: Limits,
156 reference_mode: ReferenceMode,
157}
158
159impl ParseOptionsBuilder {
160 /// Create a new builder with default options.
161 pub fn new() -> Self {
162 Self {
163 limits: Limits::default(),
164 reference_mode: ReferenceMode::Strict,
165 }
166 }
167
168 /// Set the maximum nesting depth (indent depth).
169 ///
170 /// # Parameters
171 ///
172 /// - `depth`: Maximum nesting level (default: 50)
173 ///
174 /// # Examples
175 ///
176 /// ```text
177 /// ParseOptions::builder().max_depth(100)
178 /// ```
179 pub fn max_depth(mut self, depth: usize) -> Self {
180 self.limits.max_indent_depth = depth;
181 self
182 }
183
184 /// Set the maximum array length (nodes in matrix lists).
185 ///
186 /// # Parameters
187 ///
188 /// - `length`: Maximum number of nodes (default: 10M)
189 ///
190 /// # Examples
191 ///
192 /// ```text
193 /// ParseOptions::builder().max_array_length(5000)
194 /// ```
195 pub fn max_array_length(mut self, length: usize) -> Self {
196 self.limits.max_nodes = length;
197 self
198 }
199 /// Set reference resolution mode.
200 ///
201 /// # Arguments
202 /// - `mode`: The reference resolution mode to use
203 ///
204 /// # Examples
205 ///
206 /// ```text
207 /// use hedl_core::{ParseOptionsBuilder, ReferenceMode};
208 ///
209 /// let opts = ParseOptionsBuilder::new()
210 /// .reference_mode(ReferenceMode::Lenient)
211 /// .build();
212 /// ```
213 pub fn reference_mode(mut self, mode: ReferenceMode) -> Self {
214 self.reference_mode = mode;
215 self
216 }
217
218 /// Enable strict reference resolution (error on unresolved).
219 ///
220 /// Shorthand for `.reference_mode(ReferenceMode::Strict)`.
221 ///
222 /// # Examples
223 ///
224 /// ```text
225 /// let opts = ParseOptions::builder()
226 /// .strict_refs()
227 /// .build();
228 /// ```
229 pub fn strict_refs(mut self) -> Self {
230 self.reference_mode = ReferenceMode::Strict;
231 self
232 }
233
234 /// Enable lenient reference resolution (ignore unresolved).
235 ///
236 /// Shorthand for `.reference_mode(ReferenceMode::Lenient)`.
237 ///
238 /// # Examples
239 ///
240 /// ```text
241 /// let opts = ParseOptions::builder()
242 /// .lenient_refs()
243 /// .build();
244 /// ```
245 pub fn lenient_refs(mut self) -> Self {
246 self.reference_mode = ReferenceMode::Lenient;
247 self
248 }
249
250 /// Set strict reference resolution mode (legacy compatibility).
251 ///
252 pub fn strict(mut self, strict: bool) -> Self {
253 self.reference_mode = ReferenceMode::from(strict);
254 self
255 }
256
257 /// Set the maximum file size in bytes.
258 ///
259 /// # Parameters
260 ///
261 /// - `size`: Maximum file size in bytes (default: 1GB)
262 ///
263 /// # Examples
264 ///
265 /// ```text
266 /// ParseOptions::builder().max_file_size(500 * 1024 * 1024)
267 /// ```
268 pub fn max_file_size(mut self, size: usize) -> Self {
269 self.limits.max_file_size = size;
270 self
271 }
272
273 /// Set the maximum line length in bytes.
274 ///
275 /// # Parameters
276 ///
277 /// - `length`: Maximum line length in bytes (default: 1MB)
278 ///
279 /// # Examples
280 ///
281 /// ```text
282 /// ParseOptions::builder().max_line_length(512 * 1024)
283 /// ```
284 pub fn max_line_length(mut self, length: usize) -> Self {
285 self.limits.max_line_length = length;
286 self
287 }
288
289 /// Set the maximum number of aliases.
290 ///
291 /// # Parameters
292 ///
293 /// - `count`: Maximum number of aliases (default: 10k)
294 ///
295 /// # Examples
296 ///
297 /// ```text
298 /// ParseOptions::builder().max_aliases(5000)
299 /// ```
300 pub fn max_aliases(mut self, count: usize) -> Self {
301 self.limits.max_aliases = count;
302 self
303 }
304
305 /// Set the maximum columns per schema.
306 ///
307 /// # Parameters
308 ///
309 /// - `count`: Maximum columns (default: 100)
310 ///
311 /// # Examples
312 ///
313 /// ```text
314 /// ParseOptions::builder().max_columns(50)
315 /// ```
316 pub fn max_columns(mut self, count: usize) -> Self {
317 self.limits.max_columns = count;
318 self
319 }
320
321 /// Set the maximum NEST hierarchy depth.
322 ///
323 /// # Parameters
324 ///
325 /// - `depth`: Maximum nesting depth (default: 100)
326 ///
327 /// # Examples
328 ///
329 /// ```text
330 /// ParseOptions::builder().max_nest_depth(50)
331 /// ```
332 pub fn max_nest_depth(mut self, depth: usize) -> Self {
333 self.limits.max_nest_depth = depth;
334 self
335 }
336
337 /// Set the maximum block string size in bytes.
338 ///
339 /// # Parameters
340 ///
341 /// - `size`: Maximum block string size (default: 10MB)
342 ///
343 /// # Examples
344 ///
345 /// ```text
346 /// ParseOptions::builder().max_block_string_size(5 * 1024 * 1024)
347 /// ```
348 pub fn max_block_string_size(mut self, size: usize) -> Self {
349 self.limits.max_block_string_size = size;
350 self
351 }
352
353 /// Set the maximum keys per object.
354 ///
355 /// # Parameters
356 ///
357 /// - `count`: Maximum keys per object (default: 10k)
358 ///
359 /// # Examples
360 ///
361 /// ```text
362 /// ParseOptions::builder().max_object_keys(5000)
363 /// ```
364 pub fn max_object_keys(mut self, count: usize) -> Self {
365 self.limits.max_object_keys = count;
366 self
367 }
368
369 /// Set the maximum total keys across all objects.
370 ///
371 /// This provides defense-in-depth against memory exhaustion attacks.
372 ///
373 /// # Parameters
374 ///
375 /// - `count`: Maximum total keys (default: 10M)
376 ///
377 /// # Examples
378 ///
379 /// ```text
380 /// ParseOptions::builder().max_total_keys(5_000_000)
381 /// ```
382 pub fn max_total_keys(mut self, count: usize) -> Self {
383 self.limits.max_total_keys = count;
384 self
385 }
386
387 /// Build the ParseOptions.
388 pub fn build(self) -> ParseOptions {
389 ParseOptions {
390 limits: self.limits,
391 reference_mode: self.reference_mode,
392 }
393 }
394}
395
396impl Default for ParseOptionsBuilder {
397 fn default() -> Self {
398 Self::new()
399 }
400}