Skip to main content

hedl_core/parser/
options.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Parser configuration options.
19
20use crate::limits::Limits;
21use crate::reference::ReferenceMode;
22
23/// Parsing options for configuring HEDL document parsing behavior.
24///
25/// ParseOptions provides both direct field access and a fluent builder API
26/// for convenient configuration. All parsing functions accept ParseOptions
27/// to customize limits, security settings, and error handling behavior.
28///
29/// # Creating ParseOptions
30///
31/// ## Using the builder pattern (recommended)
32///
33/// ```text
34/// use hedl_core::ParseOptions;
35///
36/// // Typical strict parsing with custom depth limit
37/// let opts = ParseOptions::builder()
38///     .max_depth(100)
39///     .strict(true)
40///     .build();
41///
42/// // Lenient parsing for large datasets
43/// let opts = ParseOptions::builder()
44///     .max_array_length(50_000)
45///     .strict(false)
46///     .max_block_string_size(50 * 1024 * 1024)
47///     .build();
48///
49/// // Restrictive parsing for security
50/// let opts = ParseOptions::builder()
51///     .max_file_size(10 * 1024 * 1024)
52///     .max_line_length(64 * 1024)
53///     .max_depth(20)
54///     .max_array_length(1000)
55///     .strict(true)
56///     .build();
57/// ```
58///
59/// ## Using defaults
60///
61/// ```text
62/// use hedl_core::{ParseOptions, parse_with_limits};
63///
64/// // Default options: strict refs, normal limits
65/// let opts = ParseOptions::default();
66///
67/// // Parse with defaults
68/// let doc = parse_with_limits(input, opts)?;
69/// ```
70///
71/// ## Direct field access
72///
73/// ```text
74/// use hedl_core::{ParseOptions, Limits};
75///
76/// let mut opts = ParseOptions::default();
77/// opts.reference_mode = false;
78/// opts.limits.max_nodes = 5000;
79/// ```
80///
81/// # Security Considerations
82///
83/// ParseOptions includes multiple security limits to prevent denial-of-service attacks:
84///
85/// - `max_file_size`: Prevents loading extremely large files
86/// - `max_line_length`: Prevents regex DOS via extremely long lines
87/// - `max_indent_depth`: Prevents stack overflow via deep nesting
88/// - `max_nodes`: Prevents memory exhaustion via large matrix lists
89/// - `max_object_keys` and `max_total_keys`: Prevent memory exhaustion via many objects
90/// - `max_nest_depth`: Prevents stack overflow via deeply nested NEST hierarchies
91/// - `max_block_string_size`: Prevents memory exhaustion via large block strings
92///
93/// # Fields
94///
95/// - `limits`: Security limits for parser resources
96/// - `reference_mode`: Reference resolution mode (strict or lenient)
97#[derive(Debug, Clone)]
98pub struct ParseOptions {
99    /// Security limits.
100    pub limits: Limits,
101    /// Reference resolution mode (strict or lenient).
102    ///
103    /// Controls how unresolved references are handled:
104    /// - `ReferenceMode::Strict`: Errors on unresolved references (default)
105    /// - `ReferenceMode::Lenient`: Ignores unresolved references
106    ///
107    /// Note: Ambiguous references always error regardless of mode.
108    pub reference_mode: ReferenceMode,
109}
110
111impl Default for ParseOptions {
112    fn default() -> Self {
113        Self {
114            limits: Limits::default(),
115            reference_mode: ReferenceMode::Strict,
116        }
117    }
118}
119
120impl ParseOptions {
121    /// Create a new builder for ParseOptions.
122    ///
123    /// # Examples
124    ///
125    /// ```text
126    /// let opts = ParseOptions::builder()
127    ///     .max_depth(100)
128    ///     .strict(true)
129    ///     .build();
130    /// ```
131    pub fn builder() -> ParseOptionsBuilder {
132        ParseOptionsBuilder::new()
133    }
134}
135
136/// Builder for ergonomic construction of ParseOptions.
137///
138/// Provides a fluent API for configuring parser options with sensible defaults.
139///
140/// # Examples
141///
142/// ```text
143/// // Using builder with custom limits
144/// let opts = ParseOptions::builder()
145///     .max_depth(200)
146///     .max_array_length(5000)
147///     .strict(false)
148///     .build();
149///
150/// // Using builder with defaults
151/// let opts = ParseOptions::builder().build();
152/// ```
153#[derive(Debug, Clone)]
154pub struct ParseOptionsBuilder {
155    limits: Limits,
156    reference_mode: ReferenceMode,
157}
158
159impl ParseOptionsBuilder {
160    /// Create a new builder with default options.
161    pub fn new() -> Self {
162        Self {
163            limits: Limits::default(),
164            reference_mode: ReferenceMode::Strict,
165        }
166    }
167
168    /// Set the maximum nesting depth (indent depth).
169    ///
170    /// # Parameters
171    ///
172    /// - `depth`: Maximum nesting level (default: 50)
173    ///
174    /// # Examples
175    ///
176    /// ```text
177    /// ParseOptions::builder().max_depth(100)
178    /// ```
179    pub fn max_depth(mut self, depth: usize) -> Self {
180        self.limits.max_indent_depth = depth;
181        self
182    }
183
184    /// Set the maximum array length (nodes in matrix lists).
185    ///
186    /// # Parameters
187    ///
188    /// - `length`: Maximum number of nodes (default: 10M)
189    ///
190    /// # Examples
191    ///
192    /// ```text
193    /// ParseOptions::builder().max_array_length(5000)
194    /// ```
195    pub fn max_array_length(mut self, length: usize) -> Self {
196        self.limits.max_nodes = length;
197        self
198    }
199    /// Set reference resolution mode.
200    ///
201    /// # Arguments
202    /// - `mode`: The reference resolution mode to use
203    ///
204    /// # Examples
205    ///
206    /// ```text
207    /// use hedl_core::{ParseOptionsBuilder, ReferenceMode};
208    ///
209    /// let opts = ParseOptionsBuilder::new()
210    ///     .reference_mode(ReferenceMode::Lenient)
211    ///     .build();
212    /// ```
213    pub fn reference_mode(mut self, mode: ReferenceMode) -> Self {
214        self.reference_mode = mode;
215        self
216    }
217
218    /// Enable strict reference resolution (error on unresolved).
219    ///
220    /// Shorthand for `.reference_mode(ReferenceMode::Strict)`.
221    ///
222    /// # Examples
223    ///
224    /// ```text
225    /// let opts = ParseOptions::builder()
226    ///     .strict_refs()
227    ///     .build();
228    /// ```
229    pub fn strict_refs(mut self) -> Self {
230        self.reference_mode = ReferenceMode::Strict;
231        self
232    }
233
234    /// Enable lenient reference resolution (ignore unresolved).
235    ///
236    /// Shorthand for `.reference_mode(ReferenceMode::Lenient)`.
237    ///
238    /// # Examples
239    ///
240    /// ```text
241    /// let opts = ParseOptions::builder()
242    ///     .lenient_refs()
243    ///     .build();
244    /// ```
245    pub fn lenient_refs(mut self) -> Self {
246        self.reference_mode = ReferenceMode::Lenient;
247        self
248    }
249
250    /// Set strict reference resolution mode (legacy compatibility).
251    ///
252    pub fn strict(mut self, strict: bool) -> Self {
253        self.reference_mode = ReferenceMode::from(strict);
254        self
255    }
256
257    /// Set the maximum file size in bytes.
258    ///
259    /// # Parameters
260    ///
261    /// - `size`: Maximum file size in bytes (default: 1GB)
262    ///
263    /// # Examples
264    ///
265    /// ```text
266    /// ParseOptions::builder().max_file_size(500 * 1024 * 1024)
267    /// ```
268    pub fn max_file_size(mut self, size: usize) -> Self {
269        self.limits.max_file_size = size;
270        self
271    }
272
273    /// Set the maximum line length in bytes.
274    ///
275    /// # Parameters
276    ///
277    /// - `length`: Maximum line length in bytes (default: 1MB)
278    ///
279    /// # Examples
280    ///
281    /// ```text
282    /// ParseOptions::builder().max_line_length(512 * 1024)
283    /// ```
284    pub fn max_line_length(mut self, length: usize) -> Self {
285        self.limits.max_line_length = length;
286        self
287    }
288
289    /// Set the maximum number of aliases.
290    ///
291    /// # Parameters
292    ///
293    /// - `count`: Maximum number of aliases (default: 10k)
294    ///
295    /// # Examples
296    ///
297    /// ```text
298    /// ParseOptions::builder().max_aliases(5000)
299    /// ```
300    pub fn max_aliases(mut self, count: usize) -> Self {
301        self.limits.max_aliases = count;
302        self
303    }
304
305    /// Set the maximum columns per schema.
306    ///
307    /// # Parameters
308    ///
309    /// - `count`: Maximum columns (default: 100)
310    ///
311    /// # Examples
312    ///
313    /// ```text
314    /// ParseOptions::builder().max_columns(50)
315    /// ```
316    pub fn max_columns(mut self, count: usize) -> Self {
317        self.limits.max_columns = count;
318        self
319    }
320
321    /// Set the maximum NEST hierarchy depth.
322    ///
323    /// # Parameters
324    ///
325    /// - `depth`: Maximum nesting depth (default: 100)
326    ///
327    /// # Examples
328    ///
329    /// ```text
330    /// ParseOptions::builder().max_nest_depth(50)
331    /// ```
332    pub fn max_nest_depth(mut self, depth: usize) -> Self {
333        self.limits.max_nest_depth = depth;
334        self
335    }
336
337    /// Set the maximum block string size in bytes.
338    ///
339    /// # Parameters
340    ///
341    /// - `size`: Maximum block string size (default: 10MB)
342    ///
343    /// # Examples
344    ///
345    /// ```text
346    /// ParseOptions::builder().max_block_string_size(5 * 1024 * 1024)
347    /// ```
348    pub fn max_block_string_size(mut self, size: usize) -> Self {
349        self.limits.max_block_string_size = size;
350        self
351    }
352
353    /// Set the maximum keys per object.
354    ///
355    /// # Parameters
356    ///
357    /// - `count`: Maximum keys per object (default: 10k)
358    ///
359    /// # Examples
360    ///
361    /// ```text
362    /// ParseOptions::builder().max_object_keys(5000)
363    /// ```
364    pub fn max_object_keys(mut self, count: usize) -> Self {
365        self.limits.max_object_keys = count;
366        self
367    }
368
369    /// Set the maximum total keys across all objects.
370    ///
371    /// This provides defense-in-depth against memory exhaustion attacks.
372    ///
373    /// # Parameters
374    ///
375    /// - `count`: Maximum total keys (default: 10M)
376    ///
377    /// # Examples
378    ///
379    /// ```text
380    /// ParseOptions::builder().max_total_keys(5_000_000)
381    /// ```
382    pub fn max_total_keys(mut self, count: usize) -> Self {
383        self.limits.max_total_keys = count;
384        self
385    }
386
387    /// Build the ParseOptions.
388    pub fn build(self) -> ParseOptions {
389        ParseOptions {
390            limits: self.limits,
391            reference_mode: self.reference_mode,
392        }
393    }
394}
395
396impl Default for ParseOptionsBuilder {
397    fn default() -> Self {
398        Self::new()
399    }
400}