hedl_core/
parser.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Main parser for HEDL documents.
19//!
20//! # Security Limits
21//!
22//! The parser enforces several security limits to prevent denial-of-service attacks:
23//!
24//! - `max_file_size`: Maximum input file size (default: 1GB)
25//! - `max_line_length`: Maximum line length (default: 1MB)
26//! - `max_indent_depth`: Maximum nesting depth for objects (default: 50)
27//! - `max_nodes`: Maximum number of matrix list nodes (default: 10M)
28//! - `max_aliases`: Maximum number of aliases (default: 10k)
29//! - `max_columns`: Maximum columns per schema (default: 100)
30//! - `max_nest_depth`: Maximum NEST hierarchy depth (default: 100)
31//! - `max_block_string_size`: Maximum block string size (default: 10MB)
32//! - `max_object_keys`: Maximum keys per object (default: 10k)
33//! - **`max_total_keys`**: Maximum total keys across all objects (default: 10M)
34//!
35//! ## max_total_keys: Defense in Depth
36//!
37//! The `max_total_keys` limit is a critical security feature that prevents
38//! memory exhaustion attacks via cumulative key allocation. Without this limit,
39//! an attacker could create many small objects, each under `max_object_keys`,
40//! but collectively consuming excessive memory.
41//!
42//! ### Attack Scenario (Without max_total_keys)
43//!
44//! ```text
45//! # Attacker creates 100,000 objects with 10 keys each
46//! # Each object is "valid" (under max_object_keys = 10,000)
47//! # But total memory usage is excessive: 1,000,000 keys!
48//! object0:
49//!   key0: val0
50//!   key1: val1
51//!   ...
52//!   key9: val9
53//! object1:
54//!   key0: val0
55//!   ...
56//! # ... 99,998 more objects
57//! ```
58//!
//! ### Defense (With max_total_keys = 10,000,000)
//!
//! The parser tracks cumulative keys across all objects and rejects documents
//! that exceed the limit, preventing this attack vector while allowing legitimate
//! large documents. The 10M default accommodates most real-world datasets while
//! still providing protection. Note that the illustrative scenario above totals
//! only 1,000,000 keys, so it is rejected only when `max_total_keys` is lowered
//! below that figure. For extremely large datasets, this limit can be
//! increased via `ParseOptions`.
66
67use crate::block_string::{try_start_block_string, BlockStringResult, BlockStringState};
68use crate::document::{Document, Item, MatrixList, Node};
69use crate::error::{HedlError, HedlResult};
70use crate::header::parse_header;
71use crate::inference::{infer_quoted_value, infer_value, InferenceContext};
72use crate::lex::row::parse_csv_row;
73use crate::lex::{calculate_indent, is_valid_key_token, is_valid_type_name, strip_comment};
74use crate::limits::{Limits, TimeoutCheckExt, TimeoutContext};
75use crate::preprocess::{is_blank_line, is_comment_line, preprocess};
76use crate::reference::{register_node, resolve_references, ReferenceMode, TypeRegistry};
77use crate::value::Value;
78use std::collections::BTreeMap;
79
80/// Parsing options for configuring HEDL document parsing behavior.
81///
82/// ParseOptions provides both direct field access and a fluent builder API
83/// for convenient configuration. All parsing functions accept ParseOptions
84/// to customize limits, security settings, and error handling behavior.
85///
86/// # Creating ParseOptions
87///
88/// ## Using the builder pattern (recommended)
89///
90/// ```text
91/// use hedl_core::ParseOptions;
92///
93/// // Typical strict parsing with custom depth limit
94/// let opts = ParseOptions::builder()
95///     .max_depth(100)
96///     .strict(true)
97///     .build();
98///
99/// // Lenient parsing for large datasets
100/// let opts = ParseOptions::builder()
101///     .max_array_length(50_000)
102///     .strict(false)
103///     .max_block_string_size(50 * 1024 * 1024)
104///     .build();
105///
106/// // Restrictive parsing for security
107/// let opts = ParseOptions::builder()
108///     .max_file_size(10 * 1024 * 1024)
109///     .max_line_length(64 * 1024)
110///     .max_depth(20)
111///     .max_array_length(1000)
112///     .strict(true)
113///     .build();
114/// ```
115///
116/// ## Using defaults
117///
118/// ```text
119/// use hedl_core::{ParseOptions, parse_with_limits};
120///
121/// // Default options: strict refs, normal limits
122/// let opts = ParseOptions::default();
123///
124/// // Parse with defaults
125/// let doc = parse_with_limits(input, opts)?;
126/// ```
127///
128/// ## Direct field access
129///
130/// ```text
131/// use hedl_core::{ParseOptions, Limits};
132///
133/// let mut opts = ParseOptions::default();
134/// opts.reference_mode = false;
135/// opts.limits.max_nodes = 5000;
136/// ```
137///
138/// # Security Considerations
139///
140/// ParseOptions includes multiple security limits to prevent denial-of-service attacks:
141///
142/// - `max_file_size`: Prevents loading extremely large files
143/// - `max_line_length`: Prevents regex DOS via extremely long lines
144/// - `max_indent_depth`: Prevents stack overflow via deep nesting
145/// - `max_nodes`: Prevents memory exhaustion via large matrix lists
146/// - `max_object_keys` and `max_total_keys`: Prevent memory exhaustion via many objects
147/// - `max_nest_depth`: Prevents stack overflow via deeply nested NEST hierarchies
148/// - `max_block_string_size`: Prevents memory exhaustion via large block strings
149///
150/// # Fields
151///
152/// - `limits`: Security limits for parser resources
153/// - `reference_mode`: Reference resolution mode (strict or lenient)
154#[derive(Debug, Clone)]
155pub struct ParseOptions {
156    /// Security limits.
157    pub limits: Limits,
158    /// Reference resolution mode (strict or lenient).
159    ///
160    /// Controls how unresolved references are handled:
161    /// - `ReferenceMode::Strict`: Errors on unresolved references (default)
162    /// - `ReferenceMode::Lenient`: Ignores unresolved references
163    ///
164    /// Note: Ambiguous references always error regardless of mode.
165    pub reference_mode: ReferenceMode,
166}
167
168impl Default for ParseOptions {
169    fn default() -> Self {
170        Self {
171            limits: Limits::default(),
172            reference_mode: ReferenceMode::Strict,
173        }
174    }
175}
176
177impl ParseOptions {
178    /// Create a new builder for ParseOptions.
179    ///
180    /// # Examples
181    ///
182    /// ```text
183    /// let opts = ParseOptions::builder()
184    ///     .max_depth(100)
185    ///     .strict(true)
186    ///     .build();
187    /// ```
188    pub fn builder() -> ParseOptionsBuilder {
189        ParseOptionsBuilder::new()
190    }
191}
192
193/// Builder for ergonomic construction of ParseOptions.
194///
195/// Provides a fluent API for configuring parser options with sensible defaults.
196///
197/// # Examples
198///
199/// ```text
200/// // Using builder with custom limits
201/// let opts = ParseOptions::builder()
202///     .max_depth(200)
203///     .max_array_length(5000)
204///     .strict(false)
205///     .build();
206///
207/// // Using builder with defaults
208/// let opts = ParseOptions::builder().build();
209/// ```
210#[derive(Debug, Clone)]
211pub struct ParseOptionsBuilder {
212    limits: Limits,
213    reference_mode: ReferenceMode,
214}
215
216impl ParseOptionsBuilder {
217    /// Create a new builder with default options.
218    pub fn new() -> Self {
219        Self {
220            limits: Limits::default(),
221            reference_mode: ReferenceMode::Strict,
222        }
223    }
224
225    /// Set the maximum nesting depth (indent depth).
226    ///
227    /// # Parameters
228    ///
229    /// - `depth`: Maximum nesting level (default: 50)
230    ///
231    /// # Examples
232    ///
233    /// ```text
234    /// ParseOptions::builder().max_depth(100)
235    /// ```
236    pub fn max_depth(mut self, depth: usize) -> Self {
237        self.limits.max_indent_depth = depth;
238        self
239    }
240
241    /// Set the maximum array length (nodes in matrix lists).
242    ///
243    /// # Parameters
244    ///
245    /// - `length`: Maximum number of nodes (default: 10M)
246    ///
247    /// # Examples
248    ///
249    /// ```text
250    /// ParseOptions::builder().max_array_length(5000)
251    /// ```
252    pub fn max_array_length(mut self, length: usize) -> Self {
253        self.limits.max_nodes = length;
254        self
255    }
256    /// Set reference resolution mode.
257    ///
258    /// # Arguments
259    /// - `mode`: The reference resolution mode to use
260    ///
261    /// # Examples
262    ///
263    /// ```text
264    /// use hedl_core::{ParseOptionsBuilder, ReferenceMode};
265    ///
266    /// let opts = ParseOptionsBuilder::new()
267    ///     .reference_mode(ReferenceMode::Lenient)
268    ///     .build();
269    /// ```
270    pub fn reference_mode(mut self, mode: ReferenceMode) -> Self {
271        self.reference_mode = mode;
272        self
273    }
274
275    /// Enable strict reference resolution (error on unresolved).
276    ///
277    /// Shorthand for `.reference_mode(ReferenceMode::Strict)`.
278    ///
279    /// # Examples
280    ///
281    /// ```text
282    /// let opts = ParseOptions::builder()
283    ///     .strict_refs()
284    ///     .build();
285    /// ```
286    pub fn strict_refs(mut self) -> Self {
287        self.reference_mode = ReferenceMode::Strict;
288        self
289    }
290
291    /// Enable lenient reference resolution (ignore unresolved).
292    ///
293    /// Shorthand for `.reference_mode(ReferenceMode::Lenient)`.
294    ///
295    /// # Examples
296    ///
297    /// ```text
298    /// let opts = ParseOptions::builder()
299    ///     .lenient_refs()
300    ///     .build();
301    /// ```
302    pub fn lenient_refs(mut self) -> Self {
303        self.reference_mode = ReferenceMode::Lenient;
304        self
305    }
306
307    /// Set strict reference resolution mode (legacy compatibility).
308    ///
309    pub fn strict(mut self, strict: bool) -> Self {
310        self.reference_mode = ReferenceMode::from(strict);
311        self
312    }
313
314    /// Set the maximum file size in bytes.
315    ///
316    /// # Parameters
317    ///
318    /// - `size`: Maximum file size in bytes (default: 1GB)
319    ///
320    /// # Examples
321    ///
322    /// ```text
323    /// ParseOptions::builder().max_file_size(500 * 1024 * 1024)
324    /// ```
325    pub fn max_file_size(mut self, size: usize) -> Self {
326        self.limits.max_file_size = size;
327        self
328    }
329
330    /// Set the maximum line length in bytes.
331    ///
332    /// # Parameters
333    ///
334    /// - `length`: Maximum line length in bytes (default: 1MB)
335    ///
336    /// # Examples
337    ///
338    /// ```text
339    /// ParseOptions::builder().max_line_length(512 * 1024)
340    /// ```
341    pub fn max_line_length(mut self, length: usize) -> Self {
342        self.limits.max_line_length = length;
343        self
344    }
345
346    /// Set the maximum number of aliases.
347    ///
348    /// # Parameters
349    ///
350    /// - `count`: Maximum number of aliases (default: 10k)
351    ///
352    /// # Examples
353    ///
354    /// ```text
355    /// ParseOptions::builder().max_aliases(5000)
356    /// ```
357    pub fn max_aliases(mut self, count: usize) -> Self {
358        self.limits.max_aliases = count;
359        self
360    }
361
362    /// Set the maximum columns per schema.
363    ///
364    /// # Parameters
365    ///
366    /// - `count`: Maximum columns (default: 100)
367    ///
368    /// # Examples
369    ///
370    /// ```text
371    /// ParseOptions::builder().max_columns(50)
372    /// ```
373    pub fn max_columns(mut self, count: usize) -> Self {
374        self.limits.max_columns = count;
375        self
376    }
377
378    /// Set the maximum NEST hierarchy depth.
379    ///
380    /// # Parameters
381    ///
382    /// - `depth`: Maximum nesting depth (default: 100)
383    ///
384    /// # Examples
385    ///
386    /// ```text
387    /// ParseOptions::builder().max_nest_depth(50)
388    /// ```
389    pub fn max_nest_depth(mut self, depth: usize) -> Self {
390        self.limits.max_nest_depth = depth;
391        self
392    }
393
394    /// Set the maximum block string size in bytes.
395    ///
396    /// # Parameters
397    ///
398    /// - `size`: Maximum block string size (default: 10MB)
399    ///
400    /// # Examples
401    ///
402    /// ```text
403    /// ParseOptions::builder().max_block_string_size(5 * 1024 * 1024)
404    /// ```
405    pub fn max_block_string_size(mut self, size: usize) -> Self {
406        self.limits.max_block_string_size = size;
407        self
408    }
409
410    /// Set the maximum keys per object.
411    ///
412    /// # Parameters
413    ///
414    /// - `count`: Maximum keys per object (default: 10k)
415    ///
416    /// # Examples
417    ///
418    /// ```text
419    /// ParseOptions::builder().max_object_keys(5000)
420    /// ```
421    pub fn max_object_keys(mut self, count: usize) -> Self {
422        self.limits.max_object_keys = count;
423        self
424    }
425
426    /// Set the maximum total keys across all objects.
427    ///
428    /// This provides defense-in-depth against memory exhaustion attacks.
429    ///
430    /// # Parameters
431    ///
432    /// - `count`: Maximum total keys (default: 10M)
433    ///
434    /// # Examples
435    ///
436    /// ```text
437    /// ParseOptions::builder().max_total_keys(5_000_000)
438    /// ```
439    pub fn max_total_keys(mut self, count: usize) -> Self {
440        self.limits.max_total_keys = count;
441        self
442    }
443
444    /// Build the ParseOptions.
445    pub fn build(self) -> ParseOptions {
446        ParseOptions {
447            limits: self.limits,
448            reference_mode: self.reference_mode,
449        }
450    }
451}
452
453impl Default for ParseOptionsBuilder {
454    fn default() -> Self {
455        Self::new()
456    }
457}
458
459/// Parse a HEDL document from bytes.
460pub fn parse(input: &[u8]) -> HedlResult<Document> {
461    parse_with_limits(input, ParseOptions::default())
462}
463
464/// Parse a HEDL document with custom options.
465pub fn parse_with_limits(input: &[u8], options: ParseOptions) -> HedlResult<Document> {
466    // Create timeout context for parsing
467    let timeout_ctx = TimeoutContext::new(options.limits.timeout);
468
469    // Phase 1: Preprocess (zero-copy line splitting)
470    let preprocessed = preprocess(input, &options.limits)?;
471
472    // Collect lines as borrowed slices (no per-line allocation)
473    let lines: Vec<(usize, &str)> = preprocessed.lines().collect();
474
475    // Phase 2: Parse header
476    let (header, body_start_idx) = parse_header(&lines, &options.limits, &timeout_ctx)?;
477
478    // Phase 3: Parse body
479    let body_lines = &lines[body_start_idx..];
480    let mut type_registries = TypeRegistry::new();
481    let root = parse_body(
482        body_lines,
483        &header,
484        &options.limits,
485        &mut type_registries,
486        &timeout_ctx,
487    )?;
488
489    // Build document
490    let mut doc = Document::new(header.version);
491    doc.aliases = header.aliases;
492    doc.structs = header.structs;
493    doc.nests = header.nests;
494    doc.root = root;
495
496    // Phase 4: Reference resolution (with timeout check)
497    timeout_ctx.check_timeout(0)?;
498    resolve_references(&doc, options.reference_mode)?;
499
500    Ok(doc)
501}
502
503// --- Context Stack ---
504
505#[derive(Debug)]
506enum Frame {
507    Root {
508        object: BTreeMap<String, Item>,
509    },
510    Object {
511        indent: usize,
512        key: String,
513        object: BTreeMap<String, Item>,
514    },
515    List {
516        #[allow(dead_code)]
517        list_start_indent: usize,
518        row_indent: usize,
519        type_name: String,
520        schema: Vec<String>,
521        last_row_values: Option<Vec<Value>>,
522        list: Vec<Node>,
523        key: String,
524        count_hint: Option<usize>,
525    },
526}
527
528// --- Body Parsing ---
529
530/// Context for body parsing, holding references to shared state.
531struct ParseContext<'a> {
532    header: &'a crate::header::Header,
533    limits: &'a Limits,
534    type_registries: &'a mut TypeRegistry,
535    node_count: &'a mut usize,
536}
537
538fn parse_body(
539    lines: &[(usize, &str)],
540    header: &crate::header::Header,
541    limits: &Limits,
542    type_registries: &mut TypeRegistry,
543    timeout_ctx: &TimeoutContext,
544) -> HedlResult<BTreeMap<String, Item>> {
545    let mut stack: Vec<Frame> = vec![Frame::Root {
546        object: BTreeMap::new(),
547    }];
548    let mut node_count = 0usize;
549    let mut total_keys = 0usize;
550    let mut block_string: Option<BlockStringState> = None;
551
552    // Create parsing context once for reuse throughout the loop
553    let ctx = ParseContext {
554        header,
555        limits,
556        type_registries,
557        node_count: &mut node_count,
558    };
559
560    // Automatic timeout checking every 10,000 iterations
561    for result in lines.iter().copied().with_timeout_check(timeout_ctx) {
562        let (line_num, line) = result?;
563        // Handle block string accumulation mode
564        if let Some(ref mut state) = block_string {
565            // Process the line and check if block string is complete
566            if let Some(full_content) = state.process_line(line, line_num, limits)? {
567                // Block string is complete
568                let value = Value::String(full_content.into());
569                pop_frames(&mut stack, state.indent);
570                insert_into_current(&mut stack, state.key.clone(), Item::Scalar(value));
571                block_string = None;
572            }
573            continue;
574        }
575
576        // Skip blank and comment lines
577        if is_blank_line(line) || is_comment_line(line) {
578            continue;
579        }
580
581        // Calculate indentation
582        let indent_info = calculate_indent(line, line_num as u32)
583            .map_err(|e| HedlError::syntax(e.to_string(), line_num))?;
584
585        let indent_info = match indent_info {
586            Some(info) => info,
587            None => continue, // Blank line
588        };
589
590        if indent_info.level > limits.max_indent_depth {
591            return Err(HedlError::security(
592                format!(
593                    "indent depth {} exceeds limit {}",
594                    indent_info.level, limits.max_indent_depth
595                ),
596                line_num,
597            ));
598        }
599
600        let indent = indent_info.level;
601        let content = &line[indent_info.spaces..];
602
603        // Pop frames as needed based on indentation
604        pop_frames(&mut stack, indent);
605
606        // Classify and parse line
607        if content.starts_with('|') {
608            parse_matrix_row(
609                &mut stack,
610                content,
611                indent,
612                line_num,
613                ctx.header,
614                ctx.limits,
615                ctx.type_registries,
616                ctx.node_count,
617            )?;
618        } else {
619            // Check if this starts a block string
620            match try_start_block_string(content, indent, line_num)? {
621                BlockStringResult::MultiLineStarted(state) => {
622                    // Validate indent and check for duplicate key
623                    validate_indent_for_child(&stack, indent, line_num)?;
624                    check_duplicate_key(&stack, &state.key, line_num, limits, &mut total_keys)?;
625                    block_string = Some(state);
626                }
627                BlockStringResult::NotBlockString => {
628                    parse_non_matrix_line(
629                        &mut stack,
630                        content,
631                        indent,
632                        line_num,
633                        header,
634                        limits,
635                        &mut total_keys,
636                    )?;
637                }
638            }
639        }
640    }
641
642    // Check for unclosed block string
643    if let Some(state) = block_string {
644        return Err(HedlError::syntax(
645            format!(
646                "unclosed block string starting at line {}",
647                state.start_line
648            ),
649            state.start_line,
650        ));
651    }
652
653    // Finalize: pop all frames and build result
654    finalize_stack(stack)
655}
656
657fn pop_frames(stack: &mut Vec<Frame>, current_indent: usize) {
658    while stack.len() > 1 {
659        let should_pop = match stack.last().unwrap() {
660            Frame::Root { .. } => false,
661            Frame::Object { indent, .. } => current_indent <= *indent,
662            Frame::List { row_indent, .. } => current_indent < *row_indent,
663        };
664
665        if should_pop {
666            let frame = stack.pop().unwrap();
667            attach_frame_to_parent(stack, frame);
668        } else {
669            break;
670        }
671    }
672}
673
674fn attach_frame_to_parent(stack: &mut [Frame], frame: Frame) {
675    match frame {
676        Frame::Object { key, object, .. } => {
677            let item = Item::Object(object);
678            insert_into_parent(stack, key, item);
679        }
680        Frame::List {
681            key,
682            type_name,
683            schema,
684            list,
685            count_hint,
686            ..
687        } => {
688            let mut matrix_list = if let Some(count) = count_hint {
689                MatrixList::with_count_hint(type_name, schema, count)
690            } else {
691                MatrixList::new(type_name, schema)
692            };
693            matrix_list.rows = list;
694            insert_into_parent(stack, key, Item::List(matrix_list));
695        }
696        Frame::Root { .. } => {}
697    }
698}
699
700fn insert_into_parent(stack: &mut [Frame], key: String, item: Item) {
701    if let Some(parent) = stack.last_mut() {
702        match parent {
703            Frame::Root { object } | Frame::Object { object, .. } => {
704                // Note: max_object_keys limit check is performed at a higher level
705                // during parsing, not here, to provide better error context
706                object.insert(key, item);
707            }
708            Frame::List { list, .. } => {
709                // Attach children to the last node in the list
710                if let Some(parent_node) = list.last_mut() {
711                    if let Item::List(child_list) = item {
712                        let children = parent_node
713                            .children
714                            .get_or_insert_with(|| Box::new(BTreeMap::new()));
715                        children
716                            .entry(child_list.type_name.clone())
717                            .or_default()
718                            .extend(child_list.rows);
719                    }
720                }
721            }
722        }
723    }
724}
725
726fn parse_non_matrix_line(
727    stack: &mut Vec<Frame>,
728    content: &str,
729    indent: usize,
730    line_num: usize,
731    header: &crate::header::Header,
732    limits: &Limits,
733    total_keys: &mut usize,
734) -> HedlResult<()> {
735    let content = strip_comment(content);
736
737    // Find colon
738    let colon_pos = content
739        .find(':')
740        .ok_or_else(|| HedlError::syntax("expected ':' in line", line_num))?;
741
742    let key_with_hint = content[..colon_pos].trim();
743    let after_colon = &content[colon_pos + 1..];
744
745    // Extract count hint from key if present (e.g., "teams(3)")
746    let (key, count_hint) = parse_key_with_count_hint(key_with_hint, line_num)?;
747
748    // Validate key
749    if !is_valid_key_token(&key) {
750        return Err(HedlError::syntax(format!("invalid key: {}", key), line_num));
751    }
752
753    // Check for duplicate key
754    check_duplicate_key(stack, &key, line_num, limits, total_keys)?;
755
756    // Determine line type
757    let after_colon_trimmed = after_colon.trim();
758
759    if after_colon_trimmed.is_empty() {
760        // Object start
761        if count_hint.is_some() {
762            return Err(HedlError::syntax(
763                "count hint not allowed on object declarations",
764                line_num,
765            ));
766        }
767        validate_indent_for_child(stack, indent, line_num)?;
768        stack.push(Frame::Object {
769            indent,
770            key: key.to_string(),
771            object: BTreeMap::new(),
772        });
773    } else if after_colon_trimmed.starts_with('@') && is_list_start(after_colon_trimmed) {
774        // Matrix list start
775        if !after_colon.starts_with(' ') {
776            return Err(HedlError::syntax(
777                "space required after ':' before '@'",
778                line_num,
779            ));
780        }
781
782        // Check if this is a nested list declaration inside a list context
783        let parent_list_idx = validate_nested_list_indent(stack, indent, line_num)?;
784
785        let (type_name, schema) = parse_list_start(after_colon_trimmed, line_num, header, limits)?;
786
787        if let Some(_parent_idx) = parent_list_idx {
788            // This is a nested list inside a list context (e.g., divisions(3): @Division under a company row)
789            // Push the new list frame - it will be attached to parent row when finalized
790            stack.push(Frame::List {
791                list_start_indent: indent,
792                row_indent: indent + 1,
793                type_name,
794                schema,
795                last_row_values: None,
796                list: Vec::new(),
797                key: key.to_string(),
798                count_hint,
799            });
800        } else {
801            // Normal top-level or object-nested list
802            stack.push(Frame::List {
803                list_start_indent: indent,
804                row_indent: indent + 1,
805                type_name,
806                schema,
807                last_row_values: None,
808                list: Vec::new(),
809                key: key.to_string(),
810                count_hint,
811            });
812        }
813    } else {
814        // Key-value pair
815        if count_hint.is_some() {
816            return Err(HedlError::syntax(
817                "count hint not allowed on scalar values",
818                line_num,
819            ));
820        }
821        if !after_colon.starts_with(' ') {
822            return Err(HedlError::syntax(
823                "space required after ':' in key-value",
824                line_num,
825            ));
826        }
827        validate_indent_for_child(stack, indent, line_num)?;
828        let value_str = after_colon.trim();
829        let ctx = InferenceContext::for_key_value(&header.aliases);
830        let value = if value_str.starts_with('"') {
831            // Quoted value
832            let inner = parse_quoted_string(value_str, line_num)?;
833            infer_quoted_value(&inner)
834        } else {
835            infer_value(value_str, &ctx, line_num)?
836        };
837        insert_into_current(stack, key.to_string(), Item::Scalar(value));
838    }
839
840    Ok(())
841}
842
843/// Parse a key that may have a count hint in parentheses.
844/// Examples: "teams" -> ("teams", None), "teams(3)" -> ("teams", Some(3))
845///
846/// DEPRECATED: The `name(N): @Type` syntax for count hints is being replaced by
847/// the new row-level `|N|data` syntax. This function is maintained for backward
848/// compatibility but the old syntax is deprecated and may be removed in future versions.
849fn parse_key_with_count_hint(key: &str, line_num: usize) -> HedlResult<(String, Option<usize>)> {
850    if let Some(paren_pos) = key.find('(') {
851        // Extract key and count
852        let key_part = &key[..paren_pos];
853
854        // Find closing parenthesis
855        if !key.ends_with(')') {
856            return Err(HedlError::syntax(
857                "unclosed count hint parenthesis",
858                line_num,
859            ));
860        }
861
862        let count_str = &key[paren_pos + 1..key.len() - 1];
863
864        // Parse count
865        let count = count_str.parse::<usize>().map_err(|_| {
866            HedlError::syntax(format!("invalid count hint: '{}'", count_str), line_num)
867        })?;
868
869        if count == 0 {
870            return Err(HedlError::syntax(
871                "count hint must be greater than zero",
872                line_num,
873            ));
874        }
875
876        Ok((key_part.to_string(), Some(count)))
877    } else {
878        Ok((key.to_string(), None))
879    }
880}
881
882fn is_list_start(s: &str) -> bool {
883    // @TypeName or @TypeName[...]
884    let s = s.trim();
885    if !s.starts_with('@') {
886        return false;
887    }
888    let rest = &s[1..];
889    // Find end of type name
890    let type_end = rest
891        .find(|c: char| c == '[' || c.is_whitespace())
892        .unwrap_or(rest.len());
893    let type_name = &rest[..type_end];
894    is_valid_type_name(type_name)
895}
896
897fn parse_list_start(
898    s: &str,
899    line_num: usize,
900    header: &crate::header::Header,
901    limits: &Limits,
902) -> HedlResult<(String, Vec<String>)> {
903    let s = s.trim();
904    let rest = &s[1..]; // Skip @
905
906    if let Some(bracket_pos) = rest.find('[') {
907        // Inline schema: @TypeName[col1, col2]
908        let type_name = &rest[..bracket_pos];
909        if !is_valid_type_name(type_name) {
910            return Err(HedlError::syntax(
911                format!("invalid type name: {}", type_name),
912                line_num,
913            ));
914        }
915
916        let schema_str = &rest[bracket_pos..];
917        let schema = parse_inline_schema(schema_str, line_num, limits)?;
918
919        // Check against declared schema if exists
920        if let Some(declared) = header.structs.get(type_name) {
921            if declared != &schema {
922                return Err(HedlError::schema(
923                    format!(
924                        "inline schema for '{}' doesn't match declared schema",
925                        type_name
926                    ),
927                    line_num,
928                ));
929            }
930        }
931
932        Ok((type_name.to_string(), schema))
933    } else {
934        // Reference to declared schema: @TypeName
935        let type_name = rest.trim();
936        if !is_valid_type_name(type_name) {
937            return Err(HedlError::syntax(
938                format!("invalid type name: {}", type_name),
939                line_num,
940            ));
941        }
942
943        let schema = header
944            .structs
945            .get(type_name)
946            .ok_or_else(|| HedlError::schema(format!("undefined type: {}", type_name), line_num))?;
947
948        Ok((type_name.to_string(), schema.clone()))
949    }
950}
951
952fn parse_inline_schema(s: &str, line_num: usize, limits: &Limits) -> HedlResult<Vec<String>> {
953    if !s.starts_with('[') || !s.ends_with(']') {
954        return Err(HedlError::syntax("invalid inline schema format", line_num));
955    }
956
957    let inner = &s[1..s.len() - 1];
958    let mut columns = Vec::new();
959
960    for part in inner.split(',') {
961        let col = part.trim();
962        if col.is_empty() {
963            continue;
964        }
965        if !is_valid_key_token(col) {
966            return Err(HedlError::syntax(
967                format!("invalid column name: {}", col),
968                line_num,
969            ));
970        }
971        columns.push(col.to_string());
972    }
973
974    if columns.is_empty() {
975        return Err(HedlError::syntax("empty inline schema", line_num));
976    }
977
978    if columns.len() > limits.max_columns {
979        return Err(HedlError::security(
980            format!("too many columns: {}", columns.len()),
981            line_num,
982        ));
983    }
984
985    Ok(columns)
986}
987
988/// Parse the row prefix to extract optional child count.
989/// Patterns:
990/// - `|[N] data` -> (Some(N), "data")  - parent with N children
991/// - `|data`     -> (None, "data")     - leaf node (no count)
992fn parse_row_prefix(content: &str, line_num: usize) -> HedlResult<(Option<usize>, &str)> {
993    // Content should start with |
994    if !content.starts_with('|') {
995        return Err(HedlError::syntax(
996            "matrix row must start with '|'",
997            line_num,
998        ));
999    }
1000
1001    let rest = &content[1..]; // Skip first |
1002
1003    // Check for |[N] pattern
1004    if rest.starts_with('[') {
1005        if let Some(bracket_end) = rest.find(']') {
1006            let count_str = &rest[1..bracket_end];
1007            if let Ok(count) = count_str.parse::<usize>() {
1008                // Count 0 is valid - means row has no children (empty parent)
1009                // Skip |[N] and any following space
1010                let data = rest[bracket_end + 1..].trim_start();
1011                return Ok((Some(count), data));
1012            }
1013        }
1014    }
1015
1016    // No count pattern, treat as |data (leaf node)
1017    Ok((None, rest))
1018}
1019
1020#[allow(clippy::too_many_arguments)]
1021fn parse_matrix_row(
1022    stack: &mut Vec<Frame>,
1023    content: &str,
1024    indent: usize,
1025    line_num: usize,
1026    header: &crate::header::Header,
1027    limits: &Limits,
1028    type_registries: &mut TypeRegistry,
1029    node_count: &mut usize,
1030) -> HedlResult<()> {
1031    // Find the active list frame
1032    let list_frame_idx = find_list_frame(stack, indent, line_num, header, limits)?;
1033
1034    // Parse the row prefix to extract optional child count and CSV content
1035    let (child_count, csv_content) = parse_row_prefix(content, line_num)?;
1036    let csv_content = strip_comment(csv_content).trim();
1037
1038    // Get list info
1039    let (type_name, schema, prev_row) = {
1040        let frame = &stack[list_frame_idx];
1041        match frame {
1042            Frame::List {
1043                type_name,
1044                schema,
1045                last_row_values,
1046                ..
1047            } => (type_name.clone(), schema.clone(), last_row_values.clone()),
1048            _ => unreachable!(),
1049        }
1050    };
1051
1052    // Parse CSV
1053    let fields =
1054        parse_csv_row(csv_content).map_err(|e| HedlError::syntax(e.to_string(), line_num))?;
1055
1056    // Validate shape
1057    if fields.len() != schema.len() {
1058        return Err(HedlError::shape(
1059            format!("expected {} columns, got {}", schema.len(), fields.len()),
1060            line_num,
1061        ));
1062    }
1063
1064    // Infer values
1065    let mut values = Vec::with_capacity(fields.len());
1066    for (col_idx, field) in fields.iter().enumerate() {
1067        let ctx = InferenceContext::for_matrix_cell(
1068            &header.aliases,
1069            col_idx,
1070            prev_row.as_deref(),
1071            &type_name,
1072        );
1073
1074        let value = if field.is_quoted {
1075            infer_quoted_value(&field.value)
1076        } else {
1077            infer_value(&field.value, &ctx, line_num)?
1078        };
1079
1080        values.push(value);
1081    }
1082
1083    // Get ID from first column
1084    let id = match &values[0] {
1085        Value::String(s) => s.clone(),
1086        _ => {
1087            return Err(HedlError::semantic("ID column must be a string", line_num));
1088        }
1089    };
1090
1091    // Register node ID
1092    register_node(type_registries, &type_name, &id, line_num, limits)?;
1093
1094    // Check node count limit with checked arithmetic to prevent overflow
1095    *node_count = node_count
1096        .checked_add(1)
1097        .ok_or_else(|| HedlError::security("node count overflow", line_num))?;
1098    if *node_count > limits.max_nodes {
1099        return Err(HedlError::security(
1100            format!("too many nodes: exceeds limit of {}", limits.max_nodes),
1101            line_num,
1102        ));
1103    }
1104
1105    // Update list frame - avoid clone by storing values first, then creating node
1106    if let Frame::List {
1107        last_row_values,
1108        list,
1109        ..
1110    } = &mut stack[list_frame_idx]
1111    {
1112        // Store values for ditto support before moving to node
1113        *last_row_values = Some(values.clone());
1114        // Create node taking ownership of values - no extra clone needed
1115        let mut node = Node::new(&type_name, &*id, values);
1116
1117        // Store child count from |N| syntax if present
1118        if let Some(count) = child_count {
1119            node.set_child_count(count);
1120        }
1121
1122        list.push(node);
1123    }
1124
1125    Ok(())
1126}
1127
1128/// Finds the appropriate list frame for a matrix row at the given indent level.
1129///
1130/// This function performs critical depth checking to prevent stack overflow attacks
1131/// via deeply nested NEST hierarchies. When a child row is detected (indent = parent + 1),
1132/// it validates that adding a new NEST level would not exceed `max_nest_depth`.
1133///
1134/// # Security
1135///
1136/// **DoS Prevention**: Without depth limits, an attacker could craft a HEDL document
1137/// with thousands of nested NEST levels, causing stack overflow or excessive memory
1138/// consumption during parsing. The depth check prevents this attack vector.
1139///
1140/// # Parameters
1141///
1142/// - `stack`: The parsing stack containing current frame hierarchy
1143/// - `indent`: Indentation level of the current matrix row
1144/// - `line_num`: Line number for error reporting
1145/// - `header`: Document header containing NEST rules and schemas
1146/// - `limits`: Security limits including `max_nest_depth`
1147///
1148/// # Returns
1149///
1150/// Returns the index of the list frame where this row should be added.
1151///
1152/// # Errors
1153///
1154/// - `HedlError::Security` if nesting depth exceeds `limits.max_nest_depth`
1155/// - `HedlError::OrphanRow` if child row has no parent or no NEST rule exists
1156/// - `HedlError::Schema` if child type is not defined
1157/// - `HedlError::Syntax` if row is outside list context
1158///
1159/// # Examples
1160///
1161/// ```text
1162/// # Valid nested structure within depth limit
1163/// TYPE Person id name
1164/// TYPE Address street city
1165/// NEST Person Address
1166///
1167/// Person
1168/// 1, Alice    # depth 0
1169///   1, Main St, NYC    # depth 1 - child of Person row
1170/// ```
1171fn find_list_frame(
1172    stack: &mut Vec<Frame>,
1173    indent: usize,
1174    line_num: usize,
1175    header: &crate::header::Header,
1176    limits: &Limits,
1177) -> HedlResult<usize> {
1178    // Look for a list frame where this indent makes sense
1179    for (idx, frame) in stack.iter().enumerate().rev() {
1180        if let Frame::List {
1181            row_indent,
1182            type_name,
1183            list,
1184            ..
1185        } = frame
1186        {
1187            if indent == *row_indent {
1188                // Peer row
1189                return Ok(idx);
1190            } else if indent == *row_indent + 1 {
1191                // Child row - need NEST rule
1192                // Check if there's a parent row to attach to
1193                if list.is_empty() {
1194                    return Err(HedlError::orphan_row(
1195                        "child row has no parent row",
1196                        line_num,
1197                    ));
1198                }
1199
1200                let child_type = header.nests.get(type_name).ok_or_else(|| {
1201                    HedlError::orphan_row(
1202                        format!("no NEST rule for parent type '{}'", type_name),
1203                        line_num,
1204                    )
1205                })?;
1206
1207                // Get child schema
1208                let child_schema = header.structs.get(child_type).ok_or_else(|| {
1209                    HedlError::schema(format!("child type '{}' not defined", child_type), line_num)
1210                })?;
1211
1212                // SECURITY: Check NEST depth before pushing child frame to prevent DoS
1213                // Count current depth by counting List frames in the stack
1214                // Each List frame represents one level in the NEST hierarchy
1215                let current_depth = stack
1216                    .iter()
1217                    .filter(|f| matches!(f, Frame::List { .. }))
1218                    .count();
1219
1220                if current_depth >= limits.max_nest_depth {
1221                    return Err(HedlError::security(
1222                        format!(
1223                            "NEST hierarchy depth {} exceeds maximum allowed depth {}",
1224                            current_depth + 1,
1225                            limits.max_nest_depth
1226                        ),
1227                        line_num,
1228                    ));
1229                }
1230
1231                // Push a new list frame for the child
1232                stack.push(Frame::List {
1233                    list_start_indent: indent - 1,
1234                    row_indent: indent,
1235                    type_name: child_type.clone(),
1236                    schema: child_schema.clone(),
1237                    last_row_values: None,
1238                    list: Vec::new(),
1239                    key: child_type.clone(),
1240                    count_hint: None, // Child lists from NEST don't have count hints
1241                });
1242
1243                return Ok(stack.len() - 1);
1244            }
1245        }
1246    }
1247
1248    Err(HedlError::syntax(
1249        "matrix row outside of list context",
1250        line_num,
1251    ))
1252}
1253
1254fn validate_indent_for_child(stack: &[Frame], indent: usize, line_num: usize) -> HedlResult<()> {
1255    let expected = match stack.last() {
1256        Some(Frame::Root { .. }) => 0,
1257        Some(Frame::Object {
1258            indent: parent_indent,
1259            ..
1260        }) => parent_indent + 1,
1261        Some(Frame::List { row_indent: _, .. }) => {
1262            return Err(HedlError::syntax(
1263                "cannot add key-value inside list context",
1264                line_num,
1265            ));
1266        }
1267        None => 0,
1268    };
1269
1270    if indent != expected {
1271        return Err(HedlError::syntax(
1272            format!("expected indent level {}, got {}", expected, indent),
1273            line_num,
1274        ));
1275    }
1276
1277    Ok(())
1278}
1279
1280/// Validate indent for nested list declarations inside a list context.
1281/// Unlike scalar key-values, nested list declarations ARE allowed inside lists.
1282/// Returns the parent list frame index if valid, or error if invalid.
1283fn validate_nested_list_indent(
1284    stack: &[Frame],
1285    indent: usize,
1286    line_num: usize,
1287) -> HedlResult<Option<usize>> {
1288    // Check if we're inside a list context
1289    for (idx, frame) in stack.iter().enumerate().rev() {
1290        match frame {
1291            Frame::List {
1292                row_indent, list, ..
1293            } => {
1294                // Nested list declaration should be at row_indent + 1 (child level)
1295                if indent == *row_indent + 1 {
1296                    // Must have a parent row to attach to
1297                    if list.is_empty() {
1298                        return Err(HedlError::orphan_row(
1299                            "nested list declaration has no parent row",
1300                            line_num,
1301                        ));
1302                    }
1303                    return Ok(Some(idx));
1304                }
1305            }
1306            Frame::Root { .. } => {
1307                if indent == 0 {
1308                    return Ok(None); // Normal top-level list
1309                }
1310            }
1311            Frame::Object {
1312                indent: obj_indent, ..
1313            } => {
1314                if indent == obj_indent + 1 {
1315                    return Ok(None); // Normal list inside object
1316                }
1317            }
1318        }
1319    }
1320
1321    Err(HedlError::syntax(
1322        format!(
1323            "invalid indent level {} for nested list declaration",
1324            indent
1325        ),
1326        line_num,
1327    ))
1328}
1329
1330/// Check for duplicate keys and enforce security limits.
1331///
1332/// This function validates that:
1333/// 1. The key is not already present in the current object
1334/// 2. The object doesn't exceed max_object_keys limit
1335/// 3. The total number of keys across all objects doesn't exceed max_total_keys limit
1336///
1337/// # Security
1338///
1339/// The total_keys counter prevents DoS attacks where an attacker creates many small
1340/// objects, each under the max_object_keys limit, but collectively consuming excessive
1341/// memory. This provides defense-in-depth against memory exhaustion attacks.
1342fn check_duplicate_key(
1343    stack: &[Frame],
1344    key: &str,
1345    line_num: usize,
1346    limits: &Limits,
1347    total_keys: &mut usize,
1348) -> HedlResult<()> {
1349    let object_opt = match stack.last() {
1350        Some(Frame::Root { object }) | Some(Frame::Object { object, .. }) => Some(object),
1351        _ => None,
1352    };
1353
1354    if let Some(object) = object_opt {
1355        // Check for duplicate key
1356        if object.contains_key(key) {
1357            return Err(HedlError::semantic(
1358                format!("duplicate key: {}", key),
1359                line_num,
1360            ));
1361        }
1362
1363        // Security: Enforce max_object_keys limit to prevent memory exhaustion per object
1364        if object.len() >= limits.max_object_keys {
1365            return Err(HedlError::security(
1366                format!(
1367                    "object has too many keys: {} (max: {})",
1368                    object.len() + 1,
1369                    limits.max_object_keys
1370                ),
1371                line_num,
1372            ));
1373        }
1374
1375        // Security: Enforce max_total_keys limit to prevent cumulative memory exhaustion
1376        *total_keys = total_keys
1377            .checked_add(1)
1378            .ok_or_else(|| HedlError::security("total key count overflow", line_num))?;
1379
1380        if *total_keys > limits.max_total_keys {
1381            return Err(HedlError::security(
1382                format!(
1383                    "too many total keys: {} exceeds limit {}",
1384                    *total_keys, limits.max_total_keys
1385                ),
1386                line_num,
1387            ));
1388        }
1389    }
1390
1391    Ok(())
1392}
1393
1394fn insert_into_current(stack: &mut [Frame], key: String, item: Item) {
1395    if let Some(Frame::Root { object } | Frame::Object { object, .. }) = stack.last_mut() {
1396        object.insert(key, item);
1397    }
1398}
1399
1400fn parse_quoted_string(s: &str, line_num: usize) -> HedlResult<String> {
1401    if !s.starts_with('"') {
1402        return Err(HedlError::syntax("expected quoted string", line_num));
1403    }
1404
1405    let mut result = String::new();
1406    let mut chars = s[1..].chars().peekable();
1407
1408    while let Some(ch) = chars.next() {
1409        if ch == '"' {
1410            if chars.peek() == Some(&'"') {
1411                // Escaped quote
1412                chars.next();
1413                result.push('"');
1414            } else {
1415                // End of string
1416                return Ok(result);
1417            }
1418        } else {
1419            result.push(ch);
1420        }
1421    }
1422
1423    Err(HedlError::syntax("unclosed quoted string", line_num))
1424}
1425
1426fn finalize_stack(mut stack: Vec<Frame>) -> HedlResult<BTreeMap<String, Item>> {
1427    // Per SPEC Section 14.5: Detect truncated input.
1428    // Check only the DEEPEST (last) non-Root frame for truncation.
1429    // Intermediate frames will be empty until children are attached during pop.
1430    // Only if the deepest frame is an empty Object do we have actual truncation.
1431    // Note: Empty lists declared with @TypeName are allowed.
1432    if stack.len() > 1 {
1433        if let Some(Frame::Object { key, object, .. }) = stack.last() {
1434            if object.is_empty() {
1435                return Err(HedlError::syntax(
1436                    format!("truncated input: object '{}' has no children", key),
1437                    0,
1438                ));
1439            }
1440        }
1441    }
1442
1443    // Pop all frames back to root
1444    while stack.len() > 1 {
1445        let frame = stack.pop().unwrap();
1446        attach_frame_to_parent(&mut stack, frame);
1447    }
1448
1449    // Extract root object
1450    match stack.pop() {
1451        Some(Frame::Root { object }) => Ok(object),
1452        _ => Ok(BTreeMap::new()),
1453    }
1454}
1455
/// Unit tests for the `ParseOptions` builder and the parse timeout limit.
#[cfg(test)]
mod tests {
    use super::*;

    // ==================== ParseOptionsBuilder::new() tests ====================

    /// A fresh builder yields the documented defaults.
    #[test]
    fn test_builder_new_creates_default_options() {
        let options = ParseOptionsBuilder::new().build();
        let limits = &options.limits;

        assert_eq!(options.reference_mode, ReferenceMode::Strict);
        assert_eq!(limits.max_indent_depth, 50);
        assert_eq!(limits.max_nodes, 10_000_000);
    }

    /// `new()` and `Default::default()` build the same options.
    #[test]
    fn test_builder_default_trait() {
        let from_new = ParseOptionsBuilder::new().build();
        let from_default = ParseOptionsBuilder::default().build();

        assert_eq!(from_new.reference_mode, from_default.reference_mode);
        assert_eq!(
            from_new.limits.max_indent_depth,
            from_default.limits.max_indent_depth
        );
    }

    // ==================== ParseOptions::builder() tests ====================

    #[test]
    fn test_parse_options_builder_method() {
        let options = ParseOptions::builder().build();

        assert_eq!(options.reference_mode, ReferenceMode::Strict);
    }

    // ==================== Chainable method tests ====================

    /// `max_depth` sets `limits.max_indent_depth`.
    #[test]
    fn test_builder_max_depth() {
        let limits = ParseOptions::builder().max_depth(100).build().limits;

        assert_eq!(limits.max_indent_depth, 100);
    }

    /// `max_array_length` sets `limits.max_nodes`.
    #[test]
    fn test_builder_max_array_length() {
        let limits = ParseOptions::builder()
            .max_array_length(5000)
            .build()
            .limits;

        assert_eq!(limits.max_nodes, 5000);
    }

    #[test]
    fn test_builder_strict_true() {
        let options = ParseOptions::builder().strict(true).build();

        assert_eq!(options.reference_mode, ReferenceMode::Strict);
    }

    #[test]
    fn test_builder_strict_false() {
        let options = ParseOptions::builder().strict(false).build();

        assert_eq!(options.reference_mode, ReferenceMode::Lenient);
    }

    #[test]
    fn test_builder_max_file_size() {
        let requested = 500 * 1024 * 1024;
        let limits = ParseOptions::builder()
            .max_file_size(requested)
            .build()
            .limits;

        assert_eq!(limits.max_file_size, requested);
    }

    #[test]
    fn test_builder_max_line_length() {
        let requested = 512 * 1024;
        let limits = ParseOptions::builder()
            .max_line_length(requested)
            .build()
            .limits;

        assert_eq!(limits.max_line_length, requested);
    }

    #[test]
    fn test_builder_max_aliases() {
        let limits = ParseOptions::builder().max_aliases(5000).build().limits;

        assert_eq!(limits.max_aliases, 5000);
    }

    #[test]
    fn test_builder_max_columns() {
        let limits = ParseOptions::builder().max_columns(50).build().limits;

        assert_eq!(limits.max_columns, 50);
    }

    #[test]
    fn test_builder_max_nest_depth() {
        let limits = ParseOptions::builder().max_nest_depth(50).build().limits;

        assert_eq!(limits.max_nest_depth, 50);
    }

    #[test]
    fn test_builder_max_block_string_size() {
        let requested = 5 * 1024 * 1024;
        let limits = ParseOptions::builder()
            .max_block_string_size(requested)
            .build()
            .limits;

        assert_eq!(limits.max_block_string_size, requested);
    }

    #[test]
    fn test_builder_max_object_keys() {
        let limits = ParseOptions::builder()
            .max_object_keys(5000)
            .build()
            .limits;

        assert_eq!(limits.max_object_keys, 5000);
    }

    #[test]
    fn test_builder_max_total_keys() {
        let limits = ParseOptions::builder()
            .max_total_keys(5_000_000)
            .build()
            .limits;

        assert_eq!(limits.max_total_keys, 5_000_000);
    }

    // ==================== Multiple chained methods tests ====================

    #[test]
    fn test_builder_multiple_chains() {
        let options = ParseOptions::builder()
            .max_depth(100)
            .max_array_length(5000)
            .strict(false)
            .build();
        let limits = &options.limits;

        assert_eq!(limits.max_indent_depth, 100);
        assert_eq!(limits.max_nodes, 5000);
        assert_eq!(options.reference_mode, ReferenceMode::Lenient);
    }

    /// Every setter can be chained and each lands in its own field.
    #[test]
    fn test_builder_all_options_chained() {
        let options = ParseOptions::builder()
            .max_depth(75)
            .max_array_length(2000)
            .strict(false)
            .max_file_size(100 * 1024 * 1024)
            .max_line_length(256 * 1024)
            .max_aliases(1000)
            .max_columns(25)
            .max_nest_depth(30)
            .max_block_string_size(1024 * 1024)
            .max_object_keys(1000)
            .max_total_keys(1_000_000)
            .build();
        let limits = &options.limits;

        assert_eq!(limits.max_indent_depth, 75);
        assert_eq!(limits.max_nodes, 2000);
        assert_eq!(options.reference_mode, ReferenceMode::Lenient);
        assert_eq!(limits.max_file_size, 100 * 1024 * 1024);
        assert_eq!(limits.max_line_length, 256 * 1024);
        assert_eq!(limits.max_aliases, 1000);
        assert_eq!(limits.max_columns, 25);
        assert_eq!(limits.max_nest_depth, 30);
        assert_eq!(limits.max_block_string_size, 1024 * 1024);
        assert_eq!(limits.max_object_keys, 1000);
        assert_eq!(limits.max_total_keys, 1_000_000);
    }

    // ==================== Override tests ====================

    /// The last call to a setter wins.
    #[test]
    fn test_builder_override_previous_value() {
        let limits = ParseOptions::builder()
            .max_depth(50)
            .max_depth(100)
            .build()
            .limits;

        assert_eq!(limits.max_indent_depth, 100);
    }

    #[test]
    fn test_builder_override_multiple_times() {
        let limits = ParseOptions::builder()
            .max_array_length(1000)
            .max_array_length(2000)
            .max_array_length(3000)
            .build()
            .limits;

        assert_eq!(limits.max_nodes, 3000);
    }

    // ==================== Default behavior tests ====================

    /// Changing one limit leaves the others at their defaults.
    #[test]
    fn test_builder_default_keeps_other_defaults() {
        let options = ParseOptions::builder().max_depth(100).build();
        let limits = &options.limits;

        assert_eq!(limits.max_indent_depth, 100);
        // Other values should remain at defaults
        assert_eq!(limits.max_file_size, 1024 * 1024 * 1024);
        assert_eq!(limits.max_line_length, 1024 * 1024);
        assert_eq!(limits.max_nodes, 10_000_000);
        assert_eq!(options.reference_mode, ReferenceMode::Strict);
    }

    // ==================== Edge case tests ====================

    #[test]
    fn test_builder_zero_values() {
        let limits = ParseOptions::builder()
            .max_depth(0)
            .max_array_length(0)
            .max_aliases(0)
            .build()
            .limits;

        assert_eq!(limits.max_indent_depth, 0);
        assert_eq!(limits.max_nodes, 0);
        assert_eq!(limits.max_aliases, 0);
    }

    #[test]
    fn test_builder_max_values() {
        let limits = ParseOptions::builder()
            .max_depth(usize::MAX)
            .max_array_length(usize::MAX)
            .max_file_size(usize::MAX)
            .build()
            .limits;

        assert_eq!(limits.max_indent_depth, usize::MAX);
        assert_eq!(limits.max_nodes, usize::MAX);
        assert_eq!(limits.max_file_size, usize::MAX);
    }

    // ==================== Equivalence tests ====================

    #[test]
    fn test_builder_build_equivalent_to_default() {
        let built = ParseOptions::builder().build();
        let defaulted = ParseOptions::default();

        assert_eq!(built.reference_mode, defaulted.reference_mode);
        assert_eq!(
            built.limits.max_indent_depth,
            defaulted.limits.max_indent_depth
        );
        assert_eq!(built.limits.max_nodes, defaulted.limits.max_nodes);
        assert_eq!(built.limits.max_file_size, defaulted.limits.max_file_size);
    }

    /// Mutating a cloned builder does not affect the original.
    #[test]
    fn test_builder_clone_independent() {
        let original = ParseOptions::builder().max_depth(100);
        let derived = original.clone().max_depth(200);

        let original_limits = original.build().limits;
        let derived_limits = derived.build().limits;

        assert_eq!(original_limits.max_indent_depth, 100);
        assert_eq!(derived_limits.max_indent_depth, 200);
    }

    // ==================== Usage pattern tests ====================

    #[test]
    fn test_builder_typical_usage_pattern() {
        // Typical use case: strict parsing with moderate limits
        let options = ParseOptions::builder().max_depth(100).strict(true).build();

        assert_eq!(options.reference_mode, ReferenceMode::Strict);
        assert_eq!(options.limits.max_indent_depth, 100);
    }

    #[test]
    fn test_builder_lenient_parsing_pattern() {
        // Lenient parsing with higher limits
        let options = ParseOptions::builder()
            .max_array_length(50_000)
            .strict(false)
            .max_block_string_size(50 * 1024 * 1024)
            .build();
        let limits = &options.limits;

        assert_eq!(options.reference_mode, ReferenceMode::Lenient);
        assert_eq!(limits.max_nodes, 50_000);
        assert_eq!(limits.max_block_string_size, 50 * 1024 * 1024);
    }

    #[test]
    fn test_builder_restricted_parsing_pattern() {
        // Restricted parsing for security
        let options = ParseOptions::builder()
            .max_file_size(10 * 1024 * 1024)
            .max_line_length(64 * 1024)
            .max_depth(20)
            .max_array_length(1000)
            .strict(true)
            .build();
        let limits = &options.limits;

        assert_eq!(limits.max_file_size, 10 * 1024 * 1024);
        assert_eq!(limits.max_line_length, 64 * 1024);
        assert_eq!(limits.max_indent_depth, 20);
        assert_eq!(limits.max_nodes, 1000);
        assert_eq!(options.reference_mode, ReferenceMode::Strict);
    }

    // ==================== Timeout integration tests ====================

    #[test]
    fn test_parse_with_generous_timeout_succeeds() {
        let input = b"%VERSION: 1.0\n---\nkey: value\n";
        let mut options = ParseOptions::default();
        options.limits.timeout = Some(std::time::Duration::from_secs(10));

        assert!(parse_with_limits(input, options).is_ok());
    }

    #[test]
    fn test_parse_with_no_timeout_succeeds() {
        let input = b"%VERSION: 1.0\n---\nkey: value\n";
        let mut options = ParseOptions::default();
        options.limits.timeout = None;

        assert!(parse_with_limits(input, options).is_ok());
    }

    #[test]
    fn test_parse_with_very_short_timeout_fails() {
        // Create a document large enough to take some time
        let mut input = String::from("%VERSION: 1.0\n---\ndata:\n");
        input.extend((0..100_000).map(|i| format!("  key{}: value{}\n", i, i)));

        let mut options = ParseOptions::default();
        // Set an impossibly short timeout (1 microsecond)
        options.limits.timeout = Some(std::time::Duration::from_micros(1));

        let outcome = parse_with_limits(input.as_bytes(), options);
        assert!(outcome.is_err());

        if let Err(err) = outcome {
            let text = err.to_string();
            assert!(text.contains("timeout") || text.contains("Timeout"));
        }
    }

    #[test]
    fn test_default_timeout_is_reasonable() {
        let expected = Some(std::time::Duration::from_secs(30));

        assert_eq!(ParseOptions::default().limits.timeout, expected);
    }

    #[test]
    fn test_unlimited_has_no_timeout() {
        let unlimited = Limits::unlimited();

        assert_eq!(unlimited.timeout, None);
    }
}
1815}