rlsp_yaml_parser/
lib.rs

1// SPDX-License-Identifier: MIT
2#![deny(clippy::panic)]
3
4mod chars;
5pub mod encoding;
6mod error;
7mod event;
8mod lexer;
9mod lines;
10pub mod loader;
11pub mod node;
12mod pos;
13
14pub use error::Error;
15pub use event::{Chomp, CollectionStyle, Event, ScalarStyle};
16pub use lines::{BreakType, Line, LineBuffer};
17pub use loader::{LoadError, LoadMode, Loader, LoaderBuilder, LoaderOptions, load};
18pub use node::{Document, Node};
19pub use pos::{Pos, Span};
20
21use std::collections::{HashMap, VecDeque};
22
23use lexer::Lexer;
24
/// Parse a YAML string into a lazy event stream.
///
/// The returned iterator borrows `input` for as long as it is alive, and the
/// yielded [`Event`]s carry that same lifetime, so they may reference slices
/// of `input` rather than owning copies.
///
/// The iterator yields <code>Result<([Event], [Span]), [Error]></code> items.
/// The first event is always [`Event::StreamStart`] and the last is always
/// [`Event::StreamEnd`].
///
/// # Example
///
/// ```
/// use rlsp_yaml_parser::{parse_events, Event};
///
/// let events: Vec<_> = parse_events("").collect();
/// assert!(matches!(events.first(), Some(Ok((Event::StreamStart, _)))));
/// assert!(matches!(events.last(), Some(Ok((Event::StreamEnd, _)))));
/// ```
pub fn parse_events(input: &str) -> impl Iterator<Item = Result<(Event<'_>, Span), Error>> + '_ {
    // All state lives in the private `EventIter`; this function only hides
    // the concrete iterator type from the public API.
    EventIter::new(input)
}
43
44// ---------------------------------------------------------------------------
45// Depth limit (security: DoS via deeply nested collections)
46// ---------------------------------------------------------------------------
47
/// Maximum combined block-collection nesting depth accepted from untrusted
/// input.
///
/// This limit covers all open [`Event::SequenceStart`] and
/// [`Event::MappingStart`] events combined.  Using a unified limit prevents
/// an attacker from nesting 512 sequences inside 512 mappings (total depth
/// 1024) by exploiting separate per-type limits.
///
/// 512 is generous for all real-world YAML (Kubernetes / Helm documents are
/// typically under 20 levels deep) and small enough that the explicit-stack
/// overhead stays within a few KB.
///
/// NOTE(review): the documentation of the internal `CollectionEntry` type
/// states that flow-collection nesting is counted against this same limit;
/// if so, "block-collection" in the summary above is too narrow — confirm.
pub const MAX_COLLECTION_DEPTH: usize = 512;
60
/// Maximum byte length of an anchor or alias name accepted from untrusted
/// input.
///
/// The YAML spec places no upper limit on anchor names, but scanning a name
/// consisting of millions of valid `ns-anchor-char` bytes would exhaust CPU
/// time without any heap allocation.  This limit caps anchor and alias name
/// scanning at 1 KiB — generous for all real-world YAML (Kubernetes names are
/// typically under 64 bytes) while preventing degenerate-input stalls.
///
/// The limit is enforced by [`parse_events`] for both `&name` (anchors) and
/// `*name` (aliases).  Exceeding it returns an [`Error`], not a panic.
pub const MAX_ANCHOR_NAME_BYTES: usize = 1024;
74
/// Maximum byte length of a tag accepted from untrusted input.
///
/// The YAML spec places no upper limit on tag length, but scanning a tag
/// consisting of millions of valid bytes would exhaust CPU time without any
/// heap allocation.  This limit caps tag scanning at 4 KiB — generous for all
/// real-world YAML (standard tags like `tag:yaml.org,2002:str` are under 30
/// bytes; custom namespace URIs are rarely over 200 bytes) while preventing
/// degenerate-input stalls.
///
/// The limit applies to the raw scanned portion: the URI content between `<`
/// and `>` for verbatim tags, or the suffix portion for shorthand tags.
/// Exceeding it returns an [`Error`], not a panic.
///
/// [`MAX_RESOLVED_TAG_LEN`] is defined in terms of this constant, so changing
/// the value here also changes the bound on prefix-expanded tags.
pub const MAX_TAG_LEN: usize = 4096;
88
/// Maximum byte length of a comment body accepted from untrusted input.
///
/// The YAML spec places no upper limit on comment length.  With zero-copy
/// `&'input str` slices, comment scanning itself allocates nothing, but
/// character-by-character iteration over a very long comment line still burns
/// CPU proportional to the line length.  This limit has the same value as
/// [`MAX_TAG_LEN`] (the two constants are declared independently) —
/// comment-only files produce one `Comment` event per line (O(input size),
/// acceptable) as long as individual lines are bounded.
///
/// Exceeding this limit returns an [`Error`], not a panic or truncation.
pub const MAX_COMMENT_LEN: usize = 4096;
100
/// Maximum number of directives (`%YAML` + `%TAG` combined) per document.
///
/// Without this cap, an attacker could supply thousands of distinct `%TAG`
/// directives, each allocating a `HashMap` entry, to exhaust heap memory.
/// 64 is generous for all real-world YAML (the typical document has 0–2
/// directives) while bounding per-document directive overhead.
///
/// The count is per document, not per stream: the directive state it guards
/// is documented as being cleared at each document boundary.
///
/// Exceeding this limit returns an [`Error`], not a panic.
pub const MAX_DIRECTIVES_PER_DOC: usize = 64;
110
/// Maximum byte length of a `%TAG` handle (e.g. `!foo!`) accepted from
/// untrusted input.
///
/// Tag handles are short by design; a 256-byte cap is generous while
/// preventing `DoS` via scanning very long handle strings.
///
/// This bounds the handle only; the prefix it maps to and the tag suffix are
/// governed by the tag-length limits ([`MAX_TAG_LEN`],
/// [`MAX_RESOLVED_TAG_LEN`]).
///
/// Exceeding this limit returns an [`Error`], not a panic.
pub const MAX_TAG_HANDLE_BYTES: usize = 256;
119
/// Maximum byte length of the fully-resolved tag string after prefix expansion.
///
/// When a shorthand tag `!foo!bar` is resolved against its `%TAG` prefix, the
/// result is `prefix + suffix`.  This cap prevents the resolved string from
/// exceeding a safe bound even when the prefix and suffix are both at their
/// individual limits.  Reuses [`MAX_TAG_LEN`] so the bound is consistent with
/// verbatim tag limits.
///
/// The same bound applies to `!!suffix` tags, which are expanded against the
/// `!!` handle's prefix in the same way.
///
/// The check is performed before allocation; exceeding this limit returns an
/// [`Error`], not a panic.
pub const MAX_RESOLVED_TAG_LEN: usize = MAX_TAG_LEN;
131
132// ---------------------------------------------------------------------------
133// Directive scope
134// ---------------------------------------------------------------------------
135
/// Per-document directive state accumulated from `%YAML` and `%TAG` directives.
///
/// Cleared at the start of each new document (on `---` in `BetweenDocs`, on
/// `...`, or at EOF).  The default handles (`!!` and `!`) are **not** stored
/// here — they are resolved directly in [`DirectiveScope::resolve_tag`].
///
/// `Default` yields the empty scope: no version, no custom handles, and a
/// directive count of zero.
#[derive(Debug, Default)]
struct DirectiveScope {
    /// Version from `%YAML`, if any.
    ///
    /// Presumably `(major, minor)` — TODO confirm against the directive parser.
    version: Option<(u8, u8)>,
    /// Custom tag handles declared via `%TAG` directives.
    ///
    /// Key: handle (e.g. `"!foo!"`).  Value: prefix (e.g. `"tag:example.com:"`).
    /// A `%TAG !! …` directive is stored under the key `"!!"`, which
    /// `resolve_tag` consults before falling back to the built-in prefix.
    tag_handles: HashMap<String, String>,
    /// Total directive count (YAML + TAG combined) for the `DoS` limit check.
    directive_count: usize,
}
152
153impl DirectiveScope {
154    /// Resolve a raw tag slice (as stored in `pending_tag`) to its final form.
155    ///
156    /// Resolution rules:
157    /// - Verbatim tag (no leading `!`, i.e. already a bare URI from `!<URI>` scanning) → returned as-is.
158    /// - `!!suffix` → look up `"!!"` in custom handles; fall back to default `tag:yaml.org,2002:`.
159    /// - `!suffix` (no inner `!`) → returned as-is (local tag, no expansion).
160    /// - `!handle!suffix` → look up `"!handle!"` in custom handles; error if not found.
161    /// - `!` (bare) → returned as-is.
162    ///
163    /// Returns `Ok(Cow::Borrowed(raw))` when no allocation is needed, or
164    /// `Ok(Cow::Owned(resolved))` after prefix expansion.  Returns `Err` when
165    /// a named handle has no registered prefix.
166    fn resolve_tag<'a>(
167        &self,
168        raw: &'a str,
169        indicator_pos: Pos,
170    ) -> Result<std::borrow::Cow<'a, str>, Error> {
171        use std::borrow::Cow;
172
173        // Verbatim tags arrive as bare URIs (scan_tag strips the `!<` / `>` wrappers).
174        // They do not start with `!`, so no resolution is needed.
175        if !raw.starts_with('!') {
176            return Ok(Cow::Borrowed(raw));
177        }
178
179        let after_first_bang = &raw[1..];
180
181        // `!!suffix` — primary handle.
182        if let Some(suffix) = after_first_bang.strip_prefix('!') {
183            let prefix = self
184                .tag_handles
185                .get("!!")
186                .map_or("tag:yaml.org,2002:", String::as_str);
187            let resolved = format!("{prefix}{suffix}");
188            if resolved.len() > MAX_RESOLVED_TAG_LEN {
189                return Err(Error {
190                    pos: indicator_pos,
191                    message: format!(
192                        "resolved tag exceeds maximum length of {MAX_RESOLVED_TAG_LEN} bytes"
193                    ),
194                });
195            }
196            return Ok(Cow::Owned(resolved));
197        }
198
199        // `!handle!suffix` — named handle.
200        if let Some(inner_bang) = after_first_bang.find('!') {
201            let handle = &raw[..inner_bang + 2]; // `!handle!`
202            let suffix = &after_first_bang[inner_bang + 1..];
203            if let Some(prefix) = self.tag_handles.get(handle) {
204                let resolved = format!("{prefix}{suffix}");
205                if resolved.len() > MAX_RESOLVED_TAG_LEN {
206                    return Err(Error {
207                        pos: indicator_pos,
208                        message: format!(
209                            "resolved tag exceeds maximum length of {MAX_RESOLVED_TAG_LEN} bytes"
210                        ),
211                    });
212                }
213                return Ok(Cow::Owned(resolved));
214            }
215            return Err(Error {
216                pos: indicator_pos,
217                message: format!("undefined tag handle: {handle}"),
218            });
219        }
220
221        // `!suffix` (local tag) or bare `!` — no expansion.
222        Ok(Cow::Borrowed(raw))
223    }
224
225    /// Collect the tag handle/prefix pairs for inclusion in `DocumentStart`.
226    fn tag_directives(&self) -> Vec<(String, String)> {
227        let mut pairs: Vec<(String, String)> = self
228            .tag_handles
229            .iter()
230            .map(|(h, p)| (h.clone(), p.clone()))
231            .collect();
232        // Sort for deterministic ordering in tests and events.
233        pairs.sort_unstable_by(|a, b| a.0.cmp(&b.0));
234        pairs
235    }
236}
237
238// ---------------------------------------------------------------------------
239// Iterator implementation
240// ---------------------------------------------------------------------------
241
/// Outcome of one state-machine step inside [`EventIter::next`].
///
/// The `'input` lifetime is the lifetime of the YAML text being parsed; it is
/// carried by the [`Event`] inside [`StepResult::Yield`].
enum StepResult<'input> {
    /// The step pushed to `queue` or changed state; loop again to drain.
    Continue,
    /// The step produced an event or error to return immediately.
    Yield(Result<(Event<'input>, Span), Error>),
}
249
/// State of the top-level event iterator.
///
/// A new iterator starts in [`IterState::BeforeStream`] (see
/// `EventIter::new`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IterState {
    /// About to emit `StreamStart`.
    BeforeStream,
    /// Between documents: skip blanks/comments/directives, detect next document.
    BetweenDocs,
    /// Inside a document: consume lines until a boundary marker or EOF.
    InDocument,
    /// `StreamEnd` emitted; done.
    Done,
}
262
/// What the state machine expects next for an open mapping entry.
///
/// Stored per open block mapping in `CollectionEntry::Mapping`.  Closing a
/// nested collection resets the parent mapping's phase from `Value` back to
/// `Key`, because the closed collection was that mapping's value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MappingPhase {
    /// The next node is a key (first half of a pair).
    Key,
    /// The next node is a value (second half of a pair).
    Value,
}
271
/// An entry on the collection stack, tracking open block sequences and mappings.
///
/// Flow collections are fully parsed by [`EventIter::handle_flow_collection`]
/// before returning; they never leave an entry on this stack.  The combined
/// depth limit (block + flow) is enforced inside `handle_flow_collection` by
/// summing `coll_stack.len()` with the local flow-frame count.
///
/// In both variants the first field is the indentation column, which is what
/// `CollectionEntry::indent` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CollectionEntry {
    /// An open block sequence.  Holds the column of its `-` indicator and
    /// whether at least one complete item has been delivered.  `has_had_item`
    /// is `false` for a freshly opened sequence and becomes `true` once a
    /// complete item (scalar or sub-collection) has been emitted.  Used by
    /// `handle_sequence_entry` to detect a `-` at the wrong indentation level.
    ///
    /// Fields, in order: `(column, has_had_item)`.
    Sequence(usize, bool),
    /// An open block mapping.  Holds the column of its first key, the
    /// current phase (expecting key or value), and whether the mapping has
    /// had at least one key advanced to the value phase (`has_had_value`).
    /// `has_had_value` is `false` for a freshly opened mapping and becomes
    /// `true` the first time `advance_mapping_to_value` is called on it.
    /// The wrong-indentation check in `handle_mapping_entry` uses this flag
    /// to avoid false positives on explicit-key content nodes (e.g. V9D5).
    ///
    /// Fields, in order: `(column, phase, has_had_value)`.
    Mapping(usize, MappingPhase, bool),
}
295
/// Whether the next expected token in a flow mapping is a key or value.
///
/// This is the flow-context counterpart of [`MappingPhase`], which plays the
/// same role for block mappings on the collection stack.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FlowMappingPhase {
    /// Expecting the next key (or the closing `}`).
    Key,
    /// Expecting the value after a key has been consumed.
    Value,
}
304
305impl CollectionEntry {
306    /// The indentation column of this collection's indicator/key.
307    const fn indent(self) -> usize {
308        match self {
309            Self::Sequence(col, _) | Self::Mapping(col, _, _) => col,
310        }
311    }
312}
313
/// Lazy iterator that yields events by walking a [`Lexer`].
///
/// `'input` is the lifetime of the YAML text being parsed; queued events and
/// pending node properties borrow from it.
// The boolean fields below are independent parser-state flags rather than
// alternatives of one state, hence the lint is silenced for this struct.
#[allow(clippy::struct_excessive_bools)]
struct EventIter<'input> {
    /// Line-oriented lexer over the input; the source of all scalar content.
    lexer: Lexer<'input>,
    /// Current top-level state (stream start, between documents, …).
    state: IterState,
    /// Queued events to emit before resuming normal state dispatch.
    ///
    /// Used when a single parse step must produce multiple consecutive events —
    /// e.g. `SequenceStart` before the first item, or multiple close events
    /// when a dedent closes several nested collections at once.
    queue: VecDeque<(Event<'input>, Span)>,
    /// Stack of open block collections (sequences and mappings).
    ///
    /// Each entry records whether the open collection is a sequence or a
    /// mapping, its indentation column, and (for mappings) whether the next
    /// expected node is a key or a value.  The combined length of this stack
    /// is bounded by [`MAX_COLLECTION_DEPTH`].
    coll_stack: Vec<CollectionEntry>,
    /// Set to `true` after an `Err` is yielded.
    ///
    /// Once set, `next()` immediately returns `None` to prevent infinite
    /// error loops (e.g. depth-limit firing on the same prepended synthetic
    /// line).
    failed: bool,
    /// A pending anchor name (`&name`) that has been scanned but not yet
    /// attached to a node event.
    ///
    /// Anchors in YAML precede the node they annotate.  After scanning
    /// `&name`, the parser stores the name here and attaches it to the next
    /// `Scalar`, `SequenceStart`, or `MappingStart` event.
    ///
    /// `pending_anchor_for_collection` distinguishes two cases:
    /// - `true`: anchor was on its own line (`&name\n- item`) — the anchor
    ///   annotates the next node regardless of type (collection or scalar).
    /// - `false`: anchor was inline with key content
    ///   (`&name key: value`) — the anchor annotates the key scalar, not
    ///   the enclosing mapping.
    pending_anchor: Option<&'input str>,
    /// True when `pending_anchor` was set from a standalone anchor line (no
    /// inline content after the name).  False when set from an inline anchor
    /// that precedes a key or scalar on the same line.
    pending_anchor_for_collection: bool,
    /// A pending tag that has been scanned but not yet attached to a node event.
    ///
    /// Tags in YAML precede the node they annotate (YAML 1.2 §6.8.1).  After
    /// scanning `!tag`, `!!tag`, `!<uri>`, or `!`, the parser stores the tag
    /// here and attaches it to the next `Scalar`, `SequenceStart`, or
    /// `MappingStart` event.
    ///
    /// Tags are resolved against the current directive scope at scan time:
    /// - `!<URI>`  → stored as `Cow::Borrowed("URI")` (verbatim, no change)
    /// - `!!suffix` → resolved via `!!` handle (default: `tag:yaml.org,2002:suffix`)
    /// - `!suffix` → stored as `Cow::Borrowed("!suffix")` (local tag, no expansion)
    /// - `!`       → stored as `Cow::Borrowed("!")`
    /// - `!handle!suffix` → resolved via `%TAG !handle! prefix` directive
    ///
    /// The two resolved forms are held as `Cow::Owned`, since expansion builds
    /// a new string.
    pending_tag: Option<std::borrow::Cow<'input, str>>,
    /// True when `pending_tag` was set from a standalone tag line (no inline
    /// content after the tag).  False when set inline.
    pending_tag_for_collection: bool,
    /// Directive scope for the current document.
    ///
    /// Accumulated from `%YAML` and `%TAG` directives seen in `BetweenDocs`
    /// state.  Reset at document boundaries.
    directive_scope: DirectiveScope,
    /// Set to `true` once the root node of the current document has been
    /// fully emitted (a scalar at the top level, or a collection after its
    /// closing event empties `coll_stack`).
    ///
    /// Used to detect invalid extra content after the document root, such as
    /// `foo:\n  bar\ninvalid` where `invalid` appears after the root mapping
    /// closes.  Reset to `false` at each document boundary.
    root_node_emitted: bool,
    /// Set to `true` after consuming a `? ` explicit key indicator whose key
    /// content will appear on the NEXT line (i.e., `had_key_inline = false`).
    /// Cleared when the key content is processed.
    ///
    /// Used to allow a block sequence indicator on a line following `? ` to be
    /// treated as the explicit key's content rather than triggering the
    /// "invalid block sequence entry" guard.
    explicit_key_pending: bool,
    /// When a tag or anchor appears inline on a physical line (e.g. `!!str &a key:`),
    /// the key content is prepended as a synthetic line with the key's column as its
    /// indent.  This field records the indent of the ORIGINAL physical line so that
    /// `handle_mapping_entry` can open the mapping at the correct (original) indent
    /// rather than the synthetic line's offset.
    ///
    /// `None` when no such inline property is in effect.
    property_origin_indent: Option<usize>,
}
401
402impl<'input> EventIter<'input> {
403    fn new(input: &'input str) -> Self {
404        Self {
405            lexer: Lexer::new(input),
406            state: IterState::BeforeStream,
407            queue: VecDeque::new(),
408            coll_stack: Vec::new(),
409            failed: false,
410            pending_anchor: None,
411            pending_anchor_for_collection: false,
412            pending_tag: None,
413            pending_tag_for_collection: false,
414            directive_scope: DirectiveScope::default(),
415            root_node_emitted: false,
416            explicit_key_pending: false,
417            property_origin_indent: None,
418        }
419    }
420
    /// Current combined collection depth (sequences + mappings).
    ///
    /// This is the number of entries on `coll_stack`, i.e. open *block*
    /// collections only; flow collections never leave an entry on that stack.
    const fn collection_depth(&self) -> usize {
        self.coll_stack.len()
    }
425
426    /// Push close events for all collections whose indent is `>= threshold`,
427    /// from innermost to outermost.
428    ///
429    /// After each close, if the new top of the stack is a mapping in Value
430    /// phase, flips it to Key phase — the closed collection was that
431    /// mapping's value.
432    fn close_collections_at_or_above(&mut self, threshold: usize, pos: Pos) {
433        while let Some(&top) = self.coll_stack.last() {
434            if top.indent() >= threshold {
435                self.coll_stack.pop();
436                let ev = match top {
437                    CollectionEntry::Sequence(_, _) => Event::SequenceEnd,
438                    CollectionEntry::Mapping(_, _, _) => Event::MappingEnd,
439                };
440                self.queue.push_back((ev, zero_span(pos)));
441                // After closing a collection, the parent mapping (if any)
442                // transitions from Value phase to Key phase.  The parent
443                // sequence (if any) marks its current item as completed.
444                match self.coll_stack.last_mut() {
445                    Some(CollectionEntry::Mapping(_, phase, _)) => {
446                        if *phase == MappingPhase::Value {
447                            *phase = MappingPhase::Key;
448                        }
449                    }
450                    Some(CollectionEntry::Sequence(_, has_had_item)) => {
451                        *has_had_item = true;
452                    }
453                    None => {}
454                }
455            } else {
456                break;
457            }
458        }
459    }
460
461    /// Push close events for all open collections (document-end).
462    ///
463    /// If a mapping is in Value phase when it closes, an empty plain scalar is
464    /// emitted first to satisfy the pending key that had no inline value —
465    /// **unless** the previous closed item was a collection (sequence or
466    /// mapping), which was itself the value.  After each closed collection,
467    /// the parent mapping (if any) is advanced from Value to Key phase.
468    fn close_all_collections(&mut self, pos: Pos) {
469        while let Some(top) = self.coll_stack.pop() {
470            let ev = match top {
471                CollectionEntry::Sequence(_, _) => Event::SequenceEnd,
472                CollectionEntry::Mapping(_, MappingPhase::Value, _) => {
473                    // Mapping closed while waiting for a value — emit empty value.
474                    // Consume any pending anchor so `&anchor\n` at end of doc
475                    // is properly attached to the empty value.
476                    self.queue.push_back((
477                        Event::Scalar {
478                            value: std::borrow::Cow::Borrowed(""),
479                            style: ScalarStyle::Plain,
480                            anchor: self.pending_anchor.take(),
481                            tag: None,
482                        },
483                        zero_span(pos),
484                    ));
485                    Event::MappingEnd
486                }
487                CollectionEntry::Mapping(_, MappingPhase::Key, _) => Event::MappingEnd,
488            };
489            self.queue.push_back((ev, zero_span(pos)));
490            // After closing any collection, advance the parent mapping (if in
491            // Value phase) to Key phase — the just-closed collection was its value.
492            if let Some(CollectionEntry::Mapping(_, phase, _)) = self.coll_stack.last_mut() {
493                if *phase == MappingPhase::Value {
494                    *phase = MappingPhase::Key;
495                }
496            }
497        }
498    }
499
500    /// Check whether the next available line is a block-sequence entry
501    /// indicator (`-` followed by space, tab, or end-of-content).
502    ///
503    /// Returns `(dash_indent, dash_pos)` where:
504    /// - `dash_indent` is the effective document column of the `-`.
505    /// - `dash_pos` is the absolute [`Pos`] of the `-` character.
506    fn peek_sequence_entry(&self) -> Option<(usize, Pos)> {
507        let line = self.lexer.peek_next_line()?;
508        let dash_indent = line.indent;
509        let trimmed = line.content.trim_start_matches(' ');
510
511        if !trimmed.starts_with('-') {
512            return None;
513        }
514        let after_dash = &trimmed[1..];
515        let is_entry =
516            after_dash.is_empty() || after_dash.starts_with(' ') || after_dash.starts_with('\t');
517        if !is_entry {
518            return None;
519        }
520
521        let leading_spaces = line.content.len() - trimmed.len();
522        let dash_pos = Pos {
523            byte_offset: line.pos.byte_offset + leading_spaces,
524            char_offset: line.pos.char_offset + leading_spaces,
525            line: line.pos.line,
526            column: line.pos.column + leading_spaces,
527        };
528        Some((dash_indent, dash_pos))
529    }
530
531    /// Check whether the next available line looks like an implicit mapping
532    /// key: a non-empty line whose plain-scalar content is followed by `: `
533    /// (colon + space) or `:\n` (colon at end-of-line) or `:\t`.
534    ///
535    /// Also recognises the explicit key indicator `? ` at the start of a line.
536    ///
537    /// Returns `(key_indent, key_pos)` on success, where `key_indent` is the
538    /// document column of the first character of the key (or `?` indicator),
539    /// and `key_pos` is its absolute [`Pos`].
540    fn peek_mapping_entry(&self) -> Option<(usize, Pos)> {
541        let line = self.lexer.peek_next_line()?;
542        let key_indent = line.indent;
543
544        let leading_spaces = line.content.len() - line.content.trim_start_matches(' ').len();
545        let trimmed = &line.content[leading_spaces..];
546
547        if trimmed.is_empty() {
548            return None;
549        }
550
551        let key_pos = Pos {
552            byte_offset: line.pos.byte_offset + leading_spaces,
553            char_offset: line.pos.char_offset + leading_spaces,
554            line: line.pos.line,
555            column: line.pos.column + leading_spaces,
556        };
557
558        // Explicit key indicator: `? ` or `?` at EOL.
559        if let Some(after_q) = trimmed.strip_prefix('?') {
560            if after_q.is_empty()
561                || after_q.starts_with(' ')
562                || after_q.starts_with('\t')
563                || after_q.starts_with('\n')
564                || after_q.starts_with('\r')
565            {
566                return Some((key_indent, key_pos));
567            }
568        }
569
570        // Implicit key: line contains `: ` or ends with `:`.
571        // We scan the plain-scalar portion of the line for the value indicator.
572        if is_implicit_mapping_line(trimmed) {
573            return Some((key_indent, key_pos));
574        }
575
576        None
577    }
578
579    /// Try to consume a scalar from the current lexer position.
580    ///
581    /// `plain_parent_indent` — the indent of the current line; plain scalar
582    /// continuation stops when the next line is less-indented than this.
583    ///
584    /// `block_parent_indent` — the indent of the enclosing block context;
585    /// block scalars collect content that is more indented than this value.
586    ///
587    /// Consumes `self.pending_anchor` and attaches it to the emitted scalar.
588    fn try_consume_scalar(
589        &mut self,
590        plain_parent_indent: usize,
591        block_parent_indent: usize,
592    ) -> Result<Option<(Event<'input>, Span)>, Error> {
593        if let Some(result) = self
594            .lexer
595            .try_consume_literal_block_scalar(block_parent_indent)
596        {
597            let (value, chomp, span) = result?;
598            return Ok(Some((
599                Event::Scalar {
600                    value,
601                    style: ScalarStyle::Literal(chomp),
602                    anchor: self.pending_anchor.take(),
603                    tag: self.pending_tag.take(),
604                },
605                span,
606            )));
607        }
608        if let Some(result) = self
609            .lexer
610            .try_consume_folded_block_scalar(block_parent_indent)
611        {
612            let (value, chomp, span) = result?;
613            return Ok(Some((
614                Event::Scalar {
615                    value,
616                    style: ScalarStyle::Folded(chomp),
617                    anchor: self.pending_anchor.take(),
618                    tag: self.pending_tag.take(),
619                },
620                span,
621            )));
622        }
623        if let Some((value, span)) = self.lexer.try_consume_single_quoted(plain_parent_indent)? {
624            return Ok(Some((
625                Event::Scalar {
626                    value,
627                    style: ScalarStyle::SingleQuoted,
628                    anchor: self.pending_anchor.take(),
629                    tag: self.pending_tag.take(),
630                },
631                span,
632            )));
633        }
634        // Pass Some(parent_indent) when inside a block collection so
635        // collect_double_quoted_continuations can validate continuation-line
636        // indentation (YAML 1.2 §7.3.1).  At document root (coll_stack empty)
637        // there is no enclosing block, so no indent constraint: pass None.
638        let dq_block_indent = if self.coll_stack.is_empty() {
639            None
640        } else {
641            Some(plain_parent_indent)
642        };
643        if let Some((value, span)) = self.lexer.try_consume_double_quoted(dq_block_indent)? {
644            // In block context, after a double-quoted scalar closes, the only
645            // valid trailing content is optional whitespace followed by an
646            // optional comment (with mandatory preceding whitespace before `#`).
647            // Non-comment, non-whitespace content is an error.
648            if let Some((tail, tail_pos)) = self.lexer.pending_multiline_tail.take() {
649                let first_non_ws = tail.trim_start_matches([' ', '\t']);
650                if !first_non_ws.is_empty() {
651                    let ws_len = tail.len() - first_non_ws.len();
652                    if first_non_ws.starts_with('#') && ws_len == 0 {
653                        // `#` immediately after closing quote — not a comment.
654                        self.failed = true;
655                        return Err(Error {
656                            pos: tail_pos,
657                            message: "comment requires at least one space before '#'".into(),
658                        });
659                    } else if !first_non_ws.starts_with('#') {
660                        // Non-comment content after quoted scalar.
661                        self.failed = true;
662                        return Err(Error {
663                            pos: tail_pos,
664                            message: "unexpected content after quoted scalar".into(),
665                        });
666                    }
667                    // Valid comment: discard (the comment event is not emitted
668                    // in block context here; it will be picked up by drain_trailing_comment
669                    // in the normal flow).
670                }
671            }
672            return Ok(Some((
673                Event::Scalar {
674                    value,
675                    style: ScalarStyle::DoubleQuoted,
676                    anchor: self.pending_anchor.take(),
677                    tag: self.pending_tag.take(),
678                },
679                span,
680            )));
681        }
682        if let Some((value, span)) = self.lexer.try_consume_plain_scalar(plain_parent_indent) {
683            // Check for invalid content in the suffix (e.g. NUL or mid-stream
684            // BOM that stopped the scanner but is not valid at this position).
685            if let Some(e) = self.lexer.plain_scalar_suffix_error.take() {
686                return Err(e);
687            }
688            return Ok(Some((
689                Event::Scalar {
690                    value,
691                    style: ScalarStyle::Plain,
692                    anchor: self.pending_anchor.take(),
693                    tag: self.pending_tag.take(),
694                },
695                span,
696            )));
697        }
698        Ok(None)
699    }
700
701    /// Consume the leading `-` indicator from the current line and (if
702    /// present) prepend a synthetic line for the inline content.
703    ///
704    /// Returns `true` if inline content was found and prepended.
705    fn consume_sequence_dash(&mut self, dash_indent: usize) -> bool {
706        // SAFETY: caller verified via peek_sequence_entry — the line exists.
707        let Some(line) = self.lexer.peek_next_line() else {
708            unreachable!("consume_sequence_dash called without a pending line")
709        };
710
711        let content = line.content;
712        let after_spaces = content.trim_start_matches(' ');
713        debug_assert!(
714            after_spaces.starts_with('-'),
715            "sequence dash not at expected position"
716        );
717        let rest_of_line = &after_spaces[1..];
718        let inline = rest_of_line.trim_start_matches([' ', '\t']);
719        let had_inline = !inline.is_empty();
720
721        if had_inline {
722            let leading_spaces = content.len() - after_spaces.len();
723            let spaces_after_dash = rest_of_line.len() - inline.len();
724            let offset_from_dash = 1 + spaces_after_dash;
725            let total_offset = leading_spaces + offset_from_dash;
726            let inline_col = dash_indent + offset_from_dash;
727            let inline_pos = Pos {
728                byte_offset: line.pos.byte_offset + total_offset,
729                char_offset: line.pos.char_offset + total_offset,
730                line: line.pos.line,
731                column: line.pos.column + total_offset,
732            };
733            let synthetic = Line {
734                content: inline,
735                offset: inline_pos.byte_offset,
736                indent: inline_col,
737                break_type: line.break_type,
738                pos: inline_pos,
739            };
740            self.lexer.consume_line();
741            self.lexer.prepend_inline_line(synthetic);
742        } else {
743            self.lexer.consume_line();
744        }
745
746        had_inline
747    }
748
749    /// Consume the current mapping-entry line.
750    ///
751    /// Handles both forms:
752    /// - **Explicit key** (`? key`): consume the `?` indicator line, extract
753    ///   any inline key content and prepend a synthetic line for it.
754    /// - **Implicit key** (`key: value`): split the line at the `: ` / `:\n`
755    ///   boundary.  Return the key as a pre-extracted slice so the caller can
756    ///   emit it as a `Scalar` event directly (bypassing the plain-scalar
757    ///   continuation logic).  Prepend the value portion (if non-empty) as a
758    ///   synthetic line.
759    ///
760    /// Returns a `ConsumedMapping` describing what was found.
761    #[allow(clippy::too_many_lines)]
762    fn consume_mapping_entry(&mut self, key_indent: usize) -> ConsumedMapping<'input> {
763        // SAFETY: caller verified via peek_mapping_entry — the line exists.
764        let Some(line) = self.lexer.peek_next_line() else {
765            unreachable!("consume_mapping_entry called without a pending line")
766        };
767
768        // Extract all data from the borrowed line before any mutable lexer calls.
769        // `content` is `'input`-lived (borrows the original input string, not
770        // the lexer's internal buffer), so it remains valid after consume_line().
771        let content: &'input str = line.content;
772        let line_pos = line.pos;
773        let line_break_type = line.break_type;
774
775        let leading_spaces = content.len() - content.trim_start_matches(' ').len();
776        let trimmed = &content[leading_spaces..];
777
778        // --- Explicit key: `? ` or `?` at EOL ---
779        //
780        // The explicit key indicator is `?` followed by whitespace or end of
781        // line (YAML 1.2 §8.2.2).  A `?` followed by a non-whitespace character
782        // (e.g. `?foo: val`) is NOT an explicit key — `?foo` is an implicit key
783        // that starts with `?`, just like `?foo: val` being a mapping entry where
784        // the key is the plain scalar `?foo`.  This check must mirror the
785        // condition in peek_mapping_entry to keep consume and peek consistent.
786        if let Some(after_q) = trimmed.strip_prefix('?') {
787            let is_explicit_key = after_q.is_empty()
788                || after_q.starts_with(' ')
789                || after_q.starts_with('\t')
790                || after_q.starts_with('\n')
791                || after_q.starts_with('\r');
792            if is_explicit_key {
793                let inline = after_q.trim_start_matches([' ', '\t']);
794                // A trailing comment (`# ...`) is not key content — treat as
795                // if nothing followed the `?` indicator.
796                let had_key_inline = !inline.is_empty() && !inline.starts_with('#');
797
798                if had_key_inline {
799                    // Offset from line start to inline key content.
800                    let spaces_after_q = after_q.len() - inline.len();
801                    let total_offset = leading_spaces + 1 + spaces_after_q;
802                    let inline_col = key_indent + 1 + spaces_after_q;
803                    let inline_pos = Pos {
804                        byte_offset: line_pos.byte_offset + total_offset,
805                        char_offset: line_pos.char_offset + total_offset,
806                        line: line_pos.line,
807                        column: line_pos.column + total_offset,
808                    };
809                    let synthetic = Line {
810                        content: inline,
811                        offset: inline_pos.byte_offset,
812                        indent: inline_col,
813                        break_type: line_break_type,
814                        pos: inline_pos,
815                    };
816                    self.lexer.consume_line();
817                    self.lexer.prepend_inline_line(synthetic);
818                } else {
819                    self.lexer.consume_line();
820                }
821                return ConsumedMapping::ExplicitKey { had_key_inline };
822            }
823        }
824
825        // --- Implicit key: `key: value` or `key:` ---
826        // Find the `: ` (or `:\t` or `:\n` or `:` at EOL) boundary.
827        // SAFETY: peek_mapping_entry already confirmed this line is a mapping
828        // entry, so find_value_indicator_offset will return Some.
829        let Some(colon_offset) = find_value_indicator_offset(trimmed) else {
830            unreachable!("consume_mapping_entry: implicit key line has no value indicator")
831        };
832
833        let key_content = trimmed[..colon_offset].trim_end_matches([' ', '\t']);
834        let after_colon = &trimmed[colon_offset + 1..]; // skip ':'
835        let value_content = after_colon.trim_start_matches([' ', '\t']);
836
837        // Key span: starts at the first non-space character.
838        let key_start_pos = Pos {
839            byte_offset: line_pos.byte_offset + leading_spaces,
840            char_offset: line_pos.char_offset + leading_spaces,
841            line: line_pos.line,
842            column: line_pos.column + leading_spaces,
843        };
844        let key_end_pos = {
845            let mut p = key_start_pos;
846            for ch in key_content.chars() {
847                p = p.advance(ch);
848            }
849            p
850        };
851        let key_span = Span {
852            start: key_start_pos,
853            end: key_end_pos,
854        };
855
856        // Compute position of value content (after `: ` / `:\t`).
857        let spaces_after_colon = after_colon.len() - value_content.len();
858        let value_offset_in_trimmed = colon_offset + 1 + spaces_after_colon;
859        let value_col = key_indent + value_offset_in_trimmed;
860        let value_pos = Pos {
861            byte_offset: line_pos.byte_offset + leading_spaces + value_offset_in_trimmed,
862            char_offset: line_pos.char_offset + leading_spaces + value_offset_in_trimmed,
863            line: line_pos.line,
864            column: line_pos.column + leading_spaces + value_offset_in_trimmed,
865        };
866
867        // Detect whether the key is a quoted scalar.  `key_content` already
868        // has its outer whitespace stripped; if it starts with `'` or `"` the
869        // key is quoted and must be decoded rather than emitted as Plain.
870        let key_is_quoted = matches!(key_content.as_bytes().first(), Some(b'"' | b'\''));
871
872        // Consume the physical line, then (if inline value content exists)
873        // prepend one synthetic line for the value.  The key is returned
874        // directly in the ConsumedMapping variant — not via a synthetic line —
875        // so that the caller can push a Scalar event without routing through
876        // try_consume_plain_scalar (which would incorrectly treat the value
877        // synthetic line as a plain-scalar continuation).
878        self.lexer.consume_line();
879
880        // If the key is quoted, decode it now using the lexer's existing
881        // quoted-scalar methods.  We prepend a synthetic line containing only
882        // the key text (including the surrounding quote characters) so the
883        // method can parse it normally, then discard the synthetic line.
884        //
885        // libfyaml (fy-parse.c, fy_attach_comments_if_any / token scanner):
886        // all scalar tokens — quoted or plain — flow through the same token
887        // queue; the *scanner* decodes the scalar at the token level before
888        // the parser ever sees it.  We replicate that by decoding quoted keys
889        // here, at the point where we know the key is quoted.
890        let (decoded_key, key_style) = if key_is_quoted {
891            let key_synthetic = Line {
892                content: key_content,
893                offset: key_start_pos.byte_offset,
894                indent: leading_spaces,
895                break_type: line_break_type,
896                pos: key_start_pos,
897            };
898            self.lexer.prepend_inline_line(key_synthetic);
899
900            if key_content.starts_with('\'') {
901                match self.lexer.try_consume_single_quoted(0) {
902                    Ok(Some((value, _))) => (value, ScalarStyle::SingleQuoted),
903                    Ok(None) => {
904                        return ConsumedMapping::QuotedKeyError {
905                            pos: key_start_pos,
906                            message: "single-quoted key could not be parsed".into(),
907                        };
908                    }
909                    Err(e) => {
910                        return ConsumedMapping::QuotedKeyError {
911                            pos: e.pos,
912                            message: e.message,
913                        };
914                    }
915                }
916            } else {
917                match self.lexer.try_consume_double_quoted(None) {
918                    Ok(Some((value, _))) => (value, ScalarStyle::DoubleQuoted),
919                    Ok(None) => {
920                        return ConsumedMapping::QuotedKeyError {
921                            pos: key_start_pos,
922                            message: "double-quoted key could not be parsed".into(),
923                        };
924                    }
925                    Err(e) => {
926                        return ConsumedMapping::QuotedKeyError {
927                            pos: e.pos,
928                            message: e.message,
929                        };
930                    }
931                }
932            }
933        } else {
934            (std::borrow::Cow::Borrowed(key_content), ScalarStyle::Plain)
935        };
936
937        if !value_content.is_empty() {
938            // Detect illegal inline implicit mapping: if the inline value itself
939            // contains a value indicator (`:` followed by space/EOL), this is an
940            // attempt to start a block mapping inline (e.g. `a: b: c: d` or
941            // `a: 'b': c`).  Block mappings cannot appear inline — their entries
942            // must start on new lines.  Return an error before prepending the value.
943            if find_value_indicator_offset(value_content).is_some() {
944                return ConsumedMapping::InlineImplicitMappingError { pos: value_pos };
945            }
946
947            // Detect illegal inline block sequence: `key: - item` is invalid
948            // because a block sequence indicator (`-`) cannot appear as an
949            // inline value of a block mapping entry — the sequence must start
950            // on a new line.  Only `- `, `-\t`, or bare `-` (at EOL) qualify
951            // as sequence indicators.
952            {
953                let after_dash = value_content.strip_prefix('-');
954                let is_seq_indicator = after_dash.is_some_and(|rest| {
955                    rest.is_empty() || rest.starts_with(' ') || rest.starts_with('\t')
956                });
957                if is_seq_indicator {
958                    return ConsumedMapping::InlineImplicitMappingError { pos: value_pos };
959                }
960            }
961
962            let value_synthetic = Line {
963                content: value_content,
964                offset: value_pos.byte_offset,
965                indent: value_col,
966                break_type: line_break_type,
967                pos: value_pos,
968            };
969            self.lexer.prepend_inline_line(value_synthetic);
970        }
971
972        ConsumedMapping::ImplicitKey {
973            key_value: decoded_key,
974            key_style,
975            key_span,
976        }
977    }
978
979    /// After emitting a key scalar, flip the innermost mapping to `Value` phase.
980    ///
981    /// **Call-site invariant:** the top of `coll_stack` must be a
982    /// `CollectionEntry::Mapping`.  This function is only called from
983    /// mapping-emission paths (`handle_mapping_entry`, explicit-key handling)
984    /// where the caller has already verified that a mapping is the active
985    /// collection.  Do **not** call this after emitting a scalar that may be a
986    /// sequence item — use `tick_mapping_phase_after_scalar` instead, which
987    /// stops at a Sequence entry and handles the ambiguity correctly.
988    fn advance_mapping_to_value(&mut self) {
989        debug_assert!(
990            matches!(self.coll_stack.last(), Some(CollectionEntry::Mapping(..))),
991            "advance_mapping_to_value called but top of coll_stack is not a Mapping"
992        );
993        // The explicit key's content has been processed; clear the pending flag.
994        self.explicit_key_pending = false;
995        for entry in self.coll_stack.iter_mut().rev() {
996            if let CollectionEntry::Mapping(_, phase, has_had_value) = entry {
997                *phase = MappingPhase::Value;
998                *has_had_value = true;
999                return;
1000            }
1001        }
1002    }
1003
1004    /// Drain any pending trailing comment from the lexer into the event queue.
1005    ///
1006    /// Called after emitting a scalar event.  If a trailing comment was
1007    /// detected on the scalar's line (e.g. `foo # comment`), it is pushed to
1008    /// `self.queue` as `Event::Comment`.
1009    ///
1010    /// Trailing comments are bounded by the physical line length, which is
1011    /// itself bounded by the total input size.  No separate length limit is
1012    /// applied here; the security constraint (`MAX_COMMENT_LEN`) applies to
1013    /// standalone comment lines (scanned in [`Self::skip_and_collect_comments_in_doc`]
1014    /// and [`Self::skip_and_collect_comments_between_docs`]).
1015    fn drain_trailing_comment(&mut self) {
1016        if let Some((text, span)) = self.lexer.trailing_comment.take() {
1017            self.queue.push_back((Event::Comment { text }, span));
1018        }
1019    }
1020
1021    /// After emitting a value scalar/collection, flip the innermost mapping
1022    /// back to `Key` phase.
1023    ///
1024    /// **Call-site invariant:** the top of `coll_stack` must be a
1025    /// `CollectionEntry::Mapping`.  This function is only called from
1026    /// mapping-emission paths where the caller has already verified that a
1027    /// mapping is the active collection.  Do **not** call this after emitting a
1028    /// scalar that may be a sequence item — use `tick_mapping_phase_after_scalar`
1029    /// instead.
1030    fn advance_mapping_to_key(&mut self) {
1031        debug_assert!(
1032            matches!(self.coll_stack.last(), Some(CollectionEntry::Mapping(..))),
1033            "advance_mapping_to_key called but top of coll_stack is not a Mapping"
1034        );
1035        for entry in self.coll_stack.iter_mut().rev() {
1036            if let CollectionEntry::Mapping(_, phase, _) = entry {
1037                *phase = MappingPhase::Key;
1038                return;
1039            }
1040        }
1041    }
1042
1043    /// Returns the minimum column at which a standalone block-node property
1044    /// (anchor or tag on its own line) is valid in the current context.
1045    ///
1046    /// - Mapping in Value phase at indent `n`: the value node must be at col > n.
1047    /// - Sequence at indent `n`: item content must be at col > n.
1048    /// - Mapping in Key phase at indent `n`: a key at col `n` is valid.
1049    /// - Root (empty stack): any column is valid.
1050    fn min_standalone_property_indent(&self) -> usize {
1051        match self.coll_stack.last() {
1052            Some(
1053                CollectionEntry::Mapping(n, MappingPhase::Value, _)
1054                | CollectionEntry::Sequence(n, _),
1055            ) => n + 1,
1056            Some(CollectionEntry::Mapping(n, MappingPhase::Key, _)) => *n,
1057            None => 0,
1058        }
1059    }
1060}
1061
/// Result of consuming a mapping-entry line.
///
/// Produced by `consume_mapping_entry`.  The first two variants describe a
/// successfully consumed entry; the last two report a malformed line and
/// carry the position at which the problem was detected.
enum ConsumedMapping<'input> {
    /// Explicit key (`? key`).
    ExplicitKey {
        /// Whether there was key content on the same line as `?`.
        /// A trailing comment after `?` does not count as key content.
        had_key_inline: bool,
    },
    /// Implicit key (`key: value`).
    ///
    /// The key content and span are pre-extracted so the caller can push the
    /// key `Scalar` event directly without routing it through
    /// `try_consume_plain_scalar` — which would treat the adjacent value
    /// synthetic line as a plain-scalar continuation.
    ImplicitKey {
        /// The decoded key value (may be owned if escapes were resolved).
        key_value: std::borrow::Cow<'input, str>,
        /// The scalar style of the key (`Plain`, `SingleQuoted`, or `DoubleQuoted`).
        key_style: ScalarStyle,
        /// Span covering the key text (including quotes if quoted).
        key_span: Span,
    },
    /// The inline value of an implicit key is an illegal inline block
    /// collection: either it contains a value indicator of its own, making it
    /// an inline block mapping (e.g. `a: b: c` or `a: 'b': c`), or it starts
    /// with a block sequence indicator (e.g. `key: - item`).
    /// The error position points to the start of the inline value content.
    InlineImplicitMappingError { pos: Pos },
    /// A quoted implicit key could not be decoded (e.g. bad escape sequence).
    /// `pos` and `message` are those reported by the quoted-scalar lexer when
    /// it returned an error; otherwise `pos` is the start of the key.
    QuotedKeyError { pos: Pos, message: String },
}
1090
1091/// True when `trimmed` (content after stripping leading spaces) represents
1092/// an implicit mapping key: it contains `: `, `:\t`, or ends with `:`.
1093fn is_implicit_mapping_line(trimmed: &str) -> bool {
1094    find_value_indicator_offset(trimmed).is_some()
1095}
1096
1097/// Returns `true` when `s` is a block structure indicator that cannot appear
1098/// at tab-based indentation: a block sequence entry (`-` followed by
1099/// whitespace or EOL), an explicit key marker (`?` followed by whitespace or
1100/// EOL), or an implicit mapping key (contains a `:` value indicator).
1101///
1102/// Used to detect tab-as-block-indentation violations (YAML 1.2 §6.1).
1103fn is_tab_indented_block_indicator(s: &str) -> bool {
1104    s.strip_prefix(['-', '?']).map_or_else(
1105        || is_implicit_mapping_line(s),
1106        |after| after.is_empty() || after.starts_with([' ', '\t']),
1107    )
1108}
1109
1110/// Like `find_value_indicator_offset`, but skips any leading anchor (`&name`)
1111/// and/or tag (`!tag`) tokens before checking for a mapping key indicator.
1112///
1113/// This handles cases like `&anchor key: value` or `!!str &a key: value`
1114/// where the actual key content starts after the properties.
1115fn inline_contains_mapping_key(inline: &str) -> bool {
1116    if find_value_indicator_offset(inline).is_some() {
1117        return true;
1118    }
1119    // Skip leading anchor/tag tokens and retry
1120    let mut s = inline;
1121    loop {
1122        let trimmed = s.trim_start_matches([' ', '\t']);
1123        if let Some(after_amp) = trimmed.strip_prefix('&') {
1124            // skip anchor name (non-space chars)
1125            let name_end = after_amp.find([' ', '\t']).unwrap_or(after_amp.len());
1126            s = &after_amp[name_end..];
1127        } else if trimmed.starts_with('!') {
1128            // skip tag token (non-space chars)
1129            let tag_end = trimmed.find([' ', '\t']).unwrap_or(trimmed.len());
1130            s = &trimmed[tag_end..];
1131        } else {
1132            break;
1133        }
1134        if find_value_indicator_offset(s.trim_start_matches([' ', '\t'])).is_some() {
1135            return true;
1136        }
1137    }
1138    false
1139}
1140
/// Locate the `:` value indicator in `trimmed` and return its byte offset,
/// or `None` if the line is not a mapping entry.
///
/// A `:` only counts as a value indicator when it is followed by a space,
/// tab, newline/CR, or the end of the string (YAML 1.2 §7.4); a `:` directly
/// followed by any other character belongs to a plain scalar.
///
/// The scan proceeds in three steps:
///
/// 1. **First-character rejection.**  Lines starting with an indicator that
///    cannot begin a plain scalar (`%`, `@`, `` ` ``, `,`, `[`, `]`, `{`,
///    `}`, `#`, `&`, `*`, `!`, `|`, `>`) or with a tab (tabs are not valid
///    indentation, YAML 1.2 §6.1) are never implicit keys.
/// 2. **Leading quoted key.**  If the line opens with `"` or `'`, the whole
///    quoted span is skipped (honouring `\x` escapes in double quotes and the
///    `''` escape in single quotes), so a `:` inside it is ignored.  Quotes
///    anywhere else are ordinary characters of a plain scalar.
/// 3. **Linear scan.**  The rest is searched for the first qualifying `:`.
///    A `#` directly preceded by a space or tab starts a comment (YAML 1.2
///    §6.6) and ends the search with `None`.
fn find_value_indicator_offset(trimmed: &str) -> Option<usize> {
    let bytes = trimmed.as_bytes();
    // An empty line cannot contain an indicator.
    let first = *bytes.first()?;

    // Step 1: characters that can never start an implicit key.
    if b"\t%@`,[]{}#&*!|>".contains(&first) {
        return None;
    }

    // Step 2: a quoted span is only recognised at offset 0; work out where
    // the ordinary scan has to begin.
    let scan_from = match first {
        b'"' => {
            let mut at = 1; // just past the opening quote
            while let Some(&byte) = bytes.get(at) {
                // A backslash escapes the byte after it, so step over both.
                at += if byte == b'\\' { 2 } else { 1 };
                if byte == b'"' {
                    break;
                }
            }
            at
        }
        b'\'' => {
            let mut at = 1; // just past the opening quote
            loop {
                match (bytes.get(at), bytes.get(at + 1)) {
                    (None, _) => break at,
                    // `''` is an escaped quote and stays inside the span.
                    (Some(b'\''), Some(b'\'')) => at += 2,
                    // A lone `'` closes the span.
                    (Some(b'\''), _) => break at + 1,
                    (Some(_), _) => at += 1,
                }
            }
        }
        _ => 0,
    };

    // Step 3: byte-wise scan.  Every byte of interest is ASCII and UTF-8
    // continuation bytes never collide with ASCII, so multi-byte characters
    // need no special treatment.  A `#` at offset 0 was already rejected in
    // step 1, and the byte closing a quoted span is not a blank, so the
    // flag starts out false.
    let mut after_blank = false;
    for (idx, &byte) in bytes.iter().enumerate().skip(scan_from) {
        match byte {
            b'#' if after_blank => return None,
            b':' if matches!(
                bytes.get(idx + 1).copied(),
                None | Some(b' ' | b'\t' | b'\n' | b'\r')
            ) =>
            {
                return Some(idx);
            }
            _ => {}
        }
        after_blank = matches!(byte, b' ' | b'\t');
    }
    None
}
1261
1262/// Scan an anchor name from `content`, returning the name slice.
1263///
1264/// `content` must begin immediately after the `&` or `*` indicator — the first
1265/// character is the first character of the name.  The name continues until
1266/// a character that is not `ns-anchor-char` (i.e., whitespace, flow indicator,
1267/// or end of content).
1268///
1269/// Returns `Ok(name)` where `name` is a non-empty borrowed slice of `content`.
1270/// Returns `Err` if:
1271/// - The name would be empty (first character is not `ns-anchor-char`).
1272/// - The name exceeds [`MAX_ANCHOR_NAME_BYTES`] bytes.
1273///
1274/// The caller is responsible for providing the correct [`Pos`] for error
1275/// reporting.
1276fn scan_anchor_name(content: &str, indicator_pos: Pos) -> Result<&str, Error> {
1277    use crate::chars::is_ns_anchor_char;
1278    let end = content
1279        .char_indices()
1280        .take_while(|&(_, ch)| is_ns_anchor_char(ch))
1281        .last()
1282        .map_or(0, |(i, ch)| i + ch.len_utf8());
1283    if end == 0 {
1284        return Err(Error {
1285            pos: indicator_pos,
1286            message: "anchor name must not be empty".into(),
1287        });
1288    }
1289    if end > MAX_ANCHOR_NAME_BYTES {
1290        return Err(Error {
1291            pos: indicator_pos,
1292            message: format!("anchor name exceeds maximum length of {MAX_ANCHOR_NAME_BYTES} bytes"),
1293        });
1294    }
1295    Ok(&content[..end])
1296}
1297
1298/// Scan a tag from `content`, returning the tag slice and its byte length in `content`.
1299///
1300/// `content` must begin immediately after the `!` indicator.  The function
1301/// handles all four YAML 1.2 §6.8.1 tag forms:
1302///
1303/// - **Verbatim** `!<URI>` → `content` starts with `<`; returns the URI
1304///   (between the angle brackets) and its length including the `<` and `>`.
1305/// - **Primary shorthand** `!!suffix` → `content` starts with `!`; returns
1306///   the full `!!suffix` slice (including the leading `!` that is part of
1307///   `content`).
1308/// - **Named-handle shorthand** `!handle!suffix` → returns the full slice
1309///   `!handle!suffix` (the leading `!` of `handle` is in `content`).
1310/// - **Secondary shorthand** `!suffix` → `content` starts with a tag-char;
1311///   returns `!suffix` via a slice that includes one byte before `content`
1312///   (the caller provides `full_tag_start` for this).
1313/// - **Non-specific** `!` alone → `content` is empty or starts with a
1314///   separator; returns `"!"` as a one-byte slice of the `!` indicator.
1315///
1316/// # Parameters
1317///
1318/// - `content`: the input slice immediately after the `!` indicator character.
1319/// - `tag_start`: the input slice starting at the `!` (one byte before `content`).
1320/// - `indicator_pos`: the [`Pos`] of the `!` indicator (for error reporting).
1321///
1322/// # Returns
1323///
1324/// `Ok((tag_slice, advance_past_exclamation))` where:
1325/// - `tag_slice` is the borrowed slice to store in `pending_tag`.
1326/// - `advance_past_exclamation` is the number of bytes to advance past the
1327///   `!` indicator (i.e. the advance for the entire tag token, not counting
1328///   the `!` itself).
1329///
1330/// Returns `Err` on invalid verbatim tags (unmatched `<`, empty URI, control
1331/// character in URI) or when the tag length exceeds [`MAX_TAG_LEN`].
1332fn scan_tag<'i>(
1333    content: &'i str,
1334    tag_start: &'i str,
1335    indicator_pos: Pos,
1336) -> Result<(&'i str, usize), Error> {
1337    // ---- Verbatim tag: `!<URI>` ----
1338    if let Some(after_open) = content.strip_prefix('<') {
1339        // Find the closing `>`.
1340        let close = after_open.find('>').ok_or_else(|| Error {
1341            pos: indicator_pos,
1342            message: "verbatim tag missing closing '>'".into(),
1343        })?;
1344        let uri = &after_open[..close];
1345        if uri.is_empty() {
1346            return Err(Error {
1347                pos: indicator_pos,
1348                message: "verbatim tag URI must not be empty".into(),
1349            });
1350        }
1351        if uri.len() > MAX_TAG_LEN {
1352            return Err(Error {
1353                pos: indicator_pos,
1354                message: format!("verbatim tag URI exceeds maximum length of {MAX_TAG_LEN} bytes"),
1355            });
1356        }
1357        // Reject control characters in the URI.
1358        for ch in uri.chars() {
1359            if ch < '\x20' || ch == '\x7F' {
1360                return Err(Error {
1361                    pos: indicator_pos,
1362                    message: format!("verbatim tag URI contains invalid character {ch:?}"),
1363                });
1364            }
1365        }
1366        // advance = 1 (for '<') + uri.len() + 1 (for '>') bytes past the `!`
1367        let advance = 1 + uri.len() + 1;
1368        return Ok((uri, advance));
1369    }
1370
1371    // ---- Primary handle: `!!suffix` ----
1372    if let Some(suffix) = content.strip_prefix('!') {
1373        // suffix starts after the second `!`
1374        let suffix_bytes = scan_tag_suffix(suffix);
1375        // `!!` alone with no suffix is valid (empty suffix shorthand).
1376        if suffix_bytes > MAX_TAG_LEN {
1377            return Err(Error {
1378                pos: indicator_pos,
1379                message: format!("tag exceeds maximum length of {MAX_TAG_LEN} bytes"),
1380            });
1381        }
1382        // tag_slice = `!!suffix` — one byte back for the first `!` (in `tag_start`)
1383        // plus `!` in content plus suffix.
1384        let tag_slice = &tag_start[..2 + suffix_bytes]; // `!` + `!` + suffix
1385        let advance = 1 + suffix_bytes; // past the `!` in content and suffix
1386        return Ok((tag_slice, advance));
1387    }
1388
1389    // ---- Non-specific tag: bare `!` (content is empty or starts with non-tag-char) ----
1390    // A `%` alone (without two following hex digits) also falls here via scan_tag_suffix.
1391    if scan_tag_suffix(content) == 0 {
1392        // The tag is just `!` — a one-byte slice from `tag_start`.
1393        let tag_slice = &tag_start[..1];
1394        return Ok((tag_slice, 0)); // 0 bytes advance past `!` (nothing follows the `!`)
1395    }
1396
1397    // ---- Named handle `!handle!suffix` or secondary handle `!suffix` ----
1398    // Scan tag chars until we hit a `!` (named handle delimiter) or non-tag-char.
1399    let mut end = 0;
1400    let mut found_inner_bang = false;
1401    for (i, ch) in content.char_indices() {
1402        if ch == '!' {
1403            // Named handle: `!handle!suffix` — scan the suffix after the inner `!`.
1404            found_inner_bang = true;
1405            end = i + 1; // include the `!`
1406            // Scan suffix chars (and %HH sequences) after the inner `!`.
1407            end += scan_tag_suffix(&content[i + 1..]);
1408            break;
1409        } else if is_tag_char(ch) {
1410            end = i + ch.len_utf8();
1411        } else if ch == '%' {
1412            // Percent-encoded sequence: %HH.
1413            let pct_len = scan_tag_suffix(&content[i..]);
1414            if pct_len == 0 {
1415                break; // bare `%` without two hex digits — stop
1416            }
1417            end = i + pct_len;
1418        } else {
1419            break;
1420        }
1421    }
1422
1423    if end == 0 && !found_inner_bang {
1424        // No tag chars at all (covered by non-specific check above, but defensive).
1425        let tag_slice = &tag_start[..1];
1426        return Ok((tag_slice, 0));
1427    }
1428
1429    if end > MAX_TAG_LEN {
1430        return Err(Error {
1431            pos: indicator_pos,
1432            message: format!("tag exceeds maximum length of {MAX_TAG_LEN} bytes"),
1433        });
1434    }
1435
1436    // tag_slice = `!` + content[..end] — includes the leading `!` from tag_start.
1437    let tag_slice = &tag_start[..=end];
1438    Ok((tag_slice, end))
1439}
1440
/// Reports whether `ch` is a single-character YAML 1.2 `ns-tag-char` (§6.8.1).
///
/// The accepted set is `ns-uri-char` with `!` and the flow indicators
/// removed.  `%` is deliberately rejected: percent-encoded `%HH` sequences
/// span three characters and are recognised by [`scan_tag_suffix`] instead.
const fn is_tag_char(ch: char) -> bool {
    // Letters and digits are always allowed.
    if ch.is_ascii_alphanumeric() {
        return true;
    }
    // Remaining members are the allowed ASCII punctuation characters.
    matches!(
        ch,
        '#' | ';' | '/' | '?' | ':' | '@' | '&' | '=' | '+' | '$'
            | '-' | '_' | '.' | '~' | '*' | '\'' | '(' | ')'
    )
}
1469
1470/// Returns the byte length of the valid tag suffix starting at `s`.
1471///
1472/// A tag suffix is a sequence of `ns-tag-char` characters and percent-encoded
1473/// `%HH` sequences (YAML 1.2 §6.8.1).  Scanning stops at the first character
1474/// that does not satisfy either condition.
1475fn scan_tag_suffix(s: &str) -> usize {
1476    let bytes = s.as_bytes();
1477    let mut pos = 0;
1478    while pos < bytes.len() {
1479        // Percent-encoded sequence: `%` followed by exactly two hex digits.
1480        if bytes.get(pos) == Some(&b'%') {
1481            let h1 = bytes
1482                .get(pos + 1)
1483                .copied()
1484                .is_some_and(|b| b.is_ascii_hexdigit());
1485            let h2 = bytes
1486                .get(pos + 2)
1487                .copied()
1488                .is_some_and(|b| b.is_ascii_hexdigit());
1489            if h1 && h2 {
1490                pos += 3;
1491                continue;
1492            }
1493            break;
1494        }
1495        // Safe to decode the next char: all is_tag_char matches are ASCII,
1496        // so multi-byte UTF-8 chars will fail is_tag_char and stop the scan.
1497        let Some(ch) = s[pos..].chars().next() else {
1498            break;
1499        };
1500        if is_tag_char(ch) {
1501            pos += ch.len_utf8();
1502        } else {
1503            break;
1504        }
1505    }
1506    pos
1507}
1508
1509/// Build an empty plain scalar event.
1510const fn empty_scalar_event<'input>() -> Event<'input> {
1511    Event::Scalar {
1512        value: std::borrow::Cow::Borrowed(""),
1513        style: ScalarStyle::Plain,
1514        anchor: None,
1515        tag: None,
1516    }
1517}
1518
1519/// Build a span that covers exactly the 3-byte document marker at `marker_pos`.
1520const fn marker_span(marker_pos: Pos) -> Span {
1521    Span {
1522        start: marker_pos,
1523        end: Pos {
1524            byte_offset: marker_pos.byte_offset + 3,
1525            char_offset: marker_pos.char_offset + 3,
1526            line: marker_pos.line,
1527            column: marker_pos.column + 3,
1528        },
1529    }
1530}
1531
1532/// Build a zero-width span at `pos`.
1533const fn zero_span(pos: Pos) -> Span {
1534    Span {
1535        start: pos,
1536        end: pos,
1537    }
1538}
1539
/// Checks whether `handle` has the shape of a YAML tag handle.
///
/// Accepted forms (YAML 1.2 §6.8.1, productions [89]–[92]):
/// - `!`   — primary tag handle
/// - `!!`  — secondary tag handle
/// - `!<word-chars>!` — named tag handle; at least one word char from
///   `[a-zA-Z0-9_-]` between the two `!`
fn is_valid_tag_handle(handle: &str) -> bool {
    // Primary and secondary handles are fixed strings.
    if matches!(handle, "!" | "!!") {
        return true;
    }
    // Anything else must be wrapped in a leading and a trailing `!`.
    let Some(word) = handle
        .strip_prefix('!')
        .and_then(|rest| rest.strip_suffix('!'))
    else {
        return false;
    };
    // The interior must be non-empty and made of word chars only.  Word
    // chars are all ASCII, so a byte-wise check rejects any non-ASCII text.
    !word.is_empty()
        && word
            .bytes()
            .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_'))
}
1561
1562impl<'input> EventIter<'input> {
1563    /// Consume blank lines, comment lines, and directive lines in `BetweenDocs`
1564    /// context.
1565    ///
1566    /// - Blank lines: silently consumed.
1567    /// - Comment lines: emitted as `Event::Comment` items into `self.queue`.
1568    /// - Directive lines (`%`-prefixed): parsed and accumulated into
1569    ///   `self.directive_scope`.
1570    ///
1571    /// Returns `Err` on malformed directives, exceeded limits, or comment
1572    /// bodies exceeding `MAX_COMMENT_LEN`.  Stops at the first non-blank,
1573    /// non-comment, non-directive line (i.e. `---`, `...`, or content).
1574    ///
1575    /// The caller is responsible for resetting `self.directive_scope` before
1576    /// entering the `BetweenDocs` state (at each document boundary transition).
1577    /// This function does NOT reset it — `step_between_docs` re-enters it on
1578    /// every comment yield, so resetting here would clobber directives parsed
1579    /// on earlier re-entries for the same document.
1580    fn consume_preamble_between_docs(&mut self) -> Result<(), Error> {
1581        loop {
1582            // Skip blank lines first.
1583            self.lexer.skip_blank_lines_between_docs();
1584
1585            // Collect comment lines.
1586            while self.lexer.is_comment_line() {
1587                match self.lexer.try_consume_comment(MAX_COMMENT_LEN) {
1588                    Ok(Some((text, span))) => {
1589                        self.queue.push_back((Event::Comment { text }, span));
1590                    }
1591                    Ok(None) => break,
1592                    Err(e) => return Err(e),
1593                }
1594                self.lexer.skip_blank_lines_between_docs();
1595            }
1596
1597            // Parse directive lines.
1598            while self.lexer.is_directive_line() {
1599                let Some((content, dir_pos)) = self.lexer.try_consume_directive_line() else {
1600                    break;
1601                };
1602                self.parse_directive(content, dir_pos)?;
1603                self.lexer.skip_blank_lines_between_docs();
1604            }
1605
1606            // After parsing directives, there may be more blank lines or comments.
1607            if !self.lexer.is_comment_line() && !self.lexer.is_directive_line() {
1608                return Ok(());
1609            }
1610        }
1611    }
1612
1613    /// Parse a single directive line and update `self.directive_scope`.
1614    ///
1615    /// `content` is the full line content starting with `%` (e.g. `"%YAML 1.2"`).
1616    /// `dir_pos` is the position of the `%` character.
1617    fn parse_directive(&mut self, content: &'input str, dir_pos: Pos) -> Result<(), Error> {
1618        // Enforce per-document directive count limit.
1619        if self.directive_scope.directive_count >= MAX_DIRECTIVES_PER_DOC {
1620            return Err(Error {
1621                pos: dir_pos,
1622                message: format!(
1623                    "directive count exceeds maximum of {MAX_DIRECTIVES_PER_DOC} per document"
1624                ),
1625            });
1626        }
1627
1628        // `content` starts with `%`; the rest is `NAME[ params...]`.
1629        let after_percent = &content[1..];
1630
1631        // Determine directive name (up to first whitespace).
1632        let name_end = after_percent
1633            .find([' ', '\t'])
1634            .unwrap_or(after_percent.len());
1635        let name = &after_percent[..name_end];
1636        let rest = after_percent[name_end..].trim_start_matches([' ', '\t']);
1637
1638        match name {
1639            "YAML" => self.parse_yaml_directive(rest, dir_pos),
1640            "TAG" => self.parse_tag_directive(rest, dir_pos),
1641            _ => {
1642                // Reserved directive — silently ignore per YAML 1.2 spec.
1643                self.directive_scope.directive_count += 1;
1644                Ok(())
1645            }
1646        }
1647    }
1648
1649    /// Parse `%YAML major.minor` and store in directive scope.
1650    fn parse_yaml_directive(&mut self, params: &str, dir_pos: Pos) -> Result<(), Error> {
1651        if self.directive_scope.version.is_some() {
1652            return Err(Error {
1653                pos: dir_pos,
1654                message: "duplicate %YAML directive in the same document".into(),
1655            });
1656        }
1657
1658        // Parse `major.minor`.
1659        let dot = params.find('.').ok_or_else(|| Error {
1660            pos: dir_pos,
1661            message: format!("malformed %YAML directive: expected 'major.minor', got {params:?}"),
1662        })?;
1663        let major_str = &params[..dot];
1664        let after_dot = &params[dot + 1..];
1665        // Minor version ends at first whitespace or end of string.
1666        let minor_end = after_dot.find([' ', '\t']).unwrap_or(after_dot.len());
1667        let minor_str = &after_dot[..minor_end];
1668        // Anything after the minor version must be empty or a comment (# ...).
1669        let trailing = after_dot[minor_end..].trim_start_matches([' ', '\t']);
1670        if !trailing.is_empty() && !trailing.starts_with('#') {
1671            return Err(Error {
1672                pos: dir_pos,
1673                message: format!(
1674                    "malformed %YAML directive: unexpected trailing content {trailing:?}"
1675                ),
1676            });
1677        }
1678
1679        let major = major_str.parse::<u8>().map_err(|_| Error {
1680            pos: dir_pos,
1681            message: format!("malformed %YAML major version: {major_str:?}"),
1682        })?;
1683        let minor = minor_str.parse::<u8>().map_err(|_| Error {
1684            pos: dir_pos,
1685            message: format!("malformed %YAML minor version: {minor_str:?}"),
1686        })?;
1687
1688        // Only major version 1 is accepted; 2+ is a hard error.
1689        if major != 1 {
1690            return Err(Error {
1691                pos: dir_pos,
1692                message: format!("unsupported YAML version {major}.{minor}: only 1.x is supported"),
1693            });
1694        }
1695
1696        self.directive_scope.version = Some((major, minor));
1697        self.directive_scope.directive_count += 1;
1698        Ok(())
1699    }
1700
1701    /// Parse `%TAG !handle! prefix` and store in directive scope.
1702    fn parse_tag_directive(&mut self, params: &'input str, dir_pos: Pos) -> Result<(), Error> {
1703        // Split on whitespace to get handle and prefix.
1704        let handle_end = params.find([' ', '\t']).ok_or_else(|| Error {
1705            pos: dir_pos,
1706            message: format!("malformed %TAG directive: expected 'handle prefix', got {params:?}"),
1707        })?;
1708        let handle = &params[..handle_end];
1709        let prefix = params[handle_end..].trim_start_matches([' ', '\t']);
1710
1711        if prefix.is_empty() {
1712            return Err(Error {
1713                pos: dir_pos,
1714                message: "malformed %TAG directive: missing prefix".into(),
1715            });
1716        }
1717
1718        // Validate handle shape: must be `!`, `!!`, or `!<word-chars>!`
1719        // where word chars are ASCII alphanumeric, `-`, or `_`
1720        // (YAML 1.2 §6.8.1 productions [89]–[92]).
1721        if !is_valid_tag_handle(handle) {
1722            return Err(Error {
1723                pos: dir_pos,
1724                message: format!("malformed %TAG handle: {handle:?} is not a valid tag handle"),
1725            });
1726        }
1727
1728        // Validate handle length.
1729        if handle.len() > MAX_TAG_HANDLE_BYTES {
1730            return Err(Error {
1731                pos: dir_pos,
1732                message: format!(
1733                    "tag handle exceeds maximum length of {MAX_TAG_HANDLE_BYTES} bytes"
1734                ),
1735            });
1736        }
1737
1738        // Validate prefix length.
1739        if prefix.len() > MAX_TAG_LEN {
1740            return Err(Error {
1741                pos: dir_pos,
1742                message: format!("tag prefix exceeds maximum length of {MAX_TAG_LEN} bytes"),
1743            });
1744        }
1745
1746        // Reject control characters in prefix.
1747        for ch in prefix.chars() {
1748            if (ch as u32) < 0x20 || ch == '\x7F' {
1749                return Err(Error {
1750                    pos: dir_pos,
1751                    message: format!("tag prefix contains invalid control character {ch:?}"),
1752                });
1753            }
1754        }
1755
1756        // Duplicate handle check.
1757        if self.directive_scope.tag_handles.contains_key(handle) {
1758            return Err(Error {
1759                pos: dir_pos,
1760                message: format!("duplicate %TAG directive for handle {handle:?}"),
1761            });
1762        }
1763
1764        self.directive_scope
1765            .tag_handles
1766            .insert(handle.to_owned(), prefix.to_owned());
1767        self.directive_scope.directive_count += 1;
1768        Ok(())
1769    }
1770
1771    /// Skip blank lines while collecting any comment lines encountered as
1772    /// `Event::Comment` items pushed to `self.queue`.
1773    ///
1774    /// Used in `InDocument` context.
1775    /// Returns `Err` if a comment body exceeds `MAX_COMMENT_LEN`.
1776    fn skip_and_collect_comments_in_doc(&mut self) -> Result<(), Error> {
1777        loop {
1778            // Skip truly blank lines (not comments).
1779            self.lexer.skip_empty_lines();
1780            // Collect any comment lines.
1781            if !self.lexer.is_comment_line() {
1782                return Ok(());
1783            }
1784            while self.lexer.is_comment_line() {
1785                match self.lexer.try_consume_comment(MAX_COMMENT_LEN) {
1786                    Ok(Some((text, span))) => {
1787                        self.queue.push_back((Event::Comment { text }, span));
1788                    }
1789                    Ok(None) => break,
1790                    Err(e) => return Err(e),
1791                }
1792            }
1793            // Loop to skip any blank lines that follow the comments.
1794        }
1795    }
1796
1797    /// Handle one iteration step in the `BetweenDocs` state.
1798    fn step_between_docs(&mut self) -> StepResult<'input> {
1799        match self.consume_preamble_between_docs() {
1800            Ok(()) => {}
1801            Err(e) => {
1802                self.failed = true;
1803                return StepResult::Yield(Err(e));
1804            }
1805        }
1806        // If comments were queued, drain them before checking document state.
1807        if !self.queue.is_empty() {
1808            return StepResult::Continue;
1809        }
1810
1811        if self.lexer.at_eof() {
1812            // Per YAML 1.2 §9.2, directives require a `---` marker.
1813            // A directive followed by EOF (no `---`) is a spec violation.
1814            if self.directive_scope.directive_count > 0 {
1815                let pos = self.lexer.current_pos();
1816                self.failed = true;
1817                return StepResult::Yield(Err(Error {
1818                    pos,
1819                    message: "directives must be followed by a '---' document-start marker".into(),
1820                }));
1821            }
1822            let end = self.lexer.current_pos();
1823            self.state = IterState::Done;
1824            return StepResult::Yield(Ok((Event::StreamEnd, zero_span(end))));
1825        }
1826        if self.lexer.is_directives_end() {
1827            let (marker_pos, _) = self.lexer.consume_marker_line(false);
1828            if let Some(e) = self.lexer.marker_inline_error.take() {
1829                self.failed = true;
1830                return StepResult::Yield(Err(e));
1831            }
1832            self.state = IterState::InDocument;
1833            self.root_node_emitted = false;
1834            // Take the accumulated directives — scope stays active for document body tag resolution.
1835            let version = self.directive_scope.version;
1836            let tag_directives = self.directive_scope.tag_directives();
1837            self.queue.push_back((
1838                Event::DocumentStart {
1839                    explicit: true,
1840                    version,
1841                    tag_directives,
1842                },
1843                marker_span(marker_pos),
1844            ));
1845            self.drain_trailing_comment();
1846            return StepResult::Continue;
1847        }
1848        if self.lexer.is_document_end() {
1849            // Orphan `...` — if directives were parsed without a `---` marker,
1850            // that is a spec violation (YAML 1.2 §9.2: directives require `---`).
1851            if self.directive_scope.directive_count > 0 {
1852                let pos = self.lexer.current_pos();
1853                self.failed = true;
1854                return StepResult::Yield(Err(Error {
1855                    pos,
1856                    message: "directives must be followed by a '---' document-start marker".into(),
1857                }));
1858            }
1859            self.lexer.consume_marker_line(true);
1860            if let Some(e) = self.lexer.marker_inline_error.take() {
1861                self.failed = true;
1862                return StepResult::Yield(Err(e));
1863            }
1864            return StepResult::Continue; // orphan `...`, no event
1865        }
1866        // Per YAML 1.2 §9.2, directives require a `---` marker.  If the next
1867        // line is not `---` and we have already parsed directives, that is a
1868        // spec violation — reject before emitting an implicit DocumentStart.
1869        if self.directive_scope.directive_count > 0 {
1870            let pos = self.lexer.current_pos();
1871            self.failed = true;
1872            return StepResult::Yield(Err(Error {
1873                pos,
1874                message: "directives must be followed by a '---' document-start marker".into(),
1875            }));
1876        }
1877        debug_assert!(
1878            self.lexer.has_content(),
1879            "expected content after consuming blank/comment/directive lines"
1880        );
1881        let content_pos = self.lexer.current_pos();
1882        self.state = IterState::InDocument;
1883        self.root_node_emitted = false;
1884        // Take the accumulated directives — scope stays active for document body tag resolution.
1885        let version = self.directive_scope.version;
1886        let tag_directives = self.directive_scope.tag_directives();
1887        StepResult::Yield(Ok((
1888            Event::DocumentStart {
1889                explicit: false,
1890                version,
1891                tag_directives,
1892            },
1893            zero_span(content_pos),
1894        )))
1895    }
1896
1897    /// Handle one iteration step in the `InDocument` state.
1898    #[allow(clippy::too_many_lines)]
1899    fn step_in_document(&mut self) -> StepResult<'input> {
1900        match self.skip_and_collect_comments_in_doc() {
1901            Ok(()) => {}
1902            Err(e) => {
1903                self.failed = true;
1904                return StepResult::Yield(Err(e));
1905            }
1906        }
1907        // If comments were queued, drain them before checking document state.
1908        if !self.queue.is_empty() {
1909            return StepResult::Continue;
1910        }
1911
1912        // ---- Tab indentation check ----
1913        //
1914        // YAML 1.2 §6.1: tabs cannot be used for indentation in block context.
1915        // Only lines whose VERY FIRST character is `\t` (no leading spaces) are
1916        // using a tab as the indentation character and must be rejected.
1917        //
1918        // Exceptions: `\t[`, `\t{`, `\t]`, `\t}` are allowed because flow
1919        // collection delimiters can follow tabs (YAML test suite 6CA3, Q5MG).
1920        // Lines like `  \tx` have SPACES as indentation; the tab is content.
1921        if let Some(line) = self.lexer.peek_next_line() {
1922            if line.content.starts_with('\t') {
1923                // First char is a tab — check what the first non-tab character
1924                // is.  Flow collection delimiters are allowed after leading tabs.
1925                let first_non_tab = line.content.trim_start_matches('\t').chars().next();
1926                if !matches!(first_non_tab, Some('[' | '{' | ']' | '}')) {
1927                    let err_pos = line.pos;
1928                    self.failed = true;
1929                    self.lexer.consume_line();
1930                    return StepResult::Yield(Err(Error {
1931                        pos: err_pos,
1932                        message: "tabs are not allowed as indentation (YAML 1.2 §6.1)".into(),
1933                    }));
1934                }
1935            }
1936        }
1937
1938        // ---- Document / stream boundaries ----
1939
1940        if self.lexer.at_eof() && !self.lexer.has_inline_scalar() {
1941            let end = self.lexer.drain_to_end();
1942            self.close_all_collections(end);
1943            self.queue
1944                .push_back((Event::DocumentEnd { explicit: false }, zero_span(end)));
1945            self.queue.push_back((Event::StreamEnd, zero_span(end)));
1946            self.state = IterState::Done;
1947            return StepResult::Continue;
1948        }
1949        if self.lexer.is_document_end() {
1950            let pos = self.lexer.current_pos();
1951            self.close_all_collections(pos);
1952            let (marker_pos, _) = self.lexer.consume_marker_line(true);
1953            if let Some(e) = self.lexer.marker_inline_error.take() {
1954                self.failed = true;
1955                return StepResult::Yield(Err(e));
1956            }
1957            // Reset directive scope at the document boundary so directives from
1958            // this document do not leak into the next one.
1959            self.directive_scope = DirectiveScope::default();
1960            self.state = IterState::BetweenDocs;
1961            self.queue.push_back((
1962                Event::DocumentEnd { explicit: true },
1963                marker_span(marker_pos),
1964            ));
1965            self.drain_trailing_comment();
1966            return StepResult::Continue;
1967        }
1968        if self.lexer.is_directives_end() {
1969            let pos = self.lexer.current_pos();
1970            self.close_all_collections(pos);
1971            let (marker_pos, _) = self.lexer.consume_marker_line(false);
1972            if let Some(e) = self.lexer.marker_inline_error.take() {
1973                self.failed = true;
1974                return StepResult::Yield(Err(e));
1975            }
1976            // A bare `---` inside a document implicitly ends the current document
1977            // and starts a new one without a preamble.  Reset the directive scope
1978            // here since consume_preamble_between_docs will not be called for this
1979            // transition.
1980            self.directive_scope = DirectiveScope::default();
1981            // Validate any inline tag on this `---` line against the new
1982            // document's (empty) directive scope.  Tags defined in the previous
1983            // document do not carry over (YAML §9.2), so an undefined handle
1984            // must fail immediately.
1985            if let Some((tag_val, tag_pos)) = self.lexer.peek_inline_scalar() {
1986                if tag_val.starts_with('!') {
1987                    if let Err(e) = self.directive_scope.resolve_tag(tag_val, tag_pos) {
1988                        self.lexer.drain_inline_scalar();
1989                        self.failed = true;
1990                        return StepResult::Yield(Err(e));
1991                    }
1992                }
1993            }
1994            self.state = IterState::InDocument;
1995            self.root_node_emitted = false;
1996            self.queue.push_back((
1997                Event::DocumentEnd { explicit: false },
1998                zero_span(marker_pos),
1999            ));
2000            self.queue.push_back((
2001                Event::DocumentStart {
2002                    explicit: true,
2003                    version: None,
2004                    tag_directives: Vec::new(),
2005                },
2006                marker_span(marker_pos),
2007            ));
2008            self.drain_trailing_comment();
2009            return StepResult::Continue;
2010        }
2011
2012        // ---- Directive lines (`%YAML`/`%TAG`) inside document body ----
2013        //
2014        // YAML 1.2 §9.2: directives can only appear in the preamble (before
2015        // `---`).  A `%YAML` or `%TAG` line inside a document body, followed
2016        // by `---`, indicates the author forgot to close the previous document
2017        // with `...` before writing the next document's preamble.
2018        //
2019        // We only fire the error when:
2020        //   1. The current line starts with `%YAML ` or `%TAG ` (a genuine
2021        //      YAML directive keyword, not arbitrary content like `%!PS-Adobe`).
2022        //   2. The following line is a `---` document-start marker.
2023        //
2024        // This avoids false positives when `%` appears as content in plain
2025        // scalars (XLQ9) or inside block scalar bodies (M7A3, W4TN).
2026        if let Some(line) = self.lexer.peek_next_line() {
2027            let is_yaml_directive =
2028                line.content.starts_with("%YAML ") || line.content.starts_with("%TAG ");
2029            if is_yaml_directive {
2030                let next_is_doc_start = self.lexer.peek_second_line().is_some_and(|l| {
2031                    l.content == "---"
2032                        || l.content.starts_with("--- ")
2033                        || l.content.starts_with("---\t")
2034                });
2035                if next_is_doc_start {
2036                    let err_pos = line.pos;
2037                    self.failed = true;
2038                    self.lexer.consume_line();
2039                    return StepResult::Yield(Err(Error {
2040                        pos: err_pos,
2041                        message:
2042                            "directive '%' is only valid before the document-start marker '---'"
2043                                .into(),
2044                    }));
2045                }
2046            }
2047        }
2048
2049        // ---- Root-node guard ----
2050        //
2051        // A YAML document contains exactly one root node.  Once the root has
2052        // been fully emitted (`root_node_emitted = true`) and the collection
2053        // stack is empty, any further non-comment, non-blank content is invalid.
2054        if self.root_node_emitted && self.coll_stack.is_empty() && !self.lexer.has_inline_scalar() {
2055            if let Some(line) = self.lexer.peek_next_line() {
2056                let err_pos = line.pos;
2057                self.failed = true;
2058                self.lexer.consume_line();
2059                return StepResult::Yield(Err(Error {
2060                    pos: err_pos,
2061                    message: "unexpected content after document root node".into(),
2062                }));
2063            }
2064        }
2065
2066        // ---- Alias node: `*name` is a complete node ----
2067
2068        if let Some(peek) = self.lexer.peek_next_line() {
2069            let content: &'input str = peek.content;
2070            let line_pos = peek.pos;
2071            let line_break_type = peek.break_type;
2072            let line_char_offset = line_pos.char_offset;
2073            let trimmed = content.trim_start_matches(' ');
2074            if let Some(after_star) = trimmed.strip_prefix('*') {
2075                let leading = content.len() - trimmed.len();
2076                let star_pos = Pos {
2077                    byte_offset: line_pos.byte_offset + leading,
2078                    char_offset: line_char_offset + leading,
2079                    line: line_pos.line,
2080                    column: line_pos.column + leading,
2081                };
2082                // YAML 1.2 §7.1: alias nodes cannot have properties (anchor or tag).
2083                if self.pending_tag.is_some() {
2084                    self.failed = true;
2085                    return StepResult::Yield(Err(Error {
2086                        pos: star_pos,
2087                        message: "alias node cannot have a tag property".into(),
2088                    }));
2089                }
2090                // An anchor is only a property of the alias if it's item-level
2091                // (pending_anchor_for_collection=false).  A collection-level anchor
2092                // (pending_anchor_for_collection=true) belongs to the surrounding
2093                // collection, not the alias node.
2094                if self.pending_anchor.is_some() && !self.pending_anchor_for_collection {
2095                    self.failed = true;
2096                    return StepResult::Yield(Err(Error {
2097                        pos: star_pos,
2098                        message: "alias node cannot have an anchor property".into(),
2099                    }));
2100                }
2101                match scan_anchor_name(after_star, star_pos) {
2102                    Err(e) => {
2103                        self.failed = true;
2104                        return StepResult::Yield(Err(e));
2105                    }
2106                    Ok(name) => {
2107                        let name_char_count = name.chars().count();
2108                        // Build alias span: from `*` through end of name.
2109                        let alias_end = Pos {
2110                            byte_offset: star_pos.byte_offset + 1 + name.len(),
2111                            char_offset: star_pos.char_offset + 1 + name_char_count,
2112                            line: star_pos.line,
2113                            column: star_pos.column + 1 + name_char_count,
2114                        };
2115                        let alias_span = Span {
2116                            start: star_pos,
2117                            end: alias_end,
2118                        };
2119                        // Compute remaining content after the alias name, before
2120                        // consuming the line (which would invalidate the borrow).
2121                        let after_name = &after_star[name.len()..];
2122                        let remaining: &'input str = after_name.trim_start_matches([' ', '\t']);
2123                        let spaces = after_name.len() - remaining.len();
2124                        let had_remaining = !remaining.is_empty();
2125                        let rem_byte_offset = star_pos.byte_offset + 1 + name.len() + spaces;
2126                        let rem_char_offset = line_char_offset + leading + 1 + name.len() + spaces;
2127                        let rem_col = star_pos.column + 1 + name_char_count + spaces;
2128                        self.lexer.consume_line();
2129                        if had_remaining {
2130                            let rem_pos = Pos {
2131                                byte_offset: rem_byte_offset,
2132                                char_offset: rem_char_offset,
2133                                line: star_pos.line,
2134                                column: rem_col,
2135                            };
2136                            let synthetic = crate::lines::Line {
2137                                content: remaining,
2138                                offset: rem_byte_offset,
2139                                indent: rem_col,
2140                                break_type: line_break_type,
2141                                pos: rem_pos,
2142                            };
2143                            self.lexer.prepend_inline_line(synthetic);
2144                        }
2145                        self.tick_mapping_phase_after_scalar();
2146                        return StepResult::Yield(Ok((Event::Alias { name }, alias_span)));
2147                    }
2148                }
2149            }
2150        }
2151
2152        // ---- Tag: `!tag`, `!!tag`, `!<uri>`, or `!` — attach to next node ----
2153
2154        if let Some(peek) = self.lexer.peek_next_line() {
2155            let content: &'input str = peek.content;
2156            let line_pos = peek.pos;
2157            let line_indent = peek.indent;
2158            let line_break_type = peek.break_type;
2159            let trimmed = content.trim_start_matches(' ');
2160            if trimmed.starts_with('!') {
2161                let leading = content.len() - trimmed.len();
2162                let bang_pos = Pos {
2163                    byte_offset: line_pos.byte_offset + leading,
2164                    char_offset: line_pos.char_offset + leading,
2165                    line: line_pos.line,
2166                    column: line_pos.column + leading,
2167                };
2168                // `tag_start` starts at the `!`; `after_bang` is everything after it.
2169                let tag_start: &'input str = &content[leading..];
2170                let after_bang: &'input str = &content[leading + 1..];
2171                match scan_tag(after_bang, tag_start, bang_pos) {
2172                    Err(e) => {
2173                        self.failed = true;
2174                        return StepResult::Yield(Err(e));
2175                    }
2176                    Ok((tag_slice, advance_past_bang)) => {
2177                        // Total bytes consumed for the tag token: 1 (`!`) + advance.
2178                        let tag_token_bytes = 1 + advance_past_bang;
2179                        let after_tag = &trimmed[tag_token_bytes..];
2180                        let inline: &'input str = after_tag.trim_start_matches([' ', '\t']);
2181                        let spaces = after_tag.len() - inline.len();
2182                        let had_inline = !inline.is_empty();
2183                        // YAML 1.2 §6.8.1: a tag property must be separated from
2184                        // the following node content by `s-separate` when the first
2185                        // character after the tag could be confused with a tag
2186                        // continuation or creates structural ambiguity:
2187                        // - `!` starts another tag property
2188                        // - flow indicators (`,`, `[`, `]`, `{`, `}`) cause
2189                        //   structural confusion (e.g. `!!str,`)
2190                        // - `%` may be a valid percent-encoded continuation that
2191                        //   should have been part of the tag, or an invalid
2192                        //   percent-sequence that makes the input unparseable
2193                        // When the tag scanner stopped at a plain non-tag char like
2194                        // `<`, the tag ended naturally and the content is the value
2195                        // (e.g. `!foo<bar val` → tag=`!foo`, scalar=`<bar val`).
2196                        if had_inline && spaces == 0 {
2197                            let first = inline.chars().next().unwrap_or('\0');
2198                            if first == '!'
2199                                || first == '%'
2200                                || matches!(first, ',' | '[' | ']' | '{' | '}')
2201                            {
2202                                self.failed = true;
2203                                return StepResult::Yield(Err(Error {
2204                                    pos: bang_pos,
2205                                    message:
2206                                        "tag must be separated from node content by whitespace"
2207                                            .into(),
2208                                }));
2209                            }
2210                        }
2211                        let inline_offset =
2212                            line_pos.byte_offset + leading + tag_token_bytes + spaces;
2213                        let inline_char_offset =
2214                            line_pos.char_offset + leading + tag_token_bytes + spaces;
2215                        let inline_col = line_pos.column + leading + tag_token_bytes + spaces;
2216                        // Duplicate tags on the same node are an error.
2217                        // Exception: if the existing tag is collection-level
2218                        // (pending_tag_for_collection=true) and the new tag has
2219                        // inline content that is (or contains) a mapping key line,
2220                        // they apply to different nodes (collection vs. key scalar).
2221                        if self.pending_tag.is_some() {
2222                            let is_different_node = self.pending_tag_for_collection
2223                                && had_inline
2224                                && inline_contains_mapping_key(inline);
2225                            if !is_different_node {
2226                                self.failed = true;
2227                                return StepResult::Yield(Err(Error {
2228                                    pos: bang_pos,
2229                                    message: "a node may not have more than one tag".into(),
2230                                }));
2231                            }
2232                        }
2233                        // Resolve tag handle against directive scope at scan time.
2234                        let resolved_tag =
2235                            match self.directive_scope.resolve_tag(tag_slice, bang_pos) {
2236                                Ok(t) => t,
2237                                Err(e) => {
2238                                    self.failed = true;
2239                                    return StepResult::Yield(Err(e));
2240                                }
2241                            };
2242                        self.pending_tag = Some(resolved_tag);
2243                        self.lexer.consume_line();
2244                        if had_inline {
2245                            self.pending_tag_for_collection = false;
2246                            // Record the original physical line's indent so that
2247                            // handle_mapping_entry can open the mapping at the correct
2248                            // indent when the key is on a synthetic (offset) line.
2249                            // Only set when the inline content is (or leads to) a
2250                            // mapping key — if it's a plain value, there is no
2251                            // handle_mapping_entry call to consume this, and leaving
2252                            // it set would corrupt the next unrelated mapping entry.
2253                            if self.property_origin_indent.is_none()
2254                                && inline_contains_mapping_key(inline)
2255                            {
2256                                self.property_origin_indent = Some(line_indent);
2257                            }
2258                            let inline_pos = Pos {
2259                                byte_offset: inline_offset,
2260                                char_offset: inline_char_offset,
2261                                line: line_pos.line,
2262                                column: inline_col,
2263                            };
2264                            let synthetic = crate::lines::Line {
2265                                content: inline,
2266                                offset: inline_offset,
2267                                indent: inline_col,
2268                                break_type: line_break_type,
2269                                pos: inline_pos,
2270                            };
2271                            self.lexer.prepend_inline_line(synthetic);
2272                        } else {
2273                            // Standalone tag line — applies to whatever node comes next.
2274                            // Validate: the tag must be indented enough for this context.
2275                            let min = self.min_standalone_property_indent();
2276                            if line_indent < min {
2277                                self.pending_tag = None;
2278                                self.failed = true;
2279                                return StepResult::Yield(Err(Error {
2280                                    pos: bang_pos,
2281                                    message:
2282                                        "node property is not indented enough for this context"
2283                                            .into(),
2284                                }));
2285                            }
2286                            self.pending_tag_for_collection = true;
2287                        }
2288                        return StepResult::Continue;
2289                    }
2290                }
2291            }
2292        }
2293
2294        // ---- Anchor: `&name` — attach to the next node ----
2295
2296        if let Some(peek) = self.lexer.peek_next_line() {
2297            let content: &'input str = peek.content;
2298            let line_pos = peek.pos;
2299            let line_indent = peek.indent;
2300            let line_break_type = peek.break_type;
2301            let trimmed = content.trim_start_matches(' ');
2302            if let Some(after_amp) = trimmed.strip_prefix('&') {
2303                // We only look for `&` at the start of the trimmed line.
2304                // Tags (`!`) before `&` are handled in Task 17.
2305                //
2306                // IMPORTANT for Task 17: when implementing tag-skip, the skip
2307                // logic must consume the *full* tag token (all `ns-anchor-char`
2308                // bytes after `!`), not just the `!` character alone.  The `!`
2309                // character is itself a valid `ns-anchor-char`, so skipping
2310                // only `!` and then re-entering anchor detection would silently
2311                // include the tag body in the anchor name.  Example: `!tag &a`
2312                // — skip must advance past `tag` before looking for `&a`.
2313                let leading = content.len() - trimmed.len();
2314                let amp_pos = Pos {
2315                    byte_offset: line_pos.byte_offset + leading,
2316                    char_offset: line_pos.char_offset + leading,
2317                    line: line_pos.line,
2318                    column: line_pos.column + leading,
2319                };
2320                match scan_anchor_name(after_amp, amp_pos) {
2321                    Err(e) => {
2322                        self.failed = true;
2323                        return StepResult::Yield(Err(e));
2324                    }
2325                    Ok(name) => {
2326                        // Determine what follows the anchor name on this line,
2327                        // before consuming the line (borrow ends here).
2328                        let after_name = &after_amp[name.len()..];
2329                        let inline: &'input str = after_name.trim_start_matches([' ', '\t']);
2330                        let spaces = after_name.len() - inline.len();
2331                        let had_inline = !inline.is_empty();
2332                        let inline_offset =
2333                            line_pos.byte_offset + leading + 1 + name.len() + spaces;
2334                        let inline_char_offset =
2335                            line_pos.char_offset + leading + 1 + name.len() + spaces;
2336                        let inline_col = line_pos.column + leading + 1 + name.len() + spaces;
2337                        // Duplicate anchors on the same node are an error.
2338                        //
2339                        // Case 1: existing anchor is item-level (pending_anchor_for_collection=false)
2340                        // and no collection tag is pending — both this and the existing anchor
2341                        // are for the same item-level node.
2342                        //
2343                        // Case 2: existing anchor is collection-level (pending_anchor_for_collection=true)
2344                        // and the new anchor has inline content that is NOT a collection opener
2345                        // ([, {) or property (!, &) — both anchors apply to the same scalar node.
2346                        let amp_pos2 = amp_pos;
2347                        let is_duplicate = if self.pending_anchor.is_some()
2348                            && !self.pending_anchor_for_collection
2349                            && !self.pending_tag_for_collection
2350                        {
2351                            true
2352                        } else if self.pending_anchor.is_some()
2353                            && self.pending_anchor_for_collection
2354                            && had_inline
2355                            && !self.pending_tag_for_collection
2356                        {
2357                            // The existing anchor is collection-level, but the new anchor
2358                            // has inline content.  If that content is a mapping key line
2359                            // (contains `: ` etc.), the new anchor is for the key and the
2360                            // existing anchor is for the mapping — different nodes, no error.
2361                            // If the inline is a plain scalar (no key indicator), both
2362                            // anchors apply to the same scalar node — error.
2363                            let first_ch = inline.chars().next();
2364                            // If inline starts with a collection/property opener, treat as
2365                            // different node — no error.
2366                            let starts_with_opener =
2367                                matches!(first_ch, Some('[' | '{' | '!' | '&' | '*' | '|' | '>'));
2368                            // If inline contains a mapping key indicator (`: `), the new
2369                            // anchor is for a key — different node from the collection.
2370                            let is_mapping_key = find_value_indicator_offset(inline).is_some();
2371                            !starts_with_opener && !is_mapping_key
2372                        } else {
2373                            false
2374                        };
2375                        if is_duplicate {
2376                            self.failed = true;
2377                            return StepResult::Yield(Err(Error {
2378                                pos: amp_pos2,
2379                                message: "a node may not have more than one anchor".into(),
2380                            }));
2381                        }
2382                        self.pending_anchor = Some(name);
2383                        self.lexer.consume_line();
2384                        if had_inline {
2385                            // Detect illegal inline block sequence: `&anchor - item`
2386                            // is invalid — a block sequence indicator cannot appear
2387                            // inline after an anchor property in block context.
2388                            let is_seq = inline.strip_prefix('-').is_some_and(|rest| {
2389                                rest.is_empty() || rest.starts_with(' ') || rest.starts_with('\t')
2390                            });
2391                            if is_seq {
2392                                self.pending_anchor = None;
2393                                self.failed = true;
2394                                let seq_pos = Pos {
2395                                    byte_offset: inline_offset,
2396                                    char_offset: inline_char_offset,
2397                                    line: line_pos.line,
2398                                    column: inline_col,
2399                                };
2400                                return StepResult::Yield(Err(Error {
2401                                    pos: seq_pos,
2402                                    message:
2403                                        "block sequence indicator cannot appear inline after a node property"
2404                                            .into(),
2405                                }));
2406                            }
2407                            // Inline content after anchor — anchor applies to the
2408                            // inline node (scalar or key), not to any enclosing
2409                            // collection opened on this same line.
2410                            self.pending_anchor_for_collection = false;
2411                            // Record the original physical line's indent so that
2412                            // handle_mapping_entry can open the mapping at the correct
2413                            // indent when the key is on a synthetic (offset) line.
2414                            // Only set when the inline content leads to a mapping key;
2415                            // value-context anchors must not corrupt the next entry.
2416                            if self.property_origin_indent.is_none()
2417                                && inline_contains_mapping_key(inline)
2418                            {
2419                                self.property_origin_indent = Some(line_indent);
2420                            }
2421                            let inline_pos = Pos {
2422                                byte_offset: inline_offset,
2423                                char_offset: inline_char_offset,
2424                                line: line_pos.line,
2425                                column: inline_col,
2426                            };
2427                            let synthetic = crate::lines::Line {
2428                                content: inline,
2429                                offset: inline_offset,
2430                                indent: inline_col,
2431                                break_type: line_break_type,
2432                                pos: inline_pos,
2433                            };
2434                            self.lexer.prepend_inline_line(synthetic);
2435                        } else {
2436                            // Standalone anchor line — anchor applies to whatever
2437                            // node comes next (collection or scalar).
2438                            // Validate: the anchor must be indented enough for this context.
2439                            let min = self.min_standalone_property_indent();
2440                            if line_indent < min {
2441                                self.pending_anchor = None;
2442                                self.failed = true;
2443                                let err_pos = amp_pos;
2444                                return StepResult::Yield(Err(Error {
2445                                    pos: err_pos,
2446                                    message:
2447                                        "node property is not indented enough for this context"
2448                                            .into(),
2449                                }));
2450                            }
2451                            self.pending_anchor_for_collection = true;
2452                        }
2453                        // Let the next iteration handle whatever follows.
2454                        return StepResult::Continue;
2455                    }
2456                }
2457            }
2458        }
2459
2460        // ---- Flow collection detection: `[` or `{` starts a flow collection ----
2461        // Stray closing flow indicators (`]`, `}`) in block context are errors.
2462
2463        if let Some(line) = self.lexer.peek_next_line() {
2464            let trimmed = line.content.trim_start_matches(' ');
2465            if trimmed.starts_with('[') || trimmed.starts_with('{') {
2466                return self.handle_flow_collection();
2467            }
2468            if trimmed.starts_with(']') || trimmed.starts_with('}') {
2469                let err_pos = line.pos;
2470                let ch = trimmed.chars().next().unwrap_or(']');
2471                self.failed = true;
2472                self.lexer.consume_line();
2473                return StepResult::Yield(Err(Error {
2474                    pos: err_pos,
2475                    message: format!("unexpected '{ch}' outside flow collection"),
2476                }));
2477            }
2478        }
2479
2480        // ---- Block sequence / mapping entry detection ----
2481
2482        if let Some((dash_indent, dash_pos)) = self.peek_sequence_entry() {
2483            return self.handle_sequence_entry(dash_indent, dash_pos);
2484        }
2485        if let Some((key_indent, key_pos)) = self.peek_mapping_entry() {
2486            return self.handle_mapping_entry(key_indent, key_pos);
2487        }
2488
2489        // ---- Dedent: close collections more deeply nested than the current line ----
2490
2491        if let Some(line) = self.lexer.peek_next_line() {
2492            let line_indent = line.indent;
2493            let close_pos = self.lexer.current_pos();
2494            // Record the minimum indent across all open collections before
2495            // closing. A root collection has indent 0. If the minimum indent
2496            // before closure was 0 and the stack empties, the root node is
2497            // complete. When a tag-inline mapping opens at a column > 0 (a
2498            // pre-existing indent-tracking limitation), closing it must not
2499            // prematurely mark the root as emitted.
2500            let min_indent_before = self.coll_stack.iter().map(|e| e.indent()).min();
2501            self.close_collections_at_or_above(line_indent.saturating_add(1), close_pos);
2502            // If closing collections emptied the stack, the root node is
2503            // complete — but only if the outermost collection was at indent 0
2504            // (a true root collection, not a spuriously-indented inline tag).
2505            if self.coll_stack.is_empty() && !self.queue.is_empty() && min_indent_before == Some(0)
2506            {
2507                self.root_node_emitted = true;
2508            }
2509            if !self.queue.is_empty() {
2510                return StepResult::Continue;
2511            }
2512        }
2513
2514        // ---- Block structure validity checks ----
2515        //
2516        // After closing deeper collections and before consuming a scalar,
2517        // validate that the current line's indentation is consistent with
2518        // the innermost open block collection.
2519        //
2520        // For block sequences: the only valid content at the sequence's own
2521        // indent level is `- ` (handled by peek_sequence_entry above).
2522        // Any other content at that indent level is invalid YAML.
2523        //
2524        // For block mappings in Key phase: the only valid content at the
2525        // mapping's indent level is a mapping entry (handled by
2526        // peek_mapping_entry above). A plain scalar without `: ` is not
2527        // a valid implicit mapping key.
2528        if let Some(line) = self.lexer.peek_next_line() {
2529            let line_indent = line.indent;
2530            match self.coll_stack.last() {
2531                Some(&CollectionEntry::Sequence(seq_indent, _)) if line_indent == seq_indent => {
2532                    // Content at the sequence indent level that is NOT `- ` is
2533                    // invalid. peek_sequence_entry already returned None, so this
2534                    // line is not a sequence entry.
2535                    let err_pos = line.pos;
2536                    self.failed = true;
2537                    self.lexer.consume_line();
2538                    return StepResult::Yield(Err(Error {
2539                        pos: err_pos,
2540                        message: "invalid content at block sequence indent level: expected '- '"
2541                            .into(),
2542                    }));
2543                }
2544                Some(&CollectionEntry::Mapping(map_indent, MappingPhase::Key, _))
2545                    if line_indent == map_indent =>
2546                {
2547                    let err_pos = line.pos;
2548                    self.failed = true;
2549                    self.lexer.consume_line();
2550                    return StepResult::Yield(Err(Error {
2551                        pos: err_pos,
2552                        message:
2553                            "invalid content at block mapping indent level: expected mapping key"
2554                                .into(),
2555                    }));
2556                }
2557                // Content more deeply indented than the mapping key level is only
2558                // valid as an explicit-key continuation (explicit_key_pending=true)
2559                // or as the very first key (has_had_value=false — the first key may
2560                // be at any indent >= map_indent).  After at least one key-value pair
2561                // has been processed (has_had_value=true) with no explicit-key pending,
2562                // deeper content that is not a valid mapping key is an error.
2563                Some(&CollectionEntry::Mapping(map_indent, MappingPhase::Key, true))
2564                    if line_indent > map_indent
2565                        && !self.explicit_key_pending
2566                        && !self.lexer.is_next_line_synthetic() =>
2567                {
2568                    let err_pos = line.pos;
2569                    self.failed = true;
2570                    self.lexer.consume_line();
2571                    return StepResult::Yield(Err(Error {
2572                        pos: err_pos,
2573                        message: "unexpected indented content after mapping value".into(),
2574                    }));
2575                }
2576                _ => {}
2577            }
2578        }
2579
2580        // ---- Scalars ----
2581
2582        // `block_parent_indent` — the indent of the enclosing block context;
2583        // block scalars (`|`, `>`) must have content lines more indented than
2584        // this value.  For a block scalar embedded as inline content after `? `
2585        // or `- `, the enclosing block's indent is the *collection's* indent,
2586        // not the column of the inline `|`/`>` token.
2587        //
2588        // `plain_parent_indent` — the enclosing block's indent level.
2589        // Plain scalar continuation lines must be indented strictly more than
2590        // `plain_parent_indent` (YAML 1.2), with a special exception for
2591        // tab-indented lines when `plain_parent_indent == 0` (the tab provides
2592        // the s-separate-in-line separator required by s-flow-folded(0)).
2593        // Use usize::MAX as a sentinel for "root level" — the root node has no
2594        // parent collection, so block scalar body lines may start at column 0
2595        // (equivalent to a parent indent of -1 in the YAML spec).
2596        let block_parent_indent = self.coll_stack.last().map_or(usize::MAX, |e| e.indent());
2597        let plain_parent_indent = self.coll_stack.last().map_or(0, |e| e.indent());
2598        // Capture whether an inline scalar (from `--- text`) was pending before
2599        // the scalar dispatch call.  If it was, the emitted plain scalar came
2600        // from the `---` marker line and is NOT necessarily the complete root
2601        // node — the lexer emits `--- >` / `--- |` / `--- "text` inline content
2602        // as a plain scalar, but the actual node body follows on subsequent
2603        // lines.  Marking root_node_emitted in those cases would incorrectly
2604        // reject the body lines as "content after root node".
2605        let had_inline_scalar = self.lexer.has_inline_scalar();
2606        match self.try_consume_scalar(plain_parent_indent, block_parent_indent) {
2607            Ok(Some(event)) => {
2608                self.tick_mapping_phase_after_scalar();
2609                // Drain any trailing comment detected on the scalar's line.
2610                self.drain_trailing_comment();
2611                // A scalar emitted at the document root (no open collection)
2612                // is the complete root node — unless it came from inline
2613                // content after `---` (had_inline_scalar), in which case the
2614                // body on subsequent lines is part of the same node.
2615                if self.coll_stack.is_empty() && !had_inline_scalar {
2616                    self.root_node_emitted = true;
2617                }
2618                return StepResult::Yield(Ok(event));
2619            }
2620            Err(e) => {
2621                self.failed = true;
2622                return StepResult::Yield(Err(e));
2623            }
2624            Ok(None) => {}
2625        }
2626
2627        // Check for invalid characters at the start of an unrecognised line.
2628        // A line that starts with a character that is neither whitespace nor a
2629        // valid YAML ns-char (e.g. NUL U+0000 or mid-stream BOM U+FEFF) is a
2630        // parse error.
2631        if let Some(line) = self.lexer.peek_next_line() {
2632            let first_ch = line.content.chars().next();
2633            if let Some(ch) = first_ch {
2634                if ch != ' ' && ch != '\t' && !crate::lexer::is_ns_char(ch) {
2635                    let err_pos = line.pos;
2636                    self.failed = true;
2637                    self.lexer.consume_line();
2638                    return StepResult::Yield(Err(Error {
2639                        pos: err_pos,
2640                        message: format!("invalid character U+{:04X} in document", ch as u32),
2641                    }));
2642                }
2643            }
2644        }
2645
2646        // Fallback: unrecognised content line — consume and loop.
2647        self.lexer.consume_line();
2648        StepResult::Continue
2649    }
2650
2651    /// Handle a block-sequence dash entry (`-`).
2652    #[allow(clippy::too_many_lines)]
2653    fn handle_sequence_entry(&mut self, dash_indent: usize, dash_pos: Pos) -> StepResult<'input> {
2654        let cur_pos = self.lexer.current_pos();
2655        self.close_collections_at_or_above(dash_indent.saturating_add(1), cur_pos);
2656        if !self.queue.is_empty() {
2657            return StepResult::Continue;
2658        }
2659        // YAML §8.2.1 seq-spaces rule: a block sequence used as a mapping
2660        // value in `block-out` context may start at the same column as its
2661        // parent key (seq-spaces(n, block-out) = n, not n+1).  We therefore
2662        // open a new sequence when:
2663        //   - the stack is empty, OR
2664        //   - dash_indent is greater than the current top's indent (normal
2665        //     case: sequence is nested deeper than its parent), OR
2666        //   - the top is a Mapping in Value phase at the same indent (the
2667        //     seq-spaces case: the sequence is the value of the current key).
2668        let opens_new = match self.coll_stack.last() {
2669            None => true,
2670            Some(
2671                &(CollectionEntry::Sequence(col, _)
2672                | CollectionEntry::Mapping(col, MappingPhase::Key, _)),
2673            ) => dash_indent > col,
2674            Some(&CollectionEntry::Mapping(col, MappingPhase::Value, _)) => dash_indent >= col,
2675        };
2676        if opens_new {
2677            // A block sequence cannot be an implicit mapping key — only flow nodes
2678            // may appear as implicit keys.  If the parent is a mapping in Key phase
2679            // and we are about to open a new sequence, this is a block sequence
2680            // where a mapping key is expected: an error.
2681            // Exception: when explicit_key_pending is set, the sequence IS the
2682            // content of an explicit key (`? \n- seq_key`), which is valid.
2683            if matches!(
2684                self.coll_stack.last(),
2685                Some(&CollectionEntry::Mapping(_, MappingPhase::Key, true))
2686            ) && !self.explicit_key_pending
2687            {
2688                self.failed = true;
2689                return StepResult::Yield(Err(Error {
2690                    pos: dash_pos,
2691                    message: "block sequence cannot appear as an implicit mapping key".into(),
2692                }));
2693            }
2694            // A block sequence item at a wrong indent level is invalid.  When the
2695            // parent is a sequence that has already completed at least one item
2696            // (`has_had_item = true`) and the new dash is NOT at the parent
2697            // sequence's column (not a new sibling item), this is a wrong-indent
2698            // sequence entry.
2699            if let Some(&CollectionEntry::Sequence(parent_col, true)) = self.coll_stack.last() {
2700                if dash_indent != parent_col {
2701                    self.failed = true;
2702                    return StepResult::Yield(Err(Error {
2703                        pos: dash_pos,
2704                        message: "block sequence entry at wrong indentation level".into(),
2705                    }));
2706                }
2707            }
2708            if self.collection_depth() >= MAX_COLLECTION_DEPTH {
2709                self.failed = true;
2710                return StepResult::Yield(Err(Error {
2711                    pos: dash_pos,
2712                    message: "collection nesting depth exceeds limit".into(),
2713                }));
2714            }
2715            // Sequence opening consumes any pending explicit-key context.
2716            self.explicit_key_pending = false;
2717            // Mark the parent sequence (if any) as having started an item.
2718            if let Some(CollectionEntry::Sequence(_, current_item_started)) =
2719                self.coll_stack.last_mut()
2720            {
2721                *current_item_started = true;
2722            }
2723            self.coll_stack
2724                .push(CollectionEntry::Sequence(dash_indent, false));
2725            self.queue.push_back((
2726                Event::SequenceStart {
2727                    anchor: self.pending_anchor.take(),
2728                    tag: self.pending_tag.take(),
2729                    style: CollectionStyle::Block,
2730                },
2731                zero_span(dash_pos),
2732            ));
2733        }
2734        // When continuing an existing sequence (opens_new = false), reset
2735        // `current_item_started` so that the new item can receive content.
2736        if !opens_new {
2737            if let Some(CollectionEntry::Sequence(_, current_item_started)) =
2738                self.coll_stack.last_mut()
2739            {
2740                *current_item_started = false;
2741            }
2742        }
2743        // When continuing an existing sequence (opens_new = false) and there is
2744        // a pending tag/anchor from the previous item's content (e.g. `- !!str`
2745        // whose inline extraction left a standalone tag line), that tag/anchor
2746        // applies to an empty scalar for the previous item.  Emit it now before
2747        // processing the current `-`.
2748        if !opens_new
2749            && (self.pending_tag_for_collection || self.pending_anchor_for_collection)
2750            && (self.pending_tag.is_some() || self.pending_anchor.is_some())
2751        {
2752            let item_pos = self.lexer.current_pos();
2753            self.queue.push_back((
2754                Event::Scalar {
2755                    value: std::borrow::Cow::Borrowed(""),
2756                    style: ScalarStyle::Plain,
2757                    anchor: self.pending_anchor.take(),
2758                    tag: self.pending_tag.take(),
2759                },
2760                zero_span(item_pos),
2761            ));
2762            self.pending_tag_for_collection = false;
2763            self.pending_anchor_for_collection = false;
2764        }
2765        // Check for tab-indented block structure before consuming the dash.
2766        // In YAML, tabs cannot be used for block-level indentation.  When the
2767        // separator between the dash and the inline content is (or contains) a
2768        // tab, and the inline content is a block structure indicator, the tab
2769        // is acting as indentation for a block node — which is invalid
2770        // (YAML 1.2 §6.1).
2771        if let Some(line) = self.lexer.peek_next_line() {
2772            let after_spaces = line.content.trim_start_matches(' ');
2773            if let Some(rest) = after_spaces.strip_prefix('-') {
2774                let inline = rest.trim_start_matches([' ', '\t']);
2775                let separator = &rest[..rest.len() - inline.len()];
2776                if separator.contains('\t') && is_tab_indented_block_indicator(inline) {
2777                    let err_pos = line.pos;
2778                    self.failed = true;
2779                    self.lexer.consume_line();
2780                    return StepResult::Yield(Err(Error {
2781                        pos: err_pos,
2782                        message: "tab character is not valid block indentation".into(),
2783                    }));
2784                }
2785            }
2786        }
2787        let had_inline = self.consume_sequence_dash(dash_indent);
2788        if !had_inline {
2789            // Only emit an empty scalar for a bare `-` when there is no
2790            // following indented content that could be the item's value.
2791            // If the next line is at an indent strictly greater than
2792            // `dash_indent`, it belongs to this sequence item — let the
2793            // main loop handle it.  Otherwise the item is truly empty.
2794            let next_indent = self.lexer.peek_next_line().map_or(0, |l| l.indent);
2795            if next_indent <= dash_indent {
2796                let item_pos = self.lexer.current_pos();
2797                self.queue.push_back((
2798                    Event::Scalar {
2799                        value: std::borrow::Cow::Borrowed(""),
2800                        style: ScalarStyle::Plain,
2801                        anchor: self.pending_anchor.take(),
2802                        tag: None,
2803                    },
2804                    zero_span(item_pos),
2805                ));
2806            }
2807        }
2808        StepResult::Continue
2809    }
2810
    /// Handle a block-mapping key entry.
    ///
    /// `key_indent` is the indent of the line being processed (possibly a
    /// synthetic line prepended after an inline property) and `key_pos` is
    /// the position used for the `MappingStart` span and for the
    /// indentation / depth errors raised here.
    ///
    /// Each call performs at most one of the following steps, in this order,
    /// and then returns:
    ///
    /// 1. Close collections via `close_collections_at_or_above` and return
    ///    if that queued any events.
    /// 2. Close a same-indent sequence sitting on a same-indent mapping
    ///    (seq-spaces case) and move that mapping back to Key phase.
    /// 3. Open a new block mapping when the top of the stack is not a
    ///    mapping at the effective indent.
    /// 4. On a `: value` indicator line: emit an empty key scalar carrying
    ///    pending inline properties, or consume the indicator line.
    /// 5. Emit an empty value scalar when the mapping is still in Value
    ///    phase but another key follows.
    /// 6. Consume the key line and queue the key scalar.
    ///
    /// Every error path sets `self.failed` and returns
    /// `StepResult::Yield(Err(..))`; every other path returns
    /// `StepResult::Continue`.
    #[allow(clippy::too_many_lines)]
    fn handle_mapping_entry(&mut self, key_indent: usize, key_pos: Pos) -> StepResult<'input> {
        let cur_pos = self.lexer.current_pos();

        // When an anchor or tag appeared inline on the physical line before
        // the key content (e.g. `&anchor key: value`), the key is prepended
        // as a synthetic line at the property's column (e.g. column 8).
        // All indent-relative decisions below must use the PHYSICAL line's
        // indent (column 0 in that example), not the synthetic line's column.
        let effective_key_indent = self.property_origin_indent.unwrap_or(key_indent);

        self.close_collections_at_or_above(effective_key_indent.saturating_add(1), cur_pos);
        if !self.queue.is_empty() {
            // Closing queued events; return before processing the key itself.
            return StepResult::Continue;
        }

        // YAML §8.2.1 seq-spaces close: a block sequence opened as a mapping
        // value in `block-out` context may reside at the *same* column as its
        // parent key (seq-spaces(n, block-out) = n).  When a new mapping key
        // appears at column `n`, such a same-indent sequence must be closed —
        // the standard `close_collections_at_or_above(n+1)` above does not
        // reach it because its indent is exactly `n`, not `>= n+1`.
        //
        // Close the sequence only when the collection immediately beneath it
        // (the next item down the stack) is a Mapping at the same indent —
        // that is taken as confirmation that it was opened by the seq-spaces
        // rule, not as an independent sequence at column 0.
        // NOTE(review): the check below matches the mapping in any phase; it
        // does not verify that the mapping is in Value phase.
        if let Some(&CollectionEntry::Sequence(seq_col, _)) = self.coll_stack.last() {
            if seq_col == effective_key_indent {
                let parent_is_seq_spaces_mapping = self.coll_stack.iter().rev().nth(1).is_some_and(
                    |e| matches!(e, CollectionEntry::Mapping(col, _, _) if *col == effective_key_indent),
                );
                if parent_is_seq_spaces_mapping {
                    self.coll_stack.pop();
                    self.queue
                        .push_back((Event::SequenceEnd, zero_span(cur_pos)));
                    // Advance parent mapping from Value to Key phase — the
                    // sequence was its value and is now fully closed.
                    if let Some(CollectionEntry::Mapping(_, phase, _)) = self.coll_stack.last_mut()
                    {
                        *phase = MappingPhase::Key;
                    }
                    return StepResult::Continue;
                }
            }
        }

        // True when the top of the stack is a mapping (in any phase) whose
        // column equals the effective key indent, i.e. this key continues it.
        let is_in_mapping_at_this_indent = self.coll_stack.last().is_some_and(
            |top| matches!(top, CollectionEntry::Mapping(col, _, _) if *col == effective_key_indent),
        );

        if !is_in_mapping_at_this_indent {
            // A mapping entry at `effective_key_indent` cannot be opened when:
            //
            // 1. The top of the stack is a block sequence at the same indent —
            //    this would nest a mapping inside the sequence without a `- `
            //    prefix (BD7L pattern).
            //
            // 2. The top of the stack is a block mapping in Key phase at a
            //    lesser indent that has already had at least one entry — this
            //    would open a nested mapping when no current key exists for it
            //    to be the value of (EW3V, DMG6, N4JP, U44R patterns: wrong
            //    indentation).  The `has_had_value` flag suppresses this check
            //    for fresh mappings whose first key node is nested deeper than
            //    the mapping indicator (e.g. V9D5 explicit-key content).
            //    Also skip when a value-indicator line (`: value`) is next
            //    because it is the value portion of an alias/anchor mapping key
            //    split across tokens (e.g. `*alias : scalar` in 26DV), or when
            //    a pending tag or anchor is present (tags prepend synthetic
            //    inlines at their column — 74H7).
            match self.coll_stack.last() {
                Some(&CollectionEntry::Sequence(seq_col, _)) if seq_col == effective_key_indent => {
                    self.failed = true;
                    return StepResult::Yield(Err(Error {
                        pos: key_pos,
                        message:
                            "invalid mapping entry at block sequence indent level: expected '- '"
                                .into(),
                    }));
                }
                Some(&CollectionEntry::Mapping(map_col, MappingPhase::Key, true))
                    if map_col < effective_key_indent
                        && self.pending_tag.is_none()
                        && self.pending_anchor.is_none()
                        && !self.is_value_indicator_line() =>
                {
                    self.failed = true;
                    return StepResult::Yield(Err(Error {
                        pos: key_pos,
                        message: "wrong indentation: mapping key is more indented than the enclosing mapping".into(),
                    }));
                }
                _ => {}
            }
            // Enforce the unified nesting limit before pushing a new mapping.
            if self.collection_depth() >= MAX_COLLECTION_DEPTH {
                self.failed = true;
                return StepResult::Yield(Err(Error {
                    pos: key_pos,
                    message: "collection nesting depth exceeds limit".into(),
                }));
            }
            // Mark the parent sequence (if any) as having started an item.
            if let Some(CollectionEntry::Sequence(_, current_item_started)) =
                self.coll_stack.last_mut()
            {
                *current_item_started = true;
            }
            // Note: property_origin_indent is NOT consumed here.  It remains set
            // so the next call (which processes the synthetic key line at the
            // synthetic column) can again compute effective_key_indent = origin
            // indent and recognize the already-open mapping.  It will be cleared
            // in the "continuing existing mapping" branch below.
            self.coll_stack.push(CollectionEntry::Mapping(
                effective_key_indent,
                MappingPhase::Key,
                false,
            ));
            // Consume pending anchor/tag for the mapping only for standalone
            // properties (e.g. `&a\nkey: v`) where `pending_*_for_collection`
            // is true.
            //
            // Inline properties (e.g. `&a key: v`) leave `pending_*_for_collection`
            // false — they annotate the key scalar, not the mapping (YAML test
            // suite 9KAX: inline property → key scalar).  The pending anchor/tag
            // is left on `self.pending_anchor`/`self.pending_tag` and will be
            // consumed by `consume_mapping_entry` when it emits the key scalar.
            let mapping_anchor = if self.pending_anchor_for_collection {
                self.pending_anchor.take()
            } else {
                None
            };
            let mapping_tag = if self.pending_tag_for_collection {
                self.pending_tag.take()
            } else {
                None
            };
            self.queue.push_back((
                Event::MappingStart {
                    anchor: mapping_anchor,
                    tag: mapping_tag,
                    style: CollectionStyle::Block,
                },
                zero_span(key_pos),
            ));
            // The key line itself is not consumed here; this call only
            // queues `MappingStart` and returns.
            return StepResult::Continue;
        }

        // Continuing an existing mapping.
        if self.is_value_indicator_line() {
            // If there is a pending tag/anchor that is not designated for the
            // mapping collection itself (i.e. it came from an inline `!!tag`
            // or `&anchor` before the `:` value indicator), it applies to the
            // empty implicit key scalar.  Emit that key scalar first so the
            // pending properties are not lost and the mapping phase advances
            // correctly before the value indicator is consumed.
            let in_key_phase = self.coll_stack.last().is_some_and(|top| {
                matches!(top, CollectionEntry::Mapping(col, MappingPhase::Key, _) if *col == effective_key_indent)
            });
            if in_key_phase
                && !self.pending_tag_for_collection
                && !self.pending_anchor_for_collection
                && (self.pending_tag.is_some() || self.pending_anchor.is_some())
            {
                let pos = self.lexer.current_pos();
                self.queue.push_back((
                    Event::Scalar {
                        value: std::borrow::Cow::Borrowed(""),
                        style: ScalarStyle::Plain,
                        anchor: self.pending_anchor.take(),
                        tag: self.pending_tag.take(),
                    },
                    zero_span(pos),
                ));
                self.advance_mapping_to_value();
                // The `:` line is still pending and is handled by a later call.
                return StepResult::Continue;
            }
            // Check for tab-indented block structure after explicit value marker.
            // `: TAB -`, `: TAB ?`, or `: TAB key:` are invalid because the tab
            // makes the following block-structure-forming content block-indented
            // via a tab, which is forbidden (YAML 1.2 §6.1).
            if let Some(line) = self.lexer.peek_next_line() {
                let after_spaces = line.content.trim_start_matches(' ');
                if let Some(after_colon) = after_spaces.strip_prefix(':') {
                    if !after_colon.is_empty() {
                        let value = after_colon.trim_start_matches([' ', '\t']);
                        // Whitespace run between the `:` and the inline value.
                        let separator = &after_colon[..after_colon.len() - value.len()];
                        if separator.contains('\t') && is_tab_indented_block_indicator(value) {
                            let err_pos = line.pos;
                            self.failed = true;
                            self.lexer.consume_line();
                            return StepResult::Yield(Err(Error {
                                pos: err_pos,
                                message: "tab character is not valid block indentation".into(),
                            }));
                        }
                    }
                }
            }
            // Note: the line's own `key_indent` is passed here, not
            // `effective_key_indent`.
            self.consume_explicit_value_line(key_indent);
            return StepResult::Continue;
        }

        // If the mapping is in Value phase and the next line is another key
        // (not a `: value` line), the previous key had no value — emit empty.
        // Only a pending anchor is attached to that empty value; a pending
        // tag is left untouched.
        if self.coll_stack.last().is_some_and(|top| {
            matches!(top, CollectionEntry::Mapping(col, MappingPhase::Value, _) if *col == effective_key_indent)
        }) {
            let pos = self.lexer.current_pos();
            self.queue.push_back((
                Event::Scalar {
                    value: std::borrow::Cow::Borrowed(""),
                    style: ScalarStyle::Plain,
                    anchor: self.pending_anchor.take(),
                    tag: None,
                },
                zero_span(pos),
            ));
            self.advance_mapping_to_key();
            return StepResult::Continue;
        }

        // Check for tab-indented block structure after explicit key marker.
        // `? TAB -`, `? TAB ?`, or `? TAB key:` are invalid because the tab
        // makes the following block-structure-forming content block-indented
        // via a tab, which is forbidden (YAML 1.2 §6.1).
        if let Some(line) = self.lexer.peek_next_line() {
            let after_spaces = line.content.trim_start_matches(' ');
            if let Some(after_q) = after_spaces.strip_prefix('?') {
                if !after_q.is_empty() {
                    let inline = after_q.trim_start_matches([' ', '\t']);
                    // Whitespace run between the `?` and the inline key content.
                    let separator = &after_q[..after_q.len() - inline.len()];
                    if separator.contains('\t') && is_tab_indented_block_indicator(inline) {
                        let err_pos = line.pos;
                        self.failed = true;
                        self.lexer.consume_line();
                        return StepResult::Yield(Err(Error {
                            pos: err_pos,
                            message: "tab character is not valid block indentation".into(),
                        }));
                    }
                }
            }
        }
        // Normal key line: consume and emit key scalar.
        // property_origin_indent has served its purpose (selecting effective
        // indent for the mapping-open and for subsequent continues).  Clear it
        // so it does not affect unrelated subsequent entries.
        self.property_origin_indent = None;
        let consumed = self.consume_mapping_entry(key_indent);
        match consumed {
            ConsumedMapping::ExplicitKey { had_key_inline } => {
                if had_key_inline {
                    // The key content will appear inline (already prepended).
                    // No explicit-key-pending needed since the key content is
                    // already in the buffer.
                } else {
                    // Bare `?`: queue an empty plain scalar carrying any
                    // pending anchor/tag and move the mapping to Value phase.
                    let pos = self.lexer.current_pos();
                    self.queue.push_back((
                        Event::Scalar {
                            value: std::borrow::Cow::Borrowed(""),
                            style: ScalarStyle::Plain,
                            anchor: self.pending_anchor.take(),
                            tag: self.pending_tag.take(),
                        },
                        zero_span(pos),
                    ));
                    self.advance_mapping_to_value();
                    // The key content is on the NEXT line — mark that an explicit
                    // key is pending so block sequence entries are allowed
                    // (e.g. `?\n- seq_key`).
                    self.explicit_key_pending = true;
                }
            }
            ConsumedMapping::ImplicitKey {
                key_value,
                key_style,
                key_span,
            } => {
                // Implicit key: queue it with any pending properties, then
                // expect its value next.
                self.queue.push_back((
                    Event::Scalar {
                        value: key_value,
                        style: key_style,
                        anchor: self.pending_anchor.take(),
                        tag: self.pending_tag.take(),
                    },
                    key_span,
                ));
                self.advance_mapping_to_value();
            }
            ConsumedMapping::QuotedKeyError { pos, message } => {
                // Error reported by `consume_mapping_entry`; forward it as-is.
                self.failed = true;
                return StepResult::Yield(Err(Error { pos, message }));
            }
            ConsumedMapping::InlineImplicitMappingError { pos } => {
                // The inline value is a block node (mapping or sequence indicator)
                // which cannot appear inline as a mapping value — block nodes must
                // start on a new line.
                self.failed = true;
                return StepResult::Yield(Err(Error {
                    pos,
                    message:
                        "block node cannot appear as inline value; use a new line or a flow node"
                            .into(),
                }));
            }
        }
        StepResult::Continue
    }
3120
3121    /// True when the next line is a bare value indicator (`: ` or `:`
3122    /// followed by space/EOL), used for the explicit-key form.
3123    fn is_value_indicator_line(&self) -> bool {
3124        let Some(line) = self.lexer.peek_next_line() else {
3125            return false;
3126        };
3127        let trimmed = line.content.trim_start_matches(' ');
3128        if !trimmed.starts_with(':') {
3129            return false;
3130        }
3131        let after_colon = &trimmed[1..];
3132        after_colon.is_empty()
3133            || after_colon.starts_with(' ')
3134            || after_colon.starts_with('\t')
3135            || after_colon.starts_with('\n')
3136            || after_colon.starts_with('\r')
3137    }
3138
3139    /// Consume a `: value` line (explicit value indicator).
3140    ///
3141    /// If there is inline content after `: `, prepend a synthetic line for it
3142    /// so the next iteration emits it as the value scalar.
3143    fn consume_explicit_value_line(&mut self, key_indent: usize) {
3144        // SAFETY: caller checked is_value_indicator_line() — the line exists.
3145        let Some(line) = self.lexer.peek_next_line() else {
3146            unreachable!("consume_explicit_value_line called without a pending line")
3147        };
3148
3149        // Extract all data from the borrowed line before any mutable lexer calls.
3150        let content: &'input str = line.content;
3151        let line_pos = line.pos;
3152        let line_break_type = line.break_type;
3153
3154        let leading_spaces = content.len() - content.trim_start_matches(' ').len();
3155        let trimmed = &content[leading_spaces..];
3156
3157        // Advance past `:` and any whitespace.
3158        let after_colon = &trimmed[1..]; // skip ':'
3159        let value_content = after_colon.trim_start_matches([' ', '\t']);
3160        // A comment-only value (e.g. `: # lala`) is not a real inline value.
3161        let had_value_inline = !value_content.is_empty() && !value_content.starts_with('#');
3162
3163        if had_value_inline {
3164            let spaces_after_colon = after_colon.len() - value_content.len();
3165            let total_offset = leading_spaces + 1 + spaces_after_colon;
3166            let value_col = key_indent + 1 + spaces_after_colon;
3167            let value_pos = Pos {
3168                byte_offset: line_pos.byte_offset + total_offset,
3169                char_offset: line_pos.char_offset + total_offset,
3170                line: line_pos.line,
3171                column: line_pos.column + total_offset,
3172            };
3173            let synthetic = Line {
3174                content: value_content,
3175                offset: value_pos.byte_offset,
3176                indent: value_col,
3177                break_type: line_break_type,
3178                pos: value_pos,
3179            };
3180            self.lexer.consume_line();
3181            self.lexer.prepend_inline_line(synthetic);
3182        } else {
3183            // `:` with no real value content (either bare or comment-only).
3184            // Consume the indicator line and advance to Value phase — the next
3185            // line may be a block node (the actual value), or if the next line
3186            // is another key at the same indent, the main loop emits an empty
3187            // scalar at that point (see the Value-phase empty-scalar guard).
3188            self.lexer.consume_line();
3189            self.advance_mapping_to_value();
3190        }
3191    }
3192
3193    /// Handle a flow collection (`[...]` or `{...}`) starting on the current line.
3194    ///
3195    /// This method reads the complete flow collection — potentially spanning
3196    /// multiple physical lines — and pushes all events (SequenceStart/End,
3197    /// MappingStart/End, Scalar) to `self.queue`.  It returns when the
3198    /// outermost closing delimiter (`]` or `}`) is consumed.
3199    ///
3200    /// ## Security invariants
3201    ///
3202    /// - **No recursion:** the parser uses an explicit `Vec<FlowFrame>` stack
3203    ///   rather than recursive function calls, preventing stack overflow on
3204    ///   deeply nested input.
3205    /// - **Unified depth limit:** each new nested collection checks against
3206    ///   `MAX_COLLECTION_DEPTH` using the same `coll_stack.len()` counter as
3207    ///   block collections, so flow and block nesting depths are additive.
3208    /// - **Incremental parsing:** content is processed line-by-line; no
3209    ///   `String` buffer holds the entire flow body.
3210    /// - **Unterminated collection:** reaching EOF without the matching closing
3211    ///   delimiter returns `Err`.
3212    #[allow(clippy::too_many_lines)]
3213    fn handle_flow_collection(&mut self) -> StepResult<'input> {
3214        use crate::lexer::scan_plain_line_flow;
3215        use std::borrow::Cow;
3216
3217        // -----------------------------------------------------------------------
3218        // Local types for the explicit flow-parser stack.
3219        // -----------------------------------------------------------------------
3220
3221        /// One frame on the explicit flow-parser stack.
3222        #[derive(Clone, Copy)]
3223        enum FlowFrame {
3224            /// An open `[...]` sequence.
3225            ///
3226            /// `has_value` is `false` immediately after opening and immediately
3227            /// after each comma; it becomes `true` when a scalar or nested
3228            /// collection is emitted.  A comma arriving when `has_value` is
3229            /// `false` is a leading comma error.
3230            ///
3231            /// `after_colon` is `true` when we have just consumed a `:` value
3232            /// separator in a single-pair implicit mapping context.  In this
3233            /// state a new scalar or collection is the value of the single-pair
3234            /// mapping — not a new entry — so the missing-comma check must not
3235            /// fire.
3236            ///
3237            /// `last_was_plain` is `true` when the most recent emitted item was
3238            /// a plain scalar.  Plain scalars may span multiple lines in flow
3239            /// context, so the missing-comma check must not fire after a plain
3240            /// scalar (the next line's content may be a continuation).
3241            Sequence {
3242                has_value: bool,
3243                after_colon: bool,
3244                last_was_plain: bool,
3245            },
3246            /// An open `{...}` mapping.
3247            ///
3248            /// `has_value` tracks the same invariant as in `Sequence` but for
3249            /// the mapping as a whole (not per key/value pair).
3250            ///
3251            /// `last_was_plain` mirrors the same concept as in `Sequence`: when
3252            /// the most recent emitted item was a plain scalar, the next line
3253            /// may be a multi-line continuation, so indicator-start validation
3254            /// must be deferred until we know whether it is a continuation.
3255            Mapping {
3256                phase: FlowMappingPhase,
3257                has_value: bool,
3258                last_was_plain: bool,
3259            },
3260        }
3261
        // Design note — phase-advance pattern
        //
        // Four sites below repeat the same `if let Some(frame) = flow_stack.last_mut()
        // { match frame { Sequence { has_value, .. } => ... Mapping { phase, has_value,
        // .. } => ... } }` shape.  Extracting a helper function would require moving
        // `FlowFrame` and `FlowMappingPhase` to module scope — adding module-level types
        // whose sole purpose is to enable this refactor adds more complexity than the
        // duplication costs.  Each site is a single `match` and clearly labelled by its
        // comment; the repetition is intentional and stable.
3271
3272        // -----------------------------------------------------------------------
3273        // Buffer-management invariant
3274        // -----------------------------------------------------------------------
3275        //
3276        // The line buffer always holds the current line un-consumed.  We peek to
3277        // read content and only consume the line when we need to advance past it
3278        // (end-of-line or quoted-scalar delegation).
3279        //
3280        // `cur_content` / `cur_base_pos` always mirror what `peek_next_line()`
3281        // returns.  After any call that changes the buffer (consume_line /
3282        // prepend_inline_line), we immediately re-sync via peek.
3283        //
3284        // Helper: advance `pos` over `content[..byte_len]`, one char at a time.
3285
3286        let abs_pos = |base: Pos, content: &str, i: usize| -> Pos {
3287            let mut p = base;
3288            for ch in content[..i].chars() {
3289                p = p.advance(ch);
3290            }
3291            p
3292        };
3293
3294        // -----------------------------------------------------------------------
3295        // Initialise: read the current line, locate the opening delimiter.
3296        // -----------------------------------------------------------------------
3297
3298        // SAFETY: caller verified via peek in step_in_document.
3299        let Some(first_line) = self.lexer.peek_next_line() else {
3300            unreachable!("handle_flow_collection called without a pending line")
3301        };
3302
3303        let leading = first_line.content.len() - first_line.content.trim_start_matches(' ').len();
3304        // The physical line number where the outermost flow collection opened.
3305        // Used to detect multi-line flow keys (C2SP).
3306        let start_line = first_line.pos.line;
3307        // The physical line number of the most recent emitted value (scalar or
3308        // inner-collection close).  Used to detect multi-line implicit keys (DK4H):
3309        // a `:` value separator on a different line than the preceding key is invalid.
3310        let mut last_token_line = first_line.pos.line;
3311        // Set when a `?` explicit-key indicator is consumed inside a flow sequence.
3312        // Suppresses the DK4H single-line check for the corresponding `:` separator —
3313        // explicit keys in flow sequences may span multiple lines (YAML 1.2 §7.4.2).
3314        let mut explicit_key_in_seq = false;
3315
3316        // Stack for tracking open flow collections (nested via explicit iteration,
3317        // not recursion — security requirement).
3318        let mut flow_stack: Vec<FlowFrame> = Vec::new();
3319        // All events assembled during this call (pushed to self.queue at end).
3320        let mut events: Vec<(Event<'input>, Span)> = Vec::new();
3321        // Current byte offset within `cur_content`.
3322        let mut pos_in_line: usize = leading;
3323        // Pending anchor for the next node in this flow collection.
3324        // Seeded from any block-context anchor that was pending when this flow
3325        // collection was entered (e.g. `&seq [a, b]` sets pending_anchor before
3326        // the `[` is dispatched to handle_flow_collection).
3327        let mut pending_flow_anchor: Option<&'input str> = self.pending_anchor.take();
3328        // Pending tag for the next node in this flow collection.
3329        // Seeded from any block-context tag that was pending when this flow
3330        // collection was entered (e.g. `!!seq [a, b]` sets pending_tag before
3331        // the `[` is dispatched to handle_flow_collection).
3332        let mut pending_flow_tag: Option<std::borrow::Cow<'input, str>> = self.pending_tag.take();
3333
3334        // Re-sync `cur_content` / `cur_base_pos` from the buffer.
3335        // Returns false when the buffer is empty (EOF mid-flow).
3336        // INVARIANT: called every time after consuming or prepending a line.
3337        macro_rules! resync {
3338            () => {{
3339                match self.lexer.peek_next_line() {
3340                    Some(l) => {
3341                        // Safe: we re-assign these immediately without holding
3342                        // a borrow on `self.lexer` at the same time.
3343                        (l.content, l.pos)
3344                    }
3345                    None => {
3346                        // EOF
3347                        ("", self.lexer.current_pos())
3348                    }
3349                }
3350            }};
3351        }
3352
3353        let (mut cur_content, mut cur_base_pos) = resync!();
3354
3355        // The minimum indent for continuation lines in this flow collection.
3356        // When the flow collection is inside an enclosing block collection,
3357        // continuation lines must be indented more than the enclosing block's
3358        // indent level (YAML 1.2: flow context lines must not regress to or
3359        // below the enclosing block indent level).
3360        // At document root (coll_stack empty), there is no enclosing block, so
3361        // no constraint — represented as None.
3362        let flow_min_indent: Option<usize> = self.coll_stack.last().map(|e| e.indent());
3363
3364        // -----------------------------------------------------------------------
3365        // Main parse loop — iterates over characters in the current (and
3366        // subsequent) lines until the outermost closing delimiter is found.
3367        // -----------------------------------------------------------------------
3368
3369        'outer: loop {
3370            // Document markers (`---` and `...`) are only valid at the document
3371            // level — they are illegal inside flow collections (YAML 1.2 §8.1).
3372            // A document marker must appear at the very beginning of a line
3373            // (column 0) and be followed by whitespace or end-of-line.
3374            if pos_in_line == 0
3375                && (cur_content.starts_with("---") || cur_content.starts_with("..."))
3376            {
3377                let rest = &cur_content[3..];
3378                if rest.is_empty() || rest.starts_with(' ') || rest.starts_with('\t') {
3379                    let err_pos = abs_pos(cur_base_pos, cur_content, 0);
3380                    self.failed = true;
3381                    return StepResult::Yield(Err(Error {
3382                        pos: err_pos,
3383                        message: "document marker is not allowed inside a flow collection".into(),
3384                    }));
3385                }
3386            }
3387
3388            // Tabs as indentation on a new line in flow context are invalid
3389            // (YAML 1.2 §6.2 — indentation uses spaces only).  A tab at the
3390            // start of a continuation line (before the first non-whitespace
3391            // character) is a tab used as indentation.  Blank lines (tab only,
3392            // no content) are exempt — they are treated as empty separator lines.
3393            if pos_in_line == 0 {
3394                let has_tab_indent =
3395                    cur_content.starts_with('\t') && !cur_content.trim().is_empty();
3396                if has_tab_indent {
3397                    let err_pos = abs_pos(cur_base_pos, cur_content, 0);
3398                    self.failed = true;
3399                    return StepResult::Yield(Err(Error {
3400                        pos: err_pos,
3401                        message: "tab character is not allowed as indentation in flow context"
3402                            .into(),
3403                    }));
3404                }
3405            }
3406
3407            // Skip leading spaces/tabs and comments.
3408            // `#` is a comment start only when preceded by whitespace (or at
3409            // start of line, i.e. pos_in_line == 0 with all prior chars being
3410            // whitespace).  A `#` immediately after a token (e.g. `,#`) is not
3411            // a comment — it is an error character that will be caught below.
3412            let prev_was_ws_at_loop_entry = pos_in_line == 0
3413                || cur_content[..pos_in_line]
3414                    .chars()
3415                    .next_back()
3416                    .is_some_and(|c| c == ' ' || c == '\t');
3417            let mut prev_was_ws = prev_was_ws_at_loop_entry;
3418            while pos_in_line < cur_content.len() {
3419                let Some(ch) = cur_content[pos_in_line..].chars().next() else {
3420                    break;
3421                };
3422                if ch == ' ' || ch == '\t' {
3423                    prev_was_ws = true;
3424                    pos_in_line += 1;
3425                } else if ch == '#' && prev_was_ws {
3426                    // Emit a Comment event for this `# comment` to end of line.
3427                    // No MAX_COMMENT_LEN check here — this comment is bounded by the
3428                    // physical line length (itself bounded by total input size), the
3429                    // same reason drain_trailing_comment does not apply the limit.
3430                    let hash_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3431                    // Comment text: everything after `#` (byte at pos_in_line is `#`,
3432                    // ASCII 1 byte, so text starts at pos_in_line + 1).
3433                    let text_start = pos_in_line + 1;
3434                    // SAFETY: text_start <= cur_content.len() because we found
3435                    // `#` at pos_in_line which is < cur_content.len().
3436                    let comment_text: &'input str = cur_content.get(text_start..).unwrap_or("");
3437                    let mut comment_end = hash_pos.advance('#');
3438                    for c in comment_text.chars() {
3439                        comment_end = comment_end.advance(c);
3440                    }
3441                    let comment_span = Span {
3442                        start: hash_pos,
3443                        end: comment_end,
3444                    };
3445                    events.push((Event::Comment { text: comment_text }, comment_span));
3446                    pos_in_line = cur_content.len();
3447                } else {
3448                    break;
3449                }
3450            }
3451
3452            // ----------------------------------------------------------------
3453            // End of line — consume and advance.
3454            // ----------------------------------------------------------------
3455            if pos_in_line >= cur_content.len() {
3456                self.lexer.consume_line();
3457
3458                if flow_stack.is_empty() {
3459                    // Outermost collection closed; done.
3460                    break 'outer;
3461                }
3462
3463                (cur_content, cur_base_pos) = resync!();
3464                if cur_content.is_empty() && self.lexer.at_eof() {
3465                    let err_pos = self.lexer.current_pos();
3466                    self.failed = true;
3467                    return StepResult::Yield(Err(Error {
3468                        pos: err_pos,
3469                        message: "unterminated flow collection: unexpected end of input".into(),
3470                    }));
3471                }
3472
3473                // Flow continuation lines must be indented more than the
3474                // enclosing block context (YAML 1.2: flow lines must not
3475                // regress to the block indent level).  Blank/whitespace-only
3476                // lines are exempt — they act as line separators.
3477                // At document root (no enclosing block), there is no
3478                // indentation constraint.
3479                if let Some(min_indent) = flow_min_indent {
3480                    if let Some(next_line) = self.lexer.peek_next_line() {
3481                        let trimmed = next_line.content.trim();
3482                        if !trimmed.is_empty() && next_line.indent <= min_indent {
3483                            let err_pos = next_line.pos;
3484                            self.failed = true;
3485                            return StepResult::Yield(Err(Error {
3486                                pos: err_pos,
3487                                message: "flow collection continuation line is not indented enough"
3488                                    .into(),
3489                            }));
3490                        }
3491                    }
3492                }
3493
3494                pos_in_line = 0;
3495                continue 'outer;
3496            }
3497
3498            let Some(ch) = cur_content[pos_in_line..].chars().next() else {
3499                continue 'outer;
3500            };
3501
3502            // ----------------------------------------------------------------
3503            // Opening delimiters `[` and `{`
3504            // ----------------------------------------------------------------
3505            if ch == '[' || ch == '{' {
3506                // Check unified depth limit (flow + block combined).
3507                let total_depth = self.coll_stack.len() + flow_stack.len();
3508                if total_depth >= MAX_COLLECTION_DEPTH {
3509                    let err_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3510                    self.failed = true;
3511                    return StepResult::Yield(Err(Error {
3512                        pos: err_pos,
3513                        message: "collection nesting depth exceeds limit".into(),
3514                    }));
3515                }
3516
3517                let open_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3518                let open_span = zero_span(open_pos);
3519                pos_in_line += 1;
3520
3521                if ch == '[' {
3522                    flow_stack.push(FlowFrame::Sequence {
3523                        has_value: false,
3524                        after_colon: false,
3525                        last_was_plain: false,
3526                    });
3527                    events.push((
3528                        Event::SequenceStart {
3529                            anchor: pending_flow_anchor.take(),
3530                            tag: pending_flow_tag.take(),
3531                            style: CollectionStyle::Flow,
3532                        },
3533                        open_span,
3534                    ));
3535                } else {
3536                    flow_stack.push(FlowFrame::Mapping {
3537                        phase: FlowMappingPhase::Key,
3538                        has_value: false,
3539                        last_was_plain: false,
3540                    });
3541                    events.push((
3542                        Event::MappingStart {
3543                            anchor: pending_flow_anchor.take(),
3544                            tag: pending_flow_tag.take(),
3545                            style: CollectionStyle::Flow,
3546                        },
3547                        open_span,
3548                    ));
3549                }
3550                continue 'outer;
3551            }
3552
3553            // ----------------------------------------------------------------
3554            // Closing delimiters `]` and `}`
3555            // ----------------------------------------------------------------
3556            if ch == ']' || ch == '}' {
3557                let close_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3558                let close_span = zero_span(close_pos);
3559                pos_in_line += 1;
3560
3561                let Some(top) = flow_stack.pop() else {
3562                    // Closing delimiter with empty stack — mismatched.
3563                    self.failed = true;
3564                    return StepResult::Yield(Err(Error {
3565                        pos: close_pos,
3566                        message: format!("unexpected '{ch}' in flow context"),
3567                    }));
3568                };
3569
3570                match (ch, top) {
3571                    (']', FlowFrame::Sequence { .. }) => {
3572                        events.push((Event::SequenceEnd, close_span));
3573                    }
3574                    ('}', FlowFrame::Mapping { phase, .. }) => {
3575                        // If mapping is in Value phase (key emitted, no value yet),
3576                        // emit empty value before closing.
3577                        if phase == FlowMappingPhase::Value {
3578                            events.push((empty_scalar_event(), close_span));
3579                        }
3580                        events.push((Event::MappingEnd, close_span));
3581                    }
3582                    (']', FlowFrame::Mapping { .. }) => {
3583                        self.failed = true;
3584                        return StepResult::Yield(Err(Error {
3585                            pos: close_pos,
3586                            message: "expected '}' to close flow mapping, found ']'".into(),
3587                        }));
3588                    }
3589                    ('}', FlowFrame::Sequence { .. }) => {
3590                        self.failed = true;
3591                        return StepResult::Yield(Err(Error {
3592                            pos: close_pos,
3593                            message: "expected ']' to close flow sequence, found '}'".into(),
3594                        }));
3595                    }
3596                    _ => unreachable!("all (ch, top) combinations covered above"),
3597                }
3598
3599                // After a nested collection closes inside a parent frame,
3600                // mark the parent as having a value (the nested collection was it),
3601                // and if it's a mapping in Value phase, advance to Key phase.
3602                if let Some(parent) = flow_stack.last_mut() {
3603                    // Update the last-token-line tracker so the multi-line implicit
3604                    // key check (DK4H) knows where the key (inner collection) ended.
3605                    last_token_line = cur_base_pos.line;
3606                    match parent {
3607                        FlowFrame::Sequence {
3608                            has_value,
3609                            after_colon,
3610                            last_was_plain,
3611                        } => {
3612                            *has_value = true;
3613                            *after_colon = false;
3614                            *last_was_plain = false;
3615                        }
3616                        FlowFrame::Mapping {
3617                            phase,
3618                            has_value,
3619                            last_was_plain,
3620                        } => {
3621                            *has_value = true;
3622                            *last_was_plain = false;
3623                            if *phase == FlowMappingPhase::Value {
3624                                *phase = FlowMappingPhase::Key;
3625                            }
3626                        }
3627                    }
3628                }
3629
3630                if flow_stack.is_empty() {
3631                    // Outermost collection closed.
3632                    // Consume the current line; prepend any non-empty tail so the
3633                    // block state machine can process content after the `]`/`}`.
3634                    let tail_content = &cur_content[pos_in_line..];
3635                    let tail_trimmed = tail_content.trim_start_matches([' ', '\t']);
3636                    // `#` is a comment only when preceded by whitespace.  If the
3637                    // closing bracket is immediately followed by `#` (no space),
3638                    // that is not a valid comment — it is a syntax error.
3639                    if tail_trimmed.starts_with('#') {
3640                        let prev_was_ws = pos_in_line == 0
3641                            || cur_content[..pos_in_line]
3642                                .chars()
3643                                .next_back()
3644                                .is_some_and(|c| c == ' ' || c == '\t');
3645                        if !prev_was_ws {
3646                            let err_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3647                            self.failed = true;
3648                            return StepResult::Yield(Err(Error {
3649                                pos: err_pos,
3650                                message: "comment requires at least one space before '#'".into(),
3651                            }));
3652                        }
3653                    }
3654                    // A flow collection used as an implicit mapping key must
3655                    // fit on a single line (YAML 1.2 §7.4.2).  If the tail
3656                    // begins with `:` (making this collection a mapping key) and
3657                    // the closing delimiter is on a different line than the
3658                    // opening delimiter, reject as a multi-line flow key.
3659                    if tail_trimmed.starts_with(':') && cur_base_pos.line != start_line {
3660                        let err_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3661                        self.failed = true;
3662                        return StepResult::Yield(Err(Error {
3663                            pos: err_pos,
3664                            message: "multi-line flow collection cannot be used as an implicit mapping key".into(),
3665                        }));
3666                    }
3667                    // If the block collection stack is empty AND the tail does not
3668                    // start with `:` (which would indicate this flow collection is a
3669                    // mapping key), the flow collection is the document root node.
3670                    // Mark it so subsequent content on the NEXT LINE triggers the
3671                    // root-node guard in `step_in_document`.
3672                    if self.coll_stack.is_empty() && !tail_trimmed.starts_with(':') {
3673                        self.root_node_emitted = true;
3674                    }
3675                    self.lexer.consume_line();
3676                    if !tail_trimmed.is_empty() {
3677                        let tail_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3678                        let synthetic = crate::lines::Line {
3679                            content: tail_content,
3680                            offset: tail_pos.byte_offset,
3681                            indent: tail_pos.column,
3682                            break_type: crate::lines::BreakType::Eof,
3683                            pos: tail_pos,
3684                        };
3685                        self.lexer.prepend_inline_line(synthetic);
3686                    }
3687                    break 'outer;
3688                }
3689                continue 'outer;
3690            }
3691
3692            // ----------------------------------------------------------------
3693            // Comma separator
3694            // ----------------------------------------------------------------
3695            if ch == ',' {
3696                // Leading-comma check: if the current frame has not yet produced
3697                // any value since it was opened (or since the last comma), this
3698                // comma is invalid — e.g. `[,]` or `{,}`.
3699                let leading = match flow_stack.last() {
3700                    Some(
3701                        FlowFrame::Sequence { has_value, .. }
3702                        | FlowFrame::Mapping { has_value, .. },
3703                    ) => !has_value,
3704                    None => false,
3705                };
3706                if leading {
3707                    let err_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3708                    self.failed = true;
3709                    return StepResult::Yield(Err(Error {
3710                        pos: err_pos,
3711                        message: "invalid leading comma in flow collection".into(),
3712                    }));
3713                }
3714
3715                pos_in_line += 1;
3716
3717                // Skip whitespace after comma.
3718                while pos_in_line < cur_content.len() {
3719                    match cur_content[pos_in_line..].chars().next() {
3720                        Some(c) if c == ' ' || c == '\t' => pos_in_line += 1,
3721                        _ => break,
3722                    }
3723                }
3724
3725                // Double-comma check: next char must not be another comma.
3726                if cur_content[pos_in_line..].starts_with(',') {
3727                    let err_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3728                    self.failed = true;
3729                    return StepResult::Yield(Err(Error {
3730                        pos: err_pos,
3731                        message: "invalid empty entry: consecutive commas in flow collection"
3732                            .into(),
3733                    }));
3734                }
3735
3736                // If a tag or anchor is pending but no scalar was emitted yet,
3737                // the comma terminates an implicit empty-scalar node.  Emit it
3738                // so the pending properties are attached to the correct node
3739                // rather than carried forward to the next entry.
3740                if pending_flow_tag.is_some() || pending_flow_anchor.is_some() {
3741                    let empty_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3742                    events.push((
3743                        Event::Scalar {
3744                            value: Cow::Borrowed(""),
3745                            style: ScalarStyle::Plain,
3746                            anchor: pending_flow_anchor.take(),
3747                            tag: pending_flow_tag.take(),
3748                        },
3749                        zero_span(empty_pos),
3750                    ));
3751                    // Advance phase: this scalar acts as a value (or key).
3752                    if let Some(frame) = flow_stack.last_mut() {
3753                        match frame {
3754                            FlowFrame::Sequence {
3755                                has_value,
3756                                after_colon,
3757                                last_was_plain,
3758                            } => {
3759                                *has_value = true;
3760                                *after_colon = false;
3761                                *last_was_plain = false;
3762                            }
3763                            FlowFrame::Mapping {
3764                                phase,
3765                                has_value,
3766                                last_was_plain,
3767                            } => {
3768                                *has_value = true;
3769                                *last_was_plain = false;
3770                                *phase = match *phase {
3771                                    FlowMappingPhase::Key => FlowMappingPhase::Value,
3772                                    FlowMappingPhase::Value => FlowMappingPhase::Key,
3773                                };
3774                            }
3775                        }
3776                    }
3777                }
3778
3779                // Reset has_value and (for mappings) go back to Key phase.
3780                if let Some(frame) = flow_stack.last_mut() {
3781                    match frame {
3782                        FlowFrame::Sequence {
3783                            has_value,
3784                            after_colon,
3785                            last_was_plain,
3786                        } => {
3787                            *has_value = false;
3788                            *after_colon = false;
3789                            *last_was_plain = false;
3790                        }
3791                        FlowFrame::Mapping {
3792                            phase,
3793                            has_value,
3794                            last_was_plain,
3795                        } => {
3796                            *has_value = false;
3797                            *last_was_plain = false;
3798                            if *phase == FlowMappingPhase::Value {
3799                                *phase = FlowMappingPhase::Key;
3800                            }
3801                        }
3802                    }
3803                }
3804                // Reset last_token_line after a comma — the next key can start
3805                // on the same line as the comma (or any subsequent line) without
3806                // triggering the multi-line implicit key error.
3807                last_token_line = cur_base_pos.line;
3808
3809                continue 'outer;
3810            }
3811
3812            // ----------------------------------------------------------------
3813            // Block scalar indicators forbidden in flow context
3814            // ----------------------------------------------------------------
3815            if ch == '|' || ch == '>' {
3816                let err_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3817                self.failed = true;
3818                return StepResult::Yield(Err(Error {
3819                    pos: err_pos,
3820                    message: format!(
3821                        "block scalar indicator '{ch}' is not allowed inside a flow collection"
3822                    ),
3823                }));
3824            }
3825
3826            // ----------------------------------------------------------------
3827            // Block sequence entry indicator `-` forbidden in flow context.
3828            //
3829            // Per YAML 1.2 §7.4, block collections cannot appear inside flow
3830            // context.  A `-` followed by space, tab, or end-of-content is
3831            // the block-sequence entry indicator; a `-` followed by any other
3832            // non-separator character is a valid plain-scalar start (e.g. `-x`
3833            // or `-1` are legal plain scalars in flow context).
3834            // ----------------------------------------------------------------
3835            if ch == '-' {
3836                let after = &cur_content[pos_in_line + 1..];
3837                let next_c = after.chars().next();
3838                if next_c.is_none_or(|c| matches!(c, ' ' | '\t')) {
3839                    let err_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3840                    self.failed = true;
3841                    return StepResult::Yield(Err(Error {
3842                        pos: err_pos,
3843                        message: "block sequence entry '-' is not allowed inside a flow collection"
3844                            .into(),
3845                    }));
3846                }
3847            }
3848
3849            // ----------------------------------------------------------------
3850            // Quoted scalars — delegate to existing lexer methods.
3851            //
3852            // Strategy: consume the current line, prepend a synthetic line
3853            // starting exactly at the quote character, call the method, then
3854            // re-sync `cur_content` / `cur_base_pos` from the buffer.
3855            // ----------------------------------------------------------------
3856            if ch == '\'' || ch == '"' {
3857                // `remaining` borrows from `cur_content` which borrows from `'input`.
3858                // We capture it before touching the lexer buffer.
3859                let remaining: &'input str = &cur_content[pos_in_line..];
3860                let cur_abs_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
3861
3862                // Consume the current line from the buffer and replace it with
3863                // a synthetic line that starts at the quote character.  The
3864                // quoted-scalar method will consume this synthetic line entirely,
3865                // including any content after the closing quote — so we must
3866                // reconstruct the tail from `remaining` and `span` below.
3867                self.lexer.consume_line();
3868                let synthetic = crate::lines::Line {
3869                    content: remaining,
3870                    offset: cur_abs_pos.byte_offset,
3871                    indent: cur_abs_pos.column,
3872                    break_type: crate::lines::BreakType::Eof,
3873                    pos: cur_abs_pos,
3874                };
3875                self.lexer.prepend_inline_line(synthetic);
3876
3877                // Call the appropriate quoted-scalar method.
3878                let result = if ch == '\'' {
3879                    self.lexer.try_consume_single_quoted(0)
3880                } else {
3881                    // Flow context: no block-indentation constraint on
3882                    // continuation lines of double-quoted scalars.
3883                    self.lexer.try_consume_double_quoted(None)
3884                };
3885
3886                let (value, span) = match result {
3887                    Ok(Some(vs)) => vs,
3888                    Ok(None) => {
3889                        self.failed = true;
3890                        return StepResult::Yield(Err(Error {
3891                            pos: cur_abs_pos,
3892                            message: "expected quoted scalar".into(),
3893                        }));
3894                    }
3895                    Err(e) => {
3896                        self.failed = true;
3897                        return StepResult::Yield(Err(e));
3898                    }
3899                };
3900
3901                let style = if ch == '\'' {
3902                    ScalarStyle::SingleQuoted
3903                } else {
3904                    ScalarStyle::DoubleQuoted
3905                };
3906                events.push((
3907                    Event::Scalar {
3908                        value,
3909                        style,
3910                        anchor: pending_flow_anchor.take(),
3911                        tag: pending_flow_tag.take(),
3912                    },
3913                    span,
3914                ));
3915
3916                // Reconstruct the tail after the closing quote so the flow
3917                // parser can continue with `,`, `]`, `}`, etc.
3918                //
3919                // For single-line scalars, the tail is in `remaining` at byte
3920                // offset `span.end.byte_offset - cur_abs_pos.byte_offset`.
3921                //
3922                // For multiline scalars, the lexer's continuation loop consumed
3923                // additional input lines; the tail on the closing-quote line is
3924                // stored in `self.lexer.pending_multiline_tail`.  Drain it here.
3925                if let Some((tail, tail_pos)) = self.lexer.pending_multiline_tail.take() {
3926                    if !tail.is_empty() {
3927                        let tail_syn = crate::lines::Line {
3928                            content: tail,
3929                            offset: tail_pos.byte_offset,
3930                            indent: tail_pos.column,
3931                            break_type: crate::lines::BreakType::Eof,
3932                            pos: tail_pos,
3933                        };
3934                        self.lexer.prepend_inline_line(tail_syn);
3935                    }
3936                } else {
3937                    // Single-line scalar: derive tail from `remaining`.
3938                    let consumed_bytes = span.end.byte_offset - cur_abs_pos.byte_offset;
3939                    let tail_in_remaining = remaining.get(consumed_bytes..).unwrap_or("");
3940                    if !tail_in_remaining.is_empty() {
3941                        let tail_syn = crate::lines::Line {
3942                            content: tail_in_remaining,
3943                            offset: span.end.byte_offset,
3944                            indent: span.end.column,
3945                            break_type: crate::lines::BreakType::Eof,
3946                            pos: span.end,
3947                        };
3948                        self.lexer.prepend_inline_line(tail_syn);
3949                    }
3950                }
3951
3952                // Re-sync from the buffer.
3953                (cur_content, cur_base_pos) = resync!();
3954                pos_in_line = 0;
3955                // Track where this quoted scalar (potential key) ended.
3956                last_token_line = cur_base_pos.line;
3957
3958                if cur_content.is_empty() && self.lexer.at_eof() && !flow_stack.is_empty() {
3959                    let err_pos = self.lexer.current_pos();
3960                    self.failed = true;
3961                    return StepResult::Yield(Err(Error {
3962                        pos: err_pos,
3963                        message: "unterminated flow collection: unexpected end of input".into(),
3964                    }));
3965                }
3966
3967                // Advance mapping phase for the emitted scalar; mark frame as having a value.
3968                if let Some(frame) = flow_stack.last_mut() {
3969                    match frame {
3970                        FlowFrame::Sequence {
3971                            has_value,
3972                            after_colon,
3973                            last_was_plain,
3974                        } => {
3975                            *has_value = true;
3976                            *after_colon = false;
3977                            *last_was_plain = false;
3978                        }
3979                        FlowFrame::Mapping {
3980                            phase,
3981                            has_value,
3982                            last_was_plain,
3983                        } => {
3984                            *has_value = true;
3985                            *last_was_plain = false;
3986                            *phase = match *phase {
3987                                FlowMappingPhase::Key => FlowMappingPhase::Value,
3988                                FlowMappingPhase::Value => FlowMappingPhase::Key,
3989                            };
3990                        }
3991                    }
3992                }
3993
3994                continue 'outer;
3995            }
3996
3997            // ----------------------------------------------------------------
3998            // Explicit key indicator `?` in flow mappings and sequences
3999            // ----------------------------------------------------------------
4000            if ch == '?' {
4001                let next_ch = cur_content[pos_in_line + 1..].chars().next();
4002                if next_ch.is_none_or(|c| matches!(c, ' ' | '\t' | '\n' | '\r')) {
4003                    // `?` followed by whitespace/EOL: explicit key indicator.
4004                    // In a flow sequence, remember this so the DK4H single-line
4005                    // check is suppressed for the corresponding `:` separator.
4006                    if matches!(flow_stack.last(), Some(FlowFrame::Sequence { .. })) {
4007                        explicit_key_in_seq = true;
4008                    }
4009                    pos_in_line += 1;
4010                    continue 'outer;
4011                }
4012                // `?` not followed by whitespace — treat as plain scalar start.
4013            }
4014
4015            // ----------------------------------------------------------------
4016            // `:` value separator in flow mappings
4017            // ----------------------------------------------------------------
4018            if ch == ':' {
4019                let next_ch = cur_content[pos_in_line + 1..].chars().next();
4020                // `:` is a value separator when followed by whitespace/delimiter
4021                // (standard case) OR when in a flow sequence with a synthetic
4022                // current line (adjacent `:` from JSON-like key — YAML 1.2
4023                // §7.4.2).  A synthetic line means the `:` is on the same
4024                // physical line as the preceding quoted scalar / collection.
4025                let is_standard_sep =
4026                    next_ch.is_none_or(|c| matches!(c, ' ' | '\t' | ',' | ']' | '}' | '\n' | '\r'));
4027                let is_adjacent_json_sep = !is_standard_sep
4028                    && matches!(
4029                        flow_stack.last(),
4030                        Some(FlowFrame::Sequence {
4031                            has_value: true,
4032                            ..
4033                        })
4034                    )
4035                    && self.lexer.is_next_line_synthetic();
4036                let is_value_sep = is_standard_sep || is_adjacent_json_sep;
4037                if is_value_sep {
4038                    // Multi-line implicit single-pair mapping key check (YAML 1.2 §7.4.1):
4039                    // inside a flow sequence `[...]`, a single-pair mapping entry's key must
4040                    // be on the same line as the `:` separator.  (Flow mappings `{...}` allow
4041                    // multi-line implicit keys — see YAML 1.2 §7.4.2.)
4042                    // Exception: when a `?` explicit-key indicator was seen in this sequence
4043                    // (`explicit_key_in_seq`), the key may span multiple lines.
4044                    let in_sequence = matches!(flow_stack.last(), Some(FlowFrame::Sequence { .. }));
4045                    if in_sequence && cur_base_pos.line != last_token_line && !explicit_key_in_seq {
4046                        let colon_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
4047                        self.failed = true;
4048                        return StepResult::Yield(Err(Error {
4049                            pos: colon_pos,
4050                            message: "implicit flow mapping key must be on a single line".into(),
4051                        }));
4052                    }
4053                    explicit_key_in_seq = false;
4054                    if let Some(frame) = flow_stack.last_mut() {
4055                        match frame {
4056                            FlowFrame::Mapping {
4057                                phase,
4058                                has_value,
4059                                last_was_plain,
4060                            } => {
4061                                *last_was_plain = false;
4062                                if *phase == FlowMappingPhase::Key {
4063                                    // If a tag or anchor is pending but no key scalar was
4064                                    // emitted yet, the `:` terminates an implicit empty key.
4065                                    // Emit the empty key scalar now so the pending properties
4066                                    // are attached to the key, not carried to the value.
4067                                    if pending_flow_tag.is_some() || pending_flow_anchor.is_some() {
4068                                        let key_pos =
4069                                            abs_pos(cur_base_pos, cur_content, pos_in_line);
4070                                        events.push((
4071                                            Event::Scalar {
4072                                                value: Cow::Borrowed(""),
4073                                                style: ScalarStyle::Plain,
4074                                                anchor: pending_flow_anchor.take(),
4075                                                tag: pending_flow_tag.take(),
4076                                            },
4077                                            zero_span(key_pos),
4078                                        ));
4079                                        *has_value = true;
4080                                    }
4081                                    *phase = FlowMappingPhase::Value;
4082                                }
4083                            }
4084                            FlowFrame::Sequence {
4085                                after_colon,
4086                                last_was_plain,
4087                                ..
4088                            } => {
4089                                // `:` as value separator in a sequence means we are
4090                                // entering the value part of a single-pair implicit
4091                                // mapping.  Mark `after_colon` so the next scalar or
4092                                // collection is not rejected for missing a comma.
4093                                *after_colon = true;
4094                                // Reset last_was_plain so the value scalar on the next
4095                                // line is not appended to the key via multi-line
4096                                // plain-scalar continuation logic.
4097                                *last_was_plain = false;
4098                            }
4099                        }
4100                    }
4101                    pos_in_line += 1;
4102                    continue 'outer;
4103                }
4104                // `:` not followed by separator — treat as plain scalar char.
4105            }
4106
4107            // ----------------------------------------------------------------
4108            // Tag `!tag`, `!!tag`, `!<uri>`, or `!` in flow context
4109            // ----------------------------------------------------------------
4110            if ch == '!' {
4111                let bang_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
4112                let after_bang = &cur_content[pos_in_line + 1..];
4113                let tag_start = &cur_content[pos_in_line..];
4114                match scan_tag(after_bang, tag_start, bang_pos) {
4115                    Err(e) => {
4116                        self.failed = true;
4117                        return StepResult::Yield(Err(e));
4118                    }
4119                    Ok((tag_slice, advance_past_bang)) => {
4120                        // Total bytes: 1 (`!`) + advance_past_bang.
4121                        // `!<URI>`: advance_past_bang = 1 + uri.len() + 1
4122                        // `!!suffix`: advance_past_bang = 1 + suffix.len()
4123                        // `!suffix`: advance_past_bang = suffix.len()
4124                        // `!` alone: advance_past_bang = 0
4125                        if pending_flow_tag.is_some() {
4126                            self.failed = true;
4127                            return StepResult::Yield(Err(Error {
4128                                pos: bang_pos,
4129                                message: "a node may not have more than one tag".into(),
4130                            }));
4131                        }
4132                        // Resolve tag handle against directive scope at scan time.
4133                        let resolved_flow_tag =
4134                            match self.directive_scope.resolve_tag(tag_slice, bang_pos) {
4135                                Ok(t) => t,
4136                                Err(e) => {
4137                                    self.failed = true;
4138                                    return StepResult::Yield(Err(e));
4139                                }
4140                            };
4141                        pending_flow_tag = Some(resolved_flow_tag);
4142                        pos_in_line += 1 + advance_past_bang;
4143                        // Skip any whitespace after the tag.
4144                        while pos_in_line < cur_content.len() {
4145                            match cur_content[pos_in_line..].chars().next() {
4146                                Some(c) if c == ' ' || c == '\t' => pos_in_line += 1,
4147                                _ => break,
4148                            }
4149                        }
4150                        continue 'outer;
4151                    }
4152                }
4153            }
4154
4155            // ----------------------------------------------------------------
4156            // Anchor `&name` in flow context
4157            // ----------------------------------------------------------------
4158            if ch == '&' {
4159                let after_amp = &cur_content[pos_in_line + 1..];
4160                let amp_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
4161                match scan_anchor_name(after_amp, amp_pos) {
4162                    Err(e) => {
4163                        self.failed = true;
4164                        return StepResult::Yield(Err(e));
4165                    }
4166                    Ok(name) => {
4167                        // Two anchors on the same flow node are an error.
4168                        if pending_flow_anchor.is_some() {
4169                            let amp_pos2 = abs_pos(cur_base_pos, cur_content, pos_in_line);
4170                            self.failed = true;
4171                            return StepResult::Yield(Err(Error {
4172                                pos: amp_pos2,
4173                                message: "a node may not have more than one anchor".into(),
4174                            }));
4175                        }
4176                        pending_flow_anchor = Some(name);
4177                        pos_in_line += 1 + name.len();
4178                        // Skip any whitespace after the anchor name.
4179                        while pos_in_line < cur_content.len() {
4180                            match cur_content[pos_in_line..].chars().next() {
4181                                Some(c) if c == ' ' || c == '\t' => pos_in_line += 1,
4182                                _ => break,
4183                            }
4184                        }
4185                        continue 'outer;
4186                    }
4187                }
4188            }
4189
4190            // ----------------------------------------------------------------
4191            // Alias `*name` in flow context
4192            // ----------------------------------------------------------------
4193            if ch == '*' {
4194                let after_star = &cur_content[pos_in_line + 1..];
4195                let star_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
4196                // YAML 1.2 §7.1: alias nodes cannot have properties (anchor or tag).
4197                if pending_flow_tag.is_some() {
4198                    self.failed = true;
4199                    return StepResult::Yield(Err(Error {
4200                        pos: star_pos,
4201                        message: "alias node cannot have a tag property".into(),
4202                    }));
4203                }
4204                if pending_flow_anchor.is_some() {
4205                    self.failed = true;
4206                    return StepResult::Yield(Err(Error {
4207                        pos: star_pos,
4208                        message: "alias node cannot have an anchor property".into(),
4209                    }));
4210                }
4211                match scan_anchor_name(after_star, star_pos) {
4212                    Err(e) => {
4213                        self.failed = true;
4214                        return StepResult::Yield(Err(e));
4215                    }
4216                    Ok(name) => {
4217                        let alias_end = Pos {
4218                            byte_offset: star_pos.byte_offset + 1 + name.len(),
4219                            char_offset: star_pos.char_offset + 1 + name.chars().count(),
4220                            line: star_pos.line,
4221                            column: star_pos.column + 1 + name.chars().count(),
4222                        };
4223                        let alias_span = Span {
4224                            start: star_pos,
4225                            end: alias_end,
4226                        };
4227                        events.push((Event::Alias { name }, alias_span));
4228                        pos_in_line += 1 + name.len();
4229                        // Advance mapping phase; mark frame as having a value.
4230                        if let Some(frame) = flow_stack.last_mut() {
4231                            match frame {
4232                                FlowFrame::Sequence {
4233                                    has_value,
4234                                    after_colon,
4235                                    last_was_plain,
4236                                } => {
4237                                    *has_value = true;
4238                                    *after_colon = false;
4239                                    *last_was_plain = false;
4240                                }
4241                                FlowFrame::Mapping {
4242                                    phase,
4243                                    has_value,
4244                                    last_was_plain,
4245                                } => {
4246                                    *has_value = true;
4247                                    *last_was_plain = false;
4248                                    *phase = match *phase {
4249                                        FlowMappingPhase::Key => FlowMappingPhase::Value,
4250                                        FlowMappingPhase::Value => FlowMappingPhase::Key,
4251                                    };
4252                                }
4253                            }
4254                        }
4255                        continue 'outer;
4256                    }
4257                }
4258            }
4259
4260            // ----------------------------------------------------------------
4261            // Multi-line plain scalar continuation in flow context
4262            //
4263            // A plain scalar may span multiple lines (YAML §7.3.3).  When the
4264            // previous emitted token was a plain scalar (`last_was_plain`) and
4265            // the current character is a valid `ns-plain-char` (i.e. it can
4266            // appear within a plain scalar body, even if it cannot *start* one),
4267            // extend the in-progress scalar rather than treating the character
4268            // as the start of a new token.
4269            //
4270            // `ns-plain-char` in flow context: any `ns-char` that is not `:` or
4271            // `#`, plus `:` followed by ns-plain-safe, plus `#` not preceded by
4272            // whitespace.  At the start of a continuation line all leading
4273            // whitespace has been consumed, so `#` at position 0 here would be
4274            // `#` after whitespace — a comment start, not a continuation char.
4275            // ----------------------------------------------------------------
4276            {
4277                // For flow MAPPINGS: a plain scalar may continue a key only when
4278                // the phase is currently Value — meaning the previous scalar was
4279                // a KEY (Key→Value phase advance was done when emitting it).  A
4280                // VALUE scalar (phase Value→Key) must NOT continue: the next line
4281                // is a new key that requires a preceding comma.
4282                // For flow SEQUENCES: `last_was_plain` alone is enough (single-pair
4283                // implicit mapping keys can span lines, and regular sequence items
4284                // can also continue, though commas terminate them).
4285                let frame_last_was_plain = matches!(
4286                    flow_stack.last(),
4287                    Some(
4288                        FlowFrame::Mapping {
4289                            last_was_plain: true,
4290                            phase: FlowMappingPhase::Value,
4291                            ..
4292                        } | FlowFrame::Sequence {
4293                            last_was_plain: true,
4294                            ..
4295                        }
4296                    )
4297                );
4298                // `ns-plain-char` check: ch must not be a flow terminator, `:` (alone),
4299                // or `#` (comment start after whitespace, which is the only `#` we can
4300                // see here since whitespace was consumed).
4301                let is_ns_plain_char_continuation = frame_last_was_plain
4302                    && !matches!(ch, ',' | '[' | ']' | '{' | '}' | '#')
4303                    && (ch != ':' || {
4304                        let after = &cur_content[pos_in_line + 1..];
4305                        let next_c = after.chars().next();
4306                        // `:` is a valid continuation char only when NOT followed by
4307                        // a separator (space, tab, flow indicator, or end-of-line).
4308                        next_c.is_some_and(|nc| {
4309                            !matches!(nc, ' ' | '\t' | ',' | '[' | ']' | '{' | '}')
4310                        })
4311                    });
4312
4313                if is_ns_plain_char_continuation {
4314                    let slice = &cur_content[pos_in_line..];
4315                    let scanned = scan_plain_line_flow(slice);
4316                    if !scanned.is_empty() {
4317                        // Extend the most-recently-emitted scalar event with a
4318                        // line-fold (space) and the continuation content.
4319                        if let Some((
4320                            Event::Scalar {
4321                                value,
4322                                style: ScalarStyle::Plain,
4323                                ..
4324                            },
4325                            _,
4326                        )) = events.last_mut()
4327                        {
4328                            let extended = format!("{value} {scanned}");
4329                            *value = Cow::Owned(extended);
4330                        }
4331                        pos_in_line += scanned.len();
4332                        // Update last_token_line to this line so the DK4H
4333                        // multi-line implicit-key check remains anchored to the
4334                        // last real token (the continuation content).
4335                        last_token_line = cur_base_pos.line;
4336                        // The continuation may itself end at EOL, leaving the scalar
4337                        // still incomplete.  Keep `last_was_plain` true and, for
4338                        // mappings, revert the phase back to Key so that the `: `
4339                        // separator is still recognised.
4340                        if let Some(frame) = flow_stack.last_mut() {
4341                            match frame {
4342                                FlowFrame::Mapping {
4343                                    phase,
4344                                    last_was_plain,
4345                                    ..
4346                                } => {
4347                                    // Undo the premature Key→Value advance: the key is not
4348                                    // yet complete until `: ` is seen.
4349                                    *phase = FlowMappingPhase::Key;
4350                                    *last_was_plain = true;
4351                                }
4352                                FlowFrame::Sequence { last_was_plain, .. } => {
4353                                    *last_was_plain = true;
4354                                }
4355                            }
4356                        }
4357                        continue 'outer;
4358                    }
4359                }
4360            }
4361
4362            // ----------------------------------------------------------------
4363            // Plain scalar in flow context
4364            // ----------------------------------------------------------------
4365            {
4366                // Indicator characters that cannot start a plain scalar in flow.
4367                let is_plain_first = if matches!(
4368                    ch,
4369                    ',' | '['
4370                        | ']'
4371                        | '{'
4372                        | '}'
4373                        | '#'
4374                        | '&'
4375                        | '*'
4376                        | '!'
4377                        | '|'
4378                        | '>'
4379                        | '\''
4380                        | '"'
4381                        | '%'
4382                        | '@'
4383                        | '`'
4384                ) {
4385                    false
4386                } else if matches!(ch, '?' | ':' | '-') {
4387                    // These start a plain scalar only if followed by a safe char.
4388                    let after = &cur_content[pos_in_line + ch.len_utf8()..];
4389                    let next_c = after.chars().next();
4390                    next_c.is_some_and(|nc| !matches!(nc, ' ' | '\t' | ',' | '[' | ']' | '{' | '}'))
4391                } else {
4392                    true
4393                };
4394
4395                if is_plain_first {
4396                    // Missing-comma check: in a flow collection with has_value=true,
4397                    // a new plain scalar is starting without a preceding comma —
4398                    // YAML 1.2 §7.4 requires commas between entries.
4399                    match flow_stack.last() {
4400                        Some(FlowFrame::Mapping {
4401                            phase: FlowMappingPhase::Key,
4402                            has_value: true,
4403                            ..
4404                        }) => {
4405                            let err_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
4406                            self.failed = true;
4407                            return StepResult::Yield(Err(Error {
4408                                pos: err_pos,
4409                                message: "missing comma between flow mapping entries".into(),
4410                            }));
4411                        }
4412                        Some(FlowFrame::Sequence {
4413                            has_value: true,
4414                            after_colon: false,
4415                            last_was_plain: false,
4416                        }) => {
4417                            let err_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
4418                            self.failed = true;
4419                            return StepResult::Yield(Err(Error {
4420                                pos: err_pos,
4421                                message: "missing comma between flow sequence entries".into(),
4422                            }));
4423                        }
4424                        _ => {}
4425                    }
4426                    let slice = &cur_content[pos_in_line..];
4427                    let scanned = scan_plain_line_flow(slice);
4428                    if !scanned.is_empty() {
4429                        let scalar_start = abs_pos(cur_base_pos, cur_content, pos_in_line);
4430                        let scalar_end =
4431                            abs_pos(cur_base_pos, cur_content, pos_in_line + scanned.len());
4432                        let scalar_span = Span {
4433                            start: scalar_start,
4434                            end: scalar_end,
4435                        };
4436
4437                        events.push((
4438                            Event::Scalar {
4439                                value: Cow::Borrowed(scanned),
4440                                style: ScalarStyle::Plain,
4441                                anchor: pending_flow_anchor.take(),
4442                                tag: pending_flow_tag.take(),
4443                            },
4444                            scalar_span,
4445                        ));
4446                        pos_in_line += scanned.len();
4447                        // Track where this scalar (potential key) ended for the
4448                        // multi-line implicit key check (DK4H).
4449                        last_token_line = cur_base_pos.line;
4450
4451                        // Advance mapping phase; mark frame as having a value.
4452                        if let Some(frame) = flow_stack.last_mut() {
4453                            match frame {
4454                                FlowFrame::Sequence {
4455                                    has_value,
4456                                    after_colon,
4457                                    last_was_plain,
4458                                } => {
4459                                    *has_value = true;
4460                                    *after_colon = false;
4461                                    *last_was_plain = true; // plain scalars may continue
4462                                }
4463                                FlowFrame::Mapping {
4464                                    phase,
4465                                    has_value,
4466                                    last_was_plain,
4467                                } => {
4468                                    *has_value = true;
4469                                    *last_was_plain = true; // plain scalars may continue on next line
4470                                    *phase = match *phase {
4471                                        FlowMappingPhase::Key => FlowMappingPhase::Value,
4472                                        FlowMappingPhase::Value => FlowMappingPhase::Key,
4473                                    };
4474                                }
4475                            }
4476                        }
4477                        continue 'outer;
4478                    }
4479                }
4480
4481                // Reserved indicators — task 19 will handle directives.
4482                // `!` (tags), `&`/`*` (anchors/aliases) are handled above.
4483                // Silently skipping remaining reserved indicators would mangle
4484                // YAML structure, so we error early here.
4485                if matches!(ch, '%' | '@' | '`') {
4486                    let err_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
4487                    self.failed = true;
4488                    return StepResult::Yield(Err(Error {
4489                        pos: err_pos,
4490                        message: format!(
4491                            "indicator '{ch}' inside flow collection is not yet supported"
4492                        ),
4493                    }));
4494                }
4495
4496                // Any other character that is not a plain-scalar start and is
4497                // not an indicator handled above (e.g. C0 control characters,
4498                // DEL, C1 controls, surrogates) is invalid here. Error rather
4499                // than panicking — this is user-supplied input.
4500                let err_pos = abs_pos(cur_base_pos, cur_content, pos_in_line);
4501                self.failed = true;
4502                return StepResult::Yield(Err(Error {
4503                    pos: err_pos,
4504                    message: format!("invalid character {ch:?} inside flow collection"),
4505                }));
4506            }
4507        }
4508
4509        // Tick the parent block mapping phase (if any) after completing a flow
4510        // collection that was a key or value in a block mapping.
4511        self.tick_mapping_phase_after_scalar();
4512
4513        // Push all accumulated events to the queue.
4514        self.queue.extend(events);
4515        StepResult::Continue
4516    }
4517
4518    /// Tick the key/value phase of the innermost open mapping after emitting a
4519    /// scalar event.
4520    ///
4521    /// - If the mapping was in `Key` phase, it flips to `Value`.
4522    /// - If the mapping was in `Value` phase (or there is no open mapping), it
4523    ///   flips back to `Key`.
4524    fn tick_mapping_phase_after_scalar(&mut self) {
4525        // A scalar was consumed — clear any pending explicit-key context.
4526        self.explicit_key_pending = false;
4527        // Find the innermost mapping entry on the stack.
4528        for entry in self.coll_stack.iter_mut().rev() {
4529            if let CollectionEntry::Mapping(_, phase, has_had_value) = entry {
4530                *phase = match *phase {
4531                    MappingPhase::Key => {
4532                        *has_had_value = true;
4533                        MappingPhase::Value
4534                    }
4535                    MappingPhase::Value => MappingPhase::Key,
4536                };
4537                return;
4538            }
4539            // Sequences between this mapping and the top don't count.
4540            if let CollectionEntry::Sequence(_, has_had_item) = entry {
4541                // A scalar here is an item in a sequence, not a mapping value.
4542                // Mark the sequence as having a completed item.
4543                *has_had_item = true;
4544                return;
4545            }
4546        }
4547    }
4548}
4549
4550impl<'input> Iterator for EventIter<'input> {
4551    type Item = Result<(Event<'input>, Span), Error>;
4552
4553    fn next(&mut self) -> Option<Self::Item> {
4554        // After an error, stop immediately — prevent infinite loops on the
4555        // same problematic input (e.g. depth-limit on a prepended synthetic line).
4556        if self.failed {
4557            return None;
4558        }
4559
4560        // Iterative dispatch — avoids unbounded recursion on large bare docs.
4561        loop {
4562            // Drain the event queue first.
4563            if let Some(event) = self.queue.pop_front() {
4564                return Some(Ok(event));
4565            }
4566
4567            let step = match self.state {
4568                IterState::BeforeStream => {
4569                    self.state = IterState::BetweenDocs;
4570                    return Some(Ok((Event::StreamStart, zero_span(Pos::ORIGIN))));
4571                }
4572                IterState::BetweenDocs => self.step_between_docs(),
4573                IterState::InDocument => self.step_in_document(),
4574                IterState::Done => return None,
4575            };
4576
4577            match step {
4578                StepResult::Continue => {}
4579                StepResult::Yield(result) => return Some(result),
4580            }
4581        }
4582    }
4583}
4584
4585// ---------------------------------------------------------------------------
4586// Unit tests for private helpers (Gap 2: peek/consume divergence guard)
4587// ---------------------------------------------------------------------------
4588
#[cfg(test)]
mod tests {
    use super::{find_value_indicator_offset, is_implicit_mapping_line};

    /// Guards the peek/consume contract between the two helpers.
    ///
    /// Any line that `is_implicit_mapping_line` accepts must make
    /// `find_value_indicator_offset` return `Some`, and any line it rejects
    /// must make it return `None`.  The `consume_mapping_entry` call site
    /// depends on that agreement through an `unreachable!`; should the two
    /// helpers drift apart, a later change would turn into a runtime panic
    /// under `#[deny(clippy::panic)]`.
    ///
    /// Accepted samples cover a bare trailing colon, colon followed by space
    /// or tab, quoted keys, keys containing spaces or dashes, and a
    /// multi-byte character ahead of the colon.  Rejected samples cover
    /// plain text, a URL-style colon, a comment line, and the empty line.
    #[test]
    fn find_value_indicator_agrees_with_is_implicit_mapping_line() {
        const MAPPING_LINES: &[&str] = &[
            "key:",
            "key: value",
            "key:\t",
            "key:  multiple spaces",
            "\"quoted key\": val",
            "'single quoted': val",
            "key with spaces: val",
            "k:",
            "longer-key-with-dashes: v",
            "unicode_\u{00e9}: v",
        ];
        const NON_MAPPING_LINES: &[&str] = &[
            "plain scalar",
            "http://example.com",
            "no colon here",
            "# comment: not a key",
            "",
        ];

        for &line in MAPPING_LINES {
            let peeked = is_implicit_mapping_line(line);
            assert!(
                peeked,
                "expected is_implicit_mapping_line to accept: {line:?}"
            );
            let offset = find_value_indicator_offset(line);
            assert!(
                offset.is_some(),
                "find_value_indicator_offset must return Some for accepted line: {line:?}"
            );
        }

        for &line in NON_MAPPING_LINES {
            let peeked = is_implicit_mapping_line(line);
            assert!(
                !peeked,
                "expected is_implicit_mapping_line to reject: {line:?}"
            );
            let offset = find_value_indicator_offset(line);
            assert!(
                offset.is_none(),
                "find_value_indicator_offset must return None for rejected line: {line:?}"
            );
        }
    }
}
rlsp_yaml_parser/lib.rs

rlsp_yaml_parser/
lib.rs