rust_yaml/parser/mod.rs
1//! YAML parser for converting tokens to events
2
3use crate::{
4 BasicScanner, Error, Limits, Position, Result, Scanner, Token, TokenType, error::ErrorContext,
5 tag::TagResolver,
6};
7
8pub mod events;
9pub mod streaming;
10// pub mod optimizations; // Temporarily disabled
11pub use events::*;
12pub use streaming::*;
13// pub use optimizations::*;
14
/// Trait for YAML parsers that convert token streams to events.
///
/// The interface is a peek/consume pair over the event stream plus an
/// availability check, a reset hook, and a position query.
pub trait Parser {
    /// Check if there are more events available.
    fn check_event(&self) -> bool;

    /// Peek at the next event without consuming it.
    ///
    /// NOTE(review): `Ok(None)` presumably means no further event is
    /// available — confirm against the implementations.
    fn peek_event(&self) -> Result<Option<&Event>>;

    /// Get the next event, consuming it.
    ///
    /// Returns an owned [`Event`], unlike [`Parser::peek_event`], which only
    /// lends a reference.
    fn get_event(&mut self) -> Result<Option<Event>>;

    /// Reset the parser state.
    fn reset(&mut self);

    /// Get the current position in the input.
    fn position(&self) -> Position;
}
32
33/// Walks back through `events` looking for an unclosed `DocumentStart`
34/// (i.e. one without a matching `DocumentEnd` after it). Returns true if
35/// the parser is still inside a document.
36fn has_open_document(events: &[Event]) -> bool {
37 for ev in events.iter().rev() {
38 match &ev.event_type {
39 EventType::DocumentEnd { .. } => return false,
40 EventType::DocumentStart { .. } => return true,
41 _ => {}
42 }
43 }
44 false
45}
46
47/// Emit `MappingEnd` / `SequenceEnd` events to close any unbalanced
48/// collection starts in `events`. Called before emitting a `DocumentEnd`
49/// when an outer construct (e.g. a new `---` marker) forces the previous
50/// document closed without going through the usual indent-driven
51/// `BlockEnd` token path. Also synthesises implicit empty scalars for
52/// mappings that have an odd child-event count (i.e. a key without a
53/// value before the close).
54/// Return true when there is at least one *flow* collection still open
55/// (a `SequenceStart` / `MappingStart` with `flow_style=true` without a
56/// matching `…End` afterwards). Used at end-of-stream to enforce §7.4.
57fn has_unclosed_flow_collection(events: &[Event]) -> bool {
58 let mut depth: i32 = 0;
59 for ev in events.iter() {
60 match &ev.event_type {
61 EventType::SequenceStart {
62 flow_style: true, ..
63 }
64 | EventType::MappingStart {
65 flow_style: true, ..
66 } => depth += 1,
67 EventType::SequenceEnd | EventType::MappingEnd if depth > 0 => {
68 depth -= 1;
69 }
70 _ => {}
71 }
72 }
73 depth > 0
74}
75
76/// Walk `events` to detect a document that already contains a closed
77/// root-level node. Returns true when the second root node arrives and
78/// the existing event stack has no unmatched MapStart/SeqStart.
79fn second_root_node_present(events: &[Event]) -> bool {
80 let mut after_doc_start = false;
81 let mut has_root_node = false;
82 let mut depth = 0i32;
83 for e in events.iter() {
84 match &e.event_type {
85 EventType::DocumentStart { .. } => {
86 after_doc_start = true;
87 has_root_node = false;
88 depth = 0;
89 }
90 EventType::DocumentEnd { .. } => after_doc_start = false,
91 EventType::MappingStart { .. } | EventType::SequenceStart { .. } => depth += 1,
92 EventType::MappingEnd | EventType::SequenceEnd => {
93 depth -= 1;
94 if depth == 0 {
95 has_root_node = true;
96 }
97 }
98 EventType::Scalar { .. } | EventType::Alias { .. } if depth == 0 => {
99 has_root_node = true;
100 }
101 _ => {}
102 }
103 }
104 after_doc_start && has_root_node && depth == 0
105}
106
107/// Return true when the innermost still-open mapping has an odd number
108/// of children — i.e. a key has been emitted but its value has not.
109/// Used to decide when to synthesise an implicit empty scalar.
110fn innermost_mapping_has_odd_children(events: &[Event]) -> bool {
111 let mut stack: Vec<(&'static str, usize)> = Vec::new();
112 for ev in events.iter() {
113 match &ev.event_type {
114 EventType::DocumentStart { .. } | EventType::DocumentEnd { .. } => {
115 stack.clear();
116 }
117 EventType::MappingStart { .. } => stack.push(("map", 0)),
118 EventType::SequenceStart { .. } => stack.push(("seq", 0)),
119 EventType::MappingEnd | EventType::SequenceEnd => {
120 stack.pop();
121 if let Some(parent) = stack.last_mut() {
122 parent.1 += 1;
123 }
124 }
125 EventType::Scalar { .. } | EventType::Alias { .. } => {
126 if let Some(parent) = stack.last_mut() {
127 parent.1 += 1;
128 }
129 }
130 _ => {}
131 }
132 }
133 matches!(stack.last(), Some(("map", n)) if n % 2 == 1)
134}
135
136fn close_open_collections(events: &mut Vec<Event>, pos: Position) {
137 // Each entry: (kind, children_at_this_depth) where `kind` is "map"
138 // or "seq". `children` counts top-level node events emitted inside
139 // this collection (Scalar, Alias, or a closed nested collection).
140 let mut stack: Vec<(&'static str, usize)> = Vec::new();
141 for ev in events.iter() {
142 match &ev.event_type {
143 EventType::DocumentStart { .. } | EventType::DocumentEnd { .. } => {
144 stack.clear();
145 }
146 EventType::MappingStart { .. } => stack.push(("map", 0)),
147 EventType::SequenceStart { .. } => stack.push(("seq", 0)),
148 EventType::MappingEnd | EventType::SequenceEnd => {
149 stack.pop();
150 if let Some(parent) = stack.last_mut() {
151 parent.1 += 1;
152 }
153 }
154 EventType::Scalar { .. } | EventType::Alias { .. } => {
155 if let Some(parent) = stack.last_mut() {
156 parent.1 += 1;
157 }
158 }
159 _ => {}
160 }
161 }
162 while let Some((kind, children)) = stack.pop() {
163 if kind == "map" && children % 2 == 1 {
164 // Odd child count → last key has no value yet. Spec says
165 // emit implicit empty scalar (YAML 1.2 §6.9.1).
166 events.push(Event::scalar(
167 pos,
168 None,
169 None,
170 String::new(),
171 true,
172 false,
173 ScalarStyle::Plain,
174 ));
175 }
176 match kind {
177 "map" => events.push(Event::mapping_end(pos)),
178 "seq" => events.push(Event::sequence_end(pos)),
179 _ => {}
180 }
181 }
182}
183
/// Basic parser implementation that converts tokens to events.
///
/// Tokens are pulled from the owned [`BasicScanner`] and translated into
/// [`Event`]s that accumulate in `events`. Most of the remaining fields are
/// small pieces of look-behind state used by the token handlers to decide
/// where anchors, tags and implicit values belong.
#[derive(Debug)]
pub struct BasicParser {
    /// Token source the parser reads from.
    scanner: BasicScanner,
    /// Events produced so far, in emission order.
    events: Vec<Event>,
    /// Starts at 0. NOTE(review): presumably the index of the next event to
    /// hand out from `events` — confirm against the `Parser` impl.
    event_index: usize,
    /// Current parser state.
    state: ParserState,
    /// Enclosing states saved when a nested collection is opened, so the
    /// outer collection's state can be restored when the inner one closes.
    state_stack: Vec<ParserState>,
    /// End position of the most recently fetched token (initially the
    /// scanner's position at construction time).
    position: Position,
    /// Anchor seen but not yet attached to a node; taken when the next
    /// node event is emitted.
    pending_anchor: Option<String>,
    /// Line where `pending_anchor` was set. Used to distinguish a
    /// "freestanding" anchor (alone on its own line — belongs to the
    /// upcoming collection) from an "inline" anchor (same line as the
    /// next key — belongs to that key). yaml-test-suite 6BFJ, 9KAX.
    pending_anchor_line: Option<usize>,
    /// Line of the most recent `?` Key marker. Used to detect when
    /// an explicit-key construct has an inline single-pair mapping as
    /// its key (yaml-test-suite M2N8/00, M2N8/01, V9D5).
    last_key_marker_line: Option<usize>,
    /// Column of the most recent `?` Key marker. Used in V9D5: when
    /// a `:` arrives at the same column as the most recent `?` on
    /// a later line, it's the explicit value separator — close any
    /// inline-wrapped inner mapping first.
    last_key_marker_column: Option<usize>,
    /// Set when an explicit value separator just closed an inline-
    /// wrapped key. The next `<scalar>:<scalar>` on the same line
    /// should also be wrapped in an inner mapping (V9D5's value side).
    just_closed_inline_wrap: bool,
    /// Column of an open inline-wrap mapping (V9D5). Used to detect
    /// the matching explicit-value separator and close it.
    inline_wrap_column: Option<usize>,
    /// Tag seen but not yet attached to a node; taken when the next node
    /// event is emitted.
    pending_tag: Option<String>,
    /// Same idea as `pending_anchor_line` but for tags. Used to detect
    /// a freestanding tag in block-sequence context that should be
    /// flushed as the previous item's empty value rather than carried
    /// onto the next item (yaml-test-suite FH7J).
    pending_tag_line: Option<usize>,
    /// Payload-free copy of the previously processed token's type, consulted
    /// by heuristics that depend on what came just before (e.g. whether a
    /// `BlockMappingStart` follows a `Key` or `Value` token).
    last_token_type: Option<TokenType>,
    /// Error captured during eager construction (from the scanner or from
    /// parsing) and held for later reporting instead of being returned
    /// from the constructor.
    scanning_error: Option<Error>,
    /// Version from a `%YAML` directive that has not yet been consumed by a
    /// `DocumentStart` event.
    yaml_version: Option<(u8, u8)>,
    /// `(handle, prefix)` pairs from `%TAG` directives waiting to be attached
    /// to the next `DocumentStart` event.
    tag_directives: Vec<(String, String)>,
    /// Resolver that receives every `%TAG` directive; replaced with a fresh
    /// instance when a document is closed by a following `---`.
    tag_resolver: TagResolver,
    /// Anchor names that have been defined so far in the stream. Used to
    /// validate that aliases (`*name`) reference a known anchor (YAML 1.2
    /// §6.9.2). Forward references are forbidden, and we never reset this
    /// set — once defined, an anchor remains referenceable for the rest of
    /// the parse, matching common loader semantics.
    defined_anchors: std::collections::HashSet<String>,
    /// Line of the most recent `:` Value token. Used by the
    /// BlockMappingValue heuristic to tell apart "same-line value
    /// scalar" (6M2F) from "next-line sibling key" (6KGN).
    last_value_token_line: Option<usize>,
    /// True while we're holding an explicit `?` key that has not yet
    /// received its `:`. Used at end-of-stream to distinguish a
    /// spec-legal `? key` with implicit empty value from a missing-`:`
    /// bare scalar (yaml-test-suite 7MNF).
    explicit_key_pending: bool,
    /// Counts implicit single-pair flow mappings still open. A `,` or
    /// `]` while this is > 0 closes the innermost implicit mapping
    /// before continuing the outer flow sequence (§7.5).
    implicit_flow_pair_depth: usize,
}
246
/// Parser state for tracking context.
///
/// The state records what kind of construct the parser is currently inside;
/// token handlers branch on it and enclosing states are saved on
/// `BasicParser::state_stack` while a nested collection is open.
#[derive(Debug, Clone, Copy, PartialEq)]
// NOTE(review): presumably needed because not every variant is constructed —
// confirm which variants are actually unused.
#[allow(dead_code)]
enum ParserState {
    /// Initial state of a freshly constructed parser.
    StreamStart,
    /// Entered once the `StreamEnd` token has been processed.
    StreamEnd,
    /// No document is open; one may begin without an explicit `---`.
    /// Entered after the `StreamStart` token and after a `...` marker.
    ImplicitDocumentStart,
    /// Entered right after an explicit `---` marker.
    DocumentStart,
    /// Inside a document's content; accepted as a valid final state.
    DocumentContent,
    /// Accepted as a valid final state and as a position where directives
    /// are allowed.
    DocumentEnd,
    BlockNode,
    /// Reported as an unclosed mapping if the input ends here.
    BlockMapping,
    /// Entered after a block mapping is opened; the next node is a key.
    BlockMappingKey,
    BlockMappingValue,
    /// Entered after a block sequence is opened; reported as an unclosed
    /// sequence if the input ends here.
    BlockSequence,
    /// Reported as an unclosed mapping if the input ends here.
    FlowMapping,
    FlowMappingKey,
    FlowMappingValue,
    /// Reported as an unclosed sequence if the input ends here.
    FlowSequence,
    BlockEnd,
}
268
269impl BasicParser {
270 /// Create a new streaming parser (lazy parsing)
271 pub fn new(input: String) -> Self {
272 Self::with_limits(input, Limits::default())
273 }
274
275 /// Create a new streaming parser with custom limits
276 pub fn with_limits(input: String, limits: Limits) -> Self {
277 let scanner = BasicScanner::with_limits(input, limits);
278 let position = scanner.position();
279
280 Self {
281 scanner,
282 events: Vec::new(),
283 event_index: 0,
284 state: ParserState::StreamStart,
285 state_stack: Vec::new(),
286 position,
287 pending_anchor: None,
288 pending_anchor_line: None,
289 last_key_marker_line: None,
290 last_key_marker_column: None,
291 just_closed_inline_wrap: false,
292 inline_wrap_column: None,
293 pending_tag: None,
294 pending_tag_line: None,
295 last_token_type: None,
296 scanning_error: None,
297 yaml_version: None,
298 tag_directives: Vec::new(),
299 tag_resolver: TagResolver::new(),
300 defined_anchors: std::collections::HashSet::new(),
301 last_value_token_line: None,
302 explicit_key_pending: false,
303 implicit_flow_pair_depth: 0,
304 }
305 }
306
307 /// Create a new parser with eager parsing (for compatibility)
308 pub fn new_eager(input: String) -> Self {
309 Self::new_eager_with_limits(input, Limits::default())
310 }
311
312 /// Create a new parser with eager parsing and custom limits
313 pub fn new_eager_with_limits(input: String, limits: Limits) -> Self {
314 let mut scanner = BasicScanner::new_eager_with_limits(input, limits);
315 let position = scanner.position();
316
317 // Check if there were any scanning errors and store them
318 let scanning_error = scanner.take_scanning_error();
319
320 let mut parser = Self {
321 scanner,
322 events: Vec::new(),
323 event_index: 0,
324 state: ParserState::StreamStart,
325 state_stack: Vec::new(),
326 position,
327 pending_anchor: None,
328 pending_anchor_line: None,
329 last_key_marker_line: None,
330 last_key_marker_column: None,
331 just_closed_inline_wrap: false,
332 inline_wrap_column: None,
333 pending_tag: None,
334 pending_tag_line: None,
335 last_token_type: None,
336 scanning_error: None,
337 yaml_version: None,
338 tag_directives: Vec::new(),
339 tag_resolver: TagResolver::new(),
340 defined_anchors: std::collections::HashSet::new(),
341 last_value_token_line: None,
342 explicit_key_pending: false,
343 implicit_flow_pair_depth: 0,
344 };
345
346 // If there was a scanning error, store it for later propagation.
347 // Likewise, surface eager-parse errors via the same field so
348 // `take_scanning_error` reports them.
349 if let Some(error) = scanning_error {
350 parser.scanning_error = Some(error);
351 } else if let Err(error) = parser.parse_all() {
352 parser.scanning_error = Some(error);
353 }
354
355 parser
356 }
357
358 /// Create parser from existing scanner
359 pub fn from_scanner(scanner: BasicScanner) -> Self {
360 let position = scanner.position();
361
362 let mut parser = Self {
363 scanner,
364 events: Vec::new(),
365 event_index: 0,
366 state: ParserState::StreamStart,
367 state_stack: Vec::new(),
368 position,
369 pending_anchor: None,
370 pending_anchor_line: None,
371 last_key_marker_line: None,
372 last_key_marker_column: None,
373 just_closed_inline_wrap: false,
374 inline_wrap_column: None,
375 pending_tag: None,
376 pending_tag_line: None,
377 last_token_type: None,
378 scanning_error: None,
379 yaml_version: None,
380 tag_directives: Vec::new(),
381 tag_resolver: TagResolver::new(),
382 defined_anchors: std::collections::HashSet::new(),
383 last_value_token_line: None,
384 explicit_key_pending: false,
385 implicit_flow_pair_depth: 0,
386 };
387
388 parser.parse_all().unwrap_or(());
389 parser
390 }
391
392 /// Parse all tokens into events
393 fn parse_all(&mut self) -> Result<()> {
394 while self.scanner.check_token() {
395 let token = match self.scanner.get_token()? {
396 Some(token) => token,
397 None => break,
398 };
399
400 self.position = token.end_position;
401 self.process_token(token)?;
402 }
403
404 // Check for unclosed structures
405 self.validate_final_state()?;
406
407 // Ensure stream end
408 if !self
409 .events
410 .iter()
411 .any(|e| matches!(e.event_type, EventType::StreamEnd))
412 {
413 self.events.push(Event::stream_end(self.position));
414 }
415
416 Ok(())
417 }
418
419 /// YAML 1.2 §6.8: directives may appear only before the first
420 /// document (`StreamStart` / `ImplicitDocumentStart`) or after an
421 /// explicit `...` (`DocumentEnd`). Anywhere else they're invalid.
422 fn check_directive_context(&self, pos: Position, name: &str) -> Result<()> {
423 if matches!(
424 self.state,
425 ParserState::StreamStart
426 | ParserState::ImplicitDocumentStart
427 | ParserState::DocumentEnd
428 ) {
429 Ok(())
430 } else {
431 Err(Error::parse(
432 pos,
433 format!("{name} directive is only allowed before a document or after `...`"),
434 ))
435 }
436 }
437
438 /// Create implicit document start event with directives
439 fn create_implicit_document_start(&mut self, position: Position) -> Event {
440 let event = Event::document_start(
441 position,
442 self.yaml_version.take(),
443 self.tag_directives.clone(),
444 true,
445 );
446 self.tag_directives.clear();
447 event
448 }
449
450 /// Validate that the parser is in a valid final state
451 fn validate_final_state(&self) -> Result<()> {
452 match self.state {
453 ParserState::StreamEnd | ParserState::DocumentEnd | ParserState::DocumentContent => {
454 // These are valid final states
455 Ok(())
456 }
457 ParserState::BlockSequence | ParserState::FlowSequence => {
458 let context = ErrorContext::from_input(self.scanner.input(), &self.position, 2)
459 .with_suggestion(
460 "Close the sequence with proper indentation or closing bracket".to_string(),
461 );
462 Err(Error::unclosed_delimiter_with_context(
463 self.position,
464 self.position,
465 "sequence",
466 context,
467 ))
468 }
469 ParserState::BlockMapping | ParserState::FlowMapping => {
470 let context = ErrorContext::from_input(self.scanner.input(), &self.position, 2)
471 .with_suggestion(
472 "Close the mapping with proper indentation or closing brace".to_string(),
473 );
474 Err(Error::unclosed_delimiter_with_context(
475 self.position,
476 self.position,
477 "mapping",
478 context,
479 ))
480 }
481 _ => {
482 let context = ErrorContext::from_input(self.scanner.input(), &self.position, 2)
483 .with_suggestion("Complete the YAML document structure".to_string());
484 Err(Error::parse_with_context(
485 self.position,
486 format!("Document ended in unexpected state: {:?}", self.state),
487 context,
488 ))
489 }
490 }
491 }
492
493 /// Generate the next event by processing the next token
494 fn generate_next_event(&mut self) -> Result<()> {
495 if let Some(token) = self.scanner.get_token()? {
496 self.position = token.end_position;
497 self.process_token(token)?;
498 }
499 Ok(())
500 }
501
502 /// Process a single token and generate appropriate events
503 #[allow(clippy::cognitive_complexity)]
504 fn process_token(&mut self, token: Token) -> Result<()> {
505 // Store the token type for later use without cloning
506 let token_type_for_tracking = match &token.token_type {
507 TokenType::Scalar(..) => Some(TokenType::Scalar(
508 String::new(),
509 crate::scanner::QuoteStyle::Plain,
510 )),
511 TokenType::BlockScalarLiteral(..) => Some(TokenType::BlockScalarLiteral(String::new())),
512 TokenType::BlockScalarFolded(..) => Some(TokenType::BlockScalarFolded(String::new())),
513 TokenType::Alias(..) => Some(TokenType::Alias(String::new())),
514 TokenType::Anchor(..) => Some(TokenType::Anchor(String::new())),
515 TokenType::Tag(..) => Some(TokenType::Tag(String::new())),
516 TokenType::Comment(..) => Some(TokenType::Comment(String::new())),
517 other => {
518 // For simple token types without data, we can safely clone
519 match other {
520 TokenType::StreamStart => Some(TokenType::StreamStart),
521 TokenType::StreamEnd => Some(TokenType::StreamEnd),
522 TokenType::DocumentStart => Some(TokenType::DocumentStart),
523 TokenType::DocumentEnd => Some(TokenType::DocumentEnd),
524 TokenType::BlockSequenceStart => Some(TokenType::BlockSequenceStart),
525 TokenType::BlockMappingStart => Some(TokenType::BlockMappingStart),
526 TokenType::BlockEnd => Some(TokenType::BlockEnd),
527 TokenType::FlowSequenceStart => Some(TokenType::FlowSequenceStart),
528 TokenType::FlowSequenceEnd => Some(TokenType::FlowSequenceEnd),
529 TokenType::FlowMappingStart => Some(TokenType::FlowMappingStart),
530 TokenType::FlowMappingEnd => Some(TokenType::FlowMappingEnd),
531 TokenType::BlockEntry => Some(TokenType::BlockEntry),
532 TokenType::FlowEntry => Some(TokenType::FlowEntry),
533 TokenType::Key => Some(TokenType::Key),
534 TokenType::Value => Some(TokenType::Value),
535 TokenType::YamlDirective(_, _) => Some(TokenType::YamlDirective(0, 0)),
536 TokenType::TagDirective(_, _) => {
537 Some(TokenType::TagDirective(String::new(), String::new()))
538 }
539 _ => None,
540 }
541 }
542 };
543
544 match &token.token_type {
545 TokenType::StreamStart => {
546 self.events.push(Event::stream_start(token.start_position));
547 self.state = ParserState::ImplicitDocumentStart;
548 }
549
550 TokenType::StreamEnd => {
551 // YAML 1.2 §6.8: a directive must be followed by a
552 // document body. If we reach end-of-stream with pending
553 // `%YAML` / `%TAG` directives and no document was ever
554 // opened, that's a parse error (yaml-test-suite 9MMA, B63P).
555 if matches!(
556 self.state,
557 ParserState::ImplicitDocumentStart | ParserState::StreamStart
558 ) && (self.yaml_version.is_some() || !self.tag_directives.is_empty())
559 {
560 return Err(Error::parse(
561 token.start_position,
562 "Directive without a document body",
563 ));
564 }
565 // YAML 1.2 §7.4: every `[` / `{` must be closed before
566 // end-of-stream. Walk the events; an unmatched
567 // FlowSequenceStart / FlowMappingStart is invalid
568 // (yaml-test-suite 6JTT, 9HCY, 9MQT/01).
569 if has_unclosed_flow_collection(&self.events) {
570 return Err(Error::parse(
571 token.start_position,
572 "Unclosed flow collection at end of stream",
573 ));
574 }
575 // YAML 1.2 §8.1.3.1: an implicit mapping key must be
576 // followed by `:`. A bare scalar at a mapping position
577 // with no \`:\` (and no explicit `?` marker) is invalid
578 // (yaml-test-suite 7MNF).
579 if matches!(self.state, ParserState::BlockMappingKey)
580 && !self.explicit_key_pending
581 && innermost_mapping_has_odd_children(&self.events)
582 {
583 // §8.22 carve-out: when the unmatched "key" is
584 // actually a collection node (the inline-wrapped
585 // explicit-key from yaml-test-suite M2N8 cluster),
586 // synth an empty value rather than erroring — the
587 // explicit-key construct allows omitted values.
588 let key_was_collection = matches!(
589 self.events.last().map(|e| &e.event_type),
590 Some(EventType::MappingEnd | EventType::SequenceEnd)
591 );
592 if key_was_collection {
593 self.events.push(Event::scalar(
594 token.start_position,
595 None,
596 None,
597 String::new(),
598 true,
599 false,
600 ScalarStyle::Plain,
601 ));
602 } else {
603 return Err(Error::parse(
604 token.start_position,
605 "Mapping key not followed by `:`",
606 ));
607 }
608 }
609 // YAML 1.2: an explicit `---` with NO body needs an
610 // implicit empty scalar as the doc's content (yaml-test-
611 // suite MUS6/02). We detect that case by checking the
612 // last emitted event — if it's still `DocumentStart`,
613 // nothing has been pushed to the body yet.
614 if matches!(
615 self.events.last().map(|e| &e.event_type),
616 Some(EventType::DocumentStart { .. })
617 ) {
618 self.events.push(Event::scalar(
619 token.start_position,
620 None,
621 None,
622 String::new(),
623 true,
624 false,
625 ScalarStyle::Plain,
626 ));
627 }
628 // §6.9: a stand-alone anchor or tag at end-of-stream
629 // produces a document with a tagged/anchored empty
630 // scalar (yaml-test-suite UKK6/02 — a bare \`!\`).
631 if matches!(self.state, ParserState::ImplicitDocumentStart)
632 && (self.pending_anchor.is_some() || self.pending_tag.is_some())
633 {
634 let event = self.create_implicit_document_start(token.start_position);
635 self.events.push(event);
636 self.events.push(Event::scalar(
637 token.start_position,
638 self.pending_anchor.take(),
639 self.pending_tag.take(),
640 String::new(),
641 true,
642 false,
643 ScalarStyle::Plain,
644 ));
645 self.state = ParserState::DocumentContent;
646 }
647 // Close any open document. A document is "open" in every
648 // state except: not-yet-started (StreamStart /
649 // ImplicitDocumentStart), or already closed (DocumentEnd /
650 // StreamEnd). If still open, also flush any unclosed
651 // block collections first.
652 if !matches!(
653 self.state,
654 ParserState::StreamStart
655 | ParserState::ImplicitDocumentStart
656 | ParserState::DocumentEnd
657 | ParserState::StreamEnd
658 ) {
659 close_open_collections(&mut self.events, token.start_position);
660 self.events
661 .push(Event::document_end(token.start_position, true));
662 }
663 self.events.push(Event::stream_end(token.start_position));
664 self.state = ParserState::StreamEnd;
665 }
666
667 TokenType::YamlDirective(major, minor) => {
668 // YAML 1.2 §6.8: directives may appear only before the
669 // first document or after an explicit `...` document end.
670 self.check_directive_context(token.start_position, "%YAML")?;
671 // §6.8.1: a document may have at most one `%YAML` directive.
672 if self.yaml_version.is_some() {
673 return Err(Error::parse(
674 token.start_position,
675 "Multiple %YAML directives in the same document",
676 ));
677 }
678 self.yaml_version = Some((*major, *minor));
679 }
680
681 TokenType::TagDirective(handle, prefix) => {
682 self.check_directive_context(token.start_position, "%TAG")?;
683 self.tag_directives.push((handle.clone(), prefix.clone()));
684 self.tag_resolver
685 .add_directive(handle.clone(), prefix.clone());
686 }
687
688 TokenType::DocumentStart => {
689 // If the most-recent event is still `DocumentStart`, the
690 // previous document had no body — emit an implicit empty
691 // scalar before closing it (yaml-test-suite 6XDY).
692 if matches!(
693 self.events.last().map(|e| &e.event_type),
694 Some(EventType::DocumentStart { .. })
695 ) {
696 self.events.push(Event::scalar(
697 token.start_position,
698 None,
699 None,
700 String::new(),
701 true,
702 false,
703 ScalarStyle::Plain,
704 ));
705 self.events
706 .push(Event::document_end(token.start_position, true));
707 // §6.8: \`%TAG\` and \`%YAML\` are scoped to one document.
708 // After the implicit close, reset the tag resolver
709 // so directives from the prior doc don't leak
710 // (yaml-test-suite QLJ7).
711 self.tag_resolver = TagResolver::new();
712 } else if has_open_document(&self.events) {
713 // The previous document is still open — its outer
714 // collection(s) and the document itself need closing
715 // before the new `---` (yaml-test-suite 35KP).
716 close_open_collections(&mut self.events, token.start_position);
717 self.events
718 .push(Event::document_end(token.start_position, true));
719 self.tag_resolver = TagResolver::new();
720 }
721
722 // Create document start with directives
723 self.events.push(Event::document_start(
724 token.start_position,
725 self.yaml_version.take(),
726 self.tag_directives.clone(),
727 false,
728 ));
729
730 // Clear tag directives after using them (YAML version persists across documents)
731 // But keep them in the tag resolver for this document
732 self.tag_directives.clear();
733
734 self.state = ParserState::DocumentStart;
735 }
736
737 TokenType::DocumentEnd => {
738 // §6.8: `...` only terminates an *open* document. If
739 // the stream so far has no DocumentStart (e.g. the
740 // input is just `...\n`, yaml-test-suite HWV9), the
741 // marker is a no-op.
742 if !has_open_document(&self.events) {
743 self.state = ParserState::ImplicitDocumentStart;
744 self.last_token_type = token_type_for_tracking;
745 return Ok(());
746 }
747 // Same empty-doc fixup as in DocumentStart/StreamEnd:
748 // `---\n...` needs an implicit empty scalar.
749 if matches!(
750 self.events.last().map(|e| &e.event_type),
751 Some(EventType::DocumentStart { .. })
752 ) {
753 self.events.push(Event::scalar(
754 token.start_position,
755 None,
756 None,
757 String::new(),
758 true,
759 false,
760 ScalarStyle::Plain,
761 ));
762 } else {
763 // Flush any still-open block collections so the
764 // event stream is balanced before -DOC.
765 close_open_collections(&mut self.events, token.start_position);
766 }
767 self.events
768 .push(Event::document_end(token.start_position, false));
769 // YAML 1.2: after `...`, the stream may continue with
770 // either another `---`, more directives, or implicit
771 // document content.
772 self.state = ParserState::ImplicitDocumentStart;
773 }
774
775 TokenType::BlockSequenceStart => {
776 // §3.2.1.1: reject a second root-level node
777 // (yaml-test-suite BD7L: `- a\n- b\ninvalid: x`).
778 if matches!(self.state, ParserState::DocumentContent)
779 && second_root_node_present(&self.events)
780 {
781 return Err(Error::parse(
782 token.start_position,
783 "Document already contains a root node",
784 ));
785 }
786 if matches!(self.state, ParserState::ImplicitDocumentStart) {
787 let event = self.create_implicit_document_start(token.start_position);
788 self.events.push(event);
789 }
790
791 // If we're starting a sequence within a mapping or
792 // outer-sequence context, push the current state so the
793 // outer collection can be restored on close. Without
794 // BlockSequence in this list, a nested `- -` sequence's
795 // inner close falls through to DocumentContent and the
796 // next BlockEntry spuriously opens a fresh sequence
797 // (yaml-test-suite 3ALJ, 57H4).
798 if matches!(
799 self.state,
800 ParserState::BlockMappingValue
801 | ParserState::BlockMappingKey
802 | ParserState::BlockSequence
803 ) {
804 self.state_stack.push(self.state);
805 }
806
807 self.events.push(Event::sequence_start(
808 token.start_position,
809 self.pending_anchor.take(),
810 self.pending_tag.take(),
811 false,
812 ));
813 self.state = ParserState::BlockSequence;
814 }
815
816 TokenType::BlockMappingStart => {
817 // §3.2.1.1: reject a second root-level node
818 // (yaml-test-suite BD7L variants).
819 if matches!(self.state, ParserState::DocumentContent)
820 && second_root_node_present(&self.events)
821 {
822 return Err(Error::parse(
823 token.start_position,
824 "Document already contains a root node",
825 ));
826 }
827 // §9.1.1: an anchor on the \`---\` doc-start line cannot
828 // be followed by an implicit single-pair mapping —
829 // the anchor would have nowhere to attach (it's not
830 // the mapping itself, not the key). \`--- &anchor a: b\`
831 // is invalid (yaml-test-suite CXX2).
832 if matches!(self.state, ParserState::DocumentStart)
833 && self.pending_anchor.is_some()
834 && self.pending_anchor_line == Some(token.start_position.line)
835 {
836 return Err(Error::parse(
837 token.start_position,
838 "Anchor on `---` doc-start line cannot precede an implicit mapping",
839 ));
840 }
841 // Determine whether to create a new mapping or continue existing one
842 // This token is generated when we encounter a key at the start of a line with nested content
843 // It doesn't always mean we need to create a new mapping - sometimes we're just continuing
844
845 let should_create_new_mapping = match self.state {
846 ParserState::ImplicitDocumentStart => {
847 // At document start, we need a new mapping
848 true
849 }
850 ParserState::DocumentStart => {
851 // After explicit document start (---), we need a new mapping
852 true
853 }
854 ParserState::DocumentContent => {
855 // This is a tricky case - we could be:
856 // 1. Starting a new root mapping
857 // 2. Continuing an existing root mapping
858 // The key is to check if we have an unclosed root mapping
859
860 // Count mapping depth from the end
861 let mut mapping_depth = 0;
862 let mut has_unclosed_mapping = false;
863
864 for event in self.events.iter().rev() {
865 match &event.event_type {
866 EventType::MappingEnd => mapping_depth += 1,
867 EventType::MappingStart { .. } => {
868 if mapping_depth == 0 {
869 has_unclosed_mapping = true;
870 break;
871 }
872 mapping_depth -= 1;
873 }
874 EventType::DocumentStart { .. } => break,
875 _ => {}
876 }
877 }
878
879 // Don't create a new mapping if we have an unclosed one
880 !has_unclosed_mapping
881 }
882 ParserState::BlockMappingValue => {
883 // If we're expecting a value and see BlockMappingStart, it's a nested mapping
884 true
885 }
886 ParserState::BlockMappingKey => {
887 // We're already in a mapping key context
888 // BlockMappingStart here means we're continuing the mapping unless:
889 // - After a Key token (complex key)
890 // - After a Value token (nested mapping as value)
891 matches!(
892 &self.last_token_type,
893 Some(TokenType::Key | TokenType::Value)
894 )
895 }
896 ParserState::BlockSequence => {
897 // In a sequence context, BlockMappingStart means we're starting
898 // a nested mapping as a sequence item
899 true
900 }
901 _ => {
902 // For other states, check last token
903 matches!(
904 &self.last_token_type,
905 Some(TokenType::Key | TokenType::Value)
906 )
907 }
908 };
909
910 if should_create_new_mapping {
911 // Create a new nested mapping
912 if matches!(self.state, ParserState::ImplicitDocumentStart) {
913 let event = self.create_implicit_document_start(token.start_position);
914 self.events.push(event);
915 }
916
917 // If we're in a mapping value or sequence context, push state to stack
918 if matches!(
919 self.state,
920 ParserState::BlockMappingValue | ParserState::BlockSequence
921 ) {
922 self.state_stack.push(self.state);
923 }
924
925 // If the BlockMappingStart wraps an implicit key
926 // at the document root and the next token is the
927 // key scalar on the SAME line as the pending
928 // anchor/tag, those properties belong to the key —
929 // not to the surrounding mapping (yaml-test-suite
930 // ZH7C, E76Z, 74H7). For mappings nested in a value
931 // or sequence position, or when the anchor is
932 // "freestanding" on a previous line, the anchor
933 // attaches to the mapping itself (yaml-test-suite
934 // 6BFJ, 9KAX).
935 let in_value_position = matches!(
936 self.state,
937 ParserState::BlockMappingValue | ParserState::BlockSequence
938 );
939 let next_token_line = self
940 .scanner
941 .peek_token()
942 .ok()
943 .and_then(|t| t.map(|tt| tt.start_position.line));
944 let next_is_scalar = matches!(
945 self.scanner.peek_token(),
946 Ok(Some(t)) if matches!(
947 t.token_type,
948 TokenType::Scalar(..) | TokenType::Anchor(_) | TokenType::Tag(_)
949 )
950 );
951 let anchor_same_line_as_key = matches!(
952 (self.pending_anchor_line, next_token_line),
953 (Some(a), Some(k)) if a == k
954 );
955 let (anchor, tag) =
956 if !in_value_position && next_is_scalar && anchor_same_line_as_key {
957 (None, None)
958 } else {
959 self.pending_anchor_line = None;
960 (self.pending_anchor.take(), self.pending_tag.take())
961 };
962 self.events.push(Event::mapping_start(
963 token.start_position,
964 anchor,
965 tag,
966 false,
967 ));
968 self.state = ParserState::BlockMappingKey;
969 } else {
970 // Continue existing mapping
971 // Ensure we're in the right state to handle the next key-value pair
972 match self.state {
973 ParserState::DocumentContent => {
974 // We should be continuing a mapping, so transition to BlockMappingKey
975 self.state = ParserState::BlockMappingKey;
976 }
977 ParserState::BlockMappingValue => {
978 // We just processed a value, now ready for next key
979 self.state = ParserState::BlockMappingKey;
980 }
981 ParserState::BlockMappingKey => {
982 // Already ready for next key, no state change needed
983 }
984 _ => {
985 // For other states, check if we can restore from state stack
986 if let Some(prev_state) = self.state_stack.last() {
987 if matches!(prev_state, ParserState::BlockMappingValue) {
988 if let Some(mapping_state) = self.state_stack.pop() {
989 self.state = mapping_state;
990 self.handle_node_completion();
991 }
992 }
993 }
994 }
995 }
996 }
997 }
998
999 TokenType::FlowSequenceStart => {
1000 if matches!(self.state, ParserState::ImplicitDocumentStart) {
1001 self.events.push(Event::document_start(
1002 token.start_position,
1003 None,
1004 vec![],
1005 true,
1006 ));
1007 }
1008
1009 // Save the enclosing state so we can restore it
1010 // after the flow collection closes. The save list
1011 // includes any state that can legitimately contain a
1012 // flow node — block contexts AND flow-mapping
1013 // key/value positions (yaml-test-suite SBG9
1014 // \`{a: [b,c], [d,e]: f}\` — without FlowMappingValue
1015 // in this list, state_stack pop'd None and we fell
1016 // through to DocumentContent).
1017 if matches!(
1018 self.state,
1019 ParserState::BlockMappingValue
1020 | ParserState::BlockMappingKey
1021 | ParserState::BlockSequence
1022 | ParserState::FlowSequence
1023 | ParserState::FlowMapping
1024 | ParserState::FlowMappingKey
1025 | ParserState::FlowMappingValue
1026 ) {
1027 self.state_stack.push(self.state);
1028 }
1029
1030 self.events.push(Event::sequence_start(
1031 token.start_position,
1032 self.pending_anchor.take(),
1033 self.pending_tag.take(),
1034 true,
1035 ));
1036 self.state = ParserState::FlowSequence;
1037 }
1038
1039 TokenType::FlowMappingStart => {
1040 if matches!(self.state, ParserState::ImplicitDocumentStart) {
1041 self.events.push(Event::document_start(
1042 token.start_position,
1043 None,
1044 vec![],
1045 true,
1046 ));
1047 }
1048
1049 // Save the enclosing state so we can restore it
1050 // after the flow collection closes. The save list
1051 // includes any state that can legitimately contain a
1052 // flow node — block contexts AND flow-mapping
1053 // key/value positions (yaml-test-suite SBG9
1054 // \`{a: [b,c], [d,e]: f}\` — without FlowMappingValue
1055 // in this list, state_stack pop'd None and we fell
1056 // through to DocumentContent).
1057 if matches!(
1058 self.state,
1059 ParserState::BlockMappingValue
1060 | ParserState::BlockMappingKey
1061 | ParserState::BlockSequence
1062 | ParserState::FlowSequence
1063 | ParserState::FlowMapping
1064 | ParserState::FlowMappingKey
1065 | ParserState::FlowMappingValue
1066 ) {
1067 self.state_stack.push(self.state);
1068 }
1069
1070 self.events.push(Event::mapping_start(
1071 token.start_position,
1072 self.pending_anchor.take(),
1073 self.pending_tag.take(),
1074 true,
1075 ));
1076 self.state = ParserState::FlowMapping;
1077 }
1078
1079 TokenType::FlowSequenceEnd => {
1080 // §7.5: close an open implicit single-pair flow mapping
1081 // before the outer flow sequence ends.
1082 if self.implicit_flow_pair_depth > 0
1083 && matches!(
1084 self.state,
1085 ParserState::FlowMapping
1086 | ParserState::FlowMappingKey
1087 | ParserState::FlowMappingValue
1088 )
1089 && matches!(self.state_stack.last(), Some(ParserState::FlowSequence))
1090 {
1091 if innermost_mapping_has_odd_children(&self.events) {
1092 self.events.push(Event::scalar(
1093 token.start_position,
1094 None,
1095 None,
1096 String::new(),
1097 true,
1098 false,
1099 ScalarStyle::Plain,
1100 ));
1101 }
1102 self.events.push(Event::mapping_end(token.start_position));
1103 self.state = self.state_stack.pop().ok_or_else(|| {
1104 Error::parse(
1105 token.start_position,
1106 "internal: parser state stack underflow at implicit flow mapping end (in flow sequence)",
1107 )
1108 })?;
1109 self.implicit_flow_pair_depth -= 1;
1110 }
1111 self.events.push(Event::sequence_end(token.start_position));
1112
1113 // Restore the previous state from the stack if available
1114 if let Some(prev_state) = self.state_stack.pop() {
1115 self.state = prev_state;
1116 } else {
1117 self.state = ParserState::DocumentContent;
1118 }
1119
1120 // Handle state transitions for mapping key/value processing
1121 self.handle_node_completion();
1122 }
1123
1124 TokenType::FlowMappingEnd => {
1125 // §7.5: explicit `?` with no key (yaml-test-suite
1126 // DFF7 \`{... ?\n}\`) — synth empty key AND empty
1127 // value before closing.
1128 if matches!(self.state, ParserState::FlowMappingKey)
1129 && self.explicit_key_pending
1130 && !innermost_mapping_has_odd_children(&self.events)
1131 {
1132 self.events.push(Event::scalar(
1133 token.start_position,
1134 None,
1135 None,
1136 String::new(),
1137 true,
1138 false,
1139 ScalarStyle::Plain,
1140 ));
1141 }
1142 // Spec §7.5: implicit empty value for a flow-mapping
1143 // entry that has only a key, e.g. `{ key }` or
1144 // `{ key, a: b }` (yaml-test-suite 8KB6).
1145 if innermost_mapping_has_odd_children(&self.events) {
1146 self.events.push(Event::scalar(
1147 token.start_position,
1148 None,
1149 None,
1150 String::new(),
1151 true,
1152 false,
1153 ScalarStyle::Plain,
1154 ));
1155 }
1156 self.events.push(Event::mapping_end(token.start_position));
1157
1158 // Restore the previous state from the stack if available
1159 if let Some(prev_state) = self.state_stack.pop() {
1160 self.state = prev_state;
1161 } else {
1162 self.state = ParserState::DocumentContent;
1163 }
1164
1165 // Handle state transitions for mapping key/value processing
1166 self.handle_node_completion();
1167 }
1168
1169 TokenType::BlockEnd => {
1170 // Determine what we're ending based on current state
1171 match self.state {
1172 ParserState::BlockSequence => {
1173 // §6.9: an anchor or tag left unused at the
1174 // close of the sequence belongs to an empty
1175 // scalar that is the final sequence item
1176 // (yaml-test-suite LE5A: \`- !!str\` produces
1177 // a tagged empty scalar before -SEQ).
1178 // §6.9.1: also if the previous token was
1179 // BlockEntry with no item between — the last
1180 // entry was an implicit empty (yaml-test-suite
1181 // SM9W cluster).
1182 let last_was_block_entry =
1183 matches!(self.last_token_type, Some(TokenType::BlockEntry));
1184 if self.pending_anchor.is_some()
1185 || self.pending_tag.is_some()
1186 || last_was_block_entry
1187 {
1188 self.events.push(Event::scalar(
1189 token.start_position,
1190 self.pending_anchor.take(),
1191 self.pending_tag.take(),
1192 String::new(),
1193 true,
1194 false,
1195 ScalarStyle::Plain,
1196 ));
1197 }
1198 self.events.push(Event::sequence_end(token.start_position));
1199 // Pop previous state from stack if available
1200 if let Some(prev_state) = self.state_stack.pop() {
1201 self.state = prev_state;
1202 // Handle state transitions for mapping key/value processing
1203 self.handle_node_completion();
1204 } else {
1205 self.state = ParserState::DocumentContent;
1206 }
1207 }
1208 ParserState::BlockMapping
1209 | ParserState::BlockMappingKey
1210 | ParserState::BlockMappingValue => {
1211 // §6.9.1: if the innermost mapping has odd
1212 // children (last key has no value), synth an
1213 // implicit empty value before closing
1214 // (yaml-test-suite 7W2P). If the unmatched key
1215 // came from a bare scalar with no `:`
1216 // (yaml-test-suite 7MNF), error instead.
1217 if innermost_mapping_has_odd_children(&self.events) {
1218 if matches!(self.state, ParserState::BlockMappingKey)
1219 && !self.explicit_key_pending
1220 {
1221 // §8.22 carve-out: if the unmatched
1222 // 'key' is a collection node (the
1223 // inline-wrapped explicit-key from
1224 // yaml-test-suite M2N8), synth empty
1225 // value instead of erroring.
1226 let key_was_collection = matches!(
1227 self.events.last().map(|e| &e.event_type),
1228 Some(EventType::MappingEnd | EventType::SequenceEnd)
1229 );
1230 if !key_was_collection {
1231 return Err(Error::parse(
1232 token.start_position,
1233 "Mapping key not followed by `:`",
1234 ));
1235 }
1236 }
1237 // §6.9: when synthesising the missing value
1238 // for the last key, consume any pending
1239 // anchor/tag — they were the property of
1240 // that absent value (yaml-test-suite PW8X
1241 // \`b: &b\\n- ...\` — &b belongs to b's empty
1242 // value, not a separate tagged scalar).
1243 self.events.push(Event::scalar(
1244 token.start_position,
1245 self.pending_anchor.take(),
1246 self.pending_tag.take(),
1247 String::new(),
1248 true,
1249 false,
1250 ScalarStyle::Plain,
1251 ));
1252 }
1253 // Flush leftover anchor/tag as a final tagged
1254 // empty scalar (mirror of the BlockSequence
1255 // arm). Skipped above when the missing-value
1256 // synth already consumed it.
1257 if self.pending_anchor.is_some() || self.pending_tag.is_some() {
1258 self.events.push(Event::scalar(
1259 token.start_position,
1260 self.pending_anchor.take(),
1261 self.pending_tag.take(),
1262 String::new(),
1263 true,
1264 false,
1265 ScalarStyle::Plain,
1266 ));
1267 }
1268 // If the pending-property flush above just
1269 // emitted a KEY (leaving odd children), we
1270 // still need the missing implicit empty
1271 // VALUE before closing the mapping (yaml-
1272 // test-suite PW8X \`? &d\` close case).
1273 if innermost_mapping_has_odd_children(&self.events) {
1274 self.events.push(Event::scalar(
1275 token.start_position,
1276 None,
1277 None,
1278 String::new(),
1279 true,
1280 false,
1281 ScalarStyle::Plain,
1282 ));
1283 }
1284 self.events.push(Event::mapping_end(token.start_position));
1285 // Pop previous state from stack if available
1286 if let Some(prev_state) = self.state_stack.pop() {
1287 self.state = prev_state;
1288 // If we popped back to a mapping value state, complete it
1289 if matches!(self.state, ParserState::BlockMappingValue) {
1290 self.handle_node_completion();
1291 }
1292 } else {
1293 // No state on stack - check if we're still in a root mapping
1294 // Count the mapping depth including the one we just closed
1295 let mut mapping_depth = 0;
1296
1297 for event in self.events.iter().rev() {
1298 match &event.event_type {
1299 EventType::MappingEnd => {
1300 mapping_depth += 1;
1301 }
1302 EventType::MappingStart { .. } => {
1303 if mapping_depth > 0 {
1304 mapping_depth -= 1;
1305 } else {
1306 // Found an unclosed mapping - we're still in the root mapping
1307 self.state = ParserState::BlockMappingKey;
1308 return Ok(());
1309 }
1310 }
1311 EventType::DocumentStart { .. } => break,
1312 _ => {}
1313 }
1314 }
1315
1316 // All mappings are closed
1317 self.state = ParserState::DocumentContent;
1318 }
1319 }
1320 _ => {}
1321 }
1322 }
1323
1324 TokenType::Scalar(value, quote_style) => {
1325 if matches!(self.state, ParserState::ImplicitDocumentStart) {
1326 self.events.push(Event::document_start(
1327 token.start_position,
1328 None,
1329 vec![],
1330 true,
1331 ));
1332 self.state = ParserState::DocumentContent;
1333 }
1334
1335 // §3.2.1.1: a document has exactly one root node.
1336 if matches!(self.state, ParserState::DocumentContent)
1337 && second_root_node_present(&self.events)
1338 {
1339 return Err(Error::parse(
1340 token.start_position,
1341 "Document already contains a root node",
1342 ));
1343 }
1344
1345 // §8.22: in BlockSequence state, every item must be
1346 // introduced by \`-\`. A Scalar arriving when the
1347 // previous token was already a scalar / block-scalar
1348 // / closed-flow-collection means \`- a\\n b\` style —
1349 // \`b\` is bogus content at the sequence's indent
1350 // (yaml-test-suite 6S55).
1351 if matches!(self.state, ParserState::BlockSequence)
1352 && matches!(
1353 self.last_token_type,
1354 Some(
1355 TokenType::Scalar(..)
1356 | TokenType::BlockScalarLiteral(..)
1357 | TokenType::BlockScalarFolded(..)
1358 | TokenType::FlowSequenceEnd
1359 | TokenType::FlowMappingEnd
1360 )
1361 )
1362 {
1363 return Err(Error::parse(
1364 token.start_position,
1365 "Block sequence item must start with `-`",
1366 ));
1367 }
1368
1369 // §7.4: in flow mapping/sequence between entries
1370 // (even children = ready for next key/item) a Scalar
1371 // must be preceded by a separator. If the previous
1372 // token was a Scalar (i.e. previous value just emitted)
1373 // and not a comma, this is a missing-comma error
1374 // (yaml-test-suite T833, CML9).
1375 if matches!(
1376 self.state,
1377 ParserState::FlowMapping | ParserState::FlowSequence
1378 ) && matches!(
1379 self.last_token_type,
1380 Some(
1381 TokenType::Scalar(..)
1382 | TokenType::FlowSequenceEnd
1383 | TokenType::FlowMappingEnd
1384 )
1385 ) {
1386 return Err(Error::parse(
1387 token.start_position,
1388 "Missing `,` separator between flow collection entries",
1389 ));
1390 }
1391
1392 // Check if we're in a sequence and the next token is Value (indicating a mapping key)
1393 if matches!(self.state, ParserState::BlockSequence) {
1394 if let Ok(Some(next_token)) = self.scanner.peek_token() {
1395 if matches!(next_token.token_type, TokenType::Value) {
1396 // This scalar is a mapping key within a sequence item
1397 // Push current state to stack and start a new mapping
1398 self.state_stack.push(self.state);
1399 self.events.push(Event::mapping_start(
1400 token.start_position,
1401 self.pending_anchor.take(),
1402 self.pending_tag.take(),
1403 false,
1404 ));
1405 self.state = ParserState::BlockMappingKey;
1406 }
1407 }
1408 }
1409
1410 // §7.5: a flow-sequence entry that is itself `key: value`
1411 // is an implicit single-pair flow mapping. Any
1412 // pending anchor/tag belongs to the KEY scalar, not
1413 // to the synthesised mapping (yaml-test-suite QF4Y,
1414 // L9U5, 87E4, 8UDB, 9MMW, LX3P, CN3R).
1415 //
1416 // §7.5 also says: an implicit key in flow context
1417 // must be on a SINGLE LINE. If the \`:\` is on a
1418 // different line from the key scalar, it's invalid
1419 // (yaml-test-suite DK4H, ZXT5).
1420 if matches!(self.state, ParserState::FlowSequence) {
1421 if let Ok(Some(next_token)) = self.scanner.peek_token() {
1422 if matches!(next_token.token_type, TokenType::Value) {
1423 if next_token.start_position.line != token.start_position.line {
1424 return Err(Error::parse(
1425 next_token.start_position,
1426 "Implicit key in flow context must be on a single line",
1427 ));
1428 }
1429 self.state_stack.push(self.state);
1430 self.events.push(Event::mapping_start(
1431 token.start_position,
1432 None,
1433 None,
1434 true,
1435 ));
1436 self.state = ParserState::FlowMappingKey;
1437 self.implicit_flow_pair_depth += 1;
1438 }
1439 }
1440 }
1441
1442 // YAML 1.2: if we're in BlockMappingValue and the next
1443 // token is `:` (Value), the current scalar is actually a
1444 // NEW KEY — the previous key's value is implicit empty
1445 // (yaml-test-suite 6KGN: `a: &anchor\nb: *anchor`).
1446 // Emit the empty value first (consuming any pending
1447 // anchor/tag — those were intended for the missing
1448 // value), then transition back to BlockMappingKey.
1449 //
1450 // BUT skip the heuristic when:
1451 // * the most recent event was an implicit empty scalar
1452 // (we just synthesised an empty key for a leading-`:`
1453 // mapping, yaml-test-suite 2JQS), or
1454 // * the current scalar is on the SAME line as the
1455 // previous `:` Value token — that puts the scalar
1456 // in the value slot of the current key
1457 // (yaml-test-suite 6M2F: `? &a a\n: &b b\n: *a`).
1458 // §8.22 V9D5: when we JUST closed an inline-wrapped
1459 // explicit key, the value position can also hold an
1460 // inline single-pair mapping. If the next token is
1461 // \`:\` (a key/value separator on this scalar's line),
1462 // wrap the scalar as the key of an inner mapping.
1463 if matches!(self.state, ParserState::BlockMappingValue)
1464 && self.just_closed_inline_wrap
1465 {
1466 self.just_closed_inline_wrap = false;
1467 if let Ok(Some(next_token)) = self.scanner.peek_token() {
1468 if matches!(next_token.token_type, TokenType::Value)
1469 && next_token.start_position.line == token.start_position.line
1470 {
1471 self.state_stack.push(self.state);
1472 self.events.push(Event::mapping_start(
1473 token.start_position,
1474 None,
1475 None,
1476 false,
1477 ));
1478 self.state = ParserState::BlockMappingKey;
1479 // Fall through to the normal Scalar push;
1480 // the scalar will become the inner key.
1481 }
1482 }
1483 }
1484
1485 if matches!(self.state, ParserState::BlockMappingValue) {
1486 let last_was_implicit_empty = matches!(self.events.last(), Some(ev) if matches!(
1487 &ev.event_type,
1488 EventType::Scalar { value, plain_implicit: true, style: ScalarStyle::Plain, .. }
1489 if value.is_empty()
1490 ));
1491 let same_line_as_value = self
1492 .last_value_token_line
1493 .map_or(false, |line| line == token.start_position.line);
1494 // Skip the "new key" pattern when the scalar IS
1495 // the inline value of a just-synthesised empty
1496 // key — both must hold (yaml-test-suite 2JQS).
1497 // S3PD shows it must NOT skip when the empty
1498 // key was on a different line from the current
1499 // scalar.
1500 let skip_pattern = last_was_implicit_empty && same_line_as_value;
1501 if !skip_pattern && !same_line_as_value {
1502 if let Ok(Some(next_token)) = self.scanner.peek_token() {
1503 if matches!(next_token.token_type, TokenType::Value) {
1504 self.events.push(Event::scalar(
1505 token.start_position,
1506 self.pending_anchor.take(),
1507 self.pending_tag.take(),
1508 String::new(),
1509 true,
1510 false,
1511 ScalarStyle::Plain,
1512 ));
1513 self.state = ParserState::BlockMappingKey;
1514 }
1515 }
1516 }
1517 }
1518
1519 // YAML 1.2 §6.9.1: if we're back at a key position but the
1520 // previous key still owes a value (odd children in the
1521 // active mapping), synthesise the implicit empty scalar
1522 // now — this scalar then becomes the next key
1523 // (yaml-test-suite 7W2P: `? a\n? b\nc:`).
1524 if matches!(self.state, ParserState::BlockMappingKey)
1525 && innermost_mapping_has_odd_children(&self.events)
1526 {
1527 self.events.push(Event::scalar(
1528 token.start_position,
1529 None,
1530 None,
1531 String::new(),
1532 true,
1533 false,
1534 ScalarStyle::Plain,
1535 ));
1536 }
1537
1538 // Convert QuoteStyle to ScalarStyle
1539 let style = match quote_style {
1540 crate::scanner::QuoteStyle::Plain => ScalarStyle::Plain,
1541 crate::scanner::QuoteStyle::Single => ScalarStyle::SingleQuoted,
1542 crate::scanner::QuoteStyle::Double => ScalarStyle::DoubleQuoted,
1543 };
1544
1545 self.events.push(Event::scalar(
1546 token.start_position,
1547 self.pending_anchor.take(), // Use pending anchor
1548 self.pending_tag.take(), // Use pending tag
1549 value.clone(),
1550 style == ScalarStyle::Plain,
1551 style != ScalarStyle::Plain,
1552 style,
1553 ));
1554
1555 // Handle state transitions for mapping key/value processing
1556 self.handle_node_completion();
1557 }
1558
1559 TokenType::BlockScalarLiteral(value) => {
1560 if matches!(self.state, ParserState::ImplicitDocumentStart) {
1561 self.events.push(Event::document_start(
1562 token.start_position,
1563 None,
1564 vec![],
1565 true,
1566 ));
1567 self.state = ParserState::DocumentContent;
1568 }
1569
1570 self.events.push(Event::scalar(
1571 token.start_position,
1572 self.pending_anchor.take(), // Use pending anchor
1573 self.pending_tag.take(), // Use pending tag
1574 value.clone(),
1575 false, // Not plain
1576 true, // Quoted style
1577 ScalarStyle::Literal,
1578 ));
1579
1580 // Handle state transitions for mapping key/value processing
1581 self.handle_node_completion();
1582 }
1583
1584 TokenType::BlockScalarFolded(value) => {
1585 if matches!(self.state, ParserState::ImplicitDocumentStart) {
1586 self.events.push(Event::document_start(
1587 token.start_position,
1588 None,
1589 vec![],
1590 true,
1591 ));
1592 self.state = ParserState::DocumentContent;
1593 }
1594
1595 self.events.push(Event::scalar(
1596 token.start_position,
1597 self.pending_anchor.take(), // Use pending anchor
1598 self.pending_tag.take(), // Use pending tag
1599 value.clone(),
1600 false, // Not plain
1601 true, // Quoted style
1602 ScalarStyle::Folded,
1603 ));
1604
1605 // Handle state transitions for mapping key/value processing
1606 self.handle_node_completion();
1607 }
1608
1609 TokenType::BlockEntry => {
1610 // Block sequence entry - this indicates a new item in a sequence
1611 // We need to ensure proper state management for nested structures
1612 match self.state {
1613 ParserState::BlockSequence => {
1614 // We're already in a sequence, this is a new item.
1615 // §6.9.1: if the previous token was also a
1616 // BlockEntry, the previous item had no value —
1617 // synthesise an implicit empty scalar before
1618 // accepting this new BlockEntry (yaml-test-suite
1619 // SM9W cluster). Also synth when a pending
1620 // anchor/tag was left on a PREVIOUS line — the
1621 // property was the previous item's empty value
1622 // (yaml-test-suite PW8X).
1623 let last_was_block_entry =
1624 matches!(self.last_token_type, Some(TokenType::BlockEntry));
1625 let earliest_property_line =
1626 match (self.pending_anchor_line, self.pending_tag_line) {
1627 (Some(a), Some(t)) => Some(a.min(t)),
1628 (Some(a), None) => Some(a),
1629 (None, Some(t)) => Some(t),
1630 (None, None) => None,
1631 };
1632 let property_from_prev_line = (self.pending_anchor.is_some()
1633 || self.pending_tag.is_some())
1634 && earliest_property_line
1635 .map_or(false, |a| a < token.start_position.line);
1636 if last_was_block_entry || property_from_prev_line {
1637 self.events.push(Event::scalar(
1638 token.start_position,
1639 self.pending_anchor.take(),
1640 self.pending_tag.take(),
1641 String::new(),
1642 true,
1643 false,
1644 ScalarStyle::Plain,
1645 ));
1646 self.pending_anchor_line = None;
1647 self.pending_tag_line = None;
1648 }
1649 }
1650 ParserState::BlockMapping | ParserState::BlockMappingValue => {
1651 // If we encounter a BlockEntry while in a mapping,
1652 // we need to close the mapping and continue the sequence
1653 self.events.push(Event::mapping_end(token.start_position));
1654 self.state = ParserState::BlockSequence;
1655 }
1656 _ => {
1657 // BlockEntry in other contexts might indicate we need to start a sequence
1658 // This handles implicit sequence starts
1659 if matches!(self.state, ParserState::ImplicitDocumentStart) {
1660 self.events.push(Event::document_start(
1661 token.start_position,
1662 None,
1663 vec![],
1664 true,
1665 ));
1666 }
1667
1668 // Start an implicit sequence if we're not already in one
1669 self.events.push(Event::sequence_start(
1670 token.start_position,
1671 self.pending_anchor.take(),
1672 self.pending_tag.take(),
1673 false,
1674 ));
1675 self.state = ParserState::BlockSequence;
1676 }
1677 }
1678 }
1679
1680 TokenType::Value => {
1681 // Snapshot the PREVIOUS Value token's line BEFORE
1682 // updating to the current one. The match arms below
1683 // need this to detect multi-`:`-on-same-line (yaml-
1684 // test-suite ZL4Z, ZCZ6).
1685 let prev_value_line = self.last_value_token_line;
1686 self.last_value_token_line = Some(token.start_position.line);
1687 self.explicit_key_pending = false;
1688 // YAML 1.2 §6.9.1: a `:` with no preceding key implies an
1689 // empty key. Handle the four states where this can arise:
1690 // * ImplicitDocumentStart — open `+DOC`, `+MAP`, empty key.
1691 // * DocumentContent — open `+MAP`, empty key.
1692 // * BlockMappingKey with EVEN children — empty key for
1693 // the next entry (no scalar preceded the `:`).
1694 // * Normal cases (`BlockMappingKey` with odd children,
1695 // `FlowMappingKey`) — just transition state.
1696 match self.state {
1697 ParserState::ImplicitDocumentStart => {
1698 let event = self.create_implicit_document_start(token.start_position);
1699 self.events.push(event);
1700 // The mapping itself has no anchor/tag here —
1701 // those (if any) belong to the (empty) key.
1702 self.events.push(Event::mapping_start(
1703 token.start_position,
1704 None,
1705 None,
1706 false,
1707 ));
1708 self.events.push(Event::scalar(
1709 token.start_position,
1710 self.pending_anchor.take(),
1711 self.pending_tag.take(),
1712 String::new(),
1713 true,
1714 false,
1715 ScalarStyle::Plain,
1716 ));
1717 self.state = ParserState::BlockMappingValue;
1718 }
1719 ParserState::DocumentContent | ParserState::DocumentStart => {
1720 self.events.push(Event::mapping_start(
1721 token.start_position,
1722 None,
1723 None,
1724 false,
1725 ));
1726 self.events.push(Event::scalar(
1727 token.start_position,
1728 self.pending_anchor.take(),
1729 self.pending_tag.take(),
1730 String::new(),
1731 true,
1732 false,
1733 ScalarStyle::Plain,
1734 ));
1735 self.state = ParserState::BlockMappingValue;
1736 }
1737 ParserState::BlockMappingKey => {
1738 // §8.22 V9D5: when a `:` arrives at the same
1739 // column as the most recent `?` on a LATER
1740 // line, it's the explicit value separator of
1741 // that `?` key. If we previously wrapped an
1742 // inline single-pair mapping for the explicit
1743 // key (via the path below), close it first so
1744 // the outer mapping receives the value (yaml-
1745 // test-suite V9D5).
1746 // Use inline_wrap_column (set when we opened a
1747 // V9D5-style inline wrap) for matching the
1748 // close. Don't depend on last_key_marker_*
1749 // since those get reset on wrap open.
1750 if self
1751 .inline_wrap_column
1752 .map_or(false, |c| c == token.start_position.column)
1753 && !self.state_stack.is_empty()
1754 && matches!(self.state_stack.last(), Some(ParserState::BlockMappingKey))
1755 {
1756 // Close inline-wrapped key mapping if its
1757 // children are even (complete pairs).
1758 if !innermost_mapping_has_odd_children(&self.events) {
1759 self.events.push(Event::mapping_end(token.start_position));
1760 self.state = self.state_stack.pop().ok_or_else(|| {
1761 Error::parse(
1762 token.start_position,
1763 "internal: parser state stack underflow closing inline-wrapped key mapping",
1764 )
1765 })?;
1766 self.inline_wrap_column = None;
1767 self.just_closed_inline_wrap = true;
1768 }
1769 }
1770 // §8.22: when the explicit key marker (\`?\`) is
1771 // followed by a node + \`:\` on the SAME line,
1772 // that whole construct is an inline single-pair
1773 // mapping (the explicit key node itself).
1774 // Wrap retroactively by inserting an inner
1775 // MappingStart before the just-emitted key
1776 // node. yaml-test-suite M2N8/01 \`? []: x\`,
1777 // and the empty-prefix variant M2N8/00
1778 // \`- ? : x\`.
1779 let odd_children = innermost_mapping_has_odd_children(&self.events);
1780 let key_marker_same_line = self
1781 .last_key_marker_line
1782 .map_or(false, |l| l == token.start_position.line);
1783 // Empty-prefix variant: `?` then `:` directly
1784 // (no node between). Open inner mapping with
1785 // empty key and transition to inner value.
1786 if !odd_children && key_marker_same_line {
1787 self.state_stack.push(self.state);
1788 self.events.push(Event::mapping_start(
1789 token.start_position,
1790 None,
1791 None,
1792 false,
1793 ));
1794 self.events.push(Event::scalar(
1795 token.start_position,
1796 None,
1797 None,
1798 String::new(),
1799 true,
1800 false,
1801 ScalarStyle::Plain,
1802 ));
1803 self.last_key_marker_line = None;
1804 self.state = ParserState::BlockMappingValue;
1805 self.last_token_type = token_type_for_tracking;
1806 return Ok(());
1807 }
1808 if odd_children && key_marker_same_line {
1809 // Find the most recent emitted KEY-position
1810 // node within the active mapping (it'll be
1811 // either a Scalar or a flow-collection
1812 // open). Insert MappingStart BEFORE it.
1813 let mut depth = 0i32;
1814 let mut insert_at = None;
1815 for (idx, ev) in self.events.iter().enumerate().rev() {
1816 match &ev.event_type {
1817 EventType::MappingEnd | EventType::SequenceEnd => {
1818 depth += 1;
1819 }
1820 EventType::MappingStart { .. }
1821 | EventType::SequenceStart { .. } => {
1822 if depth == 0 {
1823 insert_at = Some(idx);
1824 break;
1825 }
1826 depth -= 1;
1827 }
1828 EventType::Scalar { .. } if depth == 0 => {
1829 insert_at = Some(idx);
1830 break;
1831 }
1832 _ => {}
1833 }
1834 }
1835 if let Some(ii) = insert_at {
1836 self.state_stack.push(self.state);
1837 self.events.insert(
1838 ii,
1839 Event::mapping_start(
1840 self.events[ii].position,
1841 None,
1842 None,
1843 false,
1844 ),
1845 );
1846 // Record the wrap's "outer key column"
1847 // so the matching explicit-value `:`
1848 // (on a later line at the same column
1849 // as the `?` marker) can close it.
1850 self.inline_wrap_column = self.last_key_marker_column;
1851 self.last_key_marker_line = None;
1852 self.state = ParserState::BlockMappingValue;
1853 self.last_token_type = token_type_for_tracking;
1854 return Ok(());
1855 }
1856 }
1857 let even_children = !innermost_mapping_has_odd_children(&self.events);
1858 if even_children {
1859 // §8.22: two implicit \`:\` on the same line
1860 // in a block mapping (e.g. \`a: 'b': c\`) is
1861 // invalid — block mappings cannot express
1862 // nested implicit single-pair mappings
1863 // inline (yaml-test-suite ZL4Z, ZCZ6).
1864 //
1865 // Carve-out: when the PREVIOUS `:` on this
1866 // line was an explicit value separator
1867 // (paired with `?`), the value position
1868 // legitimately holds an inline mapping
1869 // (yaml-test-suite V9D5 \`: moon: white\`
1870 // after \`? earth: blue\`). We detect this
1871 // by checking whether the synth'd empty key
1872 // (or any structural emission) happened on
1873 // THIS line — if so, allow.
1874 let prev_was_scalar = matches!(
1875 self.last_token_type,
1876 Some(
1877 TokenType::Scalar(..)
1878 | TokenType::BlockScalarLiteral(..)
1879 | TokenType::BlockScalarFolded(..)
1880 )
1881 );
1882 let same_line_as_prev_colon = prev_value_line
1883 .map_or(false, |line| line == token.start_position.line);
1884 // Walk back from the most recent event:
1885 // if the last scalar BEFORE the just-pushed
1886 // scalar is an EMPTY implicit scalar on
1887 // this line (the synth'd empty key from a
1888 // prior `:`), the prior `:` was structural
1889 // and this `:` is the inline mapping's
1890 // separator.
1891 let mut saw_synth_empty_on_this_line = false;
1892 let mut seen_value = 0;
1893 for ev in self.events.iter().rev() {
1894 if let EventType::Scalar {
1895 value,
1896 plain_implicit,
1897 ..
1898 } = &ev.event_type
1899 {
1900 if seen_value >= 1 {
1901 if value.is_empty()
1902 && *plain_implicit
1903 && ev.position.line == token.start_position.line
1904 {
1905 saw_synth_empty_on_this_line = true;
1906 }
1907 break;
1908 }
1909 seen_value += 1;
1910 }
1911 }
1912 if prev_was_scalar
1913 && same_line_as_prev_colon
1914 && !saw_synth_empty_on_this_line
1915 {
1916 return Err(Error::parse(
1917 token.start_position,
1918 "Multiple `:` on the same line in block mapping",
1919 ));
1920 }
1921 // Missing key — synthesise empty scalar
1922 // first. Pending anchor/tag belongs to that
1923 // empty key (yaml-test-suite PW8X).
1924 self.events.push(Event::scalar(
1925 token.start_position,
1926 self.pending_anchor.take(),
1927 self.pending_tag.take(),
1928 String::new(),
1929 true,
1930 false,
1931 ScalarStyle::Plain,
1932 ));
1933 }
1934 self.state = ParserState::BlockMappingValue;
1935 }
1936 ParserState::FlowMappingKey => {
1937 self.state = ParserState::FlowMappingValue;
1938 }
1939 ParserState::BlockSequence
1940 if matches!(self.last_token_type, Some(TokenType::BlockEntry)) =>
1941 {
1942 // §8.22: \`- :\` — the sequence item is a
1943 // mapping with an implicit empty key and the
1944 // `:` is the key/value separator (yaml-test-
1945 // suite UKK6/00).
1946 self.state_stack.push(self.state);
1947 self.events.push(Event::mapping_start(
1948 token.start_position,
1949 self.pending_anchor.take(),
1950 self.pending_tag.take(),
1951 false,
1952 ));
1953 self.events.push(Event::scalar(
1954 token.start_position,
1955 None,
1956 None,
1957 String::new(),
1958 true,
1959 false,
1960 ScalarStyle::Plain,
1961 ));
1962 self.state = ParserState::BlockMappingValue;
1963 }
1964 ParserState::FlowMapping => {
1965 // §7.5: in FlowMapping state, a `:` separates
1966 // an emitted key from its value (odd children
1967 // means the key scalar is already on the
1968 // stack — normal). If children are even,
1969 // we're starting a new entry with an empty
1970 // key. The pending anchor/tag (if any) belongs
1971 // to that empty key (yaml-test-suite NKF9,
1972 // WZ62: \`!!str : bar\` — empty key tagged
1973 // !!str).
1974 if !innermost_mapping_has_odd_children(&self.events) {
1975 self.events.push(Event::scalar(
1976 token.start_position,
1977 self.pending_anchor.take(),
1978 self.pending_tag.take(),
1979 String::new(),
1980 true,
1981 false,
1982 ScalarStyle::Plain,
1983 ));
1984 }
1985 self.state = ParserState::FlowMappingValue;
1986 }
1987 ParserState::FlowSequence => {
1988 // §7.5: \`[ {k:v}:value ]\` — a closed flow
1989 // collection followed by \`:\` makes that flow
1990 // node the implicit key. Retroactively wrap
1991 // it in an implicit single-pair mapping by
1992 // inserting MappingStart BEFORE the matching
1993 // flow-open event (yaml-test-suite 9MMW).
1994 let last_is_flow_close = matches!(
1995 self.events.last().map(|e| &e.event_type),
1996 Some(EventType::MappingEnd | EventType::SequenceEnd)
1997 );
1998 if last_is_flow_close {
1999 // Find the matching open via depth walk.
2000 let mut depth = 0i32;
2001 let mut open_idx = None;
2002 for (idx, ev) in self.events.iter().enumerate().rev() {
2003 match &ev.event_type {
2004 EventType::MappingEnd | EventType::SequenceEnd => {
2005 depth += 1;
2006 }
2007 EventType::MappingStart {
2008 flow_style: true, ..
2009 }
2010 | EventType::SequenceStart {
2011 flow_style: true, ..
2012 } => {
2013 depth -= 1;
2014 if depth == 0 {
2015 open_idx = Some(idx);
2016 break;
2017 }
2018 }
2019 _ => {}
2020 }
2021 }
2022 if let Some(oi) = open_idx {
2023 self.state_stack.push(self.state);
2024 self.events.insert(
2025 oi,
2026 Event::mapping_start(
2027 self.events[oi].position,
2028 None,
2029 None,
2030 true,
2031 ),
2032 );
2033 self.state = ParserState::FlowMappingValue;
2034 self.implicit_flow_pair_depth += 1;
2035 self.last_token_type = token_type_for_tracking;
2036 return Ok(());
2037 }
2038 }
2039 // §7.5: `[ : value ]` — leading `:` with no
2040 // preceding scalar implies an empty key for an
2041 // implicit single-pair flow mapping
2042 // (yaml-test-suite CFD4).
2043 self.state_stack.push(self.state);
2044 self.events.push(Event::mapping_start(
2045 token.start_position,
2046 None,
2047 None,
2048 true,
2049 ));
2050 self.events.push(Event::scalar(
2051 token.start_position,
2052 None,
2053 None,
2054 String::new(),
2055 true,
2056 false,
2057 ScalarStyle::Plain,
2058 ));
2059 self.state = ParserState::FlowMappingValue;
2060 self.implicit_flow_pair_depth += 1;
2061 }
2062 _ => {}
2063 }
2064 }
2065
2066 TokenType::FlowEntry => {
2067 // YAML 1.2 §7.4: a `,` must follow an entry. Leading
2068 // `,` (e.g. `[ , a, b ]`) and consecutive `,, ` are
2069 // invalid (yaml-test-suite 9MAG, CTN5).
2070 let no_prior_entry = matches!(
2071 self.events.last().map(|e| &e.event_type),
2072 Some(
2073 EventType::SequenceStart {
2074 flow_style: true,
2075 ..
2076 } | EventType::MappingStart {
2077 flow_style: true,
2078 ..
2079 }
2080 )
2081 );
2082 if no_prior_entry {
2083 return Err(Error::parse(
2084 token.start_position,
2085 "Flow entry separator `,` with no preceding entry",
2086 ));
2087 }
2088 // Consecutive `,` — last_token_type carries the kind of
2089 // the previous token. If it's also FlowEntry, no entry
2090 // came between (e.g. `[a, , b]`, `[a, b, , ]`).
2091 if matches!(self.last_token_type, Some(TokenType::FlowEntry)) {
2092 return Err(Error::parse(
2093 token.start_position,
2094 "Consecutive `,` separators in flow collection",
2095 ));
2096 }
2097 // §7.5: inside a flow mapping, a comma terminates the
2098 // current entry. If the entry is missing its value
2099 // (state FlowMappingValue or odd children), synth an
2100 // implicit empty scalar — consuming any pending
2101 // anchor/tag, which would have been a property of
2102 // the missing value (yaml-test-suite 8KB6, 9BXH,
2103 // FRK4, WZ62).
2104 if matches!(
2105 self.state,
2106 ParserState::FlowMapping
2107 | ParserState::FlowMappingKey
2108 | ParserState::FlowMappingValue
2109 ) && innermost_mapping_has_odd_children(&self.events)
2110 {
2111 self.events.push(Event::scalar(
2112 token.start_position,
2113 self.pending_anchor.take(),
2114 self.pending_tag.take(),
2115 String::new(),
2116 true,
2117 false,
2118 ScalarStyle::Plain,
2119 ));
2120 self.state = ParserState::FlowMapping;
2121 }
2122
2123 // §7.5: same close-on-comma logic for implicit
2124 // single-pair mappings.
2125 if self.implicit_flow_pair_depth > 0
2126 && matches!(
2127 self.state,
2128 ParserState::FlowMapping
2129 | ParserState::FlowMappingKey
2130 | ParserState::FlowMappingValue
2131 )
2132 && matches!(self.state_stack.last(), Some(ParserState::FlowSequence))
2133 {
2134 if innermost_mapping_has_odd_children(&self.events) {
2135 self.events.push(Event::scalar(
2136 token.start_position,
2137 None,
2138 None,
2139 String::new(),
2140 true,
2141 false,
2142 ScalarStyle::Plain,
2143 ));
2144 }
2145 self.events.push(Event::mapping_end(token.start_position));
2146 self.state = self.state_stack.pop().ok_or_else(|| {
2147 Error::parse(
2148 token.start_position,
2149 "internal: parser state stack underflow at implicit flow mapping end (in flow mapping)",
2150 )
2151 })?;
2152 self.implicit_flow_pair_depth -= 1;
2153 }
2154 }
2155
2156 TokenType::Anchor(name) => {
2157 // YAML 1.2 §6.9.2: a node may have at most one anchor.
2158 // A second anchor before the node is consumed is invalid
2159 // (yaml-test-suite 4JVG).
2160 if self.pending_anchor.is_some() {
2161 return Err(Error::parse(
2162 token.start_position,
2163 "Node may not have more than one anchor",
2164 ));
2165 }
2166 // Record the anchor name so subsequent aliases can be
2167 // validated against it (YAML 1.2 §6.9.2 forbids forward
2168 // references).
2169 self.defined_anchors.insert(name.clone());
2170 self.pending_anchor = Some(name.clone());
2171 self.pending_anchor_line = Some(token.start_position.line);
2172 }
2173
2174 TokenType::Alias(name) => {
2175 // YAML 1.2 §6.9.2: alias must reference a previously
2176 // defined anchor — forward references are invalid.
2177 if !self.defined_anchors.contains(name.as_str()) {
2178 return Err(Error::parse(
2179 token.start_position,
2180 format!("Alias `*{name}` references an undefined anchor"),
2181 ));
2182 }
2183 // §6.9.2: an alias is a reference, not an independent
2184 // node — it cannot carry an anchor or tag of its own
2185 // (yaml-test-suite SR86, SU74).
2186 if self.pending_anchor.is_some() || self.pending_tag.is_some() {
2187 return Err(Error::parse(
2188 token.start_position,
2189 "Alias may not have an anchor or tag",
2190 ));
2191 }
2192 if matches!(self.state, ParserState::ImplicitDocumentStart) {
2193 self.events.push(Event::document_start(
2194 token.start_position,
2195 None,
2196 vec![],
2197 true,
2198 ));
2199 self.state = ParserState::DocumentContent;
2200 }
2201
2202 // Generate alias event
2203 self.events
2204 .push(Event::alias(token.start_position, name.clone()));
2205
2206 // Handle state transitions for mapping key/value processing
2207 self.handle_node_completion();
2208 }
2209
2210 TokenType::Tag(tag) => {
2211 // YAML 1.2 §6.9.1 allows at most one tag per node, but
2212 // (like the double-anchor check) detecting that at this
2213 // layer produces false positives — a tag preceding an
2214 // implicit empty node in a sequence is followed by the
2215 // tag of the next sibling node, and the same `pending_tag`
2216 // field is reused. Until the parser tracks per-node tag
2217 // scopes, accept the overwrite silently (yaml-test-suite
2218 // FH7J relies on this).
2219 // Resolve and normalize the tag before storing.
2220 // §6.8: an unresolvable named-handle tag (e.g. `!prefix!X`
2221 // when no `%TAG !prefix!` directive is in scope) is
2222 // invalid (yaml-test-suite QLJ7).
2223 match self.tag_resolver.resolve(&tag) {
2224 Ok(resolved_tag) => {
2225 self.pending_tag = Some(resolved_tag.uri);
2226 self.pending_tag_line = Some(token.start_position.line);
2227 }
2228 Err(e) => {
2229 // Only error on named-handle tags (`!name!suffix`),
2230 // not bare-tag fallback paths.
2231 let is_named_handle = tag.starts_with('!')
2232 && tag[1..].contains('!')
2233 && !tag.starts_with("!!");
2234 if is_named_handle {
2235 return Err(Error::parse(
2236 token.start_position,
2237 format!("Undefined tag handle in `{tag}`: {e}"),
2238 ));
2239 }
2240 self.pending_tag = Some(tag.clone());
2241 self.pending_tag_line = Some(token.start_position.line);
2242 }
2243 }
2244 }
2245
2246 // TODO: Implement these when we add support for advanced features
2247 TokenType::Comment(_) => {
2248 // Not implemented in basic version
2249 }
2250
2251 // Complex key marker
2252 TokenType::Key => {
2253 self.explicit_key_pending = true;
2254 self.last_key_marker_line = Some(token.start_position.line);
2255 self.last_key_marker_column = Some(token.start_position.column);
2256 match self.state {
2257 ParserState::ImplicitDocumentStart => {
2258 // Start implicit document and mapping
2259 let event = self.create_implicit_document_start(token.start_position);
2260 self.events.push(event);
2261 self.events.push(Event::mapping_start(
2262 token.start_position,
2263 self.pending_anchor.take(),
2264 self.pending_tag.take(),
2265 false,
2266 ));
2267 self.state = ParserState::BlockMappingKey;
2268 }
2269 ParserState::DocumentStart => {
2270 // Explicit document start (`---`) followed by a
2271 // complex-key marker — open the document body as
2272 // an implicit block mapping (yaml-test-suite 2XXW).
2273 self.events.push(Event::mapping_start(
2274 token.start_position,
2275 self.pending_anchor.take(),
2276 self.pending_tag.take(),
2277 false,
2278 ));
2279 self.state = ParserState::BlockMappingKey;
2280 }
2281 ParserState::DocumentContent => {
2282 // Check if we just finished a mapping - if so, continue it instead of starting new one
2283 // This happens when the previous mapping key-value pair was processed but no BlockEnd was generated
2284 if !self.events.is_empty() {
2285 if let Some(last_event) = self.events.last() {
2286 // If the last event was a scalar and we have a MappingStart before it,
2287 // we're probably continuing an existing mapping
2288 if matches!(last_event.event_type, EventType::Scalar { .. }) {
2289 // Look for a recent MappingStart without a corresponding MappingEnd
2290 let mut mapping_depth = 0;
2291 let mut has_unfinished_mapping = false;
2292
2293 for event in self.events.iter().rev() {
2294 match &event.event_type {
2295 EventType::MappingEnd => mapping_depth += 1,
2296 EventType::MappingStart { .. } => {
2297 if mapping_depth == 0 {
2298 has_unfinished_mapping = true;
2299 break;
2300 }
2301 mapping_depth -= 1;
2302 }
2303 _ => {}
2304 }
2305 }
2306
2307 if has_unfinished_mapping {
2308 // Continue the existing mapping instead of starting a new one
2309 self.state = ParserState::BlockMappingKey;
2310 return Ok(());
2311 }
2312 }
2313 }
2314 }
2315
2316 // Start new mapping
2317 self.events.push(Event::mapping_start(
2318 token.start_position,
2319 self.pending_anchor.take(),
2320 self.pending_tag.take(),
2321 false,
2322 ));
2323 self.state = ParserState::BlockMappingKey;
2324 }
2325 ParserState::BlockMapping | ParserState::FlowMapping => {
2326 // Already in a mapping, now we have a complex key
2327 self.state = if matches!(self.state, ParserState::BlockMapping) {
2328 ParserState::BlockMappingKey
2329 } else {
2330 ParserState::FlowMappingKey
2331 };
2332 }
2333 ParserState::FlowSequence => {
2334 // §7.5: `[? key : value, ...]` — the `?`
2335 // opens an implicit single-pair flow mapping
2336 // with an explicit complex key (yaml-test-
2337 // suite CT4Q).
2338 self.state_stack.push(self.state);
2339 self.events.push(Event::mapping_start(
2340 token.start_position,
2341 None,
2342 None,
2343 true,
2344 ));
2345 self.state = ParserState::FlowMappingKey;
2346 self.implicit_flow_pair_depth += 1;
2347 }
2348 ParserState::BlockSequence => {
2349 // §8.22: `- ? key : value` — the sequence
2350 // item is itself a block mapping with an
2351 // explicit complex key. Open the wrapping
2352 // mapping before the explicit key marker is
2353 // consumed (yaml-test-suite M2N8/00, V9D5,
2354 // KK5P, PW8X).
2355 self.state_stack.push(self.state);
2356 self.events.push(Event::mapping_start(
2357 token.start_position,
2358 self.pending_anchor.take(),
2359 self.pending_tag.take(),
2360 false,
2361 ));
2362 self.state = ParserState::BlockMappingKey;
2363 }
2364 ParserState::BlockMappingValue => {
2365 // §8.22: \`outer:\\n ? complex\` — the outer
2366 // mapping's value is itself a block mapping
2367 // whose first key is a complex key. Open the
2368 // value mapping and transition to BlockMappingKey
2369 // (yaml-test-suite KK5P).
2370 self.state_stack.push(self.state);
2371 self.events.push(Event::mapping_start(
2372 token.start_position,
2373 self.pending_anchor.take(),
2374 self.pending_tag.take(),
2375 false,
2376 ));
2377 self.state = ParserState::BlockMappingKey;
2378 }
2379 ParserState::BlockMappingKey | ParserState::FlowMappingKey => {
2380 // A new `?` while we still owe a value for the
2381 // previous key — synthesise an implicit empty
2382 // scalar so the mapping stays balanced
2383 // (yaml-test-suite 7W2P).
2384 if innermost_mapping_has_odd_children(&self.events) {
2385 self.events.push(Event::scalar(
2386 token.start_position,
2387 None,
2388 None,
2389 String::new(),
2390 true,
2391 false,
2392 ScalarStyle::Plain,
2393 ));
2394 }
2395 }
2396 _ => {
2397 let context =
2398 ErrorContext::from_input(self.scanner.input(), &self.position, 2)
2399 .with_suggestion(
2400 "Complex keys must be used in mapping contexts".to_string(),
2401 );
2402 return Err(Error::parse_with_context(
2403 self.position,
2404 "Complex key marker (?) in invalid context",
2405 context,
2406 ));
2407 }
2408 }
2409 }
2410 }
2411
2412 // Update the last token type for next iteration
2413 self.last_token_type = token_type_for_tracking;
2414
2415 Ok(())
2416 }
2417
2418 /// Handle completion of a node (scalar or collection) and manage mapping state transitions
2419 #[allow(clippy::missing_const_for_fn)]
2420 fn handle_node_completion(&mut self) {
2421 match self.state {
2422 ParserState::BlockMappingKey => {
2423 // After processing a key, we stay in BlockMappingKey state
2424 // The Value token (:) will transition us to BlockMappingValue
2425 // No state change needed here
2426 }
2427 ParserState::FlowMappingKey => {
2428 // After processing a key in flow mapping, we stay in FlowMappingKey state
2429 // The Value token (:) will transition us to FlowMappingValue
2430 // No state change needed here
2431 }
2432 ParserState::BlockMappingValue => {
2433 // After processing a value, we go back to waiting for the next key
2434 self.state = ParserState::BlockMappingKey;
2435 }
2436 ParserState::FlowMappingValue => {
2437 // After processing a value in flow mapping, we go back to waiting for the next key
2438 self.state = ParserState::FlowMapping;
2439 }
2440 _ => {
2441 // No state change needed for other states
2442 }
2443 }
2444 }
2445}
2446
2447impl Default for BasicParser {
2448 fn default() -> Self {
2449 Self::new(String::new())
2450 }
2451}
2452
2453impl Parser for BasicParser {
2454 fn check_event(&self) -> bool {
2455 // For streaming: check if we have cached events, can generate
2456 // more, or a deferred error is waiting to be surfaced (from
2457 // eager parsing).
2458 self.event_index < self.events.len()
2459 || self.scanner.check_token()
2460 || self.scanning_error.is_some()
2461 }
2462
2463 fn peek_event(&self) -> Result<Option<&Event>> {
2464 // Peek at cached events only (don't generate new ones)
2465 Ok(self.events.get(self.event_index))
2466 }
2467
2468 fn get_event(&mut self) -> Result<Option<Event>> {
2469 // Generate next events until we have one available
2470 // Some tokens (like directives) don't generate events
2471 while self.event_index >= self.events.len() && self.scanner.check_token() {
2472 let events_before = self.events.len();
2473 self.generate_next_event()?;
2474
2475 // If no event was generated and we still have tokens, continue
2476 if self.events.len() == events_before && self.scanner.check_token() {
2477 continue;
2478 }
2479 break;
2480 }
2481
2482 if self.event_index < self.events.len() {
2483 let event = self.events[self.event_index].clone();
2484 self.event_index += 1;
2485 Ok(Some(event))
2486 } else if let Some(error) = self.scanning_error.take() {
2487 // Eager-parse and scanner errors are stored in
2488 // `scanning_error` (see `new_eager`). Surface them through
2489 // the natural iteration path *after* all buffered events
2490 // have been drained, so callers see the partial events
2491 // first and then the error that terminated parsing.
2492 Err(error)
2493 } else {
2494 Ok(None)
2495 }
2496 }
2497
2498 fn reset(&mut self) {
2499 self.event_index = 0;
2500 self.scanner.reset();
2501 self.state_stack.clear();
2502 self.position = Position::start();
2503 self.pending_anchor = None;
2504 self.pending_tag = None;
2505 self.last_token_type = None;
2506 }
2507
2508 fn position(&self) -> Position {
2509 self.position
2510 }
2511}
2512
2513impl BasicParser {
2514 /// Check if there was a scanning error
2515 #[allow(clippy::missing_const_for_fn)]
2516 pub fn take_scanning_error(&mut self) -> Option<Error> {
2517 self.scanning_error.take()
2518 }
2519}
2520
#[cfg(test)]
mod tests {
    use super::*;

    /// Fetch the next event, panicking on a parse error or on end of stream.
    fn next_event(parser: &mut BasicParser) -> Event {
        parser.get_event().unwrap().unwrap()
    }

    /// Discard `count` events; only a parse error makes this panic.
    fn skip_events(parser: &mut BasicParser, count: usize) {
        for _ in 0..count {
            parser.get_event().unwrap();
        }
    }

    /// A bare scalar document yields the full five-event envelope:
    /// stream start, implicit document start, scalar, implicit document
    /// end, stream end.
    #[test]
    fn test_basic_parsing() {
        let mut parser = BasicParser::new_eager(String::from("42"));

        assert!(parser.check_event());

        // Stream start
        let stream_start = next_event(&mut parser);
        assert!(matches!(stream_start.event_type, EventType::StreamStart));

        // Document start (implicit)
        match next_event(&mut parser).event_type {
            EventType::DocumentStart { implicit, .. } => assert!(implicit),
            _ => panic!("Expected implicit document start"),
        }

        // Scalar
        match next_event(&mut parser).event_type {
            EventType::Scalar { value, .. } => assert_eq!(value, "42"),
            _ => panic!("Expected scalar event"),
        }

        // Document end (implicit)
        match next_event(&mut parser).event_type {
            EventType::DocumentEnd { implicit } => assert!(implicit),
            _ => panic!("Expected implicit document end"),
        }

        // Stream end
        let stream_end = next_event(&mut parser);
        assert!(matches!(stream_end.event_type, EventType::StreamEnd));
    }

    /// `[1, 2, 3]` opens a flow-style sequence whose first item is `1`.
    #[test]
    fn test_flow_sequence_parsing() {
        let mut parser = BasicParser::new_eager(String::from("[1, 2, 3]"));

        // Stream start + implicit document start
        skip_events(&mut parser, 2);

        // Sequence start
        match next_event(&mut parser).event_type {
            EventType::SequenceStart { flow_style, .. } => assert!(flow_style),
            _ => panic!("Expected flow sequence start"),
        }

        // First scalar
        match next_event(&mut parser).event_type {
            EventType::Scalar { value, .. } => assert_eq!(value, "1"),
            _ => panic!("Expected scalar '1'"),
        }
    }

    /// `{'key': 'value'}` opens a flow-style mapping whose first key is `key`.
    #[test]
    fn test_flow_mapping_parsing() {
        let mut parser = BasicParser::new_eager(String::from("{'key': 'value'}"));

        // Stream start + implicit document start
        skip_events(&mut parser, 2);

        // Mapping start
        match next_event(&mut parser).event_type {
            EventType::MappingStart { flow_style, .. } => assert!(flow_style),
            _ => panic!("Expected flow mapping start"),
        }

        // Key scalar
        match next_event(&mut parser).event_type {
            EventType::Scalar { value, .. } => assert_eq!(value, "key"),
            _ => panic!("Expected scalar 'key'"),
        }
    }
}