rust_yaml/parser/mod.rs
1//! YAML parser for converting tokens to events
2
3use crate::{
4 BasicScanner, Error, Limits, Position, Result, Scanner, Token, TokenType, error::ErrorContext,
5 tag::TagResolver,
6};
7
8pub mod events;
9pub mod streaming;
10// pub mod optimizations; // Temporarily disabled
11pub use events::*;
12pub use streaming::*;
13// pub use optimizations::*;
14
/// Trait for YAML parsers that convert token streams to events.
///
/// The interface is pull-based: callers can test for, peek at, or consume
/// one event at a time, and can query the parser's current input position.
pub trait Parser {
    /// Check if there are more events available
    fn check_event(&self) -> bool;

    /// Peek at the next event without consuming it.
    ///
    /// Returns `Err` when the event could not be produced.
    /// NOTE(review): `Ok(None)` presumably means "no further events" —
    /// confirm against the implementations.
    fn peek_event(&self) -> Result<Option<&Event>>;

    /// Get the next event, consuming it.
    ///
    /// Returns `Err` when the event could not be produced.
    /// NOTE(review): `Ok(None)` presumably means the stream is exhausted —
    /// confirm against the implementations.
    fn get_event(&mut self) -> Result<Option<Event>>;

    /// Reset the parser state
    fn reset(&mut self);

    /// Get the current position in the input
    fn position(&self) -> Position;
}
32
33/// Walks back through `events` looking for an unclosed `DocumentStart`
34/// (i.e. one without a matching `DocumentEnd` after it). Returns true if
35/// the parser is still inside a document.
36fn has_open_document(events: &[Event]) -> bool {
37 for ev in events.iter().rev() {
38 match &ev.event_type {
39 EventType::DocumentEnd { .. } => return false,
40 EventType::DocumentStart { .. } => return true,
41 _ => {}
42 }
43 }
44 false
45}
46
47/// Emit `MappingEnd` / `SequenceEnd` events to close any unbalanced
48/// collection starts in `events`. Called before emitting a `DocumentEnd`
49/// when an outer construct (e.g. a new `---` marker) forces the previous
50/// document closed without going through the usual indent-driven
51/// `BlockEnd` token path. Also synthesises implicit empty scalars for
52/// mappings that have an odd child-event count (i.e. a key without a
53/// value before the close).
54/// Return true when there is at least one *flow* collection still open
55/// (a `SequenceStart` / `MappingStart` with `flow_style=true` without a
56/// matching `…End` afterwards). Used at end-of-stream to enforce §7.4.
57fn has_unclosed_flow_collection(events: &[Event]) -> bool {
58 let mut depth: i32 = 0;
59 for ev in events.iter() {
60 match &ev.event_type {
61 EventType::SequenceStart {
62 flow_style: true, ..
63 }
64 | EventType::MappingStart {
65 flow_style: true, ..
66 } => depth += 1,
67 EventType::SequenceEnd | EventType::MappingEnd if depth > 0 => {
68 depth -= 1;
69 }
70 _ => {}
71 }
72 }
73 depth > 0
74}
75
76/// Walk `events` to detect a document that already contains a closed
77/// root-level node. Returns true when the second root node arrives and
78/// the existing event stack has no unmatched MapStart/SeqStart.
79fn second_root_node_present(events: &[Event]) -> bool {
80 let mut after_doc_start = false;
81 let mut has_root_node = false;
82 let mut depth = 0i32;
83 for e in events.iter() {
84 match &e.event_type {
85 EventType::DocumentStart { .. } => {
86 after_doc_start = true;
87 has_root_node = false;
88 depth = 0;
89 }
90 EventType::DocumentEnd { .. } => after_doc_start = false,
91 EventType::MappingStart { .. } | EventType::SequenceStart { .. } => depth += 1,
92 EventType::MappingEnd | EventType::SequenceEnd => {
93 depth -= 1;
94 if depth == 0 {
95 has_root_node = true;
96 }
97 }
98 EventType::Scalar { .. } | EventType::Alias { .. } if depth == 0 => {
99 has_root_node = true;
100 }
101 _ => {}
102 }
103 }
104 after_doc_start && has_root_node && depth == 0
105}
106
107/// Return true when the innermost still-open mapping has an odd number
108/// of children — i.e. a key has been emitted but its value has not.
109/// Used to decide when to synthesise an implicit empty scalar.
110fn innermost_mapping_has_odd_children(events: &[Event]) -> bool {
111 let mut stack: Vec<(&'static str, usize)> = Vec::new();
112 for ev in events.iter() {
113 match &ev.event_type {
114 EventType::DocumentStart { .. } | EventType::DocumentEnd { .. } => {
115 stack.clear();
116 }
117 EventType::MappingStart { .. } => stack.push(("map", 0)),
118 EventType::SequenceStart { .. } => stack.push(("seq", 0)),
119 EventType::MappingEnd | EventType::SequenceEnd => {
120 stack.pop();
121 if let Some(parent) = stack.last_mut() {
122 parent.1 += 1;
123 }
124 }
125 EventType::Scalar { .. } | EventType::Alias { .. } => {
126 if let Some(parent) = stack.last_mut() {
127 parent.1 += 1;
128 }
129 }
130 _ => {}
131 }
132 }
133 matches!(stack.last(), Some(("map", n)) if n % 2 == 1)
134}
135
136fn close_open_collections(events: &mut Vec<Event>, pos: Position) {
137 // Each entry: (kind, children_at_this_depth) where `kind` is "map"
138 // or "seq". `children` counts top-level node events emitted inside
139 // this collection (Scalar, Alias, or a closed nested collection).
140 let mut stack: Vec<(&'static str, usize)> = Vec::new();
141 for ev in events.iter() {
142 match &ev.event_type {
143 EventType::DocumentStart { .. } | EventType::DocumentEnd { .. } => {
144 stack.clear();
145 }
146 EventType::MappingStart { .. } => stack.push(("map", 0)),
147 EventType::SequenceStart { .. } => stack.push(("seq", 0)),
148 EventType::MappingEnd | EventType::SequenceEnd => {
149 stack.pop();
150 if let Some(parent) = stack.last_mut() {
151 parent.1 += 1;
152 }
153 }
154 EventType::Scalar { .. } | EventType::Alias { .. } => {
155 if let Some(parent) = stack.last_mut() {
156 parent.1 += 1;
157 }
158 }
159 _ => {}
160 }
161 }
162 while let Some((kind, children)) = stack.pop() {
163 if kind == "map" && children % 2 == 1 {
164 // Odd child count → last key has no value yet. Spec says
165 // emit implicit empty scalar (YAML 1.2 §6.9.1).
166 events.push(Event::scalar(
167 pos,
168 None,
169 None,
170 String::new(),
171 true,
172 false,
173 ScalarStyle::Plain,
174 ));
175 }
176 match kind {
177 "map" => events.push(Event::mapping_end(pos)),
178 "seq" => events.push(Event::sequence_end(pos)),
179 _ => {}
180 }
181 }
182}
183
/// Basic parser implementation that converts tokens to events
#[derive(Debug)]
pub struct BasicParser {
    /// Token source the parser pulls from.
    scanner: BasicScanner,
    /// Events produced so far, in emission order.
    events: Vec<Event>,
    /// NOTE(review): presumably the index into `events` of the next event
    /// to hand out — it is only initialised to 0 in the visible code;
    /// confirm against the `Parser` impl.
    event_index: usize,
    /// Current parsing context; decides how the next token is handled.
    state: ParserState,
    /// Enclosing states saved when a nested collection is opened, so the
    /// outer context can be restored once the nested one closes.
    state_stack: Vec<ParserState>,
    /// End position of the most recently processed token (the scanner's
    /// position at construction time until the first token is consumed).
    position: Position,
    /// Anchor that has been seen but not yet attached to a node; taken
    /// when the node event it belongs to is emitted.
    pending_anchor: Option<String>,
    /// Line where `pending_anchor` was set. Used to distinguish a
    /// "freestanding" anchor (alone on its own line — belongs to the
    /// upcoming collection) from an "inline" anchor (same line as the
    /// next key — belongs to that key). yaml-test-suite 6BFJ, 9KAX.
    pending_anchor_line: Option<usize>,
    /// Line of the most recent `?` Key marker. Used to detect when
    /// an explicit-key construct has an inline single-pair mapping as
    /// its key (yaml-test-suite M2N8/00, M2N8/01, V9D5).
    last_key_marker_line: Option<usize>,
    /// Column of the most recent `?` Key marker. Used in V9D5: when
    /// a `:` arrives at the same column as the most recent `?` on
    /// a later line, it's the explicit value separator — close any
    /// inline-wrapped inner mapping first.
    last_key_marker_column: Option<usize>,
    /// Set when an explicit value separator just closed an inline-
    /// wrapped key. The next `<scalar>:<scalar>` on the same line
    /// should also be wrapped in an inner mapping (V9D5's value side).
    just_closed_inline_wrap: bool,
    /// Column of an open inline-wrap mapping (V9D5). Used to detect
    /// the matching explicit-value separator and close it.
    inline_wrap_column: Option<usize>,
    /// Tag that has been seen but not yet attached to a node; taken when
    /// the node event it belongs to is emitted.
    pending_tag: Option<String>,
    /// Same idea as `pending_anchor_line` but for tags. Used to detect
    /// a freestanding tag in block-sequence context that should be
    /// flushed as the previous item's empty value rather than carried
    /// onto the next item (yaml-test-suite FH7J).
    pending_tag_line: Option<usize>,
    /// Kind of the previously processed token, stored with its payload
    /// stripped (empty strings / zeroes) so no token data is cloned.
    last_token_type: Option<TokenType>,
    /// Error captured from the scanner or from an eager parse at
    /// construction time, kept for later propagation.
    scanning_error: Option<Error>,
    /// Version from a `%YAML` directive awaiting the next document start,
    /// where it is taken and attached to the `DocumentStart` event.
    yaml_version: Option<(u8, u8)>,
    /// `%TAG` directives (handle, prefix) collected for the next document
    /// start; cleared once that `DocumentStart` event has been built.
    tag_directives: Vec<(String, String)>,
    /// Resolver that receives every `%TAG` directive; replaced with a
    /// fresh one when a document is closed implicitly by a new `---`.
    tag_resolver: TagResolver,
    /// Anchor names that have been defined so far in the stream. Used to
    /// validate that aliases (`*name`) reference a known anchor (YAML 1.2
    /// §6.9.2). Forward references are forbidden, and we never reset this
    /// set — once defined, an anchor remains referenceable for the rest of
    /// the parse, matching common loader semantics.
    defined_anchors: std::collections::HashSet<String>,
    /// Line of the most recent `:` Value token. Used by the
    /// BlockMappingValue heuristic to tell apart "same-line value
    /// scalar" (6M2F) from "next-line sibling key" (6KGN).
    last_value_token_line: Option<usize>,
    /// True while we're holding an explicit `?` key that has not yet
    /// received its `:`. Used at end-of-stream to distinguish a
    /// spec-legal `? key` with implicit empty value from a missing-`:`
    /// bare scalar (yaml-test-suite 7MNF).
    explicit_key_pending: bool,
    /// Counts implicit single-pair flow mappings still open. A `,` or
    /// `]` while this is > 0 closes the innermost implicit mapping
    /// before continuing the outer flow sequence (§7.5).
    implicit_flow_pair_depth: usize,
}
246
/// Parser state for tracking context.
///
/// The current state decides how the next token is interpreted, and the
/// state reached at end of input decides whether the parse finished
/// cleanly.
// NOTE(review): several variants are never constructed in the visible part
// of this file, which is presumably why `dead_code` is silenced — confirm
// before removing variants or the attribute.
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(dead_code)]
enum ParserState {
    /// Initial state of a freshly constructed parser.
    StreamStart,
    /// The `StreamEnd` token has been processed.
    StreamEnd,
    /// No document is open; the next content token opens one implicitly.
    /// Entered after the `StreamStart` token and after a `...` marker.
    ImplicitDocumentStart,
    /// An explicit `---` marker has just been processed.
    DocumentStart,
    /// Inside a document, outside any tracked collection context.
    DocumentContent,
    /// Accepted as a clean final state and as a position where directives
    /// are allowed.
    DocumentEnd,
    /// NOTE(review): not used in the visible code.
    BlockNode,
    /// Reported as an unclosed mapping if the input ends in this state.
    BlockMapping,
    /// Inside a block mapping, positioned at a key.
    BlockMappingKey,
    /// Inside a block mapping, positioned at a value.
    BlockMappingValue,
    /// Inside a block sequence.
    BlockSequence,
    /// Reported as an unclosed mapping if the input ends in this state.
    FlowMapping,
    /// NOTE(review): not used in the visible code.
    FlowMappingKey,
    /// NOTE(review): not used in the visible code.
    FlowMappingValue,
    /// Reported as an unclosed sequence if the input ends in this state.
    FlowSequence,
    /// NOTE(review): not used in the visible code.
    BlockEnd,
}
268
269impl BasicParser {
270 /// Create a new streaming parser (lazy parsing)
271 pub fn new(input: String) -> Self {
272 Self::with_limits(input, Limits::default())
273 }
274
275 /// Create a new streaming parser with custom limits
276 pub fn with_limits(input: String, limits: Limits) -> Self {
277 let scanner = BasicScanner::with_limits(input, limits);
278 let position = scanner.position();
279
280 Self {
281 scanner,
282 events: Vec::new(),
283 event_index: 0,
284 state: ParserState::StreamStart,
285 state_stack: Vec::new(),
286 position,
287 pending_anchor: None,
288 pending_anchor_line: None,
289 last_key_marker_line: None,
290 last_key_marker_column: None,
291 just_closed_inline_wrap: false,
292 inline_wrap_column: None,
293 pending_tag: None,
294 pending_tag_line: None,
295 last_token_type: None,
296 scanning_error: None,
297 yaml_version: None,
298 tag_directives: Vec::new(),
299 tag_resolver: TagResolver::new(),
300 defined_anchors: std::collections::HashSet::new(),
301 last_value_token_line: None,
302 explicit_key_pending: false,
303 implicit_flow_pair_depth: 0,
304 }
305 }
306
307 /// Create a new parser with eager parsing (for compatibility)
308 pub fn new_eager(input: String) -> Self {
309 Self::new_eager_with_limits(input, Limits::default())
310 }
311
312 /// Create a new parser with eager parsing and custom limits
313 pub fn new_eager_with_limits(input: String, limits: Limits) -> Self {
314 let mut scanner = BasicScanner::new_eager_with_limits(input, limits);
315 let position = scanner.position();
316
317 // Check if there were any scanning errors and store them
318 let scanning_error = scanner.take_scanning_error();
319
320 let mut parser = Self {
321 scanner,
322 events: Vec::new(),
323 event_index: 0,
324 state: ParserState::StreamStart,
325 state_stack: Vec::new(),
326 position,
327 pending_anchor: None,
328 pending_anchor_line: None,
329 last_key_marker_line: None,
330 last_key_marker_column: None,
331 just_closed_inline_wrap: false,
332 inline_wrap_column: None,
333 pending_tag: None,
334 pending_tag_line: None,
335 last_token_type: None,
336 scanning_error: None,
337 yaml_version: None,
338 tag_directives: Vec::new(),
339 tag_resolver: TagResolver::new(),
340 defined_anchors: std::collections::HashSet::new(),
341 last_value_token_line: None,
342 explicit_key_pending: false,
343 implicit_flow_pair_depth: 0,
344 };
345
346 // If there was a scanning error, store it for later propagation.
347 // Likewise, surface eager-parse errors via the same field so
348 // `take_scanning_error` reports them.
349 if let Some(error) = scanning_error {
350 parser.scanning_error = Some(error);
351 } else if let Err(error) = parser.parse_all() {
352 parser.scanning_error = Some(error);
353 }
354
355 parser
356 }
357
358 /// Create parser from existing scanner
359 pub fn from_scanner(scanner: BasicScanner) -> Self {
360 let position = scanner.position();
361
362 let mut parser = Self {
363 scanner,
364 events: Vec::new(),
365 event_index: 0,
366 state: ParserState::StreamStart,
367 state_stack: Vec::new(),
368 position,
369 pending_anchor: None,
370 pending_anchor_line: None,
371 last_key_marker_line: None,
372 last_key_marker_column: None,
373 just_closed_inline_wrap: false,
374 inline_wrap_column: None,
375 pending_tag: None,
376 pending_tag_line: None,
377 last_token_type: None,
378 scanning_error: None,
379 yaml_version: None,
380 tag_directives: Vec::new(),
381 tag_resolver: TagResolver::new(),
382 defined_anchors: std::collections::HashSet::new(),
383 last_value_token_line: None,
384 explicit_key_pending: false,
385 implicit_flow_pair_depth: 0,
386 };
387
388 parser.parse_all().unwrap_or(());
389 parser
390 }
391
392 /// Parse all tokens into events
393 fn parse_all(&mut self) -> Result<()> {
394 while self.scanner.check_token() {
395 let token = match self.scanner.get_token()? {
396 Some(token) => token,
397 None => break,
398 };
399
400 self.position = token.end_position;
401 self.process_token(token)?;
402 }
403
404 // Check for unclosed structures
405 self.validate_final_state()?;
406
407 // Ensure stream end
408 if !self
409 .events
410 .iter()
411 .any(|e| matches!(e.event_type, EventType::StreamEnd))
412 {
413 self.events.push(Event::stream_end(self.position));
414 }
415
416 Ok(())
417 }
418
419 /// YAML 1.2 §6.8: directives may appear only before the first
420 /// document (`StreamStart` / `ImplicitDocumentStart`) or after an
421 /// explicit `...` (`DocumentEnd`). Anywhere else they're invalid.
422 fn check_directive_context(&self, pos: Position, name: &str) -> Result<()> {
423 if matches!(
424 self.state,
425 ParserState::StreamStart
426 | ParserState::ImplicitDocumentStart
427 | ParserState::DocumentEnd
428 ) {
429 Ok(())
430 } else {
431 Err(Error::parse(
432 pos,
433 format!("{name} directive is only allowed before a document or after `...`"),
434 ))
435 }
436 }
437
438 /// Create implicit document start event with directives
439 fn create_implicit_document_start(&mut self, position: Position) -> Event {
440 let event = Event::document_start(
441 position,
442 self.yaml_version.take(),
443 self.tag_directives.clone(),
444 true,
445 );
446 self.tag_directives.clear();
447 event
448 }
449
450 /// Validate that the parser is in a valid final state
451 fn validate_final_state(&self) -> Result<()> {
452 match self.state {
453 ParserState::StreamEnd | ParserState::DocumentEnd | ParserState::DocumentContent => {
454 // These are valid final states
455 Ok(())
456 }
457 ParserState::BlockSequence | ParserState::FlowSequence => {
458 let context = ErrorContext::from_input(self.scanner.input(), &self.position, 2)
459 .with_suggestion(
460 "Close the sequence with proper indentation or closing bracket".to_string(),
461 );
462 Err(Error::unclosed_delimiter_with_context(
463 self.position,
464 self.position,
465 "sequence",
466 context,
467 ))
468 }
469 ParserState::BlockMapping | ParserState::FlowMapping => {
470 let context = ErrorContext::from_input(self.scanner.input(), &self.position, 2)
471 .with_suggestion(
472 "Close the mapping with proper indentation or closing brace".to_string(),
473 );
474 Err(Error::unclosed_delimiter_with_context(
475 self.position,
476 self.position,
477 "mapping",
478 context,
479 ))
480 }
481 _ => {
482 let context = ErrorContext::from_input(self.scanner.input(), &self.position, 2)
483 .with_suggestion("Complete the YAML document structure".to_string());
484 Err(Error::parse_with_context(
485 self.position,
486 format!("Document ended in unexpected state: {:?}", self.state),
487 context,
488 ))
489 }
490 }
491 }
492
493 /// Generate the next event by processing the next token
494 fn generate_next_event(&mut self) -> Result<()> {
495 if let Some(token) = self.scanner.get_token()? {
496 self.position = token.end_position;
497 self.process_token(token)?;
498 }
499 Ok(())
500 }
501
502 /// Process a single token and generate appropriate events
503 #[allow(clippy::cognitive_complexity)]
504 fn process_token(&mut self, token: Token) -> Result<()> {
505 // Store the token type for later use without cloning
506 let token_type_for_tracking = match &token.token_type {
507 TokenType::Scalar(..) => Some(TokenType::Scalar(
508 String::new(),
509 crate::scanner::QuoteStyle::Plain,
510 )),
511 TokenType::BlockScalarLiteral(..) => Some(TokenType::BlockScalarLiteral(String::new())),
512 TokenType::BlockScalarFolded(..) => Some(TokenType::BlockScalarFolded(String::new())),
513 TokenType::Alias(..) => Some(TokenType::Alias(String::new())),
514 TokenType::Anchor(..) => Some(TokenType::Anchor(String::new())),
515 TokenType::Tag(..) => Some(TokenType::Tag(String::new())),
516 TokenType::Comment(..) => Some(TokenType::Comment(String::new())),
517 other => {
518 // For simple token types without data, we can safely clone
519 match other {
520 TokenType::StreamStart => Some(TokenType::StreamStart),
521 TokenType::StreamEnd => Some(TokenType::StreamEnd),
522 TokenType::DocumentStart => Some(TokenType::DocumentStart),
523 TokenType::DocumentEnd => Some(TokenType::DocumentEnd),
524 TokenType::BlockSequenceStart => Some(TokenType::BlockSequenceStart),
525 TokenType::BlockMappingStart => Some(TokenType::BlockMappingStart),
526 TokenType::BlockEnd => Some(TokenType::BlockEnd),
527 TokenType::FlowSequenceStart => Some(TokenType::FlowSequenceStart),
528 TokenType::FlowSequenceEnd => Some(TokenType::FlowSequenceEnd),
529 TokenType::FlowMappingStart => Some(TokenType::FlowMappingStart),
530 TokenType::FlowMappingEnd => Some(TokenType::FlowMappingEnd),
531 TokenType::BlockEntry => Some(TokenType::BlockEntry),
532 TokenType::FlowEntry => Some(TokenType::FlowEntry),
533 TokenType::Key => Some(TokenType::Key),
534 TokenType::Value => Some(TokenType::Value),
535 TokenType::YamlDirective(_, _) => Some(TokenType::YamlDirective(0, 0)),
536 TokenType::TagDirective(_, _) => {
537 Some(TokenType::TagDirective(String::new(), String::new()))
538 }
539 _ => None,
540 }
541 }
542 };
543
544 match &token.token_type {
545 TokenType::StreamStart => {
546 self.events.push(Event::stream_start(token.start_position));
547 self.state = ParserState::ImplicitDocumentStart;
548 }
549
550 TokenType::StreamEnd => {
551 // YAML 1.2 §6.8: a directive must be followed by a
552 // document body. If we reach end-of-stream with pending
553 // `%YAML` / `%TAG` directives and no document was ever
554 // opened, that's a parse error (yaml-test-suite 9MMA, B63P).
555 if matches!(
556 self.state,
557 ParserState::ImplicitDocumentStart | ParserState::StreamStart
558 ) && (self.yaml_version.is_some() || !self.tag_directives.is_empty())
559 {
560 return Err(Error::parse(
561 token.start_position,
562 "Directive without a document body",
563 ));
564 }
565 // YAML 1.2 §7.4: every `[` / `{` must be closed before
566 // end-of-stream. Walk the events; an unmatched
567 // FlowSequenceStart / FlowMappingStart is invalid
568 // (yaml-test-suite 6JTT, 9HCY, 9MQT/01).
569 if has_unclosed_flow_collection(&self.events) {
570 return Err(Error::parse(
571 token.start_position,
572 "Unclosed flow collection at end of stream",
573 ));
574 }
575 // YAML 1.2 §8.1.3.1: an implicit mapping key must be
576 // followed by `:`. A bare scalar at a mapping position
577 // with no \`:\` (and no explicit `?` marker) is invalid
578 // (yaml-test-suite 7MNF).
579 if matches!(self.state, ParserState::BlockMappingKey)
580 && !self.explicit_key_pending
581 && innermost_mapping_has_odd_children(&self.events)
582 {
583 // §8.22 carve-out: when the unmatched "key" is
584 // actually a collection node (the inline-wrapped
585 // explicit-key from yaml-test-suite M2N8 cluster),
586 // synth an empty value rather than erroring — the
587 // explicit-key construct allows omitted values.
588 let key_was_collection = matches!(
589 self.events.last().map(|e| &e.event_type),
590 Some(EventType::MappingEnd | EventType::SequenceEnd)
591 );
592 if key_was_collection {
593 self.events.push(Event::scalar(
594 token.start_position,
595 None,
596 None,
597 String::new(),
598 true,
599 false,
600 ScalarStyle::Plain,
601 ));
602 } else {
603 return Err(Error::parse(
604 token.start_position,
605 "Mapping key not followed by `:`",
606 ));
607 }
608 }
609 // YAML 1.2: an explicit `---` with NO body needs an
610 // implicit empty scalar as the doc's content (yaml-test-
611 // suite MUS6/02). We detect that case by checking the
612 // last emitted event — if it's still `DocumentStart`,
613 // nothing has been pushed to the body yet.
614 if matches!(
615 self.events.last().map(|e| &e.event_type),
616 Some(EventType::DocumentStart { .. })
617 ) {
618 self.events.push(Event::scalar(
619 token.start_position,
620 None,
621 None,
622 String::new(),
623 true,
624 false,
625 ScalarStyle::Plain,
626 ));
627 }
628 // §6.9: a stand-alone anchor or tag at end-of-stream
629 // produces a document with a tagged/anchored empty
630 // scalar (yaml-test-suite UKK6/02 — a bare \`!\`).
631 if matches!(self.state, ParserState::ImplicitDocumentStart)
632 && (self.pending_anchor.is_some() || self.pending_tag.is_some())
633 {
634 let event = self.create_implicit_document_start(token.start_position);
635 self.events.push(event);
636 self.events.push(Event::scalar(
637 token.start_position,
638 self.pending_anchor.take(),
639 self.pending_tag.take(),
640 String::new(),
641 true,
642 false,
643 ScalarStyle::Plain,
644 ));
645 self.state = ParserState::DocumentContent;
646 }
647 // Close any open document. A document is "open" in every
648 // state except: not-yet-started (StreamStart /
649 // ImplicitDocumentStart), or already closed (DocumentEnd /
650 // StreamEnd). If still open, also flush any unclosed
651 // block collections first.
652 if !matches!(
653 self.state,
654 ParserState::StreamStart
655 | ParserState::ImplicitDocumentStart
656 | ParserState::DocumentEnd
657 | ParserState::StreamEnd
658 ) {
659 close_open_collections(&mut self.events, token.start_position);
660 self.events
661 .push(Event::document_end(token.start_position, true));
662 }
663 self.events.push(Event::stream_end(token.start_position));
664 self.state = ParserState::StreamEnd;
665 }
666
667 TokenType::YamlDirective(major, minor) => {
668 // YAML 1.2 §6.8: directives may appear only before the
669 // first document or after an explicit `...` document end.
670 self.check_directive_context(token.start_position, "%YAML")?;
671 // §6.8.1: a document may have at most one `%YAML` directive.
672 if self.yaml_version.is_some() {
673 return Err(Error::parse(
674 token.start_position,
675 "Multiple %YAML directives in the same document",
676 ));
677 }
678 self.yaml_version = Some((*major, *minor));
679 }
680
681 TokenType::TagDirective(handle, prefix) => {
682 self.check_directive_context(token.start_position, "%TAG")?;
683 self.tag_directives.push((handle.clone(), prefix.clone()));
684 self.tag_resolver
685 .add_directive(handle.clone(), prefix.clone());
686 }
687
688 TokenType::DocumentStart => {
689 // If the most-recent event is still `DocumentStart`, the
690 // previous document had no body — emit an implicit empty
691 // scalar before closing it (yaml-test-suite 6XDY).
692 if matches!(
693 self.events.last().map(|e| &e.event_type),
694 Some(EventType::DocumentStart { .. })
695 ) {
696 self.events.push(Event::scalar(
697 token.start_position,
698 None,
699 None,
700 String::new(),
701 true,
702 false,
703 ScalarStyle::Plain,
704 ));
705 self.events
706 .push(Event::document_end(token.start_position, true));
707 // §6.8: \`%TAG\` and \`%YAML\` are scoped to one document.
708 // After the implicit close, reset the tag resolver
709 // so directives from the prior doc don't leak
710 // (yaml-test-suite QLJ7).
711 self.tag_resolver = TagResolver::new();
712 } else if has_open_document(&self.events) {
713 // The previous document is still open — its outer
714 // collection(s) and the document itself need closing
715 // before the new `---` (yaml-test-suite 35KP).
716 close_open_collections(&mut self.events, token.start_position);
717 self.events
718 .push(Event::document_end(token.start_position, true));
719 self.tag_resolver = TagResolver::new();
720 }
721
722 // Create document start with directives
723 self.events.push(Event::document_start(
724 token.start_position,
725 self.yaml_version.take(),
726 self.tag_directives.clone(),
727 false,
728 ));
729
730 // Clear tag directives after using them (YAML version persists across documents)
731 // But keep them in the tag resolver for this document
732 self.tag_directives.clear();
733
734 self.state = ParserState::DocumentStart;
735 }
736
737 TokenType::DocumentEnd => {
738 // §6.8: `...` only terminates an *open* document. If
739 // the stream so far has no DocumentStart (e.g. the
740 // input is just `...\n`, yaml-test-suite HWV9), the
741 // marker is a no-op.
742 if !has_open_document(&self.events) {
743 self.state = ParserState::ImplicitDocumentStart;
744 self.last_token_type = token_type_for_tracking;
745 return Ok(());
746 }
747 // Same empty-doc fixup as in DocumentStart/StreamEnd:
748 // `---\n...` needs an implicit empty scalar.
749 if matches!(
750 self.events.last().map(|e| &e.event_type),
751 Some(EventType::DocumentStart { .. })
752 ) {
753 self.events.push(Event::scalar(
754 token.start_position,
755 None,
756 None,
757 String::new(),
758 true,
759 false,
760 ScalarStyle::Plain,
761 ));
762 } else {
763 // Flush any still-open block collections so the
764 // event stream is balanced before -DOC.
765 close_open_collections(&mut self.events, token.start_position);
766 }
767 self.events
768 .push(Event::document_end(token.start_position, false));
769 // YAML 1.2: after `...`, the stream may continue with
770 // either another `---`, more directives, or implicit
771 // document content.
772 self.state = ParserState::ImplicitDocumentStart;
773 }
774
775 TokenType::BlockSequenceStart => {
776 // §3.2.1.1: reject a second root-level node
777 // (yaml-test-suite BD7L: `- a\n- b\ninvalid: x`).
778 if matches!(self.state, ParserState::DocumentContent)
779 && second_root_node_present(&self.events)
780 {
781 return Err(Error::parse(
782 token.start_position,
783 "Document already contains a root node",
784 ));
785 }
786 if matches!(self.state, ParserState::ImplicitDocumentStart) {
787 let event = self.create_implicit_document_start(token.start_position);
788 self.events.push(event);
789 }
790
791 // If we're starting a sequence within a mapping or
792 // outer-sequence context, push the current state so the
793 // outer collection can be restored on close. Without
794 // BlockSequence in this list, a nested `- -` sequence's
795 // inner close falls through to DocumentContent and the
796 // next BlockEntry spuriously opens a fresh sequence
797 // (yaml-test-suite 3ALJ, 57H4).
798 if matches!(
799 self.state,
800 ParserState::BlockMappingValue
801 | ParserState::BlockMappingKey
802 | ParserState::BlockSequence
803 ) {
804 self.state_stack.push(self.state);
805 }
806
807 self.events.push(Event::sequence_start(
808 token.start_position,
809 self.pending_anchor.take(),
810 self.pending_tag.take(),
811 false,
812 ));
813 self.state = ParserState::BlockSequence;
814 }
815
816 TokenType::BlockMappingStart => {
817 // §3.2.1.1: reject a second root-level node
818 // (yaml-test-suite BD7L variants).
819 if matches!(self.state, ParserState::DocumentContent)
820 && second_root_node_present(&self.events)
821 {
822 return Err(Error::parse(
823 token.start_position,
824 "Document already contains a root node",
825 ));
826 }
827 // §9.1.1: an anchor on the \`---\` doc-start line cannot
828 // be followed by an implicit single-pair mapping —
829 // the anchor would have nowhere to attach (it's not
830 // the mapping itself, not the key). \`--- &anchor a: b\`
831 // is invalid (yaml-test-suite CXX2).
832 if matches!(self.state, ParserState::DocumentStart)
833 && self.pending_anchor.is_some()
834 && self.pending_anchor_line == Some(token.start_position.line)
835 {
836 return Err(Error::parse(
837 token.start_position,
838 "Anchor on `---` doc-start line cannot precede an implicit mapping",
839 ));
840 }
841 // Determine whether to create a new mapping or continue existing one
842 // This token is generated when we encounter a key at the start of a line with nested content
843 // It doesn't always mean we need to create a new mapping - sometimes we're just continuing
844
845 let should_create_new_mapping = match self.state {
846 ParserState::ImplicitDocumentStart => {
847 // At document start, we need a new mapping
848 true
849 }
850 ParserState::DocumentStart => {
851 // After explicit document start (---), we need a new mapping
852 true
853 }
854 ParserState::DocumentContent => {
855 // This is a tricky case - we could be:
856 // 1. Starting a new root mapping
857 // 2. Continuing an existing root mapping
858 // The key is to check if we have an unclosed root mapping
859
860 // Count mapping depth from the end
861 let mut mapping_depth = 0;
862 let mut has_unclosed_mapping = false;
863
864 for event in self.events.iter().rev() {
865 match &event.event_type {
866 EventType::MappingEnd => mapping_depth += 1,
867 EventType::MappingStart { .. } => {
868 if mapping_depth == 0 {
869 has_unclosed_mapping = true;
870 break;
871 }
872 mapping_depth -= 1;
873 }
874 EventType::DocumentStart { .. } => break,
875 _ => {}
876 }
877 }
878
879 // Don't create a new mapping if we have an unclosed one
880 !has_unclosed_mapping
881 }
882 ParserState::BlockMappingValue => {
883 // If we're expecting a value and see BlockMappingStart, it's a nested mapping
884 true
885 }
886 ParserState::BlockMappingKey => {
887 // We're already in a mapping key context
888 // BlockMappingStart here means we're continuing the mapping unless:
889 // - After a Key token (complex key)
890 // - After a Value token (nested mapping as value)
891 matches!(
892 &self.last_token_type,
893 Some(TokenType::Key | TokenType::Value)
894 )
895 }
896 ParserState::BlockSequence => {
897 // In a sequence context, BlockMappingStart means we're starting
898 // a nested mapping as a sequence item
899 true
900 }
901 _ => {
902 // For other states, check last token
903 matches!(
904 &self.last_token_type,
905 Some(TokenType::Key | TokenType::Value)
906 )
907 }
908 };
909
910 if should_create_new_mapping {
911 // Create a new nested mapping
912 if matches!(self.state, ParserState::ImplicitDocumentStart) {
913 let event = self.create_implicit_document_start(token.start_position);
914 self.events.push(event);
915 }
916
917 // If we're in a mapping value or sequence context, push state to stack
918 if matches!(
919 self.state,
920 ParserState::BlockMappingValue | ParserState::BlockSequence
921 ) {
922 self.state_stack.push(self.state);
923 }
924
925 // If the BlockMappingStart wraps an implicit key
926 // at the document root and the next token is the
927 // key scalar on the SAME line as the pending
928 // anchor/tag, those properties belong to the key —
929 // not to the surrounding mapping (yaml-test-suite
930 // ZH7C, E76Z, 74H7). For mappings nested in a value
931 // or sequence position, or when the anchor is
932 // "freestanding" on a previous line, the anchor
933 // attaches to the mapping itself (yaml-test-suite
934 // 6BFJ, 9KAX).
935 let in_value_position = matches!(
936 self.state,
937 ParserState::BlockMappingValue | ParserState::BlockSequence
938 );
939 let next_token_line = self
940 .scanner
941 .peek_token()
942 .ok()
943 .and_then(|t| t.map(|tt| tt.start_position.line));
944 let next_is_scalar = matches!(
945 self.scanner.peek_token(),
946 Ok(Some(t)) if matches!(
947 t.token_type,
948 TokenType::Scalar(..) | TokenType::Anchor(_) | TokenType::Tag(_)
949 )
950 );
951 let anchor_same_line_as_key = matches!(
952 (self.pending_anchor_line, next_token_line),
953 (Some(a), Some(k)) if a == k
954 );
955 let (anchor, tag) =
956 if !in_value_position && next_is_scalar && anchor_same_line_as_key {
957 (None, None)
958 } else {
959 self.pending_anchor_line = None;
960 (self.pending_anchor.take(), self.pending_tag.take())
961 };
962 self.events.push(Event::mapping_start(
963 token.start_position,
964 anchor,
965 tag,
966 false,
967 ));
968 self.state = ParserState::BlockMappingKey;
969 } else {
970 // Continue existing mapping
971 // Ensure we're in the right state to handle the next key-value pair
972 match self.state {
973 ParserState::DocumentContent => {
974 // We should be continuing a mapping, so transition to BlockMappingKey
975 self.state = ParserState::BlockMappingKey;
976 }
977 ParserState::BlockMappingValue => {
978 // We just processed a value, now ready for next key
979 self.state = ParserState::BlockMappingKey;
980 }
981 ParserState::BlockMappingKey => {
982 // Already ready for next key, no state change needed
983 }
984 _ => {
985 // For other states, check if we can restore from state stack
986 if let Some(prev_state) = self.state_stack.last() {
987 if matches!(prev_state, ParserState::BlockMappingValue) {
988 if let Some(mapping_state) = self.state_stack.pop() {
989 self.state = mapping_state;
990 self.handle_node_completion();
991 }
992 }
993 }
994 }
995 }
996 }
997 }
998
999 TokenType::FlowSequenceStart => {
1000 if matches!(self.state, ParserState::ImplicitDocumentStart) {
1001 self.events.push(Event::document_start(
1002 token.start_position,
1003 None,
1004 vec![],
1005 true,
1006 ));
1007 }
1008
1009 // Save the enclosing state so we can restore it
1010 // after the flow collection closes. The save list
1011 // includes any state that can legitimately contain a
1012 // flow node — block contexts AND flow-mapping
1013 // key/value positions (yaml-test-suite SBG9
1014 // \`{a: [b,c], [d,e]: f}\` — without FlowMappingValue
1015 // in this list, state_stack pop'd None and we fell
1016 // through to DocumentContent).
1017 if matches!(
1018 self.state,
1019 ParserState::BlockMappingValue
1020 | ParserState::BlockMappingKey
1021 | ParserState::BlockSequence
1022 | ParserState::FlowSequence
1023 | ParserState::FlowMapping
1024 | ParserState::FlowMappingKey
1025 | ParserState::FlowMappingValue
1026 ) {
1027 self.state_stack.push(self.state);
1028 }
1029
1030 self.events.push(Event::sequence_start(
1031 token.start_position,
1032 self.pending_anchor.take(),
1033 self.pending_tag.take(),
1034 true,
1035 ));
1036 self.state = ParserState::FlowSequence;
1037 }
1038
1039 TokenType::FlowMappingStart => {
1040 if matches!(self.state, ParserState::ImplicitDocumentStart) {
1041 self.events.push(Event::document_start(
1042 token.start_position,
1043 None,
1044 vec![],
1045 true,
1046 ));
1047 }
1048
1049 // Save the enclosing state so we can restore it
1050 // after the flow collection closes. The save list
1051 // includes any state that can legitimately contain a
1052 // flow node — block contexts AND flow-mapping
1053 // key/value positions (yaml-test-suite SBG9
1054 // \`{a: [b,c], [d,e]: f}\` — without FlowMappingValue
1055 // in this list, state_stack pop'd None and we fell
1056 // through to DocumentContent).
1057 if matches!(
1058 self.state,
1059 ParserState::BlockMappingValue
1060 | ParserState::BlockMappingKey
1061 | ParserState::BlockSequence
1062 | ParserState::FlowSequence
1063 | ParserState::FlowMapping
1064 | ParserState::FlowMappingKey
1065 | ParserState::FlowMappingValue
1066 ) {
1067 self.state_stack.push(self.state);
1068 }
1069
1070 self.events.push(Event::mapping_start(
1071 token.start_position,
1072 self.pending_anchor.take(),
1073 self.pending_tag.take(),
1074 true,
1075 ));
1076 self.state = ParserState::FlowMapping;
1077 }
1078
1079 TokenType::FlowSequenceEnd => {
1080 // §7.5: close an open implicit single-pair flow mapping
1081 // before the outer flow sequence ends.
1082 if self.implicit_flow_pair_depth > 0
1083 && matches!(
1084 self.state,
1085 ParserState::FlowMapping
1086 | ParserState::FlowMappingKey
1087 | ParserState::FlowMappingValue
1088 )
1089 && matches!(self.state_stack.last(), Some(ParserState::FlowSequence))
1090 {
1091 if innermost_mapping_has_odd_children(&self.events) {
1092 self.events.push(Event::scalar(
1093 token.start_position,
1094 None,
1095 None,
1096 String::new(),
1097 true,
1098 false,
1099 ScalarStyle::Plain,
1100 ));
1101 }
1102 self.events.push(Event::mapping_end(token.start_position));
1103 self.state = self.state_stack.pop().unwrap();
1104 self.implicit_flow_pair_depth -= 1;
1105 }
1106 self.events.push(Event::sequence_end(token.start_position));
1107
1108 // Restore the previous state from the stack if available
1109 if let Some(prev_state) = self.state_stack.pop() {
1110 self.state = prev_state;
1111 } else {
1112 self.state = ParserState::DocumentContent;
1113 }
1114
1115 // Handle state transitions for mapping key/value processing
1116 self.handle_node_completion();
1117 }
1118
1119 TokenType::FlowMappingEnd => {
1120 // §7.5: explicit `?` with no key (yaml-test-suite
1121 // DFF7 \`{... ?\n}\`) — synth empty key AND empty
1122 // value before closing.
1123 if matches!(self.state, ParserState::FlowMappingKey)
1124 && self.explicit_key_pending
1125 && !innermost_mapping_has_odd_children(&self.events)
1126 {
1127 self.events.push(Event::scalar(
1128 token.start_position,
1129 None,
1130 None,
1131 String::new(),
1132 true,
1133 false,
1134 ScalarStyle::Plain,
1135 ));
1136 }
1137 // Spec §7.5: implicit empty value for a flow-mapping
1138 // entry that has only a key, e.g. `{ key }` or
1139 // `{ key, a: b }` (yaml-test-suite 8KB6).
1140 if innermost_mapping_has_odd_children(&self.events) {
1141 self.events.push(Event::scalar(
1142 token.start_position,
1143 None,
1144 None,
1145 String::new(),
1146 true,
1147 false,
1148 ScalarStyle::Plain,
1149 ));
1150 }
1151 self.events.push(Event::mapping_end(token.start_position));
1152
1153 // Restore the previous state from the stack if available
1154 if let Some(prev_state) = self.state_stack.pop() {
1155 self.state = prev_state;
1156 } else {
1157 self.state = ParserState::DocumentContent;
1158 }
1159
1160 // Handle state transitions for mapping key/value processing
1161 self.handle_node_completion();
1162 }
1163
1164 TokenType::BlockEnd => {
1165 // Determine what we're ending based on current state
1166 match self.state {
1167 ParserState::BlockSequence => {
1168 // §6.9: an anchor or tag left unused at the
1169 // close of the sequence belongs to an empty
1170 // scalar that is the final sequence item
1171 // (yaml-test-suite LE5A: \`- !!str\` produces
1172 // a tagged empty scalar before -SEQ).
1173 // §6.9.1: also if the previous token was
1174 // BlockEntry with no item between — the last
1175 // entry was an implicit empty (yaml-test-suite
1176 // SM9W cluster).
1177 let last_was_block_entry =
1178 matches!(self.last_token_type, Some(TokenType::BlockEntry));
1179 if self.pending_anchor.is_some()
1180 || self.pending_tag.is_some()
1181 || last_was_block_entry
1182 {
1183 self.events.push(Event::scalar(
1184 token.start_position,
1185 self.pending_anchor.take(),
1186 self.pending_tag.take(),
1187 String::new(),
1188 true,
1189 false,
1190 ScalarStyle::Plain,
1191 ));
1192 }
1193 self.events.push(Event::sequence_end(token.start_position));
1194 // Pop previous state from stack if available
1195 if let Some(prev_state) = self.state_stack.pop() {
1196 self.state = prev_state;
1197 // Handle state transitions for mapping key/value processing
1198 self.handle_node_completion();
1199 } else {
1200 self.state = ParserState::DocumentContent;
1201 }
1202 }
1203 ParserState::BlockMapping
1204 | ParserState::BlockMappingKey
1205 | ParserState::BlockMappingValue => {
1206 // §6.9.1: if the innermost mapping has odd
1207 // children (last key has no value), synth an
1208 // implicit empty value before closing
1209 // (yaml-test-suite 7W2P). If the unmatched key
1210 // came from a bare scalar with no `:`
1211 // (yaml-test-suite 7MNF), error instead.
1212 if innermost_mapping_has_odd_children(&self.events) {
1213 if matches!(self.state, ParserState::BlockMappingKey)
1214 && !self.explicit_key_pending
1215 {
1216 // §8.22 carve-out: if the unmatched
1217 // 'key' is a collection node (the
1218 // inline-wrapped explicit-key from
1219 // yaml-test-suite M2N8), synth empty
1220 // value instead of erroring.
1221 let key_was_collection = matches!(
1222 self.events.last().map(|e| &e.event_type),
1223 Some(EventType::MappingEnd | EventType::SequenceEnd)
1224 );
1225 if !key_was_collection {
1226 return Err(Error::parse(
1227 token.start_position,
1228 "Mapping key not followed by `:`",
1229 ));
1230 }
1231 }
1232 // §6.9: when synthesising the missing value
1233 // for the last key, consume any pending
1234 // anchor/tag — they were the property of
1235 // that absent value (yaml-test-suite PW8X
1236 // \`b: &b\\n- ...\` — &b belongs to b's empty
1237 // value, not a separate tagged scalar).
1238 self.events.push(Event::scalar(
1239 token.start_position,
1240 self.pending_anchor.take(),
1241 self.pending_tag.take(),
1242 String::new(),
1243 true,
1244 false,
1245 ScalarStyle::Plain,
1246 ));
1247 }
1248 // Flush leftover anchor/tag as a final tagged
1249 // empty scalar (mirror of the BlockSequence
1250 // arm). Skipped above when the missing-value
1251 // synth already consumed it.
1252 if self.pending_anchor.is_some() || self.pending_tag.is_some() {
1253 self.events.push(Event::scalar(
1254 token.start_position,
1255 self.pending_anchor.take(),
1256 self.pending_tag.take(),
1257 String::new(),
1258 true,
1259 false,
1260 ScalarStyle::Plain,
1261 ));
1262 }
1263 // If the pending-property flush above just
1264 // emitted a KEY (leaving odd children), we
1265 // still need the missing implicit empty
1266 // VALUE before closing the mapping (yaml-
1267 // test-suite PW8X \`? &d\` close case).
1268 if innermost_mapping_has_odd_children(&self.events) {
1269 self.events.push(Event::scalar(
1270 token.start_position,
1271 None,
1272 None,
1273 String::new(),
1274 true,
1275 false,
1276 ScalarStyle::Plain,
1277 ));
1278 }
1279 self.events.push(Event::mapping_end(token.start_position));
1280 // Pop previous state from stack if available
1281 if let Some(prev_state) = self.state_stack.pop() {
1282 self.state = prev_state;
1283 // If we popped back to a mapping value state, complete it
1284 if matches!(self.state, ParserState::BlockMappingValue) {
1285 self.handle_node_completion();
1286 }
1287 } else {
1288 // No state on stack - check if we're still in a root mapping
1289 // Count the mapping depth including the one we just closed
1290 let mut mapping_depth = 0;
1291
1292 for event in self.events.iter().rev() {
1293 match &event.event_type {
1294 EventType::MappingEnd => {
1295 mapping_depth += 1;
1296 }
1297 EventType::MappingStart { .. } => {
1298 if mapping_depth > 0 {
1299 mapping_depth -= 1;
1300 } else {
1301 // Found an unclosed mapping - we're still in the root mapping
1302 self.state = ParserState::BlockMappingKey;
1303 return Ok(());
1304 }
1305 }
1306 EventType::DocumentStart { .. } => break,
1307 _ => {}
1308 }
1309 }
1310
1311 // All mappings are closed
1312 self.state = ParserState::DocumentContent;
1313 }
1314 }
1315 _ => {}
1316 }
1317 }
1318
1319 TokenType::Scalar(value, quote_style) => {
1320 if matches!(self.state, ParserState::ImplicitDocumentStart) {
1321 self.events.push(Event::document_start(
1322 token.start_position,
1323 None,
1324 vec![],
1325 true,
1326 ));
1327 self.state = ParserState::DocumentContent;
1328 }
1329
1330 // §3.2.1.1: a document has exactly one root node.
1331 if matches!(self.state, ParserState::DocumentContent)
1332 && second_root_node_present(&self.events)
1333 {
1334 return Err(Error::parse(
1335 token.start_position,
1336 "Document already contains a root node",
1337 ));
1338 }
1339
1340 // §8.22: in BlockSequence state, every item must be
1341 // introduced by \`-\`. A Scalar arriving when the
1342 // previous token was already a scalar / block-scalar
1343 // / closed-flow-collection means \`- a\\n b\` style —
1344 // \`b\` is bogus content at the sequence's indent
1345 // (yaml-test-suite 6S55).
1346 if matches!(self.state, ParserState::BlockSequence)
1347 && matches!(
1348 self.last_token_type,
1349 Some(
1350 TokenType::Scalar(..)
1351 | TokenType::BlockScalarLiteral(..)
1352 | TokenType::BlockScalarFolded(..)
1353 | TokenType::FlowSequenceEnd
1354 | TokenType::FlowMappingEnd
1355 )
1356 )
1357 {
1358 return Err(Error::parse(
1359 token.start_position,
1360 "Block sequence item must start with `-`",
1361 ));
1362 }
1363
1364 // §7.4: in flow mapping/sequence between entries
1365 // (even children = ready for next key/item) a Scalar
1366 // must be preceded by a separator. If the previous
1367 // token was a Scalar (i.e. previous value just emitted)
1368 // and not a comma, this is a missing-comma error
1369 // (yaml-test-suite T833, CML9).
1370 if matches!(
1371 self.state,
1372 ParserState::FlowMapping | ParserState::FlowSequence
1373 ) && matches!(
1374 self.last_token_type,
1375 Some(
1376 TokenType::Scalar(..)
1377 | TokenType::FlowSequenceEnd
1378 | TokenType::FlowMappingEnd
1379 )
1380 ) {
1381 return Err(Error::parse(
1382 token.start_position,
1383 "Missing `,` separator between flow collection entries",
1384 ));
1385 }
1386
1387 // Check if we're in a sequence and the next token is Value (indicating a mapping key)
1388 if matches!(self.state, ParserState::BlockSequence) {
1389 if let Ok(Some(next_token)) = self.scanner.peek_token() {
1390 if matches!(next_token.token_type, TokenType::Value) {
1391 // This scalar is a mapping key within a sequence item
1392 // Push current state to stack and start a new mapping
1393 self.state_stack.push(self.state);
1394 self.events.push(Event::mapping_start(
1395 token.start_position,
1396 self.pending_anchor.take(),
1397 self.pending_tag.take(),
1398 false,
1399 ));
1400 self.state = ParserState::BlockMappingKey;
1401 }
1402 }
1403 }
1404
1405 // §7.5: a flow-sequence entry that is itself `key: value`
1406 // is an implicit single-pair flow mapping. Any
1407 // pending anchor/tag belongs to the KEY scalar, not
1408 // to the synthesised mapping (yaml-test-suite QF4Y,
1409 // L9U5, 87E4, 8UDB, 9MMW, LX3P, CN3R).
1410 //
1411 // §7.5 also says: an implicit key in flow context
1412 // must be on a SINGLE LINE. If the \`:\` is on a
1413 // different line from the key scalar, it's invalid
1414 // (yaml-test-suite DK4H, ZXT5).
1415 if matches!(self.state, ParserState::FlowSequence) {
1416 if let Ok(Some(next_token)) = self.scanner.peek_token() {
1417 if matches!(next_token.token_type, TokenType::Value) {
1418 if next_token.start_position.line != token.start_position.line {
1419 return Err(Error::parse(
1420 next_token.start_position,
1421 "Implicit key in flow context must be on a single line",
1422 ));
1423 }
1424 self.state_stack.push(self.state);
1425 self.events.push(Event::mapping_start(
1426 token.start_position,
1427 None,
1428 None,
1429 true,
1430 ));
1431 self.state = ParserState::FlowMappingKey;
1432 self.implicit_flow_pair_depth += 1;
1433 }
1434 }
1435 }
1436
1437 // YAML 1.2: if we're in BlockMappingValue and the next
1438 // token is `:` (Value), the current scalar is actually a
1439 // NEW KEY — the previous key's value is implicit empty
1440 // (yaml-test-suite 6KGN: `a: &anchor\nb: *anchor`).
1441 // Emit the empty value first (consuming any pending
1442 // anchor/tag — those were intended for the missing
1443 // value), then transition back to BlockMappingKey.
1444 //
1445 // BUT skip the heuristic when:
1446 // * the most recent event was an implicit empty scalar
1447 // (we just synthesised an empty key for a leading-`:`
1448 // mapping, yaml-test-suite 2JQS), or
1449 // * the current scalar is on the SAME line as the
1450 // previous `:` Value token — that puts the scalar
1451 // in the value slot of the current key
1452 // (yaml-test-suite 6M2F: `? &a a\n: &b b\n: *a`).
1453 // §8.22 V9D5: when we JUST closed an inline-wrapped
1454 // explicit key, the value position can also hold an
1455 // inline single-pair mapping. If the next token is
1456 // \`:\` (a key/value separator on this scalar's line),
1457 // wrap the scalar as the key of an inner mapping.
1458 if matches!(self.state, ParserState::BlockMappingValue)
1459 && self.just_closed_inline_wrap
1460 {
1461 self.just_closed_inline_wrap = false;
1462 if let Ok(Some(next_token)) = self.scanner.peek_token() {
1463 if matches!(next_token.token_type, TokenType::Value)
1464 && next_token.start_position.line == token.start_position.line
1465 {
1466 self.state_stack.push(self.state);
1467 self.events.push(Event::mapping_start(
1468 token.start_position,
1469 None,
1470 None,
1471 false,
1472 ));
1473 self.state = ParserState::BlockMappingKey;
1474 // Fall through to the normal Scalar push;
1475 // the scalar will become the inner key.
1476 }
1477 }
1478 }
1479
1480 if matches!(self.state, ParserState::BlockMappingValue) {
1481 let last_was_implicit_empty = matches!(self.events.last(), Some(ev) if matches!(
1482 &ev.event_type,
1483 EventType::Scalar { value, plain_implicit: true, style: ScalarStyle::Plain, .. }
1484 if value.is_empty()
1485 ));
1486 let same_line_as_value = self
1487 .last_value_token_line
1488 .map_or(false, |line| line == token.start_position.line);
1489 // Skip the "new key" pattern when the scalar IS
1490 // the inline value of a just-synthesised empty
1491 // key — both must hold (yaml-test-suite 2JQS).
1492 // S3PD shows it must NOT skip when the empty
1493 // key was on a different line from the current
1494 // scalar.
1495 let skip_pattern = last_was_implicit_empty && same_line_as_value;
1496 if !skip_pattern && !same_line_as_value {
1497 if let Ok(Some(next_token)) = self.scanner.peek_token() {
1498 if matches!(next_token.token_type, TokenType::Value) {
1499 self.events.push(Event::scalar(
1500 token.start_position,
1501 self.pending_anchor.take(),
1502 self.pending_tag.take(),
1503 String::new(),
1504 true,
1505 false,
1506 ScalarStyle::Plain,
1507 ));
1508 self.state = ParserState::BlockMappingKey;
1509 }
1510 }
1511 }
1512 }
1513
1514 // YAML 1.2 §6.9.1: if we're back at a key position but the
1515 // previous key still owes a value (odd children in the
1516 // active mapping), synthesise the implicit empty scalar
1517 // now — this scalar then becomes the next key
1518 // (yaml-test-suite 7W2P: `? a\n? b\nc:`).
1519 if matches!(self.state, ParserState::BlockMappingKey)
1520 && innermost_mapping_has_odd_children(&self.events)
1521 {
1522 self.events.push(Event::scalar(
1523 token.start_position,
1524 None,
1525 None,
1526 String::new(),
1527 true,
1528 false,
1529 ScalarStyle::Plain,
1530 ));
1531 }
1532
1533 // Convert QuoteStyle to ScalarStyle
1534 let style = match quote_style {
1535 crate::scanner::QuoteStyle::Plain => ScalarStyle::Plain,
1536 crate::scanner::QuoteStyle::Single => ScalarStyle::SingleQuoted,
1537 crate::scanner::QuoteStyle::Double => ScalarStyle::DoubleQuoted,
1538 };
1539
1540 self.events.push(Event::scalar(
1541 token.start_position,
1542 self.pending_anchor.take(), // Use pending anchor
1543 self.pending_tag.take(), // Use pending tag
1544 value.clone(),
1545 style == ScalarStyle::Plain,
1546 style != ScalarStyle::Plain,
1547 style,
1548 ));
1549
1550 // Handle state transitions for mapping key/value processing
1551 self.handle_node_completion();
1552 }
1553
1554 TokenType::BlockScalarLiteral(value) => {
1555 if matches!(self.state, ParserState::ImplicitDocumentStart) {
1556 self.events.push(Event::document_start(
1557 token.start_position,
1558 None,
1559 vec![],
1560 true,
1561 ));
1562 self.state = ParserState::DocumentContent;
1563 }
1564
1565 self.events.push(Event::scalar(
1566 token.start_position,
1567 self.pending_anchor.take(), // Use pending anchor
1568 self.pending_tag.take(), // Use pending tag
1569 value.clone(),
1570 false, // Not plain
1571 true, // Quoted style
1572 ScalarStyle::Literal,
1573 ));
1574
1575 // Handle state transitions for mapping key/value processing
1576 self.handle_node_completion();
1577 }
1578
1579 TokenType::BlockScalarFolded(value) => {
1580 if matches!(self.state, ParserState::ImplicitDocumentStart) {
1581 self.events.push(Event::document_start(
1582 token.start_position,
1583 None,
1584 vec![],
1585 true,
1586 ));
1587 self.state = ParserState::DocumentContent;
1588 }
1589
1590 self.events.push(Event::scalar(
1591 token.start_position,
1592 self.pending_anchor.take(), // Use pending anchor
1593 self.pending_tag.take(), // Use pending tag
1594 value.clone(),
1595 false, // Not plain
1596 true, // Quoted style
1597 ScalarStyle::Folded,
1598 ));
1599
1600 // Handle state transitions for mapping key/value processing
1601 self.handle_node_completion();
1602 }
1603
1604 TokenType::BlockEntry => {
1605 // Block sequence entry - this indicates a new item in a sequence
1606 // We need to ensure proper state management for nested structures
1607 match self.state {
1608 ParserState::BlockSequence => {
1609 // We're already in a sequence, this is a new item.
1610 // §6.9.1: if the previous token was also a
1611 // BlockEntry, the previous item had no value —
1612 // synthesise an implicit empty scalar before
1613 // accepting this new BlockEntry (yaml-test-suite
1614 // SM9W cluster). Also synth when a pending
1615 // anchor/tag was left on a PREVIOUS line — the
1616 // property was the previous item's empty value
1617 // (yaml-test-suite PW8X).
1618 let last_was_block_entry =
1619 matches!(self.last_token_type, Some(TokenType::BlockEntry));
1620 let earliest_property_line =
1621 match (self.pending_anchor_line, self.pending_tag_line) {
1622 (Some(a), Some(t)) => Some(a.min(t)),
1623 (Some(a), None) => Some(a),
1624 (None, Some(t)) => Some(t),
1625 (None, None) => None,
1626 };
1627 let property_from_prev_line = (self.pending_anchor.is_some()
1628 || self.pending_tag.is_some())
1629 && earliest_property_line
1630 .map_or(false, |a| a < token.start_position.line);
1631 if last_was_block_entry || property_from_prev_line {
1632 self.events.push(Event::scalar(
1633 token.start_position,
1634 self.pending_anchor.take(),
1635 self.pending_tag.take(),
1636 String::new(),
1637 true,
1638 false,
1639 ScalarStyle::Plain,
1640 ));
1641 self.pending_anchor_line = None;
1642 self.pending_tag_line = None;
1643 }
1644 }
1645 ParserState::BlockMapping | ParserState::BlockMappingValue => {
1646 // If we encounter a BlockEntry while in a mapping,
1647 // we need to close the mapping and continue the sequence
1648 self.events.push(Event::mapping_end(token.start_position));
1649 self.state = ParserState::BlockSequence;
1650 }
1651 _ => {
1652 // BlockEntry in other contexts might indicate we need to start a sequence
1653 // This handles implicit sequence starts
1654 if matches!(self.state, ParserState::ImplicitDocumentStart) {
1655 self.events.push(Event::document_start(
1656 token.start_position,
1657 None,
1658 vec![],
1659 true,
1660 ));
1661 }
1662
1663 // Start an implicit sequence if we're not already in one
1664 self.events.push(Event::sequence_start(
1665 token.start_position,
1666 self.pending_anchor.take(),
1667 self.pending_tag.take(),
1668 false,
1669 ));
1670 self.state = ParserState::BlockSequence;
1671 }
1672 }
1673 }
1674
1675 TokenType::Value => {
1676 // Snapshot the PREVIOUS Value token's line BEFORE
1677 // updating to the current one. The match arms below
1678 // need this to detect multi-`:`-on-same-line (yaml-
1679 // test-suite ZL4Z, ZCZ6).
1680 let prev_value_line = self.last_value_token_line;
1681 self.last_value_token_line = Some(token.start_position.line);
1682 self.explicit_key_pending = false;
1683 // YAML 1.2 §6.9.1: a `:` with no preceding key implies an
1684 // empty key. Handle the four states where this can arise:
1685 // * ImplicitDocumentStart — open `+DOC`, `+MAP`, empty key.
1686 // * DocumentContent — open `+MAP`, empty key.
1687 // * BlockMappingKey with EVEN children — empty key for
1688 // the next entry (no scalar preceded the `:`).
1689 // * Normal cases (`BlockMappingKey` with odd children,
1690 // `FlowMappingKey`) — just transition state.
1691 match self.state {
1692 ParserState::ImplicitDocumentStart => {
1693 let event = self.create_implicit_document_start(token.start_position);
1694 self.events.push(event);
1695 // The mapping itself has no anchor/tag here —
1696 // those (if any) belong to the (empty) key.
1697 self.events.push(Event::mapping_start(
1698 token.start_position,
1699 None,
1700 None,
1701 false,
1702 ));
1703 self.events.push(Event::scalar(
1704 token.start_position,
1705 self.pending_anchor.take(),
1706 self.pending_tag.take(),
1707 String::new(),
1708 true,
1709 false,
1710 ScalarStyle::Plain,
1711 ));
1712 self.state = ParserState::BlockMappingValue;
1713 }
1714 ParserState::DocumentContent | ParserState::DocumentStart => {
1715 self.events.push(Event::mapping_start(
1716 token.start_position,
1717 None,
1718 None,
1719 false,
1720 ));
1721 self.events.push(Event::scalar(
1722 token.start_position,
1723 self.pending_anchor.take(),
1724 self.pending_tag.take(),
1725 String::new(),
1726 true,
1727 false,
1728 ScalarStyle::Plain,
1729 ));
1730 self.state = ParserState::BlockMappingValue;
1731 }
1732 ParserState::BlockMappingKey => {
1733 // §8.22 V9D5: when a `:` arrives at the same
1734 // column as the most recent `?` on a LATER
1735 // line, it's the explicit value separator of
1736 // that `?` key. If we previously wrapped an
1737 // inline single-pair mapping for the explicit
1738 // key (via the path below), close it first so
1739 // the outer mapping receives the value (yaml-
1740 // test-suite V9D5).
1741 // Use inline_wrap_column (set when we opened a
1742 // V9D5-style inline wrap) for matching the
1743 // close. Don't depend on last_key_marker_*
1744 // since those get reset on wrap open.
1745 if self
1746 .inline_wrap_column
1747 .map_or(false, |c| c == token.start_position.column)
1748 && !self.state_stack.is_empty()
1749 && matches!(self.state_stack.last(), Some(ParserState::BlockMappingKey))
1750 {
1751 // Close inline-wrapped key mapping if its
1752 // children are even (complete pairs).
1753 if !innermost_mapping_has_odd_children(&self.events) {
1754 self.events.push(Event::mapping_end(token.start_position));
1755 self.state = self.state_stack.pop().unwrap();
1756 self.inline_wrap_column = None;
1757 self.just_closed_inline_wrap = true;
1758 }
1759 }
1760 // §8.22: when the explicit key marker (\`?\`) is
1761 // followed by a node + \`:\` on the SAME line,
1762 // that whole construct is an inline single-pair
1763 // mapping (the explicit key node itself).
1764 // Wrap retroactively by inserting an inner
1765 // MappingStart before the just-emitted key
1766 // node. yaml-test-suite M2N8/01 \`? []: x\`,
1767 // and the empty-prefix variant M2N8/00
1768 // \`- ? : x\`.
1769 let odd_children = innermost_mapping_has_odd_children(&self.events);
1770 let key_marker_same_line = self
1771 .last_key_marker_line
1772 .map_or(false, |l| l == token.start_position.line);
1773 // Empty-prefix variant: `?` then `:` directly
1774 // (no node between). Open inner mapping with
1775 // empty key and transition to inner value.
1776 if !odd_children && key_marker_same_line {
1777 self.state_stack.push(self.state);
1778 self.events.push(Event::mapping_start(
1779 token.start_position,
1780 None,
1781 None,
1782 false,
1783 ));
1784 self.events.push(Event::scalar(
1785 token.start_position,
1786 None,
1787 None,
1788 String::new(),
1789 true,
1790 false,
1791 ScalarStyle::Plain,
1792 ));
1793 self.last_key_marker_line = None;
1794 self.state = ParserState::BlockMappingValue;
1795 self.last_token_type = token_type_for_tracking;
1796 return Ok(());
1797 }
1798 if odd_children && key_marker_same_line {
1799 // Find the most recent emitted KEY-position
1800 // node within the active mapping (it'll be
1801 // either a Scalar or a flow-collection
1802 // open). Insert MappingStart BEFORE it.
1803 let mut depth = 0i32;
1804 let mut insert_at = None;
1805 for (idx, ev) in self.events.iter().enumerate().rev() {
1806 match &ev.event_type {
1807 EventType::MappingEnd | EventType::SequenceEnd => {
1808 depth += 1;
1809 }
1810 EventType::MappingStart { .. }
1811 | EventType::SequenceStart { .. } => {
1812 if depth == 0 {
1813 insert_at = Some(idx);
1814 break;
1815 }
1816 depth -= 1;
1817 }
1818 EventType::Scalar { .. } if depth == 0 => {
1819 insert_at = Some(idx);
1820 break;
1821 }
1822 _ => {}
1823 }
1824 }
1825 if let Some(ii) = insert_at {
1826 self.state_stack.push(self.state);
1827 self.events.insert(
1828 ii,
1829 Event::mapping_start(
1830 self.events[ii].position,
1831 None,
1832 None,
1833 false,
1834 ),
1835 );
1836 // Record the wrap's "outer key column"
1837 // so the matching explicit-value `:`
1838 // (on a later line at the same column
1839 // as the `?` marker) can close it.
1840 self.inline_wrap_column = self.last_key_marker_column;
1841 self.last_key_marker_line = None;
1842 self.state = ParserState::BlockMappingValue;
1843 self.last_token_type = token_type_for_tracking;
1844 return Ok(());
1845 }
1846 }
1847 let even_children = !innermost_mapping_has_odd_children(&self.events);
1848 if even_children {
1849 // §8.22: two implicit \`:\` on the same line
1850 // in a block mapping (e.g. \`a: 'b': c\`) is
1851 // invalid — block mappings cannot express
1852 // nested implicit single-pair mappings
1853 // inline (yaml-test-suite ZL4Z, ZCZ6).
1854 //
1855 // Carve-out: when the PREVIOUS `:` on this
1856 // line was an explicit value separator
1857 // (paired with `?`), the value position
1858 // legitimately holds an inline mapping
1859 // (yaml-test-suite V9D5 \`: moon: white\`
1860 // after \`? earth: blue\`). We detect this
1861 // by checking whether the synth'd empty key
1862 // (or any structural emission) happened on
1863 // THIS line — if so, allow.
1864 let prev_was_scalar = matches!(
1865 self.last_token_type,
1866 Some(
1867 TokenType::Scalar(..)
1868 | TokenType::BlockScalarLiteral(..)
1869 | TokenType::BlockScalarFolded(..)
1870 )
1871 );
1872 let same_line_as_prev_colon = prev_value_line
1873 .map_or(false, |line| line == token.start_position.line);
1874 // Walk back from the most recent event:
1875 // if the last scalar BEFORE the just-pushed
1876 // scalar is an EMPTY implicit scalar on
1877 // this line (the synth'd empty key from a
1878 // prior `:`), the prior `:` was structural
1879 // and this `:` is the inline mapping's
1880 // separator.
1881 let mut saw_synth_empty_on_this_line = false;
1882 let mut seen_value = 0;
1883 for ev in self.events.iter().rev() {
1884 if let EventType::Scalar {
1885 value,
1886 plain_implicit,
1887 ..
1888 } = &ev.event_type
1889 {
1890 if seen_value >= 1 {
1891 if value.is_empty()
1892 && *plain_implicit
1893 && ev.position.line == token.start_position.line
1894 {
1895 saw_synth_empty_on_this_line = true;
1896 }
1897 break;
1898 }
1899 seen_value += 1;
1900 }
1901 }
1902 if prev_was_scalar
1903 && same_line_as_prev_colon
1904 && !saw_synth_empty_on_this_line
1905 {
1906 return Err(Error::parse(
1907 token.start_position,
1908 "Multiple `:` on the same line in block mapping",
1909 ));
1910 }
1911 // Missing key — synthesise empty scalar
1912 // first. Pending anchor/tag belongs to that
1913 // empty key (yaml-test-suite PW8X).
1914 self.events.push(Event::scalar(
1915 token.start_position,
1916 self.pending_anchor.take(),
1917 self.pending_tag.take(),
1918 String::new(),
1919 true,
1920 false,
1921 ScalarStyle::Plain,
1922 ));
1923 }
1924 self.state = ParserState::BlockMappingValue;
1925 }
1926 ParserState::FlowMappingKey => {
1927 self.state = ParserState::FlowMappingValue;
1928 }
1929 ParserState::BlockSequence
1930 if matches!(self.last_token_type, Some(TokenType::BlockEntry)) =>
1931 {
1932 // §8.22: \`- :\` — the sequence item is a
1933 // mapping with an implicit empty key and the
1934 // `:` is the key/value separator (yaml-test-
1935 // suite UKK6/00).
1936 self.state_stack.push(self.state);
1937 self.events.push(Event::mapping_start(
1938 token.start_position,
1939 self.pending_anchor.take(),
1940 self.pending_tag.take(),
1941 false,
1942 ));
1943 self.events.push(Event::scalar(
1944 token.start_position,
1945 None,
1946 None,
1947 String::new(),
1948 true,
1949 false,
1950 ScalarStyle::Plain,
1951 ));
1952 self.state = ParserState::BlockMappingValue;
1953 }
1954 ParserState::FlowMapping => {
1955 // §7.5: in FlowMapping state, a `:` separates
1956 // an emitted key from its value (odd children
1957 // means the key scalar is already on the
1958 // stack — normal). If children are even,
1959 // we're starting a new entry with an empty
1960 // key. The pending anchor/tag (if any) belongs
1961 // to that empty key (yaml-test-suite NKF9,
1962 // WZ62: \`!!str : bar\` — empty key tagged
1963 // !!str).
1964 if !innermost_mapping_has_odd_children(&self.events) {
1965 self.events.push(Event::scalar(
1966 token.start_position,
1967 self.pending_anchor.take(),
1968 self.pending_tag.take(),
1969 String::new(),
1970 true,
1971 false,
1972 ScalarStyle::Plain,
1973 ));
1974 }
1975 self.state = ParserState::FlowMappingValue;
1976 }
1977 ParserState::FlowSequence => {
1978 // §7.5: \`[ {k:v}:value ]\` — a closed flow
1979 // collection followed by \`:\` makes that flow
1980 // node the implicit key. Retroactively wrap
1981 // it in an implicit single-pair mapping by
1982 // inserting MappingStart BEFORE the matching
1983 // flow-open event (yaml-test-suite 9MMW).
1984 let last_is_flow_close = matches!(
1985 self.events.last().map(|e| &e.event_type),
1986 Some(EventType::MappingEnd | EventType::SequenceEnd)
1987 );
1988 if last_is_flow_close {
1989 // Find the matching open via depth walk.
1990 let mut depth = 0i32;
1991 let mut open_idx = None;
1992 for (idx, ev) in self.events.iter().enumerate().rev() {
1993 match &ev.event_type {
1994 EventType::MappingEnd | EventType::SequenceEnd => {
1995 depth += 1;
1996 }
1997 EventType::MappingStart {
1998 flow_style: true, ..
1999 }
2000 | EventType::SequenceStart {
2001 flow_style: true, ..
2002 } => {
2003 depth -= 1;
2004 if depth == 0 {
2005 open_idx = Some(idx);
2006 break;
2007 }
2008 }
2009 _ => {}
2010 }
2011 }
2012 if let Some(oi) = open_idx {
2013 self.state_stack.push(self.state);
2014 self.events.insert(
2015 oi,
2016 Event::mapping_start(
2017 self.events[oi].position,
2018 None,
2019 None,
2020 true,
2021 ),
2022 );
2023 self.state = ParserState::FlowMappingValue;
2024 self.implicit_flow_pair_depth += 1;
2025 self.last_token_type = token_type_for_tracking;
2026 return Ok(());
2027 }
2028 }
2029 // §7.5: `[ : value ]` — leading `:` with no
2030 // preceding scalar implies an empty key for an
2031 // implicit single-pair flow mapping
2032 // (yaml-test-suite CFD4).
2033 self.state_stack.push(self.state);
2034 self.events.push(Event::mapping_start(
2035 token.start_position,
2036 None,
2037 None,
2038 true,
2039 ));
2040 self.events.push(Event::scalar(
2041 token.start_position,
2042 None,
2043 None,
2044 String::new(),
2045 true,
2046 false,
2047 ScalarStyle::Plain,
2048 ));
2049 self.state = ParserState::FlowMappingValue;
2050 self.implicit_flow_pair_depth += 1;
2051 }
2052 _ => {}
2053 }
2054 }
2055
2056 TokenType::FlowEntry => {
2057 // YAML 1.2 §7.4: a `,` must follow an entry. Leading
2058 // `,` (e.g. `[ , a, b ]`) and consecutive `,, ` are
2059 // invalid (yaml-test-suite 9MAG, CTN5).
2060 let no_prior_entry = matches!(
2061 self.events.last().map(|e| &e.event_type),
2062 Some(
2063 EventType::SequenceStart {
2064 flow_style: true,
2065 ..
2066 } | EventType::MappingStart {
2067 flow_style: true,
2068 ..
2069 }
2070 )
2071 );
2072 if no_prior_entry {
2073 return Err(Error::parse(
2074 token.start_position,
2075 "Flow entry separator `,` with no preceding entry",
2076 ));
2077 }
2078 // Consecutive `,` — last_token_type carries the kind of
2079 // the previous token. If it's also FlowEntry, no entry
2080 // came between (e.g. `[a, , b]`, `[a, b, , ]`).
2081 if matches!(self.last_token_type, Some(TokenType::FlowEntry)) {
2082 return Err(Error::parse(
2083 token.start_position,
2084 "Consecutive `,` separators in flow collection",
2085 ));
2086 }
2087 // §7.5: inside a flow mapping, a comma terminates the
2088 // current entry. If the entry is missing its value
2089 // (state FlowMappingValue or odd children), synth an
2090 // implicit empty scalar — consuming any pending
2091 // anchor/tag, which would have been a property of
2092 // the missing value (yaml-test-suite 8KB6, 9BXH,
2093 // FRK4, WZ62).
2094 if matches!(
2095 self.state,
2096 ParserState::FlowMapping
2097 | ParserState::FlowMappingKey
2098 | ParserState::FlowMappingValue
2099 ) && innermost_mapping_has_odd_children(&self.events)
2100 {
2101 self.events.push(Event::scalar(
2102 token.start_position,
2103 self.pending_anchor.take(),
2104 self.pending_tag.take(),
2105 String::new(),
2106 true,
2107 false,
2108 ScalarStyle::Plain,
2109 ));
2110 self.state = ParserState::FlowMapping;
2111 }
2112
2113 // §7.5: same close-on-comma logic for implicit
2114 // single-pair mappings.
2115 if self.implicit_flow_pair_depth > 0
2116 && matches!(
2117 self.state,
2118 ParserState::FlowMapping
2119 | ParserState::FlowMappingKey
2120 | ParserState::FlowMappingValue
2121 )
2122 && matches!(self.state_stack.last(), Some(ParserState::FlowSequence))
2123 {
2124 if innermost_mapping_has_odd_children(&self.events) {
2125 self.events.push(Event::scalar(
2126 token.start_position,
2127 None,
2128 None,
2129 String::new(),
2130 true,
2131 false,
2132 ScalarStyle::Plain,
2133 ));
2134 }
2135 self.events.push(Event::mapping_end(token.start_position));
2136 self.state = self.state_stack.pop().unwrap();
2137 self.implicit_flow_pair_depth -= 1;
2138 }
2139 }
2140
2141 TokenType::Anchor(name) => {
2142 // YAML 1.2 §6.9.2: a node may have at most one anchor.
2143 // A second anchor before the node is consumed is invalid
2144 // (yaml-test-suite 4JVG).
2145 if self.pending_anchor.is_some() {
2146 return Err(Error::parse(
2147 token.start_position,
2148 "Node may not have more than one anchor",
2149 ));
2150 }
2151 // Record the anchor name so subsequent aliases can be
2152 // validated against it (YAML 1.2 §6.9.2 forbids forward
2153 // references).
2154 self.defined_anchors.insert(name.clone());
2155 self.pending_anchor = Some(name.clone());
2156 self.pending_anchor_line = Some(token.start_position.line);
2157 }
2158
2159 TokenType::Alias(name) => {
2160 // YAML 1.2 §6.9.2: alias must reference a previously
2161 // defined anchor — forward references are invalid.
2162 if !self.defined_anchors.contains(name.as_str()) {
2163 return Err(Error::parse(
2164 token.start_position,
2165 format!("Alias `*{name}` references an undefined anchor"),
2166 ));
2167 }
2168 // §6.9.2: an alias is a reference, not an independent
2169 // node — it cannot carry an anchor or tag of its own
2170 // (yaml-test-suite SR86, SU74).
2171 if self.pending_anchor.is_some() || self.pending_tag.is_some() {
2172 return Err(Error::parse(
2173 token.start_position,
2174 "Alias may not have an anchor or tag",
2175 ));
2176 }
2177 if matches!(self.state, ParserState::ImplicitDocumentStart) {
2178 self.events.push(Event::document_start(
2179 token.start_position,
2180 None,
2181 vec![],
2182 true,
2183 ));
2184 self.state = ParserState::DocumentContent;
2185 }
2186
2187 // Generate alias event
2188 self.events
2189 .push(Event::alias(token.start_position, name.clone()));
2190
2191 // Handle state transitions for mapping key/value processing
2192 self.handle_node_completion();
2193 }
2194
2195 TokenType::Tag(tag) => {
2196 // YAML 1.2 §6.9.1 allows at most one tag per node, but
2197 // (like the double-anchor check) detecting that at this
2198 // layer produces false positives — a tag preceding an
2199 // implicit empty node in a sequence is followed by the
2200 // tag of the next sibling node, and the same `pending_tag`
2201 // field is reused. Until the parser tracks per-node tag
2202 // scopes, accept the overwrite silently (yaml-test-suite
2203 // FH7J relies on this).
2204 // Resolve and normalize the tag before storing.
2205 // §6.8: an unresolvable named-handle tag (e.g. `!prefix!X`
2206 // when no `%TAG !prefix!` directive is in scope) is
2207 // invalid (yaml-test-suite QLJ7).
2208 match self.tag_resolver.resolve(&tag) {
2209 Ok(resolved_tag) => {
2210 self.pending_tag = Some(resolved_tag.uri);
2211 self.pending_tag_line = Some(token.start_position.line);
2212 }
2213 Err(e) => {
2214 // Only error on named-handle tags (`!name!suffix`),
2215 // not bare-tag fallback paths.
2216 let is_named_handle = tag.starts_with('!')
2217 && tag[1..].contains('!')
2218 && !tag.starts_with("!!");
2219 if is_named_handle {
2220 return Err(Error::parse(
2221 token.start_position,
2222 format!("Undefined tag handle in `{tag}`: {e}"),
2223 ));
2224 }
2225 self.pending_tag = Some(tag.clone());
2226 self.pending_tag_line = Some(token.start_position.line);
2227 }
2228 }
2229 }
2230
2231 // TODO: Implement these when we add support for advanced features
2232 TokenType::Comment(_) => {
2233 // Not implemented in basic version
2234 }
2235
2236 // Complex key marker
2237 TokenType::Key => {
2238 self.explicit_key_pending = true;
2239 self.last_key_marker_line = Some(token.start_position.line);
2240 self.last_key_marker_column = Some(token.start_position.column);
2241 match self.state {
2242 ParserState::ImplicitDocumentStart => {
2243 // Start implicit document and mapping
2244 let event = self.create_implicit_document_start(token.start_position);
2245 self.events.push(event);
2246 self.events.push(Event::mapping_start(
2247 token.start_position,
2248 self.pending_anchor.take(),
2249 self.pending_tag.take(),
2250 false,
2251 ));
2252 self.state = ParserState::BlockMappingKey;
2253 }
2254 ParserState::DocumentStart => {
2255 // Explicit document start (`---`) followed by a
2256 // complex-key marker — open the document body as
2257 // an implicit block mapping (yaml-test-suite 2XXW).
2258 self.events.push(Event::mapping_start(
2259 token.start_position,
2260 self.pending_anchor.take(),
2261 self.pending_tag.take(),
2262 false,
2263 ));
2264 self.state = ParserState::BlockMappingKey;
2265 }
2266 ParserState::DocumentContent => {
2267 // Check if we just finished a mapping - if so, continue it instead of starting new one
2268 // This happens when the previous mapping key-value pair was processed but no BlockEnd was generated
2269 if !self.events.is_empty() {
2270 if let Some(last_event) = self.events.last() {
2271 // If the last event was a scalar and we have a MappingStart before it,
2272 // we're probably continuing an existing mapping
2273 if matches!(last_event.event_type, EventType::Scalar { .. }) {
2274 // Look for a recent MappingStart without a corresponding MappingEnd
2275 let mut mapping_depth = 0;
2276 let mut has_unfinished_mapping = false;
2277
2278 for event in self.events.iter().rev() {
2279 match &event.event_type {
2280 EventType::MappingEnd => mapping_depth += 1,
2281 EventType::MappingStart { .. } => {
2282 if mapping_depth == 0 {
2283 has_unfinished_mapping = true;
2284 break;
2285 }
2286 mapping_depth -= 1;
2287 }
2288 _ => {}
2289 }
2290 }
2291
2292 if has_unfinished_mapping {
2293 // Continue the existing mapping instead of starting a new one
2294 self.state = ParserState::BlockMappingKey;
2295 return Ok(());
2296 }
2297 }
2298 }
2299 }
2300
2301 // Start new mapping
2302 self.events.push(Event::mapping_start(
2303 token.start_position,
2304 self.pending_anchor.take(),
2305 self.pending_tag.take(),
2306 false,
2307 ));
2308 self.state = ParserState::BlockMappingKey;
2309 }
2310 ParserState::BlockMapping | ParserState::FlowMapping => {
2311 // Already in a mapping, now we have a complex key
2312 self.state = if matches!(self.state, ParserState::BlockMapping) {
2313 ParserState::BlockMappingKey
2314 } else {
2315 ParserState::FlowMappingKey
2316 };
2317 }
2318 ParserState::FlowSequence => {
2319 // §7.5: `[? key : value, ...]` — the `?`
2320 // opens an implicit single-pair flow mapping
2321 // with an explicit complex key (yaml-test-
2322 // suite CT4Q).
2323 self.state_stack.push(self.state);
2324 self.events.push(Event::mapping_start(
2325 token.start_position,
2326 None,
2327 None,
2328 true,
2329 ));
2330 self.state = ParserState::FlowMappingKey;
2331 self.implicit_flow_pair_depth += 1;
2332 }
2333 ParserState::BlockSequence => {
2334 // §8.22: `- ? key : value` — the sequence
2335 // item is itself a block mapping with an
2336 // explicit complex key. Open the wrapping
2337 // mapping before the explicit key marker is
2338 // consumed (yaml-test-suite M2N8/00, V9D5,
2339 // KK5P, PW8X).
2340 self.state_stack.push(self.state);
2341 self.events.push(Event::mapping_start(
2342 token.start_position,
2343 self.pending_anchor.take(),
2344 self.pending_tag.take(),
2345 false,
2346 ));
2347 self.state = ParserState::BlockMappingKey;
2348 }
2349 ParserState::BlockMappingValue => {
2350 // §8.22: \`outer:\\n ? complex\` — the outer
2351 // mapping's value is itself a block mapping
2352 // whose first key is a complex key. Open the
2353 // value mapping and transition to BlockMappingKey
2354 // (yaml-test-suite KK5P).
2355 self.state_stack.push(self.state);
2356 self.events.push(Event::mapping_start(
2357 token.start_position,
2358 self.pending_anchor.take(),
2359 self.pending_tag.take(),
2360 false,
2361 ));
2362 self.state = ParserState::BlockMappingKey;
2363 }
2364 ParserState::BlockMappingKey | ParserState::FlowMappingKey => {
2365 // A new `?` while we still owe a value for the
2366 // previous key — synthesise an implicit empty
2367 // scalar so the mapping stays balanced
2368 // (yaml-test-suite 7W2P).
2369 if innermost_mapping_has_odd_children(&self.events) {
2370 self.events.push(Event::scalar(
2371 token.start_position,
2372 None,
2373 None,
2374 String::new(),
2375 true,
2376 false,
2377 ScalarStyle::Plain,
2378 ));
2379 }
2380 }
2381 _ => {
2382 let context =
2383 ErrorContext::from_input(self.scanner.input(), &self.position, 2)
2384 .with_suggestion(
2385 "Complex keys must be used in mapping contexts".to_string(),
2386 );
2387 return Err(Error::parse_with_context(
2388 self.position,
2389 "Complex key marker (?) in invalid context",
2390 context,
2391 ));
2392 }
2393 }
2394 }
2395 }
2396
2397 // Update the last token type for next iteration
2398 self.last_token_type = token_type_for_tracking;
2399
2400 Ok(())
2401 }
2402
2403 /// Handle completion of a node (scalar or collection) and manage mapping state transitions
2404 #[allow(clippy::missing_const_for_fn)]
2405 fn handle_node_completion(&mut self) {
2406 match self.state {
2407 ParserState::BlockMappingKey => {
2408 // After processing a key, we stay in BlockMappingKey state
2409 // The Value token (:) will transition us to BlockMappingValue
2410 // No state change needed here
2411 }
2412 ParserState::FlowMappingKey => {
2413 // After processing a key in flow mapping, we stay in FlowMappingKey state
2414 // The Value token (:) will transition us to FlowMappingValue
2415 // No state change needed here
2416 }
2417 ParserState::BlockMappingValue => {
2418 // After processing a value, we go back to waiting for the next key
2419 self.state = ParserState::BlockMappingKey;
2420 }
2421 ParserState::FlowMappingValue => {
2422 // After processing a value in flow mapping, we go back to waiting for the next key
2423 self.state = ParserState::FlowMapping;
2424 }
2425 _ => {
2426 // No state change needed for other states
2427 }
2428 }
2429 }
2430}
2431
2432impl Default for BasicParser {
2433 fn default() -> Self {
2434 Self::new(String::new())
2435 }
2436}
2437
2438impl Parser for BasicParser {
2439 fn check_event(&self) -> bool {
2440 // For streaming: check if we have cached events, can generate
2441 // more, or a deferred error is waiting to be surfaced (from
2442 // eager parsing).
2443 self.event_index < self.events.len()
2444 || self.scanner.check_token()
2445 || self.scanning_error.is_some()
2446 }
2447
2448 fn peek_event(&self) -> Result<Option<&Event>> {
2449 // Peek at cached events only (don't generate new ones)
2450 Ok(self.events.get(self.event_index))
2451 }
2452
2453 fn get_event(&mut self) -> Result<Option<Event>> {
2454 // Generate next events until we have one available
2455 // Some tokens (like directives) don't generate events
2456 while self.event_index >= self.events.len() && self.scanner.check_token() {
2457 let events_before = self.events.len();
2458 self.generate_next_event()?;
2459
2460 // If no event was generated and we still have tokens, continue
2461 if self.events.len() == events_before && self.scanner.check_token() {
2462 continue;
2463 }
2464 break;
2465 }
2466
2467 if self.event_index < self.events.len() {
2468 let event = self.events[self.event_index].clone();
2469 self.event_index += 1;
2470 Ok(Some(event))
2471 } else if let Some(error) = self.scanning_error.take() {
2472 // Eager-parse and scanner errors are stored in
2473 // `scanning_error` (see `new_eager`). Surface them through
2474 // the natural iteration path *after* all buffered events
2475 // have been drained, so callers see the partial events
2476 // first and then the error that terminated parsing.
2477 Err(error)
2478 } else {
2479 Ok(None)
2480 }
2481 }
2482
2483 fn reset(&mut self) {
2484 self.event_index = 0;
2485 self.scanner.reset();
2486 self.state_stack.clear();
2487 self.position = Position::start();
2488 self.pending_anchor = None;
2489 self.pending_tag = None;
2490 self.last_token_type = None;
2491 }
2492
2493 fn position(&self) -> Position {
2494 self.position
2495 }
2496}
2497
2498impl BasicParser {
2499 /// Check if there was a scanning error
2500 #[allow(clippy::missing_const_for_fn)]
2501 pub fn take_scanning_error(&mut self) -> Option<Error> {
2502 self.scanning_error.take()
2503 }
2504}
2505
#[cfg(test)]
mod tests {
    use super::*;

    /// Pull the next event, panicking if the parser fails or has nothing left.
    fn next_event(parser: &mut BasicParser) -> Event {
        parser.get_event().unwrap().unwrap()
    }

    /// Discard the leading StreamStart and implicit DocumentStart events,
    /// checking only that the parser did not report an error for them.
    fn skip_stream_and_document_start(parser: &mut BasicParser) {
        for _ in 0..2 {
            parser.get_event().unwrap();
        }
    }

    #[test]
    fn test_basic_parsing() {
        let mut parser = BasicParser::new_eager("42".to_string());

        assert!(parser.check_event());

        // Stream start
        let stream_start = next_event(&mut parser);
        assert!(matches!(stream_start.event_type, EventType::StreamStart));

        // Document start (implicit)
        match next_event(&mut parser).event_type {
            EventType::DocumentStart { implicit, .. } => assert!(implicit),
            _ => panic!("Expected implicit document start"),
        }

        // Scalar
        match next_event(&mut parser).event_type {
            EventType::Scalar { value, .. } => assert_eq!(value, "42"),
            _ => panic!("Expected scalar event"),
        }

        // Document end (implicit)
        match next_event(&mut parser).event_type {
            EventType::DocumentEnd { implicit } => assert!(implicit),
            _ => panic!("Expected implicit document end"),
        }

        // Stream end
        let stream_end = next_event(&mut parser);
        assert!(matches!(stream_end.event_type, EventType::StreamEnd));
    }

    #[test]
    fn test_flow_sequence_parsing() {
        let mut parser = BasicParser::new_eager("[1, 2, 3]".to_string());

        skip_stream_and_document_start(&mut parser);

        // Sequence start
        match next_event(&mut parser).event_type {
            EventType::SequenceStart { flow_style, .. } => assert!(flow_style),
            _ => panic!("Expected flow sequence start"),
        }

        // First scalar
        match next_event(&mut parser).event_type {
            EventType::Scalar { value, .. } => assert_eq!(value, "1"),
            _ => panic!("Expected scalar '1'"),
        }
    }

    #[test]
    fn test_flow_mapping_parsing() {
        let mut parser = BasicParser::new_eager("{'key': 'value'}".to_string());

        skip_stream_and_document_start(&mut parser);

        // Mapping start
        match next_event(&mut parser).event_type {
            EventType::MappingStart { flow_style, .. } => assert!(flow_style),
            _ => panic!("Expected flow mapping start"),
        }

        // Key scalar
        match next_event(&mut parser).event_type {
            EventType::Scalar { value, .. } => assert_eq!(value, "key"),
            _ => panic!("Expected scalar 'key'"),
        }
    }
}