use crate::{
input::{str::StrInput, BorrowedInput},
scanner::{
Marker, Placement, QueuedToken, QueuedTokenType, ScalarStyle, ScanError, Scanner, Span,
},
BufferedInput,
};
use alloc::{
borrow::Cow,
collections::{BTreeMap, BTreeSet, VecDeque},
string::{String, ToString},
vec::Vec,
};
use core::{
convert::Infallible,
fmt::{self, Display},
};
/// States of the parser's state machine.
///
/// `Parser::state_machine` dispatches on the current state to the handler
/// named on each variant below; `Parser::states` is the stack of states to
/// return to once a nested construct has been handled.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
/// Initial state set by `Parser::new`; handled by `stream_start`.
StreamStart,
/// Handled by `document_start(true)`.
ImplicitDocumentStart,
/// Handled by `document_start(false)`.
DocumentStart,
/// Handled by `document_content`.
DocumentContent,
/// Handled by `document_end`.
DocumentEnd,
/// Handled by `parse_node(true, false)`.
BlockNode,
/// Handled by `parse_node(true, true)`.
BlockNodeOrIndentlessSequence,
/// Handled by `parse_node(false, false)`.
FlowNode,
/// Handled by `block_sequence_entry(true)`.
BlockSequenceFirstEntry,
/// Handled by `block_sequence_entry(false)`.
BlockSequenceEntry,
/// Handled by `indentless_sequence_entry`.
IndentlessSequenceEntry,
/// Handled by `indentless_sequence_entry_node`.
IndentlessSequenceEntryNode,
/// Handled by `block_mapping_key(true)`.
BlockMappingFirstKey,
/// Handled by `block_mapping_key(false)`.
BlockMappingKey,
/// Handled by `block_mapping_key_node`.
BlockMappingKeyNode,
/// Handled by `block_mapping_value`.
BlockMappingValue,
/// Handled by `block_mapping_value_node`.
BlockMappingValueNode,
/// Handled by `flow_sequence_entry(true)`.
FlowSequenceFirstEntry,
/// Handled by `flow_sequence_entry(false)`.
FlowSequenceEntry,
/// Handled by `flow_sequence_entry_mapping_key`.
FlowSequenceEntryMappingKey,
/// Handled by `flow_sequence_entry_mapping_value`.
FlowSequenceEntryMappingValue,
/// Handled by `flow_sequence_entry_mapping_value_node`.
FlowSequenceEntryMappingValueNode,
/// Handled by `flow_sequence_entry_mapping_end`.
FlowSequenceEntryMappingEnd,
/// Handled by `flow_mapping_key(true)`.
FlowMappingFirstKey,
/// Handled by `flow_mapping_key(false)`.
FlowMappingKey,
/// Handled by `flow_mapping_key_node`.
FlowMappingKeyNode,
/// Handled by `flow_mapping_value(false)`.
FlowMappingValue,
/// Handled by `flow_mapping_value_node`.
FlowMappingValueNode,
/// Handled by `flow_mapping_value(true)`.
FlowMappingEmptyValue,
/// Handled by `block_sequence_entry_node`.
BlockSequenceEntryNode,
/// Final state: `Parser::parse` answers with `Event::StreamEnd` without
/// entering the state machine (the dispatch arm is `unreachable!()`).
End,
}
/// A YAML version number split into its `major` and `minor` components.
#[derive(Copy, Clone, PartialEq, Debug, Eq, Hash)]
pub struct YamlVersion {
    /// Major component of the version.
    pub major: u32,
    /// Minor component of the version.
    pub minor: u32,
}

impl YamlVersion {
    /// Builds a version from its two components.
    ///
    /// Usable in `const` contexts.
    #[must_use]
    pub const fn new(major: u32, minor: u32) -> Self {
        YamlVersion { major, minor }
    }
}
/// An event produced by the parser.
///
/// Anchor ids are plain `usize` values; `0` means "no anchor" (see
/// [`Event::anchor_id`]).
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event<'input> {
/// NOTE(review): never constructed in the code visible here — presumably a
/// placeholder value; confirm against the rest of the crate.
Nothing,
/// Emitted by `stream_start` once the stream-start token has been consumed.
StreamStart,
/// Emitted when the stream-end token is reached (and by `parse` whenever the
/// parser is already in its final state).
StreamEnd,
/// Start of a document.
DocumentStart(
/// `true` when emitted for an explicit document-start token, `false` when
/// the document started implicitly.
bool,
/// Version collected from a version directive, if one was seen.
Option<YamlVersion>,
),
/// End of a document, emitted by `document_end`.
DocumentEnd,
/// Reference to a previously registered anchor.
Alias(
/// Id of the anchor the alias resolves to.
usize,
),
/// A comment found in the input.
Comment(
/// Comment text as delivered by the scanner.
Cow<'input, str>,
/// Where the comment sits relative to surrounding content.
Placement,
),
/// A scalar node.
Scalar(
/// Scalar value.
Cow<'input, str>,
/// Style the scalar was written in.
ScalarStyle,
/// Anchor id, `0` when the node has no anchor.
usize,
/// Resolved tag, if the node carries one.
Option<Cow<'input, Tag>>,
),
/// Start of a sequence node.
SequenceStart(
/// Whether the sequence is written in block or flow style.
StructureStyle,
/// Anchor id, `0` when the node has no anchor.
usize,
/// Resolved tag, if the node carries one.
Option<Cow<'input, Tag>>,
),
/// End of a sequence node.
SequenceEnd,
/// Start of a mapping node.
MappingStart(
/// Whether the mapping is written in block or flow style.
StructureStyle,
/// Anchor id, `0` when the node has no anchor.
usize,
/// Resolved tag, if the node carries one.
Option<Cow<'input, Tag>>,
),
/// End of a mapping node.
MappingEnd,
}
/// Presentation style of a collection, carried by `Event::SequenceStart` and
/// `Event::MappingStart`.
#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash, PartialOrd, Ord)]
pub enum StructureStyle {
/// Used by `parse_node` for block sequence/mapping start tokens and for
/// indentless sequences.
Block,
/// Used by `parse_node` for flow sequence/mapping start tokens.
Flow,
}
/// A node tag, stored as a `handle` / `suffix` pair plus the handle as it was
/// originally written.
///
/// The helpers below treat `handle` and `suffix` as two halves of one
/// concatenated tag name, so the split point between them is not significant
/// when comparing against a namespace prefix.
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// First half of the tag name.
    pub handle: String,
    /// Second half of the tag name.
    pub suffix: String,
    /// Handle as originally written; [`Tag::new`] sets it to a copy of `handle`.
    pub original_handle: String,
}

/// Namespace prefix shared by all YAML core schema tags.
const YAML_CORE_SCHEMA_PREFIX: &str = "tag:yaml.org,2002:";

/// Tag names (without the prefix) recognised as belonging to the core schema.
const YAML_CORE_SCHEMA_SUFFIXES: [&str; 7] = ["bool", "float", "int", "map", "null", "seq", "str"];

/// Returns `suffix` itself when it is one of [`YAML_CORE_SCHEMA_SUFFIXES`].
fn known_yaml_core_schema_suffix(suffix: &str) -> Option<&str> {
    if YAML_CORE_SCHEMA_SUFFIXES.iter().any(|known| *known == suffix) {
        Some(suffix)
    } else {
        None
    }
}

/// Finds the core schema name that equals `handle_tail` followed by `suffix`.
///
/// Used when the core schema name itself is split between the handle and the
/// suffix of a tag.
fn known_yaml_core_schema_suffix_from_split(
    handle_tail: &str,
    suffix: &str,
) -> Option<&'static str> {
    for &candidate in &YAML_CORE_SCHEMA_SUFFIXES {
        if candidate.strip_prefix(handle_tail) == Some(suffix) {
            return Some(candidate);
        }
    }
    None
}

impl Tag {
    /// Creates a tag whose original handle is the same as `handle`.
    #[must_use]
    pub fn new(handle: impl Into<String>, suffix: impl Into<String>) -> Self {
        let handle: String = handle.into();
        let original_handle = handle.clone();
        let suffix = suffix.into();
        Self {
            handle,
            suffix,
            original_handle,
        }
    }

    /// Creates a tag with an explicitly supplied original handle.
    #[must_use]
    pub fn with_original_handle(
        handle: impl Into<String>,
        suffix: impl Into<String>,
        original_handle: impl Into<String>,
    ) -> Self {
        let handle = handle.into();
        let suffix = suffix.into();
        let original_handle = original_handle.into();
        Self {
            handle,
            suffix,
            original_handle,
        }
    }

    /// Returns the core schema name (`"str"`, `"int"`, …) when `handle`
    /// followed by `suffix` spells a YAML core schema tag, wherever the split
    /// between the two falls.
    #[must_use]
    pub fn core_suffix(&self) -> Option<&str> {
        match YAML_CORE_SCHEMA_PREFIX.strip_prefix(self.handle.as_str()) {
            // The handle covers only the start of the prefix: the suffix has
            // to supply the remainder of the prefix and then a known name.
            Some(missing_prefix) => self
                .suffix
                .strip_prefix(missing_prefix)
                .and_then(known_yaml_core_schema_suffix),
            // Otherwise the handle must contain the whole prefix, possibly
            // followed by the beginning of the name.
            None => {
                let handle_tail = self.handle.strip_prefix(YAML_CORE_SCHEMA_PREFIX)?;
                known_yaml_core_schema_suffix_from_split(handle_tail, &self.suffix)
            }
        }
    }

    /// Returns the part of the tag name that follows `prefix`, or `None` when
    /// `handle` followed by `suffix` does not start with `prefix`.
    ///
    /// The result borrows from `self` unless the name straddles the handle and
    /// the suffix, in which case the two pieces are joined into a new string.
    #[must_use]
    pub fn suffix_in_namespace(&self, prefix: &str) -> Option<Cow<'_, str>> {
        match self.handle.strip_prefix(prefix) {
            Some("") => Some(Cow::Borrowed(self.suffix.as_str())),
            Some(handle_tail) => {
                Some(Cow::Owned([handle_tail, self.suffix.as_str()].concat()))
            }
            None => {
                let prefix_tail = prefix.strip_prefix(self.handle.as_str())?;
                self.suffix.strip_prefix(prefix_tail).map(Cow::Borrowed)
            }
        }
    }

    /// Whether this tag is one of the known YAML core schema tags.
    #[must_use]
    pub fn is_yaml_core_schema(&self) -> bool {
        matches!(self.core_suffix(), Some(_))
    }

    /// Whether this tag is the YAML core schema tag named `suffix`.
    #[must_use]
    pub fn is_yaml_core_schema_tag(&self, suffix: &str) -> bool {
        self.core_suffix() == Some(suffix)
    }

    /// Whether this tag is anything other than a known core schema tag.
    #[must_use]
    pub fn is_custom(&self) -> bool {
        self.core_suffix().is_none()
    }

    /// Returns `(handle, suffix)`.
    #[must_use]
    pub fn parts(&self) -> (&str, &str) {
        (self.handle.as_str(), self.suffix.as_str())
    }

    /// Returns `(original_handle, suffix)`.
    #[must_use]
    pub fn original_parts(&self) -> (&str, &str) {
        (self.original_handle.as_str(), self.suffix.as_str())
    }

    /// Rebuilds the tag text from the original handle.
    ///
    /// With an empty original handle the suffix is wrapped as `!<suffix>`,
    /// except for the lone suffix `"!"`, which is returned as is. Otherwise
    /// the original handle and the suffix are concatenated.
    #[must_use]
    pub fn original(&self) -> String {
        let needs_verbatim_form = self.original_handle.is_empty() && self.suffix != "!";
        if needs_verbatim_form {
            ["!<", self.suffix.as_str(), ">"].concat()
        } else {
            [self.original_handle.as_str(), self.suffix.as_str()].concat()
        }
    }
}

impl Display for Tag {
    /// Writes the handle immediately followed by the suffix.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // NOTE(review): both arms currently render the same text (the handle
        // followed by the suffix); the special case for "!" looks redundant —
        // confirm whether a separator was intended in the general arm.
        match self.handle.as_str() {
            "!" => write!(f, "!{}", self.suffix),
            handle => write!(f, "{}{}", handle, self.suffix),
        }
    }
}
impl<'input> Event<'input> {
    /// Returns the anchor id of a scalar, sequence-start or mapping-start
    /// event, or `None` for other events and for the id `0`.
    #[must_use]
    pub fn anchor_id(&self) -> Option<usize> {
        let id = match self {
            Self::Scalar(_, _, id, _)
            | Self::SequenceStart(_, id, _)
            | Self::MappingStart(_, id, _) => *id,
            _ => return None,
        };
        (id != 0).then_some(id)
    }

    /// Returns the anchor id an alias event refers to.
    #[must_use]
    pub fn alias_id(&self) -> Option<usize> {
        if let Self::Alias(id) = self {
            Some(*id)
        } else {
            None
        }
    }

    /// Returns the tag attached to a scalar, sequence-start or mapping-start
    /// event, if any.
    #[must_use]
    pub fn tag(&self) -> Option<&Tag> {
        let tag = match self {
            Self::Scalar(_, _, _, tag)
            | Self::SequenceStart(_, _, tag)
            | Self::MappingStart(_, _, tag) => tag,
            _ => return None,
        };
        tag.as_deref()
    }

    /// Returns the value and style of a scalar event.
    #[must_use]
    pub fn scalar(&self) -> Option<(&str, ScalarStyle)> {
        if let Self::Scalar(value, style, ..) = self {
            Some((&**value, *style))
        } else {
            None
        }
    }

    /// Whether the event is an alias, a scalar, or the start of a sequence or
    /// mapping.
    #[must_use]
    pub fn is_node(&self) -> bool {
        match self {
            Self::Alias(_)
            | Self::Scalar(..)
            | Self::SequenceStart(..)
            | Self::MappingStart(..) => true,
            _ => false,
        }
    }

    /// Plain scalar `~` without anchor or tag, used where a node is absent.
    fn empty_scalar() -> Self {
        Self::Scalar(Cow::Borrowed("~"), ScalarStyle::Plain, 0, None)
    }

    /// Plain scalar standing in for an absent node that still carries
    /// properties: the value is empty when a tag is present and `~` otherwise.
    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
        let value = match &tag {
            Some(_) => Cow::default(),
            None => Cow::Borrowed("~"),
        };
        Self::Scalar(value, ScalarStyle::Plain, anchor, tag)
    }
}
/// Upper bound on the number of consecutive comment events that
/// `Parser::next_comment_events` buffers before it reports an error.
const MAX_BUFFERED_COMMENT_EVENTS: usize = 32;
/// Event-based parser sitting on top of a [`Scanner`].
#[derive(Debug)]
pub struct Parser<'input, T: BorrowedInput<'input>> {
// Token source.
scanner: Scanner<'input, T>,
// Stack of states to return to (`push_state` / `pop_state`).
states: Vec<State>,
// Current state of the state machine.
state: State,
// One-token lookahead: filled by `peek_token`, emptied by `fetch_token` / `skip`.
token: Option<QueuedToken<'input>>,
// Event already produced but not yet handed out; `next_event_impl` returns it first.
// NOTE(review): presumably filled by `peek`, whose implementation is not visible here.
current: Option<(Event<'input>, Span)>,
// NOTE(review): not touched by the code visible here — presumably the error counterpart of `current`; confirm.
current_error: Option<ScanError>,
// Events queued behind the one being returned (see `queue_tail_and_return_first`).
queued_events: VecDeque<(Event<'input>, Span)>,
// Column of the most recent key token; attached to the next node event's span
// by `apply_pending_key_indent`.
pending_key_indent: Option<usize>,
// Node properties saved by `save_pending_node_properties` when a comment
// interrupts `parse_node` after the properties have been read.
pending_node_anchor_id: usize,
pending_node_tag: Option<Cow<'input, Tag>>,
pending_node_tag_start: Option<Marker>,
pending_node_property_end: Option<Marker>,
// Span recorded for an empty scalar that may still have to be emitted.
// NOTE(review): the consumer of this field is outside the visible code.
pending_empty_scalar_span: Option<Span>,
// End marker of the last event returned by `next_event_impl`.
last_event_end: Option<Marker>,
// Directive state carried across a comment event in `explicit_document_start`.
pending_document_version: Option<YamlVersion>,
pending_document_directives: bool,
pending_document_tag_handles: BTreeSet<String>,
// Anchor name -> anchor id; cleared at every `document_start`.
anchors: BTreeMap<Cow<'input, str>, usize>,
// Next anchor id to hand out; starts at 1 so that 0 can mean "no anchor".
anchor_id_count: usize,
// Tag handle -> prefix, as collected from tag directives.
tags: BTreeMap<String, String>,
// NOTE(review): not touched by the code visible here.
stream_end_emitted: bool,
// When set, tag directives survive across documents (see `document_end`
// and `parser_process_directives`).
keep_tags: bool,
}
/// Sink for parser events that does not need source spans.
///
/// Every implementor also gets a [`SpannedEventReceiver`] implementation that
/// drops the span.
pub trait EventReceiver<'input> {
/// Handles one event.
fn on_event(&mut self, ev: Event<'input>);
}
/// Sink for parser events together with the span each event covers.
pub trait SpannedEventReceiver<'input> {
/// Handles one event and its span.
fn on_event(&mut self, ev: Event<'input>, span: Span);
}
impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
fn on_event(&mut self, ev: Event<'input>, _span: Span) {
self.on_event(ev);
}
}
/// Fallible sink for parser events that does not need source spans.
///
/// Every implementor also gets a [`TrySpannedEventReceiver`] implementation
/// that drops the span.
pub trait TryEventReceiver<'input> {
/// Error reported when an event cannot be handled.
type Error;
/// Handles one event, or reports why it could not be handled.
fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error>;
}
/// Fallible sink for parser events together with the span each event covers.
pub trait TrySpannedEventReceiver<'input> {
/// Error reported when an event cannot be handled.
type Error;
/// Handles one event and its span, or reports why it could not be handled.
fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error>;
}
impl<'input, R: TryEventReceiver<'input>> TrySpannedEventReceiver<'input> for R {
type Error = R::Error;
fn on_event(&mut self, ev: Event<'input>, _span: Span) -> Result<(), Self::Error> {
TryEventReceiver::on_event(self, ev)
}
}
/// Failure of a fallible load: either parsing failed or the receiver
/// rejected an event.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TryLoadError<E> {
/// The parser (or the scanner below it) reported an error.
Scan(
ScanError,
),
/// The event receiver returned an error.
Receiver(
E,
),
}
impl<E> TryLoadError<E> {
#[cold]
fn scan(error: ScanError) -> Self {
Self::Scan(error)
}
#[cold]
fn receiver(error: E) -> Self {
Self::Receiver(error)
}
}
impl<E> From<ScanError> for TryLoadError<E> {
#[cold]
fn from(error: ScanError) -> Self {
Self::scan(error)
}
}
/// Prefixes the wrapped error with the side (parser or receiver) that
/// produced it.
impl<E: Display> Display for TryLoadError<E> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Scan(error) => f.write_fmt(format_args!("parser error: {error}")),
            Self::Receiver(error) => f.write_fmt(format_args!("receiver error: {error}")),
        }
    }
}
/// Exposes the wrapped error as the source of a [`TryLoadError`].
impl<E> core::error::Error for TryLoadError<E>
where
    E: core::error::Error + 'static,
{
    fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
        let cause: &(dyn core::error::Error + 'static) = match self {
            Self::Scan(error) => error,
            Self::Receiver(error) => error,
        };
        Some(cause)
    }
}
fn try_emit<'input, R>(
recv: &mut R,
ev: Event<'input>,
span: Span,
) -> Result<(), TryLoadError<R::Error>>
where
R: TrySpannedEventReceiver<'input>,
{
recv.on_event(ev, span).map_err(TryLoadError::receiver)
}
struct InfallibleSpannedReceiver<'receiver, R>(&'receiver mut R);
impl<'input, R: SpannedEventReceiver<'input>> TrySpannedEventReceiver<'input>
for InfallibleSpannedReceiver<'_, R>
{
type Error = Infallible;
fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
self.0.on_event(ev, span);
Ok(())
}
}
fn into_scan_result(result: Result<(), TryLoadError<Infallible>>) -> Result<(), ScanError> {
match result {
Ok(()) => Ok(()),
Err(TryLoadError::Scan(error)) => error.into_result(),
Err(TryLoadError::Receiver(error)) => match error {},
}
}
/// Result of producing one event: the event with its span, or a scan error.
pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
/// Common interface of event parsers.
pub trait ParserTrait<'input> {
    /// Looks at the next event without consuming it.
    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;

    /// Produces the next event, or `None` when there is nothing more to
    /// produce.
    fn next_event(&mut self) -> Option<ParseResult<'input>>;

    /// Feeds events to an infallible receiver.
    ///
    /// NOTE(review): the implementations are not visible here; `multi`
    /// presumably has the same meaning as in [`ParserTrait::try_load`].
    fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError>;

    /// Feeds events to `recv` until the stream ends, the parser runs dry, or
    /// — when `multi` is `false` — the first document has ended.
    ///
    /// The terminating event itself is still delivered to `recv`.
    ///
    /// # Errors
    /// Returns [`TryLoadError::Scan`] when parsing fails and
    /// [`TryLoadError::Receiver`] when `recv` rejects an event.
    fn try_load<R: TrySpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), TryLoadError<R::Error>> {
        loop {
            let Some(next) = self.next_event() else {
                break;
            };
            let (ev, span) = next?;
            // Decide before the event is moved into the receiver.
            let stop_after = match ev {
                Event::StreamEnd => true,
                Event::DocumentEnd => !multi,
                _ => false,
            };
            try_emit(recv, ev, span)?;
            if stop_after {
                break;
            }
        }
        Ok(())
    }
}
impl<'input> Parser<'input, StrInput<'input>> {
    /// Creates a parser reading from the string slice `value`.
    #[must_use]
    pub fn new_from_str(value: &'input str) -> Self {
        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
        let input = StrInput::new(value);
        Self::new(input)
    }
}
impl<T> Parser<'static, BufferedInput<T>>
where
    T: Iterator<Item = char>,
{
    /// Creates a parser reading from an iterator of characters.
    #[must_use]
    pub fn new_from_iter(iter: T) -> Self {
        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
        let input = BufferedInput::new(iter);
        Self::new(input)
    }
}
impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
/// Returns the id that will be given to the next registered anchor.
pub fn get_anchor_offset(&self) -> usize {
self.anchor_id_count
}
/// Sets the id that will be given to the next registered anchor.
///
/// Note that `Event::anchor_id` treats the id `0` as "no anchor".
pub fn set_anchor_offset(&mut self, offset: usize) {
self.anchor_id_count = offset;
}
pub fn new(src: T) -> Self {
Parser {
scanner: Scanner::new(src),
states: Vec::new(),
state: State::StreamStart,
token: None,
current: None,
current_error: None,
queued_events: VecDeque::new(),
pending_key_indent: None,
pending_node_anchor_id: 0,
pending_node_tag: None,
pending_node_tag_start: None,
pending_node_property_end: None,
pending_empty_scalar_span: None,
last_event_end: None,
pending_document_version: None,
pending_document_directives: false,
pending_document_tag_handles: BTreeSet::new(),
anchors: BTreeMap::new(),
anchor_id_count: 1,
tags: BTreeMap::new(),
stream_end_emitted: false,
keep_tags: false,
}
}
/// Builder-style setter for the `keep_tags` flag.
///
/// When the flag is set, `parser_process_directives` starts from the tags
/// already known and `document_end` only removes the `!!` and `!` handles
/// instead of clearing every tag.
#[must_use]
pub fn keep_tags(mut self, value: bool) -> Self {
self.keep_tags = value;
self
}
/// Inherent shortcut for [`ParserTrait::peek`], so callers do not need the
/// trait in scope.
pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
ParserTrait::peek(self)
}
/// Inherent shortcut for [`ParserTrait::next_event`], so callers do not need
/// the trait in scope.
pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
ParserTrait::next_event(self)
}
/// Produces the next event and records where it ended.
///
/// Sources are tried in this order: the stashed `current` event, the queue
/// of already-built events, a pending comment, and finally the state
/// machine.
fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let event = if let Some(stashed) = self.current.take() {
        stashed
    } else if let Some(queued) = self.queued_events.pop_front() {
        self.apply_pending_key_indent(queued)
    } else if let Some(comment) = self.maybe_next_comment_event()? {
        comment
    } else {
        self.parse()?
    };
    Ok(self.remember_event_end(event))
}
/// Attaches the remembered key column to the span of a node event.
///
/// Non-node events pass through untouched and leave the remembered column
/// in place for the next node event.
fn apply_pending_key_indent<'a>(&mut self, (ev, span): (Event<'a>, Span)) -> (Event<'a>, Span) {
    if !ev.is_node() {
        return (ev, span);
    }
    match self.pending_key_indent.take() {
        Some(indent) => (ev, span.with_indent(Some(indent))),
        None => (ev, span),
    }
}
/// Records the end marker of `span` in `last_event_end` and passes the event
/// through unchanged.
fn remember_event_end<'a>(&mut self, (event, span): (Event<'a>, Span)) -> (Event<'a>, Span) {
self.last_event_end = Some(span.end);
(event, span)
}
/// Returns the lookahead token, scanning it first if none is buffered.
///
/// # Errors
/// Propagates the error from `scan_next_token`; in that case nothing is
/// buffered.
fn peek_token(&mut self) -> Result<&QueuedToken<'_>, ScanError> {
    if self.token.is_none() {
        let scanned = self.scan_next_token()?;
        self.token = Some(scanned);
    }
    Ok(self.token.as_ref().unwrap())
}
/// Pulls the next token from the scanner.
///
/// # Errors
/// When the scanner has no more tokens, returns the scanner's stored error
/// if it has one and an "unexpected EOF" error otherwise.
fn scan_next_token(&mut self) -> Result<QueuedToken<'input>, ScanError> {
    if let Some(tok) = self.scanner.next_queued_token()? {
        return Ok(tok);
    }
    match self.scanner.get_error() {
        Some(e) => e.into_result(),
        None => Err(self.unexpected_eof()),
    }
}
/// Cheap front door to `next_comment_event`: only looks for a comment when
/// the scanner reports that comments are possible.
#[inline]
fn maybe_next_comment_event<'a>(&mut self) -> Result<Option<(Event<'a>, Span)>, ScanError>
where
    'input: 'a,
{
    if !self.scanner.comments_possible() {
        return Ok(None);
    }
    self.next_comment_event()
}
/// Consumes the lookahead token if it is a comment and turns it into an
/// `Event::Comment`, with its placement refined against the token that
/// follows. Returns `Ok(None)` when the lookahead is not a comment.
fn next_comment_event<'a>(&mut self) -> Result<Option<(Event<'a>, Span)>, ScanError>
where
    'input: 'a,
{
    if !matches!(self.peek_token()?.1, QueuedTokenType::Comment(_)) {
        return Ok(None);
    }
    let QueuedToken(span, QueuedTokenType::Comment(mut comment)) = self.fetch_token() else {
        unreachable!("comment token disappeared after peek")
    };
    // The comment has been taken out of the lookahead slot, so the peek done
    // inside `refined_comment_placement` sees the token after it.
    comment.placement = self.refined_comment_placement(span, comment.placement);
    Ok(Some((
        Event::Comment(comment.text, comment.placement),
        span,
    )))
}
/// Collects the run of consecutive comment events at the front of the token
/// stream.
///
/// # Errors
/// * A peek failure is reported only if no comment has been collected yet;
///   otherwise the comments gathered so far are returned.
/// * Fails once `MAX_BUFFERED_COMMENT_EVENTS` comments have been collected
///   and yet another comment follows.
#[inline]
fn next_comment_events(&mut self) -> Result<Vec<(Event<'input>, Span)>, ScanError> {
    if !self.scanner.comments_possible() {
        return Ok(Vec::new());
    }
    let mut events = Vec::new();
    loop {
        let comment_follows = match self.peek_token() {
            Ok(token) => matches!(token.1, QueuedTokenType::Comment(_)),
            Err(error) if events.is_empty() => return Err(error),
            Err(_) => false,
        };
        if !comment_follows {
            return Ok(events);
        }
        if events.len() == MAX_BUFFERED_COMMENT_EVENTS {
            return Err(ScanError::new_str(
                self.peek_token()?.0.start,
                "too many consecutive comments before resolving collection entry",
            ));
        }
        let comment = self
            .next_comment_event()?
            .expect("comment token disappeared after peek");
        events.push(comment);
    }
}
fn queue_tail_and_return_first(
&mut self,
events: Vec<(Event<'input>, Span)>,
) -> (Event<'input>, Span) {
let mut events = events.into_iter();
let first = events
.next()
.expect("event queue must contain at least one event");
self.queued_events.extend(events);
first
}
/// Merges `event` into `comments` by source position, queues everything
/// after the first element and returns that first element.
///
/// `event` goes in front of the first comment whose start and end indices
/// are both at or beyond those of `event`, or at the very end if there is no
/// such comment.
fn queue_event_by_span(
    &mut self,
    comments: Vec<(Event<'input>, Span)>,
    event: (Event<'input>, Span),
) -> (Event<'input>, Span) {
    let event_start = event.1.start.index();
    let event_end = event.1.end.index();
    let insert_at = comments
        .iter()
        .position(|(_, comment_span)| {
            comment_span.start.index() >= event_start && comment_span.end.index() >= event_end
        })
        .unwrap_or(comments.len());
    let mut ordered = comments;
    ordered.insert(insert_at, event);
    self.queue_tail_and_return_first(ordered)
}
/// Like `queue_event_by_span`, but places `first` immediately followed by
/// `second`; only the span of `first` decides the position.
fn queue_two_events_by_span(
    &mut self,
    comments: Vec<(Event<'input>, Span)>,
    first: (Event<'input>, Span),
    second: (Event<'input>, Span),
) -> (Event<'input>, Span) {
    let first_start = first.1.start.index();
    let first_end = first.1.end.index();
    let insert_at = comments
        .iter()
        .position(|(_, comment_span)| {
            comment_span.start.index() >= first_start && comment_span.end.index() >= first_end
        })
        .unwrap_or(comments.len());
    let mut ordered = comments;
    ordered.reserve(2);
    ordered.insert(insert_at, first);
    ordered.insert(insert_at + 1, second);
    self.queue_tail_and_return_first(ordered)
}
/// Refines the placement of a comment covering `span` by looking at the
/// token that follows it.
///
/// * `Placement::Right` is kept as is.
/// * If the next token cannot be peeked, `placement` is returned unchanged.
/// * If the next token is the stream end, the result is `Placement::Last`.
/// * If the next token starts on the line right after the comment's end
///   line, the result is `Placement::Above`; otherwise `Placement::Free`.
fn refined_comment_placement(&mut self, span: Span, placement: Placement) -> Placement {
    if placement == Placement::Right {
        return placement;
    }
    match self.peek_token() {
        Err(_) => placement,
        Ok(next) if matches!(next.1, QueuedTokenType::StreamEnd) => Placement::Last,
        Ok(next) if next.0.start.line() == span.end.line() + 1 => Placement::Above,
        Ok(_) => Placement::Free,
    }
}
/// Builds the error reported when the scanner runs out of tokens, with a
/// message chosen from the current parser state and positioned at the
/// scanner's current mark.
#[cold]
fn unexpected_eof(&self) -> ScanError {
    let message = match self.state {
        // Flow sequences.
        State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
            "unexpected EOF while parsing a flow sequence"
        }
        // Flow mappings.
        State::FlowMappingFirstKey
        | State::FlowMappingKey
        | State::FlowMappingValue
        | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
        // Single-pair mappings inside a flow sequence.
        State::FlowSequenceEntryMappingKey
        | State::FlowSequenceEntryMappingValue
        | State::FlowSequenceEntryMappingEnd
        | State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
        // Block sequences.
        State::BlockSequenceFirstEntry | State::BlockSequenceEntry | State::BlockNode => {
            "unexpected EOF while parsing a block sequence"
        }
        // Block mappings.
        State::BlockMappingFirstKey
        | State::BlockMappingKey
        | State::BlockMappingValue
        | State::BlockNodeOrIndentlessSequence => {
            "unexpected EOF while parsing a block mapping"
        }
        _ => "unexpected eof",
    };
    let position = self.scanner.mark();
    ScanError::new_str(position, message)
}
/// Takes the buffered lookahead token, leaving the slot empty.
///
/// # Panics
/// Panics if no token is buffered, i.e. if `peek_token` was not called
/// successfully beforehand.
fn fetch_token<'a>(&mut self) -> QueuedToken<'a>
where
'input: 'a,
{
self.token
.take()
.expect("fetch_token needs to be preceded by peek_token")
}
/// Discards the buffered lookahead token, if any.
fn skip(&mut self) {
self.token = None;
}
/// Makes the most recently pushed state the current state.
///
/// # Panics
/// Panics if the state stack is empty.
fn pop_state(&mut self) {
self.state = self.states.pop().unwrap();
}
/// Pushes `state` onto the stack of states to return to.
fn push_state(&mut self, state: State) {
self.states.push(state);
}
/// Arranges for a node to be parsed in `node_state` with a later return to
/// `return_state`, then either emits a pending comment or parses the node
/// right away.
///
/// When a comment is emitted, the node itself is parsed on a later call,
/// from `node_state`.
fn defer_parse_node<'a>(
    &mut self,
    node_state: State,
    return_state: State,
    block: bool,
    indentless_sequence: bool,
) -> ParseResult<'a>
where
    'input: 'a,
{
    self.push_state(return_state);
    self.state = node_state;
    match self.maybe_next_comment_event()? {
        Some(comment) => Ok(comment),
        None => self.parse_node(block, indentless_sequence),
    }
}
/// Runs one step of the state machine and applies any pending key
/// indentation to the resulting event.
///
/// Once the parser is in `State::End`, every call yields `Event::StreamEnd`
/// with an empty span at the scanner's current mark.
fn parse<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    if self.state != State::End {
        let event = self.state_machine()?;
        return Ok(self.apply_pending_key_indent(event));
    }
    Ok((Event::StreamEnd, Span::empty(self.scanner.mark())))
}
/// Inherent shortcut for [`ParserTrait::load`], so callers do not need the
/// trait in scope.
///
/// # Errors
/// Returns whatever error [`ParserTrait::load`] reports.
pub fn load<R: SpannedEventReceiver<'input>>(
&mut self,
recv: &mut R,
multi: bool,
) -> Result<(), ScanError> {
ParserTrait::load(self, recv, multi)
}
/// Inherent shortcut for [`ParserTrait::try_load`], so callers do not need
/// the trait in scope.
///
/// # Errors
/// Returns whatever error [`ParserTrait::try_load`] reports.
pub fn try_load<R: TrySpannedEventReceiver<'input>>(
&mut self,
recv: &mut R,
multi: bool,
) -> Result<(), TryLoadError<R::Error>> {
ParserTrait::try_load(self, recv, multi)
}
/// Dispatches to the handler for the current state and returns the event it
/// produces.
///
/// Several states share a handler and differ only in the flag passed to it
/// (for example "first entry" versus "subsequent entry").
///
/// # Panics
/// Panics in `State::End`; `parse` filters that state out before calling
/// this method.
fn state_machine<'a>(&mut self) -> ParseResult<'a>
where
'input: 'a,
{
debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
match self.state {
State::StreamStart => self.stream_start(),
State::ImplicitDocumentStart => self.document_start(true),
State::DocumentStart => self.document_start(false),
State::DocumentContent => self.document_content(),
State::DocumentEnd => self.document_end(),
// `parse_node(block, indentless_sequence)`
State::BlockNode => self.parse_node(true, false),
State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
State::FlowNode => self.parse_node(false, false),
State::BlockMappingFirstKey => self.block_mapping_key(true),
State::BlockMappingKey => self.block_mapping_key(false),
State::BlockMappingKeyNode => self.block_mapping_key_node(),
State::BlockMappingValue => self.block_mapping_value(),
State::BlockMappingValueNode => self.block_mapping_value_node(),
State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
State::BlockSequenceEntry => self.block_sequence_entry(false),
State::BlockSequenceEntryNode => self.block_sequence_entry_node(),
State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
State::FlowSequenceEntry => self.flow_sequence_entry(false),
State::FlowMappingFirstKey => self.flow_mapping_key(true),
State::FlowMappingKey => self.flow_mapping_key(false),
State::FlowMappingKeyNode => self.flow_mapping_key_node(),
State::FlowMappingValue => self.flow_mapping_value(false),
State::FlowMappingValueNode => self.flow_mapping_value_node(),
State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
State::IndentlessSequenceEntryNode => self.indentless_sequence_entry_node(),
State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
State::FlowSequenceEntryMappingValueNode => {
self.flow_sequence_entry_mapping_value_node()
}
State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
State::FlowMappingEmptyValue => self.flow_mapping_value(true),
// `parse` returns early for this state.
State::End => unreachable!(),
}
}
/// Consumes the stream-start token and emits `Event::StreamStart`.
///
/// # Errors
/// Fails if the first token is anything other than a stream start.
fn stream_start<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let token = self.peek_token()?;
    let span = token.0;
    if !matches!(token.1, QueuedTokenType::StreamStart(_)) {
        return Err(ScanError::new_str(
            span.start,
            "did not find expected <stream-start>",
        ));
    }
    self.state = State::ImplicitDocumentStart;
    self.skip();
    Ok((Event::StreamStart, span))
}
/// Whether directive state was stashed by `explicit_document_start` (when a
/// comment interrupted it) and still has to be picked up.
fn has_pending_document_directives(&self) -> bool {
self.pending_document_directives
|| self.pending_document_version.is_some()
|| !self.pending_document_tag_handles.is_empty()
}
/// Handles the start of a document (or the end of the stream).
///
/// Leading document-end tokens are skipped and the anchor table is cleared.
/// Then:
/// * stashed directive state resumes `explicit_document_start`;
/// * a stream-end token finishes parsing with `Event::StreamEnd`;
/// * a directive or document-start token goes to `explicit_document_start`;
/// * any other token starts an implicit document when `implicit` is set and
///   is handed to `explicit_document_start` otherwise.
fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    while matches!(self.peek_token()?.1, QueuedTokenType::DocumentEnd) {
        self.skip();
    }
    self.anchors.clear();
    if self.has_pending_document_directives() {
        return self.explicit_document_start();
    }

    let token = self.peek_token()?;
    let span = token.0;
    let at_stream_end = matches!(token.1, QueuedTokenType::StreamEnd);
    let opens_explicit_document = matches!(
        token.1,
        QueuedTokenType::VersionDirective(..)
            | QueuedTokenType::TagDirective(..)
            | QueuedTokenType::ReservedDirective(..)
            | QueuedTokenType::DocumentStart
    );

    if at_stream_end {
        self.state = State::End;
        self.skip();
        return Ok((Event::StreamEnd, span));
    }
    if opens_explicit_document || !implicit {
        return self.explicit_document_start();
    }

    // Implicit document: no directives can be present, but the call still
    // resets the tag table for the new document.
    self.parser_process_directives(None, false, BTreeSet::new())?;
    self.push_state(State::DocumentEnd);
    self.state = State::BlockNode;
    Ok((Event::DocumentStart(false, None), span))
}
/// Consumes the run of directive tokens at the front of the token stream.
///
/// * `version` — version already collected for this document, if any.
/// * `continuing` — `true` when resuming directive processing for the same
///   document; the tags collected so far are then kept as the starting point.
/// * `document_tag_handles` — tag handles already declared in this document.
///
/// On success the parser's tag table is replaced by the collected tags and
/// the (possibly updated) version and handle set are returned.
///
/// # Errors
/// Fails on a second version directive, on a major version other than 1, and
/// on a tag handle declared twice in the same document.
fn parser_process_directives(
&mut self,
mut version: Option<YamlVersion>,
continuing: bool,
mut document_tag_handles: BTreeSet<String>,
) -> Result<(Option<YamlVersion>, BTreeSet<String>), ScanError> {
// Start from the known tags when continuing or when tags are kept across
// documents; otherwise start from an empty table.
let mut tags = if continuing || self.keep_tags {
self.tags.clone()
} else {
BTreeMap::new()
};
loop {
match self.peek_token()? {
QueuedToken(span, QueuedTokenType::VersionDirective(major, minor)) => {
if version.is_some() {
return Err(ScanError::new_str(
span.start,
"duplicate version directive",
));
}
if *major != 1 {
return Err(ScanError::new_str(
span.start,
"unsupported YAML major version",
));
}
version = Some(YamlVersion::new(*major, *minor));
}
QueuedToken(mark, QueuedTokenType::TagDirective(handle, prefix)) => {
// `insert` returns `false` when the handle was already declared.
if !document_tag_handles.insert(handle.to_string()) {
return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
}
tags.insert(handle.to_string(), prefix.to_string());
}
// Reserved directives are consumed and otherwise ignored.
QueuedToken(_, QueuedTokenType::ReservedDirective(_, _)) => {
}
_ => break,
}
// Consume the directive token that was just handled.
self.skip();
}
self.tags = tags;
Ok((version, document_tag_handles))
}
/// Processes directives and the explicit document-start token.
///
/// If a comment shows up between the directives and the document-start
/// token, the directive state collected so far is stashed on the parser,
/// the comment is returned, and a later call resumes from the stash.
///
/// # Errors
/// Fails if directive processing fails or if the token after the directives
/// (and comments) is not a document start.
fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    // Pick up whatever an earlier, interrupted call left behind.
    let stashed_version = self.pending_document_version.take();
    let resuming = core::mem::replace(&mut self.pending_document_directives, false);
    let stashed_handles = core::mem::take(&mut self.pending_document_tag_handles);

    let (version, document_tag_handles) =
        self.parser_process_directives(stashed_version, resuming, stashed_handles)?;

    if let Some(comment) = self.maybe_next_comment_event()? {
        self.pending_document_version = version;
        self.pending_document_directives = true;
        self.pending_document_tag_handles = document_tag_handles;
        return Ok(comment);
    }

    let token = self.peek_token()?;
    let span = token.0;
    if !matches!(token.1, QueuedTokenType::DocumentStart) {
        return Err(ScanError::new_str(
            span.start,
            "did not find expected <document start>",
        ));
    }
    self.push_state(State::DocumentEnd);
    self.state = State::DocumentContent;
    self.skip();
    Ok((Event::DocumentStart(true, version), span))
}
/// Parses the root node of an explicit document.
///
/// When the next token already belongs to the document boundary (a
/// directive, document start/end or stream end), the document is empty and
/// an empty scalar is emitted; its span is empty and sits at the end of the
/// last emitted event (or at the token start if there is none).
fn document_content<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let token = self.peek_token()?;
    let mark = token.0;
    let document_is_empty = matches!(
        token.1,
        QueuedTokenType::VersionDirective(..)
            | QueuedTokenType::TagDirective(..)
            | QueuedTokenType::ReservedDirective(..)
            | QueuedTokenType::DocumentStart
            | QueuedTokenType::DocumentEnd
            | QueuedTokenType::StreamEnd
    );
    if !document_is_empty {
        self.state = State::BlockNode;
        return self.parse_node(true, false);
    }
    self.pop_state();
    let span = match self.last_event_end {
        Some(end) => Span::empty(end),
        None => Span::empty(mark.start),
    };
    Ok((Event::empty_scalar(), span))
}
/// Emits `Event::DocumentEnd` and prepares for the next document.
///
/// An explicit document-end token is consumed and provides the event span;
/// otherwise the span is empty and sits at the end of the last emitted event
/// (or at the start of the next token if there is none). Tags are cleared,
/// or — with `keep_tags` — only the `!!` and `!` handles are removed.
///
/// # Errors
/// Fails if a directive follows a document that was not closed by an
/// explicit document-end token.
fn document_end<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let token = self.peek_token()?;
    let token_span = token.0;
    let explicit_end = matches!(token.1, QueuedTokenType::DocumentEnd);

    let span: Span = if explicit_end {
        self.skip();
        token_span
    } else {
        match self.last_event_end {
            Some(end) => Span::empty(end),
            None => Span::empty(token_span.start),
        }
    };

    if self.keep_tags {
        self.tags.remove("!!");
        self.tags.remove("!");
    } else {
        self.tags.clear();
    }

    if explicit_end {
        self.state = State::ImplicitDocumentStart;
        return Ok((Event::DocumentEnd, span));
    }

    let next = self.peek_token()?;
    let directive_follows = matches!(
        next.1,
        QueuedTokenType::VersionDirective(..)
            | QueuedTokenType::TagDirective(..)
            | QueuedTokenType::ReservedDirective(..)
    );
    if directive_follows {
        return Err(ScanError::new_str(
            next.0.start,
            "missing explicit document end marker before directive",
        ));
    }
    self.state = State::DocumentStart;
    Ok((Event::DocumentEnd, span))
}
/// Assigns the next free anchor id to `name` and returns it.
///
/// An existing entry with the same name is replaced.
///
/// # Errors
/// Fails (without registering anything) if the id counter cannot be advanced
/// without overflowing; `mark` positions the error.
fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
    let new_id = self.anchor_id_count;
    let Some(following_id) = new_id.checked_add(1) else {
        return Err(ScanError::new_str(
            mark.start,
            "while parsing anchor, anchor count exceeded supported limit",
        ));
    };
    self.anchor_id_count = following_id;
    self.anchors.insert(name, new_id);
    Ok(new_id)
}
/// Stashes node properties that have been read but not yet attached to a
/// node, so the next `parse_node` call can pick them up.
///
/// Used when a comment event has to be returned between a node's properties
/// and its content.
fn save_pending_node_properties(
&mut self,
anchor_id: usize,
tag: Option<Cow<'input, Tag>>,
tag_start: Option<Marker>,
property_end: Option<Marker>,
) {
self.pending_node_anchor_id = anchor_id;
self.pending_node_tag = tag;
self.pending_node_tag_start = tag_start;
self.pending_node_property_end = property_end;
}
/// Pairs `event` with `span` after recording `start` as the span's tag start.
fn attach_tag_start(event: Event<'_>, span: Span, start: Option<Marker>) -> (Event<'_>, Span) {
(event, span.with_tag_start(start))
}
/// Parses one node: optional properties (anchor and/or tag) followed by the
/// node content.
///
/// * `block` — whether block collections may start here.
/// * `indentless_sequence` — whether a bare block-entry token may start a
///   sequence here.
///
/// A comment found before the node or right after its properties is returned
/// as its own event; in the latter case the properties are stashed and picked
/// up again by the next call.
///
/// # Errors
/// Fails on an alias to an unknown anchor, on tag-resolution or
/// anchor-registration errors, and when no node content is found and the
/// node has neither tag nor anchor.
// The function is one long token dispatch; splitting it would scatter the
// shared property state, hence the lint exemption.
#[allow(clippy::too_many_lines)]
fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
where
'input: 'a,
{
if let Some(comment) = self.maybe_next_comment_event()? {
return Ok(comment);
}
// Resume from properties stashed by an earlier call that was interrupted
// by a comment (all "empty" if nothing was stashed).
let mut anchor_id = core::mem::take(&mut self.pending_node_anchor_id);
let mut tag = self.pending_node_tag.take();
let mut tag_start = self.pending_node_tag_start.take();
let mut property_end = self.pending_node_property_end.take();
// Phase 1: alias, or node properties in either order (anchor then tag, or
// tag then anchor).
match *self.peek_token()? {
QueuedToken(_, QueuedTokenType::Alias(_)) => {
self.pop_state();
if let QueuedToken(span, QueuedTokenType::Alias(name)) = self.fetch_token() {
match self.anchors.get(&*name) {
None => {
return Err(ScanError::new_str(
span.start,
"while parsing node, found unknown anchor",
))
}
Some(id) => return Ok((Event::Alias(*id), span)),
}
}
unreachable!()
}
QueuedToken(_, QueuedTokenType::Anchor(_)) => {
if let QueuedToken(span, QueuedTokenType::Anchor(name)) = self.fetch_token() {
anchor_id = self.register_anchor(name, &span)?;
property_end = Some(span.end);
if matches!(self.peek_token()?.1, QueuedTokenType::Tag(..)) {
if let QueuedToken(tag_span, QueuedTokenType::Tag(handle, suffix)) =
self.fetch_token()
{
tag_start = Some(tag_span.start);
tag = Some(self.resolve_tag(tag_span, &handle, suffix)?);
property_end = Some(tag_span.end);
} else {
unreachable!()
}
}
// A comment after the properties is emitted first; keep the
// properties for the call that parses the node content.
if let Some(comment) = self.maybe_next_comment_event()? {
self.save_pending_node_properties(anchor_id, tag, tag_start, property_end);
return Ok(comment);
}
} else {
unreachable!()
}
}
QueuedToken(mark, QueuedTokenType::Tag(..)) => {
if let QueuedTokenType::Tag(handle, suffix) = self.fetch_token().1 {
tag_start = Some(mark.start);
property_end = Some(mark.end);
tag = Some(self.resolve_tag(mark, &handle, suffix)?);
if let QueuedTokenType::Anchor(_) = &self.peek_token()?.1 {
if let QueuedToken(mark, QueuedTokenType::Anchor(name)) = self.fetch_token()
{
anchor_id = self.register_anchor(name, &mark)?;
property_end = Some(mark.end);
} else {
unreachable!()
}
}
// Same as above: emit the comment now, resume with the stashed
// properties later.
if let Some(comment) = self.maybe_next_comment_event()? {
self.save_pending_node_properties(anchor_id, tag, tag_start, property_end);
return Ok(comment);
}
} else {
unreachable!()
}
}
_ => {}
}
// Phase 2: node content.
match *self.peek_token()? {
QueuedToken(mark, QueuedTokenType::BlockEntry) if indentless_sequence => {
self.skip();
let comments = self.next_comment_events()?;
let start = (
Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
mark.with_tag_start(tag_start),
);
if comments.is_empty() {
self.pending_empty_scalar_span = Some(mark);
self.state = State::IndentlessSequenceEntryNode;
Ok(start)
} else if let Ok(QueuedToken(
_,
QueuedTokenType::BlockEntry
| QueuedTokenType::Key
| QueuedTokenType::Value
| QueuedTokenType::BlockEnd,
)) = self.peek_token()
{
// The first entry has no content: emit the sequence start and an
// empty scalar, ordered among the comments by source position.
self.state = State::IndentlessSequenceEntry;
Ok(self.queue_two_events_by_span(
comments,
start,
(Event::empty_scalar(), mark),
))
} else {
self.pending_empty_scalar_span = Some(mark);
self.state = State::IndentlessSequenceEntryNode;
Ok(self.queue_event_by_span(comments, start))
}
}
QueuedToken(_, QueuedTokenType::Scalar(..)) => {
self.pop_state();
if let QueuedToken(mark, QueuedTokenType::Scalar(style, v)) = self.fetch_token() {
Ok(Self::attach_tag_start(
Event::Scalar(v, style, anchor_id, tag),
mark,
tag_start,
))
} else {
unreachable!()
}
}
QueuedToken(mark, QueuedTokenType::FlowSequenceStart) => {
self.state = State::FlowSequenceFirstEntry;
self.skip();
Ok(Self::attach_tag_start(
Event::SequenceStart(StructureStyle::Flow, anchor_id, tag),
mark,
tag_start,
))
}
QueuedToken(mark, QueuedTokenType::FlowMappingStart) => {
self.state = State::FlowMappingFirstKey;
self.skip();
Ok(Self::attach_tag_start(
Event::MappingStart(StructureStyle::Flow, anchor_id, tag),
mark,
tag_start,
))
}
QueuedToken(mark, QueuedTokenType::BlockSequenceStart) if block => {
self.state = State::BlockSequenceFirstEntry;
self.skip();
Ok(Self::attach_tag_start(
Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
mark,
tag_start,
))
}
QueuedToken(mark, QueuedTokenType::BlockMappingStart) if block => {
self.state = State::BlockMappingFirstKey;
self.skip();
Ok(Self::attach_tag_start(
Event::MappingStart(StructureStyle::Block, anchor_id, tag),
mark,
tag_start,
))
}
// Properties without content: the node is an empty scalar placed at
// the end of the properties.
QueuedToken(mark, _) if tag.is_some() || anchor_id > 0 => {
self.pop_state();
let span = property_end.map_or_else(|| Span::empty(mark.start), Span::empty);
Ok(Self::attach_tag_start(
Event::empty_scalar_with_anchor(anchor_id, tag),
span,
tag_start,
))
}
// Neither content nor properties: report an error whose message depends
// on the current state.
QueuedToken(span, _) => {
let info = match self.state {
State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
"unexpected EOF while parsing a flow sequence"
}
State::FlowMappingFirstKey
| State::FlowMappingKey
| State::FlowMappingValue
| State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
State::FlowSequenceEntryMappingKey
| State::FlowSequenceEntryMappingValue
| State::FlowSequenceEntryMappingEnd
| State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
State::BlockSequenceFirstEntry
| State::BlockSequenceEntry
| State::BlockNode => "unexpected EOF while parsing a block sequence",
State::BlockMappingFirstKey
| State::BlockMappingKey
| State::BlockMappingValue
| State::BlockNodeOrIndentlessSequence => {
"unexpected EOF while parsing a block mapping"
}
_ => "while parsing a node, did not find expected node content",
};
Err(ScanError::new_str(span.start, info))
}
}
}
/// Handles the key position of a block mapping entry.
///
/// Behaviour depends on the next queued token:
/// - `Key`: its start column is stored in `pending_key_indent`, the token is
///   consumed, and then either a following comment is returned (switching to
///   `State::BlockMappingKeyNode`) or `block_mapping_key_node` takes over.
/// - `Value`: the key is missing, so an empty scalar is emitted at the start
///   of the value token and the state becomes `State::BlockMappingValue`.
/// - `BlockEnd`: the saved state is popped, the token is consumed and
///   `Event::MappingEnd` is returned with the token's span.
///
/// The `_first` argument is accepted but not used.
///
/// # Errors
/// Propagates any error from peeking or comment collection, and reports
/// "did not find expected key" for every other token.
fn block_mapping_key<'a>(&mut self, _first: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        // The span is bound right here; peeking the same token a second time
        // just to read its span is unnecessary.
        QueuedToken(key_span, QueuedTokenType::Key) => {
            self.pending_key_indent = Some(key_span.start.col());
            self.skip();
            match self.maybe_next_comment_event()? {
                Some(comment) => {
                    self.state = State::BlockMappingKeyNode;
                    Ok(comment)
                }
                None => self.block_mapping_key_node(),
            }
        }
        QueuedToken(value_span, QueuedTokenType::Value) => {
            self.state = State::BlockMappingValue;
            Ok((Event::empty_scalar(), Span::empty(value_span.start)))
        }
        QueuedToken(end_span, QueuedTokenType::BlockEnd) => {
            self.pop_state();
            self.skip();
            Ok((Event::MappingEnd, end_span))
        }
        QueuedToken(unexpected, _) => Err(ScanError::new_str(
            unexpected.start,
            "while parsing a block mapping, did not find expected key",
        )),
    }
}
/// Produces the key node of a block mapping entry.
///
/// A `Key`, `Value` or `BlockEnd` token at this point means the key has no
/// content: an empty scalar positioned at the start of that token is returned
/// and the state becomes `State::BlockMappingValue`. Any other token is handed
/// to `defer_parse_node` together with `State::BlockMappingValue`.
fn block_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        QueuedToken(
            next_span,
            QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
        ) => {
            self.state = State::BlockMappingValue;
            Ok((Event::empty_scalar(), Span::empty(next_span.start)))
        }
        _ => self.defer_parse_node(
            State::BlockNodeOrIndentlessSequence,
            State::BlockMappingValue,
            true,
            true,
        ),
    }
}
/// Handles the value position of a block mapping entry.
///
/// Without a `Value` token the value is missing: an empty scalar at the start
/// of the next token is returned and the state becomes
/// `State::BlockMappingKey`.
///
/// With a `Value` token, the token is consumed and trailing comments are
/// collected:
/// - no comments: `block_mapping_value_node_with_empty_span` decides, using
///   the value token's span for a potential empty scalar;
/// - comments followed by `Key`/`Value`/`BlockEnd`: the comments and an empty
///   scalar (carrying the value token's span) go through `queue_event_by_span`;
/// - comments followed by anything else: the span is parked in
///   `pending_empty_scalar_span`, the state becomes
///   `State::BlockMappingValueNode`, and `queue_tail_and_return_first`
///   supplies the event to return.
fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let value_span = match *self.peek_token()? {
        QueuedToken(span, QueuedTokenType::Value) => span,
        QueuedToken(span, _) => {
            self.state = State::BlockMappingKey;
            return Ok((Event::empty_scalar(), Span::empty(span.start)));
        }
    };

    self.skip();
    let comments = self.next_comment_events()?;
    if comments.is_empty() {
        return self.block_mapping_value_node_with_empty_span(value_span);
    }

    // A peek error is treated like "some other token" here, same as a
    // non-matching token.
    let value_is_empty = matches!(
        self.peek_token(),
        Ok(QueuedToken(
            _,
            QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
        ))
    );
    if value_is_empty {
        self.state = State::BlockMappingKey;
        Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), value_span)))
    } else {
        self.pending_empty_scalar_span = Some(value_span);
        self.state = State::BlockMappingValueNode;
        Ok(self.queue_tail_and_return_first(comments))
    }
}
/// Resumes block-mapping value parsing after comments were emitted.
///
/// Takes the span parked in `pending_empty_scalar_span` (clearing it); when
/// nothing is parked, an empty span at the start of the next token is used.
/// The chosen span is forwarded to `block_mapping_value_node_with_empty_span`.
fn block_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let empty_span = if let Some(parked) = self.pending_empty_scalar_span.take() {
        parked
    } else {
        Span::empty(self.peek_token()?.0.start)
    };
    self.block_mapping_value_node_with_empty_span(empty_span)
}
/// Produces the value node of a block mapping entry.
///
/// `mark` is the span reported if the value turns out to be empty, which is
/// the case when the next token is `Key`, `Value` or `BlockEnd`; the state
/// then becomes `State::BlockMappingKey`. Otherwise the node is parsed via
/// `defer_parse_node` with `State::BlockMappingKey` as the second state.
fn block_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        QueuedToken(
            _,
            QueuedTokenType::Key | QueuedTokenType::Value | QueuedTokenType::BlockEnd,
        ) => {
            self.state = State::BlockMappingKey;
            Ok((Event::empty_scalar(), mark))
        }
        _ => self.defer_parse_node(
            State::BlockNodeOrIndentlessSequence,
            State::BlockMappingKey,
            true,
            true,
        ),
    }
}
/// Handles the key position inside a flow mapping (`{ ... }`).
///
/// * A `FlowMappingEnd` token closes the mapping: the saved state is popped,
///   the token is consumed and `Event::MappingEnd` is returned with its span.
/// * When `first` is `false`, a `FlowEntry` separator is required before the
///   next key. After consuming it, a following comment is returned right away
///   with the state set to `State::FlowMappingFirstKey`.
/// * A `Key` token is consumed; a following comment is returned with the
///   state set to `State::FlowMappingKeyNode`, otherwise
///   `flow_mapping_key_node` parses the key.
/// * A `Value` token without a key yields an empty scalar key at the start of
///   that token and the state `State::FlowMappingValue`.
/// * Anything else is parsed through `defer_parse_node`, passing
///   `State::FlowMappingEmptyValue` as the second state.
///
/// # Errors
/// Propagates peek/comment errors and reports a missing `','` or `'}'` when a
/// separator was required but another token was found.
fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    let end_span = match *self.peek_token()? {
        QueuedToken(span, QueuedTokenType::FlowMappingEnd) => span,
        _ => {
            if !first {
                match *self.peek_token()? {
                    QueuedToken(_, QueuedTokenType::FlowEntry) => {
                        self.skip();
                        if let Some(comment) = self.maybe_next_comment_event()? {
                            self.state = State::FlowMappingFirstKey;
                            return Ok(comment);
                        }
                    }
                    QueuedToken(unexpected, _) => {
                        return Err(ScanError::new_str(
                            unexpected.start,
                            "while parsing a flow mapping, did not find expected ',' or '}'",
                        ));
                    }
                }
            }
            match *self.peek_token()? {
                QueuedToken(_, QueuedTokenType::Key) => {
                    self.skip();
                    return match self.maybe_next_comment_event()? {
                        Some(comment) => {
                            self.state = State::FlowMappingKeyNode;
                            Ok(comment)
                        }
                        None => self.flow_mapping_key_node(),
                    };
                }
                QueuedToken(value_span, QueuedTokenType::Value) => {
                    self.state = State::FlowMappingValue;
                    return Ok((Event::empty_scalar(), Span::empty(value_span.start)));
                }
                // Bind the closing token's span here so no further fallible
                // peek of the same token is needed.
                QueuedToken(span, QueuedTokenType::FlowMappingEnd) => span,
                _ => {
                    return self.defer_parse_node(
                        State::FlowNode,
                        State::FlowMappingEmptyValue,
                        false,
                        false,
                    );
                }
            }
        }
    };
    self.pop_state();
    self.skip();
    Ok((Event::MappingEnd, end_span))
}
/// Produces the key node of a flow mapping entry.
///
/// If the next token is `Value`, `FlowEntry` or `FlowMappingEnd`, the key is
/// empty: an empty scalar at the start of that token is returned. Otherwise
/// the key is parsed via `defer_parse_node`. Either way
/// `State::FlowMappingValue` is the state selected for what follows.
fn flow_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        QueuedToken(
            next_span,
            QueuedTokenType::Value | QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd,
        ) => {
            self.state = State::FlowMappingValue;
            Ok((Event::empty_scalar(), Span::empty(next_span.start)))
        }
        _ => self.defer_parse_node(State::FlowNode, State::FlowMappingValue, false, false),
    }
}
/// Handles the value position inside a flow mapping.
///
/// When `empty` is `true`, or when the next token is not `Value`, the value is
/// missing: an empty scalar at the start of the next token is returned and the
/// state becomes `State::FlowMappingKey`.
///
/// Otherwise the `Value` token is consumed and trailing comments collected:
/// - no comments: `flow_mapping_value_node_with_empty_span` decides, using the
///   value token's span for a potential empty scalar;
/// - comments followed by `FlowEntry`/`FlowMappingEnd`: comments plus an empty
///   scalar go through `queue_event_by_span`;
/// - comments followed by anything else: the span is parked in
///   `pending_empty_scalar_span` and the state becomes
///   `State::FlowMappingValueNode`.
fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    let (token_span, has_value_indicator) = match *self.peek_token()? {
        QueuedToken(span, QueuedTokenType::Value) => (span, true),
        QueuedToken(span, _) => (span, false),
    };
    if empty || !has_value_indicator {
        self.state = State::FlowMappingKey;
        return Ok((Event::empty_scalar(), Span::empty(token_span.start)));
    }

    self.skip();
    let comments = self.next_comment_events()?;
    if comments.is_empty() {
        return self.flow_mapping_value_node_with_empty_span(token_span);
    }

    // A peek error counts as "no terminator seen", like any other token.
    let value_is_empty = matches!(
        self.peek_token(),
        Ok(QueuedToken(
            _,
            QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd,
        ))
    );
    if value_is_empty {
        self.state = State::FlowMappingKey;
        Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), token_span)))
    } else {
        self.pending_empty_scalar_span = Some(token_span);
        self.state = State::FlowMappingValueNode;
        Ok(self.queue_tail_and_return_first(comments))
    }
}
/// Resumes flow-mapping value parsing after comments were emitted.
///
/// Uses (and clears) the span parked in `pending_empty_scalar_span`, falling
/// back to an empty span at the start of the next token, then delegates to
/// `flow_mapping_value_node_with_empty_span`.
fn flow_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let empty_span = if let Some(parked) = self.pending_empty_scalar_span.take() {
        parked
    } else {
        Span::empty(self.peek_token()?.0.start)
    };
    self.flow_mapping_value_node_with_empty_span(empty_span)
}
fn flow_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
where
'input: 'a,
{
match self.peek_token()?.1 {
QueuedTokenType::FlowEntry | QueuedTokenType::FlowMappingEnd => {
self.state = State::FlowMappingKey;
Ok((Event::empty_scalar(), mark))
}
_ => self.defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false),
}
}
/// Handles one entry position inside a flow sequence (`[ ... ]`).
///
/// * `FlowSequenceEnd` closes the sequence: the saved state is popped, the
///   token is consumed and `Event::SequenceEnd` is returned with its span.
/// * When `first` is `false`, a `FlowEntry` separator must come next; after
///   consuming it, a following comment is returned immediately with the state
///   set to `State::FlowSequenceFirstEntry`.
/// * A `Key` token is consumed and starts a flow-style `MappingStart`
///   (anchor id `0`, no tag), moving to `State::FlowSequenceEntryMappingKey`.
/// * Everything else is parsed through `defer_parse_node`.
///
/// # Errors
/// Propagates peek/comment errors and reports a missing `','` or `']'` when a
/// separator was required but another token was found.
fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    if let QueuedToken(end_span, QueuedTokenType::FlowSequenceEnd) = *self.peek_token()? {
        self.pop_state();
        self.skip();
        return Ok((Event::SequenceEnd, end_span));
    }

    if !first {
        match *self.peek_token()? {
            QueuedToken(_, QueuedTokenType::FlowEntry) => {
                self.skip();
                if let Some(comment) = self.maybe_next_comment_event()? {
                    self.state = State::FlowSequenceFirstEntry;
                    return Ok(comment);
                }
            }
            QueuedToken(unexpected, _) => {
                return Err(ScanError::new_str(
                    unexpected.start,
                    "while parsing a flow sequence, expected ',' or ']'",
                ));
            }
        }
    }

    match *self.peek_token()? {
        QueuedToken(end_span, QueuedTokenType::FlowSequenceEnd) => {
            self.pop_state();
            self.skip();
            Ok((Event::SequenceEnd, end_span))
        }
        QueuedToken(key_span, QueuedTokenType::Key) => {
            self.state = State::FlowSequenceEntryMappingKey;
            self.skip();
            Ok((Event::MappingStart(StructureStyle::Flow, 0, None), key_span))
        }
        _ => self.defer_parse_node(State::FlowNode, State::FlowSequenceEntry, false, false),
    }
}
/// Handles one entry position of an indentless block sequence.
///
/// Any token other than `BlockEntry` ends the sequence: the saved state is
/// popped and `Event::SequenceEnd` is returned with that token's span (the
/// token itself is not consumed).
///
/// For a `BlockEntry`, the token is consumed and trailing comments collected:
/// - no comments: `indentless_sequence_entry_node_with_empty_span` decides,
///   using the entry token's span for a potential empty scalar;
/// - comments followed by `BlockEntry`/`Key`/`Value`/`BlockEnd`: comments plus
///   an empty scalar go through `queue_event_by_span`;
/// - comments followed by anything else: the span is parked in
///   `pending_empty_scalar_span` and the state becomes
///   `State::IndentlessSequenceEntryNode`.
fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let entry_span = match *self.peek_token()? {
        QueuedToken(span, QueuedTokenType::BlockEntry) => span,
        QueuedToken(span, _) => {
            self.pop_state();
            return Ok((Event::SequenceEnd, span));
        }
    };

    self.skip();
    let comments = self.next_comment_events()?;
    if comments.is_empty() {
        return self.indentless_sequence_entry_node_with_empty_span(entry_span);
    }

    // A peek error counts as "no terminator seen", like any other token.
    let entry_is_empty = matches!(
        self.peek_token(),
        Ok(QueuedToken(
            _,
            QueuedTokenType::BlockEntry
                | QueuedTokenType::Key
                | QueuedTokenType::Value
                | QueuedTokenType::BlockEnd,
        ))
    );
    if entry_is_empty {
        self.state = State::IndentlessSequenceEntry;
        Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), entry_span)))
    } else {
        self.pending_empty_scalar_span = Some(entry_span);
        self.state = State::IndentlessSequenceEntryNode;
        Ok(self.queue_tail_and_return_first(comments))
    }
}
/// Resumes indentless-sequence entry parsing after comments were emitted.
///
/// Uses (and clears) the span parked in `pending_empty_scalar_span`, falling
/// back to an empty span at the start of the next token, then delegates to
/// `indentless_sequence_entry_node_with_empty_span`.
fn indentless_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let empty_span = if let Some(parked) = self.pending_empty_scalar_span.take() {
        parked
    } else {
        Span::empty(self.peek_token()?.0.start)
    };
    self.indentless_sequence_entry_node_with_empty_span(empty_span)
}
/// Produces the node of an indentless sequence entry.
///
/// `mark` is the span reported when the entry is empty, i.e. when the next
/// token is `BlockEntry`, `Key`, `Value` or `BlockEnd`; the state then becomes
/// `State::IndentlessSequenceEntry`. Any other token is parsed through
/// `defer_parse_node` with `State::IndentlessSequenceEntry` as second state.
fn indentless_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        QueuedToken(
            _,
            QueuedTokenType::BlockEntry
            | QueuedTokenType::Key
            | QueuedTokenType::Value
            | QueuedTokenType::BlockEnd,
        ) => {
            self.state = State::IndentlessSequenceEntry;
            Ok((Event::empty_scalar(), mark))
        }
        _ => self.defer_parse_node(
            State::BlockNode,
            State::IndentlessSequenceEntry,
            true,
            false,
        ),
    }
}
/// Handles one entry position of a block sequence.
///
/// * `BlockEnd` closes the sequence: the saved state is popped, the token is
///   consumed and `Event::SequenceEnd` is returned with its span.
/// * `BlockEntry` is consumed and trailing comments collected:
///   - no comments: `block_sequence_entry_node_with_empty_span` decides, using
///     the entry token's span for a potential empty scalar;
///   - comments followed by `BlockEntry`/`BlockEnd`: comments plus an empty
///     scalar go through `queue_event_by_span`;
///   - comments followed by anything else: the span is parked in
///     `pending_empty_scalar_span` and the state becomes
///     `State::BlockSequenceEntryNode`.
///
/// The `_first` argument is accepted but not used.
///
/// # Errors
/// Propagates peek/comment errors and reports a missing `'-'` indicator for
/// any other token.
fn block_sequence_entry<'a>(&mut self, _first: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    let entry_span = match *self.peek_token()? {
        QueuedToken(end_span, QueuedTokenType::BlockEnd) => {
            self.pop_state();
            self.skip();
            return Ok((Event::SequenceEnd, end_span));
        }
        QueuedToken(span, QueuedTokenType::BlockEntry) => span,
        QueuedToken(unexpected, _) => {
            return Err(ScanError::new_str(
                unexpected.start,
                "while parsing a block collection, did not find expected '-' indicator",
            ));
        }
    };

    self.skip();
    let comments = self.next_comment_events()?;
    if comments.is_empty() {
        return self.block_sequence_entry_node_with_empty_span(entry_span);
    }

    // A peek error counts as "no terminator seen", like any other token.
    let entry_is_empty = matches!(
        self.peek_token(),
        Ok(QueuedToken(
            _,
            QueuedTokenType::BlockEntry | QueuedTokenType::BlockEnd,
        ))
    );
    if entry_is_empty {
        self.state = State::BlockSequenceEntry;
        Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), entry_span)))
    } else {
        self.pending_empty_scalar_span = Some(entry_span);
        self.state = State::BlockSequenceEntryNode;
        Ok(self.queue_tail_and_return_first(comments))
    }
}
/// Resumes block-sequence entry parsing after comments were emitted.
///
/// Uses (and clears) the span parked in `pending_empty_scalar_span`, falling
/// back to an empty span at the start of the next token, then delegates to
/// `block_sequence_entry_node_with_empty_span`.
fn block_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let empty_span = if let Some(parked) = self.pending_empty_scalar_span.take() {
        parked
    } else {
        Span::empty(self.peek_token()?.0.start)
    };
    self.block_sequence_entry_node_with_empty_span(empty_span)
}
/// Produces the node of a block sequence entry.
///
/// `mark` is the span reported when the entry is empty, i.e. when the next
/// token is `BlockEntry` or `BlockEnd`; the state then becomes
/// `State::BlockSequenceEntry`. Any other token is parsed through
/// `defer_parse_node` with `State::BlockSequenceEntry` as the second state.
fn block_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        QueuedToken(_, QueuedTokenType::BlockEntry | QueuedTokenType::BlockEnd) => {
            self.state = State::BlockSequenceEntry;
            Ok((Event::empty_scalar(), mark))
        }
        _ => self.defer_parse_node(State::BlockNode, State::BlockSequenceEntry, true, false),
    }
}
/// Produces the key of a single-pair mapping nested in a flow sequence.
///
/// If the next token is `FlowEntry` or `FlowSequenceEnd` the key is empty and
/// an empty scalar at the start of that token is returned; otherwise the key
/// is parsed via `defer_parse_node`. In both cases
/// `State::FlowSequenceEntryMappingValue` is selected for what follows.
fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        QueuedToken(
            next_span,
            QueuedTokenType::FlowEntry | QueuedTokenType::FlowSequenceEnd,
        ) => {
            self.state = State::FlowSequenceEntryMappingValue;
            Ok((Event::empty_scalar(), Span::empty(next_span.start)))
        }
        _ => self.defer_parse_node(
            State::FlowNode,
            State::FlowSequenceEntryMappingValue,
            false,
            false,
        ),
    }
}
/// Handles the value position of a single-pair mapping nested in a flow
/// sequence.
///
/// A `Value` token is consumed; a following comment is returned with the
/// state set to `State::FlowSequenceEntryMappingValueNode`, otherwise
/// `flow_sequence_entry_mapping_value_node` parses the value. Without a
/// `Value` token an empty scalar at the start of the next token is returned
/// and the state becomes `State::FlowSequenceEntryMappingEnd`.
fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    if let QueuedToken(next_span, ref kind) = *self.peek_token()? {
        if !matches!(kind, QueuedTokenType::Value) {
            self.state = State::FlowSequenceEntryMappingEnd;
            return Ok((Event::empty_scalar(), Span::empty(next_span.start)));
        }
    }

    self.skip();
    match self.maybe_next_comment_event()? {
        Some(comment) => {
            self.state = State::FlowSequenceEntryMappingValueNode;
            Ok(comment)
        }
        None => self.flow_sequence_entry_mapping_value_node(),
    }
}
/// Produces the value node of a single-pair mapping nested in a flow
/// sequence.
///
/// If the next token is `FlowEntry` or `FlowSequenceEnd` the value is empty
/// and an empty scalar at the start of that token is returned; otherwise the
/// value is parsed via `defer_parse_node`. Either way
/// `State::FlowSequenceEntryMappingEnd` is selected for what follows.
fn flow_sequence_entry_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        QueuedToken(
            next_span,
            QueuedTokenType::FlowEntry | QueuedTokenType::FlowSequenceEnd,
        ) => {
            self.state = State::FlowSequenceEntryMappingEnd;
            Ok((Event::empty_scalar(), Span::empty(next_span.start)))
        }
        _ => self.defer_parse_node(
            State::FlowNode,
            State::FlowSequenceEntryMappingEnd,
            false,
            false,
        ),
    }
}
/// Closes a single-pair mapping nested in a flow sequence.
///
/// Switches to `State::FlowSequenceEntry` and emits `Event::MappingEnd` with
/// an empty span at the start of the next token, which is left unconsumed.
///
/// # Errors
/// Propagates the error if peeking the next token fails. The state has
/// already been switched at that point.
// No `clippy::unnecessary_wraps` suppression is needed: the `?` below makes
// the `Result` return type genuinely fallible.
fn flow_sequence_entry_mapping_end<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    self.state = State::FlowSequenceEntry;
    let QueuedToken(next_span, _) = *self.peek_token()?;
    Ok((Event::MappingEnd, Span::empty(next_span.start)))
}
/// Resolves a tag's handle against the registered tag prefixes in `self.tags`.
///
/// The original handle text is always preserved in the resulting `Tag`.
/// Resolution rules, in order:
/// 1. Handle `!!`: use the prefix registered for `!!`, or
///    `tag:yaml.org,2002:` when none is registered.
/// 2. Empty handle with suffix `!`: use the prefix registered for `!`, or an
///    empty prefix.
/// 3. Any handle with a registered prefix: use that prefix.
/// 4. An unregistered handle of the form `!...!` (at least two characters,
///    starting and ending with `!`) is an error.
/// 5. Otherwise the handle itself is used as the prefix.
///
/// # Errors
/// Returns "the handle wasn't declared" at `span.start` for rule 4.
fn resolve_tag(
    &self,
    span: Span,
    handle: &Cow<'input, str>,
    suffix: Cow<'input, str>,
) -> Result<Cow<'input, Tag>, ScanError> {
    let original_handle = handle.to_string();
    let suffix = suffix.into_owned();

    if handle == "!!" {
        let prefix = self
            .tags
            .get("!!")
            .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string);
        return Ok(Cow::Owned(Tag::with_original_handle(
            prefix,
            suffix,
            original_handle,
        )));
    }

    if handle.is_empty() && suffix == "!" {
        let tag = match self.tags.get("!") {
            Some(prefix) => Tag::with_original_handle(prefix.clone(), suffix, original_handle),
            None => Tag::with_original_handle(String::new(), suffix, original_handle),
        };
        return Ok(Cow::Owned(tag));
    }

    if let Some(prefix) = self.tags.get(&**handle) {
        return Ok(Cow::Owned(Tag::with_original_handle(
            prefix.clone(),
            suffix,
            original_handle,
        )));
    }

    let is_named_handle = handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!');
    if is_named_handle {
        return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
    }

    Ok(Cow::Owned(Tag::with_original_handle(
        handle.to_string(),
        suffix,
        original_handle,
    )))
}
}
impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
    /// Returns the next event without consuming it.
    ///
    /// A successfully produced event is buffered in `self.current`; a failure
    /// is cached in `self.current_error` so that repeated peeks report a clone
    /// of the same error. Returns `None` once `stream_end_emitted` is set and
    /// nothing is buffered.
    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
        if self.current.is_none() {
            if let Some(cached) = &self.current_error {
                return Some(Err(cached.clone()));
            }
            if self.stream_end_emitted {
                return None;
            }
            match self.next_event_impl() {
                Ok(event) => self.current = Some(event),
                Err(error) => {
                    self.current_error = Some(error.clone());
                    return Some(Err(error));
                }
            }
        }
        self.current.as_ref().map(Ok)
    }

    /// Returns the next event, consuming it.
    ///
    /// A cached peek error is handed out first. Both an error and
    /// `Event::StreamEnd` set `stream_end_emitted`, after which this method
    /// returns `None`.
    fn next_event(&mut self) -> Option<ParseResult<'input>> {
        let result = match self.current_error.take() {
            Some(error) => Err(error),
            None if self.stream_end_emitted => return None,
            None => self.next_event_impl(),
        };
        let is_terminal = match &result {
            Ok((event, _)) => matches!(event, Event::StreamEnd),
            Err(_) => true,
        };
        if is_terminal {
            self.stream_end_emitted = true;
        }
        Some(result)
    }

    /// Feeds events to an infallible receiver.
    ///
    /// Wraps `recv` in `InfallibleSpannedReceiver`, runs `try_load`, and
    /// converts the outcome with `into_scan_result`.
    fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError> {
        let mut adapter = InfallibleSpannedReceiver(recv);
        let outcome = ParserTrait::try_load(self, &mut adapter, multi);
        into_scan_result(outcome)
    }

    /// Feeds events to a fallible receiver until the stream ends, or — when
    /// `multi` is `false` — until the first `Event::DocumentEnd`.
    ///
    /// Steps:
    /// 1. If the scanner has not started the stream, or a `StreamStart` is
    ///    buffered from a peek, the next event must be `StreamStart` and is
    ///    emitted.
    /// 2. If the scanner already reached the end of the stream and nothing is
    ///    buffered (event, error, or queued events), a lone `StreamEnd` is
    ///    emitted and `stream_end_emitted` is set.
    /// 3. Otherwise events are emitted one by one. A cached peek error is
    ///    returned first and sets `stream_end_emitted`.
    ///
    /// NOTE(review): a scan error propagated by `?` from `next_event_impl`
    /// does not set `stream_end_emitted` here, unlike in `next_event` —
    /// confirm whether that difference is intended.
    ///
    /// # Errors
    /// Returns a `TryLoadError` for scan errors, for a missing stream start,
    /// and for whatever `try_emit` reports.
    fn try_load<R: TrySpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), TryLoadError<R::Error>> {
        let stream_start_buffered = matches!(self.current, Some((Event::StreamStart, _)));
        if !self.scanner.stream_started() || stream_start_buffered {
            let (event, span) = self.next_event_impl()?;
            if !matches!(event, Event::StreamStart) {
                return Err(TryLoadError::scan(ScanError::new_str(
                    span.start,
                    "did not find expected <stream-start>",
                )));
            }
            try_emit(recv, event, span)?;
        }

        let has_buffered_result = self.current.is_some()
            || self.current_error.is_some()
            || !self.queued_events.is_empty();
        if self.scanner.stream_ended() && !has_buffered_result {
            try_emit(recv, Event::StreamEnd, Span::empty(self.scanner.mark()))?;
            self.stream_end_emitted = true;
            return Ok(());
        }

        loop {
            if let Some(error) = self.current_error.take() {
                self.stream_end_emitted = true;
                return Err(TryLoadError::scan(error));
            }
            let (event, span) = self.next_event_impl()?;

            // Classify before the event is moved into the receiver.
            let ends_stream = matches!(event, Event::StreamEnd);
            let ends_document = matches!(event, Event::DocumentEnd);
            try_emit(recv, event, span)?;

            if ends_stream {
                self.stream_end_emitted = true;
                return Ok(());
            }
            if ends_document && !multi {
                return Ok(());
            }
        }
    }
}
/// Lets a `Parser` be driven with iterator adaptors and `for` loops.
///
/// Each item is an event together with its span, or a scan error.
impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
type Item = Result<(Event<'input>, Span), ScanError>;
// Pure delegation to `next_event`.
fn next(&mut self) -> Option<Self::Item> {
self.next_event()
}
}
#[cfg(test)]
mod test {
use alloc::{
borrow::{Cow, ToOwned},
string::{String, ToString},
vec::Vec,
};
use core::{error::Error as _, fmt};
use crate::scanner::{Marker, ScalarStyle, ScanError, Span};
use super::{
Event, EventReceiver, Parser, State, StructureStyle, Tag, TryEventReceiver, TryLoadError,
TrySpannedEventReceiver, YamlVersion,
};
/// Test receiver that records every event it is given, in order.
#[derive(Default)]
struct CollectingSink<'input> {
// Events in the order they were delivered.
events: Vec<Event<'input>>,
}
impl<'input> EventReceiver<'input> for CollectingSink<'input> {
// Appends the event; never rejects anything.
fn on_event(&mut self, ev: Event<'input>) {
self.events.push(ev);
}
}
/// Parses `input` and returns the `info()` text of the first error produced.
///
/// # Panics
/// Panics with "expected parser error" if the parser finishes without error.
fn first_error_info(input: &str) -> String {
    Parser::new_from_str(input)
        .find_map(|item| item.err().map(|err| err.info().to_owned()))
        .expect("expected parser error")
}
/// Parses `input` and returns an owned copy of the tag on the first scalar
/// event that carries one.
///
/// # Panics
/// Panics with "input should parse" on a parse error encountered before such
/// a scalar, and with "expected tagged scalar" if no tagged scalar is found.
fn first_tagged_scalar_tag(input: &str) -> Tag {
    for item in Parser::new_from_str(input) {
        let (event, _span) = item.expect("input should parse");
        if let Event::Scalar(_, _, _, Some(tag)) = event {
            return tag.into_owned();
        }
    }
    panic!("expected tagged scalar")
}
/// With a comment preceding the node, `defer_parse_node` returns the comment
/// event and leaves the parser in the state passed as first argument.
#[test]
fn deferred_parse_node_can_emit_comment_before_flow_node() {
let mut parser = Parser::new_from_str("# deferred\nvalue\n");
// Drive the parser manually past the stream and implicit document start.
assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
assert_eq!(
parser.document_start(true).unwrap().0,
Event::DocumentStart(false, None)
);
let (event, _) = parser
.defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false)
.unwrap();
assert!(matches!(event, Event::Comment(text, _) if text == " deferred"));
assert_eq!(parser.state, State::FlowNode);
}
/// A node event taken from `queued_events` receives the pending key indent on
/// its span, and the pending value is cleared afterwards.
#[test]
fn queued_node_event_gets_pending_key_indent() {
let mut parser = Parser::new_from_str("");
let span = Span::empty(Marker::new(0, 1, 0));
parser.pending_key_indent = Some(3);
// Inject a node event directly into the queue.
parser
.queued_events
.push_back((Event::SequenceStart(StructureStyle::Block, 0, None), span));
let (event, span) = parser.next_event_impl().unwrap();
assert!(matches!(
event,
Event::SequenceStart(StructureStyle::Block, 0, None)
));
assert_eq!(span.indent, Some(3));
assert_eq!(parser.pending_key_indent, None);
}
/// `state_machine` yields the scalar `value` when started from
/// `State::FlowNode` and from `State::FlowSequenceEntryMappingValueNode`.
#[test]
fn state_machine_handles_deferred_flow_node_states() {
// Case 1: forced into `FlowNode` with `End` pushed as the saved state.
let mut parser = Parser::new_from_str("value\n");
assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
assert_eq!(
parser.document_start(true).unwrap().0,
Event::DocumentStart(false, None)
);
parser.state = State::FlowNode;
parser.push_state(State::End);
let (event, _) = parser.state_machine().unwrap();
assert!(matches!(event, Event::Scalar(value, ..) if value == "value"));
// Case 2: forced into `FlowSequenceEntryMappingValueNode`.
let mut parser = Parser::new_from_str("value\n");
assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
assert_eq!(
parser.document_start(true).unwrap().0,
Event::DocumentStart(false, None)
);
parser.state = State::FlowSequenceEntryMappingValueNode;
let (event, _) = parser.state_machine().unwrap();
assert!(matches!(event, Event::Scalar(value, ..) if value == "value"));
}
/// A tag resolved from `!!str` displays as the full URI, with no extra `!`.
#[test]
fn display_resolved_core_tag_without_extra_bang() {
let tag = Tag::with_original_handle("tag:yaml.org,2002:", "str", "!!");
assert_eq!(tag.to_string(), "tag:yaml.org,2002:str");
}
/// Exercises the `Tag` helper methods on a core-schema tag, a local tag, the
/// non-specific `!` tag, a verbatim tag, and a `tag:yaml.org,2002:` tag whose
/// suffix is not recognised as core.
#[test]
fn tag_helpers_distinguish_core_and_local_tags() {
let core = Tag::with_original_handle("tag:yaml.org,2002:", "int", "!!");
let local = Tag::new("!", "thing");
let non_specific = Tag::with_original_handle("", "!", "");
let verbatim = Tag::with_original_handle("", "tag:example.com,2000:thing", "");
let unknown_yaml_org = Tag::with_original_handle("", "tag:yaml.org,2002:application", "");
// Core-schema tag.
assert_eq!(core.core_suffix(), Some("int"));
assert!(core.is_yaml_core_schema());
assert!(core.is_yaml_core_schema_tag("int"));
assert!(!core.is_yaml_core_schema_tag("str"));
assert!(!core.is_custom());
assert_eq!(core.parts(), ("tag:yaml.org,2002:", "int"));
assert_eq!(core.original_parts(), ("!!", "int"));
assert_eq!(core.original(), "!!int");
// Local tag.
assert_eq!(local.core_suffix(), None);
assert!(!local.is_yaml_core_schema());
assert!(!local.is_yaml_core_schema_tag("thing"));
assert!(local.is_custom());
assert_eq!(local.parts(), ("!", "thing"));
assert_eq!(local.original_parts(), ("!", "thing"));
assert_eq!(local.original(), "!thing");
assert_eq!(local.to_string(), "!thing");
// Non-specific tag.
assert_eq!(non_specific.parts(), ("", "!"));
assert_eq!(non_specific.original_parts(), ("", "!"));
assert_eq!(non_specific.original(), "!");
// Verbatim tag.
assert_eq!(verbatim.parts(), ("", "tag:example.com,2000:thing"));
assert_eq!(
verbatim.original_parts(),
("", "tag:example.com,2000:thing")
);
assert_eq!(verbatim.original(), "!<tag:example.com,2000:thing>");
// yaml.org tag with a suffix that is not part of the core schema.
assert_eq!(unknown_yaml_org.core_suffix(), None);
assert!(!unknown_yaml_org.is_yaml_core_schema());
assert!(unknown_yaml_org.is_custom());
}
/// The core `int` tag is recognised regardless of how it is spelled in the
/// input: shorthand, verbatim, via a `%TAG` directive with the full prefix, or
/// via a `%TAG` directive whose prefix ends in the middle of the suffix.
#[test]
fn core_suffix_uses_resolved_tag_uri_for_common_spellings() {
// (label, YAML input, expected `parts()` of the first tagged scalar)
let cases = [
("shorthand", "v: !!int 1\n", ("tag:yaml.org,2002:", "int")),
(
"verbatim",
"v: !<tag:yaml.org,2002:int> 1\n",
("", "tag:yaml.org,2002:int"),
),
(
"full prefix",
"%TAG !e! tag:yaml.org,2002:\n---\nv: !e!int 1\n",
("tag:yaml.org,2002:", "int"),
),
(
"mid-split",
"%TAG !m! tag:yaml.org,2002:i\n---\nv: !m!nt 1\n",
("tag:yaml.org,2002:i", "nt"),
),
];
for (label, input, expected_parts) in cases {
let tag = first_tagged_scalar_tag(input);
assert_eq!(tag.parts(), expected_parts, "{label}");
assert_eq!(tag.core_suffix(), Some("int"), "{label}");
assert!(tag.is_yaml_core_schema(), "{label}");
assert!(tag.is_yaml_core_schema_tag("int"), "{label}");
assert!(!tag.is_yaml_core_schema_tag("str"), "{label}");
assert!(!tag.is_custom(), "{label}");
}
}
/// Tags under `tag:yaml.org,2002:` with the suffixes listed below are reported
/// as custom tags, not as core-schema tags.
#[test]
fn core_suffix_rejects_non_core_yaml_org_tags() {
let cases = [
"binary",
"merge",
"omap",
"pairs",
"set",
"timestamp",
"value",
"yaml",
];
for suffix in cases {
let tag = Tag::with_original_handle("tag:yaml.org,2002:", suffix, "!!");
assert_eq!(tag.core_suffix(), None, "{suffix}");
assert!(!tag.is_yaml_core_schema(), "{suffix}");
assert!(tag.is_custom(), "{suffix}");
}
}
/// Tags that do not resolve into the `tag:yaml.org,2002:` namespace are
/// custom, even when their suffix is `int` — including `!!int` after `!!` has
/// been redirected by a `%TAG` directive.
#[test]
fn core_suffix_rejects_non_core_tags() {
// (label, YAML input)
let cases = [
("local", "v: !local 1\n"),
("verbatim custom", "v: !<tag:example.com,2000:int> 1\n"),
(
"custom directive",
"%TAG !e! tag:example.com,2000:\n---\nv: !e!int 1\n",
),
(
"overridden secondary handle",
"%TAG !! tag:example.com,2000:app/\n---\nv: !!int 1\n",
),
];
for (label, input) in cases {
let tag = first_tagged_scalar_tag(input);
assert_eq!(tag.core_suffix(), None, "{label}");
assert!(!tag.is_yaml_core_schema(), "{label}");
assert!(!tag.is_yaml_core_schema_tag("int"), "{label}");
assert!(tag.is_custom(), "{label}");
}
}
/// `suffix_in_namespace` finds the suffix relative to a namespace no matter
/// where the handle/suffix split falls, and reports `None` for tags outside
/// the namespace.
#[test]
fn suffix_in_namespace_resolves_across_spellings() {
const NS: &str = "tag:yaml.org,2002:";
let shorthand = Tag::with_original_handle(NS, "omap", "!!");
let verbatim = Tag::with_original_handle("", "tag:yaml.org,2002:omap", "");
let mid_split = Tag::with_original_handle("tag:yaml.org,2002:o", "map", "!o!");
let inside_split = Tag::with_original_handle("tag:yaml.org,", "2002:omap", "!y!");
for tag in [&shorthand, &verbatim, &mid_split, &inside_split] {
assert_eq!(tag.suffix_in_namespace(NS).as_deref(), Some("omap"));
assert_eq!(tag.core_suffix(), None);
}
// Only the mid-split spelling is expected to come back as an owned string;
// the other three are expected to be borrowed.
assert!(matches!(
shorthand.suffix_in_namespace(NS),
Some(Cow::Borrowed(_))
));
assert!(matches!(
verbatim.suffix_in_namespace(NS),
Some(Cow::Borrowed(_))
));
assert!(matches!(
inside_split.suffix_in_namespace(NS),
Some(Cow::Borrowed(_))
));
assert!(matches!(
mid_split.suffix_in_namespace(NS),
Some(Cow::Owned(_))
));
let merge = Tag::with_original_handle(NS, "merge", "!!");
assert_eq!(merge.suffix_in_namespace(NS).as_deref(), Some("merge"));
assert_eq!(merge.core_suffix(), None);
assert_eq!(
Tag::new(NS, "int").suffix_in_namespace(NS).as_deref(),
Some("int")
);
// Tags outside the namespace.
assert_eq!(Tag::new("!", "omap").suffix_in_namespace(NS), None);
assert_eq!(
Tag::with_original_handle("", "tag:example.com,2000:omap", "").suffix_in_namespace(NS),
None
);
}
/// `attach_tag_start` leaves the event and the span's start/end untouched and
/// makes the given marker available through `tag_start()`.
#[test]
fn attach_tag_start_applies_marker_to_span() {
let event = Event::Scalar("value".into(), ScalarStyle::Plain, 0, None);
let span = Span::new(Marker::new(6, 1, 6), Marker::new(11, 1, 11));
let tag_start = Marker::new(0, 1, 0);
let (attached_event, attached_span) =
Parser::<crate::input::str::StrInput<'_>>::attach_tag_start(
event.clone(),
span,
Some(tag_start),
);
assert_eq!(attached_event, event);
assert_eq!(attached_span.start, span.start);
assert_eq!(attached_span.end, span.end);
assert_eq!(attached_span.tag_start(), Some(tag_start));
}
/// Checks `anchor_id`, `alias_id`, `tag`, `scalar` and `is_node` on scalar,
/// sequence, mapping, alias and stream-start events.
#[test]
fn event_inspection_helpers_report_node_metadata() {
let tag = Tag::new("!", "thing");
let scalar = Event::Scalar(
"value".into(),
ScalarStyle::DoubleQuoted,
7,
Some(Cow::Borrowed(&tag)),
);
let sequence =
Event::SequenceStart(StructureStyle::Block, 8, Some(Cow::Owned(tag.clone())));
let mapping = Event::MappingStart(StructureStyle::Block, 9, Some(Cow::Borrowed(&tag)));
// Anchored, tagged scalar.
assert_eq!(scalar.anchor_id(), Some(7));
assert_eq!(scalar.alias_id(), None);
assert_eq!(scalar.tag(), Some(&tag));
assert_eq!(scalar.scalar(), Some(("value", ScalarStyle::DoubleQuoted)));
assert!(scalar.is_node());
// Anchored, tagged sequence start.
assert_eq!(sequence.anchor_id(), Some(8));
assert_eq!(sequence.alias_id(), None);
assert_eq!(sequence.tag(), Some(&tag));
assert_eq!(sequence.scalar(), None);
assert!(sequence.is_node());
// Anchored, tagged mapping start.
assert_eq!(mapping.anchor_id(), Some(9));
assert_eq!(mapping.alias_id(), None);
assert_eq!(mapping.tag(), Some(&tag));
assert_eq!(mapping.scalar(), None);
assert!(mapping.is_node());
// Alias: has an alias id but no anchor id.
let alias = Event::Alias(10);
assert_eq!(alias.anchor_id(), None);
assert_eq!(alias.alias_id(), Some(10));
assert_eq!(alias.tag(), None);
assert_eq!(alias.scalar(), None);
assert!(alias.is_node());
// Anchor id 0 is reported as "no anchor".
let unanchored_scalar = Event::Scalar("x".into(), ScalarStyle::Plain, 0, None);
assert_eq!(unanchored_scalar.anchor_id(), None);
assert_eq!(unanchored_scalar.alias_id(), None);
// Non-node event.
let stream_start = Event::StreamStart;
assert_eq!(stream_start.anchor_id(), None);
assert_eq!(stream_start.alias_id(), None);
assert_eq!(stream_start.tag(), None);
assert_eq!(stream_start.scalar(), None);
assert!(!stream_start.is_node());
}
/// For every event up to and including `StreamEnd`, `peek` followed by
/// `next_event` yields the same event and span.
#[test]
fn test_peek_eq_parse() {
let s = "
a0 bb: val
a1: &x
b1: 4
b2: d
a2: 4
a3: [1, 2, 3]
a4:
- [a1, a2]
- 2
a5: *x
";
let mut p = Parser::new_from_str(s);
loop {
let event_peek = p.peek().unwrap().unwrap().clone();
let event = p.next_event().unwrap().unwrap();
assert_eq!(event, event_peek);
if event.0 == Event::StreamEnd {
break;
}
}
}
/// Two consecutive peeks return the same event, and `next_event` then returns
/// that event too.
#[test]
fn test_repeated_peek_returns_buffered_event() {
let mut parser = Parser::new_from_str("key: value\n");
let first_peek = parser.peek().unwrap().unwrap().clone();
let second_peek = parser.peek().unwrap().unwrap().clone();
let next = parser.next_event().unwrap().unwrap();
assert_eq!(first_peek, second_peek);
assert_eq!(first_peek, next);
}
/// On an unterminated flow sequence, `peek` eventually reports the
/// "unclosed bracket" error instead of returning `None`.
#[test]
fn test_peek_surfaces_scan_error_without_consuming_stream_end_state() {
let mut parser = Parser::new_from_str("a: [1, 2");
loop {
match parser.peek() {
// Consume successfully peeked events until the error shows up.
Some(Ok(_)) => {
parser.next_event().unwrap().unwrap();
}
Some(Err(error)) => {
assert_eq!(error.info(), "unclosed bracket '['");
break;
}
None => panic!("expected parse error"),
}
}
}
/// Iterating over invalid input yields exactly one error and then stops.
#[test]
fn test_iterator_terminates_after_scan_error() {
let parser = Parser::new_from_str("foo:\n bar\ninvalid\n");
let mut errors = 0usize;
let mut events = 0usize;
for item in parser {
events += 1;
if item.is_err() {
errors += 1;
}
// Guard against an iterator that never terminates.
assert!(
events < 1000,
"parser iterator did not terminate after a scan error"
);
}
assert_eq!(errors, 1);
}
/// An alias to an unknown anchor yields exactly one error, and the iterator
/// does not go on to produce the later `2` scalar.
#[test]
fn test_iterator_terminates_after_node_property_error() {
let parser = Parser::new_from_str("- *nope\n- 2\n");
let mut errors = 0usize;
let mut saw_later_node = false;
let mut events = 0usize;
for item in parser {
events += 1;
match item {
Ok((Event::Scalar(value, ..), _)) if value == "2" => saw_later_node = true,
Ok(_) => {}
Err(error) => {
assert_eq!(error.info(), "while parsing node, found unknown anchor");
errors += 1;
}
}
// Guard against an iterator that never terminates.
assert!(
events < 1000,
"parser iterator did not terminate after a node-property error"
);
}
assert_eq!(errors, 1);
assert!(!saw_later_node, "parser resumed after the alias error");
}
/// An error seen via `peek` is repeated by further peeks, returned once by
/// `next_event`, and followed by `None` from both methods.
#[test]
fn test_peeked_scan_error_is_returned_once_by_next_event() {
let mut parser = Parser::new_from_str("a: [1, 2");
// Advance until `peek` reports the error.
let first_error = loop {
match parser.peek() {
Some(Ok(_)) => {
parser.next_event().unwrap().unwrap();
}
Some(Err(error)) => break error,
None => panic!("expected parse error"),
}
};
let Some(Err(second_error)) = parser.peek() else {
panic!("expected cached parse error");
};
assert_eq!(first_error, second_error);
assert_eq!(parser.next_event().unwrap().unwrap_err(), first_error);
assert!(parser.next_event().is_none());
assert!(parser.peek().is_none());
}
/// Same contract as for scan errors, but for the unknown-anchor error: the
/// peeked error is stable, returned once by `next_event`, then the parser is
/// finished.
#[test]
fn test_peeked_node_property_error_is_stable_and_terminal() {
let mut parser = Parser::new_from_str("a: *nope\nb: 2\n");
// Consume the four events that precede the alias.
for _ in 0..4 {
parser.next_event().unwrap().unwrap();
}
let Some(Err(first_error)) = parser.peek() else {
panic!("expected unknown alias error");
};
let Some(Err(second_error)) = parser.peek() else {
panic!("expected cached unknown alias error");
};
assert_eq!(first_error, second_error);
assert_eq!(
first_error.info(),
"while parsing node, found unknown anchor"
);
assert_eq!(parser.next_event().unwrap().unwrap_err(), first_error);
assert!(parser.next_event().is_none());
assert!(parser.peek().is_none());
}
/// For empty input the parser yields `StreamStart` and `StreamEnd`, after
/// which both `next_event` and `peek` return `None`.
#[test]
fn test_peek_and_next_return_none_after_stream_end() {
let mut parser = Parser::new_from_str("");
assert!(matches!(
parser.next_event().unwrap().unwrap().0,
Event::StreamStart
));
assert!(matches!(
parser.next_event().unwrap().unwrap().0,
Event::StreamEnd
));
assert!(parser.next_event().is_none());
assert!(parser.peek().is_none());
}
/// Calling `load` on a parser that was already drained delivers a single
/// `StreamEnd` to the receiver.
#[test]
fn test_load_after_stream_already_ended_emits_stream_end() {
let mut parser = Parser::new_from_str("");
// Drain the parser completely first.
while parser.next_event().is_some() {}
let mut sink = CollectingSink::default();
parser.load(&mut sink, true).unwrap();
assert_eq!(sink.events, vec![Event::StreamEnd]);
}
/// After `load` has delivered the whole stream (ending in `StreamEnd`),
/// `next_event` and `peek` return `None`.
#[test]
fn test_load_full_stream_fuses_iterator_after_stream_end() {
let mut parser = Parser::new_from_str("a: 1\n");
let mut sink = CollectingSink::default();
parser.load(&mut sink, true).unwrap();
assert!(matches!(sink.events.last(), Some(Event::StreamEnd)));
assert!(parser.next_event().is_none());
assert!(parser.peek().is_none());
}
/// An event buffered by `peek` is not lost when switching to `load`: the
/// receiver gets the peeked `DocumentEnd` followed by `StreamEnd`.
#[test]
fn test_load_after_peek_delivers_buffered_document_end_before_stream_end() {
let mut parser = Parser::new_from_str("a");
// Consume the first three events so that `DocumentEnd` is next.
for _ in 0..3 {
parser.next_event().unwrap().unwrap();
}
assert_eq!(parser.peek().unwrap().unwrap().0, Event::DocumentEnd);
let mut sink = CollectingSink::default();
parser.load(&mut sink, true).unwrap();
assert_eq!(sink.events, vec![Event::DocumentEnd, Event::StreamEnd]);
assert!(parser.next_event().is_none());
}
/// `load` delivers the complete, ordered event sequence for a block mapping
/// containing a block sequence with a nested mapping and a flow sequence.
#[test]
fn test_load_visits_nested_collection_events() {
let mut parser = Parser::new_from_str("root:\n - item: value\n - [a, b]\n");
let mut sink = CollectingSink::default();
parser.load(&mut sink, true).unwrap();
assert_eq!(
sink.events,
vec![
Event::StreamStart,
Event::DocumentStart(false, None),
Event::MappingStart(StructureStyle::Block, 0, None),
Event::Scalar("root".into(), ScalarStyle::Plain, 0, None),
Event::SequenceStart(StructureStyle::Block, 0, None),
Event::MappingStart(StructureStyle::Block, 0, None),
Event::Scalar("item".into(), ScalarStyle::Plain, 0, None),
Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
Event::MappingEnd,
Event::SequenceStart(StructureStyle::Flow, 0, None),
Event::Scalar("a".into(), ScalarStyle::Plain, 0, None),
Event::Scalar("b".into(), ScalarStyle::Plain, 0, None),
Event::SequenceEnd,
Event::SequenceEnd,
Event::MappingEnd,
Event::DocumentEnd,
Event::StreamEnd,
]
);
}
/// Error type returned by the fallible test receivers.
#[derive(Clone, Debug, PartialEq, Eq)]
enum ValidationError {
// Returned by `FailingSink` when it sees the scalar `bad`.
ForbiddenValue,
}
/// Minimal receiver error implementing `Display` and `core::error::Error`,
/// used to build `TryLoadError<ReceiverFailure>` values in tests.
#[derive(Debug)]
struct ReceiverFailure;
impl fmt::Display for ReceiverFailure {
// Always renders the fixed text "receiver failed".
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "receiver failed")
}
}
impl core::error::Error for ReceiverFailure {}
/// Test receiver that records every event and rejects the scalar `bad`.
struct FailingSink<'input> {
    /// Every event received so far, including the rejected one.
    events: Vec<Event<'input>>,
}
impl<'input> TryEventReceiver<'input> for FailingSink<'input> {
    type Error = ValidationError;
    /// Stores the event, then fails with `ValidationError::ForbiddenValue` if
    /// it was a scalar whose value is `bad`.
    fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
        let is_forbidden = match &ev {
            Event::Scalar(value, ..) => value.as_ref() == "bad",
            _ => false,
        };
        self.events.push(ev);
        if is_forbidden {
            return Err(ValidationError::ForbiddenValue);
        }
        Ok(())
    }
}
/// `try_load` stops at the first receiver error: events up to and including
/// the rejected scalar are delivered, later ones are not.
#[test]
fn test_try_load_stops_on_receiver_error() {
let mut parser = Parser::new_from_str("ok: bad\nafter: value\n");
let mut sink = FailingSink { events: Vec::new() };
let err = parser.try_load(&mut sink, true).unwrap_err();
assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenValue));
// Delivered before the failure.
assert!(sink
.events
.iter()
.any(|event| matches!(event, Event::Scalar(value, ..) if value == "ok")));
// The rejected event itself was delivered.
assert!(sink
.events
.iter()
.any(|event| matches!(event, Event::Scalar(value, ..) if value == "bad")));
// Nothing after the failure was delivered.
assert!(!sink
.events
.iter()
.any(|event| matches!(event, Event::Scalar(value, ..) if value == "after")));
}
/// Spanned receiver that fails on the scalar `bad` and remembers its span.
struct SpannedFailingSink {
    /// Span of the rejected scalar, or `None` while nothing has been rejected.
    failed_span: Option<Span>,
}

impl<'input> TrySpannedEventReceiver<'input> for SpannedFailingSink {
    type Error = Span;

    /// Rejects the scalar `bad`, returning its span as the error; every other
    /// event is accepted.
    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
        match ev {
            Event::Scalar(text, ..) if text.as_ref() == "bad" => {
                self.failed_span = Some(span);
                Err(span)
            }
            _ => Ok(()),
        }
    }
}
/// The span carried by the receiver error is the one the receiver was given,
/// and it is not empty.
#[test]
fn test_try_load_spanned_receiver_gets_span() {
    let mut parser = Parser::new_from_str("value: bad\n");
    let mut sink = SpannedFailingSink { failed_span: None };
    let failure = parser.try_load(&mut sink, false).unwrap_err();
    let span = match failure {
        TryLoadError::Receiver(span) => span,
        _ => panic!("expected receiver error"),
    };
    assert_eq!(Some(span), sink.failed_span);
    assert!(!span.is_empty());
}
struct NeverFails {
count: usize,
}
impl<'input> TryEventReceiver<'input> for NeverFails {
type Error = ValidationError;
fn on_event(&mut self, _ev: Event<'input>) -> Result<(), Self::Error> {
self.count += 1;
Ok(())
}
}
/// A parser-side failure (duplicate `%YAML` directive) is reported through
/// the `Scan` variant even though the receiver never fails.
#[test]
fn test_try_load_returns_scan_error() {
    let mut parser = Parser::new_from_str("%YAML 1.2\n%YAML 1.2\n---\n");
    let mut sink = NeverFails { count: 0 };
    let scan_error = match parser.try_load(&mut sink, true).unwrap_err() {
        TryLoadError::Scan(inner) => inner,
        _ => panic!("expected scan error"),
    };
    assert_eq!(scan_error.info(), "duplicate version directive");
}
/// Both `TryLoadError` variants render a prefixed message and expose a source.
#[test]
fn test_try_load_error_display_and_source_cover_both_variants() {
    // Variant built by converting a scan error.
    let from_scan: TryLoadError<ReceiverFailure> =
        ScanError::new_str(Marker::new(3, 1, 3), "bad yaml").into();
    let rendered = from_scan.to_string();
    assert!(rendered.starts_with("parser error: bad yaml"));
    assert!(from_scan.source().is_some());

    // Variant wrapping a receiver error.
    let from_receiver = TryLoadError::Receiver(ReceiverFailure);
    assert_eq!(
        from_receiver.to_string(),
        "receiver error: receiver failed"
    );
    assert!(from_receiver.source().is_some());
}
/// With a non-`StreamStart` event planted in the parser's buffer, a
/// full-stream `try_load` reports a missing `<stream-start>`.
#[test]
fn test_try_load_requires_buffered_stream_start() {
    let buffered = (
        Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
        Span::empty(Marker::new(0, 1, 0)),
    );
    let mut parser = Parser::new_from_str("");
    parser.current = Some(buffered);

    let mut sink = NeverFails { count: 0 };
    let scan_error = match parser.try_load(&mut sink, true).unwrap_err() {
        TryLoadError::Scan(inner) => inner,
        _ => panic!("expected scan error"),
    };
    assert_eq!(scan_error.info(), "did not find expected <stream-start>");
}
/// Calling `try_load` on an already exhausted parser delivers exactly one
/// `StreamEnd` event.
#[test]
fn test_try_load_after_stream_already_ended_emits_stream_end() {
    let mut parser = Parser::new_from_str("");
    // Pull events until the parser reports that none are left.
    loop {
        if parser.next_event().is_none() {
            break;
        }
    }
    let mut sink = FailingSink { events: Vec::new() };
    parser.try_load(&mut sink, true).unwrap();
    assert_eq!(sink.events, [Event::StreamEnd]);
}
/// After a full-stream `try_load`, the last delivered event is `StreamEnd`
/// and the parser yields nothing further.
#[test]
fn test_try_load_full_stream_fuses_iterator_after_stream_end() {
    let mut sink = FailingSink { events: Vec::new() };
    let mut parser = Parser::new_from_str("a: 1\n");
    parser.try_load(&mut sink, true).unwrap();

    assert_eq!(sink.events.last(), Some(&Event::StreamEnd));
    assert!(parser.next_event().is_none());
    assert!(parser.peek().is_none());
}
/// An event that was only peeked (here `DocumentEnd`) must still be delivered
/// by a subsequent `try_load`, ahead of `StreamEnd`.
#[test]
fn test_try_load_after_peek_delivers_buffered_document_end_before_stream_end() {
    let mut parser = Parser::new_from_str("a");
    // Consume the three events that precede the document end.
    (0..3).for_each(|_| {
        parser.next_event().unwrap().unwrap();
    });
    assert_eq!(parser.peek().unwrap().unwrap().0, Event::DocumentEnd);

    let mut sink = FailingSink { events: Vec::new() };
    parser.try_load(&mut sink, true).unwrap();
    assert_eq!(sink.events, [Event::DocumentEnd, Event::StreamEnd]);
    assert!(parser.next_event().is_none());
}
/// Loading a single document delivers the first document's scalars, none of
/// the second's, and ends on `DocumentEnd`.
#[test]
fn test_load_single_document_stops_before_next_document() {
    let mut parser = Parser::new_from_str("a: 1\n---\nb: 2\n");
    let mut sink = CollectingSink::default();
    parser.load(&mut sink, false).unwrap();

    // True when a scalar with the given text reached the sink.
    let saw_scalar = |wanted: &str| {
        sink.events
            .iter()
            .any(|event| matches!(event, Event::Scalar(value, ..) if value == wanted))
    };
    assert!(saw_scalar("a"));
    assert!(!saw_scalar("b"));
    assert!(matches!(sink.events.last(), Some(Event::DocumentEnd)));
}
/// Two `%YAML` directives in one document are rejected.
#[test]
fn test_duplicate_version_directive_errors() {
    let info = first_error_info("%YAML 1.2\n%YAML 1.2\n---\n");
    assert_eq!(info, "duplicate version directive");
}
/// A `%YAML` directive with major version 9 is rejected.
#[test]
fn test_unsupported_yaml_major_version_errors() {
    let info = first_error_info("%YAML 9.9\n--- a\n");
    assert_eq!(info, "unsupported YAML major version");
}
/// The `%YAML 1.2` directive is reported on the explicit `DocumentStart`.
#[test]
fn test_document_start_emits_yaml_version() {
    let events: Vec<_> = Parser::new_from_str("%YAML 1.2\n---\nvalue\n")
        .map(|item| {
            let (event, _span) = item.unwrap();
            event
        })
        .collect();
    let expected = Event::DocumentStart(true, Some(YamlVersion::new(1, 2)));
    assert_eq!(events.get(1), Some(&expected));
}
/// A `%YAML 1.9` directive is accepted and reported unchanged on
/// `DocumentStart`.
#[test]
fn test_document_start_allows_supported_major_future_minor_version() {
    let events: Vec<_> = Parser::new_from_str("%YAML 1.9\n---\nvalue\n")
        .map(|item| {
            let (event, _span) = item.unwrap();
            event
        })
        .collect();
    let expected = Event::DocumentStart(true, Some(YamlVersion::new(1, 9)));
    assert_eq!(events.get(1), Some(&expected));
}
/// A comment between `%YAML` and `%TAG` must not drop either directive: the
/// version shows up on `DocumentStart` and the tag handle still resolves.
#[test]
fn test_document_start_keeps_version_and_tags_across_comment() {
    let source =
        "%YAML 1.2\n# directive comment\n%TAG !e! tag:example.com,2026:\n---\nkey: !e!thing value\n";
    let events: Vec<_> = Parser::new_from_str(source)
        .map(|item| item.unwrap().0)
        .collect();

    let expected_start = Event::DocumentStart(true, Some(YamlVersion::new(1, 2)));
    assert_eq!(events.get(2), Some(&expected_start));

    // Locate the first tagged scalar whose text is `value`.
    let mut found = None;
    for event in &events {
        if let Event::Scalar(value, _, _, Some(tag)) = event {
            if value == "value" {
                found = Some(tag);
                break;
            }
        }
    }
    let tag = found.expect("expected tagged scalar after comment-separated directives");
    assert_eq!(tag.handle, "tag:example.com,2026:");
    assert_eq!(tag.suffix, "thing");
}
/// Each document in a stream may carry its own `%YAML` directive, including
/// a repeat of the previous document's version.
#[test]
fn test_each_document_can_declare_own_yaml_version() {
    let source = "%YAML 1.2\n---\na\n...\n%YAML 1.2\n---\nb\n...\n%YAML 1.1\n---\nc\n";

    let mut document_starts = Vec::new();
    for item in Parser::new_from_str(source) {
        if let Event::DocumentStart(explicit, version) = item.unwrap().0 {
            document_starts.push((explicit, version));
        }
    }

    let expected = [
        (true, Some(YamlVersion::new(1, 2))),
        (true, Some(YamlVersion::new(1, 2))),
        (true, Some(YamlVersion::new(1, 1))),
    ];
    assert_eq!(document_starts, expected);
}
/// Declaring the same `%TAG` handle twice in one document is rejected.
#[test]
fn test_duplicate_tag_directive_errors() {
    let info = first_error_info("%TAG !t! tag:test,2024:\n%TAG !t! tag:other,2024:\n---\n");
    assert_eq!(
        info,
        "the TAG directive must only be given at most once per handle in the same document"
    );
}
/// A comment between two `%TAG` directives for the same handle does not hide
/// the duplicate.
#[test]
fn duplicate_tag_directive_across_comment_is_rejected() {
    let input =
        "%TAG !e! tag:example.com,2000:one/\n# separator\n%TAG !e! tag:example.com,2000:two/\n---\n";
    let info = first_error_info(input);
    assert_eq!(
        info,
        "the TAG directive must only be given at most once per handle in the same document"
    );
}
/// With `keep_tags(true)`, a handle declared in the first document may be
/// declared again in the second, and the new prefix is used there.
#[test]
fn test_keep_tags_inherited_handle_can_be_redeclared_in_next_document() {
    let input = concat!(
        "%TAG !e! tag:example.com,2000:one/\n",
        "---\n",
        "first: !e!thing value\n",
        "...\n",
        "%TAG !e! tag:example.com,2000:two/\n",
        "---\n",
        "second: !e!thing value\n",
    );

    // Resolved handles of the tagged `value` scalars, in document order.
    let mut tags = Vec::new();
    for item in Parser::new_from_str(input).keep_tags(true) {
        let (event, _span) = item.expect("input should parse");
        match event {
            Event::Scalar(value, _, _, Some(tag)) if value == "value" => {
                tags.push(tag.handle.clone());
            }
            _ => {}
        }
    }

    assert_eq!(
        tags,
        ["tag:example.com,2000:one/", "tag:example.com,2000:two/"]
    );
}
/// A directive that follows document content without a `...` marker is an
/// error.
#[test]
fn test_directive_after_implicit_document_requires_explicit_end() {
    let info = first_error_info("---\nkey: value\n%YAML 1.2\n---\n");
    assert_eq!(
        info,
        "missing explicit document end marker before directive"
    );
}
/// Registering an anchor when the offset is already `usize::MAX` yields an
/// error.
#[test]
fn test_anchor_offset_overflow_reports_error() {
    let mut parser = Parser::new_from_str("&a value");
    parser.set_anchor_offset(usize::MAX);

    // First error produced by the event stream.
    let mut failures = parser.filter_map(Result::err);
    let err = failures
        .next()
        .expect("anchor registration should overflow");
    assert_eq!(
        err.info(),
        "while parsing anchor, anchor count exceeded supported limit"
    );
}
/// The alias `*a` is emitted with anchor id 1.
#[test]
fn test_alias_resolves_to_registered_anchor_id() {
    let events: Vec<_> = Parser::new_from_str("- &a value\n- *a\n")
        .map(|item| item.unwrap().0)
        .collect();
    assert!(events.contains(&Event::Alias(1)));
}
/// An anchor written before a tag attaches both properties to the scalar.
#[test]
fn test_anchor_then_tag_applies_both_to_scalar() {
    let events: Vec<_> = Parser::new_from_str("&a !!str value")
        .map(|item| item.unwrap().0)
        .collect();

    // First scalar whose text is `value`; it must carry a tag.
    let scalar = events
        .iter()
        .find(|event| matches!(event, Event::Scalar(text, ..) if text == "value"));
    let (value, anchor_id, tag) = match scalar {
        Some(Event::Scalar(value, _, anchor_id, Some(tag))) => (value, *anchor_id, tag),
        _ => panic!("expected tagged anchored scalar"),
    };

    assert_eq!(value, "value");
    assert_eq!(anchor_id, 1);
    assert_eq!(tag.handle, "tag:yaml.org,2002:");
    assert_eq!(tag.suffix, "str");
    assert_eq!(tag.original_handle, "!!");
    assert_eq!(tag.original(), "!!str");
}
/// A tag written before an anchor attaches both properties to the scalar.
#[test]
fn test_tag_then_anchor_applies_both_to_scalar() {
    let events: Vec<_> = Parser::new_from_str("!!str &a value")
        .map(|item| item.unwrap().0)
        .collect();

    // First scalar whose text is `value`; it must carry a tag.
    let scalar = events
        .iter()
        .find(|event| matches!(event, Event::Scalar(text, ..) if text == "value"));
    let (value, anchor_id, tag) = match scalar {
        Some(Event::Scalar(value, _, anchor_id, Some(tag))) => (value, *anchor_id, tag),
        _ => panic!("expected tagged anchored scalar"),
    };

    assert_eq!(value, "value");
    assert_eq!(anchor_id, 1);
    assert_eq!(tag.handle, "tag:yaml.org,2002:");
    assert_eq!(tag.suffix, "str");
    assert_eq!(tag.original_handle, "!!");
    assert_eq!(tag.original(), "!!str");
}
/// A tag resolved through a `%TAG` directive exposes both the resolved prefix
/// and the handle as written in the document.
#[test]
fn test_tag_directive_preserves_original_handle() {
    let source = "%TAG !e! tag:example.com,2000:\n---\nconfig: !e!keep value\n";
    let events: Vec<_> = Parser::new_from_str(source)
        .map(|item| item.unwrap().0)
        .collect();

    // First scalar that is both tagged and spelled `value`.
    let hit = events
        .iter()
        .find(|event| matches!(event, Event::Scalar(text, _, _, Some(_)) if text == "value"));
    let Some(Event::Scalar(value, _, _, Some(tag))) = hit else {
        panic!("expected tagged scalar");
    };

    assert_eq!(value, "value");
    assert_eq!(tag.handle, "tag:example.com,2000:");
    assert_eq!(tag.suffix, "keep");
    assert_eq!(tag.original_handle, "!e!");
    assert_eq!(tag.parts(), ("tag:example.com,2000:", "keep"));
    assert_eq!(tag.original_parts(), ("!e!", "keep"));
    assert_eq!(tag.original(), "!e!keep");
}
/// A verbatim tag has empty handles, keeps the full URI as its suffix, and
/// renders back in `!<...>` form.
#[test]
fn test_verbatim_tag_original_is_normalized_author_spelling() {
    let events: Vec<_> = Parser::new_from_str("key: !<tag:example.com,2000:thing> value\n")
        .map(|item| item.unwrap().0)
        .collect();

    // First scalar whose text is `value`; it must carry a tag.
    let scalar = events
        .iter()
        .find(|event| matches!(event, Event::Scalar(text, ..) if text == "value"));
    let (value, tag) = match scalar {
        Some(Event::Scalar(value, _, _, Some(tag))) => (value, tag),
        _ => panic!("expected tagged scalar"),
    };

    assert_eq!(value, "value");
    assert_eq!(tag.handle, "");
    assert_eq!(tag.suffix, "tag:example.com,2000:thing");
    assert_eq!(tag.original_handle, "");
    assert_eq!(tag.parts(), ("", "tag:example.com,2000:thing"));
    assert_eq!(tag.original_parts(), ("", "tag:example.com,2000:thing"));
    assert_eq!(tag.original(), "!<tag:example.com,2000:thing>");
}
/// Two different `%TAG` handles declared for one document both resolve.
#[test]
fn test_multiple_tag_directives_are_kept_within_document() {
    let text = r"
%TAG !a! tag:a,2024:
%TAG !b! tag:b,2024:
---
first: !a!x foo
second: !b!y bar
";
    // Resolved handle of every tagged scalar in the stream.
    let handles: Vec<_> = Parser::new_from_str(text)
        .filter_map(|item| match item.unwrap().0 {
            Event::Scalar(_, _, _, Some(tag)) => Some(tag.handle.clone()),
            _ => None,
        })
        .collect();

    assert!(handles.iter().any(|handle| handle == "tag:a,2024:"));
    assert!(handles.iter().any(|handle| handle == "tag:b,2024:"));
}
/// Without `keep_tags`, a handle declared for the first document is unknown
/// in the second one.
#[test]
fn test_tags_are_cleared_when_next_document_has_no_directives() {
    let text = r"
%TAG !t! tag:test,2024:
--- !t!1
foo
--- !t!2
bar
";
    let mut parser = Parser::new_from_str(text);

    // Drain the first document, up to and including its `DocumentEnd`.
    let _ = parser
        .by_ref()
        .map(|item| item.unwrap().0)
        .any(|event| matches!(event, Event::DocumentEnd));

    let second_start = parser.next().unwrap().unwrap().0;
    assert!(
        matches!(second_start, Event::DocumentStart(true, None)),
        "expected explicit second document start"
    );

    let err = parser.next().unwrap().unwrap_err();
    assert!(err.to_string().contains("the handle wasn't declared"));
}
/// An anchor defined in the first document cannot be aliased from the second.
#[test]
fn test_pull_parser_clears_anchors_between_documents() {
    let mut parser = Parser::new_from_str(
        "--- &a value
--- *a
",
    );

    // Drain the first document, up to and including its `DocumentEnd`.
    let _ = parser
        .by_ref()
        .map(|item| item.unwrap().0)
        .any(|event| matches!(event, Event::DocumentEnd));

    let second_start = parser.next().unwrap().unwrap().0;
    assert!(
        matches!(second_start, Event::DocumentStart(true, None)),
        "expected explicit second document start"
    );

    let err = parser.next().unwrap().unwrap_err();
    assert!(err.to_string().contains("unknown anchor"));
}
/// With `keep_tags(true)` the `!t!` handle resolves for every mapping in the
/// stream; with `keep_tags(false)` the same input must produce an error.
#[test]
fn test_keep_tags_across_multiple_documents() {
    let text = r#"
%YAML 1.1
%TAG !t! tag:test,2024:
--- !t!1 &1
foo: "bar"
--- !t!2 &2
baz: "qux"
"#;
    // Tags kept: every mapping start must carry the resolved prefix.
    Parser::new_from_str(text)
        .keep_tags(true)
        .for_each(|item| {
            let (event, _span) = item.unwrap();
            if let Event::MappingStart(_, _, tag) = event {
                assert_eq!(tag.unwrap().handle, "tag:test,2024:");
            }
        });

    // Tags dropped: parsing stops being error-free at some point.
    let rejected = Parser::new_from_str(text)
        .keep_tags(false)
        .any(|item| item.is_err());
    assert!(rejected, "Test failed, did not encounter error");
}
/// `[?: value]` parses without producing any error.
#[test]
fn test_flow_sequence_mapping_allows_empty_key() {
    Parser::new_from_str("[?: value]").for_each(|item| {
        item.expect("parser should accept flow sequence mappings with empty keys");
    });
}
/// An overridden `!!` handle applies to its own document only, even with
/// `keep_tags(true)`; the next document falls back to the YAML core prefix.
#[test]
fn test_keep_tags_does_not_persist_default_tag_handles() {
    let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";
    // Resolved handle of every scalar tagged with suffix `int`.
    let int_tags: Vec<_> = Parser::new_from_str(text)
        .keep_tags(true)
        .filter_map(|item| match item.unwrap().0 {
            Event::Scalar(_, _, _, Some(tag)) if tag.suffix == "int" => {
                Some(tag.handle.clone())
            }
            _ => None,
        })
        .collect();
    assert_eq!(int_tags, ["tag:evil,2024:", "tag:yaml.org,2002:"]);
}
/// An overridden primary `!` handle applies to its own document only, even
/// with `keep_tags(true)`; the next document resolves `!int` with handle `!`.
#[test]
fn test_keep_tags_does_not_persist_primary_tag_handle() {
    let text = "%TAG ! tag:evil,2024:\n--- !int 1\n--- !int 2\n";

    // Resolved handle of every scalar tagged with suffix `int`.
    let mut tags = Vec::new();
    for item in Parser::new_from_str(text).keep_tags(true) {
        let (event, _span) = item.expect("input should parse");
        match event {
            Event::Scalar(_, _, _, Some(tag)) if tag.suffix == "int" => {
                tags.push(tag.handle.clone());
            }
            _ => {}
        }
    }

    assert_eq!(tags, ["tag:evil,2024:", "!"]);
}
/// When the parser's tag table maps `!` to a custom prefix, resolving a tag
/// with an empty handle and suffix `!` uses that prefix as the handle.
#[test]
fn test_resolve_tag_uses_overridden_local_prefix() {
    let mut parser = Parser::new_from_str("");
    let overridden_prefix = "tag:local.example,2024:".to_string();
    parser.tags.insert("!".to_string(), overridden_prefix);

    let origin = Span::empty(Marker::new(0, 1, 0));
    let handle = Cow::Borrowed("");
    let suffix = Cow::Borrowed("!");
    let tag = parser.resolve_tag(origin, &handle, suffix).unwrap();

    assert_eq!(tag.handle, "tag:local.example,2024:");
    assert_eq!(tag.suffix, "!");
}
/// Peeking `StreamStart` before calling `load` must not swallow it: the
/// receiver still sees `StreamStart` followed by `DocumentStart`.
#[test]
fn test_load_after_peek_stream_start() {
    /// Receiver that appends every event to a vector.
    struct Sink<'input> {
        events: Vec<Event<'input>>,
    }

    impl<'input> EventReceiver<'input> for Sink<'input> {
        fn on_event(&mut self, ev: Event<'input>) {
            self.events.push(ev);
        }
    }

    let mut sink = Sink { events: Vec::new() };
    let mut parser = Parser::new_from_str("key: value\n");
    assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
    parser.load(&mut sink, false).unwrap();

    // Check the first two delivered events in order.
    let mut delivered = sink.events.iter();
    assert!(matches!(delivered.next(), Some(Event::StreamStart)));
    assert!(matches!(delivered.next(), Some(Event::DocumentStart(..))));
}
}