use crate::{
input::{str::StrInput, BorrowedInput},
scanner::{Comment, Placement, ScalarStyle, ScanError, Scanner, Span, Token, TokenType},
BufferedInput,
};
use alloc::{
borrow::Cow,
collections::{BTreeMap, BTreeSet, VecDeque},
string::{String, ToString},
vec::Vec,
};
use core::{
convert::Infallible,
fmt::{self, Display},
};
/// States of the parser's internal state machine.
///
/// `Parser::state_machine` dispatches on the current state to the routine
/// named on each variant below. `End` is never dispatched: `Parser::parse`
/// returns a `StreamEnd` event for it before reaching the dispatcher.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
/// Initial state set by `Parser::new`; handled by `stream_start`.
StreamStart,
/// Handled by `document_start(true)`.
ImplicitDocumentStart,
/// Handled by `document_start(false)`.
DocumentStart,
/// Handled by `document_content`.
DocumentContent,
/// Handled by `document_end`.
DocumentEnd,
/// Handled by `parse_node(true, false)`.
BlockNode,
/// Handled by `parse_node(true, true)`.
BlockNodeOrIndentlessSequence,
/// Handled by `parse_node(false, false)`.
FlowNode,
/// Handled by `block_sequence_entry(true)`.
BlockSequenceFirstEntry,
/// Handled by `block_sequence_entry(false)`.
BlockSequenceEntry,
/// Handled by `indentless_sequence_entry`.
IndentlessSequenceEntry,
/// Handled by `indentless_sequence_entry_node`.
IndentlessSequenceEntryNode,
/// Handled by `block_mapping_key(true)`.
BlockMappingFirstKey,
/// Handled by `block_mapping_key(false)`.
BlockMappingKey,
/// Handled by `block_mapping_key_node`.
BlockMappingKeyNode,
/// Handled by `block_mapping_value`.
BlockMappingValue,
/// Handled by `block_mapping_value_node`.
BlockMappingValueNode,
/// Handled by `flow_sequence_entry(true)`.
FlowSequenceFirstEntry,
/// Handled by `flow_sequence_entry(false)`.
FlowSequenceEntry,
/// Handled by `flow_sequence_entry_mapping_key`.
FlowSequenceEntryMappingKey,
/// Handled by `flow_sequence_entry_mapping_value`.
FlowSequenceEntryMappingValue,
/// Handled by `flow_sequence_entry_mapping_value_node`.
FlowSequenceEntryMappingValueNode,
/// Handled by `flow_sequence_entry_mapping_end`.
FlowSequenceEntryMappingEnd,
/// Handled by `flow_mapping_key(true)`.
FlowMappingFirstKey,
/// Handled by `flow_mapping_key(false)`.
FlowMappingKey,
/// Handled by `flow_mapping_key_node`.
FlowMappingKeyNode,
/// Handled by `flow_mapping_value(false)`.
FlowMappingValue,
/// Handled by `flow_mapping_value_node`.
FlowMappingValueNode,
/// Handled by `flow_mapping_value(true)`, which emits an empty scalar value.
FlowMappingEmptyValue,
/// Handled by `block_sequence_entry_node`.
BlockSequenceEntryNode,
/// Terminal state, entered when the `StreamEnd` token is consumed.
End,
}
/// An event produced by the parser.
///
/// Anchor ids are plain `usize` values; `0` means "no anchor"
/// (see [`Event::anchor_id`]).
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event<'input> {
/// Placeholder event. NOTE(review): not produced by any code visible in this
/// part of the file — confirm its intended use.
Nothing,
/// Start of the YAML stream.
StreamStart,
/// End of the YAML stream.
StreamEnd,
/// Start of a document. The flag is `true` when the document was opened by
/// an explicit document-start token and `false` for an implicit start.
DocumentStart(bool),
/// End of a document.
DocumentEnd,
/// Reference to a previously anchored node.
Alias(
// Id of the anchor being referenced.
usize,
),
/// A comment found in the input.
Comment(
// Comment text.
Cow<'input, str>,
// Placement of the comment relative to surrounding content.
Placement,
),
/// A scalar value.
Scalar(
// Scalar contents.
Cow<'input, str>,
// Style the scalar was written in.
ScalarStyle,
// Anchor id, `0` when the node has no anchor.
usize,
// Resolved tag, if any.
Option<Cow<'input, Tag>>,
),
/// Start of a sequence.
SequenceStart(
// Block or flow style.
StructureStyle,
// Anchor id, `0` when the node has no anchor.
usize,
// Resolved tag, if any.
Option<Cow<'input, Tag>>,
),
/// End of a sequence.
SequenceEnd,
/// Start of a mapping.
MappingStart(
// Block or flow style.
StructureStyle,
// Anchor id, `0` when the node has no anchor.
usize,
// Resolved tag, if any.
Option<Cow<'input, Tag>>,
),
/// End of a mapping.
MappingEnd,
}
/// Syntactic style of a sequence or mapping, carried by
/// [`Event::SequenceStart`] and [`Event::MappingStart`].
#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash, PartialOrd, Ord)]
pub enum StructureStyle {
/// Collection introduced by a block token (block sequence/mapping start or
/// a block entry).
Block,
/// Collection introduced by a flow sequence/mapping start token.
Flow,
}
/// A YAML tag, stored as a handle and a suffix.
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// First part of the tag (e.g. `tag:yaml.org,2002:` or `!`).
    pub handle: String,
    /// Second part of the tag, appended to the handle when displayed.
    pub suffix: String,
}

impl Tag {
    /// Returns `true` when the handle is exactly `tag:yaml.org,2002:`.
    #[must_use]
    pub fn is_yaml_core_schema(&self) -> bool {
        self.handle.as_str() == "tag:yaml.org,2002:"
    }

    /// Returns `true` when the tag is in the YAML core schema and its suffix
    /// equals `suffix`.
    #[must_use]
    pub fn is_yaml_core_schema_tag(&self, suffix: &str) -> bool {
        if !self.is_yaml_core_schema() {
            return false;
        }
        self.suffix.as_str() == suffix
    }

    /// Returns `true` for every tag that is not in the YAML core schema.
    #[must_use]
    pub fn is_custom(&self) -> bool {
        let core = self.is_yaml_core_schema();
        !core
    }

    /// Returns the `(handle, suffix)` pair as string slices.
    #[must_use]
    pub fn parts(&self) -> (&str, &str) {
        (self.handle.as_str(), self.suffix.as_str())
    }
}

impl Display for Tag {
    /// Writes the handle immediately followed by the suffix.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // A `!` handle needs no special case: `"!" + suffix` is already
        // `handle + suffix`.
        f.write_str(&self.handle)?;
        f.write_str(&self.suffix)
    }
}
impl<'input> Event<'input> {
    /// Returns the anchor id of a scalar, sequence-start or mapping-start
    /// event, or `None` for any other event or when the id is `0`.
    #[must_use]
    pub fn anchor_id(&self) -> Option<usize> {
        let id = match self {
            Self::Scalar(_, _, id, _)
            | Self::SequenceStart(_, id, _)
            | Self::MappingStart(_, id, _) => *id,
            _ => return None,
        };
        // `0` is the "no anchor" marker.
        (id != 0).then_some(id)
    }

    /// Returns the referenced anchor id when this event is an alias.
    #[must_use]
    pub fn alias_id(&self) -> Option<usize> {
        if let Self::Alias(target) = self {
            Some(*target)
        } else {
            None
        }
    }

    /// Returns the tag attached to a scalar, sequence-start or mapping-start
    /// event, if there is one.
    #[must_use]
    pub fn tag(&self) -> Option<&Tag> {
        let slot = match self {
            Self::Scalar(_, _, _, slot)
            | Self::SequenceStart(_, _, slot)
            | Self::MappingStart(_, _, slot) => slot,
            _ => return None,
        };
        slot.as_deref()
    }

    /// Returns the text and style of a scalar event.
    #[must_use]
    pub fn scalar(&self) -> Option<(&str, ScalarStyle)> {
        if let Self::Scalar(text, style, _, _) = self {
            Some((text.as_ref(), *style))
        } else {
            None
        }
    }

    /// Returns `true` for alias, scalar, sequence-start and mapping-start
    /// events.
    #[must_use]
    pub fn is_node(&self) -> bool {
        match self {
            Self::Alias(_) | Self::Scalar(..) | Self::SequenceStart(..) | Self::MappingStart(..) => {
                true
            }
            _ => false,
        }
    }

    /// Builds the plain scalar `~` with no anchor and no tag.
    fn empty_scalar() -> Self {
        Self::Scalar(Cow::Borrowed("~"), ScalarStyle::Plain, 0, None)
    }

    /// Builds a plain scalar with empty text carrying the given anchor id and
    /// tag.
    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
        let text = Cow::default();
        Self::Scalar(text, ScalarStyle::Plain, anchor, tag)
    }
}
/// Event parser driven by a token [`Scanner`].
#[derive(Debug)]
pub struct Parser<'input, T: BorrowedInput<'input>> {
// Source of tokens.
scanner: Scanner<'input, T>,
// Stack of states to return to (`push_state` / `pop_state`).
states: Vec<State>,
// Current state of the state machine.
state: State,
// One-token lookahead filled by `peek_token` and emptied by `fetch_token` / `skip`.
token: Option<Token<'input>>,
// Event that `next_event_impl` returns before anything else when set.
current: Option<(Event<'input>, Span)>,
// Events waiting to be returned before parsing resumes.
queued_events: VecDeque<(Event<'input>, Span)>,
// Column of the last block-mapping `Key` token; attached to the span of the next node event.
pending_key_indent: Option<usize>,
// Anchor id saved when a comment interrupts `parse_node` after node properties were read.
pending_node_anchor_id: usize,
// Tag saved together with `pending_node_anchor_id`.
pending_node_tag: Option<Cow<'input, Tag>>,
// Span saved for a possible empty scalar when comments follow a value/entry indicator.
pending_empty_scalar_span: Option<Span>,
// Anchor name -> anchor id; cleared in `document_start`.
anchors: BTreeMap<Cow<'input, str>, usize>,
// Next anchor id to hand out; starts at 1 because 0 means "no anchor".
anchor_id_count: usize,
// Tag handle -> prefix, as declared by tag directives.
tags: BTreeMap<String, String>,
// NOTE(review): not read or written in the visible part of this file — confirm usage.
stream_end_emitted: bool,
// When `true`, tag directives are carried over to later documents (see `document_end`).
keep_tags: bool,
}
/// Receiver of parser events that does not need source spans.
pub trait EventReceiver<'input> {
/// Called once for every event, in order.
fn on_event(&mut self, ev: Event<'input>);
}
/// Receiver of parser events together with the span each event covers.
pub trait SpannedEventReceiver<'input> {
/// Called once for every event, in order, with the event's span.
fn on_event(&mut self, ev: Event<'input>, span: Span);
}
/// Every [`EventReceiver`] is also a [`SpannedEventReceiver`] that ignores the
/// span.
impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
        // Name the span-less trait explicitly so the call cannot be confused
        // with this method.
        <R as EventReceiver<'input>>::on_event(self, ev);
    }
}
/// Fallible receiver of parser events that does not need source spans.
pub trait TryEventReceiver<'input> {
/// Error returned when the receiver rejects an event.
type Error;
/// Called once for every event, in order.
///
/// # Errors
/// Returns the receiver's own error to stop loading.
fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error>;
}
/// Fallible receiver of parser events together with their spans.
pub trait TrySpannedEventReceiver<'input> {
/// Error returned when the receiver rejects an event.
type Error;
/// Called once for every event, in order, with the event's span.
///
/// # Errors
/// Returns the receiver's own error to stop loading.
fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error>;
}
/// Every [`TryEventReceiver`] is also a [`TrySpannedEventReceiver`] that
/// ignores the span and keeps the same error type.
impl<'input, R: TryEventReceiver<'input>> TrySpannedEventReceiver<'input> for R {
type Error = R::Error;
fn on_event(&mut self, ev: Event<'input>, _span: Span) -> Result<(), Self::Error> {
// Trait-qualified call so the span-less method is the one invoked.
TryEventReceiver::on_event(self, ev)
}
}
/// Error returned by `try_load`: either the parser or the receiver failed.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TryLoadError<E> {
/// Scanning or parsing the input failed.
Scan(
ScanError,
),
/// The event receiver returned an error.
Receiver(
E,
),
}
impl<E> TryLoadError<E> {
/// Wraps a scanner/parser error. Marked `#[cold]` as an error path.
#[cold]
fn scan(error: ScanError) -> Self {
Self::Scan(error)
}
/// Wraps a receiver error. Marked `#[cold]` as an error path.
#[cold]
fn receiver(error: E) -> Self {
Self::Receiver(error)
}
}
/// Lets `?` convert a [`ScanError`] into [`TryLoadError::Scan`].
impl<E> From<ScanError> for TryLoadError<E> {
#[cold]
fn from(error: ScanError) -> Self {
Self::scan(error)
}
}
/// Formats the error with a prefix naming which side failed.
impl<E: Display> Display for TryLoadError<E> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Scan(error) => write!(f, "parser error: {error}"),
Self::Receiver(error) => write!(f, "receiver error: {error}"),
}
}
}
/// Exposes the wrapped error as the `source` of the load error.
impl<E> core::error::Error for TryLoadError<E>
where
E: core::error::Error + 'static,
{
fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
match self {
Self::Scan(error) => Some(error),
Self::Receiver(error) => Some(error),
}
}
}
/// Hands one event to a fallible receiver and wraps a receiver failure in
/// [`TryLoadError::Receiver`].
fn try_emit<'input, R>(
    recv: &mut R,
    ev: Event<'input>,
    span: Span,
) -> Result<(), TryLoadError<R::Error>>
where
    R: TrySpannedEventReceiver<'input>,
{
    match recv.on_event(ev, span) {
        Ok(()) => Ok(()),
        Err(failure) => Err(TryLoadError::receiver(failure)),
    }
}
/// Adapter that presents an infallible [`SpannedEventReceiver`] as a
/// [`TrySpannedEventReceiver`] whose error type is [`Infallible`].
struct InfallibleSpannedReceiver<'receiver, R>(&'receiver mut R);
impl<'input, R: SpannedEventReceiver<'input>> TrySpannedEventReceiver<'input>
for InfallibleSpannedReceiver<'_, R>
{
type Error = Infallible;
fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
// Forward to the wrapped receiver; this can never fail.
self.0.on_event(ev, span);
Ok(())
}
}
/// Converts the result of loading into an infallible receiver back into a
/// plain `Result<(), ScanError>`.
fn into_scan_result(result: Result<(), TryLoadError<Infallible>>) -> Result<(), ScanError> {
match result {
Ok(()) => Ok(()),
// NOTE(review): `ScanError::into_result` is defined elsewhere; presumably it yields `Err(error)`.
Err(TryLoadError::Scan(error)) => error.into_result(),
// `Infallible` has no values, so this arm can never run.
Err(TryLoadError::Receiver(error)) => match error {},
}
}
/// Result of parsing one event: the event with its span, or a scan error.
pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
/// Interface shared by event parsers.
pub trait ParserTrait<'input> {
    /// Returns a reference to the next event without consuming it.
    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;

    /// Returns the next event, consuming it.
    fn next_event(&mut self) -> Option<ParseResult<'input>>;

    /// Feeds events to an infallible receiver.
    ///
    /// # Errors
    /// Returns the scan error that stopped parsing.
    fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError>;

    /// Feeds events to a fallible receiver until the stream ends or, when
    /// `multi` is `false`, until the first document end has been delivered.
    ///
    /// # Errors
    /// Returns [`TryLoadError::Scan`] when parsing fails and
    /// [`TryLoadError::Receiver`] when the receiver rejects an event.
    fn try_load<R: TrySpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), TryLoadError<R::Error>> {
        while let Some(next) = self.next_event() {
            let (ev, span) = next?;
            // Decide whether to stop before the event is moved into the
            // receiver; the event is still delivered either way.
            let stop_after = match &ev {
                Event::StreamEnd => true,
                Event::DocumentEnd => !multi,
                _ => false,
            };
            try_emit(recv, ev, span)?;
            if stop_after {
                break;
            }
        }
        Ok(())
    }
}
impl<'input> Parser<'input, StrInput<'input>> {
/// Creates a parser that reads from a string slice.
#[must_use]
pub fn new_from_str(value: &'input str) -> Self {
debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
Parser::new(StrInput::new(value))
}
}
impl<T> Parser<'static, BufferedInput<T>>
where
T: Iterator<Item = char>,
{
/// Creates a parser that reads from an iterator of characters.
#[must_use]
pub fn new_from_iter(iter: T) -> Self {
debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
Parser::new(BufferedInput::new(iter))
}
}
impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
/// Returns the id that will be given to the next registered anchor.
pub fn get_anchor_offset(&self) -> usize {
self.anchor_id_count
}
/// Sets the id that will be given to the next registered anchor.
pub fn set_anchor_offset(&mut self, offset: usize) {
self.anchor_id_count = offset;
}
/// Creates a parser over `src`, positioned before the stream start, with no
/// anchors or tag directives and with `keep_tags` disabled.
pub fn new(src: T) -> Self {
Parser {
scanner: Scanner::new(src),
states: Vec::new(),
state: State::StreamStart,
token: None,
current: None,
queued_events: VecDeque::new(),
pending_key_indent: None,
pending_node_anchor_id: 0,
pending_node_tag: None,
pending_empty_scalar_span: None,
anchors: BTreeMap::new(),
// Ids start at 1 because 0 is the "no anchor" marker.
anchor_id_count: 1,
tags: BTreeMap::new(),
stream_end_emitted: false,
keep_tags: false,
}
}
/// Builder-style setter choosing whether tag directives are kept across
/// documents (see `document_end`).
#[must_use]
pub fn keep_tags(mut self, value: bool) -> Self {
self.keep_tags = value;
self
}
/// Inherent shortcut for [`ParserTrait::peek`].
pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
ParserTrait::peek(self)
}
/// Inherent shortcut for [`ParserTrait::next_event`].
pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
ParserTrait::next_event(self)
}
/// Produces the next event.
///
/// Sources are tried in order: the event stored in `current`, the front of
/// `queued_events`, a pending comment token, and finally the state machine.
fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    if let Some(stored) = self.current.take() {
        return Ok(stored);
    }
    if let Some(queued) = self.queued_events.pop_front() {
        return Ok(self.apply_pending_key_indent(queued));
    }
    match self.next_comment_event()? {
        Some(comment) => Ok(comment),
        None => self.parse(),
    }
}
/// Attaches the pending key indent, if any, to the span of a node event.
///
/// Non-node events are returned untouched and leave the pending indent in
/// place for a later node.
fn apply_pending_key_indent<'a>(&mut self, (ev, span): (Event<'a>, Span)) -> (Event<'a>, Span) {
    if !ev.is_node() {
        return (ev, span);
    }
    match self.pending_key_indent.take() {
        Some(indent) => (ev, span.with_indent(Some(indent))),
        None => (ev, span),
    }
}
/// Returns the lookahead token, scanning one first if none is cached.
///
/// # Errors
/// Propagates the error from `scan_next_token`; nothing is cached then.
fn peek_token(&mut self) -> Result<&Token<'_>, ScanError> {
    if self.token.is_none() {
        let scanned = self.scan_next_token()?;
        self.token = Some(scanned);
    }
    // The slot was filled above if it was empty.
    Ok(self.token.as_ref().unwrap())
}
/// Pulls the next token from the scanner.
///
/// # Errors
/// When the scanner yields nothing, returns the scanner's recorded error if
/// it has one and an "unexpected EOF" error otherwise.
fn scan_next_token(&mut self) -> Result<Token<'input>, ScanError> {
    if let Some(tok) = self.scanner.next() {
        return Ok(tok);
    }
    match self.scanner.get_error() {
        Some(e) => e.into_result(),
        None => Err(self.unexpected_eof()),
    }
}
/// Consumes the lookahead token when it is a comment and turns it into an
/// [`Event::Comment`]; returns `Ok(None)` for any other token.
///
/// The placement stored in the event is the one computed by
/// `refined_comment_placement`, which looks at the token after the comment.
fn next_comment_event<'a>(&mut self) -> Result<Option<(Event<'a>, Span)>, ScanError>
where
    'input: 'a,
{
    if !matches!(self.peek_token()?.1, TokenType::Comment(_)) {
        return Ok(None);
    }
    let Token(span, TokenType::Comment(comment)) = self.fetch_token() else {
        unreachable!("comment token disappeared after peek")
    };
    let placement = self.refined_comment_placement(&comment);
    Ok(Some((Event::Comment(comment.text, placement), span)))
}
/// Collects every consecutive comment token into a list of comment events.
///
/// # Errors
/// A peek error is returned only when no comment has been collected yet;
/// otherwise collection stops and the comments gathered so far are returned.
fn next_comment_events(&mut self) -> Result<Vec<(Event<'input>, Span)>, ScanError> {
    let mut collected = Vec::new();
    loop {
        let at_comment = match self.peek_token() {
            Ok(token) => matches!(token.1, TokenType::Comment(_)),
            Err(error) if collected.is_empty() => return Err(error),
            Err(_) => false,
        };
        if !at_comment {
            return Ok(collected);
        }
        let comment = self
            .next_comment_event()?
            .expect("comment token disappeared after peek");
        collected.push(comment);
    }
}
fn queue_tail_and_return_first(
&mut self,
events: Vec<(Event<'input>, Span)>,
) -> (Event<'input>, Span) {
let mut events = events.into_iter();
let first = events
.next()
.expect("event queue must contain at least one event");
self.queued_events.extend(events);
first
}
/// Merges `event` into `comments` by source position, returns the first
/// element of the merged list and queues the remainder.
///
/// `event` goes before the first comment whose start and end indices are
/// both at or after the event's; with no such comment it goes last.
fn queue_event_by_span(
    &mut self,
    comments: Vec<(Event<'input>, Span)>,
    event: (Event<'input>, Span),
) -> (Event<'input>, Span) {
    let event_start = event.1.start.index();
    let event_end = event.1.end.index();
    let slot = comments
        .iter()
        .position(|(_, comment_span)| {
            comment_span.start.index() >= event_start && comment_span.end.index() >= event_end
        })
        .unwrap_or(comments.len());
    let mut merged = comments;
    merged.insert(slot, event);
    self.queue_tail_and_return_first(merged)
}
fn refined_comment_placement(&mut self, comment: &Comment<'_>) -> Placement {
if comment.placement == Placement::Right {
return Placement::Right;
}
let Ok(next) = self.peek_token() else {
return comment.placement;
};
if matches!(next.1, TokenType::StreamEnd) {
return Placement::Last;
}
if next.0.start.line() == comment.span.end.line() + 1 {
Placement::Above
} else {
Placement::Free
}
}
/// Builds the error reported when the scanner runs out of tokens without
/// recording an error of its own. The message depends on the current state
/// and the error is located at the scanner's current mark.
#[cold]
fn unexpected_eof(&self) -> ScanError {
let info = match self.state {
State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
"unexpected EOF while parsing a flow sequence"
}
State::FlowMappingFirstKey
| State::FlowMappingKey
| State::FlowMappingValue
| State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
State::FlowSequenceEntryMappingKey
| State::FlowSequenceEntryMappingValue
| State::FlowSequenceEntryMappingEnd
| State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
State::BlockSequenceFirstEntry | State::BlockSequenceEntry | State::BlockNode => {
"unexpected EOF while parsing a block sequence"
}
State::BlockMappingFirstKey
| State::BlockMappingKey
| State::BlockMappingValue
| State::BlockNodeOrIndentlessSequence => {
"unexpected EOF while parsing a block mapping"
}
// Every other state gets the generic message.
_ => "unexpected eof",
};
ScanError::new_str(self.scanner.mark(), info)
}
/// Takes the cached lookahead token, leaving the cache empty.
///
/// # Panics
/// Panics when no token is cached, i.e. when `peek_token` was not called first.
fn fetch_token<'a>(&mut self) -> Token<'a>
where
'input: 'a,
{
self.token
.take()
.expect("fetch_token needs to be preceded by peek_token")
}
/// Discards the cached lookahead token so the next peek scans a new one.
fn skip(&mut self) {
self.token = None;
}
/// Makes the most recently pushed state the current state.
///
/// # Panics
/// Panics when the state stack is empty.
fn pop_state(&mut self) {
self.state = self.states.pop().unwrap();
}
/// Pushes `state` onto the stack of states to return to later.
fn push_state(&mut self, state: State) {
self.states.push(state);
}
/// Schedules `return_state`, switches to `node_state`, then either returns a
/// pending comment or parses the node right away.
///
/// Because the state is switched before the comment check, a comment
/// returned here is followed by a node parse on the next call into the
/// state machine.
fn defer_parse_node<'a>(
    &mut self,
    node_state: State,
    return_state: State,
    block: bool,
    indentless_sequence: bool,
) -> ParseResult<'a>
where
    'input: 'a,
{
    self.states.push(return_state);
    self.state = node_state;
    match self.next_comment_event()? {
        Some(comment) => Ok(comment),
        None => self.parse_node(block, indentless_sequence),
    }
}
/// Runs one step of the state machine.
///
/// In the `End` state this keeps returning a `StreamEnd` event with an empty
/// span at the scanner's mark. Otherwise the produced event gets the pending
/// key indent applied.
fn parse<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    match self.state {
        State::End => Ok((Event::StreamEnd, Span::empty(self.scanner.mark()))),
        _ => {
            let produced = self.state_machine()?;
            Ok(self.apply_pending_key_indent(produced))
        }
    }
}
/// Inherent shortcut for [`ParserTrait::load`].
///
/// # Errors
/// Returns the scan error reported by the trait implementation.
pub fn load<R: SpannedEventReceiver<'input>>(
&mut self,
recv: &mut R,
multi: bool,
) -> Result<(), ScanError> {
ParserTrait::load(self, recv, multi)
}
/// Inherent shortcut for [`ParserTrait::try_load`].
///
/// # Errors
/// Returns a [`TryLoadError`] when parsing fails or the receiver rejects an event.
pub fn try_load<R: TrySpannedEventReceiver<'input>>(
&mut self,
recv: &mut R,
multi: bool,
) -> Result<(), TryLoadError<R::Error>> {
ParserTrait::try_load(self, recv, multi)
}
/// Test helper: emits one whole document, starting from its already-read
/// `DocumentStart` event.
///
/// # Errors
/// Fails when `first_ev` is not a document start, when parsing fails, or
/// when the receiver rejects an event.
///
/// # Panics
/// Panics when the event following the root node is not `DocumentEnd`.
#[cfg(test)]
fn try_load_document<R: TrySpannedEventReceiver<'input>>(
    &mut self,
    first_ev: Event<'input>,
    span: Span,
    recv: &mut R,
) -> Result<(), TryLoadError<R::Error>> {
    let Event::DocumentStart(_) = first_ev else {
        return Err(TryLoadError::scan(ScanError::new_str(
            span.start,
            "did not find expected <document-start>",
        )));
    };
    try_emit(recv, first_ev, span)?;

    let (root_ev, root_span) = self.next_event_impl()?;
    self.try_load_node(root_ev, root_span, recv)?;

    let (end_ev, end_span) = self.next_event_impl()?;
    assert_eq!(end_ev, Event::DocumentEnd);
    try_emit(recv, end_ev, end_span)
}
/// Test helper: emits one node starting from its first event, recursing into
/// sequences and mappings.
///
/// # Panics
/// Panics when `first_ev` is not an alias, scalar, sequence start or mapping start.
#[cfg(test)]
fn try_load_node<R: TrySpannedEventReceiver<'input>>(
&mut self,
first_ev: Event<'input>,
span: Span,
recv: &mut R,
) -> Result<(), TryLoadError<R::Error>> {
match first_ev {
// Leaf nodes are a single event.
Event::Alias(..) | Event::Scalar(..) => try_emit(recv, first_ev, span),
Event::SequenceStart(..) => {
try_emit(recv, first_ev, span)?;
self.try_load_sequence(recv)
}
Event::MappingStart(..) => {
try_emit(recv, first_ev, span)?;
self.try_load_mapping(recv)
}
_ => {
#[cfg(feature = "debug_prints")]
std::println!("UNREACHABLE EVENT: {first_ev:?}");
unreachable!();
}
}
}
/// Test helper: emits key/value node pairs until `MappingEnd`, then emits
/// the `MappingEnd` event itself.
#[cfg(test)]
fn try_load_mapping<R: TrySpannedEventReceiver<'input>>(
    &mut self,
    recv: &mut R,
) -> Result<(), TryLoadError<R::Error>> {
    loop {
        let (key_ev, key_span) = self.next_event_impl()?;
        if key_ev == Event::MappingEnd {
            return try_emit(recv, key_ev, key_span);
        }
        self.try_load_node(key_ev, key_span, recv)?;

        let (value_ev, value_span) = self.next_event_impl()?;
        self.try_load_node(value_ev, value_span, recv)?;
    }
}
/// Test helper: emits entry nodes until `SequenceEnd`, then emits the
/// `SequenceEnd` event itself.
#[cfg(test)]
fn try_load_sequence<R: TrySpannedEventReceiver<'input>>(
    &mut self,
    recv: &mut R,
) -> Result<(), TryLoadError<R::Error>> {
    loop {
        let (entry_ev, entry_span) = self.next_event_impl()?;
        if entry_ev == Event::SequenceEnd {
            return try_emit(recv, entry_ev, entry_span);
        }
        self.try_load_node(entry_ev, entry_span, recv)?;
    }
}
/// Dispatches to the parsing routine for the current state and returns the
/// event it produces.
///
/// # Panics
/// Panics in the `End` state; `parse` handles that state before calling here.
fn state_machine<'a>(&mut self) -> ParseResult<'a>
where
'input: 'a,
{
debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
match self.state {
State::StreamStart => self.stream_start(),
State::ImplicitDocumentStart => self.document_start(true),
State::DocumentStart => self.document_start(false),
State::DocumentContent => self.document_content(),
State::DocumentEnd => self.document_end(),
// The three node states differ only in the flags passed to `parse_node`.
State::BlockNode => self.parse_node(true, false),
State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
State::FlowNode => self.parse_node(false, false),
State::BlockMappingFirstKey => self.block_mapping_key(true),
State::BlockMappingKey => self.block_mapping_key(false),
State::BlockMappingKeyNode => self.block_mapping_key_node(),
State::BlockMappingValue => self.block_mapping_value(),
State::BlockMappingValueNode => self.block_mapping_value_node(),
State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
State::BlockSequenceEntry => self.block_sequence_entry(false),
State::BlockSequenceEntryNode => self.block_sequence_entry_node(),
State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
State::FlowSequenceEntry => self.flow_sequence_entry(false),
State::FlowMappingFirstKey => self.flow_mapping_key(true),
State::FlowMappingKey => self.flow_mapping_key(false),
State::FlowMappingKeyNode => self.flow_mapping_key_node(),
State::FlowMappingValue => self.flow_mapping_value(false),
State::FlowMappingValueNode => self.flow_mapping_value_node(),
State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
State::IndentlessSequenceEntryNode => self.indentless_sequence_entry_node(),
State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
State::FlowSequenceEntryMappingValueNode => {
self.flow_sequence_entry_mapping_value_node()
}
State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
State::FlowMappingEmptyValue => self.flow_mapping_value(true),
State::End => unreachable!(),
}
}
/// Consumes the stream-start token and emits [`Event::StreamStart`].
///
/// # Errors
/// Fails when the first token is not a stream start.
fn stream_start<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let (span, is_stream_start) = {
        let token = self.peek_token()?;
        (token.0, matches!(token.1, TokenType::StreamStart(_)))
    };
    if !is_stream_start {
        return Err(ScanError::new_str(
            span.start,
            "did not find expected <stream-start>",
        ));
    }
    self.state = State::ImplicitDocumentStart;
    self.skip();
    Ok((Event::StreamStart, span))
}
/// Starts the next document, or ends the stream when no document follows.
///
/// Leading document-end tokens are skipped and the anchor table is cleared.
/// A directive or document-start token always takes the explicit path. Any
/// other token starts an implicit document when `implicit` is `true` and
/// takes the explicit path otherwise.
fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    while matches!(self.peek_token()?.1, TokenType::DocumentEnd) {
        self.skip();
    }
    self.anchors.clear();

    let (span, at_stream_end, opens_explicit_document) = {
        let token = self.peek_token()?;
        (
            token.0,
            matches!(token.1, TokenType::StreamEnd),
            matches!(
                token.1,
                TokenType::VersionDirective(..)
                    | TokenType::TagDirective(..)
                    | TokenType::ReservedDirective(..)
                    | TokenType::DocumentStart
            ),
        )
    };

    if at_stream_end {
        self.state = State::End;
        self.skip();
        return Ok((Event::StreamEnd, span));
    }
    if opens_explicit_document || !implicit {
        return self.explicit_document_start();
    }

    // Implicit document: the content starts at the current token.
    self.parser_process_directives()?;
    self.push_state(State::DocumentEnd);
    self.state = State::BlockNode;
    Ok((Event::DocumentStart(false), span))
}
fn parser_process_directives(&mut self) -> Result<(), ScanError> {
let mut version_directive_received = false;
let mut tags = if self.keep_tags {
self.tags.clone()
} else {
BTreeMap::new()
};
let mut document_tag_handles = BTreeSet::new();
loop {
match self.peek_token()? {
Token(span, TokenType::VersionDirective(_, _)) => {
if version_directive_received {
return Err(ScanError::new_str(
span.start,
"duplicate version directive",
));
}
version_directive_received = true;
}
Token(mark, TokenType::TagDirective(handle, prefix)) => {
if !document_tag_handles.insert(handle.to_string()) {
return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
}
tags.insert(handle.to_string(), prefix.to_string());
}
Token(_, TokenType::ReservedDirective(_, _)) => {
}
_ => break,
}
self.skip();
}
self.tags = tags;
Ok(())
}
/// Processes directives and then expects a document-start token.
///
/// A comment found after the directives is returned first; the state is left
/// unchanged in that case.
///
/// # Errors
/// Fails when the token after directives and comments is not a document
/// start.
fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    self.parser_process_directives()?;
    if let Some(comment) = self.next_comment_event()? {
        return Ok(comment);
    }
    let (span, is_document_start) = {
        let token = self.peek_token()?;
        (token.0, matches!(token.1, TokenType::DocumentStart))
    };
    if !is_document_start {
        return Err(ScanError::new_str(
            span.start,
            "did not find expected <document start>",
        ));
    }
    self.push_state(State::DocumentEnd);
    self.state = State::DocumentContent;
    self.skip();
    Ok((Event::DocumentStart(true), span))
}
/// Parses the root node of an explicit document.
///
/// When the document has no content (the next token is a directive, a
/// document start/end or the stream end) an empty scalar is emitted instead.
fn document_content<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let (mark, document_is_empty) = {
        let token = self.peek_token()?;
        (
            token.0,
            matches!(
                token.1,
                TokenType::VersionDirective(..)
                    | TokenType::TagDirective(..)
                    | TokenType::ReservedDirective(..)
                    | TokenType::DocumentStart
                    | TokenType::DocumentEnd
                    | TokenType::StreamEnd
            ),
        )
    };
    if document_is_empty {
        self.pop_state();
        return Ok((Event::empty_scalar(), mark));
    }
    self.state = State::BlockNode;
    self.parse_node(true, false)
}
/// Ends the current document and emits [`Event::DocumentEnd`].
///
/// An explicit document-end token is consumed. The tag table is emptied
/// unless `keep_tags` is set, in which case only the `!!` and empty handles
/// are removed.
///
/// # Errors
/// Fails when the document ended without an explicit end marker and a
/// directive follows.
fn document_end<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let (span, explicit_end) = {
        let token = self.peek_token()?;
        (token.0, matches!(token.1, TokenType::DocumentEnd))
    };
    if explicit_end {
        self.skip();
    }

    if self.keep_tags {
        self.tags.remove("!!");
        self.tags.remove("");
    } else {
        self.tags.clear();
    }

    if !explicit_end {
        let next = self.peek_token()?;
        let directive_follows = matches!(
            next.1,
            TokenType::VersionDirective(..)
                | TokenType::TagDirective(..)
                | TokenType::ReservedDirective(..)
        );
        if directive_follows {
            return Err(ScanError::new_str(
                next.0.start,
                "missing explicit document end marker before directive",
            ));
        }
    }

    self.state = if explicit_end {
        State::ImplicitDocumentStart
    } else {
        State::DocumentStart
    };
    Ok((Event::DocumentEnd, span))
}
/// Assigns the next anchor id to `name` and returns it.
///
/// # Errors
/// Fails, without registering anything, when the id counter cannot be
/// advanced without overflowing.
fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
    let Some(following_id) = self.anchor_id_count.checked_add(1) else {
        return Err(ScanError::new_str(
            mark.start,
            "while parsing anchor, anchor count exceeded supported limit",
        ));
    };
    let assigned = core::mem::replace(&mut self.anchor_id_count, following_id);
    self.anchors.insert(name, assigned);
    Ok(assigned)
}
/// Stores an anchor id and tag so that the next `parse_node` call picks them
/// up (used when a comment interrupts a node after its properties).
fn save_pending_node_properties(&mut self, anchor_id: usize, tag: Option<Cow<'input, Tag>>) {
self.pending_node_anchor_id = anchor_id;
self.pending_node_tag = tag;
}
/// Parses one node and returns its first event.
///
/// `block` allows block sequences/mappings to start here; `indentless_sequence`
/// allows a bare block entry to start a sequence. A comment found before the
/// node, or between its properties and its content, is returned first; in the
/// latter case the anchor and tag already read are saved and picked up by the
/// next call.
///
/// # Errors
/// Fails on an alias to an unknown anchor, on anchor/tag resolution errors,
/// and when no node content is found and the node has neither tag nor anchor.
#[allow(clippy::too_many_lines)]
fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
where
'input: 'a,
{
if let Some(comment) = self.next_comment_event()? {
return Ok(comment);
}
// Pick up properties saved by an earlier call that was interrupted by a comment.
let mut anchor_id = core::mem::take(&mut self.pending_node_anchor_id);
let mut tag = self.pending_node_tag.take();
// First pass: alias, or node properties (anchor and tag in either order).
match *self.peek_token()? {
Token(_, TokenType::Alias(_)) => {
self.pop_state();
if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
match self.anchors.get(&*name) {
None => {
return Err(ScanError::new_str(
span.start,
"while parsing node, found unknown anchor",
))
}
Some(id) => return Ok((Event::Alias(*id), span)),
}
}
unreachable!()
}
Token(_, TokenType::Anchor(_)) => {
if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
anchor_id = self.register_anchor(name, &span)?;
if let TokenType::Tag(..) = self.peek_token()?.1 {
if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
tag = Some(self.resolve_tag(span, &handle, suffix)?);
} else {
unreachable!()
}
}
// A comment after the properties is emitted first; keep the properties for the retry.
if let Some(comment) = self.next_comment_event()? {
self.save_pending_node_properties(anchor_id, tag);
return Ok(comment);
}
} else {
unreachable!()
}
}
Token(mark, TokenType::Tag(..)) => {
if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
tag = Some(self.resolve_tag(mark, &handle, suffix)?);
if let TokenType::Anchor(_) = &self.peek_token()?.1 {
if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
anchor_id = self.register_anchor(name, &mark)?;
} else {
unreachable!()
}
}
// Same as above: emit the comment first and keep the properties for the retry.
if let Some(comment) = self.next_comment_event()? {
self.save_pending_node_properties(anchor_id, tag);
return Ok(comment);
}
} else {
unreachable!()
}
}
_ => {}
}
// Second pass: the node content itself.
match *self.peek_token()? {
Token(mark, TokenType::BlockEntry) if indentless_sequence => {
self.skip();
let comments = self.next_comment_events()?;
// Remember the entry indicator's span for a possibly empty first entry.
self.pending_empty_scalar_span = Some(mark);
self.state = State::IndentlessSequenceEntryNode;
let start = (
Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
mark,
);
if comments.is_empty() {
Ok(start)
} else {
Ok(self.queue_event_by_span(comments, start))
}
}
Token(_, TokenType::Scalar(..)) => {
self.pop_state();
if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
Ok((Event::Scalar(v, style, anchor_id, tag), mark))
} else {
unreachable!()
}
}
Token(mark, TokenType::FlowSequenceStart) => {
self.state = State::FlowSequenceFirstEntry;
self.skip();
Ok((
Event::SequenceStart(StructureStyle::Flow, anchor_id, tag),
mark,
))
}
Token(mark, TokenType::FlowMappingStart) => {
self.state = State::FlowMappingFirstKey;
self.skip();
Ok((
Event::MappingStart(StructureStyle::Flow, anchor_id, tag),
mark,
))
}
Token(mark, TokenType::BlockSequenceStart) if block => {
self.state = State::BlockSequenceFirstEntry;
self.skip();
Ok((
Event::SequenceStart(StructureStyle::Block, anchor_id, tag),
mark,
))
}
Token(mark, TokenType::BlockMappingStart) if block => {
self.state = State::BlockMappingFirstKey;
self.skip();
Ok((
Event::MappingStart(StructureStyle::Block, anchor_id, tag),
mark,
))
}
// Properties without content: the node is an empty scalar carrying them.
Token(mark, _) if tag.is_some() || anchor_id > 0 => {
self.pop_state();
Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
}
// Nothing usable: report an error whose message depends on the current state.
Token(span, _) => {
let info = match self.state {
State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
"unexpected EOF while parsing a flow sequence"
}
State::FlowMappingFirstKey
| State::FlowMappingKey
| State::FlowMappingValue
| State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
State::FlowSequenceEntryMappingKey
| State::FlowSequenceEntryMappingValue
| State::FlowSequenceEntryMappingEnd
| State::FlowNode => "unexpected EOF while parsing an implicit flow mapping",
State::BlockSequenceFirstEntry
| State::BlockSequenceEntry
| State::BlockNode => "unexpected EOF while parsing a block sequence",
State::BlockMappingFirstKey
| State::BlockMappingKey
| State::BlockMappingValue
| State::BlockNodeOrIndentlessSequence => {
"unexpected EOF while parsing a block mapping"
}
_ => "while parsing a node, did not find expected node content",
};
Err(ScanError::new_str(span.start, info))
}
}
}
/// Parses the start of a block-mapping entry.
///
/// On a key token its column is recorded as the pending key indent and the
/// key node follows (after a comment, if one is pending). A value token
/// means the key is empty; a block-end token closes the mapping.
///
/// # Errors
/// Fails on any other token.
fn block_mapping_key<'a>(&mut self, _first: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        Token(key_span, TokenType::Key) => {
            self.pending_key_indent = Some(key_span.start.col());
            self.skip();
            match self.next_comment_event()? {
                Some(comment) => {
                    // Resume with the key node once the comment is delivered.
                    self.state = State::BlockMappingKeyNode;
                    Ok(comment)
                }
                None => self.block_mapping_key_node(),
            }
        }
        Token(mark, TokenType::BlockEnd) => {
            self.pop_state();
            self.skip();
            Ok((Event::MappingEnd, mark))
        }
        Token(mark, TokenType::Value) => {
            self.state = State::BlockMappingValue;
            Ok((Event::empty_scalar(), mark))
        }
        Token(span, _) => Err(ScanError::new_str(
            span.start,
            "while parsing a block mapping, did not find expected key",
        )),
    }
}
/// Parses the node of a block-mapping key, or emits an empty scalar when the
/// key has no content (next token is a key, value or block end).
fn block_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let (mark, key_is_empty) = {
        let token = self.peek_token()?;
        (
            token.0,
            matches!(
                token.1,
                TokenType::Key | TokenType::Value | TokenType::BlockEnd
            ),
        )
    };
    if key_is_empty {
        self.state = State::BlockMappingValue;
        return Ok((Event::empty_scalar(), mark));
    }
    self.defer_parse_node(
        State::BlockNodeOrIndentlessSequence,
        State::BlockMappingValue,
        true,
        true,
    )
}
/// Parses the value part of a block-mapping entry.
///
/// Without a value token the value is an empty scalar. With one, comments
/// following the indicator are collected and emitted in source order around
/// the value.
fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
where
'input: 'a,
{
match *self.peek_token()? {
Token(mark, TokenType::Value) => {
self.skip();
let comments = self.next_comment_events()?;
if comments.is_empty() {
self.block_mapping_value_node_with_empty_span(mark)
} else if let Ok(Token(
_,
TokenType::Key | TokenType::Value | TokenType::BlockEnd,
)) = self.peek_token()
{
// Empty value: order the empty scalar among the comments by span.
self.state = State::BlockMappingKey;
Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), mark)))
} else {
// A real value follows: deliver the comments first, then parse the node.
self.pending_empty_scalar_span = Some(mark);
self.state = State::BlockMappingValueNode;
Ok(self.queue_tail_and_return_first(comments))
}
}
Token(mark, _) => {
self.state = State::BlockMappingKey;
Ok((Event::empty_scalar(), mark))
}
}
}
/// Resumes parsing of a block-mapping value after its leading comments.
///
/// Uses the span saved in `pending_empty_scalar_span` for a possible empty
/// scalar, falling back to the span of the next token.
fn block_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let empty_span = if let Some(saved) = self.pending_empty_scalar_span.take() {
        saved
    } else {
        self.peek_token()?.0
    };
    self.block_mapping_value_node_with_empty_span(empty_span)
}
/// Parses the node of a block-mapping value, or emits an empty scalar
/// located at `mark` when the value has no content (next token is a key,
/// value or block end).
fn block_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
where
    'input: 'a,
{
    let value_is_empty = matches!(
        self.peek_token()?.1,
        TokenType::Key | TokenType::Value | TokenType::BlockEnd
    );
    if value_is_empty {
        self.state = State::BlockMappingKey;
        return Ok((Event::empty_scalar(), mark));
    }
    self.defer_parse_node(
        State::BlockNodeOrIndentlessSequence,
        State::BlockMappingKey,
        true,
        true,
    )
}
/// Parses the start of a flow-mapping entry, or the end of the mapping.
///
/// When `first` is `false` an entry separator must precede the entry.
///
/// # Errors
/// Fails when a separator is required and the next token is neither a flow
/// entry nor the mapping end.
fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
where
'input: 'a,
{
let span: Span = if let Token(mark, TokenType::FlowMappingEnd) = *self.peek_token()? {
mark
} else {
if !first {
match *self.peek_token()? {
Token(_, TokenType::FlowEntry) => {
self.skip();
// The separator is consumed, so resume as a "first" key after the comment.
if let Some(comment) = self.next_comment_event()? {
self.state = State::FlowMappingFirstKey;
return Ok(comment);
}
}
Token(span, _) => {
return Err(ScanError::new_str(
span.start,
"while parsing a flow mapping, did not find expected ',' or '}'",
))
}
}
}
match *self.peek_token()? {
Token(_, TokenType::Key) => {
self.skip();
if let Some(comment) = self.next_comment_event()? {
self.state = State::FlowMappingKeyNode;
return Ok(comment);
}
return self.flow_mapping_key_node();
}
// A value indicator without a key: the key is an empty scalar.
Token(marker, TokenType::Value) => {
self.state = State::FlowMappingValue;
return Ok((Event::empty_scalar(), marker));
}
// Mapping end right after a separator: fall through to close the mapping.
Token(_, TokenType::FlowMappingEnd) => (),
// A node with no key indicator; its value is handled by `FlowMappingEmptyValue`.
_ => {
return self.defer_parse_node(
State::FlowNode,
State::FlowMappingEmptyValue,
false,
false,
);
}
}
self.peek_token()?.0
};
// Only the mapping-end paths reach this point.
self.pop_state();
self.skip();
Ok((Event::MappingEnd, span))
}
/// Parses the node of a flow-mapping key, or emits an empty scalar when the
/// key has no content (next token is a value, flow entry or mapping end).
fn flow_mapping_key_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let (mark, key_is_empty) = {
        let token = self.peek_token()?;
        (
            token.0,
            matches!(
                token.1,
                TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd
            ),
        )
    };
    if key_is_empty {
        self.state = State::FlowMappingValue;
        return Ok((Event::empty_scalar(), mark));
    }
    self.defer_parse_node(State::FlowNode, State::FlowMappingValue, false, false)
}
/// Parses the value part of a flow-mapping entry.
///
/// With `empty` set, an empty scalar is emitted unconditionally. Otherwise a
/// value token introduces the value; comments following it are collected and
/// emitted in source order around the value.
fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
where
'input: 'a,
{
let span: Span = {
if empty {
let Token(mark, _) = *self.peek_token()?;
self.state = State::FlowMappingKey;
return Ok((Event::empty_scalar(), mark));
}
match *self.peek_token()? {
Token(span, TokenType::Value) => {
self.skip();
let comments = self.next_comment_events()?;
if comments.is_empty() {
return self.flow_mapping_value_node_with_empty_span(span);
}
// Empty value: order the empty scalar among the comments by span.
if let Ok(Token(_, TokenType::FlowEntry | TokenType::FlowMappingEnd)) =
self.peek_token()
{
self.state = State::FlowMappingKey;
return Ok(
self.queue_event_by_span(comments, (Event::empty_scalar(), span))
);
}
// A real value follows: deliver the comments first, then parse the node.
self.pending_empty_scalar_span = Some(span);
self.state = State::FlowMappingValueNode;
return Ok(self.queue_tail_and_return_first(comments));
}
// No value indicator: the value is an empty scalar at this token.
Token(marker, _) => marker,
}
};
self.state = State::FlowMappingKey;
Ok((Event::empty_scalar(), span))
}
/// Resumes parsing of a flow-mapping value after its leading comments.
///
/// Uses the span saved in `pending_empty_scalar_span` for a possible empty
/// scalar, falling back to an empty span at the start of the next token.
fn flow_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let empty_span = if let Some(saved) = self.pending_empty_scalar_span.take() {
        saved
    } else {
        Span::empty(self.peek_token()?.0.start)
    };
    self.flow_mapping_value_node_with_empty_span(empty_span)
}
/// Parses the node of a flow-mapping value, or emits an empty scalar located
/// at `mark` when the value has no content (next token is a flow entry or
/// the mapping end).
fn flow_mapping_value_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
where
    'input: 'a,
{
    let value_is_empty = matches!(
        self.peek_token()?.1,
        TokenType::FlowEntry | TokenType::FlowMappingEnd
    );
    if value_is_empty {
        self.state = State::FlowMappingKey;
        return Ok((Event::empty_scalar(), mark));
    }
    self.defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false)
}
/// Parses one entry of a flow sequence.
///
/// * A `FlowSequenceEnd` token pops the state, is skipped, and yields
///   `SequenceEnd`.
/// * When `first` is false, a `FlowEntry` token must come next; it is
///   skipped. If a comment follows, the state is set to
///   `FlowSequenceFirstEntry` and the comment is returned. Any other token
///   is an error.
/// * After that, `FlowSequenceEnd` closes the sequence, `Key` yields a flow
///   `MappingStart` and switches to `FlowSequenceEntryMappingKey`, and
///   everything else is deferred to `defer_parse_node`.
///
/// # Errors
/// Returns "while parsing a flow sequence, expected ',' or ']'" when a
/// non-first entry is not preceded by `FlowEntry`; also propagates errors
/// from the helpers it calls.
fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    let Token(span, ref kind) = *self.peek_token()?;
    let at_sequence_end = matches!(kind, TokenType::FlowSequenceEnd);
    let at_separator = matches!(kind, TokenType::FlowEntry);

    if at_sequence_end {
        self.pop_state();
        self.skip();
        return Ok((Event::SequenceEnd, span));
    }

    if !first {
        if !at_separator {
            return Err(ScanError::new_str(
                span.start,
                "while parsing a flow sequence, expected ',' or ']'",
            ));
        }
        self.skip();
        if let Some(comment) = self.next_comment_event()? {
            self.state = State::FlowSequenceFirstEntry;
            return Ok(comment);
        }
    }

    let Token(entry_span, ref entry_kind) = *self.peek_token()?;
    let closes_sequence = matches!(entry_kind, TokenType::FlowSequenceEnd);
    let opens_mapping = matches!(entry_kind, TokenType::Key);

    if closes_sequence {
        self.pop_state();
        self.skip();
        Ok((Event::SequenceEnd, entry_span))
    } else if opens_mapping {
        self.state = State::FlowSequenceEntryMappingKey;
        self.skip();
        Ok((
            Event::MappingStart(StructureStyle::Flow, 0, None),
            entry_span,
        ))
    } else {
        self.defer_parse_node(State::FlowNode, State::FlowSequenceEntry, false, false)
    }
}
/// Parses one entry of an indentless block sequence.
///
/// Any token other than `BlockEntry` ends the sequence: the state is popped
/// and `SequenceEnd` is returned with that token's span (the token itself is
/// not skipped). For `BlockEntry`, the token is skipped and comments are
/// collected:
///
/// * no comments: continue in
///   `indentless_sequence_entry_node_with_empty_span`;
/// * comments followed by `BlockEntry`, `Key`, `Value` or `BlockEnd`: queue
///   an empty scalar with the comments and stay in `IndentlessSequenceEntry`;
/// * comments followed by anything else: remember the span in
///   `pending_empty_scalar_span` and move to `IndentlessSequenceEntryNode`.
///
/// # Errors
/// Propagates errors from `peek_token`, `next_comment_events` and the
/// delegated node parser.
fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let Token(span, ref kind) = *self.peek_token()?;
    if !matches!(kind, TokenType::BlockEntry) {
        self.pop_state();
        return Ok((Event::SequenceEnd, span));
    }

    self.skip();
    let comments = self.next_comment_events()?;
    if comments.is_empty() {
        return self.indentless_sequence_entry_node_with_empty_span(span);
    }

    let entry_is_empty = matches!(
        self.peek_token(),
        Ok(Token(
            _,
            TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
        ))
    );
    if entry_is_empty {
        self.state = State::IndentlessSequenceEntry;
        Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), span)))
    } else {
        self.pending_empty_scalar_span = Some(span);
        self.state = State::IndentlessSequenceEntryNode;
        Ok(self.queue_tail_and_return_first(comments))
    }
}
/// Resumes parsing of an indentless sequence entry's node.
///
/// Uses the span stored in `pending_empty_scalar_span` when present,
/// otherwise the span of the next token, and forwards it to
/// `indentless_sequence_entry_node_with_empty_span`.
///
/// # Errors
/// Propagates errors from `peek_token` and the delegated call.
fn indentless_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    if let Some(pending) = self.pending_empty_scalar_span.take() {
        return self.indentless_sequence_entry_node_with_empty_span(pending);
    }
    let fallback = self.peek_token()?.0;
    self.indentless_sequence_entry_node_with_empty_span(fallback)
}
/// Produces the node of an indentless sequence entry.
///
/// If the next token is `BlockEntry`, `Key`, `Value` or `BlockEnd`, returns
/// an empty scalar located at `mark` and moves to `IndentlessSequenceEntry`.
/// Otherwise defers to `defer_parse_node` with `BlockNode` /
/// `IndentlessSequenceEntry`.
///
/// # Errors
/// Propagates errors from `peek_token` and `defer_parse_node`.
fn indentless_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
where
    'input: 'a,
{
    let entry_is_empty = matches!(
        self.peek_token()?.1,
        TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd
    );
    if !entry_is_empty {
        return self.defer_parse_node(
            State::BlockNode,
            State::IndentlessSequenceEntry,
            true,
            false,
        );
    }
    self.state = State::IndentlessSequenceEntry;
    Ok((Event::empty_scalar(), mark))
}
/// Parses one entry of a block sequence.
///
/// * `BlockEnd`: pop the state, skip the token and return `SequenceEnd`.
/// * `BlockEntry`: skip the token and collect comments. Without comments,
///   continue in `block_sequence_entry_node_with_empty_span`. With comments
///   followed by `BlockEntry`/`BlockEnd`, queue an empty scalar with them
///   and stay in `BlockSequenceEntry`. Otherwise remember the span in
///   `pending_empty_scalar_span` and move to `BlockSequenceEntryNode`.
/// * Anything else is an error.
///
/// The `_first` flag is not used by this method.
///
/// # Errors
/// Returns "while parsing a block collection, did not find expected '-'
/// indicator" for an unexpected token; also propagates helper errors.
fn block_sequence_entry<'a>(&mut self, _first: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    let Token(span, ref kind) = *self.peek_token()?;
    let at_block_end = matches!(kind, TokenType::BlockEnd);
    let at_entry = matches!(kind, TokenType::BlockEntry);

    if at_block_end {
        self.pop_state();
        self.skip();
        return Ok((Event::SequenceEnd, span));
    }
    if !at_entry {
        return Err(ScanError::new_str(
            span.start,
            "while parsing a block collection, did not find expected '-' indicator",
        ));
    }

    self.skip();
    let comments = self.next_comment_events()?;
    if comments.is_empty() {
        return self.block_sequence_entry_node_with_empty_span(span);
    }

    let entry_is_empty = matches!(
        self.peek_token(),
        Ok(Token(_, TokenType::BlockEntry | TokenType::BlockEnd))
    );
    if entry_is_empty {
        self.state = State::BlockSequenceEntry;
        Ok(self.queue_event_by_span(comments, (Event::empty_scalar(), span)))
    } else {
        self.pending_empty_scalar_span = Some(span);
        self.state = State::BlockSequenceEntryNode;
        Ok(self.queue_tail_and_return_first(comments))
    }
}
/// Resumes parsing of a block sequence entry's node.
///
/// Uses the span stored in `pending_empty_scalar_span` when present,
/// otherwise the span of the next token, and forwards it to
/// `block_sequence_entry_node_with_empty_span`.
///
/// # Errors
/// Propagates errors from `peek_token` and the delegated call.
fn block_sequence_entry_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    if let Some(pending) = self.pending_empty_scalar_span.take() {
        return self.block_sequence_entry_node_with_empty_span(pending);
    }
    let fallback = self.peek_token()?.0;
    self.block_sequence_entry_node_with_empty_span(fallback)
}
/// Produces the node of a block sequence entry.
///
/// If the next token is `BlockEntry` or `BlockEnd`, returns an empty scalar
/// located at `mark` and moves to `BlockSequenceEntry`. Otherwise defers to
/// `defer_parse_node` with `BlockNode` / `BlockSequenceEntry`.
///
/// # Errors
/// Propagates errors from `peek_token` and `defer_parse_node`.
fn block_sequence_entry_node_with_empty_span<'a>(&mut self, mark: Span) -> ParseResult<'a>
where
    'input: 'a,
{
    let entry_is_empty = matches!(
        self.peek_token()?.1,
        TokenType::BlockEntry | TokenType::BlockEnd
    );
    if !entry_is_empty {
        return self.defer_parse_node(State::BlockNode, State::BlockSequenceEntry, true, false);
    }
    self.state = State::BlockSequenceEntry;
    Ok((Event::empty_scalar(), mark))
}
/// Handles the key of a single-pair mapping inside a flow sequence.
///
/// If the next token is `FlowEntry` or `FlowSequenceEnd`, returns an empty
/// scalar carrying that token's span and moves to
/// `FlowSequenceEntryMappingValue`. Otherwise defers to `defer_parse_node`
/// with `FlowNode` / `FlowSequenceEntryMappingValue`.
///
/// # Errors
/// Propagates errors from `peek_token` and `defer_parse_node`.
fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let Token(span, ref kind) = *self.peek_token()?;
    let key_is_absent = matches!(kind, TokenType::FlowEntry | TokenType::FlowSequenceEnd);
    if !key_is_absent {
        return self.defer_parse_node(
            State::FlowNode,
            State::FlowSequenceEntryMappingValue,
            false,
            false,
        );
    }
    self.state = State::FlowSequenceEntryMappingValue;
    Ok((Event::empty_scalar(), span))
}
/// Handles the value position of a single-pair mapping inside a flow
/// sequence.
///
/// Without a `Value` token, returns an empty scalar with the peeked token's
/// span and moves to `FlowSequenceEntryMappingEnd`. With one, the token is
/// skipped; if a comment follows it is returned and the state becomes
/// `FlowSequenceEntryMappingValueNode`, otherwise parsing continues directly
/// in `flow_sequence_entry_mapping_value_node`.
///
/// # Errors
/// Propagates errors from `peek_token`, `next_comment_event` and the
/// delegated node parser.
fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let Token(span, ref kind) = *self.peek_token()?;
    if !matches!(kind, TokenType::Value) {
        self.state = State::FlowSequenceEntryMappingEnd;
        return Ok((Event::empty_scalar(), span));
    }

    self.skip();
    match self.next_comment_event()? {
        Some(comment) => {
            self.state = State::FlowSequenceEntryMappingValueNode;
            Ok(comment)
        }
        None => self.flow_sequence_entry_mapping_value_node(),
    }
}
/// Produces the value node of a single-pair mapping inside a flow sequence.
///
/// If the next token is `FlowEntry` or `FlowSequenceEnd`, returns an empty
/// scalar with an empty span at that token's start and moves to
/// `FlowSequenceEntryMappingEnd`. Otherwise defers to `defer_parse_node`
/// with `FlowNode` / `FlowSequenceEntryMappingEnd`.
///
/// # Errors
/// Propagates errors from `peek_token` and `defer_parse_node`.
fn flow_sequence_entry_mapping_value_node<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        Token(span, TokenType::FlowEntry | TokenType::FlowSequenceEnd) => {
            self.state = State::FlowSequenceEntryMappingEnd;
            Ok((Event::empty_scalar(), Span::empty(span.start)))
        }
        _ => self.defer_parse_node(
            State::FlowNode,
            State::FlowSequenceEntryMappingEnd,
            false,
            false,
        ),
    }
}
/// Closes a single-pair mapping inside a flow sequence.
///
/// Switches the state back to `FlowSequenceEntry` and returns `MappingEnd`
/// with an empty span at the start of the next token. The state is updated
/// before the token is peeked, so it changes even if peeking fails.
///
/// # Errors
/// Propagates errors from `peek_token`.
#[allow(clippy::unnecessary_wraps)]
fn flow_sequence_entry_mapping_end<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    self.state = State::FlowSequenceEntry;
    let start = self.peek_token()?.0.start;
    Ok((Event::MappingEnd, Span::empty(start)))
}
/// Builds the [`Tag`] for a tag token, resolving `handle` through
/// `self.tags`.
///
/// * `!!` uses the entry registered for `"!!"`, defaulting to
///   `tag:yaml.org,2002:`.
/// * An empty handle with suffix `!` uses the entry registered for `""`,
///   defaulting to an empty handle.
/// * Any other handle uses its registered entry. If none is registered, the
///   handle text is kept verbatim, unless it is at least two characters long
///   and both starts and ends with `!`, which is reported as undeclared.
///
/// # Errors
/// Returns "the handle wasn't declared" at `span.start` for an unregistered
/// handle of the form `!…!`.
fn resolve_tag(
    &self,
    span: Span,
    handle: &Cow<'input, str>,
    suffix: Cow<'input, str>,
) -> Result<Cow<'input, Tag>, ScanError> {
    let suffix = suffix.into_owned();

    let resolved_handle = if handle == "!!" {
        match self.tags.get("!!") {
            Some(prefix) => prefix.clone(),
            None => "tag:yaml.org,2002:".to_string(),
        }
    } else if handle.is_empty() && suffix == "!" {
        self.tags.get("").cloned().unwrap_or_default()
    } else if let Some(prefix) = self.tags.get(&**handle) {
        prefix.clone()
    } else {
        let looks_like_named_handle =
            handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!');
        if looks_like_named_handle {
            return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
        }
        handle.to_string()
    };

    Ok(Cow::Owned(Tag {
        handle: resolved_handle,
        suffix,
    }))
}
}
impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
    /// Returns a reference to the next event without consuming it.
    ///
    /// An event already buffered in `self.current` is returned as is.
    /// Otherwise, unless `stream_end_emitted` is set (in which case `None`
    /// is returned), one event is pulled with `next_event_impl` and stored in
    /// `self.current`. Errors are returned without being buffered.
    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
        if let Some(ref buffered) = self.current {
            return Some(Ok(buffered));
        }
        if self.stream_end_emitted {
            return None;
        }
        match self.next_event_impl() {
            Ok(event) => self.current = Some(event),
            Err(err) => return Some(err.into_result()),
        }
        self.current.as_ref().map(Ok)
    }

    /// Returns the next event, or `None` once `StreamEnd` has been handed out
    /// by this method.
    fn next_event(&mut self) -> Option<ParseResult<'input>> {
        if self.stream_end_emitted {
            return None;
        }
        let result = self.next_event_impl();
        if let Ok((Event::StreamEnd, _)) = &result {
            self.stream_end_emitted = true;
        }
        Some(result)
    }

    /// Feeds events to an infallible receiver by wrapping it in
    /// `InfallibleSpannedReceiver` and delegating to `try_load`.
    fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError> {
        let mut adapter = InfallibleSpannedReceiver(recv);
        let outcome = ParserTrait::try_load(self, &mut adapter, multi);
        into_scan_result(outcome)
    }

    /// Feeds events to `recv` until the stream ends or, when `multi` is
    /// false, until the first `DocumentEnd` has been delivered.
    ///
    /// If the scanner has not started yet, or a `StreamStart` event is
    /// buffered in `self.current`, the first event must be `StreamStart` and
    /// is forwarded. If the scanner reports that the stream already ended,
    /// only a `StreamEnd` event is emitted.
    ///
    /// # Errors
    /// Returns a scan error ("did not find expected <stream-start>") when the
    /// first event is not `StreamStart`, any scan error raised while parsing,
    /// or the receiver's own error as soon as it rejects an event.
    fn try_load<R: TrySpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), TryLoadError<R::Error>> {
        let stream_start_buffered = matches!(&self.current, Some((Event::StreamStart, _)));
        if !self.scanner.stream_started() || stream_start_buffered {
            let (first, first_span) = self.next_event_impl()?;
            if !matches!(first, Event::StreamStart) {
                return Err(TryLoadError::scan(ScanError::new_str(
                    first_span.start,
                    "did not find expected <stream-start>",
                )));
            }
            try_emit(recv, first, first_span)?;
        }

        if self.scanner.stream_ended() {
            try_emit(recv, Event::StreamEnd, Span::empty(self.scanner.mark()))?;
            return Ok(());
        }

        loop {
            let (event, span) = self.next_event_impl()?;
            // Decide before the event is moved into the receiver.
            let finished = match event {
                Event::StreamEnd => true,
                Event::DocumentEnd => !multi,
                _ => false,
            };
            try_emit(recv, event, span)?;
            if finished {
                return Ok(());
            }
        }
    }
}
/// Lets a `Parser` be consumed as an iterator of `(Event, Span)` results.
impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
type Item = Result<(Event<'input>, Span), ScanError>;
/// Delegates to `next_event`, so iteration ends when that returns `None`.
fn next(&mut self) -> Option<Self::Item> {
self.next_event()
}
}
#[cfg(test)]
mod test {
use alloc::{
borrow::{Cow, ToOwned},
string::{String, ToString},
vec::Vec,
};
use core::{error::Error as _, fmt};
use crate::scanner::{Marker, ScalarStyle, ScanError, Span};
use super::{
Event, EventReceiver, Parser, State, StructureStyle, Tag, TryEventReceiver, TryLoadError,
TrySpannedEventReceiver,
};
/// Test receiver that stores every event it is given, in arrival order.
#[derive(Default)]
struct CollectingSink<'input> {
// Events received so far.
events: Vec<Event<'input>>,
}
impl<'input> EventReceiver<'input> for CollectingSink<'input> {
/// Appends `ev` to `events`.
fn on_event(&mut self, ev: Event<'input>) {
self.events.push(ev);
}
}
/// Parses `input` and returns the `info()` text of the first error yielded.
///
/// # Panics
/// Panics with "expected parser error" if the parser finishes without
/// producing an error.
fn first_error_info(input: &str) -> String {
    Parser::new_from_str(input)
        .find_map(Result::err)
        .map_or_else(
            || panic!("expected parser error"),
            |err| err.info().to_owned(),
        )
}
/// `defer_parse_node` returns a leading comment first and leaves the parser
/// in the requested node state.
#[test]
fn deferred_parse_node_can_emit_comment_before_flow_node() {
let mut parser = Parser::new_from_str("# deferred\nvalue\n");
// Drive the parser manually up to the start of the document content.
assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
assert_eq!(
parser.document_start(true).unwrap().0,
Event::DocumentStart(false)
);
let (event, _) = parser
.defer_parse_node(State::FlowNode, State::FlowMappingKey, false, false)
.unwrap();
// The comment text keeps its leading space and is emitted before the node.
assert!(matches!(event, Event::Comment(text, _) if text == " deferred"));
assert_eq!(parser.state, State::FlowNode);
}
/// A queued node event picks up `pending_key_indent` as its span indent, and
/// the pending value is cleared afterwards.
#[test]
fn queued_node_event_gets_pending_key_indent() {
let mut parser = Parser::new_from_str("");
let span = Span::empty(Marker::new(0, 1, 0));
parser.pending_key_indent = Some(3);
// Inject a node event directly into the queue.
parser
.queued_events
.push_back((Event::SequenceStart(StructureStyle::Block, 0, None), span));
let (event, span) = parser.next_event_impl().unwrap();
assert!(matches!(
event,
Event::SequenceStart(StructureStyle::Block, 0, None)
));
assert_eq!(span.indent, Some(3));
assert_eq!(parser.pending_key_indent, None);
}
/// `state_machine` yields the scalar when started in `FlowNode` and when
/// started in `FlowSequenceEntryMappingValueNode`.
#[test]
fn state_machine_handles_deferred_flow_node_states() {
// Case 1: state forced to `FlowNode`, with `End` pushed on the state stack.
let mut parser = Parser::new_from_str("value\n");
assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
assert_eq!(
parser.document_start(true).unwrap().0,
Event::DocumentStart(false)
);
parser.state = State::FlowNode;
parser.push_state(State::End);
let (event, _) = parser.state_machine().unwrap();
assert!(matches!(event, Event::Scalar(value, ..) if value == "value"));
// Case 2: a fresh parser forced to `FlowSequenceEntryMappingValueNode`.
let mut parser = Parser::new_from_str("value\n");
assert_eq!(parser.stream_start().unwrap().0, Event::StreamStart);
assert_eq!(
parser.document_start(true).unwrap().0,
Event::DocumentStart(false)
);
parser.state = State::FlowSequenceEntryMappingValueNode;
let (event, _) = parser.state_machine().unwrap();
assert!(matches!(event, Event::Scalar(value, ..) if value == "value"));
}
/// A resolved core-schema tag is displayed as handle followed by suffix,
/// with no additional `!`.
#[test]
fn display_resolved_core_tag_without_extra_bang() {
    let rendered = Tag {
        handle: String::from("tag:yaml.org,2002:"),
        suffix: String::from("str"),
    }
    .to_string();
    assert_eq!(rendered, "tag:yaml.org,2002:str");
}
/// The `Tag` helper methods give opposite answers for a core-schema tag and a
/// local `!` tag.
#[test]
fn tag_helpers_distinguish_core_and_local_tags() {
let core = Tag {
handle: "tag:yaml.org,2002:".to_owned(),
suffix: "int".to_owned(),
};
let local = Tag {
handle: "!".to_owned(),
suffix: "thing".to_owned(),
};
// Core-schema tag.
assert!(core.is_yaml_core_schema());
assert!(core.is_yaml_core_schema_tag("int"));
assert!(!core.is_yaml_core_schema_tag("str"));
assert!(!core.is_custom());
assert_eq!(core.parts(), ("tag:yaml.org,2002:", "int"));
// Local tag.
assert!(!local.is_yaml_core_schema());
assert!(!local.is_yaml_core_schema_tag("thing"));
assert!(local.is_custom());
assert_eq!(local.parts(), ("!", "thing"));
assert_eq!(local.to_string(), "!thing");
}
/// Checks `anchor_id`, `alias_id`, `tag`, `scalar` and `is_node` on each kind
/// of event.
#[test]
fn event_inspection_helpers_report_node_metadata() {
let tag = Tag {
handle: "!".to_owned(),
suffix: "thing".to_owned(),
};
let scalar = Event::Scalar(
"value".into(),
ScalarStyle::DoubleQuoted,
7,
Some(Cow::Borrowed(&tag)),
);
let sequence =
Event::SequenceStart(StructureStyle::Block, 8, Some(Cow::Owned(tag.clone())));
let mapping = Event::MappingStart(StructureStyle::Block, 9, Some(Cow::Borrowed(&tag)));
// Anchored, tagged scalar.
assert_eq!(scalar.anchor_id(), Some(7));
assert_eq!(scalar.alias_id(), None);
assert_eq!(scalar.tag(), Some(&tag));
assert_eq!(scalar.scalar(), Some(("value", ScalarStyle::DoubleQuoted)));
assert!(scalar.is_node());
// Anchored, tagged sequence start.
assert_eq!(sequence.anchor_id(), Some(8));
assert_eq!(sequence.alias_id(), None);
assert_eq!(sequence.tag(), Some(&tag));
assert_eq!(sequence.scalar(), None);
assert!(sequence.is_node());
// Anchored, tagged mapping start.
assert_eq!(mapping.anchor_id(), Some(9));
assert_eq!(mapping.alias_id(), None);
assert_eq!(mapping.tag(), Some(&tag));
assert_eq!(mapping.scalar(), None);
assert!(mapping.is_node());
// An alias exposes only its alias id.
let alias = Event::Alias(10);
assert_eq!(alias.anchor_id(), None);
assert_eq!(alias.alias_id(), Some(10));
assert_eq!(alias.tag(), None);
assert_eq!(alias.scalar(), None);
assert!(alias.is_node());
// Anchor id 0 is reported as "no anchor".
let unanchored_scalar = Event::Scalar("x".into(), ScalarStyle::Plain, 0, None);
assert_eq!(unanchored_scalar.anchor_id(), None);
assert_eq!(unanchored_scalar.alias_id(), None);
// A non-node event has no metadata at all.
let stream_start = Event::StreamStart;
assert_eq!(stream_start.anchor_id(), None);
assert_eq!(stream_start.alias_id(), None);
assert_eq!(stream_start.tag(), None);
assert_eq!(stream_start.scalar(), None);
assert!(!stream_start.is_node());
}
/// For every event of a document, `peek` followed by `next_event` must
/// return the same event.
#[test]
fn test_peek_eq_parse() {
    let yaml = "
a0 bb: val
a1: &x
b1: 4
b2: d
a2: 4
a3: [1, 2, 3]
a4:
- [a1, a2]
- 2
a5: *x
";
    let mut parser = Parser::new_from_str(yaml);
    loop {
        let peeked = parser.peek().unwrap().unwrap().clone();
        let consumed = parser.next_event().unwrap().unwrap();
        assert_eq!(consumed, peeked);
        if matches!(consumed.0, Event::StreamEnd) {
            break;
        }
    }
}
/// Peeking twice returns the same event, and `next_event` then returns it too.
#[test]
fn test_repeated_peek_returns_buffered_event() {
let mut parser = Parser::new_from_str("key: value\n");
let first_peek = parser.peek().unwrap().unwrap().clone();
let second_peek = parser.peek().unwrap().unwrap().clone();
let next = parser.next_event().unwrap().unwrap();
assert_eq!(first_peek, second_peek);
assert_eq!(first_peek, next);
}
/// `peek` reports the scan error of an unterminated flow sequence instead of
/// returning `None`.
#[test]
fn test_peek_surfaces_scan_error_without_consuming_stream_end_state() {
let mut parser = Parser::new_from_str("a: [1, 2");
loop {
match parser.peek() {
// Consume each successfully peeked event and keep going.
Some(Ok(_)) => {
parser.next_event().unwrap().unwrap();
}
Some(Err(error)) => {
assert_eq!(error.info(), "unclosed bracket '['");
break;
}
None => panic!("expected parse error"),
}
}
}
/// After `StreamEnd` has been returned, both `next_event` and `peek` yield
/// `None`.
#[test]
fn test_peek_and_next_return_none_after_stream_end() {
let mut parser = Parser::new_from_str("");
assert!(matches!(
parser.next_event().unwrap().unwrap().0,
Event::StreamStart
));
assert!(matches!(
parser.next_event().unwrap().unwrap().0,
Event::StreamEnd
));
assert!(parser.next_event().is_none());
assert!(parser.peek().is_none());
}
/// Calling `load` on a fully consumed parser delivers exactly one `StreamEnd`.
#[test]
fn test_load_after_stream_already_ended_emits_stream_end() {
let mut parser = Parser::new_from_str("");
// Exhaust the parser first.
while parser.next_event().is_some() {}
let mut sink = CollectingSink::default();
parser.load(&mut sink, true).unwrap();
assert_eq!(sink.events, vec![Event::StreamEnd]);
}
/// `load` delivers the complete, correctly nested event sequence for a block
/// mapping containing a block sequence with a mapping and a flow sequence.
#[test]
fn test_load_visits_nested_collection_events() {
    /// Plain, unanchored, untagged scalar event.
    fn plain(text: &str) -> Event<'_> {
        Event::Scalar(text.into(), ScalarStyle::Plain, 0, None)
    }

    let mut parser = Parser::new_from_str("root:\n  - item: value\n  - [a, b]\n");
    let mut sink = CollectingSink::default();
    parser.load(&mut sink, true).unwrap();

    let expected = vec![
        Event::StreamStart,
        Event::DocumentStart(false),
        Event::MappingStart(StructureStyle::Block, 0, None),
        plain("root"),
        Event::SequenceStart(StructureStyle::Block, 0, None),
        Event::MappingStart(StructureStyle::Block, 0, None),
        plain("item"),
        plain("value"),
        Event::MappingEnd,
        Event::SequenceStart(StructureStyle::Flow, 0, None),
        plain("a"),
        plain("b"),
        Event::SequenceEnd,
        Event::SequenceEnd,
        Event::MappingEnd,
        Event::DocumentEnd,
        Event::StreamEnd,
    ];
    assert_eq!(sink.events, expected);
}
/// Error type returned by the fallible test receivers in this module.
#[derive(Clone, Debug, PartialEq, Eq)]
enum ValidationError {
// Returned by `FailingSink` when it sees the scalar "bad".
ForbiddenValue,
}
/// Minimal error type used to exercise `TryLoadError`'s `Display` and
/// `source` implementations.
#[derive(Debug)]
struct ReceiverFailure;

impl fmt::Display for ReceiverFailure {
    /// Always renders as the fixed text "receiver failed".
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("receiver failed")
    }
}

impl core::error::Error for ReceiverFailure {}
/// Fallible test receiver that records every event and rejects the scalar
/// whose text is "bad".
struct FailingSink<'input> {
    // Every event received, including the one that triggered the failure.
    events: Vec<Event<'input>>,
}

impl<'input> TryEventReceiver<'input> for FailingSink<'input> {
    type Error = ValidationError;

    /// Stores `ev`, then fails with `ForbiddenValue` if it is the scalar "bad".
    fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
        // Decide the outcome before the event is moved into the log.
        let verdict = match &ev {
            Event::Scalar(value, ..) if value.as_ref() == "bad" => {
                Err(ValidationError::ForbiddenValue)
            }
            _ => Ok(()),
        };
        self.events.push(ev);
        verdict
    }
}
/// `try_load` stops at the first receiver error: events up to and including
/// the rejected one are delivered, later ones are not.
#[test]
fn test_try_load_stops_on_receiver_error() {
    let mut parser = Parser::new_from_str("ok: bad\nafter: value\n");
    let mut sink = FailingSink { events: Vec::new() };

    let err = parser.try_load(&mut sink, true).unwrap_err();
    assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenValue));

    let saw_scalar = |text: &str| {
        sink.events
            .iter()
            .any(|event| matches!(event, Event::Scalar(value, ..) if value == text))
    };
    assert!(saw_scalar("ok"));
    assert!(saw_scalar("bad"));
    assert!(!saw_scalar("after"));
}
/// Span-aware fallible receiver that rejects the scalar "bad" and reports
/// the span it was given.
struct SpannedFailingSink {
    // Span of the rejected event, if one was rejected.
    failed_span: Option<Span>,
}

impl<'input> TrySpannedEventReceiver<'input> for SpannedFailingSink {
    type Error = Span;

    /// Fails with `span` (and records it) when `ev` is the scalar "bad".
    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
        match ev {
            Event::Scalar(ref value, ..) if value.as_ref() == "bad" => {
                self.failed_span = Some(span);
                Err(span)
            }
            _ => Ok(()),
        }
    }
}
/// A spanned receiver is given a non-empty span for the scalar it rejects,
/// and that span is what `try_load` returns as the receiver error.
#[test]
fn test_try_load_spanned_receiver_gets_span() {
    let mut parser = Parser::new_from_str("value: bad\n");
    let mut sink = SpannedFailingSink { failed_span: None };

    let span = match parser.try_load(&mut sink, false).unwrap_err() {
        TryLoadError::Receiver(span) => span,
        _ => panic!("expected receiver error"),
    };

    assert_eq!(Some(span), sink.failed_span);
    assert!(!span.is_empty());
}
/// Fallible test receiver that accepts everything and only counts events.
struct NeverFails {
// Number of events received so far.
count: usize,
}
impl<'input> TryEventReceiver<'input> for NeverFails {
type Error = ValidationError;
/// Increments `count` and always succeeds.
fn on_event(&mut self, _ev: Event<'input>) -> Result<(), Self::Error> {
self.count += 1;
Ok(())
}
}
/// A parse failure during `try_load` is reported as the `Scan` variant.
#[test]
fn test_try_load_returns_scan_error() {
    let mut parser = Parser::new_from_str("%YAML 1.2\n%YAML 1.2\n---\n");
    let mut sink = NeverFails { count: 0 };

    let scan_err = match parser.try_load(&mut sink, true).unwrap_err() {
        TryLoadError::Scan(inner) => inner,
        _ => panic!("expected scan error"),
    };

    assert_eq!(scan_err.info(), "duplicate version directive");
}
/// Both `TryLoadError` variants render a prefixed message and expose a
/// source error.
#[test]
fn test_try_load_error_display_and_source_cover_both_variants() {
// Scan variant, built through the `From<ScanError>` conversion.
let scan = ScanError::new_str(Marker::new(3, 1, 3), "bad yaml");
let scan_err: TryLoadError<ReceiverFailure> = scan.into();
assert!(scan_err.to_string().starts_with("parser error: bad yaml"));
assert!(scan_err.source().is_some());
// Receiver variant.
let receiver_err = TryLoadError::Receiver(ReceiverFailure);
assert_eq!(receiver_err.to_string(), "receiver error: receiver failed");
assert!(receiver_err.source().is_some());
}
/// `try_load_document` refuses a first event that is not a document start.
#[test]
fn test_try_load_document_rejects_non_document_start_event() {
    let mut parser = Parser::new_from_str("");
    let span = Span::empty(Marker::new(0, 1, 0));
    let mut sink = NeverFails { count: 0 };
    let not_a_document_start = Event::Scalar("value".into(), ScalarStyle::Plain, 0, None);

    let outcome = parser.try_load_document(not_a_document_start, span, &mut sink);

    let scan_err = match outcome.unwrap_err() {
        TryLoadError::Scan(inner) => inner,
        _ => panic!("expected scan error"),
    };
    assert_eq!(scan_err.info(), "did not find expected <document-start>");
}
/// `try_load` fails when the first event it obtains is not `StreamStart`.
#[test]
fn test_try_load_requires_buffered_stream_start() {
let mut parser = Parser::new_from_str("");
let span = Span::empty(Marker::new(0, 1, 0));
// Plant a scalar where the buffered stream start would normally be.
parser.current = Some((
Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
span,
));
let mut sink = NeverFails { count: 0 };
let err = parser.try_load(&mut sink, true).unwrap_err();
let TryLoadError::Scan(err) = err else {
panic!("expected scan error");
};
assert_eq!(err.info(), "did not find expected <stream-start>");
}
/// Calling `try_load` on a fully consumed parser delivers exactly one
/// `StreamEnd`.
#[test]
fn test_try_load_after_stream_already_ended_emits_stream_end() {
let mut parser = Parser::new_from_str("");
// Exhaust the parser first.
while parser.next_event().is_some() {}
let mut sink = FailingSink { events: Vec::new() };
parser.try_load(&mut sink, true).unwrap();
assert_eq!(sink.events, vec![Event::StreamEnd]);
}
/// With `multi == false`, `load` stops after the first document's
/// `DocumentEnd` and never reaches the second document.
#[test]
fn test_load_single_document_stops_before_next_document() {
    let mut parser = Parser::new_from_str("a: 1\n---\nb: 2\n");
    let mut sink = CollectingSink::default();
    parser.load(&mut sink, false).unwrap();

    let saw_scalar = |text: &str| {
        sink.events
            .iter()
            .any(|event| matches!(event, Event::Scalar(value, ..) if value == text))
    };
    assert!(saw_scalar("a"));
    assert!(!saw_scalar("b"));
    assert!(matches!(sink.events.last(), Some(Event::DocumentEnd)));
}
/// Two `%YAML` directives before one document are rejected.
#[test]
fn test_duplicate_version_directive_errors() {
    let info = first_error_info("%YAML 1.2\n%YAML 1.2\n---\n");
    assert_eq!(info, "duplicate version directive");
}
/// Declaring the same `%TAG` handle twice for one document is rejected.
#[test]
fn test_duplicate_tag_directive_errors() {
    let info = first_error_info("%TAG !t! tag:test,2024:\n%TAG !t! tag:other,2024:\n---\n");
    assert_eq!(
        info,
        "the TAG directive must only be given at most once per handle in the same document"
    );
}
/// A directive that follows document content without an explicit document
/// end marker is rejected.
#[test]
fn test_directive_after_implicit_document_requires_explicit_end() {
    let info = first_error_info("---\nkey: value\n%YAML 1.2\n---\n");
    assert_eq!(
        info,
        "missing explicit document end marker before directive"
    );
}
/// Registering an anchor when the anchor offset is already `usize::MAX`
/// yields an error rather than wrapping.
#[test]
fn test_anchor_offset_overflow_reports_error() {
let mut parser = Parser::new_from_str("&a value");
parser.set_anchor_offset(usize::MAX);
// Take the first error the parser yields.
let err = parser
.find_map(Result::err)
.expect("anchor registration should overflow");
assert_eq!(
err.info(),
"while parsing anchor, anchor count exceeded supported limit"
);
}
/// An alias to the first anchor of a document is emitted as `Alias(1)`.
#[test]
fn test_alias_resolves_to_registered_anchor_id() {
    let mut saw_alias = false;
    // Every event is unwrapped, so any parse error still fails the test.
    for event in Parser::new_from_str("- &a value\n- *a\n") {
        if matches!(event.unwrap().0, Event::Alias(1)) {
            saw_alias = true;
        }
    }
    assert!(saw_alias);
}
/// An anchor written before a tag: both end up on the scalar.
#[test]
fn test_anchor_then_tag_applies_both_to_scalar() {
    let events: Vec<_> = Parser::new_from_str("&a !!str value")
        .map(|event| event.unwrap().0)
        .collect();
    let scalar = events
        .iter()
        .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"));

    match scalar {
        Some(Event::Scalar(value, _, anchor_id, Some(tag))) => {
            assert_eq!(value, "value");
            assert_eq!(*anchor_id, 1);
            assert_eq!(tag.handle, "tag:yaml.org,2002:");
            assert_eq!(tag.suffix, "str");
        }
        _ => panic!("expected tagged anchored scalar"),
    }
}
/// A tag written before an anchor: both end up on the scalar.
#[test]
fn test_tag_then_anchor_applies_both_to_scalar() {
    let events: Vec<_> = Parser::new_from_str("!!str &a value")
        .map(|event| event.unwrap().0)
        .collect();
    let scalar = events
        .iter()
        .find(|event| matches!(event, Event::Scalar(value, ..) if value == "value"));

    match scalar {
        Some(Event::Scalar(value, _, anchor_id, Some(tag))) => {
            assert_eq!(value, "value");
            assert_eq!(*anchor_id, 1);
            assert_eq!(tag.handle, "tag:yaml.org,2002:");
            assert_eq!(tag.suffix, "str");
        }
        _ => panic!("expected tagged anchored scalar"),
    }
}
/// Two different `%TAG` handles declared for one document both resolve.
#[test]
fn test_multiple_tag_directives_are_kept_within_document() {
    let text = r"
%TAG !a! tag:a,2024:
%TAG !b! tag:b,2024:
---
first: !a!x foo
second: !b!y bar
";
    let (mut seen_a, mut seen_b) = (false, false);
    for event in Parser::new_from_str(text) {
        let (event, _) = event.unwrap();
        if let Event::Scalar(_, _, _, Some(tag)) = event {
            // A handle cannot equal both prefixes, so these are independent.
            seen_a |= tag.handle == "tag:a,2024:";
            seen_b |= tag.handle == "tag:b,2024:";
        }
    }
    assert!(seen_a);
    assert!(seen_b);
}
/// A `%TAG` handle declared for the first document is not available in a
/// second document that has no directives of its own.
#[test]
fn test_tags_are_cleared_when_next_document_has_no_directives() {
let text = r"
%TAG !t! tag:test,2024:
--- !t!1
foo
--- !t!2
bar
";
let mut parser = Parser::new_from_str(text);
// Consume the whole first document.
for event in parser.by_ref() {
let (event, _) = event.unwrap();
if let Event::DocumentEnd = event {
break;
}
}
match parser.next().unwrap().unwrap().0 {
Event::DocumentStart(true) => {}
_ => panic!("expected explicit second document start"),
}
// The `!t!` tag in the second document must fail to resolve.
let err = parser.next().unwrap().unwrap_err();
assert!(format!("{err}").contains("the handle wasn't declared"));
}
/// An anchor defined in the first document cannot be aliased from the second.
#[test]
fn test_pull_parser_clears_anchors_between_documents() {
let mut parser = Parser::new_from_str(
"--- &a value
--- *a
",
);
// Consume the whole first document.
for event in parser.by_ref() {
let (event, _) = event.unwrap();
if matches!(event, Event::DocumentEnd) {
break;
}
}
match parser.next().unwrap().unwrap().0 {
Event::DocumentStart(true) => {}
_ => panic!("expected explicit second document start"),
}
// The alias in the second document must be reported as unknown.
let err = parser.next().unwrap().unwrap_err();
assert!(format!("{err}").contains("unknown anchor"));
}
/// With `keep_tags(true)` the `%TAG` handle resolves in every document; with
/// `keep_tags(false)` the same input must produce an error.
#[test]
fn test_keep_tags_across_multiple_documents() {
let text = r#"
%YAML 1.1
%TAG !t! tag:test,2024:
--- !t!1 &1
foo: "bar"
--- !t!2 &2
baz: "qux"
"#;
// Tags kept: every mapping start carries the resolved handle.
for x in Parser::new_from_str(text).keep_tags(true) {
let x = x.unwrap();
if let Event::MappingStart(_, _, tag) = x.0 {
let tag = tag.unwrap();
assert_eq!(tag.handle, "tag:test,2024:");
}
}
// Tags not kept: the test passes as soon as any error is seen.
for x in Parser::new_from_str(text).keep_tags(false) {
if x.is_err() {
return;
}
}
panic!("Test failed, did not encounter error")
}
/// A flow sequence entry of the form `?: value` (empty key) parses without
/// error.
#[test]
fn test_flow_sequence_mapping_allows_empty_key() {
    Parser::new_from_str("[?: value]").for_each(|event| {
        event.expect("parser should accept flow sequence mappings with empty keys");
    });
}
/// Even with `keep_tags(true)`, an override of the `!!` handle applies only
/// to the document it was declared for; the next document gets the default.
#[test]
fn test_keep_tags_does_not_persist_default_tag_handles() {
    let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";
    let int_tags: Vec<String> = Parser::new_from_str(text)
        .keep_tags(true)
        .map(|event| event.unwrap().0)
        .filter_map(|event| match event {
            Event::Scalar(_, _, _, Some(tag)) if tag.suffix == "int" => {
                Some(tag.handle.clone())
            }
            _ => None,
        })
        .collect();
    assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
}
/// An empty handle with suffix `!` resolves to the prefix registered under
/// the empty key of `tags`.
#[test]
fn test_resolve_tag_uses_overridden_local_prefix() {
let mut parser = Parser::new_from_str("");
// Register a prefix under the empty handle.
parser
.tags
.insert(String::new(), "tag:local.example,2024:".to_string());
let tag = parser
.resolve_tag(
Span::empty(Marker::new(0, 1, 0)),
&Cow::Borrowed(""),
Cow::Borrowed("!"),
)
.unwrap();
assert_eq!(tag.handle, "tag:local.example,2024:");
assert_eq!(tag.suffix, "!");
}
/// `load` still delivers `StreamStart` (followed by a document start) when
/// the stream-start event has already been buffered by a prior `peek`.
#[test]
fn test_load_after_peek_stream_start() {
    let mut parser = Parser::new_from_str("key: value\n");
    // Use this module's `CollectingSink` instead of redefining an identical
    // receiver locally.
    let mut sink = CollectingSink::default();
    assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
    parser.load(&mut sink, false).unwrap();
    assert!(matches!(sink.events.first(), Some(Event::StreamStart)));
    assert!(matches!(sink.events.get(1), Some(Event::DocumentStart(_))));
}
}