use crate::{
input::{str::StrInput, BorrowedInput},
scanner::{ScalarStyle, ScanError, Scanner, Span, Token, TokenType},
BufferedInput,
};
use alloc::{
borrow::Cow,
collections::{BTreeMap, BTreeSet},
string::{String, ToString},
vec::Vec,
};
use core::{
convert::Infallible,
fmt::{self, Display},
};
/// Internal state of the parser's push-down automaton.
///
/// The current state lives in `Parser::state`; suspended outer states are
/// stacked in `Parser::states` and resumed via `pop_state`.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    /// Before `StreamStart` has been emitted.
    StreamStart,
    /// Expecting a document that may omit the explicit `---` marker.
    ImplicitDocumentStart,
    /// Expecting an explicit `---` document start.
    DocumentStart,
    /// Inside a document, expecting its root node.
    DocumentContent,
    /// Expecting the end of the current document.
    DocumentEnd,
    /// Expecting a node in block context.
    BlockNode,
    /// Just opened a block sequence (its start token not yet consumed-past).
    BlockSequenceFirstEntry,
    /// Expecting `-` or the end of a block sequence.
    BlockSequenceEntry,
    /// Expecting `-` entries of a sequence nested at the key's indentation.
    IndentlessSequenceEntry,
    /// Just opened a block mapping.
    BlockMappingFirstKey,
    /// Expecting a key or the end of a block mapping.
    BlockMappingKey,
    /// Expecting the value following a block-mapping key.
    BlockMappingValue,
    /// Just opened a flow sequence (`[`).
    FlowSequenceFirstEntry,
    /// Expecting `,`, an entry, or `]` in a flow sequence.
    FlowSequenceEntry,
    /// Expecting the key of a `?`-style single-pair mapping inside `[...]`.
    FlowSequenceEntryMappingKey,
    /// Expecting the value of that single-pair mapping.
    FlowSequenceEntryMappingValue,
    /// Emitting the implicit `MappingEnd` of that single-pair mapping.
    FlowSequenceEntryMappingEnd,
    /// Just opened a flow mapping (`{`).
    FlowMappingFirstKey,
    /// Expecting `,`, a key, or `}` in a flow mapping.
    FlowMappingKey,
    /// Expecting the value following a flow-mapping key.
    FlowMappingValue,
    /// Emitting an empty value for a flow-mapping key with no `:`.
    FlowMappingEmptyValue,
    /// `StreamEnd` has been produced; terminal state.
    End,
}
/// An event emitted by the parser while walking a YAML stream.
///
/// `usize` fields are anchor ids: `0` means "no anchor". Ids are allocated by
/// the parser when an `&anchor` is seen and referenced back by [`Event::Alias`].
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event<'input> {
    /// Placeholder event carrying no information.
    Nothing,
    /// Start of the event stream.
    StreamStart,
    /// End of the event stream; no further events follow.
    StreamEnd,
    /// Start of a document; `true` when introduced by an explicit `---`.
    DocumentStart(bool),
    /// End of a document.
    DocumentEnd,
    /// Reference back to a previously anchored node.
    Alias(
        // Id of the anchor being referenced.
        usize,
    ),
    /// A scalar value.
    Scalar(
        // Textual content of the scalar.
        Cow<'input, str>,
        // How the scalar was written (plain, quoted, literal, folded).
        ScalarStyle,
        // Anchor id, or 0 if unanchored.
        usize,
        // Resolved tag, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// Start of a sequence.
    SequenceStart(
        // Anchor id, or 0 if unanchored.
        usize,
        // Resolved tag, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// End of the innermost open sequence.
    SequenceEnd,
    /// Start of a mapping.
    MappingStart(
        // Anchor id, or 0 if unanchored.
        usize,
        // Resolved tag, if any.
        Option<Cow<'input, Tag>>,
    ),
    /// End of the innermost open mapping.
    MappingEnd,
}
/// A YAML tag, split into its resolved handle (prefix) and suffix.
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// The resolved handle/prefix (e.g. `tag:yaml.org,2002:` after resolving `!!`).
    pub handle: String,
    /// The suffix following the handle (e.g. `str`).
    pub suffix: String,
}
impl Tag {
    /// Returns `true` if this tag resolves into the YAML core-schema
    /// namespace (`tag:yaml.org,2002:`).
    #[must_use]
    pub fn is_yaml_core_schema(&self) -> bool {
        self.handle == "tag:yaml.org,2002:"
    }
}
impl Display for Tag {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // A previous special case for `handle == "!"` wrote `"!"` followed by
        // the suffix — exactly what the general arm writes for that handle —
        // so a single write covers both cases.
        write!(f, "{}{}", self.handle, self.suffix)
    }
}
impl<'input> Event<'input> {
    /// A placeholder plain scalar (`~`) with no anchor and no tag.
    fn empty_scalar() -> Self {
        Event::Scalar(Cow::from("~"), ScalarStyle::Plain, 0, None)
    }
    /// An empty plain scalar that carries the given anchor id and tag.
    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
        Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
    }
}
/// A YAML event parser layered on top of the token [`Scanner`].
#[derive(Debug)]
pub struct Parser<'input, T: BorrowedInput<'input>> {
    /// Token source.
    scanner: Scanner<'input, T>,
    /// Suspended outer states (the automaton's stack).
    states: Vec<State>,
    /// Current automaton state.
    state: State,
    /// One-token lookahead buffer filled by `peek_token`.
    token: Option<Token<'input>>,
    /// One-event lookahead buffer filled by `peek`.
    current: Option<(Event<'input>, Span)>,
    /// Column of the most recent block-mapping key; attached to the next span.
    pending_key_indent: Option<usize>,
    /// Anchor name -> anchor id, reset per document.
    anchors: BTreeMap<Cow<'input, str>, usize>,
    /// Next anchor id to allocate (0 is reserved for "no anchor").
    anchor_id_count: usize,
    /// Tag handle -> prefix mappings from `%TAG` directives.
    tags: BTreeMap<String, String>,
    /// Set once `StreamEnd` has been returned; fuses the event stream.
    stream_end_emitted: bool,
    /// When `true`, keep `%TAG` mappings across documents (see `keep_tags`).
    keep_tags: bool,
}
/// Receiver for parse events, without span information.
pub trait EventReceiver<'input> {
    /// Called once for each event produced by the parser.
    fn on_event(&mut self, ev: Event<'input>);
}
/// Receiver for parse events together with the source span they cover.
pub trait SpannedEventReceiver<'input> {
    /// Called once for each event, with its span.
    fn on_event(&mut self, ev: Event<'input>, span: Span);
}
// Every plain `EventReceiver` is a `SpannedEventReceiver` that drops the span.
impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
        self.on_event(ev);
    }
}
/// Fallible receiver for parse events; returning `Err` aborts loading.
pub trait TryEventReceiver<'input> {
    /// The receiver's own error type.
    type Error;
    /// Called once for each event; `Err` stops the parse.
    fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error>;
}
/// Fallible receiver for parse events together with their span.
pub trait TrySpannedEventReceiver<'input> {
    /// The receiver's own error type.
    type Error;
    /// Called once for each event with its span; `Err` stops the parse.
    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error>;
}
// Every `TryEventReceiver` is a `TrySpannedEventReceiver` that drops the span.
impl<'input, R: TryEventReceiver<'input>> TrySpannedEventReceiver<'input> for R {
    type Error = R::Error;
    fn on_event(&mut self, ev: Event<'input>, _span: Span) -> Result<(), Self::Error> {
        TryEventReceiver::on_event(self, ev)
    }
}
/// Error returned by `try_load`: either the parser/scanner failed, or the
/// receiver rejected an event.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TryLoadError<E> {
    /// The scanner or parser reported an error.
    Scan(
        ScanError,
    ),
    /// The event receiver returned its own error.
    Receiver(
        E,
    ),
}
impl<E> From<ScanError> for TryLoadError<E> {
fn from(error: ScanError) -> Self {
Self::Scan(error)
}
}
impl<E: Display> Display for TryLoadError<E> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Scan(error) => write!(f, "parser error: {error}"),
Self::Receiver(error) => write!(f, "receiver error: {error}"),
}
}
}
impl<E> core::error::Error for TryLoadError<E>
where
    E: core::error::Error + 'static,
{
    /// Both variants wrap an underlying error; expose it as the source.
    fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
        match self {
            Self::Scan(error) => Some(error),
            Self::Receiver(error) => Some(error),
        }
    }
}
fn try_emit<'input, R>(
recv: &mut R,
ev: Event<'input>,
span: Span,
) -> Result<(), TryLoadError<R::Error>>
where
R: TrySpannedEventReceiver<'input>,
{
recv.on_event(ev, span).map_err(TryLoadError::Receiver)
}
/// Adapts a `SpannedEventReceiver` into a `TrySpannedEventReceiver` whose
/// error type is `Infallible`, so `load` can reuse the `try_load` machinery.
struct InfallibleSpannedReceiver<'receiver, R>(&'receiver mut R);
impl<'input, R: SpannedEventReceiver<'input>> TrySpannedEventReceiver<'input>
    for InfallibleSpannedReceiver<'_, R>
{
    type Error = Infallible;
    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
        self.0.on_event(ev, span);
        Ok(())
    }
}
/// Collapse a `TryLoadError<Infallible>` into a plain `ScanError` result;
/// the `Receiver` variant is statically impossible and vanishes here.
fn into_scan_result(result: Result<(), TryLoadError<Infallible>>) -> Result<(), ScanError> {
    result.map_err(|error| match error {
        TryLoadError::Scan(scan) => scan,
        TryLoadError::Receiver(never) => match never {},
    })
}
/// A parsed event together with its source span, or a scan error.
pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
/// Common interface over parsers producing spanned events.
pub trait ParserTrait<'input> {
    /// Peek at the next event without consuming it; `None` after `StreamEnd`.
    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;
    /// Consume and return the next event; `None` after `StreamEnd`.
    fn next_event(&mut self) -> Option<ParseResult<'input>>;
    /// Drive the parser to completion, feeding events to `recv`.
    /// When `multi` is `false`, stop after the first document.
    fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError>;
    /// Like `load`, but the receiver may abort with its own error.
    fn try_load<R: TrySpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), TryLoadError<R::Error>> {
        while let Some(res) = self.next_event() {
            let (ev, span) = res?;
            // Note the terminators before `ev` is moved into the receiver.
            let is_doc_end = matches!(ev, Event::DocumentEnd);
            let is_stream_end = matches!(ev, Event::StreamEnd);
            try_emit(recv, ev, span)?;
            if is_stream_end {
                break;
            }
            if !multi && is_doc_end {
                break;
            }
        }
        Ok(())
    }
}
impl<'input> Parser<'input, StrInput<'input>> {
    /// Create a parser whose scalars borrow directly from `value`.
    #[must_use]
    pub fn new_from_str(value: &'input str) -> Self {
        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
        Parser::new(StrInput::new(value))
    }
}
impl<T> Parser<'static, BufferedInput<T>>
where
    T: Iterator<Item = char>,
{
    /// Create a parser over a `char` iterator; scalars are owned (buffered),
    /// hence the `'static` input lifetime.
    #[must_use]
    pub fn new_from_iter(iter: T) -> Self {
        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
        Parser::new(BufferedInput::new(iter))
    }
}
impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
/// Current value of the anchor id counter (the next id to be allocated).
pub fn get_anchor_offset(&self) -> usize {
    self.anchor_id_count
}
/// Override the anchor id counter, e.g. to continue ids from a prior parse.
pub fn set_anchor_offset(&mut self, offset: usize) {
    self.anchor_id_count = offset;
}
/// Create a parser reading from `src`, positioned before the stream start.
pub fn new(src: T) -> Self {
    Parser {
        scanner: Scanner::new(src),
        state: State::StreamStart,
        states: Vec::new(),
        token: None,
        current: None,
        pending_key_indent: None,
        anchors: BTreeMap::new(),
        // Anchor ids start at 1; 0 means "no anchor" in emitted events.
        anchor_id_count: 1,
        tags: BTreeMap::new(),
        keep_tags: false,
        stream_end_emitted: false,
    }
}
/// Builder-style switch: when `true`, `%TAG` directives are retained across
/// documents instead of being cleared at each document end.
#[must_use]
pub fn keep_tags(mut self, value: bool) -> Self {
    self.keep_tags = value;
    self
}
/// Peek at the next event without consuming it (forwards to [`ParserTrait::peek`]).
pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
    ParserTrait::peek(self)
}
/// Consume and return the next event (forwards to [`ParserTrait::next_event`]).
pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
    ParserTrait::next_event(self)
}
/// Produce the next event, serving a previously peeked one first.
fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    if let Some(buffered) = self.current.take() {
        Ok(buffered)
    } else {
        self.parse()
    }
}
/// Return a reference to the next token, scanning one if none is buffered.
fn peek_token(&mut self) -> Result<&Token<'_>, ScanError> {
    if self.token.is_none() {
        self.token = Some(self.scan_next_token()?);
    }
    // Just filled above (or already present), so this cannot panic.
    Ok(self.token.as_ref().unwrap())
}
/// Pull the next token from the scanner, mapping end-of-tokens into either
/// the scanner's own error or an EOF diagnostic based on the current state.
fn scan_next_token(&mut self) -> Result<Token<'input>, ScanError> {
    match self.scanner.next() {
        Some(tok) => Ok(tok),
        None => Err(self
            .scanner
            .get_error()
            .unwrap_or_else(|| self.unexpected_eof())),
    }
}
/// Build an end-of-input error whose message names the construct currently
/// being parsed. (The same message table is repeated in `parse_node` for the
/// non-EOF "no node content" case.)
#[cold]
fn unexpected_eof(&self) -> ScanError {
    let info = match self.state {
        State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
            "unexpected EOF while parsing a flow sequence"
        }
        State::FlowMappingFirstKey
        | State::FlowMappingKey
        | State::FlowMappingValue
        | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
        State::FlowSequenceEntryMappingKey
        | State::FlowSequenceEntryMappingValue
        | State::FlowSequenceEntryMappingEnd => {
            "unexpected EOF while parsing an implicit flow mapping"
        }
        State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
            "unexpected EOF while parsing a block sequence"
        }
        State::BlockMappingFirstKey | State::BlockMappingKey | State::BlockMappingValue => {
            "unexpected EOF while parsing a block mapping"
        }
        _ => "unexpected eof",
    };
    ScanError::new_str(self.scanner.mark(), info)
}
/// Take the buffered token by value; `peek_token` must have been called first.
fn fetch_token<'a>(&mut self) -> Token<'a>
where
    'input: 'a,
{
    self.token
        .take()
        .expect("fetch_token needs to be preceded by peek_token")
}
/// Discard the buffered token so the next peek scans a fresh one.
fn skip(&mut self) {
    self.token = None;
}
/// Resume the most recently suspended state.
fn pop_state(&mut self) {
    self.state = self.states.pop().unwrap();
}
/// Suspend `state` to return to it after the current production.
fn push_state(&mut self, state: State) {
    self.states.push(state);
}
/// Run the state machine for one event and attach any pending key indent.
fn parse<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    // Once the stream has finished, keep returning `StreamEnd`.
    if self.state == State::End {
        return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
    }
    let (ev, span) = self.state_machine()?;
    // Attach the key column recorded while producing this event, if any.
    let span = match self.pending_key_indent.take() {
        Some(indent) => span.with_indent(Some(indent)),
        None => span,
    };
    Ok((ev, span))
}
/// Drive the parser to completion, feeding events into `recv`
/// (forwards to [`ParserTrait::load`]).
pub fn load<R: SpannedEventReceiver<'input>>(
    &mut self,
    recv: &mut R,
    multi: bool,
) -> Result<(), ScanError> {
    ParserTrait::load(self, recv, multi)
}
/// Like `load`, but the receiver may abort with its own error
/// (forwards to [`ParserTrait::try_load`]).
pub fn try_load<R: TrySpannedEventReceiver<'input>>(
    &mut self,
    recv: &mut R,
    multi: bool,
) -> Result<(), TryLoadError<R::Error>> {
    ParserTrait::try_load(self, recv, multi)
}
/// Forward one full document (whose first event is `first_ev`) to `recv`.
///
/// # Errors
/// Fails if `first_ev` is not `DocumentStart`, on scanner errors, or when
/// the receiver rejects an event.
fn try_load_document<R: TrySpannedEventReceiver<'input>>(
    &mut self,
    first_ev: Event<'input>,
    span: Span,
    recv: &mut R,
) -> Result<(), TryLoadError<R::Error>> {
    if !matches!(first_ev, Event::DocumentStart(_)) {
        return Err(TryLoadError::Scan(ScanError::new_str(
            span.start,
            "did not find expected <document-start>",
        )));
    }
    try_emit(recv, first_ev, span)?;
    // Root node of the document.
    let (ev, span) = self.next_event_impl()?;
    self.try_load_node(ev, span, recv)?;
    // The state machine guarantees a DocumentEnd after the root node.
    let (ev, mark) = self.next_event_impl()?;
    assert_eq!(ev, Event::DocumentEnd);
    try_emit(recv, ev, mark)?;
    Ok(())
}
/// Forward one node (scalar, alias, sequence, or mapping) to `recv`,
/// recursing into collections until their end events.
fn try_load_node<R: TrySpannedEventReceiver<'input>>(
    &mut self,
    first_ev: Event<'input>,
    span: Span,
    recv: &mut R,
) -> Result<(), TryLoadError<R::Error>> {
    match first_ev {
        Event::Alias(..) | Event::Scalar(..) => try_emit(recv, first_ev, span),
        Event::SequenceStart(..) => {
            try_emit(recv, first_ev, span)?;
            self.try_load_sequence(recv)
        }
        Event::MappingStart(..) => {
            try_emit(recv, first_ev, span)?;
            self.try_load_mapping(recv)
        }
        _ => {
            // The state machine only hands node-level events to this function.
            #[cfg(feature = "debug_prints")]
            std::println!("UNREACHABLE EVENT: {first_ev:?}");
            unreachable!();
        }
    }
}
/// Forward every key/value pair of a mapping, up to and including its
/// `MappingEnd` event.
fn try_load_mapping<R: TrySpannedEventReceiver<'input>>(
    &mut self,
    recv: &mut R,
) -> Result<(), TryLoadError<R::Error>> {
    let (mut ev, mut span) = self.next_event_impl()?;
    while ev != Event::MappingEnd {
        // `ev` holds the key; load it, then its value, then what follows.
        self.try_load_node(ev, span, recv)?;
        let (value_ev, value_span) = self.next_event_impl()?;
        self.try_load_node(value_ev, value_span, recv)?;
        let next = self.next_event_impl()?;
        ev = next.0;
        span = next.1;
    }
    try_emit(recv, ev, span)
}
/// Forward every entry of a sequence, up to and including its `SequenceEnd`.
fn try_load_sequence<R: TrySpannedEventReceiver<'input>>(
    &mut self,
    recv: &mut R,
) -> Result<(), TryLoadError<R::Error>> {
    let (mut ev, mut span) = self.next_event_impl()?;
    loop {
        if ev == Event::SequenceEnd {
            break;
        }
        self.try_load_node(ev, span, recv)?;
        let next = self.next_event_impl()?;
        ev = next.0;
        span = next.1;
    }
    try_emit(recv, ev, span)
}
/// Dispatch to the handler for the current automaton state.
fn state_machine<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
    match self.state {
        State::StreamStart => self.stream_start(),
        State::ImplicitDocumentStart => self.document_start(true),
        State::DocumentStart => self.document_start(false),
        State::DocumentContent => self.document_content(),
        State::DocumentEnd => self.document_end(),
        State::BlockNode => self.parse_node(true, false),
        State::BlockMappingFirstKey => self.block_mapping_key(true),
        State::BlockMappingKey => self.block_mapping_key(false),
        State::BlockMappingValue => self.block_mapping_value(),
        State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
        State::BlockSequenceEntry => self.block_sequence_entry(false),
        State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
        State::FlowSequenceEntry => self.flow_sequence_entry(false),
        State::FlowMappingFirstKey => self.flow_mapping_key(true),
        State::FlowMappingKey => self.flow_mapping_key(false),
        State::FlowMappingValue => self.flow_mapping_value(false),
        State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
        State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
        State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
        State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
        State::FlowMappingEmptyValue => self.flow_mapping_value(true),
        // `parse` short-circuits before reaching the state machine in `End`.
        State::End => unreachable!(),
    }
}
/// Handle `State::StreamStart`: the first token must be `StreamStart`.
fn stream_start<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        Token(span, TokenType::StreamStart(_)) => {
            self.state = State::ImplicitDocumentStart;
            self.skip();
            Ok((Event::StreamStart, span))
        }
        Token(span, _) => Err(ScanError::new_str(
            span.start,
            "did not find expected <stream-start>",
        )),
    }
}
/// Handle document start; `implicit` means no `---` marker is required.
fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    // Consume any stray `...` end markers between documents.
    while let TokenType::DocumentEnd = self.peek_token()?.1 {
        self.skip();
    }
    // Anchors do not carry over between documents.
    self.anchors.clear();
    match *self.peek_token()? {
        Token(span, TokenType::StreamEnd) => {
            self.state = State::End;
            self.skip();
            Ok((Event::StreamEnd, span))
        }
        Token(
            _,
            TokenType::VersionDirective(..)
            | TokenType::TagDirective(..)
            | TokenType::ReservedDirective(..)
            | TokenType::DocumentStart,
        ) => {
            // Directives or `---` force an explicit document start.
            self.explicit_document_start()
        }
        Token(span, _) if implicit => {
            // Bare content: begin an implicit document.
            self.parser_process_directives()?;
            self.push_state(State::DocumentEnd);
            self.state = State::BlockNode;
            Ok((Event::DocumentStart(false), span))
        }
        _ => {
            self.explicit_document_start()
        }
    }
}
/// Consume all `%YAML`, `%TAG`, and reserved directives before a document,
/// collecting the tag handle -> prefix table for `resolve_tag`.
///
/// # Errors
/// Fails on a duplicate `%YAML` directive, or when a `%TAG` handle is given
/// more than once within the same document.
fn parser_process_directives(&mut self) -> Result<(), ScanError> {
    let mut version_directive_received = false;
    // With `keep_tags`, start from the handles of previous documents.
    let mut tags = if self.keep_tags {
        self.tags.clone()
    } else {
        BTreeMap::new()
    };
    // Tracks handles declared in *this* document to reject duplicates.
    let mut document_tag_handles = BTreeSet::new();
    loop {
        match self.peek_token()? {
            Token(span, TokenType::VersionDirective(_, _)) => {
                if version_directive_received {
                    return Err(ScanError::new_str(
                        span.start,
                        "duplicate version directive",
                    ));
                }
                version_directive_received = true;
            }
            Token(mark, TokenType::TagDirective(handle, prefix)) => {
                if !document_tag_handles.insert(handle.to_string()) {
                    return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
                }
                tags.insert(handle.to_string(), prefix.to_string());
            }
            Token(_, TokenType::ReservedDirective(_, _)) => {
                // Unknown directives are skipped without effect.
            }
            _ => break,
        }
        self.skip();
    }
    self.tags = tags;
    Ok(())
}
/// Process directives, then require an explicit `---` marker.
fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    self.parser_process_directives()?;
    match *self.peek_token()? {
        Token(mark, TokenType::DocumentStart) => {
            self.push_state(State::DocumentEnd);
            self.state = State::DocumentContent;
            self.skip();
            Ok((Event::DocumentStart(true), mark))
        }
        Token(span, _) => Err(ScanError::new_str(
            span.start,
            "did not find expected <document start>",
        )),
    }
}
/// Handle the root of an explicit document; a document with no content
/// yields a single empty scalar.
fn document_content<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        Token(
            mark,
            TokenType::VersionDirective(..)
            | TokenType::TagDirective(..)
            | TokenType::ReservedDirective(..)
            | TokenType::DocumentStart
            | TokenType::DocumentEnd
            | TokenType::StreamEnd,
        ) => {
            // Next document (or end of stream) immediately follows `---`.
            self.pop_state();
            Ok((Event::empty_scalar(), mark))
        }
        _ => self.parse_node(true, false),
    }
}
/// Handle the end of a document, consuming an explicit `...` marker if
/// present and resetting the tag table (unless `keep_tags` is set).
fn document_end<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    let mut explicit_end = false;
    let span: Span = match *self.peek_token()? {
        Token(span, TokenType::DocumentEnd) => {
            explicit_end = true;
            self.skip();
            span
        }
        Token(span, _) => span,
    };
    if self.keep_tags {
        // Even with `keep_tags`, the default-handle overrides ("!!"/"")
        // apply only to the document that declared them.
        self.tags.remove("!!");
        self.tags.remove("");
    } else {
        self.tags.clear();
    }
    if explicit_end {
        self.state = State::ImplicitDocumentStart;
    } else {
        // Directives after an implicitly-ended document are invalid: the
        // previous document must be closed with `...` first.
        if let Token(
            span,
            TokenType::VersionDirective(..)
            | TokenType::TagDirective(..)
            | TokenType::ReservedDirective(..),
        ) = *self.peek_token()?
        {
            return Err(ScanError::new_str(
                span.start,
                "missing explicit document end marker before directive",
            ));
        }
        self.state = State::DocumentStart;
    }
    Ok((Event::DocumentEnd, span))
}
/// Record `name` as an anchor and return its freshly allocated id.
///
/// # Errors
/// Fails if the anchor id counter would overflow.
fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
    let new_id = self.anchor_id_count;
    match new_id.checked_add(1) {
        Some(next) => self.anchor_id_count = next,
        None => {
            return Err(ScanError::new_str(
                mark.start,
                "while parsing anchor, anchor count exceeded supported limit",
            ))
        }
    }
    self.anchors.insert(name, new_id);
    Ok(new_id)
}
/// Parse one node: an alias, a scalar, or the start of a collection.
///
/// `block` permits block-style collections; `indentless_sequence` permits a
/// `-` entry at the current indentation to open a sequence.
#[allow(clippy::too_many_lines)]
fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    let mut anchor_id = 0;
    let mut tag = None;
    // Leading node properties: an alias terminates the node immediately; an
    // anchor and a tag may appear in either order before the content.
    match *self.peek_token()? {
        Token(_, TokenType::Alias(_)) => {
            self.pop_state();
            if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
                match self.anchors.get(&*name) {
                    None => {
                        return Err(ScanError::new_str(
                            span.start,
                            "while parsing node, found unknown anchor",
                        ))
                    }
                    Some(id) => return Ok((Event::Alias(*id), span)),
                }
            }
            unreachable!()
        }
        Token(_, TokenType::Anchor(_)) => {
            // Anchor first, optionally followed by a tag.
            if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
                anchor_id = self.register_anchor(name, &span)?;
                if let TokenType::Tag(..) = self.peek_token()?.1 {
                    if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
                        tag = Some(self.resolve_tag(span, &handle, suffix)?);
                    } else {
                        unreachable!()
                    }
                }
            } else {
                unreachable!()
            }
        }
        Token(mark, TokenType::Tag(..)) => {
            // Tag first, optionally followed by an anchor.
            if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
                tag = Some(self.resolve_tag(mark, &handle, suffix)?);
                if let TokenType::Anchor(_) = &self.peek_token()?.1 {
                    if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
                        anchor_id = self.register_anchor(name, &mark)?;
                    } else {
                        unreachable!()
                    }
                }
            } else {
                unreachable!()
            }
        }
        _ => {}
    }
    // Node content.
    match *self.peek_token()? {
        Token(mark, TokenType::BlockEntry) if indentless_sequence => {
            self.state = State::IndentlessSequenceEntry;
            Ok((Event::SequenceStart(anchor_id, tag), mark))
        }
        Token(_, TokenType::Scalar(..)) => {
            self.pop_state();
            if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
                Ok((Event::Scalar(v, style, anchor_id, tag), mark))
            } else {
                unreachable!()
            }
        }
        Token(mark, TokenType::FlowSequenceStart) => {
            self.state = State::FlowSequenceFirstEntry;
            Ok((Event::SequenceStart(anchor_id, tag), mark))
        }
        Token(mark, TokenType::FlowMappingStart) => {
            self.state = State::FlowMappingFirstKey;
            Ok((Event::MappingStart(anchor_id, tag), mark))
        }
        Token(mark, TokenType::BlockSequenceStart) if block => {
            self.state = State::BlockSequenceFirstEntry;
            Ok((Event::SequenceStart(anchor_id, tag), mark))
        }
        Token(mark, TokenType::BlockMappingStart) if block => {
            self.state = State::BlockMappingFirstKey;
            Ok((Event::MappingStart(anchor_id, tag), mark))
        }
        // Properties with no content following them: an empty scalar node.
        Token(mark, _) if tag.is_some() || anchor_id > 0 => {
            self.pop_state();
            Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
        }
        Token(span, _) => {
            // No node found: pick an error message matching the enclosing
            // construct (kept in sync with `unexpected_eof`).
            let info = match self.state {
                State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
                    "unexpected EOF while parsing a flow sequence"
                }
                State::FlowMappingFirstKey
                | State::FlowMappingKey
                | State::FlowMappingValue
                | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
                State::FlowSequenceEntryMappingKey
                | State::FlowSequenceEntryMappingValue
                | State::FlowSequenceEntryMappingEnd => {
                    "unexpected EOF while parsing an implicit flow mapping"
                }
                State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
                    "unexpected EOF while parsing a block sequence"
                }
                State::BlockMappingFirstKey
                | State::BlockMappingKey
                | State::BlockMappingValue => "unexpected EOF while parsing a block mapping",
                _ => "while parsing a node, did not find expected node content",
            };
            Err(ScanError::new_str(span.start, info))
        }
    }
}
/// Handle a block-mapping key position; `first` means the opening
/// `BlockMappingStart` token is still buffered and must be consumed.
fn block_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    if first {
        // Consume the BlockMappingStart token that opened this mapping.
        let _ = self.peek_token()?;
        self.skip();
    }
    match *self.peek_token()? {
        Token(key_span, TokenType::Key) => {
            // Record the key's column so `parse` can attach it to the span.
            // (Previously the same buffered token was peeked a second time
            // just to read this span; binding it here avoids the re-peek.)
            self.pending_key_indent = Some(key_span.start.col());
            self.skip();
            if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
                *self.peek_token()?
            {
                // `?` with nothing after it: the key itself is empty.
                self.state = State::BlockMappingValue;
                Ok((Event::empty_scalar(), mark))
            } else {
                self.push_state(State::BlockMappingValue);
                self.parse_node(true, true)
            }
        }
        Token(mark, TokenType::Value) => {
            // A `:` with no preceding key: emit an empty key scalar.
            self.state = State::BlockMappingValue;
            Ok((Event::empty_scalar(), mark))
        }
        Token(mark, TokenType::BlockEnd) => {
            self.pop_state();
            self.skip();
            Ok((Event::MappingEnd, mark))
        }
        Token(span, _) => Err(ScanError::new_str(
            span.start,
            "while parsing a block mapping, did not find expected key",
        )),
    }
}
/// Handle a block-mapping value position (just after the key).
fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        Token(mark, TokenType::Value) => {
            self.skip();
            if let Token(_, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
                *self.peek_token()?
            {
                // `:` immediately followed by the next key / end: empty value.
                self.state = State::BlockMappingKey;
                Ok((Event::empty_scalar(), mark))
            } else {
                self.push_state(State::BlockMappingKey);
                self.parse_node(true, true)
            }
        }
        Token(mark, _) => {
            // No `:` at all: the key has an empty value.
            self.state = State::BlockMappingKey;
            Ok((Event::empty_scalar(), mark))
        }
    }
}
/// Handle a flow-mapping key position; `first` means the opening `{`
/// token is still buffered and must be consumed.
fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    if first {
        // Consume the FlowMappingStart token.
        let _ = self.peek_token()?;
        self.skip();
    }
    let span: Span = if let Token(mark, TokenType::FlowMappingEnd) = *self.peek_token()? {
        mark
    } else {
        if !first {
            // Entries after the first must be separated by `,`.
            match *self.peek_token()? {
                Token(_, TokenType::FlowEntry) => self.skip(),
                Token(span, _) => {
                    return Err(ScanError::new_str(
                        span.start,
                        "while parsing a flow mapping, did not find expected ',' or '}'",
                    ))
                }
            }
        }
        match *self.peek_token()? {
            Token(_, TokenType::Key) => {
                self.skip();
                if let Token(
                    mark,
                    TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd,
                ) = *self.peek_token()?
                {
                    // `?` with nothing after it: empty key.
                    self.state = State::FlowMappingValue;
                    return Ok((Event::empty_scalar(), mark));
                }
                self.push_state(State::FlowMappingValue);
                return self.parse_node(false, false);
            }
            Token(marker, TokenType::Value) => {
                // `:` with no key: empty key scalar.
                self.state = State::FlowMappingValue;
                return Ok((Event::empty_scalar(), marker));
            }
            Token(_, TokenType::FlowMappingEnd) => (),
            _ => {
                // Bare node used as a key; its value may be omitted entirely.
                self.push_state(State::FlowMappingEmptyValue);
                return self.parse_node(false, false);
            }
        }
        // Fall through: a `}` follows; grab its span below.
        self.peek_token()?.0
    };
    self.pop_state();
    self.skip();
    Ok((Event::MappingEnd, span))
}
/// Handle a flow-mapping value position; `empty` means the key had no `:`
/// and the value is synthesized as an empty scalar.
fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    let span: Span = {
        if empty {
            let Token(mark, _) = *self.peek_token()?;
            self.state = State::FlowMappingKey;
            return Ok((Event::empty_scalar(), mark));
        }
        match *self.peek_token()? {
            Token(span, TokenType::Value) => {
                self.skip();
                match self.peek_token()?.1 {
                    // `:` directly followed by `,` or `}`: empty value.
                    TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
                    _ => {
                        self.push_state(State::FlowMappingKey);
                        return self.parse_node(false, false);
                    }
                }
                span
            }
            Token(marker, _) => marker,
        }
    };
    self.state = State::FlowMappingKey;
    Ok((Event::empty_scalar(), span))
}
/// Handle a flow-sequence entry position; `first` means the opening `[`
/// token is still buffered and must be consumed.
fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    if first {
        // Consume the FlowSequenceStart token.
        let _ = self.peek_token()?;
        self.skip();
    }
    match *self.peek_token()? {
        Token(mark, TokenType::FlowSequenceEnd) => {
            self.pop_state();
            self.skip();
            return Ok((Event::SequenceEnd, mark));
        }
        Token(_, TokenType::FlowEntry) if !first => {
            self.skip();
        }
        Token(span, _) if !first => {
            // Entries after the first must be separated by `,`.
            return Err(ScanError::new_str(
                span.start,
                "while parsing a flow sequence, expected ',' or ']'",
            ));
        }
        _ => { }
    }
    match *self.peek_token()? {
        Token(mark, TokenType::FlowSequenceEnd) => {
            // Trailing comma before `]`.
            self.pop_state();
            self.skip();
            Ok((Event::SequenceEnd, mark))
        }
        Token(mark, TokenType::Key) => {
            // `?` inside `[...]` opens an implicit single-pair mapping.
            self.state = State::FlowSequenceEntryMappingKey;
            self.skip();
            Ok((Event::MappingStart(0, None), mark))
        }
        _ => {
            self.push_state(State::FlowSequenceEntry);
            self.parse_node(false, false)
        }
    }
}
/// Handle an entry of a sequence whose `-` markers sit at the same
/// indentation as the enclosing mapping key.
fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        Token(mark, TokenType::BlockEntry) => {
            self.skip();
            if let Token(
                _,
                TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
            ) = *self.peek_token()?
            {
                // `-` immediately followed by another marker: empty entry.
                self.state = State::IndentlessSequenceEntry;
                Ok((Event::empty_scalar(), mark))
            } else {
                self.push_state(State::IndentlessSequenceEntry);
                self.parse_node(true, false)
            }
        }
        Token(mark, _) => {
            // Anything other than `-` ends the indentless sequence.
            self.pop_state();
            Ok((Event::SequenceEnd, mark))
        }
    }
}
/// Handle a block-sequence entry position; `first` means the opening
/// `BlockSequenceStart` token is still buffered and must be consumed.
fn block_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
where
    'input: 'a,
{
    if first {
        // Consume the BlockSequenceStart token.
        let _ = self.peek_token()?;
        self.skip();
    }
    match *self.peek_token()? {
        Token(mark, TokenType::BlockEnd) => {
            self.pop_state();
            self.skip();
            Ok((Event::SequenceEnd, mark))
        }
        Token(mark, TokenType::BlockEntry) => {
            self.skip();
            if let Token(_, TokenType::BlockEntry | TokenType::BlockEnd) = *self.peek_token()? {
                // `-` immediately followed by `-` or the end: empty entry.
                self.state = State::BlockSequenceEntry;
                Ok((Event::empty_scalar(), mark))
            } else {
                self.push_state(State::BlockSequenceEntry);
                self.parse_node(true, false)
            }
        }
        Token(span, _) => Err(ScanError::new_str(
            span.start,
            "while parsing a block collection, did not find expected '-' indicator",
        )),
    }
}
/// Handle the key of an implicit single-pair mapping inside a flow
/// sequence (after a `?` token).
fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
        *self.peek_token()?
    {
        // `?` immediately followed by `,` or `]`: empty key.
        self.state = State::FlowSequenceEntryMappingValue;
        Ok((Event::empty_scalar(), mark))
    } else {
        self.push_state(State::FlowSequenceEntryMappingValue);
        self.parse_node(false, false)
    }
}
/// Handle the value of an implicit single-pair mapping inside a flow
/// sequence.
fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    match *self.peek_token()? {
        Token(_, TokenType::Value) => {
            self.skip();
            self.state = State::FlowSequenceEntryMappingValue;
            let Token(span, ref tok) = *self.peek_token()?;
            if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
                // `:` directly followed by `,` or `]`: empty value.
                self.state = State::FlowSequenceEntryMappingEnd;
                Ok((Event::empty_scalar(), Span::empty(span.start)))
            } else {
                self.push_state(State::FlowSequenceEntryMappingEnd);
                self.parse_node(false, false)
            }
        }
        Token(mark, _) => {
            // No `:` at all: the value is empty.
            self.state = State::FlowSequenceEntryMappingEnd;
            Ok((Event::empty_scalar(), mark))
        }
    }
}
/// Close the implicit single-pair mapping and resume the flow sequence.
#[allow(clippy::unnecessary_wraps)]
fn flow_sequence_entry_mapping_end<'a>(&mut self) -> ParseResult<'a>
where
    'input: 'a,
{
    self.state = State::FlowSequenceEntry;
    let mark = self.peek_token()?.0;
    Ok((Event::MappingEnd, Span::empty(mark.start)))
}
/// Resolve a tag token (`handle`, `suffix`) into a [`Tag`], applying any
/// `%TAG` directive prefixes collected for the current document.
///
/// # Errors
/// Fails when a named handle of the form `!name!` was never declared.
fn resolve_tag(
    &self,
    span: Span,
    handle: &Cow<'input, str>,
    suffix: Cow<'input, str>,
) -> Result<Cow<'input, Tag>, ScanError> {
    let suffix = suffix.into_owned();
    let tag = if handle == "!!" {
        // Secondary handle: defaults to the core-schema namespace unless
        // overridden by a `%TAG !! ...` directive.
        Tag {
            handle: self
                .tags
                .get("!!")
                .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
            suffix,
        }
    } else if handle.is_empty() && suffix == "!" {
        // Non-specific `!` tag; may be redirected by a `%TAG ! ...` directive.
        match self.tags.get("") {
            Some(prefix) => Tag {
                handle: prefix.clone(),
                suffix,
            },
            None => Tag {
                handle: String::new(),
                suffix,
            },
        }
    } else {
        let prefix = self.tags.get(&**handle);
        if let Some(prefix) = prefix {
            Tag {
                handle: prefix.clone(),
                suffix,
            }
        } else {
            // A named handle (`!name!`) without a matching `%TAG` is an error;
            // anything else passes through verbatim.
            if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
            }
            Tag {
                handle: handle.to_string(),
                suffix,
            }
        }
    };
    Ok(Cow::Owned(tag))
}
}
impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
/// Peek at the next event, buffering it in `self.current` so a later
/// `next_event` returns the same value. `None` once the stream has ended.
fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
    if let Some(ref x) = self.current {
        Some(Ok(x))
    } else {
        if self.stream_end_emitted {
            return None;
        }
        match self.next_event_impl() {
            Ok(token) => self.current = Some(token),
            Err(e) => return Some(Err(e)),
        }
        self.current.as_ref().map(Ok)
    }
}
/// Consume and return the next event; fuses to `None` after `StreamEnd`.
fn next_event(&mut self) -> Option<ParseResult<'input>> {
    if self.stream_end_emitted {
        return None;
    }
    let result = self.next_event_impl();
    if let Ok((Event::StreamEnd, _)) = result {
        self.stream_end_emitted = true;
    }
    Some(result)
}
/// Infallible load: wraps `recv` so its error type is `Infallible`, reuses
/// `try_load`, and strips the impossible receiver variant from the result.
fn load<R: SpannedEventReceiver<'input>>(
    &mut self,
    recv: &mut R,
    multi: bool,
) -> Result<(), ScanError> {
    let mut recv = InfallibleSpannedReceiver(recv);
    into_scan_result(ParserTrait::try_load(self, &mut recv, multi))
}
/// Drive the parser to completion, validating the stream framing and
/// recursively loading each document into `recv`.
fn try_load<R: TrySpannedEventReceiver<'input>>(
    &mut self,
    recv: &mut R,
    multi: bool,
) -> Result<(), TryLoadError<R::Error>> {
    // Emit StreamStart if the stream hasn't started yet, or if a peeked
    // StreamStart event is still sitting in the lookahead buffer.
    let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
    if !self.scanner.stream_started() || stream_start_buffered {
        let (ev, span) = self.next_event_impl()?;
        if ev != Event::StreamStart {
            return Err(TryLoadError::Scan(ScanError::new_str(
                span.start,
                "did not find expected <stream-start>",
            )));
        }
        try_emit(recv, ev, span)?;
    }
    if self.scanner.stream_ended() {
        // The stream was already drained by a previous call; only StreamEnd
        // remains to report.
        try_emit(recv, Event::StreamEnd, Span::empty(self.scanner.mark()))?;
        return Ok(());
    }
    loop {
        let (ev, span) = self.next_event_impl()?;
        if ev == Event::StreamEnd {
            try_emit(recv, ev, span)?;
            return Ok(());
        }
        // Anchors are scoped to a single document.
        self.anchors.clear();
        self.try_load_document(ev, span, recv)?;
        if !multi {
            break;
        }
    }
    Ok(())
}
}
impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
type Item = Result<(Event<'input>, Span), ScanError>;
fn next(&mut self) -> Option<Self::Item> {
self.next_event()
}
}
#[cfg(test)]
mod test {
use alloc::{
borrow::ToOwned,
string::{String, ToString},
vec::Vec,
};
use crate::scanner::{ScalarStyle, Span};
use super::{
Event, EventReceiver, Parser, Tag, TryEventReceiver, TryLoadError, TrySpannedEventReceiver,
};
/// Test sink that records every event it receives.
#[derive(Default)]
struct CollectingSink<'input> {
    events: Vec<Event<'input>>,
}
impl<'input> EventReceiver<'input> for CollectingSink<'input> {
    fn on_event(&mut self, ev: Event<'input>) {
        self.events.push(ev);
    }
}
/// Parse `input` and return the info string of the first error produced.
/// Panics if the whole stream parses without error.
fn first_error_info(input: &str) -> String {
    for event in Parser::new_from_str(input) {
        if let Err(err) = event {
            return err.info().to_owned();
        }
    }
    panic!("expected parser error")
}
// A resolved core-schema tag displays as prefix + suffix, with no extra `!`.
#[test]
fn display_resolved_core_tag_without_extra_bang() {
    let tag = Tag {
        handle: "tag:yaml.org,2002:".to_owned(),
        suffix: "str".to_owned(),
    };
    assert_eq!(tag.to_string(), "tag:yaml.org,2002:str");
}
// `is_yaml_core_schema` is true only for the core namespace; local tags
// render with their `!` handle.
#[test]
fn tag_helpers_distinguish_core_and_local_tags() {
    let core = Tag {
        handle: "tag:yaml.org,2002:".to_owned(),
        suffix: "int".to_owned(),
    };
    let local = Tag {
        handle: "!".to_owned(),
        suffix: "thing".to_owned(),
    };
    assert!(core.is_yaml_core_schema());
    assert!(!local.is_yaml_core_schema());
    assert_eq!(local.to_string(), "!thing");
}
// `peek` must return exactly the event a subsequent `next_event` yields.
#[test]
fn test_peek_eq_parse() {
    let s = "
a0 bb: val
a1: &x
b1: 4
b2: d
a2: 4
a3: [1, 2, 3]
a4:
- [a1, a2]
- 2
a5: *x
";
    let mut p = Parser::new_from_str(s);
    loop {
        let event_peek = p.peek().unwrap().unwrap().clone();
        let event = p.next_event().unwrap().unwrap();
        assert_eq!(event, event_peek);
        if event.0 == Event::StreamEnd {
            break;
        }
    }
}
// After StreamEnd the parser is fused: both `next_event` and `peek` yield None.
#[test]
fn test_peek_and_next_return_none_after_stream_end() {
    let mut parser = Parser::new_from_str("");
    assert!(matches!(
        parser.next_event().unwrap().unwrap().0,
        Event::StreamStart
    ));
    assert!(matches!(
        parser.next_event().unwrap().unwrap().0,
        Event::StreamEnd
    ));
    assert!(parser.next_event().is_none());
    assert!(parser.peek().is_none());
}
// Loading a fully-drained parser still reports a final StreamEnd.
#[test]
fn test_load_after_stream_already_ended_emits_stream_end() {
    let mut parser = Parser::new_from_str("");
    while parser.next_event().is_some() {}
    let mut sink = CollectingSink::default();
    parser.load(&mut sink, true).unwrap();
    assert_eq!(sink.events, vec![Event::StreamEnd]);
}
// `load` recurses through nested mappings and sequences in document order.
#[test]
fn test_load_visits_nested_collection_events() {
    let mut parser = Parser::new_from_str("root:\n - item: value\n - [a, b]\n");
    let mut sink = CollectingSink::default();
    parser.load(&mut sink, true).unwrap();
    assert_eq!(
        sink.events,
        vec![
            Event::StreamStart,
            Event::DocumentStart(false),
            Event::MappingStart(0, None),
            Event::Scalar("root".into(), ScalarStyle::Plain, 0, None),
            Event::SequenceStart(0, None),
            Event::MappingStart(0, None),
            Event::Scalar("item".into(), ScalarStyle::Plain, 0, None),
            Event::Scalar("value".into(), ScalarStyle::Plain, 0, None),
            Event::MappingEnd,
            Event::SequenceStart(0, None),
            Event::Scalar("a".into(), ScalarStyle::Plain, 0, None),
            Event::Scalar("b".into(), ScalarStyle::Plain, 0, None),
            Event::SequenceEnd,
            Event::SequenceEnd,
            Event::MappingEnd,
            Event::DocumentEnd,
            Event::StreamEnd,
        ]
    );
}
#[derive(Clone, Debug, PartialEq, Eq)]
enum ValidationError {
ForbiddenValue,
}
struct FailingSink<'input> {
events: Vec<Event<'input>>,
}
impl<'input> TryEventReceiver<'input> for FailingSink<'input> {
    type Error = ValidationError;

    /// Records the event, then rejects it if it is the scalar `"bad"`.
    ///
    /// The offending event is pushed *before* the error is returned, so the
    /// sink's log still contains the event that triggered the failure.
    fn on_event(&mut self, ev: Event<'input>) -> Result<(), Self::Error> {
        let forbidden = matches!(&ev, Event::Scalar(value, ..) if value.as_ref() == "bad");
        self.events.push(ev);
        if forbidden {
            return Err(ValidationError::ForbiddenValue);
        }
        Ok(())
    }
}
#[test]
fn test_try_load_stops_on_receiver_error() {
    let mut parser = Parser::new_from_str("ok: bad\nafter: value\n");
    let mut sink = FailingSink { events: Vec::new() };
    let err = parser.try_load(&mut sink, true).unwrap_err();
    assert_eq!(err, TryLoadError::Receiver(ValidationError::ForbiddenValue));
    // Everything up to and including the offending scalar is recorded;
    // nothing after it is delivered.
    let has_scalar = |needle: &str| {
        sink.events
            .iter()
            .any(|event| matches!(event, Event::Scalar(value, ..) if value == needle))
    };
    assert!(has_scalar("ok"));
    assert!(has_scalar("bad"));
    assert!(!has_scalar("after"));
}
/// Spanned test sink that fails on the scalar `"bad"` and captures its span.
struct SpannedFailingSink {
/// Span of the event that triggered the failure, if any.
failed_span: Option<Span>,
}
impl<'input> TrySpannedEventReceiver<'input> for SpannedFailingSink {
    type Error = Span;

    /// Accepts every event except the scalar `"bad"`, whose span is both
    /// remembered on the sink and returned as the error value.
    fn on_event(&mut self, ev: Event<'input>, span: Span) -> Result<(), Self::Error> {
        let is_bad = matches!(ev, Event::Scalar(value, ..) if value.as_ref() == "bad");
        if !is_bad {
            return Ok(());
        }
        self.failed_span = Some(span);
        Err(span)
    }
}
#[test]
fn test_try_load_spanned_receiver_gets_span() {
    let mut parser = Parser::new_from_str("value: bad\n");
    let mut sink = SpannedFailingSink { failed_span: None };
    let err = parser.try_load(&mut sink, false).unwrap_err();
    // The span handed to the receiver must round-trip through the error.
    let span = match err {
        TryLoadError::Receiver(span) => span,
        _ => panic!("expected receiver error"),
    };
    assert_eq!(Some(span), sink.failed_span);
    assert!(!span.is_empty());
}
/// Test sink that accepts every event and merely counts them.
struct NeverFails {
/// Number of events received so far.
count: usize,
}
impl<'input> TryEventReceiver<'input> for NeverFails {
type Error = ValidationError;
/// Counts the event and always succeeds.
fn on_event(&mut self, _ev: Event<'input>) -> Result<(), Self::Error> {
self.count += 1;
Ok(())
}
}
#[test]
fn test_try_load_returns_scan_error() {
    // A scanner-level failure surfaces as `TryLoadError::Scan`, not as a
    // receiver error.
    let mut parser = Parser::new_from_str("%YAML 1.2\n%YAML 1.2\n---\n");
    let mut sink = NeverFails { count: 0 };
    let scan_err = match parser.try_load(&mut sink, true).unwrap_err() {
        TryLoadError::Scan(e) => e,
        _ => panic!("expected scan error"),
    };
    assert_eq!(scan_err.info(), "duplicate version directive");
}
#[test]
fn test_try_load_after_stream_already_ended_emits_stream_end() {
    let mut parser = Parser::new_from_str("");
    // Exhaust the event stream first.
    loop {
        if parser.next_event().is_none() {
            break;
        }
    }
    // `try_load` on an ended stream still replays a single StreamEnd.
    let mut sink = FailingSink { events: Vec::new() };
    parser.try_load(&mut sink, true).unwrap();
    assert_eq!(sink.events, vec![Event::StreamEnd]);
}
#[test]
fn test_load_single_document_stops_before_next_document() {
    let mut parser = Parser::new_from_str("a: 1\n---\nb: 2\n");
    let mut sink = CollectingSink::default();
    parser.load(&mut sink, false).unwrap();
    // With multi=false, only the first document is delivered: its keys show
    // up, the second document's do not, and the receiver ends on DocumentEnd.
    let has_scalar = |needle: &str| {
        sink.events
            .iter()
            .any(|event| matches!(event, Event::Scalar(value, ..) if value == needle))
    };
    assert!(has_scalar("a"));
    assert!(!has_scalar("b"));
    assert!(matches!(sink.events.last(), Some(Event::DocumentEnd)));
}
#[test]
fn test_duplicate_version_directive_errors() {
    // %YAML may appear at most once per document.
    let info = first_error_info("%YAML 1.2\n%YAML 1.2\n---\n");
    assert_eq!(info, "duplicate version directive");
}
#[test]
fn test_duplicate_tag_directive_errors() {
    // The same handle may not be declared twice within one document.
    let info = first_error_info("%TAG !t! tag:test,2024:\n%TAG !t! tag:other,2024:\n---\n");
    assert_eq!(
        info,
        "the TAG directive must only be given at most once per handle in the same document"
    );
}
#[test]
fn test_directive_after_implicit_document_requires_explicit_end() {
    // A directive cannot follow document content without a `...` marker first.
    let info = first_error_info("---\nkey: value\n%YAML 1.2\n---\n");
    assert_eq!(info, "missing explicit document end marker before directive");
}
#[test]
fn test_anchor_offset_overflow_reports_error() {
    // Starting the anchor counter at usize::MAX forces the next anchor id
    // to overflow, which must surface as a scan error.
    let mut parser = Parser::new_from_str("&a value");
    parser.set_anchor_offset(usize::MAX);
    let err = parser
        .filter_map(Result::err)
        .next()
        .expect("anchor registration should overflow");
    assert_eq!(
        err.info(),
        "while parsing anchor, anchor count exceeded supported limit"
    );
}
#[test]
fn test_alias_resolves_to_registered_anchor_id() {
    // `&a` is the first anchor registered, so `*a` must resolve to id 1.
    let mut saw_alias = false;
    for event in Parser::new_from_str("- &a value\n- *a\n") {
        if matches!(event.unwrap().0, Event::Alias(1)) {
            saw_alias = true;
        }
    }
    assert!(saw_alias);
}
#[test]
fn test_anchor_then_tag_applies_both_to_scalar() {
    // Anchor written before the tag: both must end up on the scalar.
    let mut checked = false;
    for event in Parser::new_from_str("&a !!str value") {
        if let Event::Scalar(value, _, anchor_id, Some(tag)) = event.unwrap().0 {
            if value == "value" {
                assert_eq!(anchor_id, 1);
                assert_eq!(tag.handle, "tag:yaml.org,2002:");
                assert_eq!(tag.suffix, "str");
                checked = true;
            }
        }
    }
    assert!(checked, "expected tagged anchored scalar");
}
#[test]
fn test_tag_then_anchor_applies_both_to_scalar() {
    // Tag written before the anchor: both must still end up on the scalar.
    let mut checked = false;
    for event in Parser::new_from_str("!!str &a value") {
        if let Event::Scalar(value, _, anchor_id, Some(tag)) = event.unwrap().0 {
            if value == "value" {
                assert_eq!(anchor_id, 1);
                assert_eq!(tag.handle, "tag:yaml.org,2002:");
                assert_eq!(tag.suffix, "str");
                checked = true;
            }
        }
    }
    assert!(checked, "expected tagged anchored scalar");
}
#[test]
fn test_multiple_tag_directives_are_kept_within_document() {
    let text = r"
%TAG !a! tag:a,2024:
%TAG !b! tag:b,2024:
---
first: !a!x foo
second: !b!y bar
";
    // Both handles declared in the same document must resolve.
    let mut seen_a = false;
    let mut seen_b = false;
    for event in Parser::new_from_str(text) {
        if let (Event::Scalar(_, _, _, Some(tag)), _) = event.unwrap() {
            match tag.handle.as_str() {
                "tag:a,2024:" => seen_a = true,
                "tag:b,2024:" => seen_b = true,
                _ => {}
            }
        }
    }
    assert!(seen_a);
    assert!(seen_b);
}
#[test]
fn test_tags_are_cleared_when_next_document_has_no_directives() {
    let text = r"
%TAG !t! tag:test,2024:
--- !t!1
foo
--- !t!2
bar
";
    let mut parser = Parser::new_from_str(text);
    // Consume the whole first document.
    for event in parser.by_ref() {
        if matches!(event.unwrap().0, Event::DocumentEnd) {
            break;
        }
    }
    assert!(
        matches!(parser.next().unwrap().unwrap().0, Event::DocumentStart(true)),
        "expected explicit second document start"
    );
    // `!t!` was not re-declared, so resolving it in document two must fail.
    let err = parser.next().unwrap().unwrap_err();
    assert!(format!("{err}").contains("the handle wasn't declared"));
}
#[test]
fn test_pull_parser_clears_anchors_between_documents() {
    let mut parser = Parser::new_from_str(
        "--- &a value
--- *a
",
    );
    // Skip past the first document.
    for event in parser.by_ref() {
        if matches!(event.unwrap().0, Event::DocumentEnd) {
            break;
        }
    }
    assert!(
        matches!(parser.next().unwrap().unwrap().0, Event::DocumentStart(true)),
        "expected explicit second document start"
    );
    // `&a` belongs to document one; `*a` in document two must be unknown.
    let err = parser.next().unwrap().unwrap_err();
    assert!(format!("{err}").contains("unknown anchor"));
}
#[test]
fn test_keep_tags_across_multiple_documents() {
    let text = r#"
%YAML 1.1
%TAG !t! tag:test,2024:
--- !t!1 &1
foo: "bar"
--- !t!2 &2
baz: "qux"
"#;
    // With keep_tags(true), the handle declared before document one still
    // resolves inside document two.
    for item in Parser::new_from_str(text).keep_tags(true) {
        if let Event::MappingStart(_, tag) = item.unwrap().0 {
            assert_eq!(tag.unwrap().handle, "tag:test,2024:");
        }
    }
    // Without it, the second document's `!t!` is undeclared and errors out.
    let hit_error = Parser::new_from_str(text)
        .keep_tags(false)
        .any(|item| item.is_err());
    assert!(hit_error, "Test failed, did not encounter error");
}
#[test]
fn test_flow_sequence_mapping_allows_empty_key() {
    // `?` with no key inside a flow-sequence entry mapping is valid YAML.
    for event in Parser::new_from_str("[?: value]") {
        event.expect("parser should accept flow sequence mappings with empty keys");
    }
}
#[test]
fn test_keep_tags_does_not_persist_default_tag_handles() {
    // Redefining the default `!!` handle must not leak into the next
    // document even when keep_tags(true) is set.
    let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";
    let int_tags: Vec<_> = Parser::new_from_str(text)
        .keep_tags(true)
        .filter_map(|event| match event.unwrap().0 {
            Event::Scalar(_, _, _, Some(tag)) if tag.suffix == "int" => Some(tag.handle.clone()),
            _ => None,
        })
        .collect();
    assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
}
#[test]
fn test_load_after_peek_stream_start() {
    /// Minimal receiver that records every event it is given.
    #[derive(Default)]
    struct Recorder<'input> {
        seen: Vec<Event<'input>>,
    }
    impl<'input> EventReceiver<'input> for Recorder<'input> {
        fn on_event(&mut self, ev: Event<'input>) {
            self.seen.push(ev);
        }
    }
    let mut parser = Parser::new_from_str("key: value\n");
    let mut recorder = Recorder::default();
    // Peeking StreamStart first must not make `load` skip or duplicate it.
    assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
    parser.load(&mut recorder, false).unwrap();
    assert!(matches!(recorder.seen.first(), Some(Event::StreamStart)));
    assert!(matches!(recorder.seen.get(1), Some(Event::DocumentStart(_))));
}
}