use crate::{
input::{str::StrInput, BorrowedInput},
scanner::{ScalarStyle, ScanError, Scanner, Span, Token, TokenType},
BufferedInput, Marker,
};
use alloc::{
borrow::Cow,
collections::{BTreeMap, BTreeSet},
string::{String, ToString},
vec::Vec,
};
use core::fmt::Display;
/// Internal states of the parser's state machine.
///
/// Each variant corresponds to "what the parser expects next"; transitions
/// are driven by `state_machine` and the per-state handler methods.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    // Stream / document framing states.
    StreamStart,
    ImplicitDocumentStart,
    DocumentStart,
    DocumentContent,
    DocumentEnd,
    // Generic block-context node.
    BlockNode,
    // Block collections. "First" variants additionally consume the
    // collection-start token that is still buffered.
    BlockSequenceFirstEntry,
    BlockSequenceEntry,
    IndentlessSequenceEntry,
    BlockMappingFirstKey,
    BlockMappingKey,
    BlockMappingValue,
    // Flow collections.
    FlowSequenceFirstEntry,
    FlowSequenceEntry,
    // Implicit mapping inside a flow sequence (`[a: b]`); the marker is
    // where the synthetic `MappingEnd` event will be reported.
    FlowSequenceEntryMappingKey,
    FlowSequenceEntryMappingValue,
    FlowSequenceEntryMappingEnd(Marker),
    FlowMappingFirstKey,
    FlowMappingKey,
    FlowMappingValue,
    // A flow mapping key with no explicit value (`{a}`).
    FlowMappingEmptyValue,
    // The stream has ended; only `StreamEnd` events are produced from here.
    End,
}
/// An event produced while parsing a YAML stream.
///
/// Anchor ids are `usize` values handed out by the parser; `0` means
/// "no anchor" (real ids start at 1, see `Parser::new`).
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event<'input> {
    /// Reserved for internal use; not emitted during normal parsing.
    Nothing,
    StreamStart,
    StreamEnd,
    /// Start of a document; `true` when introduced by an explicit `---`.
    DocumentStart(bool),
    DocumentEnd,
    Alias(
        /// The anchor id of the aliased node.
        usize,
    ),
    Scalar(
        /// The scalar's value.
        Cow<'input, str>,
        /// How the scalar was written (plain, quoted, block, ...).
        ScalarStyle,
        /// Anchor id, or 0 if unanchored.
        usize,
        /// Resolved tag, if any.
        Option<Cow<'input, Tag>>,
    ),
    SequenceStart(
        /// Anchor id, or 0 if unanchored.
        usize,
        /// Resolved tag, if any.
        Option<Cow<'input, Tag>>,
    ),
    SequenceEnd,
    MappingStart(
        /// Anchor id, or 0 if unanchored.
        usize,
        /// Resolved tag, if any.
        Option<Cow<'input, Tag>>,
    ),
    MappingEnd,
}
/// A YAML tag after directive resolution.
///
/// `handle` holds the *resolved* prefix (e.g. `tag:yaml.org,2002:`), not the
/// source-level shorthand: `resolve_tag` substitutes `%TAG` prefixes into it.
#[derive(Clone, PartialEq, Debug, Eq, Ord, PartialOrd, Hash)]
pub struct Tag {
    /// The resolved tag handle / prefix.
    pub handle: String,
    /// The tag suffix following the handle.
    pub suffix: String,
}
impl Tag {
    /// Returns `true` if this tag resolved to the YAML core schema prefix
    /// (`tag:yaml.org,2002:`), i.e. a `!!`-style tag with default directives.
    #[must_use]
    pub fn is_yaml_core_schema(&self) -> bool {
        self.handle == "tag:yaml.org,2002:"
    }
}
impl Display for Tag {
    /// Formats the tag as `!suffix` for the local handle `!`, and as
    /// `<handle>!<suffix>` for any other (resolved) handle.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self.handle.as_str() {
            "!" => write!(f, "!{}", self.suffix),
            _ => write!(f, "{}!{}", self.handle, self.suffix),
        }
    }
}
impl<'input> Event<'input> {
    /// An empty (null) scalar rendered as `~`, with no anchor and no tag.
    fn empty_scalar() -> Self {
        Event::Scalar("~".into(), ScalarStyle::Plain, 0, None)
    }
    /// An empty scalar carrying an anchor and/or tag; the value is the empty
    /// string here, unlike `empty_scalar` which uses `~`.
    // NOTE(review): the `~` vs `""` asymmetry between the two constructors
    // looks intentional (anchored/tagged empty nodes keep an empty value),
    // but confirm against consumers before unifying.
    fn empty_scalar_with_anchor(anchor: usize, tag: Option<Cow<'input, Tag>>) -> Self {
        Event::Scalar(Cow::default(), ScalarStyle::Plain, anchor, tag)
    }
}
/// A YAML parser turning scanner tokens into a stream of [`Event`]s.
#[derive(Debug)]
pub struct Parser<'input, T: BorrowedInput<'input>> {
    /// Tokenizer the parser pulls from.
    scanner: Scanner<'input, T>,
    /// Stack of states to return to (pushed before descending into nodes).
    states: Vec<State>,
    /// Current state of the state machine.
    state: State,
    /// One-token lookahead buffer filled by `peek_token`.
    token: Option<Token<'input>>,
    /// One-event lookahead buffer filled by `peek`.
    current: Option<(Event<'input>, Span)>,
    /// Column of the most recently seen block-mapping key; attached to the
    /// next event's span by `parse`.
    pending_key_indent: Option<usize>,
    /// Anchor name -> anchor id, for resolving aliases within a document.
    anchors: BTreeMap<Cow<'input, str>, usize>,
    /// Next anchor id to hand out (starts at 1; 0 means "no anchor").
    anchor_id_count: usize,
    /// Tag handle -> prefix mappings from `%TAG` directives.
    tags: BTreeMap<String, String>,
    /// Set once `StreamEnd` has been returned from `next_event`.
    stream_end_emitted: bool,
    /// If `true`, `%TAG` directives persist across documents.
    keep_tags: bool,
}
/// Receiver for parse events without source locations.
pub trait EventReceiver<'input> {
    /// Called for each event the parser produces.
    fn on_event(&mut self, ev: Event<'input>);
}
/// Receiver for parse events together with their source span.
pub trait SpannedEventReceiver<'input> {
    /// Called for each event the parser produces, with its span.
    fn on_event(&mut self, ev: Event<'input>, span: Span);
}
// Every plain `EventReceiver` is a `SpannedEventReceiver` that drops the span.
impl<'input, R: EventReceiver<'input>> SpannedEventReceiver<'input> for R {
    fn on_event(&mut self, ev: Event<'input>, _span: Span) {
        self.on_event(ev);
    }
}
/// Result of pulling one event from the parser.
pub type ParseResult<'input> = Result<(Event<'input>, Span), ScanError>;
/// Object-style interface over [`Parser`], usable regardless of input type.
pub trait ParserTrait<'input> {
    /// Returns the next event without consuming it, or `None` after stream end.
    fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>>;
    /// Returns and consumes the next event, or `None` after stream end.
    fn next_event(&mut self) -> Option<ParseResult<'input>>;
    /// Drives the whole stream (or a single document if `multi` is false)
    /// into `recv`.
    fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError>;
}
impl<'input> Parser<'input, StrInput<'input>> {
    /// Creates a parser over a borrowed `&str`; scalars can borrow from it.
    #[must_use]
    pub fn new_from_str(value: &'input str) -> Self {
        // `debug_print!` is a crate-local tracing macro; a no-op unless the
        // debug feature is enabled.
        debug_print!("\x1B[;31m>>>>>>>>>> New parser from str\x1B[;0m");
        Parser::new(StrInput::new(value))
    }
}
impl<T> Parser<'static, BufferedInput<T>>
where
    T: Iterator<Item = char>,
{
    /// Creates a parser over a character iterator; all data is owned
    /// (hence the `'static` input lifetime).
    #[must_use]
    pub fn new_from_iter(iter: T) -> Self {
        debug_print!("\x1B[;31m>>>>>>>>>> New parser from iter\x1B[;0m");
        Parser::new(BufferedInput::new(iter))
    }
}
impl<'input, T: BorrowedInput<'input>> Parser<'input, T> {
    /// Returns the next anchor id that will be handed out.
    pub fn get_anchor_offset(&self) -> usize {
        self.anchor_id_count
    }
    /// Overrides the next anchor id, e.g. to keep ids unique across parsers.
    pub fn set_anchor_offset(&mut self, offset: usize) {
        self.anchor_id_count = offset;
    }
    /// Creates a parser over the given input source with default settings.
    pub fn new(src: T) -> Self {
        Parser {
            scanner: Scanner::new(src),
            states: Vec::new(),
            state: State::StreamStart,
            token: None,
            current: None,
            pending_key_indent: None,
            anchors: BTreeMap::new(),
            // Anchor ids start at 1 so that 0 can mean "no anchor".
            anchor_id_count: 1,
            tags: BTreeMap::new(),
            stream_end_emitted: false,
            keep_tags: false,
        }
    }
    /// Builder-style setter: when `true`, `%TAG` directives are kept across
    /// documents instead of being reset at each document end.
    #[must_use]
    pub fn keep_tags(mut self, value: bool) -> Self {
        self.keep_tags = value;
        self
    }
    /// Inherent convenience wrapper over [`ParserTrait::peek`].
    pub fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
        ParserTrait::peek(self)
    }
    /// Inherent convenience wrapper over [`ParserTrait::next_event`].
    pub fn next_event(&mut self) -> Option<ParseResult<'input>> {
        ParserTrait::next_event(self)
    }
fn next_event_impl<'a>(&mut self) -> ParseResult<'a>
where
'input: 'a,
{
match self.current.take() {
None => self.parse(),
Some(v) => Ok(v),
}
}
fn peek_token(&mut self) -> Result<&Token<'_>, ScanError> {
match self.token {
None => {
self.token = Some(self.scan_next_token()?);
Ok(self.token.as_ref().unwrap())
}
Some(ref tok) => Ok(tok),
}
}
fn scan_next_token(&mut self) -> Result<Token<'input>, ScanError> {
let token = self.scanner.next();
match token {
None => match self.scanner.get_error() {
None => Err(self.unexpected_eof()),
Some(e) => Err(e),
},
Some(tok) => Ok(tok),
}
}
    /// Builds an end-of-file error with a message tailored to the current
    /// parser state, so the user learns which construct was left open.
    #[cold]
    fn unexpected_eof(&self) -> ScanError {
        let info = match self.state {
            State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
                "unexpected EOF while parsing a flow sequence"
            }
            State::FlowMappingFirstKey
            | State::FlowMappingKey
            | State::FlowMappingValue
            | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
            State::FlowSequenceEntryMappingKey
            | State::FlowSequenceEntryMappingValue
            | State::FlowSequenceEntryMappingEnd(_) => {
                "unexpected EOF while parsing an implicit flow mapping"
            }
            State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
                "unexpected EOF while parsing a block sequence"
            }
            State::BlockMappingFirstKey | State::BlockMappingKey | State::BlockMappingValue => {
                "unexpected EOF while parsing a block mapping"
            }
            // Any other state gets the generic message.
            _ => "unexpected eof",
        };
        ScanError::new_str(self.scanner.mark(), info)
    }
fn fetch_token<'a>(&mut self) -> Token<'a>
where
'input: 'a,
{
self.token
.take()
.expect("fetch_token needs to be preceded by peek_token")
}
    /// Discards the buffered lookahead token.
    fn skip(&mut self) {
        self.token = None;
    }
    /// Restores the most recently pushed state. Panics if the stack is empty,
    /// which would indicate a state-machine bug.
    fn pop_state(&mut self) {
        self.state = self.states.pop().unwrap();
    }
    /// Saves a state to return to after the current node is parsed.
    fn push_state(&mut self, state: State) {
        self.states.push(state);
    }
fn parse<'a>(&mut self) -> ParseResult<'a>
where
'input: 'a,
{
if self.state == State::End {
return Ok((Event::StreamEnd, Span::empty(self.scanner.mark())));
}
let (ev, span) = self.state_machine()?;
if let Some(indent) = self.pending_key_indent.take() {
Ok((ev, span.with_indent(Some(indent))))
} else {
Ok((ev, span))
}
}
    /// Inherent convenience wrapper over [`ParserTrait::load`]: feeds all
    /// events (one document, or every document if `multi`) into `recv`.
    pub fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError> {
        ParserTrait::load(self, recv, multi)
    }
    /// Feeds one document (start event, root node, end event) into `recv`.
    /// `first_ev` must be the already-consumed `DocumentStart` event.
    fn load_document<R: SpannedEventReceiver<'input>>(
        &mut self,
        first_ev: Event<'input>,
        span: Span,
        recv: &mut R,
    ) -> Result<(), ScanError> {
        if !matches!(first_ev, Event::DocumentStart(_)) {
            return Err(ScanError::new_str(
                span.start,
                "did not find expected <document-start>",
            ));
        }
        recv.on_event(first_ev, span);
        // Root node of the document.
        let (ev, span) = self.next_event_impl()?;
        self.load_node(ev, span, recv)?;
        // The state machine guarantees a DocumentEnd follows the root node;
        // anything else is an internal invariant violation.
        let (ev, mark) = self.next_event_impl()?;
        assert_eq!(ev, Event::DocumentEnd);
        recv.on_event(ev, mark);
        Ok(())
    }
    /// Feeds one node (scalar/alias, or a whole collection subtree) into
    /// `recv`, starting from its already-consumed first event.
    fn load_node<R: SpannedEventReceiver<'input>>(
        &mut self,
        first_ev: Event<'input>,
        span: Span,
        recv: &mut R,
    ) -> Result<(), ScanError> {
        match first_ev {
            Event::Alias(..) | Event::Scalar(..) => {
                recv.on_event(first_ev, span);
                Ok(())
            }
            Event::SequenceStart(..) => {
                recv.on_event(first_ev, span);
                self.load_sequence(recv)
            }
            Event::MappingStart(..) => {
                recv.on_event(first_ev, span);
                self.load_mapping(recv)
            }
            // The state machine never emits stream/document framing events
            // in node position; reaching here is a bug.
            _ => {
                #[cfg(feature = "debug_prints")]
                std::println!("UNREACHABLE EVENT: {first_ev:?}");
                unreachable!();
            }
        }
    }
fn load_mapping<R: SpannedEventReceiver<'input>>(
&mut self,
recv: &mut R,
) -> Result<(), ScanError> {
let (mut key_ev, mut key_mark) = self.next_event_impl()?;
while key_ev != Event::MappingEnd {
self.load_node(key_ev, key_mark, recv)?;
let (ev, mark) = self.next_event_impl()?;
self.load_node(ev, mark, recv)?;
let (ev, mark) = self.next_event_impl()?;
key_ev = ev;
key_mark = mark;
}
recv.on_event(key_ev, key_mark);
Ok(())
}
fn load_sequence<R: SpannedEventReceiver<'input>>(
&mut self,
recv: &mut R,
) -> Result<(), ScanError> {
let (mut ev, mut mark) = self.next_event_impl()?;
while ev != Event::SequenceEnd {
self.load_node(ev, mark, recv)?;
let (next_ev, next_mark) = self.next_event_impl()?;
ev = next_ev;
mark = next_mark;
}
recv.on_event(ev, mark);
Ok(())
}
    /// Dispatches to the handler for the current state. Each handler consumes
    /// tokens, sets the next state, and returns one event.
    fn state_machine<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        debug_print!("\n\x1B[;33mParser state: {:?} \x1B[;0m", self.state);
        match self.state {
            State::StreamStart => self.stream_start(),
            State::ImplicitDocumentStart => self.document_start(true),
            State::DocumentStart => self.document_start(false),
            State::DocumentContent => self.document_content(),
            State::DocumentEnd => self.document_end(),
            State::BlockNode => self.parse_node(true, false),
            State::BlockMappingFirstKey => self.block_mapping_key(true),
            State::BlockMappingKey => self.block_mapping_key(false),
            State::BlockMappingValue => self.block_mapping_value(),
            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
            State::BlockSequenceEntry => self.block_sequence_entry(false),
            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
            State::FlowSequenceEntry => self.flow_sequence_entry(false),
            State::FlowMappingFirstKey => self.flow_mapping_key(true),
            State::FlowMappingKey => self.flow_mapping_key(false),
            State::FlowMappingValue => self.flow_mapping_value(false),
            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
            State::FlowSequenceEntryMappingEnd(mark) => self.flow_sequence_entry_mapping_end(mark),
            State::FlowMappingEmptyValue => self.flow_mapping_value(true),
            // `parse` short-circuits on `End` before calling us.
            State::End => unreachable!(),
        }
    }
    /// Expects the scanner's `StreamStart` token and emits `StreamStart`.
    fn stream_start<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        match *self.peek_token()? {
            Token(span, TokenType::StreamStart(_)) => {
                self.state = State::ImplicitDocumentStart;
                self.skip();
                Ok((Event::StreamStart, span))
            }
            Token(span, _) => Err(ScanError::new_str(
                span.start,
                "did not find expected <stream-start>",
            )),
        }
    }
    /// Starts a document. In `implicit` mode bare content (no `---`) is
    /// allowed; otherwise directives/`---` are required.
    fn document_start<'a>(&mut self, implicit: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        // Swallow any stray `...` markers before the next document.
        while let TokenType::DocumentEnd = self.peek_token()?.1 {
            self.skip();
        }
        match *self.peek_token()? {
            Token(span, TokenType::StreamEnd) => {
                self.state = State::End;
                self.skip();
                Ok((Event::StreamEnd, span))
            }
            // Directives or `---` always force an explicit document start.
            Token(
                _,
                TokenType::VersionDirective(..)
                | TokenType::TagDirective(..)
                | TokenType::ReservedDirective(..)
                | TokenType::DocumentStart,
            ) => {
                self.explicit_document_start()
            }
            // Bare content: implicit document with no `---`.
            Token(span, _) if implicit => {
                self.parser_process_directives()?;
                self.push_state(State::DocumentEnd);
                self.state = State::BlockNode;
                Ok((Event::DocumentStart(false), span))
            }
            _ => {
                self.explicit_document_start()
            }
        }
    }
    /// Consumes all leading `%YAML`/`%TAG`/reserved directives, recording tag
    /// handle -> prefix mappings into `self.tags`.
    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
        let mut version_directive_received = false;
        // With `keep_tags`, new mappings are layered on top of the previous
        // document's; otherwise each document starts from an empty table.
        let mut tags = if self.keep_tags {
            self.tags.clone()
        } else {
            BTreeMap::new()
        };
        // Handles declared in *this* document, to reject duplicates even when
        // `keep_tags` carried the same handle over from a previous document.
        let mut document_tag_handles = BTreeSet::new();
        loop {
            match self.peek_token()? {
                Token(span, TokenType::VersionDirective(_, _)) => {
                    if version_directive_received {
                        return Err(ScanError::new_str(
                            span.start,
                            "duplicate version directive",
                        ));
                    }
                    // NOTE(review): the version numbers themselves are not
                    // validated here.
                    version_directive_received = true;
                }
                Token(mark, TokenType::TagDirective(handle, prefix)) => {
                    if !document_tag_handles.insert(handle.to_string()) {
                        return Err(ScanError::new_str(mark.start, "the TAG directive must only be given at most once per handle in the same document"));
                    }
                    tags.insert(handle.to_string(), prefix.to_string());
                }
                // Reserved directives are skipped without effect.
                Token(_, TokenType::ReservedDirective(_, _)) => {
                }
                _ => break,
            }
            self.skip();
        }
        self.tags = tags;
        Ok(())
    }
    /// Processes directives and then requires a `---` token, emitting an
    /// explicit `DocumentStart(true)`.
    fn explicit_document_start<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        self.parser_process_directives()?;
        match *self.peek_token()? {
            Token(mark, TokenType::DocumentStart) => {
                self.push_state(State::DocumentEnd);
                self.state = State::DocumentContent;
                self.skip();
                Ok((Event::DocumentStart(true), mark))
            }
            Token(span, _) => Err(ScanError::new_str(
                span.start,
                "did not find expected <document start>",
            )),
        }
    }
    /// Parses the document root; an immediately following directive or
    /// document/stream boundary means the document is empty.
    fn document_content<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        match *self.peek_token()? {
            // Empty document: emit a null scalar without consuming the token.
            Token(
                mark,
                TokenType::VersionDirective(..)
                | TokenType::TagDirective(..)
                | TokenType::ReservedDirective(..)
                | TokenType::DocumentStart
                | TokenType::DocumentEnd
                | TokenType::StreamEnd,
            ) => {
                self.pop_state();
                Ok((Event::empty_scalar(), mark))
            }
            _ => self.parse_node(true, false),
        }
    }
    /// Ends the current document, consuming an optional explicit `...`
    /// marker, and resets tag directives according to `keep_tags`.
    fn document_end<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        let mut explicit_end = false;
        let span: Span = match *self.peek_token()? {
            Token(span, TokenType::DocumentEnd) => {
                explicit_end = true;
                self.skip();
                span
            }
            Token(span, _) => span,
        };
        if self.keep_tags {
            // Even with `keep_tags`, the default handles (`!!` and the empty
            // primary handle) must not leak into following documents.
            self.tags.remove("!!");
            self.tags.remove("");
        } else {
            self.tags.clear();
        }
        if explicit_end {
            self.state = State::ImplicitDocumentStart;
        } else {
            // Without `...`, a directive may not start the next document.
            if let Token(
                span,
                TokenType::VersionDirective(..)
                | TokenType::TagDirective(..)
                | TokenType::ReservedDirective(..),
            ) = *self.peek_token()?
            {
                return Err(ScanError::new_str(
                    span.start,
                    "missing explicit document end marker before directive",
                ));
            }
            self.state = State::DocumentStart;
        }
        Ok((Event::DocumentEnd, span))
    }
fn register_anchor(&mut self, name: Cow<'input, str>, mark: &Span) -> Result<usize, ScanError> {
let new_id = self.anchor_id_count;
self.anchor_id_count = self.anchor_id_count.checked_add(1).ok_or_else(|| {
ScanError::new_str(
mark.start,
"while parsing anchor, anchor count exceeded supported limit",
)
})?;
self.anchors.insert(name, new_id);
Ok(new_id)
}
    /// Parses a single node: optional anchor/tag properties (in either
    /// order) followed by a scalar, alias, or collection start.
    ///
    /// `block` permits block collections; `indentless_sequence` permits a
    /// `- `-introduced sequence at the current indentation level.
    #[allow(clippy::too_many_lines)]
    fn parse_node<'a>(&mut self, block: bool, indentless_sequence: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        // 0 means "no anchor"; real ids start at 1.
        let mut anchor_id = 0;
        let mut tag = None;
        // First, consume node properties (alias, anchor and/or tag).
        match *self.peek_token()? {
            Token(_, TokenType::Alias(_)) => {
                self.pop_state();
                if let Token(span, TokenType::Alias(name)) = self.fetch_token() {
                    match self.anchors.get(&*name) {
                        None => {
                            return Err(ScanError::new_str(
                                span.start,
                                "while parsing node, found unknown anchor",
                            ))
                        }
                        Some(id) => return Ok((Event::Alias(*id), span)),
                    }
                }
                unreachable!()
            }
            // Anchor, optionally followed by a tag.
            Token(_, TokenType::Anchor(_)) => {
                if let Token(span, TokenType::Anchor(name)) = self.fetch_token() {
                    anchor_id = self.register_anchor(name, &span)?;
                    if let TokenType::Tag(..) = self.peek_token()?.1 {
                        if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
                            tag = Some(self.resolve_tag(span, &handle, suffix)?);
                        } else {
                            unreachable!()
                        }
                    }
                } else {
                    unreachable!()
                }
            }
            // Tag, optionally followed by an anchor.
            Token(mark, TokenType::Tag(..)) => {
                if let TokenType::Tag(handle, suffix) = self.fetch_token().1 {
                    tag = Some(self.resolve_tag(mark, &handle, suffix)?);
                    if let TokenType::Anchor(_) = &self.peek_token()?.1 {
                        if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
                            anchor_id = self.register_anchor(name, &mark)?;
                        } else {
                            unreachable!()
                        }
                    }
                } else {
                    unreachable!()
                }
            }
            _ => {}
        }
        // Then, dispatch on the node content itself.
        match *self.peek_token()? {
            // Indentless sequence: leave the `-` token for
            // `indentless_sequence_entry` to consume.
            Token(mark, TokenType::BlockEntry) if indentless_sequence => {
                self.state = State::IndentlessSequenceEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(_, TokenType::Scalar(..)) => {
                self.pop_state();
                if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
                    Ok((Event::Scalar(v, style, anchor_id, tag), mark))
                } else {
                    unreachable!()
                }
            }
            Token(mark, TokenType::FlowSequenceStart) => {
                self.state = State::FlowSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::FlowMappingStart) => {
                self.state = State::FlowMappingFirstKey;
                Ok((Event::MappingStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::BlockSequenceStart) if block => {
                self.state = State::BlockSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id, tag), mark))
            }
            Token(mark, TokenType::BlockMappingStart) if block => {
                self.state = State::BlockMappingFirstKey;
                Ok((Event::MappingStart(anchor_id, tag), mark))
            }
            // Properties with no content: an empty scalar carries them.
            Token(mark, _) if tag.is_some() || anchor_id > 0 => {
                self.pop_state();
                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
            }
            // NOTE(review): this message table duplicates `unexpected_eof`
            // (with a different fallback); consider sharing the mapping.
            Token(span, _) => {
                let info = match self.state {
                    State::FlowSequenceFirstEntry | State::FlowSequenceEntry => {
                        "unexpected EOF while parsing a flow sequence"
                    }
                    State::FlowMappingFirstKey
                    | State::FlowMappingKey
                    | State::FlowMappingValue
                    | State::FlowMappingEmptyValue => "unexpected EOF while parsing a flow mapping",
                    State::FlowSequenceEntryMappingKey
                    | State::FlowSequenceEntryMappingValue
                    | State::FlowSequenceEntryMappingEnd(_) => {
                        "unexpected EOF while parsing an implicit flow mapping"
                    }
                    State::BlockSequenceFirstEntry | State::BlockSequenceEntry => {
                        "unexpected EOF while parsing a block sequence"
                    }
                    State::BlockMappingFirstKey
                    | State::BlockMappingKey
                    | State::BlockMappingValue => "unexpected EOF while parsing a block mapping",
                    _ => "while parsing a node, did not find expected node content",
                };
                Err(ScanError::new_str(span.start, info))
            }
        }
    }
fn block_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
where
'input: 'a,
{
if first {
let _ = self.peek_token()?;
self.skip();
}
match *self.peek_token()? {
Token(_, TokenType::Key) => {
if let Token(key_span, TokenType::Key) = *self.peek_token()? {
self.pending_key_indent = Some(key_span.start.col());
}
self.skip();
if let Token(mark, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
*self.peek_token()?
{
self.state = State::BlockMappingValue;
Ok((Event::empty_scalar(), mark))
} else {
self.push_state(State::BlockMappingValue);
self.parse_node(true, true)
}
}
Token(mark, TokenType::Value) => {
self.state = State::BlockMappingValue;
Ok((Event::empty_scalar(), mark))
}
Token(mark, TokenType::BlockEnd) => {
self.pop_state();
self.skip();
Ok((Event::MappingEnd, mark))
}
Token(span, _) => Err(ScanError::new_str(
span.start,
"while parsing a block mapping, did not find expected key",
)),
}
}
fn block_mapping_value<'a>(&mut self) -> ParseResult<'a>
where
'input: 'a,
{
match *self.peek_token()? {
Token(mark, TokenType::Value) => {
self.skip();
if let Token(_, TokenType::Key | TokenType::Value | TokenType::BlockEnd) =
*self.peek_token()?
{
self.state = State::BlockMappingKey;
Ok((Event::empty_scalar(), mark))
} else {
self.push_state(State::BlockMappingKey);
self.parse_node(true, true)
}
}
Token(mark, _) => {
self.state = State::BlockMappingKey;
Ok((Event::empty_scalar(), mark))
}
}
}
    /// Parses the key position of a flow mapping (`{ ... }`), including `,`
    /// separators, explicit `?` keys, and empty keys/values.
    fn flow_mapping_key<'a>(&mut self, first: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        // On the first key, the buffered `FlowMappingStart` token is dropped.
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        let span: Span = {
            match *self.peek_token()? {
                Token(mark, TokenType::FlowMappingEnd) => mark,
                Token(mark, _) => {
                    // Entries after the first must be separated by ','.
                    if !first {
                        match *self.peek_token()? {
                            Token(_, TokenType::FlowEntry) => self.skip(),
                            Token(span, _) => return Err(ScanError::new_str(
                                span.start,
                                "while parsing a flow mapping, did not find expected ',' or '}'",
                            )),
                        }
                    }
                    match *self.peek_token()? {
                        // Explicit `?` key.
                        Token(_, TokenType::Key) => {
                            self.skip();
                            // `?` directly followed by `:`/`,`/`}` is an
                            // empty key.
                            if let Token(
                                mark,
                                TokenType::Value | TokenType::FlowEntry | TokenType::FlowMappingEnd,
                            ) = *self.peek_token()?
                            {
                                self.state = State::FlowMappingValue;
                                return Ok((Event::empty_scalar(), mark));
                            }
                            self.push_state(State::FlowMappingValue);
                            return self.parse_node(false, false);
                        }
                        // `:` with no key: empty key scalar.
                        Token(marker, TokenType::Value) => {
                            self.state = State::FlowMappingValue;
                            return Ok((Event::empty_scalar(), marker));
                        }
                        // Trailing `,` before `}` — fall through to close.
                        Token(_, TokenType::FlowMappingEnd) => (),
                        // Plain key node; `FlowMappingEmptyValue` handles a
                        // possibly-missing value.
                        _ => {
                            self.push_state(State::FlowMappingEmptyValue);
                            return self.parse_node(false, false);
                        }
                    }
                    mark
                }
            }
        };
        // Reaching here means `}` was seen: close the mapping.
        self.pop_state();
        self.skip();
        Ok((Event::MappingEnd, span))
    }
    /// Parses the value position of a flow mapping. With `empty`, the key
    /// had no `:` at all and an empty scalar is emitted immediately.
    fn flow_mapping_value<'a>(&mut self, empty: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        let span: Span = {
            if empty {
                let Token(mark, _) = *self.peek_token()?;
                self.state = State::FlowMappingKey;
                return Ok((Event::empty_scalar(), mark));
            }
            match *self.peek_token()? {
                Token(span, TokenType::Value) => {
                    self.skip();
                    match self.peek_token()?.1 {
                        // `:` directly followed by `,` or `}` — empty value.
                        TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
                        _ => {
                            self.push_state(State::FlowMappingKey);
                            return self.parse_node(false, false);
                        }
                    }
                    span
                }
                // No `:` token: the value is empty.
                Token(marker, _) => marker,
            }
        };
        self.state = State::FlowMappingKey;
        Ok((Event::empty_scalar(), span))
    }
    /// Parses one entry of a flow sequence (`[ ... ]`), handling `,`
    /// separators and `key: value` entries that open an implicit mapping.
    fn flow_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
    where
        'input: 'a,
    {
        // On the first entry, the buffered `FlowSequenceStart` is dropped.
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        match *self.peek_token()? {
            Token(mark, TokenType::FlowSequenceEnd) => {
                self.pop_state();
                self.skip();
                return Ok((Event::SequenceEnd, mark));
            }
            // Entries after the first must be separated by ','.
            Token(_, TokenType::FlowEntry) if !first => {
                self.skip();
            }
            Token(span, _) if !first => {
                return Err(ScanError::new_str(
                    span.start,
                    "while parsing a flow sequence, expected ',' or ']'",
                ));
            }
            _ => { }
        }
        match *self.peek_token()? {
            // Trailing ',' before ']'.
            Token(mark, TokenType::FlowSequenceEnd) => {
                self.pop_state();
                self.skip();
                Ok((Event::SequenceEnd, mark))
            }
            // `?`/`key:` inside a flow sequence opens an implicit
            // single-pair mapping.
            Token(mark, TokenType::Key) => {
                self.state = State::FlowSequenceEntryMappingKey;
                self.skip();
                Ok((Event::MappingStart(0, None), mark))
            }
            _ => {
                self.push_state(State::FlowSequenceEntry);
                self.parse_node(false, false)
            }
        }
    }
    /// Parses entries of an indentless block sequence (`-` items at the
    /// parent mapping's indentation); any non-`-` token ends the sequence.
    fn indentless_sequence_entry<'a>(&mut self) -> ParseResult<'a>
    where
        'input: 'a,
    {
        match *self.peek_token()? {
            Token(mark, TokenType::BlockEntry) => {
                self.skip();
                // `-` directly followed by another `-`, a key, a `:` or
                // block end is an empty item.
                if let Token(
                    _,
                    TokenType::BlockEntry | TokenType::Key | TokenType::Value | TokenType::BlockEnd,
                ) = *self.peek_token()?
                {
                    self.state = State::IndentlessSequenceEntry;
                    Ok((Event::empty_scalar(), mark))
                } else {
                    self.push_state(State::IndentlessSequenceEntry);
                    self.parse_node(true, false)
                }
            }
            // No token is consumed here: the next state handles it.
            Token(mark, _) => {
                self.pop_state();
                Ok((Event::SequenceEnd, mark))
            }
        }
    }
fn block_sequence_entry<'a>(&mut self, first: bool) -> ParseResult<'a>
where
'input: 'a,
{
if first {
let _ = self.peek_token()?;
self.skip();
}
match *self.peek_token()? {
Token(mark, TokenType::BlockEnd) => {
self.pop_state();
self.skip();
Ok((Event::SequenceEnd, mark))
}
Token(mark, TokenType::BlockEntry) => {
self.skip();
if let Token(_, TokenType::BlockEntry | TokenType::BlockEnd) = *self.peek_token()? {
self.state = State::BlockSequenceEntry;
Ok((Event::empty_scalar(), mark))
} else {
self.push_state(State::BlockSequenceEntry);
self.parse_node(true, false)
}
}
Token(span, _) => Err(ScanError::new_str(
span.start,
"while parsing a block collection, did not find expected '-' indicator",
)),
}
}
fn flow_sequence_entry_mapping_key<'a>(&mut self) -> ParseResult<'a>
where
'input: 'a,
{
if let Token(mark, TokenType::FlowEntry | TokenType::FlowSequenceEnd) =
*self.peek_token()?
{
self.state = State::FlowSequenceEntryMappingValue;
Ok((Event::empty_scalar(), mark))
} else {
self.push_state(State::FlowSequenceEntryMappingValue);
self.parse_node(false, false)
}
}
fn flow_sequence_entry_mapping_value<'a>(&mut self) -> ParseResult<'a>
where
'input: 'a,
{
match *self.peek_token()? {
Token(_, TokenType::Value) => {
self.skip();
self.state = State::FlowSequenceEntryMappingValue;
let Token(span, ref tok) = *self.peek_token()?;
if matches!(tok, TokenType::FlowEntry | TokenType::FlowSequenceEnd) {
self.state = State::FlowSequenceEntryMappingEnd(span.end);
Ok((Event::empty_scalar(), span))
} else {
self.push_state(State::FlowSequenceEntryMappingEnd(span.end));
self.parse_node(false, false)
}
}
Token(mark, _) => {
self.state = State::FlowSequenceEntryMappingEnd(mark.end);
Ok((Event::empty_scalar(), mark))
}
}
}
    /// Closes the implicit single-pair mapping opened inside a flow
    /// sequence, reporting it at the recorded marker.
    #[allow(clippy::unnecessary_wraps)]
    fn flow_sequence_entry_mapping_end<'a>(&mut self, mark: Marker) -> ParseResult<'a>
    where
        'input: 'a,
    {
        self.state = State::FlowSequenceEntry;
        Ok((Event::MappingEnd, Span::empty(mark)))
    }
    /// Resolves a scanned `(handle, suffix)` pair into a [`Tag`] using the
    /// `%TAG` directive table. The returned `Tag`'s `handle` holds the
    /// resolved prefix, not the source shorthand.
    fn resolve_tag(
        &self,
        span: Span,
        handle: &Cow<'input, str>,
        suffix: Cow<'input, str>,
    ) -> Result<Cow<'input, Tag>, ScanError> {
        let suffix = suffix.into_owned();
        let tag = if handle == "!!" {
            // Secondary handle: defaults to the YAML core schema prefix
            // unless a `%TAG !!` directive overrode it.
            Tag {
                handle: self
                    .tags
                    .get("!!")
                    .map_or_else(|| "tag:yaml.org,2002:".to_string(), ToString::to_string),
                suffix,
            }
        } else if handle.is_empty() && suffix == "!" {
            // The non-specific tag `!`: resolves via the empty handle if a
            // `%TAG` directive declared one, else stays as-is.
            match self.tags.get("") {
                Some(prefix) => Tag {
                    handle: prefix.clone(),
                    suffix,
                },
                None => Tag {
                    handle: String::new(),
                    suffix,
                },
            }
        } else {
            let prefix = self.tags.get(&**handle);
            if let Some(prefix) = prefix {
                Tag {
                    handle: prefix.clone(),
                    suffix,
                }
            } else {
                // A `!name!`-shaped handle with no matching directive is an
                // error; anything else passes through verbatim.
                if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                    return Err(ScanError::new_str(span.start, "the handle wasn't declared"));
                }
                Tag {
                    handle: handle.to_string(),
                    suffix,
                }
            }
        };
        Ok(Cow::Owned(tag))
    }
}
impl<'input, T: BorrowedInput<'input>> ParserTrait<'input> for Parser<'input, T> {
fn peek(&mut self) -> Option<Result<&(Event<'input>, Span), ScanError>> {
if let Some(ref x) = self.current {
Some(Ok(x))
} else {
if self.stream_end_emitted {
return None;
}
match self.next_event_impl() {
Ok(token) => self.current = Some(token),
Err(e) => return Some(Err(e)),
}
self.current.as_ref().map(Ok)
}
}
fn next_event(&mut self) -> Option<ParseResult<'input>> {
if self.stream_end_emitted {
return None;
}
let tok = self.next_event_impl();
if matches!(tok, Ok((Event::StreamEnd, _))) {
self.stream_end_emitted = true;
}
Some(tok)
}
    /// Drives the parser to completion, forwarding events to `recv`.
    /// Loads one document when `multi` is false, all documents otherwise.
    fn load<R: SpannedEventReceiver<'input>>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError> {
        // `StreamStart` may already sit in the peek buffer (if the caller
        // peeked before loading); treat that the same as a fresh stream.
        let stream_start_buffered = matches!(self.current.as_ref(), Some((Event::StreamStart, _)));
        if !self.scanner.stream_started() || stream_start_buffered {
            let (ev, span) = self.next_event_impl()?;
            if ev != Event::StreamStart {
                return Err(ScanError::new_str(
                    span.start,
                    "did not find expected <stream-start>",
                ));
            }
            recv.on_event(ev, span);
        }
        if self.scanner.stream_ended() {
            // Stream was exhausted by a previous load: just report the end.
            recv.on_event(Event::StreamEnd, Span::empty(self.scanner.mark()));
            return Ok(());
        }
        loop {
            let (ev, span) = self.next_event_impl()?;
            if ev == Event::StreamEnd {
                recv.on_event(ev, span);
                return Ok(());
            }
            // Anchors are scoped to a single document.
            self.anchors.clear();
            self.load_document(ev, span, recv)?;
            if !multi {
                break;
            }
        }
        Ok(())
    }
}
/// Iterating a `Parser` yields each event with its span, ending after
/// `StreamEnd` (see `next_event`).
impl<'input, T: BorrowedInput<'input>> Iterator for Parser<'input, T> {
    type Item = Result<(Event<'input>, Span), ScanError>;
    fn next(&mut self) -> Option<Self::Item> {
        self.next_event()
    }
}
#[cfg(test)]
mod test {
    use alloc::vec::Vec;
    use super::{Event, EventReceiver, Parser};
    /// `peek` must return exactly the event that the following `next_event`
    /// yields, for every event in the stream.
    #[test]
    fn test_peek_eq_parse() {
        let s = "
a0 bb: val
a1: &x
b1: 4
b2: d
a2: 4
a3: [1, 2, 3]
a4:
- [a1, a2]
- 2
a5: *x
";
        let mut p = Parser::new_from_str(s);
        loop {
            let event_peek = p.peek().unwrap().unwrap().clone();
            let event = p.next_event().unwrap().unwrap();
            assert_eq!(event, event_peek);
            if event.0 == Event::StreamEnd {
                break;
            }
        }
    }
    /// Multiple `%TAG` directives in one document must all be usable.
    #[test]
    fn test_multiple_tag_directives_are_kept_within_document() {
        let text = r"
%TAG !a! tag:a,2024:
%TAG !b! tag:b,2024:
---
first: !a!x foo
second: !b!y bar
";
        let mut seen_a = false;
        let mut seen_b = false;
        for event in Parser::new_from_str(text) {
            let (event, _) = event.unwrap();
            if let Event::Scalar(_, _, _, Some(tag)) = event {
                if tag.handle == "tag:a,2024:" {
                    seen_a = true;
                } else if tag.handle == "tag:b,2024:" {
                    seen_b = true;
                }
            }
        }
        assert!(seen_a);
        assert!(seen_b);
    }
    /// Without `keep_tags`, a handle declared in document 1 must be an
    /// "undeclared handle" error when used in document 2.
    #[test]
    fn test_tags_are_cleared_when_next_document_has_no_directives() {
        let text = r"
%TAG !t! tag:test,2024:
--- !t!1
foo
--- !t!2
bar
";
        let mut parser = Parser::new_from_str(text);
        // Consume the first document entirely.
        for event in parser.by_ref() {
            let (event, _) = event.unwrap();
            if let Event::DocumentEnd = event {
                break;
            }
        }
        match parser.next().unwrap().unwrap().0 {
            Event::DocumentStart(true) => {}
            _ => panic!("expected explicit second document start"),
        }
        let err = parser.next().unwrap().unwrap_err();
        assert!(format!("{err}").contains("the handle wasn't declared"));
    }
    /// With `keep_tags(true)` the same input parses fully; with
    /// `keep_tags(false)` it must error on the second document's handle.
    #[test]
    fn test_keep_tags_across_multiple_documents() {
        let text = r#"
%YAML 1.1
%TAG !t! tag:test,2024:
--- !t!1 &1
foo: "bar"
--- !t!2 &2
baz: "qux"
"#;
        for x in Parser::new_from_str(text).keep_tags(true) {
            let x = x.unwrap();
            if let Event::MappingStart(_, tag) = x.0 {
                let tag = tag.unwrap();
                assert_eq!(tag.handle, "tag:test,2024:");
            }
        }
        for x in Parser::new_from_str(text).keep_tags(false) {
            if x.is_err() {
                return;
            }
        }
        panic!("Test failed, did not encounter error")
    }
    /// `[?: value]` — an explicit empty key inside a flow-sequence implicit
    /// mapping — must parse without error.
    #[test]
    fn test_flow_sequence_mapping_allows_empty_key() {
        let parser = Parser::new_from_str("[?: value]");
        for event in parser {
            event.expect("parser should accept flow sequence mappings with empty keys");
        }
    }
    /// `keep_tags` must not carry a redefined `!!` handle into the next
    /// document (see `document_end`, which removes the default handles).
    #[test]
    fn test_keep_tags_does_not_persist_default_tag_handles() {
        let text = "%TAG !! tag:evil,2024:\n--- !!int 1\n--- !!int 2\n";
        let mut int_tags = Vec::new();
        for event in Parser::new_from_str(text).keep_tags(true) {
            let event = event.unwrap().0;
            if let Event::Scalar(_, _, _, Some(tag)) = event {
                if tag.suffix == "int" {
                    int_tags.push(tag.handle.clone());
                }
            }
        }
        assert_eq!(int_tags, vec!["tag:evil,2024:", "tag:yaml.org,2002:"]);
    }
    /// Calling `load` after `peek` already buffered `StreamStart` must still
    /// emit `StreamStart` to the receiver exactly once.
    #[test]
    fn test_load_after_peek_stream_start() {
        #[derive(Default)]
        struct Sink<'input> {
            events: Vec<Event<'input>>,
        }
        impl<'input> EventReceiver<'input> for Sink<'input> {
            fn on_event(&mut self, ev: Event<'input>) {
                self.events.push(ev);
            }
        }
        let mut parser = Parser::new_from_str("key: value\n");
        let mut sink = Sink::default();
        assert_eq!(parser.peek().unwrap().unwrap().0, Event::StreamStart);
        parser.load(&mut sink, false).unwrap();
        assert!(matches!(sink.events.first(), Some(Event::StreamStart)));
        assert!(matches!(sink.events.get(1), Some(Event::DocumentStart(_))));
    }
}