#[macro_use]
mod macros;
pub(crate) mod entry;
pub(crate) mod error;
pub(crate) mod flag;
mod anchor;
mod context;
mod directive;
mod key;
mod scalar;
mod stats;
mod tag;
use crate::{
queue::Queue,
scanner::{
anchor::{scan_anchor, AnchorKind},
context::{Context, Indent, STARTING_INDENT},
directive::{scan_directive, DirectiveKind},
entry::TokenEntry,
error::{ScanError, ScanResult as Result},
flag::*,
key::{Key, KeyPossible},
scalar::{block::scan_block_scalar, flow::scan_flow_scalar, plain::scan_plain_scalar},
stats::MStats,
tag::scan_node_tag,
},
token::{Marker, StreamEncoding, Token},
};
type Tokens<'de> = Queue<TokenEntry<'de>>;
/// A YAML token scanner, holding every piece of state needed to
/// resume tokenizing a stream across buffer refills.
#[derive(Debug)]
pub(crate) struct Scanner
{
    // Bytes of the underlying stream consumed so far; always a
    // char boundary into the caller's buffer
    offset: usize,
    // Coarse stream progress: Start -> Stream -> Done
    state: StreamState,
    // May the next token legally begin a simple (implicit) key?
    simple_key_allowed: bool,
    // Running read/line/column accounting of consumed input
    stats: MStats,
    // Saved simple-key candidate, resolved when a ':' is found
    key: Key,
    // Block/flow context and the indentation stack
    context: Context,
}
impl Scanner
{
#[allow(clippy::new_without_default)]
pub fn new() -> Self
{
Self {
offset: 0,
simple_key_allowed: false,
stats: MStats::new(),
state: StreamState::Start,
key: Key::default(),
context: Context::new(),
}
}
pub fn scan_tokens<'de>(
&mut self,
opts: Flags,
base: &'de str,
tokens: &mut Tokens<'de>,
) -> Result<usize>
{
let mut num_tokens = 0;
let starting_tokens = tokens.len();
while self.state != StreamState::Done
&& (starting_tokens == tokens.len() || self.key.possible())
{
if let Some(mut buffer) = base.get(self.offset..)
{
let run = self.scan_next_token(opts, &mut buffer, tokens);
if matches!(run, Err(ScanError::Extend) | Ok(_))
{
self.offset = base.len() - buffer.len();
}
run?;
num_tokens = tokens.len() - starting_tokens;
}
}
Ok(num_tokens)
}
pub fn offset(&self) -> usize
{
self.offset
}
pub fn reset_offset(&mut self)
{
self.offset = 0;
}
fn scan_next_token<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
if self.state == StreamState::Start
{
self.fetch_stream_start(tokens);
return Ok(());
}
self.eat_whitespace(opts, base, COMMENTS)?;
self.expire_stale_saved_key()?;
self.pop_zero_indent_sequence(*base, tokens)?;
self.unroll_indent(tokens, self.stats.column)?;
if base.is_empty() || self.state == StreamState::Done
{
return self.fetch_stream_end(*base, tokens);
}
cache!(~base, 4, opts)?;
match base.as_bytes()
{
[DIRECTIVE, ..] if self.stats.column == 0 => self.fetch_directive(opts, base, tokens),
[b @ b'-', b'-', b'-', ..] | [b @ b'.', b'.', b'.', ..]
if self.stats.column == 0 && isWhiteSpaceZ!(~base, 3) =>
{
self.fetch_document_marker(base, tokens, *b == b'-')
},
[b @ FLOW_MAPPING_START, ..] | [b @ FLOW_SEQUENCE_START, ..] =>
{
self.fetch_flow_collection_start(base, tokens, *b == FLOW_MAPPING_START)
},
[b @ FLOW_MAPPING_END, ..] | [b @ FLOW_SEQUENCE_END, ..] =>
{
self.fetch_flow_collection_end(base, tokens, *b == FLOW_MAPPING_END)
},
[FLOW_ENTRY, ..] => self.fetch_flow_collection_entry(base, tokens),
[BLOCK_ENTRY, ..] if isWhiteSpaceZ!(~base, 1) =>
{
self.fetch_block_collection_entry(base, tokens)
},
[EXPLICIT_KEY, ..] if self.context.is_flow() || isWhiteSpaceZ!(~base, 1) =>
{
self.fetch_explicit_key(base, tokens)
},
[VALUE, ..] if isWhiteSpaceZ!(~base, 1) || self.context.is_flow() =>
{
self.fetch_value(base, tokens)
},
[ANCHOR, ..] | [ALIAS, ..] => self.fetch_anchor(opts, base, tokens),
[TAG, ..] => self.fetch_tag(opts, base, tokens),
[c @ LITERAL, ..] | [c @ FOLDED, ..] if self.context.is_block() =>
{
self.fetch_block_scalar(opts, base, tokens, *c == FOLDED)
},
[SINGLE, ..] | [DOUBLE, ..] => self.fetch_flow_scalar(opts, base, tokens),
_ if self.is_plain_scalar(*base) => self.fetch_plain_scalar(opts, base, tokens),
_ => Err(ScanError::UnknownDelimiter),
}
}
fn fetch_stream_start(&mut self, tokens: &mut Tokens)
{
if self.state == StreamState::Start
{
self.simple_key_allowed = true;
self.state = StreamState::Stream;
let token = Token::StreamStart(StreamEncoding::UTF8);
enqueue!(token, :self.stats => tokens);
}
}
fn fetch_stream_end(&mut self, buffer: &str, tokens: &mut Tokens) -> Result<()>
{
match (self.state, buffer.is_empty())
{
(StreamState::Done, _) =>
{},
(_, true) =>
{
self.unroll_indent(tokens, STARTING_INDENT)?;
self.remove_saved_key()?;
self.state = StreamState::Done;
enqueue!(Token::StreamEnd, :self.stats => tokens);
},
(_, false) =>
{},
}
Ok(())
}
fn fetch_document_marker(
&mut self,
buffer: &mut &str,
tokens: &mut Tokens,
start: bool,
) -> Result<()>
{
let token = match start
{
true => Token::DocumentStart,
false => Token::DocumentEnd,
};
self.unroll_indent(tokens, STARTING_INDENT)?;
self.remove_saved_key()?;
self.simple_key_allowed = false;
advance!(*buffer, :self.stats, 3);
enqueue!(token, :self.stats => tokens);
Ok(())
}
fn fetch_directive<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
let mut buffer = *base;
let mut stats = MStats::new();
if !check!(~buffer => [DIRECTIVE, ..])
{
return Ok(());
}
cache!(~buffer, @1, 4, opts)?;
let kind = DirectiveKind::new(&buffer[1..])?;
advance!(buffer, :stats, 1 + kind.len());
let token = scan_directive(opts, &mut buffer, &mut stats, &kind)?;
self.unroll_indent(tokens, STARTING_INDENT)?;
self.remove_saved_key()?;
self.simple_key_allowed = false;
advance!(*base, base.len() - buffer.len());
self.stats += stats;
enqueue!(token, :self.stats => tokens);
Ok(())
}
fn fetch_tag<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
let mut buffer = *base;
let mut stats = MStats::new();
if !check!(~buffer => [TAG, ..])
{
return Ok(());
}
let (token, amt) = scan_node_tag(opts, buffer, &mut stats)?;
advance!(buffer, amt);
self.save_key(!REQUIRED)?;
self.simple_key_allowed = false;
advance!(*base, base.len() - buffer.len());
self.stats += stats;
enqueue!(token, :self.stats => tokens);
Ok(())
}
fn fetch_anchor<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
let mut buffer = *base;
let mut stats = MStats::new();
let kind = match buffer.as_bytes()
{
[b @ ALIAS, ..] | [b @ ANCHOR, ..] =>
{
AnchorKind::new(b).expect("we only bind * or & so this cannot fail")
},
_ => return Ok(()),
};
let token = scan_anchor(opts, &mut buffer, &mut stats, &kind)?;
self.save_key(!REQUIRED)?;
self.simple_key_allowed = false;
advance!(*base, base.len() - buffer.len());
self.stats += stats;
enqueue!(token, :self.stats => tokens);
Ok(())
}
fn fetch_flow_scalar<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
let buffer = *base;
let mut stats = MStats::new();
let single = check!(~buffer => [SINGLE, ..]);
if !check!(~buffer => [SINGLE, ..] | [DOUBLE, ..])
{
return Ok(());
}
let (token, amt) = scan_flow_scalar(opts, buffer, &mut stats, single)?;
self.save_key(!REQUIRED)?;
self.simple_key_allowed = false;
advance!(*base, amt);
self.stats += stats;
enqueue!(token, :self.stats => tokens);
Ok(())
}
fn fetch_plain_scalar<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
let buffer = *base;
let mut stats = self.stats.clone();
let (token, amt) = scan_plain_scalar(opts, buffer, &mut stats, &self.context)?;
self.save_key(!REQUIRED)?;
self.simple_key_allowed = false;
advance!(*base, amt);
self.stats = stats;
enqueue!(token, :self.stats => tokens);
Ok(())
}
fn fetch_block_scalar<'de>(
&mut self,
opts: Flags,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
fold: bool,
) -> Result<()>
{
let buffer = *base;
let mut stats = self.stats.clone();
self.remove_saved_key()?;
self.simple_key_allowed = true;
let (token, amt) = scan_block_scalar(opts, buffer, &mut stats, &self.context, fold)?;
advance!(*base, amt);
self.stats = stats;
enqueue!(token, :self.stats => tokens);
Ok(())
}
fn fetch_explicit_key<'de>(
&mut self,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
let block_context = self.context.is_block();
if block_context
{
if !self.simple_key_allowed
{
return Err(ScanError::InvalidKey);
}
roll_indent(
&mut self.context,
tokens,
self.stats.read,
self.stats.lines,
self.stats.column,
BLOCK_MAP,
)?;
}
self.remove_saved_key()?;
self.simple_key_allowed = block_context;
advance!(*base, :self.stats, 1);
enqueue!(Token::Key, :self.stats => tokens);
Ok(())
}
fn fetch_value<'de>(&mut self, base: &mut &'de str, tokens: &mut Tokens<'de>) -> Result<()>
{
match self.key.saved().take()
{
Some(saved) if saved.key().allowed() =>
{
let key_stats = saved.stats();
roll_indent(
&mut self.context,
tokens,
key_stats.read,
key_stats.lines,
key_stats.column,
BLOCK_MAP,
)?;
enqueue!(Token::Key, :key_stats => tokens);
self.simple_key_allowed = false;
},
_ =>
{
let block_context = self.context.is_block();
if block_context
{
if !self.simple_key_allowed
{
return Err(ScanError::InvalidValue);
}
roll_indent(
&mut self.context,
tokens,
self.stats.read,
self.stats.lines,
self.stats.column,
BLOCK_MAP,
)?;
}
self.simple_key_allowed = block_context;
},
}
advance!(*base, :self.stats, 1);
enqueue!(Token::Value, :self.stats => tokens);
Ok(())
}
fn fetch_flow_collection_start<'de>(
&mut self,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
map: bool,
) -> Result<()>
{
let token = match map
{
true => Token::FlowMappingStart,
false => Token::FlowSequenceStart,
};
self.context.flow_increment()?;
advance!(*base, :self.stats, 1);
enqueue!(token, :self.stats => tokens);
self.save_key(!REQUIRED)?;
self.simple_key_allowed = true;
Ok(())
}
fn fetch_flow_collection_end<'de>(
&mut self,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
map: bool,
) -> Result<()>
{
let token = match map
{
true => Token::FlowMappingEnd,
false => Token::FlowSequenceEnd,
};
self.remove_saved_key()?;
self.context.flow_decrement()?;
self.simple_key_allowed = false;
advance!(*base, :self.stats, 1);
enqueue!(token, :self.stats => tokens);
Ok(())
}
fn fetch_flow_collection_entry<'de>(
&mut self,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
self.remove_saved_key()?;
self.simple_key_allowed = true;
advance!(*base, :self.stats, 1);
let token = Token::FlowEntry;
enqueue!(token, :self.stats => tokens);
Ok(())
}
fn fetch_block_collection_entry<'de>(
&mut self,
base: &mut &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
match self.context.is_block() && self.simple_key_allowed
{
true => roll_indent(
&mut self.context,
tokens,
self.stats.read,
self.stats.lines,
self.stats.column,
!BLOCK_MAP,
),
false => Err(ScanError::InvalidBlockEntry),
}?;
let is_zero_indented = self.context.indents().last().map_or(false, |entry| {
entry.indent() == self.stats.column && entry.line < self.stats.lines
});
if is_zero_indented
{
let current = self.stats.lines;
if let Some(entry) = self.context.indents_mut().last_mut()
{
entry.line = current;
}
}
self.remove_saved_key()?;
self.simple_key_allowed = true;
advance!(*base, :self.stats, 1);
let token = Token::BlockEntry;
enqueue!(token, :self.stats => tokens);
Ok(())
}
fn unroll_indent<'de, T>(&mut self, tokens: &mut Tokens<'de>, column: T) -> Result<()>
where
T: Into<Indent>,
{
unroll_indent(&mut self.context, self.stats.read, tokens, column)
}
fn expire_stale_saved_key(&mut self) -> Result<()>
{
if let Some(ref mut saved) = self.key.saved()
{
let key = saved.key();
let key_stats = saved.stats();
if key.allowed()
&& (key_stats.lines < self.stats.lines || key_stats.read + 1024 < self.stats.read)
{
if key.required()
{
return Err(ScanError::MissingValue);
}
*saved.key_mut() = KeyPossible::No
}
}
Ok(())
}
fn pop_zero_indent_sequence<'de>(
&mut self,
base: &'de str,
tokens: &mut Tokens<'de>,
) -> Result<()>
{
if let Some(entry) = self.context.indents().last()
{
if entry.line < self.stats.lines
&& entry.zero_indented
&& entry.kind == Marker::BlockSequenceStart
&& (!check!(~base => b'-'))
{
let read = self.stats.read;
self.context.pop_indent(|_| {
enqueue!(Token::BlockEnd, read => tokens);
Ok(())
})?;
}
}
Ok(())
}
fn save_key(&mut self, required: bool) -> Result<()>
{
let required =
required || (self.context.is_block() && self.context.indent() == self.stats.column);
if self.simple_key_allowed
{
self.remove_saved_key()?;
self.key.save(self.stats.clone(), required)
}
Ok(())
}
fn remove_saved_key(&mut self) -> Result<()>
{
if let Some(saved) = self.key.saved().take()
{
if saved.key().required()
{
return Err(ScanError::MissingValue);
}
}
Ok(())
}
fn is_plain_scalar(&self, base: &str) -> bool
{
if isWhiteSpaceZ!(~base)
{
return false;
}
match base.as_bytes()
{
[DIRECTIVE, ..]
| [ANCHOR, ..]
| [ALIAS, ..]
| [TAG, ..]
| [SINGLE, ..]
| [DOUBLE, ..]
| [FLOW_MAPPING_START, ..]
| [FLOW_SEQUENCE_START, ..]
| [FLOW_MAPPING_END, ..]
| [FLOW_SEQUENCE_END]
| [FLOW_ENTRY, ..]
| [LITERAL, ..]
| [FOLDED, ..]
| [COMMENT, ..]
| [RESERVED_1, ..]
| [RESERVED_2, ..] => false,
[VALUE, ..] | [EXPLICIT_KEY, ..] | [BLOCK_ENTRY, ..]
if !is_plain_safe_c(base, 1, self.context.is_block()) =>
{
false
},
_ => true,
}
}
fn eat_whitespace(&mut self, opts: Flags, buffer: &mut &str, comments: bool) -> Result<usize>
{
let mut stats = MStats::new();
let amt = eat_whitespace(opts, *buffer, &mut stats, comments)?;
if stats.lines != 0
{
self.simple_key_allowed = true;
}
advance!(*buffer, amt);
self.stats += stats;
Ok(amt)
}
}
/// Coarse progress of the token stream: before StreamStart has been
/// emitted, actively scanning, or after StreamEnd.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum StreamState
{
    // StreamStart not yet emitted
    Start,
    // Actively producing tokens
    Stream,
    // StreamEnd emitted; nothing left to scan
    Done,
}
/// Consume blanks, line breaks and (optionally) '#' comments from the
/// head of `base`, recording what was skipped in `stats`, and return
/// the number of bytes consumed.
fn eat_whitespace(opts: Flags, base: &str, stats: &mut MStats, comments: bool) -> Result<usize>
{
    let mut buffer = base;
    // True while inside a comment, chomping to the end of the line
    let mut chomp_line = false;
    let mut done = false;
    loop
    {
        // Ensure at least one byte is buffered before peeking
        cache!(~buffer, 1, opts)?;
        let (blank, brk) = (isBlank!(~buffer), isBreak!(~buffer));
        match (blank, brk)
        {
            // Spaces/tabs are always skipped
            (true, _) =>
            {},
            // A line break terminates any comment in progress
            (_, true) => chomp_line = false,
            // Comment start (when comment skipping is enabled)
            _ if comments && check!(~buffer => b'#') => chomp_line = true,
            // Inside a comment: consume until break or end of input
            _ if chomp_line && !check!(~buffer => []) =>
            {},
            // Any other byte ends the whitespace run
            _ => done = true,
        }
        if done
        {
            break;
        }
        if brk
        {
            // Breaks update line/column accounting specially
            advance!(buffer, :stats, @line);
        }
        else
        {
            advance!(buffer, :stats, 1);
        }
    }
    Ok(base.len() - buffer.len())
}
/// Push a new indentation level onto `context` (entering a block
/// mapping or sequence), enqueueing the matching start token at
/// stream position `mark`.
///
/// In block context a level is added when the column increases, or —
/// for sequences only — at the *same* column when directly nested in
/// a block mapping (a "zero indented" sequence). Flow context never
/// touches indentation.
fn roll_indent<'de>(
    context: &mut Context,
    tokens: &mut Tokens<'de>,
    mark: usize,
    line: usize,
    column: usize,
    map: bool,
) -> Result<()>
{
    let token = match map
    {
        true => Token::BlockMappingStart,
        false => Token::BlockSequenceStart,
    };
    if context.is_block()
    {
        if context.indent() < column
        {
            context.indent_increment(column, line, map)?;
            enqueue!(token, mark => tokens);
        }
        // Sequence at the same indent as the enclosing mapping:
        // record it as zero-indented so it can be popped correctly
        else if (!map) && context.indent() == column
        {
            let add_token = context
                .indents()
                .last()
                .map_or(false, |entry| entry.kind == Marker::BlockMappingStart);
            if add_token
            {
                context.indent_increment(column, line, map)?;
                context.indents_mut().last_mut().unwrap().zero_indented = true;
                enqueue!(token, mark => tokens);
            }
        }
    }
    Ok(())
}
/// Pop indentation levels from `context` down to `column`, enqueueing
/// one BlockEnd token (at stream position `mark`) per level popped.
/// A no-op in flow context.
fn unroll_indent<'de, T>(
    context: &mut Context,
    mark: usize,
    tokens: &mut Tokens<'de>,
    column: T,
) -> Result<()>
where
    T: Into<Indent>,
{
    if context.is_block()
    {
        // Called once for every indent level that gets removed
        let generator = |_| {
            let token = Token::BlockEnd;
            enqueue!(token, mark => tokens);
            Ok(())
        };
        context.indent_decrement(column, generator)?;
    }
    Ok(())
}
/// Is the byte at `offset` in `base` "plain safe" — i.e. may the
/// character before it (':', '?' or '-') still begin a plain scalar?
///
/// In block context every character is plain-safe; in flow context
/// the flow indicators ',', '[', ']', '{', '}' are not (YAML 1.2
/// production ns-plain-safe).
fn is_plain_safe_c(base: &str, offset: usize, block_context: bool) -> bool
{
    // Original expression `block_context || (!block_context && x)`
    // is logically equivalent to `block_context || x`
    block_context || !check!(~base, offset => b',' | b'[' | b']' | b'{' | b'}')
}
// Byte values of the YAML indicator characters (YAML 1.2, ch. 5.3)
const DIRECTIVE: u8 = b'%';
const ANCHOR: u8 = b'&';
const ALIAS: u8 = b'*';
const TAG: u8 = b'!';
const SINGLE: u8 = b'\'';
const DOUBLE: u8 = b'"';
const VALUE: u8 = b':';
const FLOW_MAPPING_START: u8 = b'{';
const FLOW_MAPPING_END: u8 = b'}';
const FLOW_SEQUENCE_START: u8 = b'[';
const FLOW_SEQUENCE_END: u8 = b']';
const FLOW_ENTRY: u8 = b',';
const BLOCK_ENTRY: u8 = b'-';
const EXPLICIT_KEY: u8 = b'?';
const LITERAL: u8 = b'|';
const FOLDED: u8 = b'>';
const COMMENT: u8 = b'#';
// '@' and '`' are reserved characters and may not start a plain scalar
const RESERVED_1: u8 = b'@';
const RESERVED_2: u8 = b'`';
// Named booleans for readability at call sites (e.g. `!REQUIRED`)
const COMMENTS: bool = true;
const REQUIRED: bool = true;
const BLOCK_MAP: bool = true;
#[cfg(test)]
mod tests
{
    #[macro_use]
    mod macros;
    mod anchor;
    mod collection;
    mod complex;
    mod directive;
    mod document;
    mod key;
    mod scalar;
    mod tag;
    mod whitespace;
    #[cfg(feature = "test_buffer")]
    mod str_reader;
    use super::*;
    use crate::token::{ScalarStyle::*, Token::*};

    // Flag set used by every scanner test; varies with crate features
    pub(in crate::scanner) const TEST_FLAGS: Flags = test_flags();

    /// Test helper: iterates over the tokens the Scanner produces for
    /// a fixed input string, in either buffered or unbuffered mode
    /// depending on the `test_buffer` feature.
    struct ScanIter<'de>
    {
        #[cfg(feature = "test_buffer")]
        data: str_reader::StrReader<'de>,
        #[cfg(not(feature = "test_buffer"))]
        data: &'de str,
        opts: Flags,
        scan: Scanner,
        tokens: Tokens<'de>,
        done: bool,
    }
    impl<'de> ScanIter<'de>
    {
        /// Create a new token iterator over `data`
        pub fn new(data: &'de str) -> Self
        {
            Self {
                #[cfg(feature = "test_buffer")]
                data: str_reader::StrReader::new(data, str_reader::StrReader::BUF_SIZE),
                #[cfg(not(feature = "test_buffer"))]
                data,
                opts: TEST_FLAGS,
                scan: Scanner::new(),
                tokens: Tokens::new(),
                done: false,
            }
        }
        /// Pop the next token, scanning for more when the queue runs dry
        pub fn next_token(&mut self) -> Result<Option<Token<'de>>>
        {
            if (!self.done) && self.tokens.is_empty()
            {
                self.get_next_token()?;
            }
            if !self.done
            {
                self.tokens.pop().map(|e| e.into_token()).transpose()
            }
            else
            {
                Ok(None)
            }
        }
        // Buffered variant: grow the simulated read window whenever the
        // scanner reports it needs more bytes (ScanError::Extend)
        #[cfg(feature = "test_buffer")]
        fn get_next_token(&mut self) -> Result<()>
        {
            let count = loop
            {
                match self
                    .scan
                    .scan_tokens(self.opts, self.data.read(), &mut self.tokens)
                {
                    Ok(count) => break count,
                    Err(e) if e == ScanError::Extend =>
                    {
                        self.data.expand(str_reader::StrReader::BUF_EXTEND);
                        // Once fully expanded, stop advertising that the
                        // buffer can grow
                        if !self.data.expandable()
                        {
                            self.opts.remove(O_EXTENDABLE)
                        }
                        continue;
                    },
                    Err(e) => return Err(e),
                };
            };
            // Zero new tokens means the stream is exhausted
            if count == 0
            {
                self.done = true
            }
            Ok(())
        }
        // Unbuffered variant: the entire input is visible at once
        #[cfg(not(feature = "test_buffer"))]
        fn get_next_token(&mut self) -> Result<()>
        {
            if let 0 = self
                .scan
                .scan_tokens(self.opts, self.data, &mut self.tokens)?
            {
                self.done = true
            }
            Ok(())
        }
    }
    impl<'de> Iterator for ScanIter<'de>
    {
        type Item = Result<Token<'de>>;
        fn next(&mut self) -> Option<Self::Item>
        {
            // dbg! is deliberate here: it echoes every token while a
            // test runs, which is invaluable when a test fails
            dbg!(self.next_token().transpose())
        }
    }
    impl<'de> std::iter::FusedIterator for ScanIter<'de> {}

    /// Compute the MStats that scanning all of `base` would produce,
    /// for building test expectations.
    fn stats_of(base: &str) -> MStats
    {
        let mut buffer = base;
        let mut stats = MStats::new();
        loop
        {
            if check!(~buffer => [])
            {
                break;
            }
            else if isBlank!(~buffer)
            {
                advance!(buffer, :stats, 1);
            }
            else if isBreak!(~buffer)
            {
                advance!(buffer, :stats, @line);
            }
            else
            {
                // Advance by one full UTF-8 code point
                let skip = match widthOf!(~buffer)
                {
                    0 => unreachable!(),
                    n => n,
                };
                advance!(buffer, :stats, skip);
            }
        }
        stats
    }
    /// Assemble the scanner flag set for tests from enabled features
    const fn test_flags() -> Flags
    {
        #[allow(unused_mut)]
        let mut flags = O_ZEROED;
        if cfg!(feature = "test_buffer")
        {
            flags = flags.union(O_EXTENDABLE)
        }
        if cfg!(feature = "test_lazy")
        {
            flags = flags.union(O_LAZY)
        }
        flags
    }
}