use crate::{
char_traits::{is_blank_or_breakz, is_breakz, is_flow},
input::{BorrowedInput, Input, SkipTabs},
};
use alloc::string::String;
#[allow(clippy::module_name_repetitions)]
pub struct StrInput<'a> {
original: &'a str,
buffer: &'a str,
lookahead: usize,
}
impl<'a> StrInput<'a> {
#[must_use]
pub fn new(input: &'a str) -> Self {
Self {
original: input,
buffer: input,
lookahead: 0,
}
}
#[inline]
#[must_use]
fn consumed_bytes(&self) -> usize {
self.original.len() - self.buffer.len()
}
}
impl Input for StrInput<'_> {
#[inline]
fn lookahead(&mut self, x: usize) {
self.lookahead = self.lookahead.max(x);
}
#[inline]
fn buflen(&self) -> usize {
self.lookahead
}
#[inline]
fn bufmaxlen(&self) -> usize {
BUFFER_LEN
}
fn buf_is_empty(&self) -> bool {
self.buflen() == 0
}
#[inline]
fn raw_read_ch(&mut self) -> char {
let mut chars = self.buffer.chars();
if let Some(c) = chars.next() {
self.buffer = chars.as_str();
c
} else {
'\0'
}
}
#[inline]
fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
if let Some((c, sub_str)) = split_first_char(self.buffer) {
if is_breakz(c) {
None
} else {
self.buffer = sub_str;
Some(c)
}
} else {
None
}
}
#[inline]
fn skip(&mut self) {
if !self.buffer.is_empty() {
let b = self.buffer.as_bytes()[0];
if b < 0x80 {
self.buffer = &self.buffer[1..];
} else {
let mut chars = self.buffer.chars();
chars.next();
self.buffer = chars.as_str();
}
}
}
#[inline]
fn skip_n(&mut self, count: usize) {
let mut chars = self.buffer.chars();
for _ in 0..count {
if chars.next().is_none() {
break;
}
}
self.buffer = chars.as_str();
}
#[inline]
fn peek(&self) -> char {
if self.buffer.is_empty() {
return '\0';
}
let b = self.buffer.as_bytes()[0];
if b < 0x80 {
b as char
} else {
self.buffer.chars().next().unwrap()
}
}
#[inline]
fn peek_nth(&self, n: usize) -> char {
if n == 0 {
return self.peek();
}
let bytes = self.buffer.as_bytes();
if n == 1 && bytes.len() >= 2 && bytes[0] < 0x80 && bytes[1] < 0x80 {
return bytes[1] as char;
}
let mut chars = self.buffer.chars();
for _ in 0..n {
if chars.next().is_none() {
return '\0';
}
}
chars.next().unwrap_or('\0')
}
#[inline]
fn byte_offset(&self) -> Option<usize> {
Some(self.consumed_bytes())
}
#[inline]
fn slice_bytes(&self, start: usize, end: usize) -> Option<&str> {
debug_assert!(start <= end);
debug_assert!(end <= self.original.len());
self.original.get(start..end)
}
#[inline]
fn look_ch(&mut self) -> char {
self.lookahead(1);
self.peek()
}
#[inline]
fn next_char_is(&self, c: char) -> bool {
self.peek() == c
}
#[inline]
fn nth_char_is(&self, n: usize, c: char) -> bool {
self.peek_nth(n) == c
}
#[inline]
fn next_2_are(&self, c1: char, c2: char) -> bool {
let mut chars = self.buffer.chars();
chars.next() == Some(c1) && chars.next() == Some(c2)
}
#[inline]
fn next_3_are(&self, c1: char, c2: char, c3: char) -> bool {
let mut chars = self.buffer.chars();
chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3)
}
#[inline]
fn next_is_document_indicator(&self) -> bool {
if self.buffer.len() < 3 {
false
} else {
let bytes = self.buffer.as_bytes();
(bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
&& (bytes[0] == b'.' || bytes[0] == b'-')
&& bytes[0] == bytes[1]
&& bytes[1] == bytes[2]
}
}
#[inline]
fn next_is_document_start(&self) -> bool {
if self.buffer.len() < 3 {
false
} else {
let bytes = self.buffer.as_bytes();
(bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
&& bytes[0] == b'-'
&& bytes[1] == b'-'
&& bytes[2] == b'-'
}
}
#[inline]
fn next_is_document_end(&self) -> bool {
if self.buffer.len() < 3 {
false
} else {
let bytes = self.buffer.as_bytes();
(bytes.len() == 3 || matches!(bytes[3], b' ' | b'\t' | 0 | b'\n' | b'\r'))
&& bytes[0] == b'.'
&& bytes[1] == b'.'
&& bytes[2] == b'.'
}
}
fn skip_ws_to_eol(&mut self, skip_tabs: SkipTabs) -> (usize, Result<SkipTabs, &'static str>) {
assert!(!matches!(skip_tabs, SkipTabs::Result(..)));
let mut new_str = self.buffer;
let mut has_yaml_ws = false;
let mut encountered_tab = false;
if skip_tabs == SkipTabs::Yes {
loop {
if let Some(sub_str) = new_str.strip_prefix(' ') {
has_yaml_ws = true;
new_str = sub_str;
} else if let Some(sub_str) = new_str.strip_prefix('\t') {
encountered_tab = true;
new_str = sub_str;
} else {
break;
}
}
} else {
while let Some(sub_str) = new_str.strip_prefix(' ') {
has_yaml_ws = true;
new_str = sub_str;
}
}
let mut chars_consumed = self.buffer.len() - new_str.len();
if !new_str.is_empty() && new_str.as_bytes()[0] == b'#' {
if !encountered_tab && !has_yaml_ws {
return (
chars_consumed,
Err("comments must be separated from other tokens by whitespace"),
);
}
while let Some((c, sub_str)) = split_first_char(new_str) {
if is_breakz(c) {
break;
}
new_str = sub_str;
chars_consumed += 1;
}
}
self.buffer = new_str;
(
chars_consumed,
Ok(SkipTabs::Result(encountered_tab, has_yaml_ws)),
)
}
#[allow(clippy::inline_always)]
#[inline(always)]
fn next_can_be_plain_scalar(&self, in_flow: bool) -> bool {
let nc = self.peek_nth(1);
match self.peek() {
':' if is_blank_or_breakz(nc) || (in_flow && is_flow(nc)) => false,
c if in_flow && is_flow(c) => false,
_ => true,
}
}
#[inline]
fn next_is_blank_or_break(&self) -> bool {
!self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b' ' | b'\t' | b'\n' | b'\r')
}
#[inline]
fn next_is_blank_or_breakz(&self) -> bool {
self.buffer.is_empty()
|| matches!(self.buffer.as_bytes()[0], b' ' | b'\t' | 0 | b'\n' | b'\r')
}
#[inline]
fn next_is_blank(&self) -> bool {
!self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b' ' | b'\t')
}
#[inline]
fn next_is_break(&self) -> bool {
!self.buffer.is_empty() && matches!(self.buffer.as_bytes()[0], b'\n' | b'\r')
}
#[inline]
fn next_is_breakz(&self) -> bool {
self.buffer.is_empty() || matches!(self.buffer.as_bytes()[0], 0 | b'\n' | b'\r')
}
#[inline]
fn next_is_z(&self) -> bool {
self.buffer.is_empty() || self.buffer.as_bytes()[0] == 0
}
#[inline]
fn next_is_flow(&self) -> bool {
!self.buffer.is_empty()
&& matches!(self.buffer.as_bytes()[0], b',' | b'[' | b']' | b'{' | b'}')
}
#[inline]
fn next_is_digit(&self) -> bool {
!self.buffer.is_empty() && self.buffer.as_bytes()[0].is_ascii_digit()
}
#[inline]
fn next_is_alpha(&self) -> bool {
!self.buffer.is_empty()
&& matches!(self.buffer.as_bytes()[0], b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-')
}
fn skip_while_non_breakz(&mut self) -> usize {
let mut byte_pos = 0;
let mut chars_consumed = 0;
for (i, c) in self.buffer.char_indices() {
if is_breakz(c) {
break;
}
byte_pos = i + c.len_utf8();
chars_consumed += 1;
}
self.buffer = &self.buffer[byte_pos..];
chars_consumed
}
#[inline]
fn skip_while_blank(&mut self) -> usize {
let bytes = self.buffer.as_bytes();
let mut i = 0;
while i < bytes.len() {
match bytes[i] {
b' ' | b'\t' => i += 1,
_ => break,
}
}
self.buffer = &self.buffer[i..];
i
}
fn fetch_while_is_alpha(&mut self, out: &mut String) -> usize {
let bytes = self.buffer.as_bytes();
let mut i = 0;
while i < bytes.len() {
match bytes[i] {
b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-' => i += 1,
_ => break,
}
}
out.push_str(&self.buffer[..i]);
self.buffer = &self.buffer[i..];
i
}
fn fetch_while_is_yaml_non_space(&mut self, out: &mut String) -> usize {
let byte_pos = self
.buffer
.chars()
.take_while(|c| crate::char_traits::is_yaml_non_space(*c))
.map(char::len_utf8)
.sum();
out.push_str(&self.buffer[..byte_pos]);
self.buffer = &self.buffer[byte_pos..];
byte_pos
}
fn fetch_plain_scalar_chunk(
&mut self,
out: &mut String,
_count: usize,
flow_level_gt_0: bool,
) -> (bool, usize) {
let bytes = self.buffer.as_bytes();
let len = bytes.len();
let mut byte_pos = 0;
let mut chars_consumed = 0;
while byte_pos < len {
let b = bytes[byte_pos];
if b < 0x80 {
let c = b as char;
if crate::char_traits::is_blank_or_breakz(c) {
out.push_str(&self.buffer[..byte_pos]);
self.buffer = &self.buffer[byte_pos..];
return (true, chars_consumed);
}
if flow_level_gt_0 && crate::char_traits::is_flow(c) {
out.push_str(&self.buffer[..byte_pos]);
self.buffer = &self.buffer[byte_pos..];
return (true, chars_consumed);
}
if c == ':' {
let next_byte = if byte_pos + 1 < len {
bytes[byte_pos + 1]
} else {
0
};
let is_stop = if next_byte < 0x80 {
let nc = next_byte as char;
crate::char_traits::is_blank_or_breakz(nc)
|| (flow_level_gt_0 && crate::char_traits::is_flow(nc))
} else {
false
};
if is_stop {
out.push_str(&self.buffer[..byte_pos]);
self.buffer = &self.buffer[byte_pos..];
return (true, chars_consumed);
}
}
byte_pos += 1;
chars_consumed += 1;
} else {
let mut chars = self.buffer[byte_pos..].chars();
let c = chars.next().unwrap();
byte_pos += c.len_utf8();
chars_consumed += 1;
}
}
out.push_str(&self.buffer[..byte_pos]);
self.buffer = &self.buffer[byte_pos..];
(true, chars_consumed)
}
}
impl<'a> BorrowedInput<'a> for StrInput<'a> {
#[inline]
fn slice_borrowed(&self, start: usize, end: usize) -> Option<&'a str> {
debug_assert!(start <= end);
debug_assert!(end <= self.original.len());
self.original.get(start..end)
}
}
const BUFFER_LEN: usize = 128;
#[inline]
fn split_first_char(s: &str) -> Option<(char, &str)> {
let mut chars = s.chars();
let c = chars.next()?;
Some((c, chars.as_str()))
}
#[cfg(test)]
mod test {
use crate::input::Input;
use super::StrInput;
#[test]
pub fn is_document_start() {
let input = StrInput::new("---\n");
assert!(input.next_is_document_start());
assert!(input.next_is_document_indicator());
let input = StrInput::new("---");
assert!(input.next_is_document_start());
assert!(input.next_is_document_indicator());
let input = StrInput::new("...\n");
assert!(!input.next_is_document_start());
assert!(input.next_is_document_indicator());
let input = StrInput::new("--- ");
assert!(input.next_is_document_start());
assert!(input.next_is_document_indicator());
}
#[test]
pub fn is_document_end() {
let input = StrInput::new("...\n");
assert!(input.next_is_document_end());
assert!(input.next_is_document_indicator());
let input = StrInput::new("...");
assert!(input.next_is_document_end());
assert!(input.next_is_document_indicator());
let input = StrInput::new("---\n");
assert!(!input.next_is_document_end());
assert!(input.next_is_document_indicator());
let input = StrInput::new("... ");
assert!(input.next_is_document_end());
assert!(input.next_is_document_indicator());
}
}