use crate::{Position, Result};
use std::borrow::Cow;
/// A string that is either borrowed from the source buffer or owned.
///
/// Thin wrapper around [`Cow<str>`] used by the zero-copy scanner so token
/// payloads can reference the input text without allocating. Equality
/// compares contents, so a borrowed and an owned copy of the same text
/// compare equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ZeroString<'a> {
    data: Cow<'a, str>,
}

impl<'a> ZeroString<'a> {
    /// Wraps a string slice without allocating.
    pub fn borrowed(s: &'a str) -> Self {
        Self {
            data: Cow::Borrowed(s),
        }
    }

    /// Takes ownership of an already-allocated `String`.
    pub fn owned(s: String) -> Self {
        Self {
            data: Cow::Owned(s),
        }
    }

    /// Views the contents as a plain `&str`.
    pub fn as_str(&self) -> &str {
        self.data.as_ref()
    }

    /// Consumes `self`, allocating only if the data was still borrowed.
    pub fn into_owned(self) -> String {
        self.data.into_owned()
    }

    /// Returns `true` while the data still points into the source buffer.
    pub fn is_borrowed(&self) -> bool {
        match self.data {
            Cow::Borrowed(_) => true,
            Cow::Owned(_) => false,
        }
    }

    /// Length of the contents in bytes.
    pub fn len(&self) -> usize {
        self.as_str().len()
    }

    /// Returns `true` for a zero-length string.
    pub fn is_empty(&self) -> bool {
        self.as_str().is_empty()
    }
}

impl<'a> From<&'a str> for ZeroString<'a> {
    fn from(s: &'a str) -> Self {
        ZeroString::borrowed(s)
    }
}

impl<'a> From<String> for ZeroString<'a> {
    fn from(s: String) -> Self {
        ZeroString::owned(s)
    }
}

impl<'a> AsRef<str> for ZeroString<'a> {
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
/// Token kinds produced by the zero-copy scanner.
///
/// Variants carrying text hold a [`ZeroString`] so the payload can borrow
/// directly from the input buffer instead of allocating. The shapes mirror
/// `crate::scanner::TokenType` (see `ZeroToken::into_owned`).
#[derive(Debug, Clone, PartialEq)]
pub enum ZeroTokenType<'a> {
/// Beginning of the token stream.
StreamStart,
/// End of the token stream.
StreamEnd,
/// Start-of-document marker.
DocumentStart,
/// End-of-document marker.
DocumentEnd,
BlockSequenceStart,
BlockMappingStart,
BlockEnd,
FlowSequenceStart,
FlowSequenceEnd,
FlowMappingStart,
FlowMappingEnd,
/// Entry indicator inside a block sequence.
BlockEntry,
/// Entry separator inside a flow collection.
FlowEntry,
/// Mapping-key indicator.
Key,
/// Mapping-value indicator.
Value,
/// A scalar value together with its quoting style.
Scalar(ZeroString<'a>, crate::scanner::QuoteStyle),
/// Literal (`|`-style) block scalar payload.
BlockScalarLiteral(ZeroString<'a>),
/// Folded (`>`-style) block scalar payload.
BlockScalarFolded(ZeroString<'a>),
/// Anchor name payload.
Anchor(ZeroString<'a>),
/// Alias name payload.
Alias(ZeroString<'a>),
/// Tag payload.
Tag(ZeroString<'a>),
/// Comment text payload.
Comment(ZeroString<'a>),
}
/// A scanned token together with the source span it was produced from.
#[derive(Debug, Clone, PartialEq)]
pub struct ZeroToken<'a> {
/// The token's kind, including any (possibly borrowed) string payload.
pub token_type: ZeroTokenType<'a>,
/// Position where the token begins.
pub start_position: Position,
/// Position where the token ends.
pub end_position: Position,
}
impl<'a> ZeroToken<'a> {
/// Creates a token covering `start_position..end_position`.
pub fn new(
token_type: ZeroTokenType<'a>,
start_position: Position,
end_position: Position,
) -> Self {
Self {
token_type,
start_position,
end_position,
}
}
/// Convenience constructor for a token whose start and end coincide.
pub fn simple(token_type: ZeroTokenType<'a>, position: Position) -> Self {
Self::new(token_type, position, position)
}
/// Converts this zero-copy token into the crate's owned `scanner::Token`.
///
/// Each string payload is converted via `ZeroString::into_owned`, which
/// allocates only if the payload was still borrowed. Positions are
/// carried over unchanged.
pub fn into_owned(self) -> crate::scanner::Token {
use crate::scanner::{Token, TokenType};
// Mechanical 1:1 mapping between the zero-copy and owned token kinds.
let token_type = match self.token_type {
ZeroTokenType::StreamStart => TokenType::StreamStart,
ZeroTokenType::StreamEnd => TokenType::StreamEnd,
ZeroTokenType::DocumentStart => TokenType::DocumentStart,
ZeroTokenType::DocumentEnd => TokenType::DocumentEnd,
ZeroTokenType::BlockSequenceStart => TokenType::BlockSequenceStart,
ZeroTokenType::BlockMappingStart => TokenType::BlockMappingStart,
ZeroTokenType::BlockEnd => TokenType::BlockEnd,
ZeroTokenType::FlowSequenceStart => TokenType::FlowSequenceStart,
ZeroTokenType::FlowSequenceEnd => TokenType::FlowSequenceEnd,
ZeroTokenType::FlowMappingStart => TokenType::FlowMappingStart,
ZeroTokenType::FlowMappingEnd => TokenType::FlowMappingEnd,
ZeroTokenType::BlockEntry => TokenType::BlockEntry,
ZeroTokenType::FlowEntry => TokenType::FlowEntry,
ZeroTokenType::Key => TokenType::Key,
ZeroTokenType::Value => TokenType::Value,
ZeroTokenType::Scalar(s, style) => TokenType::Scalar(s.into_owned(), style),
ZeroTokenType::BlockScalarLiteral(s) => TokenType::BlockScalarLiteral(s.into_owned()),
ZeroTokenType::BlockScalarFolded(s) => TokenType::BlockScalarFolded(s.into_owned()),
ZeroTokenType::Anchor(s) => TokenType::Anchor(s.into_owned()),
ZeroTokenType::Alias(s) => TokenType::Alias(s.into_owned()),
ZeroTokenType::Tag(s) => TokenType::Tag(s.into_owned()),
ZeroTokenType::Comment(s) => TokenType::Comment(s.into_owned()),
};
Token::new(token_type, self.start_position, self.end_position)
}
}
/// A reusable pool of [`ZeroToken`] slots that avoids per-scan reallocation.
///
/// Slots are handed out in order by `get_token`; `reset` rewinds to the
/// first slot so the same allocations can be reused for the next pass.
pub struct TokenPool<'a> {
    /// Backing storage; grows on demand and is never shrunk.
    tokens: Vec<ZeroToken<'a>>,
    /// Index of the next slot to hand out.
    index: usize,
}

impl<'a> TokenPool<'a> {
    /// Creates a pool whose backing vector preallocates `capacity` slots.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            tokens: Vec::with_capacity(capacity),
            index: 0,
        }
    }

    /// Hands out the next slot, growing the pool by one placeholder token
    /// if every existing slot is in use. Callers are expected to overwrite
    /// the slot; after `reset`, slots still hold whatever the previous
    /// pass left in them.
    pub fn get_token(&mut self) -> &mut ZeroToken<'a> {
        if self.index >= self.tokens.len() {
            // Seed a new slot with an arbitrary placeholder.
            let placeholder = ZeroToken::simple(ZeroTokenType::StreamStart, Position::start());
            self.tokens.push(placeholder);
        }
        let slot = self.index;
        self.index += 1;
        &mut self.tokens[slot]
    }

    /// Rewinds the pool so previously handed-out slots can be reused.
    pub fn reset(&mut self) {
        self.index = 0;
    }

    /// Total number of slots ever allocated.
    pub fn allocated_count(&self) -> usize {
        self.tokens.len()
    }

    /// Number of slots handed out since the last `reset`.
    pub fn used_count(&self) -> usize {
        self.index
    }
}
/// A scanner that yields tokens whose string payloads borrow from `input`.
pub struct ZeroScanner<'a> {
/// Full source text; all borrowed token payloads point into this buffer.
input: &'a str,
/// Current scan position. NOTE(review): the slicing helpers treat
/// `position.index` as a byte offset into `input` — confirm that
/// `Position::advance` maintains it that way.
pub position: Position,
/// Cursor into `char_indices` (a character count, not a byte offset).
char_index: usize,
/// Precomputed `(byte_index, char)` pairs; enables O(1) peeking in both
/// directions at the cost of O(n) memory.
char_indices: Vec<(usize, char)>,
/// Token slot pool; used by `reset`/`stats` here (the `scan_*` helpers in
/// this impl construct tokens directly rather than through the pool).
token_pool: TokenPool<'a>,
}
impl<'a> ZeroScanner<'a> {
/// Creates a scanner over `input`, precomputing every `(byte_index, char)`
/// pair up front so `peek_char` works in O(1) for both directions.
pub fn new(input: &'a str) -> Self {
let char_indices: Vec<(usize, char)> = input.char_indices().collect();
Self {
input,
position: Position::start(),
char_index: 0,
char_indices,
token_pool: TokenPool::with_capacity(128), }
}
/// Returns the character under the cursor without consuming it.
pub fn current_char(&self) -> Option<char> {
self.char_indices.get(self.char_index).map(|(_, ch)| *ch)
}
/// Consumes the current character and returns the character that is now
/// under the cursor (i.e. the one AFTER the consumed char), or `None`
/// when there was nothing left to consume or the input is exhausted.
pub fn advance(&mut self) -> Option<char> {
if let Some((_byte_index, ch)) = self.char_indices.get(self.char_index) {
self.position = self.position.advance(*ch);
self.char_index += 1;
self.char_indices.get(self.char_index).map(|(_, ch)| *ch)
} else {
None
}
}
/// Peeks `offset` characters away from the cursor; negative offsets look
/// backwards. Returns `None` when the offset falls outside the input.
pub fn peek_char(&self, offset: isize) -> Option<char> {
if offset >= 0 {
let index = self.char_index + offset as usize;
self.char_indices.get(index).map(|(_, ch)| *ch)
} else {
// Guard against underflow before subtracting the magnitude.
let offset_abs = (-offset) as usize;
if self.char_index >= offset_abs {
let index = self.char_index - offset_abs;
self.char_indices.get(index).map(|(_, ch)| *ch)
} else {
None
}
}
}
/// Returns the input slice from `start_position` up to the current
/// position, treating `Position.index` as a byte offset.
///
/// NOTE(review): the bounds check validates the range but not UTF-8 char
/// boundaries — an index inside a multi-byte character would panic on the
/// slice rather than return `Err`. In practice positions produced by
/// `advance` should always land on boundaries.
pub fn slice_from(&self, start_position: Position) -> Result<&'a str> {
let start_byte = start_position.index;
let end_byte = self.position.index;
if start_byte <= end_byte && end_byte <= self.input.len() {
Ok(&self.input[start_byte..end_byte])
} else {
Err(crate::Error::parse(
self.position,
"Invalid slice bounds".to_string(),
))
}
}
/// Returns the input slice between two arbitrary positions (byte-offset
/// based, same boundary caveat as `slice_from`).
pub fn slice_between(&self, start: Position, end: Position) -> Result<&'a str> {
let start_byte = start.index;
let end_byte = end.index;
if start_byte <= end_byte && end_byte <= self.input.len() {
Ok(&self.input[start_byte..end_byte])
} else {
Err(crate::Error::parse(
self.position,
"Invalid slice bounds".to_string(),
))
}
}
/// Rewinds the scanner (and its token pool) to the start of the input.
pub fn reset(&mut self) {
self.position = Position::start();
self.char_index = 0;
self.token_pool.reset();
}
/// Snapshot of current progress and pool usage for diagnostics.
pub fn stats(&self) -> ScannerStats {
ScannerStats {
input_length: self.input.len(),
chars_processed: self.char_index,
tokens_allocated: self.token_pool.allocated_count(),
tokens_used: self.token_pool.used_count(),
position: self.position,
}
}
/// Scans a plain (unquoted) scalar starting at the cursor, borrowing the
/// text from the input when no trailing-whitespace trim was needed.
///
/// The scalar ends at a line break, at `':'` followed by whitespace (or
/// end of input), at a `'#'` that starts a comment, or at any flow
/// indicator (`,`, `[`, `]`, `{`, `}`).
pub fn scan_plain_scalar_zero_copy(&mut self) -> Result<ZeroToken<'a>> {
let start_pos = self.position;
while let Some(ch) = self.current_char() {
match ch {
'\n' | '\r' => break,
// ':' only terminates when followed by whitespace or end of input.
':' if self.peek_char(1).map_or(true, |c| c.is_whitespace()) => break,
// '#' starts a comment at input start or after whitespace.
// NOTE(review): `char_index == 0` checks the absolute start of
// the input, not the start of this scalar.
'#' if self.char_index == 0
|| self.peek_char(-1).map_or(false, |c| c.is_whitespace()) =>
{
break;
}
',' | '[' | ']' | '{' | '}' => break,
_ => {
self.advance();
}
}
}
let slice = self.slice_from(start_pos)?;
let trimmed_slice = slice.trim_end();
// Borrow when no trailing whitespace was trimmed; otherwise allocate.
// NOTE(review): `trim_end()` returns a subslice of the same `'a` input,
// so the owned branch allocates where a borrow would also be sound.
// The owned-when-trimmed behavior is pinned by `test_zero_copy_trimming`.
let zero_string = if trimmed_slice.len() == slice.len() {
ZeroString::borrowed(trimmed_slice)
} else {
ZeroString::owned(trimmed_slice.to_string())
};
Ok(ZeroToken::new(
ZeroTokenType::Scalar(zero_string, crate::scanner::QuoteStyle::Plain),
start_pos,
self.position,
))
}
/// Scans an identifier ([A-Za-z0-9_-]-style, unicode alphanumerics
/// included) starting at the cursor; always returns a borrowed slice.
pub fn scan_identifier_zero_copy(&mut self) -> Result<ZeroString<'a>> {
let start_pos = self.position;
while let Some(ch) = self.current_char() {
if ch.is_alphanumeric() || ch == '_' || ch == '-' {
self.advance();
} else {
break;
}
}
let slice = self.slice_from(start_pos)?;
Ok(ZeroString::borrowed(slice))
}
/// Consumes spaces and tabs (but not line breaks) at the cursor.
pub fn skip_whitespace(&mut self) {
while let Some(ch) = self.current_char() {
if ch == ' ' || ch == '\t' {
self.advance();
} else {
break;
}
}
}
}
/// Diagnostic snapshot of scanner progress, produced by `ZeroScanner::stats`.
#[derive(Debug, Clone)]
pub struct ScannerStats {
/// Length of the input in bytes.
pub input_length: usize,
/// Number of characters consumed so far.
pub chars_processed: usize,
/// Total token slots ever allocated by the pool.
pub tokens_allocated: usize,
/// Token slots handed out since the last pool reset.
pub tokens_used: usize,
/// Scanner position at the time of the snapshot.
pub position: Position,
}
#[cfg(test)]
mod tests {
use super::*;
// Borrowed construction: no allocation, contents/length visible through
// the accessor methods.
#[test]
fn test_zero_string_borrowed() {
let s = "hello world";
let zs = ZeroString::borrowed(s);
assert!(zs.is_borrowed());
assert_eq!(zs.as_str(), "hello world");
assert_eq!(zs.len(), 11);
assert!(!zs.is_empty());
}
// Owned construction reports is_borrowed() == false.
#[test]
fn test_zero_string_owned() {
let s = String::from("hello world");
let zs = ZeroString::owned(s);
assert!(!zs.is_borrowed());
assert_eq!(zs.as_str(), "hello world");
assert_eq!(zs.len(), 11);
}
// Cursor semantics: advance() returns the NEW current char; peek_char
// accepts negative offsets to look behind the cursor.
#[test]
fn test_zero_scanner_basic() {
let input = "hello: world";
let mut scanner = ZeroScanner::new(input);
assert_eq!(scanner.current_char(), Some('h'));
assert_eq!(scanner.advance(), Some('e'));
assert_eq!(scanner.current_char(), Some('e'));
assert_eq!(scanner.peek_char(1), Some('l'));
assert_eq!(scanner.peek_char(-1), Some('h'));
}
// slice_from returns the byte range between a saved position and the
// current one.
#[test]
fn test_zero_scanner_slicing() {
let input = "hello: world";
let mut scanner = ZeroScanner::new(input);
let start = scanner.position;
for _ in 0..5 {
scanner.advance();
}
let slice = scanner.slice_from(start).unwrap();
assert_eq!(slice, "hello");
}
// Pool grows one slot per get_token; reset rewinds used_count but keeps
// the allocated slots for reuse.
#[test]
fn test_token_pool() {
let mut pool = TokenPool::with_capacity(2);
assert_eq!(pool.allocated_count(), 0);
assert_eq!(pool.used_count(), 0);
let _token1 = pool.get_token();
assert_eq!(pool.allocated_count(), 1);
assert_eq!(pool.used_count(), 1);
let _token2 = pool.get_token();
assert_eq!(pool.allocated_count(), 2);
assert_eq!(pool.used_count(), 2);
pool.reset();
assert_eq!(pool.allocated_count(), 2); assert_eq!(pool.used_count(), 0); }
// A plain scalar stops at ": " and borrows when no trimming occurred.
#[test]
fn test_zero_copy_scalar_scanning() {
let input = "hello world: test";
let mut scanner = ZeroScanner::new(input);
let token = scanner.scan_plain_scalar_zero_copy().unwrap();
if let ZeroTokenType::Scalar(value, _) = token.token_type {
assert_eq!(value.as_str(), "hello world");
assert!(value.is_borrowed()); } else {
panic!("Expected scalar token");
}
}
// Identifiers (alnum/_/-) are always returned as borrowed slices.
#[test]
fn test_zero_copy_identifier_scanning() {
let input = "my_anchor_123 ";
let mut scanner = ZeroScanner::new(input);
let identifier = scanner.scan_identifier_zero_copy().unwrap();
assert_eq!(identifier.as_str(), "my_anchor_123");
assert!(identifier.is_borrowed()); }
// Pins current behavior: trimming trailing whitespace produces an OWNED
// string (see NOTE(review) in scan_plain_scalar_zero_copy — a borrowed
// subslice would also be sound).
#[test]
fn test_zero_copy_trimming() {
let input = "hello \n";
let mut scanner = ZeroScanner::new(input);
let token = scanner.scan_plain_scalar_zero_copy().unwrap();
if let ZeroTokenType::Scalar(value, _) = token.token_type {
assert_eq!(value.as_str(), "hello");
assert!(!value.is_borrowed());
} else {
panic!("Expected scalar token");
}
}
}