use core::fmt;
use crate::{Options, error::Error};
/// Category of a lexical token produced by [`tokenize`].
///
/// The enum has no payload, so equality is total and `Eq` is derived
/// alongside `PartialEq`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum TokenKind {
    /// An opening brace, `{`.
    Start,
    /// A closing brace, `}`.
    End,
    /// A `,` seen while at least one brace is open.
    Comma,
    /// A `..` pair seen while at least one brace is open.
    Ellipses,
    /// A run of `\n`, `\t`, space or `\r` bytes.
    Whitespace,
    /// A run of any other bytes.
    Text,
}
/// A borrowed string slice paired with a flag saying whether backslashes in
/// it act as escape characters.
///
/// Equality compares the raw slice and the flag, not the unescaped text.
/// Both fields have total equality, so `Eq` is derived alongside `PartialEq`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) struct EscapeStr<'a> {
    /// When `true`, an unescaped `\` is dropped and the character after it is
    /// kept verbatim.
    interpret_backslashes: bool,
    /// The original, still-escaped text.
    raw: &'a str,
}
impl<'a> EscapeStr<'a> {
    /// Wraps `raw`, copying the backslash-interpretation setting from `options`.
    pub(crate) fn new(raw: &'a str, options: &Options) -> Self {
        let interpret_backslashes = options.interpret_backslashes;
        Self {
            interpret_backslashes,
            raw,
        }
    }

    /// Returns the underlying slice exactly as it was given to [`EscapeStr::new`].
    pub(crate) fn raw(&self) -> &'a str {
        self.raw
    }

    /// Iterates over the characters of the text with escapes resolved.
    ///
    /// When backslash interpretation is enabled, a `\` that is not itself
    /// escaped is dropped and the character following it is always yielded.
    /// When disabled, every character of the raw slice is yielded unchanged.
    pub(crate) fn chars(&self) -> impl Iterator<Item = char> {
        let unescape = self.interpret_backslashes;
        // True while the previous character was an escaping backslash.
        let mut after_backslash = false;
        self.raw.chars().filter(move |&ch| {
            let is_escape = unescape && !after_backslash && ch == '\\';
            // The flag is set only by an escaping backslash and is cleared by
            // whichever character comes next.
            after_backslash = is_escape;
            !is_escape
        })
    }

    /// Number of characters yielded by [`EscapeStr::chars`] (not a byte length).
    pub(crate) fn len(&self) -> usize {
        let unescaped = self.chars();
        unescaped.count()
    }
}
impl<'a> fmt::Display for EscapeStr<'a> {
    /// Writes the unescaped text (the characters yielded by `chars()`).
    ///
    /// Characters are streamed straight into the formatter, so no temporary
    /// `String` is allocated. Width and precision flags are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Scratch space large enough for the UTF-8 encoding of any `char`.
        let mut utf8 = [0u8; 4];
        for c in self.chars() {
            f.write_str(c.encode_utf8(&mut utf8))?;
        }
        Ok(())
    }
}
impl<'a> From<EscapeStr<'a>> for String {
    /// Builds an owned `String` holding the unescaped text of `value`.
    fn from(value: EscapeStr<'a>) -> Self {
        // Unescaping only ever drops characters, so the raw byte length is an
        // upper bound on the result and one allocation is enough.
        let mut unescaped = String::with_capacity(value.raw().len());
        unescaped.extend(value.chars());
        unescaped
    }
}
/// A token whose start is known but whose end has not been reached yet.
///
/// `tokenize` keeps at most one of these open at a time (for whitespace or
/// text runs) and turns it into a `Token` via `with_span`.
#[derive(Debug)]
struct PartialToken {
/// Kind of the token being accumulated.
kind: TokenKind,
/// Byte offset in the input at which the token starts.
pos: usize,
}
/// A complete token produced by `tokenize`.
#[derive(Debug, Clone)]
pub(crate) struct Token<'a> {
/// What sort of token this is.
pub kind: TokenKind,
/// Byte offset in the input at which the token starts.
pub pos: usize,
/// The slice of the input covered by the token, wrapped in an `EscapeStr`.
pub span: EscapeStr<'a>,
}
impl PartialToken {
fn with_span<'a>(self, span: &'a str, end: usize, options: &Options) -> Token<'a> {
debug_assert!(self.pos < end);
Token {
kind: self.kind,
pos: self.pos,
span: EscapeStr::new(&span[self.pos..end], options),
}
}
}
/// Splits `input` into a flat list of [`Token`]s.
///
/// The input is scanned byte by byte:
///
/// * `{` and `}` always produce `Start` / `End` tokens and adjust the brace
///   depth (closing at depth zero leaves the depth at zero).
/// * `,` and `..` produce `Comma` / `Ellipses` tokens only while the brace
///   depth is non-zero; otherwise they are ordinary text.
/// * Consecutive `\n`, `\t`, space and `\r` bytes form one `Whitespace` token.
/// * Every other byte extends (or starts) a `Text` token.
///
/// # Errors
///
/// Returns [`Error::IncompleteEscape`] when `options.interpret_backslashes`
/// is set and the input ends directly after a backslash.
pub(crate) fn tokenize<'a>(input: &'a str, options: &Options) -> Result<Vec<Token<'a>>, Error> {
    let mut out: Vec<Token<'a>> = Vec::new();
    // The whitespace/text run currently being accumulated, if any.
    let mut pending: Option<PartialToken> = None;
    let mut depth: u32 = 0;
    let mut cursor = input.bytes().enumerate().peekable();

    while let Some((pos, byte)) = cursor.next() {
        // Fixed-width punctuation: decide its kind and byte width first.
        let punct = match byte {
            b'{' => {
                depth += 1;
                Some((TokenKind::Start, 1))
            }
            b'}' => {
                depth = depth.saturating_sub(1);
                Some((TokenKind::End, 1))
            }
            b',' if depth > 0 => Some((TokenKind::Comma, 1)),
            b'.' if depth > 0 && matches!(cursor.peek(), Some((_, b'.'))) => {
                // Consume the second dot of the pair.
                cursor.next();
                Some((TokenKind::Ellipses, 2))
            }
            _ => None,
        };

        if let Some((kind, width)) = punct {
            // Punctuation always terminates whatever run was open.
            if let Some(open) = pending.take() {
                out.push(open.with_span(input, pos, options));
            }
            out.push(Token {
                kind,
                pos,
                span: EscapeStr::new(&input[pos..pos + width], options),
            });
            continue;
        }

        // Otherwise the byte belongs to a whitespace or text run.
        let run_kind = match byte {
            b'\n' | b'\t' | b' ' | b'\r' => TokenKind::Whitespace,
            _ => {
                // An escaping backslash swallows the following byte so that it
                // cannot be read as punctuation or whitespace.
                if options.interpret_backslashes && byte == b'\\' && cursor.next().is_none() {
                    return Err(Error::IncompleteEscape);
                }
                TokenKind::Text
            }
        };

        // Close the open run only if it is of the other kind.
        if let Some(open) = pending.take_if(|open| open.kind != run_kind) {
            out.push(open.with_span(input, pos, options));
        }
        if pending.is_none() {
            pending = Some(PartialToken {
                kind: run_kind,
                pos,
            });
        }
    }

    // Flush a run that extends to the end of the input.
    if let Some(open) = pending.take() {
        out.push(open.with_span(input, input.len(), options));
    }
    Ok(out)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `EscapeStr`'s string output with backslash interpretation
    /// switched on and then off.
    #[test]
    fn escape_backslashes() {
        // (interpret_backslashes, raw input, expected rendering)
        let cases = [
            (true, "he\\llo", "hello"),
            (true, "he\\\\llo", "he\\llo"),
            (false, "he\\llo", "he\\llo"),
            (false, "he\\\\llo", "he\\\\llo"),
        ];

        let mut options = Options::new();
        for (interpret, raw, expected) in cases {
            options.interpret_backslashes = interpret;
            assert_eq!(EscapeStr::new(raw, &options).to_string(), expected);
        }
    }
}