use std::iter::Peekable;
use std::str::CharIndices;
/// Streaming tokenizer over a borrowed string.
///
/// Iterating a `Lexer` yields `(byte offset, Token)` pairs; the token payloads
/// are slices of `input`, so nothing is copied.
#[derive(Debug, Clone)]
pub(crate) struct Lexer<'a> {
    /// The complete text being tokenized; token slices borrow from it.
    input: &'a str,
    /// Cursor over `(byte offset, char)` pairs with one item of lookahead.
    chars: Peekable<CharIndices<'a>>,
}
/// A lexical unit produced by the lexer.
///
/// Variants that carry a `&str` borrow their text from the lexed input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Token<'a> {
    /// A run of characters that are neither quotes, backslashes nor whitespace.
    Word(&'a str),
    /// A run of consecutive whitespace characters.
    Whitespace(&'a str),
    /// A single `'` character.
    SingleQuote,
    /// A single `"` character.
    DoubleQuote,
    /// A backslash together with the character that follows it.
    Escape(&'a str),
}
impl<'a> Iterator for Lexer<'a> {
type Item = (usize, Token<'a>);
#[coverage(off)]
fn next(&mut self) -> Option<Self::Item> {
match self.chars.next() {
Some((idx, chr)) => match chr {
'\'' => Some((idx, Token::SingleQuote)),
'"' => Some((idx, Token::DoubleQuote)),
'\\' => match self.chars.next() {
Some((cont, _)) => Some((idx, Token::Escape(&self.input[idx..cont + 1]))),
None => panic!(),
},
c if c.is_whitespace() => {
let mut end = idx;
loop {
match self.chars.peek() {
Some((cont, c)) if c.is_whitespace() => end = *cont,
_ => break,
}
self.chars.next();
}
Some((idx, Token::Whitespace(&self.input[idx..end + 1])))
}
_ => {
let mut end = idx;
loop {
match self.chars.peek() {
Some((cont, c)) if is_word_character(*c) => end = *cont,
_ => break,
}
self.chars.next();
}
Some((idx, Token::Word(&self.input[idx..end + 1])))
}
},
None => None,
}
}
}
#[coverage(off)]
/// Reports whether `c` can continue a word: it must not be a single quote,
/// a double quote, a backslash, or any whitespace character.
fn is_word_character(c: char) -> bool {
    let is_delimiter = matches!(c, '\'' | '"' | '\\');
    !(is_delimiter || c.is_whitespace())
}
#[coverage(off)]
pub fn split_string_by_whitespace(input: &str) -> Vec<&str> {
let mut lexer = Lexer {
input,
chars: input.char_indices().peekable(),
};
let mut result = vec![];
while let Some((idx, token)) = lexer.next() {
match token {
Token::Whitespace(_) => continue,
Token::Word(_) | Token::Escape(_) => loop {
match lexer.next() {
Some((cont, Token::Whitespace(_))) => {
result.push(&input[idx..cont]);
break;
}
Some((_, Token::Word(_) | Token::Escape(_))) => continue,
Some((_, Token::SingleQuote | Token::DoubleQuote)) => {
panic!()
}
None => {
result.push(&input[idx..]);
break;
}
}
},
Token::SingleQuote | Token::DoubleQuote => loop {
match lexer.next() {
Some((cont, quote))
if matches!(
("e, &token),
(Token::SingleQuote, Token::SingleQuote) | (Token::DoubleQuote, Token::DoubleQuote)
) =>
{
result.push(&input[idx + 1..cont]);
break;
}
Some((_, _)) => continue,
None => panic!(),
}
},
}
}
result
}
#[cfg(test)]
mod tests {
    use super::split_string_by_whitespace;

    /// Pins the splitter's output for plain, quoted and escaped inputs.
    #[test]
    #[coverage(off)]
    fn test1() {
        // Plain words separated by a single space.
        assert_eq!(
            split_string_by_whitespace("hello world"),
            vec!["hello", "world"]
        );
        // Single quotes group words; the quotes themselves are dropped.
        assert_eq!(
            split_string_by_whitespace("hello 'world bye'"),
            vec!["hello", "world bye"]
        );
        // An escaped quote does not open a quoted piece and is kept verbatim.
        assert_eq!(
            split_string_by_whitespace("hello \\'world bye"),
            vec!["hello", "\\'world", "bye"]
        );
        // An escaped double quote inside double quotes does not close them.
        assert_eq!(
            split_string_by_whitespace("hello \"world \\\"bye\""),
            vec!["hello", "world \\\"bye"]
        );
        // Quoted trailing space is preserved; an empty pair of quotes yields "".
        assert_eq!(
            split_string_by_whitespace("\"hello \" world \\\"bye \"\""),
            vec!["hello ", "world", "\\\"bye", ""]
        );
    }
}