use super::token::Token;
use std::iter::Peekable;
use std::str::Chars;
/// Streaming lexer over a borrowed query string.
///
/// Tokens are produced lazily through the `Iterator` implementation;
/// construct one with `Lexer::from(&str)`.
pub struct Lexer<'a> {
// Peekable character stream over the borrowed input.
content: Peekable<Chars<'a>>,
// Most recently consumed character; `None` until lexing starts.
current_char: Option<char>,
}
impl<'a> From<&'a str> for Lexer<'a> {
    /// Creates a lexer positioned before the first character of `input`.
    fn from(input: &'a str) -> Lexer<'a> {
        let content = input.chars().peekable();
        Lexer {
            current_char: None,
            content,
        }
    }
}
impl<'a> Lexer<'a> {
    /// Accumulates characters starting with `current_char` until the
    /// predicate `end` matches the *peeked* next character, and returns
    /// everything accumulated. The terminating character is left in the
    /// stream for the caller.
    ///
    /// # Panics
    /// Panics if `current_char` is `None`; callers must advance the lexer
    /// with `next_char` (and check the result) before calling this.
    fn read_until<T>(&mut self, end: T) -> String
    where
        T: Fn(char) -> bool,
    {
        let mut s = self.current_char.unwrap().to_string();
        // Early exit when the very next character already terminates.
        if let Some(next_char) = self.content.peek() {
            if end(*next_char) {
                return s;
            }
        }
        while let Some(c) = self.content.next() {
            s.push(c);
            if let Some(next_char) = self.content.peek() {
                if end(*next_char) {
                    break;
                }
            }
        }
        s
    }

    /// Reads a string delimited by `quote` (the current character), returning
    /// its contents without the surrounding quote characters.
    fn quoted_string(&mut self, quote: char) -> String {
        let mut s = self.read_until(|c| c == quote);
        s.remove(0); // drop the opening quote captured by read_until
        self.next_char(); // consume the closing quote
        s
    }

    /// Reads a numeric literal. `.` and `-` may continue a number; the
    /// literal ends at a letter, whitespace, or either parenthesis.
    fn number(&mut self) -> String {
        // Note: alphabetic characters can never be '.' or '-', so the
        // old extra checks for those inside the is_alphabetic() arm were
        // dead code and have been removed. '(' now terminates a number,
        // matching unquoted_string, so "5(" lexes as two tokens.
        self.read_until(|c| c.is_alphabetic() || c.is_whitespace() || c == ')' || c == '(')
    }

    /// Reads a bare word: everything up to whitespace or a parenthesis.
    fn unquoted_string(&mut self) -> String {
        self.read_until(|c| c.is_whitespace() || c == ')' || c == '(')
    }

    /// Advances the stream by one character, recording and returning it.
    fn next_char(&mut self) -> Option<char> {
        self.current_char = self.content.next();
        self.current_char
    }

    /// Turns the current character `c` (plus any lookahead it requires)
    /// into a single `Token`.
    fn token_from_char(&mut self, c: char) -> Token {
        match c {
            // '!' is only valid as part of "!=" or "!~".
            '!' => match self.content.peek() {
                Some('~') | Some('=') => {
                    let mut s = c.to_string();
                    s.push(self.next_char().unwrap());
                    Token::from(s.as_ref())
                }
                _ => Token::Invalid("! must be followed by = or ~".to_string()),
            },
            // '^' alone is valid, but may also begin "^=" or "^^".
            '^' => match self.content.peek() {
                Some('=') | Some('^') => {
                    let mut s = c.to_string();
                    s.push(self.next_char().unwrap());
                    Token::from(s.as_ref())
                }
                _ => Token::from(c),
            },
            // '>' / '<' may be followed by '=' to form ">=" / "<=".
            '>' | '<' => if let Some('=') = self.content.peek() {
                let mut s = c.to_string();
                s.push(self.next_char().unwrap());
                Token::from(s.as_ref())
            } else {
                Token::from(c)
            },
            '"' => Token::from(self.quoted_string('"')),
            '\'' => Token::from(self.quoted_string('\'')),
            // '-' escapes the following word so it is always lexed as a
            // plain string (never as a keyword).
            '-' => match self.next_char() {
                Some(_) => Token::Str(self.unquoted_string()),
                // A trailing '-' has nothing to escape. The old code
                // panicked here (unwrap on None inside read_until);
                // emit a token for the bare character instead.
                None => Token::from(c),
            },
            'a'..='z' | 'A'..='Z' => Token::from(self.unquoted_string()),
            _num if c.is_digit(10) => Token::from(self.number()),
            _ => Token::from(c),
        }
    }
}
impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    /// Yields the next token, silently skipping whitespace between tokens;
    /// returns `None` once the input is exhausted.
    fn next(&mut self) -> Option<Token> {
        loop {
            let c = self.next_char()?;
            if !c.is_whitespace() {
                return Some(self.token_from_char(c));
            }
        }
    }
}
#[cfg(test)]
pub mod tests {
    use super::*;

    /// Lexes `$query` and asserts the resulting token stream equals `$tokens`.
    macro_rules! lexer_test {
        ($name:ident, $query:expr, $tokens:expr) => {
            #[test]
            fn $name() {
                let tokens: Vec<Token> = Lexer::from($query).collect();
                let expected: Vec<Token> = $tokens;
                // Compare whole vectors: this yields a readable diff on
                // failure and avoids the index-out-of-bounds panic the old
                // element-by-element loop hit when `expected` was shorter
                // than `tokens`.
                assert_eq!(tokens, expected);
            }
        };
    }

    lexer_test!(
        simple_lex,
        "milk and cookies",
        vec![
            Token::from("milk"),
            Token::from("and"),
            Token::from("cookies"),
        ]
    );
    lexer_test!(
        boolean_lex,
        "completed = false",
        vec![
            Token::from("completed"),
            Token::from("="),
            Token::from("false"),
        ]
    );
    lexer_test!(
        like_comparison,
        "title ~ groceries",
        vec![
            Token::from("title"),
            Token::from("~"),
            Token::from("groceries"),
        ]
    );
    lexer_test!(
        shell_friendly_like_comparison,
        "title ^ groceries",
        vec![
            Token::from("title"),
            Token::from("^"),
            Token::from("groceries"),
        ]
    );
    lexer_test!(
        shell_friendly_not_like_comparison,
        "title ^^ groceries",
        vec![
            Token::from("title"),
            Token::from("^^"),
            Token::from("groceries"),
        ]
    );
    lexer_test!(
        shell_friendly_not_equal_comparison,
        "title ^= groceries",
        vec![
            Token::from("title"),
            Token::from("^="),
            Token::from("groceries"),
        ]
    );
    lexer_test!(
        not_like_comparison,
        "title !~ groceries",
        vec![
            Token::from("title"),
            Token::from("!~"),
            Token::from("groceries"),
        ]
    );
    lexer_test!(
        not_equal_comparison,
        "title != groceries",
        vec![
            Token::from("title"),
            Token::from("!="),
            Token::from("groceries"),
        ]
    );
    lexer_test!(
        single_grouped_expression,
        "(priority > 0)",
        vec![
            Token::from("("),
            Token::from("priority"),
            Token::from(">"),
            Token::from("0"),
            Token::from(")"),
        ]
    );
    // "-and" escapes the keyword so it lexes as a plain string.
    lexer_test!(
        keyword_escaped_lex,
        "milk -and cookies",
        vec![
            Token::from("milk"),
            Token::Str("and".to_string()),
            Token::from("cookies"),
        ]
    );
    lexer_test!(complicated_lex, "(priority > 5 and title ^ \"take out the trash\") or (context = \"work\" and (priority >= 2 or (\"my little pony\")))", vec![
        Token::from("("),
        Token::from("priority"),
        Token::from(">"),
        Token::from("5"),
        Token::from("and"),
        Token::from("title"),
        Token::from("^"),
        Token::from("take out the trash"),
        Token::from(")"),
        Token::from("or"),
        Token::from("("),
        Token::from("context"),
        Token::from("="),
        Token::from("work"),
        Token::from("and"),
        Token::from("("),
        Token::from("priority"),
        Token::from(">="),
        Token::from("2"),
        Token::from("or"),
        Token::from("("),
        Token::from("my little pony"),
        Token::from(")"),
        Token::from(")"),
        Token::from(")"),
    ]);
}