const TOKEN_KEY: char = '"';
const TOKEN_STRING_LITERAL: char = '\'';
const TOKEN_ESCAPE: char = '\\';
const TOKEN_EQUAL: char = '=';
const TOKEN_NEGATE: char = '!';
const TOKEN_GREATER: char = '>';
const TOKEN_LESS: char = '<';
const TOKEN_PARENTHESIS_OPEN: char = '(';
const TOKEN_PARENTHESIS_CLOSE: char = ')';
const TOKEN_CURLEY_OPEN: char = '{';
const TOKEN_CURLEY_CLOSE: char = '}';
const TOKEN_COMMA: char = ',';
const TOKEN_COLON: char = ':';
const TOKEN_SQUARE_OPEN: char = '[';
const TOKEN_SQUARE_CLOSE: char = ']';
const TOKEN_PLUS: char = '+';
const TOKEN_MINUS: char = '-';
const TOKEN_DIVIDE: char = '/';
const TOKEN_MULTIPLY: char = '*';
const TOKEN_MODULUS: char = '%';
/// A tokenizer over a borrowed query string. Tokens are returned as slices
/// of the original input; the lexer itself allocates nothing.
pub struct Lexer<'a> {
    /// The full input being tokenized.
    path: &'a str,
    /// Byte offset into `path` of the next unread character.
    head: usize,
    /// `Some(quote)` while the lexer is inside a `'…'` or `"…"` literal,
    /// holding the quote character that opened it; `None` otherwise.
    escape_token: Option<char>,
}
impl<'a> From<&'a str> for Lexer<'a> {
    /// Builds a lexer positioned at the start of `path`, outside any
    /// quoted literal.
    fn from(path: &'a str) -> Lexer<'a> {
        let head = 0;
        let escape_token = None;
        Lexer {
            path,
            head,
            escape_token,
        }
    }
}
impl<'a> Lexer<'a> {
    /// The prefix of the input that has already been tokenized.
    pub fn consumed(&self) -> &'a str {
        &self.path[..self.head]
    }
    /// The suffix of the input that has not been tokenized yet.
    pub fn future(&self) -> &'a str {
        &self.path[self.head..]
    }
    /// Returns the next token and advances past it, or `None` at end of input.
    pub fn token(&mut self) -> Option<&'a str> {
        let (tok, next_index) = self.full_next()?;
        self.head = next_index;
        Some(tok)
    }
    /// Returns the next token without consuming it.
    ///
    /// Both the read head and the quote state are snapshotted and restored,
    /// so `peak` leaves the lexer exactly as it found it: a following
    /// `token()` yields the same token, and `consumed()`/`future()` are
    /// unaffected. (Previously only `escape_token` was restored, so a peek
    /// silently consumed leading whitespace.) The name is kept as `peak`
    /// for backward compatibility; conventional spelling would be `peek`.
    pub fn peak(&mut self) -> Option<&'a str> {
        let saved_head = self.head;
        let saved_escape = self.escape_token;
        let result = self.full_next();
        self.head = saved_head;
        self.escape_token = saved_escape;
        let (tok, _) = result?;
        Some(tok)
    }
    /// Scans the next token, returning it together with the byte offset
    /// just past it. Does not commit `head` — that is `token()`'s job.
    fn full_next(&mut self) -> Option<(&'a str, usize)> {
        if self.head >= self.path.len() {
            return None;
        }
        self.consume_whitespace();
        let (tok, next_index) = self.next(self.head)?;
        // A comparison operator may be the first half of a two-character
        // operator (==, !=, >=, <=); look ahead one token to decide.
        if matches!(tok, "=" | ">" | "<" | "!") {
            // BUG FIX: `next` mutates `self.escape_token` as a side effect
            // when the character after the operator is a quote (e.g. `="x"`
            // with no space). If the lookahead is rejected, that mutation
            // must be rolled back or string parsing desynchronizes.
            let saved_escape = self.escape_token;
            if let Some((second, combined_end)) = self.next(next_index) {
                if second == "=" {
                    // Reading a bare "=" never touches escape state, so no
                    // restore is needed on this path.
                    return Some((&self.path[self.head..combined_end], combined_end));
                }
            }
            self.escape_token = saved_escape;
        }
        Some((tok, next_index))
    }
    /// Advances `head` past any whitespace — unless we are inside a quoted
    /// literal, where whitespace is significant.
    fn consume_whitespace(&mut self) {
        if self.escape_token.is_some() {
            return;
        }
        self.head += self.path[self.head..]
            .chars()
            .take_while(|c| c.is_whitespace())
            .map(char::len_utf8)
            .sum::<usize>();
    }
    /// Scans one raw token starting at byte offset `head`.
    ///
    /// Returns the token slice and the byte offset just past it, or `None`
    /// when nothing remains before whitespace/end. Mutates `escape_token`
    /// when a quote character opens or closes a literal. Escapes are NOT
    /// removed from the returned slice — `\"` stays two characters.
    fn next(&mut self, head: usize) -> Option<(&'a str, usize)> {
        let chars = self.path[head..].chars();
        let mut tok: Option<&str> = None;
        let mut next_index = head;
        // Set immediately after a backslash: the next char is taken literally.
        let mut escape_next = false;
        // While inside a quoted literal, operators and whitespace are
        // accumulated instead of terminating the token.
        let escape_all = self.escape_token.is_some();
        'charloop: for c in chars {
            if !escape_all && c.is_whitespace() {
                break 'charloop;
            }
            match c {
                TOKEN_EQUAL
                | TOKEN_LESS
                | TOKEN_NEGATE
                | TOKEN_GREATER
                | TOKEN_PARENTHESIS_OPEN
                | TOKEN_PARENTHESIS_CLOSE
                | TOKEN_CURLEY_OPEN
                | TOKEN_CURLEY_CLOSE
                | TOKEN_COMMA
                | TOKEN_COLON
                | TOKEN_SQUARE_OPEN
                | TOKEN_SQUARE_CLOSE
                | TOKEN_PLUS
                | TOKEN_MINUS
                | TOKEN_DIVIDE
                | TOKEN_MULTIPLY
                | TOKEN_MODULUS => {
                    if escape_next || escape_all {
                        // Inside a literal or after a backslash the operator
                        // is just another character.
                        next_index += c.len_utf8();
                        tok = Some(&self.path[head..next_index]);
                        escape_next = false;
                        continue 'charloop;
                    }
                    if tok.is_some() {
                        // Operator terminates the word that precedes it.
                        break 'charloop;
                    }
                    // Otherwise the operator is itself a one-char token.
                    next_index += c.len_utf8();
                    tok = Some(&self.path[head..next_index]);
                    break 'charloop;
                }
                TOKEN_ESCAPE => {
                    // NOTE(review): a lone trailing backslash yields an empty
                    // token here (head == next_index) — TODO confirm intended.
                    tok = Some(&self.path[head..next_index]);
                    next_index += c.len_utf8();
                    escape_next = true;
                }
                TOKEN_KEY | TOKEN_STRING_LITERAL => {
                    if escape_next {
                        // Escaped quote stays inside the current token.
                        escape_next = false;
                        next_index += c.len_utf8();
                        tok = Some(&self.path[head..next_index]);
                        continue;
                    }
                    if let Some(open) = self.escape_token {
                        if open != c {
                            // The other quote kind inside a literal is literal.
                            next_index += c.len_utf8();
                            tok = Some(&self.path[head..next_index]);
                            continue;
                        }
                    }
                    if tok.is_none() {
                        // A quote starting the token is emitted on its own and
                        // toggles in/out of literal mode.
                        next_index += c.len_utf8();
                        tok = Some(&self.path[head..next_index]);
                        match self.escape_token {
                            None => self.escape_token = Some(c),
                            Some(_) => self.escape_token = None,
                        }
                        break 'charloop;
                    }
                    if self.escape_token.is_some() {
                        // Closing quote ends the literal body; the quote itself
                        // is returned by the following call.
                        tok = Some(&self.path[head..next_index]);
                        break 'charloop;
                    }
                    if tok.is_some() {
                        // Quote terminates the preceding word.
                        break 'charloop;
                    }
                }
                _ => {
                    escape_next = false;
                    next_index += c.len_utf8();
                    tok = Some(&self.path[head..next_index]);
                }
            }
        }
        tok.map(|tok| (tok, next_index))
    }
}
#[cfg(test)]
mod test {
    use super::*;
    /// Pulls `$num` tokens from `$path`, then checks `consumed()`.
    macro_rules! test_consumed {
        ($path:expr, $num:expr, $consumed:expr) => {{
            let mut lexer = Lexer::from($path);
            for _ in 0..$num {
                let _ = lexer.token();
            }
            assert_eq!(lexer.consumed(), $consumed)
        }};
    }
    /// Drains every token from `$path` (after one `peak`) and compares the
    /// sequence against the expected list.
    macro_rules! test_lexor {
        ($path:expr, $($args:tt),*) => {{
            let expected: Vec<&str> = vec![$($args),*];
            let mut lexer = Lexer::from($path);
            let _ = lexer.peak();
            let mut toks = vec![];
            while let Some(t) = lexer.token() {
                toks.push(t);
            }
            assert_eq!(toks, expected)
        }};
    }
    #[test]
    fn test_lexor() {
        test_lexor!(
            "SELECT * from something where 'name' = \"happy\"",
            "SELECT",
            "*",
            "from",
            "something",
            "where",
            "'",
            "name",
            "'",
            "=",
            "\"",
            "happy",
            "\""
        );
        test_lexor!(
            "SELECT '~.\"key name\"' FROM 'namespace' WHERE '*[0].name'==5 AND 'chiken'!='egg'",
            "SELECT",
            "'",
            "~.\"key name\"",
            "'",
            "FROM",
            "'",
            "namespace",
            "'",
            "WHERE",
            "'",
            "*[0].name",
            "'",
            "==",
            "5",
            "AND",
            "'",
            "chiken",
            "'",
            "!=",
            "'",
            "egg",
            "'"
        );
        test_lexor!(
            r#"a
b
c"#,
            "a",
            "b",
            "c"
        );
        test_lexor!("()", "(", ")");
        test_lexor!(r#" "a" == "b" "#, "\"", "a", "\"", "==", "\"", "b", "\"");
        test_lexor!(
            r#"length("\"a.b.c\"")"#,
            "length",
            "(",
            "\"",
            r#"\"a.b.c\""#,
            "\"",
            ")"
        );
    }
    #[test]
    fn test_consumed() {
        test_consumed!("SELECT * FROM chicken", 2, "SELECT *");
    }
}