// Single-character structural tokens: each one is emitted as its own
// token unless escaped or inside a `"`/`/` wrapped section.
const TOKEN_DOT: char = '.';
const TOKEN_RECURSIVE: char = '~';
const TOKEN_ARRAY_OPEN: char = '[';
const TOKEN_ARRAY_CLOSE: char = ']';
const TOKEN_FIRST_OPEN: char = '{';
const TOKEN_FIRST_CLOSE: char = '}';
const TOKEN_FIRST_SEP: char = ',';
const TOKEN_MULTI_OPEN: char = '(';
const TOKEN_MULTI_CLOSE: char = ')';
const TOKEN_MULTI_SEP: char = '|';
const TOKEN_WILDCARD: char = '*';
// Delimiter opening/closing a regex section; inside it, structural
// characters are treated as literal text (see `Lexer::next`).
const TOKEN_REGEX: char = '/';
// Escapes the character that follows it, folding it into the current
// literal token.
const TOKEN_ESCAPE: char = '\\';
// Delimiter opening/closing a quoted-string section, handled like
// `TOKEN_REGEX`.
const TOKEN_STRING_WRAP: char = '"';
/// Tokenizer over a path-expression string.
///
/// Produced tokens are sub-slices of the input, so they share its
/// lifetime `'a`. Construct via `Lexer::from(&str)` and pull tokens
/// with [`Lexer::token`] until it returns `None`.
pub struct Lexer<'a> {
    // Full input being tokenized.
    path: &'a str,
    // Byte offset into `path` of the next unread character.
    head: usize,
    // While inside a `"…"` or `/…/` section, holds the opening
    // delimiter; structural characters are then treated literally
    // until the matching closer is seen.
    escape_token: Option<char>,
}
impl<'a> From<&'a str> for Lexer<'a> {
fn from(path: &'a str) -> Lexer<'a> {
Lexer {
path,
head: 0,
escape_token: None,
}
}
}
impl<'a> Lexer<'a> {
    /// Returns the next token and advances the lexer past it, or
    /// `None` once the whole input has been consumed.
    pub fn token(&mut self) -> Option<&'a str> {
        let (tok, next_index) = self.full_next()?;
        self.head = next_index;
        Some(tok)
    }

    /// Finds the next token starting at the current head without
    /// committing the new head position; the caller stores the
    /// returned index. Returns `None` when the head is already at
    /// (or past) the end of the input.
    fn full_next(&mut self) -> Option<(&'a str, usize)> {
        if self.head >= self.path.len() {
            return None;
        }
        // NOTE: a no-op `match tok { _ => {} }` formerly sat between
        // the scan and the return; it has been removed.
        self.next(self.head)
    }

    /// Scans one token starting at byte offset `head`.
    ///
    /// Returns the token slice together with the byte offset just past
    /// it. A token is either a run of literal characters or a single
    /// structural character. A backslash escapes the following
    /// character into the current literal token; while inside a
    /// `"`/`/` wrapped section (tracked by `self.escape_token`)
    /// structural characters lose their special meaning until the
    /// matching closing delimiter.
    fn next(&mut self, head: usize) -> Option<(&'a str, usize)> {
        let chars = self.path[head..].chars();
        let mut tok: Option<&str> = None;
        // Byte offset one past the end of the token scanned so far;
        // always advanced by `len_utf8` so slicing stays on char
        // boundaries.
        let mut next_index = head;
        // True when the previous character was an unconsumed backslash.
        let mut escape_next = false;
        // True while inside a quoted/regex section: everything except
        // the matching closer is literal.
        let escape_all = self.escape_token.is_some();
        'charloop: for ch in chars {
            match ch {
                TOKEN_DOT | TOKEN_WILDCARD | TOKEN_RECURSIVE | TOKEN_ARRAY_OPEN
                | TOKEN_ARRAY_CLOSE | TOKEN_FIRST_OPEN | TOKEN_FIRST_CLOSE | TOKEN_MULTI_OPEN
                | TOKEN_MULTI_CLOSE | TOKEN_FIRST_SEP | TOKEN_MULTI_SEP => {
                    // Escaped or inside a wrapped section: fold the
                    // structural character into the literal token.
                    if escape_next || escape_all {
                        next_index += ch.len_utf8();
                        tok = Some(&self.path[head..next_index]);
                        escape_next = false;
                        continue 'charloop;
                    }
                    // A literal token is already pending: emit it
                    // first; the structural char is re-read next call.
                    if tok.is_some() {
                        break 'charloop;
                    }
                    // Otherwise the structural character is a token by
                    // itself.
                    next_index += ch.len_utf8();
                    tok = Some(&self.path[head..next_index]);
                    break 'charloop;
                }
                TOKEN_ESCAPE => {
                    // Keep what has been scanned so far and mark the
                    // next character as escaped. The backslash itself
                    // stays inside the token slice.
                    tok = Some(&self.path[head..next_index]);
                    next_index += ch.len_utf8();
                    escape_next = true;
                }
                TOKEN_STRING_WRAP | TOKEN_REGEX => {
                    // An escaped delimiter is part of the literal token.
                    if escape_next {
                        escape_next = false;
                        next_index += ch.len_utf8();
                        tok = Some(&self.path[head..next_index]);
                        continue;
                    }
                    // Inside a section wrapped by the *other* delimiter
                    // (e.g. a `"` inside `/…/`): also literal.
                    if let Some(t) = self.escape_token {
                        if t != ch {
                            next_index += ch.len_utf8();
                            tok = Some(&self.path[head..next_index]);
                            continue;
                        }
                    }
                    // Delimiter at token start: emit it alone and
                    // toggle the wrapped-section state.
                    if tok.is_none() {
                        next_index += ch.len_utf8();
                        tok = Some(&self.path[head..next_index]);
                        self.escape_token = match self.escape_token {
                            None => Some(ch),
                            Some(_) => None,
                        };
                        break 'charloop;
                    }
                    // Closing delimiter after a literal: emit the
                    // literal now; the delimiter is re-read next call.
                    if self.escape_token.is_some() {
                        tok = Some(&self.path[head..next_index]);
                        break 'charloop;
                    }
                    if tok.is_some() {
                        break 'charloop;
                    }
                }
                // Any other character extends the current literal token.
                _ => {
                    escape_next = false;
                    next_index += ch.len_utf8();
                    tok = Some(&self.path[head..next_index]);
                }
            }
        }
        tok.map(|tok| (tok, next_index))
    }
}
#[cfg(test)]
mod test {
    use super::*;
    // Lexes `$path` to exhaustion and asserts the token stream equals
    // the expected list. (Renamed from the misspelled `test_lexor`.)
    macro_rules! test_lexer {
        ($path:expr, $($args:tt),*) => {{
            let mut p = Lexer::from($path);
            let mut toks = vec![];
            let expected: Vec<&str> = vec![$($args),*];
            while let Some(t) = p.token() {
                toks.push(t);
            }
            assert_eq!(toks, expected)
        }};
    }
    #[test]
    fn test_lexer() {
        // Literal runs split on structural characters.
        test_lexer!("Im.am a.fish", "Im", ".", "am a", ".", "fish");
        // Whitespace alone is a literal token.
        test_lexer!(" ", " ");
        test_lexer!(
            "labels.{hostname|host}",
            "labels",
            ".",
            "{",
            "hostname",
            "|",
            "host",
            "}"
        );
        // Backslash-escaped dot stays inside the literal.
        test_lexer!("labels\\.hostname", "labels\\.hostname");
        // Quoted section: delimiters are tokens, interior is literal.
        test_lexer!(r#""labels.hostname""#, "\"", "labels.hostname", "\"");
        test_lexer!(r#""some\"thing""#, "\"", "some\\\"thing", "\"");
        test_lexer!(
            r#""one""two""three""four""#,
            "\"",
            "one",
            "\"",
            "\"",
            "two",
            "\"",
            "\"",
            "three",
            "\"",
            "\"",
            "four",
            "\""
        );
        // Multi-byte UTF-8 input advances correctly.
        test_lexer!(
            "€€€.€€€.€.asdf.asdf",
            "€€€",
            ".",
            "€€€",
            ".",
            "€",
            ".",
            "asdf",
            ".",
            "asdf"
        );
        // Regex section: `.`/`*` inside `/…/` are literal.
        test_lexer!("/.*/.something", "/", ".*", "/", ".", "something");
        // A `"` inside a regex section is literal.
        test_lexer!("/asd\"asdf/", "/", "asd\"asdf", "/");
        // Escaped quotes do not open a wrapped section.
        test_lexer!(r#"\"a.b.c\""#, "\\\"a", ".", "b", ".", "c\\\"");
        test_lexer!(r#"Im.am a.fish"#, "Im", ".", "am a", ".", "fish");
    }
}