use crate::vecpointer::VecPointer;
/// A single token produced by [`lex`].
///
/// Punctuation variants carry no payload; the three data-carrying variants
/// hold the text or value that was read from the input.
#[derive(Debug, Clone, PartialEq)]
pub enum Symbol {
    /// `/`
    Slash,
    /// `//`
    DoubleSlash,
    /// `[`
    OpenSquareBracket,
    /// `]`
    CloseSquareBracket,
    /// `(`
    OpenBracket,
    /// `)`
    CloseBracket,
    /// `*`
    Wildcard,
    /// `.`
    Dot,
    /// `..`
    DoubleDot,
    /// `=`
    AssignmentSign,
    /// `@`
    AtSign,
    /// `-`
    MinusSign,
    /// `+`
    AddSign,
    /// `>`
    GreaterThanSign,
    /// `<`
    LessThanSign,
    /// A bare name such as `bookstore`.
    Identifier(String),
    /// The contents of a single- or double-quoted string, without the quotes.
    Text(String),
    /// A numeric literal, stored as `f32`.
    Number(f32),
}
pub fn lex(text: &str) -> Result<Vec<Symbol>, &'static str> {
let mut symbols: Vec<Symbol> = Vec::new();
let chars = text.chars().collect();
let mut pointer = VecPointer::new(chars);
while let Some(c) = pointer.current() {
if let Some(s) = is_double_slash(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_slash(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_open_bracket(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_close_bracket(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_open_square_bracket(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_close_square_bracket(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_number(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_wildcard(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_double_dot(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_dot(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_assignment_sign(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_at_sign(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_add_sign(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_minus_sign(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_greater_than_sign(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_less_than_sign(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_identifier(&mut pointer) {
symbols.push(s);
} else if let Some(s) = is_text(&mut pointer) {
symbols.push(s);
} else {
if !c.is_whitespace(){
eprintln!("Unknown symbol {}", c);
}
pointer.next();
}
}
Ok(symbols)
}
/// Matches `//`.
///
/// When both `pointer.current()` and `pointer.peek()` are `/`, advances the
/// pointer by two via `next_add(2)` and returns [`Symbol::DoubleSlash`];
/// otherwise returns `None`.
fn is_double_slash(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    let here = pointer.current();
    let ahead = pointer.peek();
    match (here, ahead) {
        (Some('/'), Some('/')) => {
            pointer.next_add(2);
            Some(Symbol::DoubleSlash)
        }
        _ => None,
    }
}
/// Matches a single `/`.
///
/// On a match the pointer is advanced one step and [`Symbol::Slash`] is
/// returned; otherwise the result is `None`.
fn is_slash(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    match pointer.current() {
        Some('/') => {
            pointer.next();
            Some(Symbol::Slash)
        }
        _ => None,
    }
}
/// Matches `[`.
///
/// On a match the pointer is advanced one step and
/// [`Symbol::OpenSquareBracket`] is returned; otherwise the result is `None`.
fn is_open_square_bracket(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    if !matches!(pointer.current(), Some('[')) {
        return None;
    }
    pointer.next();
    Some(Symbol::OpenSquareBracket)
}
/// Matches `]`.
///
/// On a match the pointer is advanced one step and
/// [`Symbol::CloseSquareBracket`] is returned; otherwise the result is `None`.
fn is_close_square_bracket(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    if pointer.current() != Some(']') {
        return None;
    }
    pointer.next();
    Some(Symbol::CloseSquareBracket)
}
/// Matches `(`.
///
/// On a match the pointer is advanced one step and [`Symbol::OpenBracket`]
/// is returned; otherwise the result is `None`.
fn is_open_bracket(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    // `?` bails out with `None` unless the current character is `(`.
    pointer.current().filter(|&c| c == '(')?;
    pointer.next();
    Some(Symbol::OpenBracket)
}
/// Matches `)`.
///
/// On a match the pointer is advanced one step and [`Symbol::CloseBracket`]
/// is returned; otherwise the result is `None`.
fn is_close_bracket(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    match pointer.current() {
        Some(')') => {
            pointer.next();
            Some(Symbol::CloseBracket)
        }
        _ => None,
    }
}
/// Matches `*`.
///
/// On a match the pointer is advanced one step and [`Symbol::Wildcard`] is
/// returned; otherwise the result is `None`.
fn is_wildcard(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    if !matches!(pointer.current(), Some('*')) {
        return None;
    }
    pointer.next();
    Some(Symbol::Wildcard)
}
/// Matches `..`.
///
/// When both `pointer.current()` and `pointer.peek()` are `.`, advances the
/// pointer by two via `next_add(2)` and returns [`Symbol::DoubleDot`];
/// otherwise returns `None`.
fn is_double_dot(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    let here = pointer.current();
    let ahead = pointer.peek();
    match (here, ahead) {
        (Some('.'), Some('.')) => {
            pointer.next_add(2);
            Some(Symbol::DoubleDot)
        }
        _ => None,
    }
}
/// Matches a single `.`.
///
/// On a match the pointer is advanced one step and [`Symbol::Dot`] is
/// returned; otherwise the result is `None`.
fn is_dot(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    if pointer.current() != Some('.') {
        return None;
    }
    pointer.next();
    Some(Symbol::Dot)
}
/// Matches `=`.
///
/// On a match the pointer is advanced one step and
/// [`Symbol::AssignmentSign`] is returned; otherwise the result is `None`.
fn is_assignment_sign(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    // `?` bails out with `None` unless the current character is `=`.
    pointer.current().filter(|&c| c == '=')?;
    pointer.next();
    Some(Symbol::AssignmentSign)
}
/// Matches `@`.
///
/// On a match the pointer is advanced one step and [`Symbol::AtSign`] is
/// returned; otherwise the result is `None`.
fn is_at_sign(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    match pointer.current() {
        Some('@') => {
            pointer.next();
            Some(Symbol::AtSign)
        }
        _ => None,
    }
}
/// Matches `+`.
///
/// On a match the pointer is advanced one step and [`Symbol::AddSign`] is
/// returned; otherwise the result is `None`.
fn is_add_sign(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    if !matches!(pointer.current(), Some('+')) {
        return None;
    }
    pointer.next();
    Some(Symbol::AddSign)
}
/// Matches `-`.
///
/// On a match the pointer is advanced one step and [`Symbol::MinusSign`] is
/// returned; otherwise the result is `None`.
fn is_minus_sign(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    if pointer.current() != Some('-') {
        return None;
    }
    pointer.next();
    Some(Symbol::MinusSign)
}
/// Matches `>`.
///
/// On a match the pointer is advanced one step and
/// [`Symbol::GreaterThanSign`] is returned; otherwise the result is `None`.
fn is_greater_than_sign(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    // `?` bails out with `None` unless the current character is `>`.
    pointer.current().filter(|&c| c == '>')?;
    pointer.next();
    Some(Symbol::GreaterThanSign)
}
/// Matches `<`.
///
/// On a match the pointer is advanced one step and
/// [`Symbol::LessThanSign`] is returned; otherwise the result is `None`.
fn is_less_than_sign(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    match pointer.current() {
        Some('<') => {
            pointer.next();
            Some(Symbol::LessThanSign)
        }
        _ => None,
    }
}
/// Matches a decimal number: one or more ASCII digits, optionally followed by
/// `.` and zero or more further digits.
///
/// Returns `None` when the current character is not a digit. On a match the
/// pointer is left on the first character that is not part of the number and
/// the literal is returned as [`Symbol::Number`]. A `.` directly after the
/// integer part is always consumed, even when no digits follow it.
fn is_number(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    // Steps the pointer forward, appending characters to `literal` for as long
    // as they are digits; stops on the first non-digit or at end of input.
    fn push_following_digits(pointer: &mut VecPointer<char>, literal: &mut String) {
        loop {
            match pointer.next() {
                Some(digit) if digit.is_ascii_digit() => literal.push(digit),
                _ => break,
            }
        }
    }

    let first = pointer.current().filter(|c| c.is_ascii_digit())?;

    let mut literal = String::new();
    literal.push(first);
    push_following_digits(pointer, &mut literal);

    if matches!(pointer.current(), Some('.')) {
        literal.push('.');
        push_following_digits(pointer, &mut literal);
    }

    // The literal is digits with at most one `.`, a form `f32::from_str`
    // accepts, so the parse is not expected to fail.
    let value = literal.parse::<f32>().unwrap();
    Some(Symbol::Number(value))
}
/// Matches an identifier (a name).
///
/// An identifier starts with an alphabetic character or `_` and continues
/// with alphanumeric characters or `_`. Accepting digits after the first
/// character keeps names such as `h1` or `item2` as one token instead of
/// splitting them into an identifier followed by a number.
///
/// `-` and `.` are deliberately not treated as name characters here, because
/// the lexer tokenises them as [`Symbol::MinusSign`] and [`Symbol::Dot`].
///
/// Returns `None` when the current character cannot start an identifier. On a
/// match the pointer is left on the first character after the name.
fn is_identifier(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    let first = pointer.current()?;
    if !(first.is_alphabetic() || first == '_') {
        return None;
    }

    let mut id = first.to_string();
    while let Some(c) = pointer.next() {
        if c.is_alphanumeric() || c == '_' {
            id.push(c);
        } else {
            break;
        }
    }
    Some(Symbol::Identifier(id))
}
/// Matches a quoted string delimited by a pair of `"` or a pair of `'`.
///
/// The closing delimiter must be the same character as the opening one. On a
/// match the pointer is advanced past the closing delimiter and the characters
/// between the delimiters are returned as [`Symbol::Text`].
///
/// Returns `None` when the current character is not a quote, or when the input
/// ends before the closing delimiter is found. In the unterminated case the
/// pointer is moved back by the number of characters that were stepped over.
fn is_text(pointer: &mut VecPointer<char>) -> Option<Symbol> {
    let delimiter = match pointer.current() {
        Some(c) if c == '"' || c == '\'' => c,
        _ => return None,
    };

    let mut text = String::new();
    while let Some(c) = pointer.next() {
        if c == delimiter {
            // Step past the closing delimiter before handing the text back.
            pointer.next();
            return Some(Symbol::Text(text));
        }
        text.push(c);
    }

    // Unterminated string: rewind by the characters read plus one. The pointer
    // walks over `char`s, so count characters; `String::len` is a byte count
    // and would rewind too far for non-ASCII text.
    pointer.back_add(text.chars().count() + 1);
    None
}
#[cfg(test)]
mod tests {
    //! Unit tests for the individual matchers and for `lex` as a whole.

    use super::*;

    #[test]
    fn is_number_should_be_some_with_trailing_text() {
        let chars = "1234abc".chars().collect();
        let symbol = is_number(&mut VecPointer::new(chars)).unwrap();
        assert_eq!(Symbol::Number(1234f32), symbol);
    }

    #[test]
    fn is_number_should_capture_decimal() {
        let chars = "1234.5678".chars().collect();
        let symbol = is_number(&mut VecPointer::new(chars)).unwrap();
        assert_eq!(Symbol::Number(1234.5678f32), symbol);
    }

    #[test]
    fn is_number_should_be_none_with_leading_text() {
        let chars = "abc1234".chars().collect();
        let symbol = is_number(&mut VecPointer::new(chars));
        assert!(symbol.is_none());
    }

    #[test]
    fn is_text_should_capture_quoted_text() {
        let chars = r###""world""###.chars().collect();
        let pointer = &mut VecPointer::new(chars);
        let symbol = is_text(pointer);
        assert_eq!(Some(Symbol::Text(String::from("world"))), symbol);
        // The whole input was consumed, so nothing is left to step onto.
        assert!(matches!(pointer.next(), None));
    }

    #[test]
    fn is_text_should_capture_single_quoted_text() {
        let chars = r###"'world'"###.chars().collect();
        let pointer = &mut VecPointer::new(chars);
        let symbol = is_text(pointer);
        assert_eq!(Some(Symbol::Text(String::from("world"))), symbol);
        // The whole input was consumed, so nothing is left to step onto.
        assert!(matches!(pointer.next(), None));
    }

    #[test]
    fn is_text_should_not_capture_mismatched_quoted_text() {
        let chars = r###""world'"###.chars().collect();
        let pointer = &mut VecPointer::new(chars);
        let symbol = is_text(pointer);
        assert!(symbol.is_none());
        // A failed match must leave the pointer back on the opening quote.
        assert!(matches!(pointer.current(), Some('"')));
    }

    #[test]
    fn lex_works1() {
        let text = "//bookstore/book[1]/page[last()-1]";
        let result = lex(text).unwrap();
        let expected = vec![
            Symbol::DoubleSlash,
            Symbol::Identifier(String::from("bookstore")),
            Symbol::Slash,
            Symbol::Identifier(String::from("book")),
            Symbol::OpenSquareBracket,
            Symbol::Number(1.0),
            Symbol::CloseSquareBracket,
            Symbol::Slash,
            Symbol::Identifier(String::from("page")),
            Symbol::OpenSquareBracket,
            Symbol::Identifier(String::from("last")),
            Symbol::OpenBracket,
            Symbol::CloseBracket,
            Symbol::MinusSign,
            Symbol::Number(1.0),
            Symbol::CloseSquareBracket,
        ];
        assert_eq!(expected, result);
    }

    #[test]
    fn lex_works2() {
        let text = "/bookstore/book[price>35]/price";
        let result = lex(text).unwrap();
        let expected = vec![
            Symbol::Slash,
            Symbol::Identifier(String::from("bookstore")),
            Symbol::Slash,
            Symbol::Identifier(String::from("book")),
            Symbol::OpenSquareBracket,
            Symbol::Identifier(String::from("price")),
            Symbol::GreaterThanSign,
            Symbol::Number(35.0),
            Symbol::CloseSquareBracket,
            Symbol::Slash,
            Symbol::Identifier(String::from("price")),
        ];
        assert_eq!(expected, result);
    }

    #[test]
    fn lex_works3() {
        let text = r###"//a[@hello="world"]"###;
        let result = lex(text).unwrap();
        let expected = vec![
            Symbol::DoubleSlash,
            Symbol::Identifier(String::from("a")),
            Symbol::OpenSquareBracket,
            Symbol::AtSign,
            Symbol::Identifier(String::from("hello")),
            Symbol::AssignmentSign,
            Symbol::Text(String::from("world")),
            Symbol::CloseSquareBracket,
        ];
        assert_eq!(expected, result);
    }
}