use crate::frontend::lexer::LexemeType::{CloseParen, DoubleStar, Minus, Numeric, OpenParen, Plus, Slash, Star, EOF};
/// Lexes `data` into a sequence of lexemes.
///
/// The whole input is always scanned; errors do not stop lexing early.
///
/// # Errors
///
/// Returns every error message recorded during the scan, concatenated into a
/// single `String`, if at least one error occurred.
pub fn lex(data: &str) -> Result<Vec<Lexeme>, String> {
    let mut lexer = Lexer {
        data: data.chars().collect(),
        index: 0,
        lexemes: Vec::new(),
        errors: String::new(),
        line: 1,
    };
    lexer.lex();

    // Any recorded error makes the whole run a failure.
    if !lexer.errors.is_empty() {
        return Err(lexer.errors);
    }
    Ok(lexer.lexemes)
}
/// The kind of a lexeme produced by the lexer.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so the type can be used
/// as a key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LexemeType {
    /// A numeric literal.
    Numeric,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `**`
    DoubleStar,
    /// `/`
    Slash,
    /// Marks the end of the input.
    EOF,
}
/// A single lexeme: its kind, the line it was produced on, and its text.
#[derive(Debug, Clone, PartialEq)]
pub struct Lexeme {
    /// Which kind of lexeme this is.
    pub(crate) lexeme_type: LexemeType,
    /// Line number the lexer was on when it produced this lexeme.
    pub(crate) line: usize,
    /// Text associated with the lexeme.
    pub(crate) text: String,
}
impl Lexeme {
pub fn new(lexeme_type: LexemeType, line: usize, text: String) -> Lexeme {
Lexeme { lexeme_type, line, text }
}
}
/// Cursor-based lexer state over a sequence of characters.
struct Lexer {
    /// The input being lexed, one `char` per element.
    data: Vec<char>,
    /// Position in `data` of the next character to read.
    index: usize,
    /// Lexemes produced so far, in order.
    lexemes: Vec<Lexeme>,
    /// All error messages recorded so far, concatenated.
    errors: String,
    /// Line number the cursor is currently on.
    line: usize,
}
impl Lexer {
    /// Lexes the whole input, appending lexemes to `self.lexemes` and error
    /// messages to `self.errors`.
    ///
    /// Lexing continues after an error and stops once the `EOF` lexeme has
    /// been pushed.
    fn lex(&mut self) {
        loop {
            match self.lex_next() {
                Ok(lexeme) => {
                    // Check the type before pushing so the lexeme can be moved
                    // into the vector instead of cloned.
                    let reached_end = lexeme.lexeme_type == EOF;
                    self.lexemes.push(lexeme);
                    if reached_end {
                        break;
                    }
                }
                Err(message) => self.errors.push_str(&message),
            }
        }
    }

    /// Produces the next lexeme, skipping any leading whitespace.
    ///
    /// A `-` immediately followed by an ASCII digit is lexed as the start of a
    /// numeric literal; any other `-` is a `Minus` lexeme.
    ///
    /// # Errors
    ///
    /// Returns a message naming the offending character when it does not start
    /// any known lexeme, or whatever error `lex_number` reports.
    fn lex_next(&mut self) -> Result<Lexeme, String> {
        // Skip whitespace, counting newlines to keep `line` current.
        while self.in_bounds() && self.current().is_whitespace() {
            if self.current() == '\n' {
                self.line += 1;
            }
            self.skip();
        }

        if !self.in_bounds() {
            return Ok(Lexeme::new(EOF, self.line, String::from("end of file")));
        }

        let start = self.next();
        match start {
            '(' => Ok(Lexeme::new(OpenParen, self.line, String::from("("))),
            ')' => Ok(Lexeme::new(CloseParen, self.line, String::from(")"))),
            '*' => {
                // A second `*` directly after the first makes this `**`.
                if self.in_bounds() && self.current() == '*' {
                    self.skip();
                    Ok(Lexeme::new(DoubleStar, self.line, String::from("**")))
                } else {
                    Ok(Lexeme::new(Star, self.line, String::from("*")))
                }
            }
            '/' => Ok(Lexeme::new(Slash, self.line, String::from("/"))),
            '+' => Ok(Lexeme::new(Plus, self.line, String::from("+"))),
            '-' if self.in_bounds() && self.current().is_ascii_digit() => self.lex_number(start),
            '-' => Ok(Lexeme::new(Minus, self.line, String::from("-"))),
            _ if start.is_ascii_digit() => self.lex_number(start),
            // Report `start`: it has already been consumed, so the cursor now
            // points at the following character, or past the end of the input.
            _ => Err(format!("Unexpected character: '{}'.\n", start)),
        }
    }
}
impl Lexer {
    /// Lexes a numeric literal whose first character, `start`, has already
    /// been consumed.
    ///
    /// Accepts a run of ASCII digits, optionally followed by `.` and at least
    /// one more ASCII digit.
    ///
    /// # Errors
    ///
    /// Returns `"Unterminated float.\n"` when a `.` is not followed by a digit.
    ///
    /// # Panics
    ///
    /// Panics if `start` is neither numeric nor `'-'`.
    fn lex_number(&mut self, start: char) -> Result<Lexeme, String> {
        assert!(start.is_numeric() || start == '-');

        let mut text = String::from(start);
        self.push_digit_run(&mut text);

        let has_dot = self.in_bounds() && self.current() == '.';
        if has_dot {
            // Consume the '.', then require at least one fractional digit.
            text.push(self.next());
            let digit_follows = self.in_bounds() && self.current().is_ascii_digit();
            if !digit_follows {
                return Err(String::from("Unterminated float.\n"));
            }
            self.push_digit_run(&mut text);
        }

        Ok(Lexeme::new(Numeric, self.line, text))
    }

    /// Consumes consecutive ASCII digits at the cursor, appending them to `text`.
    fn push_digit_run(&mut self, text: &mut String) {
        while self.in_bounds() && self.current().is_ascii_digit() {
            text.push(self.next());
        }
    }
}
impl Lexer {
    /// Returns `true` while the cursor still points at a character of the input.
    fn in_bounds(&self) -> bool {
        self.data.len() > self.index
    }

    /// Returns the character at the cursor and advances past it.
    ///
    /// # Panics
    ///
    /// Panics if the cursor is out of bounds.
    fn next(&mut self) -> char {
        assert!(self.in_bounds());
        let consumed = self.data[self.index];
        self.skip();
        consumed
    }

    /// Advances the cursor by one position without reading anything.
    fn skip(&mut self) {
        self.index += 1;
    }

    /// Returns the character at the cursor without advancing.
    ///
    /// # Panics
    ///
    /// Panics if the cursor is out of bounds.
    fn current(&self) -> char {
        assert!(self.in_bounds());
        self.data[self.index]
    }
}
#[cfg(test)]
mod tests {
    use crate::frontend::lexer::LexemeType::{
        CloseParen, DoubleStar, Numeric, OpenParen, Plus, Slash, Star, EOF,
    };
    use crate::frontend::lexer::{lex, Lexeme};

    /// The end-of-file lexeme the lexer is expected to emit on `line`.
    fn eof(line: usize) -> Lexeme {
        Lexeme::new(EOF, line, "end of file".to_string())
    }

    /// A multi-line expression covering parentheses, operators, a newline, and
    /// a negative literal.
    #[test]
    fn test_lex() {
        let expected: Vec<Lexeme> = vec![
            (OpenParen, 1, "("),
            (Numeric, 1, "3"),
            (Plus, 1, "+"),
            (Numeric, 1, "5"),
            (CloseParen, 1, ")"),
            (Star, 2, "*"),
            (Numeric, 2, "3"),
            (Slash, 2, "/"),
            (Numeric, 2, "-2"),
            (EOF, 2, "end of file"),
        ]
        .into_iter()
        .map(|(kind, line, text)| Lexeme::new(kind, line, text.to_string()))
        .collect();

        let tokens = lex("(3 + 5)\n * 3 / -2");
        assert_eq!(Ok(expected), tokens);
    }

    /// Empty input yields only the end-of-file lexeme.
    #[test]
    fn test_empty() {
        assert_eq!(Ok(vec![eof(1)]), lex(""));
    }

    /// Every error is reported, not just the first one.
    #[test]
    fn test_multiple_errors() {
        let expected = "Unterminated float.\nUnterminated float.\n".to_string();
        assert_eq!(Err(expected), lex("3. + 5."));
    }

    /// Two adjacent stars form a single `DoubleStar` lexeme.
    #[test]
    fn test_exp() {
        let expected = vec![Lexeme::new(DoubleStar, 1, "**".to_string()), eof(1)];
        assert_eq!(Ok(expected), lex("**"));
    }
}