use crate::token::{Float, Integer, Radix, Token};
use std::error;
use std::fmt;
use std::num;
use std::result;
use std::str::FromStr;
pub struct Lexer<'a> {
input: &'a str,
position: usize,
read_position: usize,
ch: char,
}
impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Self {
let mut l = Lexer {
input,
position: 0,
read_position: 0,
ch: 0 as char,
};
l.read_char();
l
}
pub fn lex(&mut self) -> Result<Vec<Token>> {
let mut tokens = vec![];
loop {
match self.next_token()? {
t @ Token::Eof => {
tokens.push(t);
return Ok(tokens);
}
t => {
tokens.push(t);
}
}
}
}
pub fn next_token(&mut self) -> Result<Token> {
self.skip_whitespace();
let t = match self.ch {
'=' => {
if self.peek_char() == '=' {
self.read_char();
Token::Equal
} else {
Token::Assign
}
}
'+' => Token::Plus,
'-' => Token::Minus,
'!' => {
if self.peek_char() == '=' {
self.read_char();
Token::NotEqual
} else {
Token::Bang
}
}
'*' => Token::Asterisk,
'/' => Token::Slash,
'%' => Token::Percent,
'<' => Token::LessThan,
'>' => Token::GreaterThan,
',' => Token::Comma,
':' => Token::Colon,
';' => Token::Semicolon,
'(' => Token::LeftParen,
')' => Token::RightParen,
'{' => Token::LeftBrace,
'}' => Token::RightBrace,
'[' => Token::LeftBracket,
']' => Token::RightBracket,
'"' => self.read_string()?,
'\u{0000}' => Token::Eof,
_ => {
if is_letter(self.ch) {
let ident = self.read_identifier();
if let Some(key) = lookup_keyword(&ident) {
return Ok(key);
} else {
return Ok(Token::Identifier(ident));
}
} else if is_number(self.ch) {
return Ok(self.read_number()?);
} else {
Token::Illegal(self.ch)
}
}
};
self.read_char();
Ok(t)
}
fn peek_char(&self) -> char {
if self.read_position >= self.input.len() {
0 as char
} else {
if let Some(ch) = self.input.chars().nth(self.read_position) {
ch
} else {
panic!("peeked out of range character")
}
}
}
fn read_char(&mut self) {
if self.read_position >= self.input.len() {
self.ch = 0 as char;
} else {
if let Some(ch) = self.input.chars().nth(self.read_position) {
self.ch = ch;
} else {
panic!("read out of range character");
}
}
self.position = self.read_position;
self.read_position += 1;
}
fn read_identifier(&mut self) -> String {
let pos = self.position;
while is_letter(self.ch) || self.ch.is_numeric() {
self.read_char();
}
self.input
.chars()
.skip(pos)
.take(self.position - pos)
.collect()
}
fn read_number(&mut self) -> Result<Token> {
let pos = self.position;
while (self.ch.is_ascii_alphanumeric() || self.ch == '.') && !self.ch.is_whitespace() {
self.read_char();
}
let chars: Vec<char> = self
.input
.chars()
.skip(pos)
.take(self.position - pos)
.collect();
if chars.contains(&'.') {
Ok(Token::Float(Float::new(
f64::from_str(&chars.iter().collect::<String>()).map_err(Error::IllegalFloat)?,
)))
} else {
Ok(Token::Integer(parse_int(&chars)?))
}
}
fn read_string(&mut self) -> Result<Token> {
let pos = self.position + 1;
loop {
self.read_char();
match self.ch {
'"' => break,
'\u{0000}' => {
return Err(Error::UnexpectedEof);
}
_ => {}
}
}
Ok(Token::String(
self.input
.chars()
.skip(pos)
.take(self.position - pos)
.collect(),
))
}
fn skip_whitespace(&mut self) {
while self.ch.is_ascii_whitespace() {
self.read_char();
}
}
}
/// Maps a reserved word to its keyword token.
///
/// Returns `None` when `s` is not a keyword, in which case the caller treats
/// it as an ordinary identifier.
fn lookup_keyword(s: &str) -> Option<Token> {
    let keyword = match s {
        "fn" => Token::Function,
        "let" => Token::Let,
        "true" => Token::True,
        "false" => Token::False,
        "if" => Token::If,
        "else" => Token::Else,
        "return" => Token::Return,
        _ => return None,
    };
    Some(keyword)
}
/// Reports whether `c` may appear in an identifier as a letter: an ASCII
/// letter or an underscore.
fn is_letter(c: char) -> bool {
    matches!(c, '_' | 'a'..='z' | 'A'..='Z')
}
/// Reports whether `c` is an ASCII decimal digit (`0` through `9`).
fn is_number(c: char) -> bool {
    c.is_ascii_digit()
}
fn parse_int(chars: &[char]) -> Result<Integer> {
if chars.len() < 2 {
let raw: String = chars.iter().collect();
return Ok(Integer {
radix: Radix::Decimal,
value: i64::from_str_radix(&raw, 10).map_err(Error::IllegalInteger)?,
});
}
let (radix, skip) = match &chars[0..2] {
['0', 'b'] => (Radix::Binary, 2),
['0', 'x'] => (Radix::Hexadecimal, 2),
['0', 'o'] => (Radix::Octal, 2),
['0', '0'..='9'] => (Radix::Octal, 1),
['0', r] => {
return Err(Error::IllegalIntegerRadix(*r));
}
_ => (Radix::Decimal, 0),
};
let raw: String = chars.iter().skip(skip).collect();
let base = match radix {
Radix::Binary => 2,
Radix::Decimal => 10,
Radix::Hexadecimal => 16,
Radix::Octal => 8,
};
Ok(Integer {
radix,
value: i64::from_str_radix(&raw, base).map_err(Error::IllegalInteger)?,
})
}
/// Result alias used by the lexer; the error type is always [`Error`].
pub type Result<T> = result::Result<T, Error>;
/// Errors that can occur while lexing.
#[derive(Debug, PartialEq)]
pub enum Error {
    /// The input ended inside a string literal, before its closing quote.
    UnexpectedEof,
    /// A literal containing `.` could not be parsed as an `f64`.
    IllegalFloat(num::ParseFloatError),
    /// An integer literal started with `0` followed by the given character,
    /// which is not a recognized radix marker.
    IllegalIntegerRadix(char),
    /// The digits of an integer literal could not be parsed as an `i64` in
    /// the selected radix.
    IllegalInteger(num::ParseIntError),
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Error::UnexpectedEof => write!(f, "unexpected EOF"),
Error::IllegalFloat(err) => write!(f, "illegal floating point number: {}", err),
Error::IllegalIntegerRadix(r) => write!(f, "illegal number radix: {}", r),
Error::IllegalInteger(err) => write!(f, "illegal integer number: {}", err),
}
}
}
impl error::Error for Error {
fn cause(&self) -> Option<&dyn error::Error> {
match self {
Error::IllegalFloat(err) => Some(err),
Error::IllegalInteger(err) => Some(err),
_ => None,
}
}
}