use std::fmt::Display;
use std::iter::Peekable;
use std::num::ParseIntError;
use std::str::Chars;
/// Every lexical unit the lexer can produce.
///
/// Variants without a payload stand for one fixed lexeme; the remaining
/// variants carry the value that was read from the input.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Token {
    /// `=`
    EQUAL,
    /// `!`
    BANG,
    /// `==`
    EQ,
    /// `!=`
    NOTEQ,
    /// `+`
    PLUS,
    /// `-`
    MINUS,
    /// `*`
    ASTERISK,
    /// `/`
    SLASH,
    /// `<`
    LT,
    /// `>`
    GT,
    /// `,`
    COMMA,
    /// `:`
    COLON,
    /// `.`
    DOT,
    /// `;`
    SEMICOLON,
    /// A string literal; the payload excludes the surrounding quotes.
    STRING(String),
    /// `(`
    LPAREN,
    /// `)`
    RPAREN,
    /// `{`
    LSQUIG,
    /// `}`
    RSQUIG,
    /// `[`
    LSQR,
    /// `]`
    RSQR,
    /// Keyword `let`
    LET,
    /// Keyword `fn`
    FN,
    /// Keyword `do`
    DO,
    /// Keyword `while`
    WHILE,
    /// Keyword `for`
    FOR,
    /// Keyword `if`
    IF,
    /// Keyword `else`
    ELSE,
    /// Keyword `return`
    RETURN,
    /// A boolean literal.
    BOOL(bool),
    /// An integer literal.
    INT(isize),
    /// Any name that is not a keyword.
    IDENTIFIER(String),
}

impl Display for Token {
    /// Writes the fixed lexeme for payload-free tokens and a rendering of the
    /// payload for the others (`"…"`, `true`/`false`, `Int(…)`,
    /// `IDENTIFIER(…)`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Payload-carrying variants need real formatting and return early;
        // everything else is a plain lexeme lookup written once at the end.
        let lexeme = match self {
            Self::STRING(contents) => return write!(f, "\"{}\"", contents),
            Self::BOOL(value) => return write!(f, "{value}"),
            Self::INT(value) => return write!(f, "Int({value})"),
            Self::IDENTIFIER(name) => return write!(f, "IDENTIFIER({name})"),
            Self::EQUAL => "=",
            Self::BANG => "!",
            Self::EQ => "==",
            Self::NOTEQ => "!=",
            Self::PLUS => "+",
            Self::MINUS => "-",
            Self::ASTERISK => "*",
            Self::SLASH => "/",
            Self::LT => "<",
            Self::GT => ">",
            Self::COMMA => ",",
            Self::COLON => ":",
            Self::DOT => ".",
            Self::SEMICOLON => ";",
            Self::LPAREN => "(",
            Self::RPAREN => ")",
            Self::LSQUIG => "{",
            Self::RSQUIG => "}",
            Self::LSQR => "[",
            Self::RSQR => "]",
            Self::LET => "let",
            Self::FN => "fn",
            Self::DO => "do",
            Self::WHILE => "while",
            Self::FOR => "for",
            Self::IF => "if",
            Self::ELSE => "else",
            Self::RETURN => "return",
        };
        f.write_str(lexeme)
    }
}
/// A place in the source text, expressed as a line and a column.
///
/// Both coordinates start at 1 (see the `Default` impl).
#[derive(Debug, Clone, PartialEq)]
pub struct Location {
    /// Line number.
    pub line: usize,
    /// Column within the line.
    pub position: usize,
}

impl Default for Location {
    /// The very beginning of the input: line 1, position 1.
    fn default() -> Self {
        const START: usize = 1;
        Location {
            line: START,
            position: START,
        }
    }
}
/// A [`Token`] paired with the [`Location`] it was constructed with.
#[derive(Debug, Clone, PartialEq)]
pub struct TokenWithLocation(Token, Location);

impl TokenWithLocation {
    /// Bundles `token` with `location`.
    pub fn new(token: Token, location: Location) -> Self {
        TokenWithLocation(token, location)
    }

    /// Consumes the pair and returns only the token, dropping the location.
    pub fn to_token(self) -> Token {
        let Self(token, _location) = self;
        token
    }

    /// Borrows the token.
    pub fn ref_token(&self) -> &Token {
        let Self(token, _) = self;
        token
    }

    /// Borrows the location.
    pub fn ref_location(&self) -> &Location {
        let Self(_, location) = self;
        location
    }
}
/// The kinds of failure the lexer can report.
#[derive(Debug, PartialEq, Clone)]
pub enum LexerErrorType {
    /// An integer literal could not be converted; wraps the parser's error.
    ParseIntErr(ParseIntError),
    /// The input ended before a string literal's closing quote.
    UnclosedString,
    /// A character that does not start any known token.
    IllegalToken,
}

impl From<ParseIntError> for LexerErrorType {
    /// Wraps an integer parsing failure so it can be propagated as a lexer
    /// error.
    fn from(value: ParseIntError) -> Self {
        LexerErrorType::ParseIntErr(value)
    }
}

impl Display for LexerErrorType {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only the integer case interpolates anything; the other messages are
        // fixed text.
        let message = match self {
            Self::ParseIntErr(err) => return write!(f, "Error parsing number: {err}"),
            Self::UnclosedString => "Unclosed string could not be parsed.",
            Self::IllegalToken => "Illegal character could not make valid token",
        };
        f.write_str(message)
    }
}
/// A lexing failure together with the source location attached to it.
#[derive(Debug, PartialEq, Clone)]
pub struct LexerError {
    /// What went wrong.
    pub error_type: LexerErrorType,
    /// Where in the input the error was raised.
    pub location: Location,
}

impl Display for LexerError {
    /// Formats the error as `Lexer Error at line L column C: <description>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            error_type,
            location,
        } = self;
        write!(
            f,
            "Lexer Error at line {} column {}: {}",
            location.line, location.position, error_type
        )
    }
}

/// Shorthand for a `Result` whose error type is [`LexerError`].
type LexerResult<T> = Result<T, LexerError>;
pub struct Lexer<'a> {
peekable_iter: Peekable<Chars<'a>>,
location: Location
}
impl<'a> Lexer<'a> {
pub fn new(contents: &'a str) -> Lexer<'a> {
Self {
peekable_iter: contents.chars().peekable(),
location: Location::default()
}
}
pub fn parse(&mut self) -> LexerResult<Vec<TokenWithLocation>> {
let mut tokens = Vec::new();
while let Some(token) = self.get_next_token()? {
tokens.push(token);
}
Ok(tokens)
}
pub fn get_next_token(&mut self) -> LexerResult<Option<TokenWithLocation>> {
self.skip_whitespace();
let token: Token;
let location = self.location.clone();
let current_char = self.get_next_char();
if current_char.is_none() {
return Ok(None)
}
let current_char = current_char.unwrap();
if current_char.is_alphabetic() {
let literal = self.read_identifier(current_char);
token = Lexer::literal_keyword(&literal);
}
else if current_char.is_numeric() {
let value = match self.read_number(current_char).parse::<isize>() {
Ok(num) => num,
Err(err) => {
return Err(self.error(err.into()));
}
};
token = Token::INT(value);
} else if current_char == '"' {
let mut string = String::new();
loop {
match self.get_next_char() {
Some(current_char) => {
if current_char == '"' {
break;
}
string.push(current_char);
},
None => return Err(self.error(LexerErrorType::UnclosedString)),
};
}
token = Token::STRING(string)
} else {
token = match current_char {
'=' => {
if let Some(next_char) = self.peek_next_char() {
if *next_char == '=' {
self.get_next_char();
Token::EQ
} else {
Token::EQUAL
}
} else {
Token::EQUAL
}
}
'!' => {
if let Some(next_char) = self.peek_next_char() {
if *next_char == '=' {
self.get_next_char();
Token::NOTEQ
} else {
Token::BANG
}
} else {
Token::BANG
}
}
'+' => Token::PLUS,
'-' => Token::MINUS,
'*' => Token::ASTERISK,
'/' => {
if let Some(next_char) = self.peek_next_char() {
if *next_char == '/' {
self.get_next_char();
while let Some(char) = self.peek_next_char() {
if *char == '\n' {
break;
}
self.get_next_char();
}
return self.get_next_token()
} else {
Token::SLASH
}
} else {
Token::SLASH
}
},
'<' => Token::LT,
'>' => Token::GT,
',' => Token::COMMA,
'.' => Token::DOT,
':' => Token::COLON,
';' => Token::SEMICOLON,
'(' => Token::LPAREN,
')' => Token::RPAREN,
'{' => Token::LSQUIG,
'}' => Token::RSQUIG,
'[' => Token::LSQR,
']' => Token::RSQR,
_ => return Err(LexerError {
error_type: LexerErrorType::IllegalToken,
location
})
};
}
Ok(Some(TokenWithLocation::new(token, location)))
}
fn get_next_char(&mut self) -> Option<char> {
self.location.position += 1;
self.peekable_iter.next()
}
fn peek_next_char(&mut self) -> Option<&char> {
self.peekable_iter.peek()
}
fn read_identifier(&mut self, first_char: char) -> String {
let mut identifier = String::new();
identifier.push(first_char);
while let Some(char) = self.peek_next_char() {
if char.is_alphabetic() || char.is_numeric() || *char == '_'{
identifier.push(self.get_next_char().unwrap());
} else {
break;
}
}
identifier
}
fn read_number(&mut self, first_num: char) -> String {
let mut number = String::new();
number.push(first_num);
while let Some(c) = self.peek_next_char() {
if c.is_numeric() {
number.push(self.get_next_char().unwrap());
} else {
break;
}
}
number
}
fn skip_whitespace(&mut self) {
while let Some(c) = self.peek_next_char() {
if c.is_whitespace() {
let is_new_line = *c == '\n';
self.get_next_char();
if is_new_line {
self.location.line += 1;
self.location.position = 1;
}
} else {
break
}
}
}
fn error(&mut self, err: LexerErrorType) -> LexerError {
LexerError {
error_type: err,
location: self.location.clone()
}
}
fn literal_keyword(literal: &str) -> Token {
match literal {
"let" => Token::LET,
"return" =>Token::RETURN,
"fn" => Token::FN,
"do" => Token::DO,
"while" => Token::WHILE,
"for" => Token::FOR,
"if" => Token::IF,
"else" => Token::ELSE,
"True" => Token::BOOL(true),
"False" => Token::BOOL(false),
_ => Token::IDENTIFIER(String::from(literal))
}
}
}
/// Lets a [`Lexer`] be driven with `for` loops and iterator adaptors.
///
/// Each item is the result of one `get_next_token` call. Iteration ends when
/// the input is exhausted; an `Err` item does not end it, so callers that want
/// to stop at the first error must do so themselves.
impl Iterator for Lexer<'_> {
    type Item = LexerResult<TokenWithLocation>;

    fn next(&mut self) -> Option<Self::Item> {
        // `get_next_token` signals end of input with `Ok(None)`; `transpose`
        // turns that into the iterator's terminating `None` and moves the
        // `Result` inside the `Some` for every other outcome.
        self.get_next_token().transpose()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected `token` at the given 1-based `line` / `position`.
    fn tok(token: Token, line: usize, position: usize) -> TokenWithLocation {
        TokenWithLocation::new(token, Location { line, position })
    }

    /// Expected identifier token named `name`.
    fn ident(name: &str, line: usize, position: usize) -> TokenWithLocation {
        tok(Token::IDENTIFIER(String::from(name)), line, position)
    }

    /// Runs a fresh lexer over `input` until it is exhausted or fails.
    fn lex(input: &str) -> Result<Vec<TokenWithLocation>, LexerError> {
        Lexer::new(input).parse()
    }

    #[test]
    fn parse_numbers() {
        let expected = vec![
            tok(Token::INT(123), 1, 1),
            tok(Token::INT(323), 1, 5),
            tok(Token::INT(111), 1, 9),
        ];
        assert_eq!(lex("123 323 111"), Ok(expected));
    }

    #[test]
    fn parse_identifiers() {
        let expected = vec![ident("Hello", 1, 1), ident("w0_rld", 1, 7)];
        assert_eq!(lex("Hello w0_rld"), Ok(expected));
    }

    #[test]
    fn parse_symbols() {
        // Every symbol is one character wide and they are packed together, so
        // the n-th token sits at column n.
        let symbols = vec![
            Token::EQUAL,
            Token::PLUS,
            Token::MINUS,
            Token::ASTERISK,
            Token::SLASH,
            Token::COMMA,
            Token::COLON,
            Token::SEMICOLON,
            Token::LPAREN,
            Token::RPAREN,
            Token::LSQR,
            Token::RSQR,
            Token::LSQUIG,
            Token::RSQUIG,
            Token::LT,
            Token::GT,
            Token::BANG,
        ];
        let expected: Vec<TokenWithLocation> = symbols
            .into_iter()
            .enumerate()
            .map(|(index, token)| tok(token, 1, index + 1))
            .collect();
        assert_eq!(lex("=+-*/,:;()[]{}<>!"), Ok(expected));
    }

    #[test]
    fn parse_illegal() {
        let input = String::from('~');
        let expected = Err(LexerError {
            error_type: LexerErrorType::IllegalToken,
            location: Location {
                line: 1,
                position: 1,
            },
        });
        assert_eq!(lex(&input), expected);
    }

    #[test]
    fn parse_strings() {
        let expected = vec![
            tok(Token::LET, 1, 1),
            ident("my_string", 1, 5),
            tok(Token::EQUAL, 1, 15),
            tok(Token::STRING("hello".to_string()), 1, 17),
            tok(Token::PLUS, 1, 25),
            tok(Token::STRING("world".to_string()), 1, 27),
            tok(Token::SEMICOLON, 1, 34),
        ];
        assert_eq!(lex("let my_string = \"hello\" + \"world\";"), Ok(expected));
    }

    #[test]
    fn parse_keywords() {
        let expected = vec![
            tok(Token::LET, 1, 1),
            tok(Token::FN, 1, 5),
            tok(Token::WHILE, 1, 8),
            tok(Token::IF, 1, 14),
            tok(Token::ELSE, 1, 17),
            tok(Token::RETURN, 1, 22),
            tok(Token::FOR, 1, 29),
            tok(Token::BOOL(true), 1, 33),
            tok(Token::BOOL(false), 1, 38),
        ];
        assert_eq!(
            lex("let fn while if else return for True False"),
            Ok(expected)
        );
    }

    #[test]
    fn parse_loops() {
        let input = "while i == 2 {\n\tfor(let b = 2; b < i; b = b + 1;) {\n\t\ti = i - b;\n\t}\n}";
        let expected = vec![
            // line 1
            tok(Token::WHILE, 1, 1),
            ident("i", 1, 7),
            tok(Token::EQ, 1, 9),
            tok(Token::INT(2), 1, 12),
            tok(Token::LSQUIG, 1, 14),
            // line 2
            tok(Token::FOR, 2, 2),
            tok(Token::LPAREN, 2, 5),
            tok(Token::LET, 2, 6),
            ident("b", 2, 10),
            tok(Token::EQUAL, 2, 12),
            tok(Token::INT(2), 2, 14),
            tok(Token::SEMICOLON, 2, 15),
            ident("b", 2, 17),
            tok(Token::LT, 2, 19),
            ident("i", 2, 21),
            tok(Token::SEMICOLON, 2, 22),
            ident("b", 2, 24),
            tok(Token::EQUAL, 2, 26),
            ident("b", 2, 28),
            tok(Token::PLUS, 2, 30),
            tok(Token::INT(1), 2, 32),
            tok(Token::SEMICOLON, 2, 33),
            tok(Token::RPAREN, 2, 34),
            tok(Token::LSQUIG, 2, 36),
            // line 3
            ident("i", 3, 3),
            tok(Token::EQUAL, 3, 5),
            ident("i", 3, 7),
            tok(Token::MINUS, 3, 9),
            ident("b", 3, 11),
            tok(Token::SEMICOLON, 3, 12),
            // lines 4 and 5
            tok(Token::RSQUIG, 4, 2),
            tok(Token::RSQUIG, 5, 1),
        ];
        assert_eq!(lex(input), Ok(expected));
    }

    #[test]
    fn parse_everything() {
        let input = "fn plus_1(foo) {\n\tlet number = foo + 1; number\n}";
        let expected = vec![
            // line 1
            tok(Token::FN, 1, 1),
            ident("plus_1", 1, 4),
            tok(Token::LPAREN, 1, 10),
            ident("foo", 1, 11),
            tok(Token::RPAREN, 1, 14),
            tok(Token::LSQUIG, 1, 16),
            // line 2
            tok(Token::LET, 2, 2),
            ident("number", 2, 6),
            tok(Token::EQUAL, 2, 13),
            ident("foo", 2, 15),
            tok(Token::PLUS, 2, 19),
            tok(Token::INT(1), 2, 21),
            tok(Token::SEMICOLON, 2, 22),
            ident("number", 2, 24),
            // line 3
            tok(Token::RSQUIG, 3, 1),
        ];
        assert_eq!(lex(input), Ok(expected));
    }
}