use crate::error::{Error, Result};
use std::collections::HashMap;
use std::fmt;
/// A single lexical token together with its source text and position.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
/// What kind of token this is, including any literal payload.
pub kind: TokenKind,
/// Text associated with the token. The lexer in this file leaves it empty
/// for string literals, comments and the end-of-input token.
pub lexeme: String,
/// Line number the lexer recorded for the token; counting starts at 1.
pub line: usize,
/// Column number the lexer recorded for the token; counting starts at 1.
pub column: usize,
}
/// Every kind of token the lexer can describe.
///
/// Variants with a payload carry the literal value; all other variants are
/// fully described by the variant itself.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
// --- Literals and names ---
/// String literal contents. For double-quoted strings the escape sequences
/// have already been resolved; single-quoted strings are taken verbatim.
String(String),
/// Numeric literal, parsed as `f64`.
Number(f64),
/// A name that is not one of the reserved words.
Identifier(String),
// --- Reserved words ---
Let,
Const,
If,
Else,
While,
For,
In,
/// The function keyword; the lexer's keyword table spells it `fn`.
Function,
Return,
Break,
Continue,
True,
False,
Null,
/// Logical and: produced for the word `and` and for `&&`.
And,
/// Logical or: produced for the word `or` and for `||`.
Or,
/// Logical not: produced for the word `not` and for a lone `!`.
Not,
// --- Arithmetic operators ---
Plus,
Minus,
Multiply,
Divide,
Modulo,
/// `**`
Power,
// --- Assignment operators ---
Assign,
PlusAssign,
MinusAssign,
// --- Comparison operators ---
Equal,
NotEqual,
Less,
LessEqual,
Greater,
GreaterEqual,
// --- Bitwise operators ---
BitwiseAnd,
BitwiseOr,
BitwiseXor,
LeftShift,
RightShift,
// --- Delimiters and punctuation ---
LeftParen,
RightParen,
LeftBrace,
RightBrace,
LeftBracket,
RightBracket,
Comma,
Semicolon,
Colon,
Dot,
/// `->`
Arrow,
Question,
Tilde,
// --- Layout and trivia ---
/// A line break; the lexer emits one token per `\n`.
Newline,
/// Comment text following `#` up to the end of the line (the `#` itself is
/// not included).
Comment(String),
/// End of input; appended once by `Lexer::tokenize`.
Eof,
// --- String interpolation ---
// NOTE(review): the lexer code visible in this file never emits these two
// variants; presumably they are reserved for `${ ... }` interpolation —
// confirm against the rest of the project.
InterpolationStart,
InterpolationEnd,
}
impl fmt::Display for TokenKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
TokenKind::String(s) => write!(f, "\"{}\"", s),
TokenKind::Number(n) => write!(f, "{}", n),
TokenKind::Identifier(s) => write!(f, "{}", s),
TokenKind::Let => write!(f, "let"),
TokenKind::Const => write!(f, "const"),
TokenKind::If => write!(f, "if"),
TokenKind::Else => write!(f, "else"),
TokenKind::While => write!(f, "while"),
TokenKind::For => write!(f, "for"),
TokenKind::In => write!(f, "in"),
TokenKind::Function => write!(f, "function"),
TokenKind::Return => write!(f, "return"),
TokenKind::Break => write!(f, "break"),
TokenKind::Continue => write!(f, "continue"),
TokenKind::True => write!(f, "true"),
TokenKind::False => write!(f, "false"),
TokenKind::Null => write!(f, "null"),
TokenKind::And => write!(f, "and"),
TokenKind::Or => write!(f, "or"),
TokenKind::Not => write!(f, "not"),
TokenKind::Plus => write!(f, "+"),
TokenKind::Minus => write!(f, "-"),
TokenKind::Multiply => write!(f, "*"),
TokenKind::Divide => write!(f, "/"),
TokenKind::Modulo => write!(f, "%"),
TokenKind::Power => write!(f, "**"),
TokenKind::Assign => write!(f, "="),
TokenKind::PlusAssign => write!(f, "+="),
TokenKind::MinusAssign => write!(f, "-="),
TokenKind::Equal => write!(f, "=="),
TokenKind::NotEqual => write!(f, "!="),
TokenKind::Less => write!(f, "<"),
TokenKind::LessEqual => write!(f, "<="),
TokenKind::Greater => write!(f, ">"),
TokenKind::GreaterEqual => write!(f, ">="),
TokenKind::BitwiseAnd => write!(f, "&"),
TokenKind::BitwiseOr => write!(f, "|"),
TokenKind::BitwiseXor => write!(f, "^"),
TokenKind::LeftShift => write!(f, "<<"),
TokenKind::RightShift => write!(f, ">>"),
TokenKind::LeftParen => write!(f, "("),
TokenKind::RightParen => write!(f, ")"),
TokenKind::LeftBrace => write!(f, "{{"),
TokenKind::RightBrace => write!(f, "}}"),
TokenKind::LeftBracket => write!(f, "["),
TokenKind::RightBracket => write!(f, "]"),
TokenKind::Comma => write!(f, ","),
TokenKind::Semicolon => write!(f, ";"),
TokenKind::Colon => write!(f, ":"),
TokenKind::Dot => write!(f, "."),
TokenKind::Arrow => write!(f, "->"),
TokenKind::Question => write!(f, "?"),
TokenKind::Tilde => write!(f, "~"),
TokenKind::Newline => write!(f, "\\n"),
TokenKind::Comment(s) => write!(f, "#{}", s),
TokenKind::Eof => write!(f, "EOF"),
TokenKind::InterpolationStart => write!(f, "${{"),
TokenKind::InterpolationEnd => write!(f, "}}"),
}
}
}
/// Scanner state for turning source text into a list of [`Token`]s.
pub struct Lexer {
/// The source text, stored one `char` per element so it can be indexed.
input: Vec<char>,
/// Index into `input` of the next character to be consumed.
position: usize,
/// Current line counter; initialised to 1 by `Lexer::new`.
line: usize,
/// Current column counter; initialised to 1 by `Lexer::new`.
column: usize,
/// Tokens produced so far; handed to the caller by `tokenize`.
tokens: Vec<Token>,
/// Reserved words mapped to the token kind they produce.
keywords: HashMap<String, TokenKind>,
}
impl Lexer {
pub fn new(input: &str) -> Self {
let mut keywords = HashMap::new();
keywords.insert("let".to_string(), TokenKind::Let);
keywords.insert("const".to_string(), TokenKind::Const);
keywords.insert("if".to_string(), TokenKind::If);
keywords.insert("else".to_string(), TokenKind::Else);
keywords.insert("while".to_string(), TokenKind::While);
keywords.insert("for".to_string(), TokenKind::For);
keywords.insert("in".to_string(), TokenKind::In);
keywords.insert("fn".to_string(), TokenKind::Function);
keywords.insert("return".to_string(), TokenKind::Return);
keywords.insert("break".to_string(), TokenKind::Break);
keywords.insert("continue".to_string(), TokenKind::Continue);
keywords.insert("true".to_string(), TokenKind::True);
keywords.insert("false".to_string(), TokenKind::False);
keywords.insert("null".to_string(), TokenKind::Null);
keywords.insert("and".to_string(), TokenKind::And);
keywords.insert("or".to_string(), TokenKind::Or);
keywords.insert("not".to_string(), TokenKind::Not);
Self {
input: input.chars().collect(),
position: 0,
line: 1,
column: 1,
tokens: Vec::new(),
keywords,
}
}
pub fn tokenize(&mut self) -> Result<Vec<Token>> {
while !self.is_at_end() {
self.scan_token()?;
}
self.tokens.push(Token {
kind: TokenKind::Eof,
lexeme: String::new(),
line: self.line,
column: self.column,
});
Ok(std::mem::take(&mut self.tokens))
}
fn scan_token(&mut self) -> Result<()> {
let start_column = self.column;
let c = self.advance();
match c {
' ' | '\r' | '\t' => {} '\n' => {
self.add_token(TokenKind::Newline, "\n");
self.line += 1;
self.column = 1;
return Ok(());
}
'#' => self.comment()?,
'"' => self.string()?,
'\'' => self.single_quoted_string()?,
'(' => self.add_token(TokenKind::LeftParen, "("),
')' => self.add_token(TokenKind::RightParen, ")"),
'{' => self.add_token(TokenKind::LeftBrace, "{"),
'}' => self.add_token(TokenKind::RightBrace, "}"),
'[' => self.add_token(TokenKind::LeftBracket, "["),
']' => self.add_token(TokenKind::RightBracket, "]"),
',' => self.add_token(TokenKind::Comma, ","),
';' => self.add_token(TokenKind::Semicolon, ";"),
':' => self.add_token(TokenKind::Colon, ":"),
'.' => self.add_token(TokenKind::Dot, "."),
'?' => self.add_token(TokenKind::Question, "?"),
'~' => self.add_token(TokenKind::Tilde, "~"),
'+' => {
if self.match_char('=') {
self.add_token(TokenKind::PlusAssign, "+=");
} else {
self.add_token(TokenKind::Plus, "+");
}
}
'-' => {
if self.match_char('=') {
self.add_token(TokenKind::MinusAssign, "-=");
} else if self.match_char('>') {
self.add_token(TokenKind::Arrow, "->");
} else {
self.add_token(TokenKind::Minus, "-");
}
}
'*' => {
if self.match_char('*') {
self.add_token(TokenKind::Power, "**");
} else {
self.add_token(TokenKind::Multiply, "*");
}
}
'/' => self.add_token(TokenKind::Divide, "/"),
'%' => self.add_token(TokenKind::Modulo, "%"),
'=' => {
if self.match_char('=') {
self.add_token(TokenKind::Equal, "==");
} else {
self.add_token(TokenKind::Assign, "=");
}
}
'!' => {
if self.match_char('=') {
self.add_token(TokenKind::NotEqual, "!=");
} else {
self.add_token(TokenKind::Not, "!");
}
}
'<' => {
if self.match_char('=') {
self.add_token(TokenKind::LessEqual, "<=");
} else if self.match_char('<') {
self.add_token(TokenKind::LeftShift, "<<");
} else {
self.add_token(TokenKind::Less, "<");
}
}
'>' => {
if self.match_char('=') {
self.add_token(TokenKind::GreaterEqual, ">=");
} else if self.match_char('>') {
self.add_token(TokenKind::RightShift, ">>");
} else {
self.add_token(TokenKind::Greater, ">");
}
}
'&' => {
if self.match_char('&') {
self.add_token(TokenKind::And, "&&");
} else {
self.add_token(TokenKind::BitwiseAnd, "&");
}
}
'|' => {
if self.match_char('|') {
self.add_token(TokenKind::Or, "||");
} else {
self.add_token(TokenKind::BitwiseOr, "|");
}
}
'^' => self.add_token(TokenKind::BitwiseXor, "^"),
_ => {
if c.is_ascii_digit() {
self.number()?;
} else if c.is_ascii_alphabetic() || c == '_' {
self.identifier()?;
} else {
return Err(Error::lexer_error(
self.line,
start_column,
format!("Unexpected character '{}'", c)
));
}
}
}
Ok(())
}
fn comment(&mut self) -> Result<()> {
let start = self.position;
while self.peek() != '\n' && !self.is_at_end() {
self.advance();
}
let comment_text: String = self.input[start..self.position].iter().collect();
self.add_token(TokenKind::Comment(comment_text), "");
Ok(())
}
fn string(&mut self) -> Result<()> {
let mut value = String::new();
while self.peek() != '"' && !self.is_at_end() {
if self.peek() == '\\' {
self.advance(); let c = self.advance();
value.push(match c {
'n' => '\n',
'r' => '\r',
't' => '\t',
'"' => '"',
'\\' => '\\',
'$' => '$',
_ => return Err(Error::lexer_error(self.line, self.column, format!("Invalid escape sequence '\\{}'", c)))
});
} else {
value.push(self.advance());
}
}
if self.is_at_end() {
return Err(Error::lexer_error(self.line, self.column, "Unterminated string"));
}
self.advance(); self.add_token(TokenKind::String(value), "");
Ok(())
}
fn single_quoted_string(&mut self) -> Result<()> {
let mut value = String::new();
while self.peek() != '\'' && !self.is_at_end() {
value.push(self.advance());
}
if self.is_at_end() {
return Err(Error::lexer_error(self.line, self.column, "Unterminated single-quoted string"));
}
self.advance(); self.add_token(TokenKind::String(value), "");
Ok(())
}
fn number(&mut self) -> Result<()> {
let start = self.position - 1;
while self.peek().is_ascii_digit() {
self.advance();
}
if self.peek() == '.' && self.peek_next().is_ascii_digit() {
self.advance(); while self.peek().is_ascii_digit() {
self.advance();
}
}
let lexeme: String = self.input[start..self.position].iter().collect();
let value = lexeme.parse().unwrap(); self.add_token(TokenKind::Number(value), &lexeme);
Ok(())
}
fn identifier(&mut self) -> Result<()> {
let start = self.position - 1;
while self.peek().is_ascii_alphanumeric() || self.peek() == '_' {
self.advance();
}
let lexeme: String = self.input[start..self.position].iter().collect();
let token_kind = self.keywords.get(&lexeme).cloned().unwrap_or(TokenKind::Identifier(lexeme.clone()));
self.add_token(token_kind, &lexeme);
Ok(())
}
fn add_token(&mut self, kind: TokenKind, lexeme: &str) {
self.tokens.push(Token {
kind,
lexeme: lexeme.to_string(),
line: self.line,
column: self.column,
});
}
fn match_char(&mut self, expected: char) -> bool {
if self.is_at_end() || self.input[self.position] != expected {
false
} else {
self.position += 1;
self.column += 1;
true
}
}
fn peek(&self) -> char {
if self.is_at_end() {
'\0'
} else {
self.input[self.position]
}
}
fn peek_next(&self) -> char {
if self.position + 1 >= self.input.len() {
'\0'
} else {
self.input[self.position + 1]
}
}
fn advance(&mut self) -> char {
let c = self.input[self.position];
self.position += 1;
self.column += 1;
c
}
fn is_at_end(&self) -> bool {
self.position >= self.input.len()
}
}