use logos::{Logos, SpannedIter};
use std::collections::VecDeque;
use std::ops::Range;
mod token;
pub use token::Token;
/// Indentation-aware lexer: wraps a `logos`-generated token stream and
/// synthesizes `Indent`/`Dedent` tokens (Python-style) from the leading
/// whitespace that follows each `Newline` token.
pub struct Lexer<'a> {
// Underlying logos lexer yielding `(Result<Token, _>, byte_span)` pairs.
inner: SpannedIter<'a, Token>,
// Full source text; needed to re-slice a Newline token's span and
// measure the whitespace that follows the '\n'.
source: &'a str,
// Stack of active indentation widths; always starts with a base level 0
// and is never popped below that base.
indent_stack: Vec<usize>,
// Synthetic Indent/Dedent tokens queued for emission before the next
// real token is pulled from `inner`.
pending_tokens: VecDeque<(Token, Range<usize>)>,
// Ensures the end-of-input Dedent flush runs exactly once.
eof_processed: bool,
}
impl<'a> Lexer<'a> {
    /// Builds a lexer over `source`, starting at the base indentation
    /// level 0 with no synthetic tokens queued.
    pub fn new(source: &'a str) -> Self {
        let inner = Token::lexer(source).spanned();
        Self {
            inner,
            source,
            indent_stack: vec![0],
            pending_tokens: VecDeque::new(),
            eof_processed: false,
        }
    }

    /// Measures the indentation encoded in a Newline token's slice.
    ///
    /// The first character (the newline itself) is skipped; each space
    /// counts as 1 column and each tab as 4. Any other character
    /// contributes nothing.
    fn calculate_indent(&self, slice: &str) -> usize {
        slice
            .chars()
            .skip(1) // drop the leading '\n'
            .map(|c| match c {
                ' ' => 1,
                '\t' => 4,
                _ => 0,
            })
            .sum()
    }
}
impl<'a> Iterator for Lexer<'a> {
type Item = (Token, Range<usize>);
fn next(&mut self) -> Option<Self::Item> {
if let Some(token) = self.pending_tokens.pop_front() {
return Some(token);
}
match self.inner.next() {
Some((Ok(token), span)) => {
match token {
Token::Newline => {
let slice = &self.source[span.clone()];
let indent = self.calculate_indent(slice);
let current_indent = *self.indent_stack.last().unwrap_or(&0);
if indent > current_indent {
self.indent_stack.push(indent);
self.pending_tokens.push_back((Token::Indent, span.clone()));
Some((Token::Newline, span))
} else if indent < current_indent {
while let Some(&top) = self.indent_stack.last() {
if top > indent {
self.indent_stack.pop();
self.pending_tokens.push_back((Token::Dedent, span.clone()));
} else {
break;
}
}
if let Some(&top) = self.indent_stack.last() {
if top != indent {
return Some((Token::Error, span));
}
}
Some((Token::Newline, span))
} else {
Some((Token::Newline, span))
}
}
_ => Some((token, span)),
}
}
Some((Err(_), span)) => Some((Token::Error, span)),
None => {
if !self.eof_processed {
self.eof_processed = true;
while self.indent_stack.len() > 1 {
self.indent_stack.pop();
let len = self.source.len();
self.pending_tokens.push_back((Token::Dedent, len..len));
}
if let Some(token) = self.pending_tokens.pop_front() {
return Some(token);
}
}
None
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `source` and keeps only the token kinds, dropping spans.
    fn lex(source: &str) -> Vec<Token> {
        Lexer::new(source).map(|(tok, _)| tok).collect()
    }

    #[test]
    fn test_basic_tokens() {
        let expected = vec![
            Token::Def,
            Token::Ident("foo".to_string()),
            Token::LParen,
            Token::Ident("x".to_string()),
            Token::RParen,
            Token::Colon,
            Token::Return,
            Token::Ident("x".to_string()),
            Token::Plus,
            Token::IntLit(1),
        ];
        assert_eq!(lex("def foo(x): return x + 1"), expected);
    }

    #[test]
    fn test_indentation() {
        // Indented body opens an Indent after the Newline; returning to
        // column 0 closes it with a Dedent.
        let expected = vec![
            Token::Def,
            Token::Ident("foo".to_string()),
            Token::LParen,
            Token::RParen,
            Token::Colon,
            Token::Newline,
            Token::Indent,
            Token::Return,
            Token::IntLit(1),
            Token::Newline,
            Token::Dedent,
            Token::Ident("x".to_string()),
            Token::Eq,
            Token::IntLit(2),
            Token::Newline,
        ];
        assert_eq!(lex("def foo():\n return 1\nx = 2\n"), expected);
    }

    #[test]
    fn test_operators() {
        let expected = vec![
            Token::PlusEq,
            Token::MinusEq,
            Token::StarEq,
            Token::SlashEq,
            Token::EqEq,
            Token::NotEq,
            Token::ColonColon,
            Token::Arrow,
        ];
        assert_eq!(lex("+= -= *= /= == != :: ->"), expected);
    }

    #[test]
    fn test_keywords() {
        let expected = vec![
            Token::Async,
            Token::Await,
            Token::Unsafe,
            Token::Safe,
            Token::Asm,
            Token::Comptime,
        ];
        assert_eq!(lex("async await unsafe safe asm comptime"), expected);
    }

    #[test]
    fn test_numbers_and_strings() {
        let tokens = lex(r#"42 3.14 "hello world""#);
        assert_eq!(tokens[0], Token::IntLit(42));
        assert_eq!(tokens[1], Token::FloatLit(3.14));
        assert!(matches!(tokens[2], Token::StringLit(_)));
    }
}