use crate::number::{Number, ParseNumber};
use crate::span::Span;
use crate::token::Token;
use std::iter::Peekable;
use std::str::Chars;
/// On-demand lexer over a borrowed source string.
///
/// Tokens borrow their text from the input, so they share its `'src` lifetime.
/// `Debug` and `Clone` are derived so a caller can print the lexer state or snapshot it
/// (for example to look ahead and then rewind by restoring the copy).
#[derive(Debug, Clone)]
pub struct Lexer<'src> {
    /// Complete source text; lexemes are sliced out of it by byte offset.
    input: &'src str,
    /// Character cursor over `input` with one character of lookahead.
    iter: Peekable<Chars<'src>>,
    /// Byte offset in `input` at which the current token starts.
    start: usize,
    /// Byte offset in `input` just past the last character consumed.
    pos: usize,
}
impl<'src> Lexer<'src> {
/// Creates a lexer positioned at the very beginning of `input`.
///
/// No scanning happens here; both the token start and the cursor begin at byte offset 0.
pub fn new(input: &'src str) -> Self {
    let iter = input.chars().peekable();
    Self {
        input,
        iter,
        start: 0,
        pos: 0,
    }
}
/// Scans and returns the next token.
///
/// Leading spaces and tabs are skipped, then the token's starting byte offset is recorded
/// so that `span()` covers the token just produced. Returns `Token::Eof` when no
/// characters remain; any character that starts no known token yields an invalid token.
pub fn next(&mut self) -> Token<'src> {
    self.skip_spaces();
    self.start = self.pos;
    let Some(first) = self.read() else {
        return Token::Eof;
    };
    match first {
        // Numeric literals; a leading '.' already counts as the decimal point.
        '0'..='9' => self.number(false),
        '.' => self.number(true),
        // Operators that may be extended by a following '='.
        '!' => self.exclamation(),
        '=' => self.equals(),
        '<' => self.less(),
        '>' => self.greater(),
        // Single-character operators and punctuation.
        '+' => Token::Plus,
        '-' => Token::Minus,
        '*' => Token::Star,
        '/' => Token::Slash,
        '^' => Token::Caret,
        '(' => Token::ParenOpen,
        ')' => Token::ParenClose,
        ',' => Token::Comma,
        // Keywords and identifiers.
        c if Self::is_ident_start(c) => self.identifier(),
        _ => self.invalid(),
    }
}
/// Returns the span built from the current token's start offset and the cursor position.
///
/// Both values are byte offsets into the input; after a call to `next` they delimit the
/// token that call returned.
pub fn span(&self) -> Span {
    let begin = self.start;
    let end = self.pos;
    Span::new(begin, end)
}
/// Consumes characters for as long as the next one satisfies `is_space`.
fn skip_spaces(&mut self) {
    while self.peek().is_some_and(Self::is_space) {
        self.read();
    }
}
/// Wraps everything consumed since the current token's start in an invalid token.
fn invalid(&self) -> Token<'src> {
    let lexeme = &self.input[self.start..self.pos];
    Token::Invalid(lexeme)
}
/// Scans the rest of a numeric literal whose first character was already consumed.
///
/// `seen_dot` tells whether that first character was the decimal point. The whole run of
/// digits and dots is always consumed, so a malformed literal such as `1.2.3` becomes a
/// single invalid token rather than several tokens. A well-formed run is handed to
/// `Number::parse_number`; if that fails the lexeme is reported as invalid too.
fn number(&mut self, mut seen_dot: bool) -> Token<'src> {
    let mut extra_dot = false;
    loop {
        match self.peek() {
            Some(c) if c.is_ascii_digit() => {}
            Some('.') => {
                // A second decimal point poisons the literal, but scanning continues.
                extra_dot |= seen_dot;
                seen_dot = true;
            }
            _ => break,
        }
        self.read();
    }

    let lexeme = &self.input[self.start..self.pos];
    if extra_dot {
        return Token::Invalid(lexeme);
    }
    match Number::parse_number(lexeme) {
        Err(_) => Token::Invalid(lexeme),
        Ok(value) => Token::Number(value),
    }
}
/// Scans the rest of an identifier whose first character was already consumed.
///
/// The words `if`, `let` and `then` are recognised as keywords regardless of ASCII case;
/// every other word becomes an identifier token borrowing its text from the input.
fn identifier(&mut self) -> Token<'src> {
    while self.peek().is_some_and(Self::is_ident_continue) {
        self.read();
    }

    let word = &self.input[self.start..self.pos];
    match word {
        w if w.eq_ignore_ascii_case("if") => Token::If,
        w if w.eq_ignore_ascii_case("let") => Token::Let,
        w if w.eq_ignore_ascii_case("then") => Token::Then,
        w => Token::Ident(w),
    }
}
/// Returns the next character without consuming it, or `None` at end of input.
fn peek(&mut self) -> Option<char> {
    let upcoming = self.iter.peek();
    upcoming.copied()
}
/// Consumes and returns the next character, or `None` at end of input.
///
/// `pos` advances by the character's UTF-8 length, keeping it a valid byte offset for
/// slicing the input.
fn read(&mut self) -> Option<char> {
    let ch = self.iter.next()?;
    self.pos += ch.len_utf8();
    Some(ch)
}
/// Reports whether `ch` is skippable whitespace: a space or a horizontal tab only.
fn is_space(ch: char) -> bool {
    matches!(ch, ' ' | '\t')
}
/// Reports whether `ch` may begin an identifier: `_`, any alphabetic character, or
/// anything `is_emoji` accepts.
///
/// NOTE(review): `is_emoji` also accepts U+FE0F, so an identifier may start with that
/// variation selector — confirm this is intended.
fn is_ident_start(ch: char) -> bool {
    match ch {
        '_' => true,
        other => other.is_alphabetic() || Self::is_emoji(other),
    }
}
/// Reports whether `ch` may appear after the first character of an identifier: `_`, any
/// alphanumeric character, or anything `is_emoji` accepts.
fn is_ident_continue(ch: char) -> bool {
    match ch {
        '_' => true,
        other => other.is_alphanumeric() || Self::is_emoji(other),
    }
}
/// Reports whether `ch` falls in one of the code-point ranges this lexer treats as emoji.
///
/// This is a fixed list of ranges, not a full Unicode emoji-property lookup. U+200D
/// (zero-width joiner) is not matched by this function.
fn is_emoji(ch: char) -> bool {
    matches!(
        ch,
        // Miscellaneous Symbols and Pictographs through Symbols and Pictographs Extended-A
        '\u{1F300}'..='\u{1FAFF}'
            // Regional indicator symbols (flag letters)
            | '\u{1F1E6}'..='\u{1F1FF}'
            // Mahjong tiles
            | '\u{1F000}'..='\u{1F02F}'
            // Miscellaneous Symbols
            | '\u{2600}'..='\u{26FF}'
            // Dingbats
            | '\u{2700}'..='\u{27BF}'
            // Variation Selector-16
            | '\u{FE0F}'
    )
}
/// Finishes a token that began with `!`: `!=` is not-equal, a lone `!` is bang.
fn exclamation(&mut self) -> Token<'src> {
    match self.peek() {
        Some('=') => {
            self.read();
            Token::NotEqual
        }
        _ => Token::Bang,
    }
}
/// Finishes a token that began with `=`: `==` is equality, a lone `=` is assignment.
fn equals(&mut self) -> Token<'src> {
    match self.peek() {
        Some('=') => {
            self.read();
            Token::Equal
        }
        _ => Token::Assign,
    }
}
/// Finishes a token that began with `<`: `<=` is less-or-equal, a lone `<` is less-than.
fn less(&mut self) -> Token<'src> {
    match self.peek() {
        Some('=') => {
            self.read();
            Token::LessEqual
        }
        _ => Token::Less,
    }
}
/// Finishes a token that began with `>`: `>=` is greater-or-equal, a lone `>` is
/// greater-than.
fn greater(&mut self) -> Token<'src> {
    match self.peek() {
        Some('=') => {
            self.read();
            Token::GreaterEqual
        }
        _ => Token::Greater,
    }
}
}