use super::Int;
use crate::common::*;
use smol_str::SmolStr;
use std::{borrow::Cow, fmt, iter::Peekable, str::Chars};
/// A lexical token produced by [`Lexer`].
///
/// `Debug` is implemented by hand for this type and prints the token's
/// source-like spelling (e.g. `+`, `>=`, `while`) rather than the variant name.
#[derive(PartialEq, Eq, Clone, Hash)]
pub enum Token {
/// Integer literal.
Num(Int),
/// Identifier: any word that is not one of the keywords recognised by `Token::parse_id`.
Id(SmolStr),
/// `+`
Pls,
/// `-`
Sub,
/// `*`
Mul,
/// `/`
Div,
/// `(`
OpnBrace,
/// `)`
ClsBrace,
/// `=`
Ass,
/// Operator immediately followed by `=` (e.g. `+=`); the boxed token is the operator.
/// The lexer only builds this for `+`, `-`, `*` and `/`.
OpAss(Box<Token>),
/// `>`
Gt,
/// `>=`
GtEq,
/// `<`
Lt,
/// `<=`
LtEq,
/// `%`
Mod,
/// `[`
OpnSqr,
/// `]`
ClsSqr,
/// `[]` (an opening square bracket immediately followed by a closing one).
Square,
/// End of line (a `\n` character).
Eol,
/// End of input.
Eof,
/// Keyword `var`.
Var,
/// Keyword `if`.
If,
/// Keyword `else`.
Else,
/// Keyword `and`.
And,
/// Keyword `or`.
Or,
/// Keyword `not`.
Not,
/// Keyword `is`.
Is,
/// Leading spaces at the start of a line. `Lexer::next_token` stores the number of
/// 4-space levels; `Lexer::peek` only reports the kind and always stores `0`.
Indent(usize),
/// Keyword `loop`.
Loop,
/// Keyword `while`.
While,
/// Keyword `for`.
For,
/// Keyword `in`.
In,
/// Keyword `break`.
Break,
/// Keyword `continue`.
Continue,
/// Keyword `fun`.
Fun,
/// Keyword `return`.
Return,
/// `,`
Comma,
/// `.`
Dot,
/// Keyword `seq`.
Seq,
}
use crate::lexer::Token::*;
impl fmt::Debug for Token {
    /// Writes the token's source-like spelling.
    ///
    /// Symbols and keywords are written as they appear in source text, `Eol` / `Eof`
    /// are written by name, an indent is written as `' '*N`, and an operator-assignment
    /// is written as its operator followed by `=`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Tokens carrying a payload are formatted on the spot; every other token has a
        // fixed spelling that is written out once at the end.
        let spelling = match self {
            Num(value) => return write!(f, "{}", value),
            Id(name) => return write!(f, "{}", name),
            Indent(level) => return write!(f, "' '*{}", level),
            OpAss(op) => return write!(f, "{:?}=", op),
            Pls => "+",
            Sub => "-",
            Mul => "*",
            Div => "/",
            Mod => "%",
            OpnBrace => "(",
            ClsBrace => ")",
            OpnSqr => "[",
            ClsSqr => "]",
            Square => "[]",
            Ass => "=",
            Gt => ">",
            GtEq => ">=",
            Lt => "<",
            LtEq => "<=",
            Comma => ",",
            Dot => ".",
            Eol => "Eol",
            Eof => "Eof",
            Var => "var",
            If => "if",
            Else => "else",
            Not => "not",
            Is => "is",
            And => "and",
            Or => "or",
            Loop => "loop",
            While => "while",
            For => "for",
            In => "in",
            Break => "break",
            Continue => "continue",
            Fun => "fun",
            Return => "return",
            Seq => "seq",
        };
        f.write_str(spelling)
    }
}
impl Token {
    /// Maps a single punctuation character to its token.
    ///
    /// Returns `None` for any character that is not one of the single-character
    /// symbols. Two-character tokens (`>=`, `<=`, `+=`, `[]`, ...) are not produced
    /// here; the lexer assembles them by looking at the following character.
    pub fn parse(c: char) -> Option<Token> {
        let token = match c {
            '-' => Sub,
            '+' => Pls,
            '*' => Mul,
            '/' => Div,
            '(' => OpnBrace,
            ')' => ClsBrace,
            '=' => Ass,
            '>' => Gt,
            '<' => Lt,
            '%' => Mod,
            ',' => Comma,
            '.' => Dot,
            '[' => OpnSqr,
            ']' => ClsSqr,
            _ => return None,
        };
        Some(token)
    }

    /// Converts a word into its keyword token, or into [`Token::Id`] when the word
    /// is not a keyword.
    pub fn parse_id<A: Into<SmolStr>>(id: A) -> Token {
        let id = id.into();
        match id.as_ref() {
            "var" => Var,
            "if" => If,
            "else" => Else,
            "not" => Not,
            "is" => Is,
            "and" => And,
            "or" => Or,
            "loop" => Loop,
            "while" => While,
            "for" => For,
            "in" => In,
            "break" => Break,
            "continue" => Continue,
            "fun" => Fun,
            "return" => Return,
            "seq" => Seq,
            _ => Id(id),
        }
    }

    /// Whether this operator may be fused with a following `=` into an
    /// operator-assignment token (only `+`, `-`, `*` and `/`).
    fn is_valid_for_op_ass(&self) -> bool {
        matches!(self, Pls | Sub | Mul | Div)
    }

    /// Compares token kinds.
    ///
    /// `Num`, `Id` and `Indent` match any token of the same variant regardless of
    /// payload; every other token must be equal to `token`.
    pub fn matches(&self, token: &Token) -> bool {
        match (self, token) {
            (Num(_), Num(_)) | (Id(_), Id(_)) | (Indent(_), Indent(_)) => true,
            _ => self == token,
        }
    }

    /// Human-readable description of the token, prefixed with its category
    /// (number, id, operator, indent, keyword).
    ///
    /// The match is exhaustive on purpose: `%`, `[`, `]` and `[]` used to fall into a
    /// catch-all arm and were described as keywords. A newly added variant now fails
    /// to compile here until it is given a category.
    pub fn long_debug(&self) -> Cow<'static, str> {
        match self {
            Num(x) => format!("number `{}`", x).into(),
            Id(id) => format!("id `{}`", id).into(),
            Pls | Sub | Mul | Div | Mod | OpnBrace | ClsBrace | OpnSqr | ClsSqr | Square
            | Ass | OpAss(_) | Gt | GtEq | Lt | LtEq => format!("operator `{:?}`", self).into(),
            Indent(_) => format!("indent {:?}", self).into(),
            Eol => "end-of-line".into(),
            Eof => "end-of-file".into(),
            Comma => "`,`".into(),
            Dot => "`.`".into(),
            Var | If | Else | And | Or | Not | Is | Loop | While | For | In | Break
            | Continue | Fun | Return | Seq => format!("keyword `{:?}`", self).into(),
        }
    }

    /// Returns the identifier text when this token is an [`Token::Id`], otherwise `None`.
    pub fn id_str(&self) -> Option<&str> {
        match self {
            Id(inner) => Some(inner),
            _ => None,
        }
    }

    /// Whether this token can appear as a binary operator (arithmetic, comparison,
    /// assignment, `is`, `and`, `or`, `in` and `.`).
    pub fn is_binary_op(&self) -> bool {
        matches!(
            self,
            Pls | Sub
                | Mul
                | Div
                | Mod
                | Ass
                | OpAss(_)
                | Gt
                | GtEq
                | Lt
                | LtEq
                | Is
                | And
                | Or
                | In
                | Dot
        )
    }
}
/// Character-by-character lexer over a borrowed source string.
///
/// The lexer is `Clone`, which `peekn` uses to look ahead on a copy without
/// advancing the original.
#[derive(Debug, Clone)]
pub struct Lexer<'a> {
/// Characters that come after `current_char`.
chars: Peekable<Chars<'a>>,
/// Character under the cursor; `None` once the input is exhausted.
current_char: Option<char>,
/// `true` at the very start of the input and whenever the previously consumed
/// character was `\n`, i.e. the cursor sits at the start of a line.
newline: bool,
/// Line of the cursor, starting at 1.
line_num: usize,
/// Column of the cursor within the line, starting at 1.
char_num: usize,
}
/// Whether `c` may appear in an identifier: an underscore or any alphanumeric character.
#[inline]
fn id_char(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
/// Whether `c` is insignificant whitespace: any whitespace character except `\n`,
/// which the lexer treats as a token of its own.
#[inline]
fn junkspace(c: char) -> bool {
    match c {
        '\n' => false,
        other => other.is_whitespace(),
    }
}
impl<'a> Lexer<'a> {
pub fn new(code: &'a str) -> Lexer<'a> {
let mut chars = code.chars().peekable();
let first = chars.next();
Lexer {
chars,
current_char: first,
newline: true,
line_num: 1,
char_num: 1,
}
}
fn next_char(&mut self) -> Option<char> {
self.newline = self.current_char == Some('\n');
self.current_char = self.chars.next();
if self.newline {
self.line_num += 1;
self.char_num = 1;
} else {
self.char_num += 1;
}
self.current_char
}
fn cursor(&self) -> SourceRef {
SourceRef(
(self.line_num, self.char_num),
(self.line_num, self.char_num + 1),
)
}
pub fn peekn(&mut self, n: usize) -> Res<Token> {
match n {
0 | 1 => self.peek(),
_ => {
let mut clone = self.clone();
let mut iters = 0;
let mut next = Ok(Eof);
while iters < n {
next = clone.next_token().map(|(t, ..)| t);
iters += 1;
}
next
}
}
}
pub fn peek(&mut self) -> Res<Token> {
while let Some(c) = self.current_char {
if self.newline && c == ' ' {
return Ok(Indent(0));
}
if junkspace(c) {
self.next_char();
continue;
}
if c == '\n' {
return Ok(Eol);
}
if c == '#' {
while let Some(c) = self.next_char() {
if c == '\n' {
break;
}
}
continue;
}
if c.is_ascii_digit() {
return Ok(Num(c as i32));
}
if id_char(c) {
return Ok(Id(c.to_string().into()));
}
if let Some(token) = Token::parse(c) {
if token == Gt {
if let Some(&'=') = self.chars.peek() {
return Ok(GtEq);
}
}
if token == Lt {
if let Some(&'=') = self.chars.peek() {
return Ok(LtEq);
}
}
if token.is_valid_for_op_ass() {
if let Some(&'=') = self.chars.peek() {
return Ok(OpAss(token.into()));
}
}
if token == OpnSqr {
if let Some(&']') = self.chars.peek() {
return Ok(Square);
}
}
return Ok(token);
}
return Err(BadderError::at(self.cursor())
.describe(Stage::Lexer, format!("Unexpected char: `{}`", c)));
}
Ok(Eof)
}
pub fn next_token(&mut self) -> Res<(Token, SourceRef)> {
let peek = self.peek()?;
let src_ref = self.cursor();
if peek == Eof {
return Ok((peek, src_ref));
}
if peek == Eol {
self.next_char();
return Ok((peek, src_ref));
}
if let Indent(_) = peek {
let mut spaces = 1;
while let Some(' ') = self.next_char() {
spaces += 1;
}
if spaces % 4 != 0 {
while let Some(c) = self.current_char {
if c == '\n' {
return self.next_token();
}
if c == '#' {
while let Some(c) = self.next_char() {
if c == '\n' {
return self.next_token();
}
}
}
if !junkspace(c) {
break;
}
self.next_char();
}
let hint = match spaces % 4 {
1 => ", try removing 1 space.",
3 => ", try removing 3 spaces.",
_ => ", try removing 2 spaces.",
};
return Err(BadderError::at(src_ref.up_to(self.cursor())).describe(
Stage::Lexer,
format!("Invalid indent must be multiple of 4 spaces{}", hint),
));
}
return Ok((Indent(spaces / 4), src_ref.up_to(self.cursor())));
}
let c = self.current_char.unwrap();
if let Num(_) = peek {
let mut number_str = c.to_string();
while let Some(c) = self.next_char() {
if c.is_ascii_digit() {
number_str.push(c);
} else {
break;
}
}
let src_ref = src_ref.up_to(self.cursor());
return match number_str.parse() {
Ok(n) => Ok((Num(n), src_ref)),
Err(e) => Err(BadderError::at(src_ref)
.describe(Stage::Lexer, format!("could not parse number: {}", e))),
};
}
if let Id(_) = peek {
let mut id = c.to_string();
while let Some(c) = self.next_char() {
if id_char(c) {
id.push(c);
} else {
break;
}
}
return Ok((Token::parse_id(id), src_ref.up_to(self.cursor())));
}
self.next_char();
if let OpAss(_) = peek {
self.next_char();
}
if peek == GtEq || peek == LtEq || peek == Square {
self.next_char();
}
Ok((peek, src_ref.up_to(self.cursor())))
}
}
#[cfg(test)]
mod lexer_test {
    use super::*;

    /// Every token returned by `next_token` carries the source range it was read from.
    #[test]
    fn next_token_src_ref() {
        // Eight leading spaces = two indent levels; `seq` therefore starts at column 9,
        // which is what the expected source references below rely on.
        let mut lexer = Lexer::new("        seq some_id[] = 1345, 2");
        let expected = vec![
            (Indent(2), SourceRef((1, 1), (1, 9))),
            (Seq, SourceRef((1, 9), (1, 12))),
            (Id("some_id".into()), SourceRef((1, 13), (1, 20))),
            (Square, SourceRef((1, 20), (1, 22))),
            (Ass, SourceRef((1, 23), (1, 24))),
            (Num(1345), SourceRef((1, 25), (1, 29))),
            (Comma, SourceRef((1, 29), (1, 30))),
            (Num(2), SourceRef((1, 31), (1, 32))),
            (Eof, SourceRef((1, 32), (1, 33))),
        ];
        for (exp_token, exp_src_ref) in expected {
            let (token, src_ref) = lexer.next_token().unwrap();
            println!("Token `{:?}`", token);
            assert_eq!(token, exp_token);
            assert_eq!(src_ref, exp_src_ref);
        }
    }
}