use super::{Token, TokenKind::{self, *}};
/// Hand-written lexer that walks a borrowed source string and yields
/// `Token`s through its `Iterator` implementation.
///
/// Token literals are slices of `input`, so produced tokens borrow from the
/// source text for the lifetime `'a`.
#[derive(Debug)]
pub struct Lexer<'a> {
/// Source text being tokenized.
input: &'a str,
/// Cursor position within `input`; it selects the current character and
/// bounds the slices taken for token literals.
offset: usize,
/// Line of the cursor; starts at 1 and grows each time a `'\n'` is consumed.
line: usize,
/// Column of the cursor; starts at 1, grows with every other consumed
/// character and resets to 1 after a `'\n'`.
column: usize,
}
impl<'a> Iterator for Lexer<'a> {
    type Item = Token<'a>;

    /// Produces the next token, or `None` once only whitespace (or nothing)
    /// is left.
    ///
    /// Leading whitespace is skipped, the character under the cursor selects
    /// the scanner to run, and the cursor is then advanced one more time
    /// after the scanner returns.
    fn next(&mut self) -> Option<Self::Item> {
        self.whitespace();

        let produced = match self.curr() {
            None => None,
            Some('\'') => Some(self.char()),
            Some('"') => Some(self.string()),
            Some(first) if first.is_alphabetic() => Some(self.ident()),
            Some(first) if first.is_ascii_digit() => Some(self.number()),
            Some(_) => Some(self.glyph()),
        };

        // Runs on every path, including end of input, to step past whatever
        // character the scanner left under the cursor.
        self.advance();
        produced
    }
}
impl<'a> Lexer<'a> {
    /// Creates a lexer positioned at the start of `input` (line 1, column 1).
    pub fn new(input: &'a str) -> Self {
        Self {
            input,
            offset: 0,
            line: 1,
            column: 1,
        }
    }

    /// Returns the unread tail of the input, starting at the cursor.
    ///
    /// `offset` is a byte offset that always sits on a character boundary,
    /// so this is a constant-time slice rather than a scan from the start.
    fn rest(&self) -> &'a str {
        let input: &'a str = self.input;
        input.get(self.offset..).unwrap_or("")
    }

    /// Returns the text from byte offset `start` through the character under
    /// the cursor (inclusive). At end of input the slice simply stops at the
    /// end, so unterminated tokens never index out of bounds.
    fn literal_from(&self, start: usize) -> &'a str {
        let input: &'a str = self.input;
        let end = self.offset + self.curr().map_or(0, |c| c.len_utf8());
        &input[start..end]
    }

    /// Character under the cursor, or `None` at end of input.
    fn curr(&self) -> Option<char> {
        self.rest().chars().next()
    }

    /// Character right after the one under the cursor, if any.
    fn peek(&self) -> Option<char> {
        self.rest().chars().nth(1)
    }

    /// Moves the cursor past the current character, keeping `line` and
    /// `column` in sync. Does nothing at end of input.
    fn advance(&mut self) {
        self.debug("before advance");
        if let Some(curr) = self.curr() {
            // Step by the encoded width so the offset stays a valid slice bound.
            self.offset += curr.len_utf8();
            if curr == '\n' {
                self.line += 1;
                self.column = 1;
            } else {
                self.column += 1;
            }
        }
        self.debug("after advance");
    }

    /// Skips any run of whitespace under the cursor.
    fn whitespace(&mut self) {
        self.debug("before skip");
        while let Some(curr) = self.curr() {
            if curr.is_whitespace() {
                self.advance();
            } else {
                break;
            }
        }
        self.debug("after skip");
    }

    // Every scanner below is entered with the cursor on the first character of
    // its token and returns with the cursor on the token's LAST character:
    // `Iterator::next` advances once more after the scanner returns.

    /// Scans a character literal: quote, one character (optionally preceded
    /// by a backslash escape), closing quote.
    fn char(&mut self) -> Token<'a> {
        let (start, line, column) = (self.offset, self.line, self.column);
        self.advance(); // opening quote
        if self.curr() == Some('\\') {
            self.advance(); // backslash of an escape such as '\n'
        }
        self.advance(); // payload; the cursor now rests on the closing quote
        Token {
            literal: self.literal_from(start),
            kind: Char,
            line,
            column,
        }
    }

    /// Scans a string literal up to and including its closing `"`.
    ///
    /// A backslash escapes the following character, so `\"` does not end the
    /// string. An unterminated string runs to the end of the input.
    fn string(&mut self) -> Token<'a> {
        let (start, line, column) = (self.offset, self.line, self.column);
        self.advance(); // opening quote
        while let Some(curr) = self.curr() {
            match curr {
                '"' => break,
                '\\' => {
                    self.advance();
                    self.advance();
                }
                _ => self.advance(),
            }
        }
        Token {
            literal: self.literal_from(start),
            kind: Str,
            line,
            column,
        }
    }

    /// Scans a run of alphabetic characters and classifies it as a keyword
    /// or a plain identifier.
    // NOTE(review): digits and `_` do not continue an identifier here, as in
    // the original scanner — confirm that is intended for this language.
    fn ident(&mut self) -> Token<'a> {
        self.debug("before ident");
        let (start, line, column) = (self.offset, self.line, self.column);
        while let Some(peek) = self.peek() {
            if peek.is_alphabetic() {
                self.advance();
            } else {
                break;
            }
        }
        let literal = self.literal_from(start);
        let kind = match literal {
            "let" => Let,
            "in" => In,
            "import" => Import,
            "module" => Module,
            "exposing" => Exposing,
            "type" => Type,
            "case" => Case,
            "of" => Of,
            "not" => Not,
            "True" => True,
            "False" => False,
            "if" => If,
            "then" => Then,
            "else" => Else,
            _ => Ident,
        };
        self.debug("after ident");
        Token {
            literal,
            kind,
            line,
            column,
        }
    }

    /// Scans an integer or float literal.
    ///
    /// A `.` belongs to the number only when it is the first one and a digit
    /// follows it, so `1..5` lexes as `1`, `..`, `5` and `1.` as `1`, `.`.
    fn number(&mut self) -> Token<'a> {
        self.debug("before int");
        let (start, line, column) = (self.offset, self.line, self.column);
        let mut is_float = false;
        loop {
            // Look at the two characters after the one under the cursor.
            let mut ahead = self.rest().chars().skip(1);
            match (ahead.next(), ahead.next()) {
                (Some(c), _) if c.is_ascii_digit() => self.advance(),
                (Some('.'), Some(c)) if !is_float && c.is_ascii_digit() => {
                    self.advance();
                    self.advance();
                    is_float = true;
                }
                _ => break,
            }
        }
        let kind = if is_float { Float } else { Int };
        self.debug("after int");
        Token {
            literal: self.literal_from(start),
            kind,
            line,
            column,
        }
    }

    /// Scans an operator or punctuation token, delegating to the comment
    /// scanners for `--` and `{-`. Characters with no dedicated token kind
    /// become `Unknown` instead of aborting the lexer.
    fn glyph(&mut self) -> Token<'a> {
        self.debug("before glyph");
        let (start, line, column) = (self.offset, self.line, self.column);
        let kind = match self.curr() {
            Some('=') => self.expect_or('=', Equal, Assign),
            Some(':') => self.expect_or(':', DoubleColon, Colon),
            Some('.') => self.expect_or('.', DoubleDot, Dot),
            Some('&') => self.expect_or('&', And, Unknown),
            Some('|') => {
                // Decide on `||` first so `|>|` cannot be folded into one token.
                if self.peek() == Some('|') {
                    self.advance();
                    Or
                } else {
                    self.expect_or('>', Pipeline, Pipe)
                }
            }
            Some('+') => self.expect_or('+', DoublePlus, Plus),
            Some('>') => self.expect_or('=', GreaterEq, Greater),
            Some('<') => self.expect_or('=', LessEq, Less),
            Some('*') => Multiply,
            Some('^') => Power,
            Some('%') => Modulo,
            Some('(') => OpenParen,
            Some(')') => CloseParen,
            Some('[') => OpenBracket,
            Some(']') => CloseBracket,
            Some('}') => CloseBrace,
            Some(',') => Comma,
            Some('\\') => Backslash,
            Some('-') => {
                if self.peek() == Some('-') {
                    return self.single_comment();
                }
                self.expect_or('>', Arrow, Minus)
            }
            Some('{') => {
                if self.peek() == Some('-') {
                    return self.multi_comment();
                }
                OpenBrace
            }
            _ => Unknown,
        };
        self.debug("after glyph");
        Token {
            literal: self.literal_from(start),
            kind,
            line,
            column,
        }
    }

    /// Returns `this` and consumes the next character when it equals
    /// `expect`; otherwise returns `other` and leaves the cursor untouched.
    fn expect_or(&mut self, expect: char, this: TokenKind, other: TokenKind) -> TokenKind {
        self.debug("before expect_or");
        let token = if self.peek() == Some(expect) {
            self.advance();
            this
        } else {
            other
        };
        self.debug("after expect_or");
        token
    }

    /// Scans a `--` comment up to, but not including, the end of the line.
    fn single_comment(&mut self) -> Token<'a> {
        let (start, line, column) = (self.offset, self.line, self.column);
        while let Some(peek) = self.peek() {
            if peek == '\n' {
                break;
            }
            self.advance();
        }
        Token {
            literal: self.literal_from(start),
            kind: SingleComment,
            line,
            column,
        }
    }

    /// Scans a `{- ... -}` comment including both delimiters. An unterminated
    /// comment runs to the end of the input.
    fn multi_comment(&mut self) -> Token<'a> {
        let (start, line, column) = (self.offset, self.line, self.column);
        // Step over the opening `{-` so its dash cannot double as the closer.
        self.advance();
        self.advance();
        while let Some(curr) = self.curr() {
            if curr == '-' && self.peek() == Some('}') {
                self.advance(); // rest on the closing `}`
                break;
            }
            self.advance();
        }
        Token {
            literal: self.literal_from(start),
            kind: MultiComment,
            line,
            column,
        }
    }

    /// Prints a trace line with the cursor position and the current and next
    /// characters (`'\0'` stands in for "none").
    fn debug(&self, op: &str) {
        println!(
            "[{op}] {} pos: `{}`, curr: `{}`, peek: `{}`",
            // Saturate so labels longer than the padding width cannot underflow.
            " ".repeat(20usize.saturating_sub(op.len())),
            self.offset,
            self.curr().unwrap_or('\0'),
            self.peek().unwrap_or('\0')
        );
    }
}