use std::convert::From;
use crate::syntax::{Keyword, Punct, Token, TokenType};
#[cfg(test)]
mod tests;
// Builds a `Token` located at the lexer's current position and then steps the
// lexer past a single one-column, one-offset character.
//
// Forms:
//   make_tok!(ident X, of lexer)    wraps `X` as `TokenType::Ident(X)`
//   make_tok!(keyword K, of lexer)  wraps `K` as `TokenType::Keyword(K)`
//   make_tok!(punct P, of lexer)    wraps `P` as `TokenType::Punct(P)`
//   make_tok!(T, of lexer)          uses the token type expression `T` as is
//
// The three shorthand forms forward to the last one.
macro_rules! make_tok {
    (ident $name:ident, of $lexer:expr) => {
        make_tok!(TokenType::Ident($name), of $lexer)
    };
    (keyword $word:ident, of $lexer:expr) => {
        make_tok!(TokenType::Keyword($word), of $lexer)
    };
    (punct $sym:expr, of $lexer:expr) => {
        make_tok!(TokenType::Punct($sym), of $lexer)
    };
    ($kind:expr, of $lexer:expr) => {{
        // Capture the position first: the token is reported where it starts.
        let (line, column) = $lexer.current_pos();
        $lexer.col += 1;
        $lexer.pos += 1;
        Token::new(line, column, $kind)
    }};
}
/// Error type returned by the lexer: a human-readable message, built with
/// `format!` in `lex_next`.
type LexingError = String;
/// Lexer state over a borrowed source string; `lex_all` turns the buffer into a
/// vector of `Token`s.
pub(crate) struct Lexer<'t> {
/// Line of the next unread character (`From<&str>` starts it at 1).
ln: usize,
/// Column of the next unread character (`From<&str>` starts it at 1).
col: usize,
/// Offset into `buf` of the next unread character (`From<&str>` starts it at 0).
pos: usize,
/// The source text being lexed.
buf: &'t str,
}
impl<'t> From<&'t str> for Lexer<'t> {
fn from(buf: &'t str) -> Self {
Lexer {
ln: 1,
col: 1,
pos: 0,
buf,
}
}
}
impl<'t> Lexer<'t> {
    /// Returns `true` while at least one unread character remains in the buffer.
    fn has_next(&self) -> bool {
        self.peek_next().is_some()
    }

    /// Consumes the next character and returns it, moving one column to the right.
    ///
    /// `pos` is a byte offset into `buf`, so it advances by the UTF-8 width of the
    /// character, not by one. This keeps it valid for slicing `buf`.
    ///
    /// # Panics
    ///
    /// Panics when called at the end of the input.
    fn next(&mut self) -> char {
        let c = self
            .peek_next()
            .expect("Lexer::next called at end of input");
        self.col += 1;
        self.pos += c.len_utf8();
        c
    }

    /// Returns the next character without consuming it, or `None` at the end of
    /// the input.
    fn peek_next(&self) -> Option<char> {
        // `get` rather than indexing: an offset that is out of range (or not on
        // a character boundary) reads as "no more input" instead of panicking.
        self.buf.get(self.pos..).and_then(|rest| rest.chars().next())
    }

    /// Steps over the current character and moves to the first column of the
    /// following line.
    fn increment_ln(&mut self) {
        // Callers only use this on ASCII control characters (one byte wide);
        // taking the real width keeps `pos` on a character boundary regardless.
        let width = self.peek_next().map_or(1, char::len_utf8);
        self.ln += 1;
        self.col = 1;
        self.pos += width;
    }

    /// Skips every whitespace character, line breaks included, keeping `ln` and
    /// `col` in sync with what was skipped.
    fn skip_whitespace(&mut self) {
        while let Some(c) = self.peek_next() {
            if !c.is_whitespace() {
                return;
            }
            // Same rule as `lex_next`: an ASCII control character ends a line.
            if c.is_ascii_control() {
                self.increment_ln();
            } else {
                self.next();
            }
        }
    }

    /// Skips whitespace on the current line only, stopping in front of any
    /// character that `lex_next` treats as a line terminator.
    fn skip_inline_whitespace(&mut self) {
        self.read_while(|c| c.is_whitespace() && !c.is_ascii_control());
    }

    /// Returns the `(line, column)` of the next unread character.
    fn current_pos(&self) -> (usize, usize) {
        (self.ln, self.col)
    }

    /// Consumes characters up to (not including) the next ASCII control
    /// character and returns the consumed `(start, end)` byte range of `buf`.
    fn read_endl(&mut self) -> (usize, usize) {
        self.read_while(|c| !c.is_ascii_control())
    }

    /// Consumes characters for as long as `pred` accepts them and returns the
    /// consumed half-open `(start, end)` byte range of `buf`.
    fn read_while(&mut self, pred: impl Fn(char) -> bool) -> (usize, usize) {
        let start = self.pos;
        while let Some(c) = self.peek_next() {
            if !pred(c) {
                break;
            }
            self.next();
        }
        (start, self.pos)
    }

    /// Lexes the remaining input into a vector of tokens.
    ///
    /// # Errors
    ///
    /// Returns a message describing the first problem found: an unexpected
    /// character, an unterminated string literal, or a numeric literal that
    /// cannot be parsed.
    pub fn lex_all(&mut self) -> Result<Vec<Token>, LexingError> {
        let mut tokens = Vec::new();
        while self.has_next() {
            if let Some(tok) = self.lex_next()? {
                tokens.push(tok);
            }
        }
        Ok(tokens)
    }

    /// Lexes one token starting at the current position.
    ///
    /// Returns `Ok(None)` when only whitespace was consumed or the input is
    /// exhausted, and `Err` with a message for malformed input.
    fn lex_next(&mut self) -> Result<Option<Token>, LexingError> {
        use crate::syntax::Punct::*;
        let c = match self.peek_next() {
            Some(c) => c,
            None => return Ok(None),
        };
        // Every token (and every error) is reported at the position where it starts.
        let (ln, col) = self.current_pos();
        let tok = match c {
            // NOTE(review): every ASCII control character, `\t` and `\r`
            // included, is treated as a line terminator here; confirm intended.
            _ if c.is_ascii_control() => {
                self.increment_ln();
                Token::new(ln, col, TokenType::Term)
            }
            '-' => {
                self.next();
                let tok_t = match self.peek_next() {
                    Some('>') => {
                        self.next();
                        // Whitespace after an arrow, line breaks included, is skipped.
                        self.skip_whitespace();
                        TokenType::Punct(Arrow)
                    }
                    Some('-') => {
                        self.next();
                        // Stay on this line so an empty comment does not
                        // swallow the line that follows it.
                        self.skip_inline_whitespace();
                        let (start, end) = self.read_endl();
                        TokenType::Comment { start, end }
                    }
                    _ => TokenType::Punct(Sub),
                };
                Token::new(ln, col, tok_t)
            }
            '.' => {
                self.next();
                // `.` is Dot, `..` is Range, `...` is Spread.
                let p = if self.peek_next() != Some('.') {
                    Dot
                } else {
                    self.next();
                    if self.peek_next() == Some('.') {
                        self.next();
                        Spread
                    } else {
                        Range
                    }
                };
                Token::new(ln, col, TokenType::Punct(p))
            }
            '+' => make_tok!(punct Add, of self),
            '*' => make_tok!(punct Mul, of self),
            '/' => make_tok!(punct Div, of self),
            '^' => make_tok!(punct Pow, of self),
            ';' => make_tok!(punct SemiColon, of self),
            ':' => make_tok!(punct Colon, of self),
            '=' => make_tok!(punct Assign, of self),
            ',' => make_tok!(punct Comma, of self),
            '[' => make_tok!(punct OpenBracket, of self),
            ']' => make_tok!(punct CloseBracket, of self),
            '(' => make_tok!(punct OpenParen, of self),
            ')' => make_tok!(punct CloseParen, of self),
            '{' => make_tok!(punct OpenCurly, of self),
            '}' => make_tok!(punct CloseCurly, of self),
            '|' => make_tok!(punct Pipe, of self),
            '\\' => make_tok!(punct Lambda, of self),
            _ if c.is_ascii_digit() => {
                let (start, end) = self.read_while(|c| c.is_ascii_digit());
                let digits = &self.buf[start..end];
                // A literal that does not fit the numeric type is a user error,
                // not a reason to panic.
                match digits.parse() {
                    Ok(n) => Token::new(ln, col, TokenType::Num(n)),
                    Err(_) => {
                        return Err(format!(
                            "Invalid number `{}` on {}:{}",
                            digits, ln, col
                        ));
                    }
                }
            }
            _ if c.is_alphabetic() || c == '_' => {
                let (start, end) = self.read_while(|c| c.is_alphanumeric() || c == '_');
                let word = &self.buf[start..end];
                let tok_t = match word {
                    "let" => TokenType::Keyword(Keyword::Let),
                    "_" => TokenType::Discard,
                    "true" => TokenType::Bool(true),
                    "false" => TokenType::Bool(false),
                    _ => TokenType::Ident { start, end },
                };
                Token::new(ln, col, tok_t)
            }
            '\'' => {
                self.next();
                // NOTE(review): line breaks inside a string literal do not
                // advance `ln`; positions after a multi-line string are off.
                let (start, end) = self.read_while(|c| c != '\'');
                if self.peek_next().is_none() {
                    return Err(format!(
                        "Unterminated string starting on {}:{}",
                        ln, col
                    ));
                }
                // Consume the closing quote; `start..end` excludes both quotes.
                self.next();
                Token::new(ln, col, TokenType::Str { start, end })
            }
            _ if c.is_whitespace() => {
                // Only same-line whitespace: a following line break must still
                // produce its `Term` token.
                self.skip_inline_whitespace();
                return Ok(None);
            }
            _ => {
                // Consume the offending character so a caller that keeps going
                // makes progress, but report where it was found.
                self.next();
                return Err(format!("Unexpected `{}` on {}:{}", c, ln, col));
            }
        };
        Ok(Some(tok))
    }
}