use {
crate::{Result, Syntax, SyntaxTrait, Token},
std::{iter::Peekable, mem, str::CharIndices},
};
/// Scanner state: walks `source` one `char` at a time and accumulates the
/// tokens it recognises.
struct Lexer<'s> {
/// Tokens produced so far, in source order.
tokens: Vec<Token<'s>>,
/// The complete input text being scanned; token literals are slices of it.
source: &'s str,
/// Byte offset (from `char_indices`) of the most recently consumed character.
pos: usize,
/// Stack of indentation widths, one entry per currently open indented block.
indents: Vec<usize>,
/// Peekable character cursor over `source`.
chars: Peekable<CharIndices<'s>>,
/// The most recently consumed character.
cur: char,
/// Scanning mode currently in effect.
mode: Mode,
/// Modes saved by `set_mode`, restored in reverse order by `pop_mode`.
modes: Vec<Mode>,
/// Indentation style recorded by `check_indent_style`; `Style::None` until
/// a line starting with a space or tab has been seen.
style: Style,
}
/// Context the lexer is currently scanning in; several characters are
/// tokenized differently depending on it.
#[derive(Debug, PartialEq)]
enum Mode {
/// Neither inside a tag nor inside a bracketed container.
None,
/// Entered on `(`, `[` or `{`; newlines here produce a plain `Semi`.
Container,
/// Entered on a `<` that opens a tag; left again on `>`.
Tag,
}
/// Which whitespace character a line's indentation starts with; used to
/// reject files that mix tabs and spaces.
#[derive(Debug, PartialEq)]
enum Style {
/// No indentation character seen.
None,
/// The line starts with a tab.
Tabs,
/// The line starts with a space.
Spaces,
}
/// Tokenizes `source`.
///
/// Returns every token in source order, or the first error raised while
/// scanning.
pub fn scan<'s>(source: &'s str) -> Result<Vec<Token<'s>>> {
    let mut lexer = Lexer::from(source);
    lexer.scan().map(|()| lexer.tokens)
}
impl<'s> Lexer<'s> {
/// Builds a lexer over `source` with no tokens, no open indentation
/// levels, no saved modes, and no indentation style recorded yet.
fn from(source: &'s str) -> Lexer<'s> {
    let chars = source.char_indices().peekable();
    Lexer {
        tokens: Vec::new(),
        source,
        pos: 0,
        indents: Vec::new(),
        chars,
        // Placeholder until the first call to `next` overwrites it.
        cur: '0',
        mode: Mode::None,
        modes: Vec::new(),
        style: Style::None,
    }
}
/// Returns the next character without consuming it, or `None` at the end
/// of the input.
fn peek(&mut self) -> Option<&char> {
    self.chars.peek().map(|(_, c)| c)
}
/// Reports whether the next (unconsumed) character is exactly `c`.
fn peek_is(&mut self, c: char) -> bool {
    self.peek() == Some(&c)
}
/// Reports whether the most recently emitted token has the given `kind`.
/// Returns `false` when no token has been emitted yet.
fn prev_is(&self, kind: Syntax) -> bool {
    match self.tokens.last() {
        Some(token) => token.kind == kind,
        None => false,
    }
}
/// Reports whether the most recently emitted token's literal is `=`.
/// Returns `false` when no token has been emitted yet.
fn prev_is_equal_sign(&self) -> bool {
    match self.tokens.last() {
        Some(token) => token.literal() == "=",
        None => false,
    }
}
/// Consumes and returns the next character, recording its byte offset in
/// `pos` and the character itself in `cur`. Returns `None` (leaving both
/// untouched) at the end of the input.
fn next(&mut self) -> Option<char> {
    let (offset, ch) = self.chars.next()?;
    self.pos = offset;
    self.cur = ch;
    Some(ch)
}
/// Emits a synthetic token of `kind` at the current position, with
/// length 1 and an empty literal. Always succeeds.
fn append(&mut self, kind: Syntax) -> Result<()> {
    let token = Token::new(kind, self.pos, 1, "");
    self.tokens.push(token);
    Ok(())
}
/// Consumes characters for as long as `check` accepts the next one.
///
/// Returns `true` if at least one character was consumed.
fn eat(&mut self, check: impl Fn(char) -> bool) -> bool {
    let mut consumed = 0usize;
    while let Some(&c) = self.peek() {
        if !check(c) {
            break;
        }
        self.next();
        consumed += 1;
    }
    consumed > 0
}
/// Switches to `mode`, saving the mode that was active so `pop_mode` can
/// restore it.
fn set_mode(&mut self, mode: Mode) {
    let previous = mem::replace(&mut self.mode, mode);
    self.modes.push(previous);
}
/// Restores the most recently saved mode. Does nothing when no mode has
/// been saved.
fn pop_mode(&mut self) {
    if let Some(previous) = self.modes.pop() {
        self.mode = previous;
    }
}
/// Reports whether the lexer is currently in `Mode::Tag`.
fn in_tag(&self) -> bool {
    self.mode == Mode::Tag
}
/// Reports whether the lexer is currently in `Mode::Container`.
fn in_container(&self) -> bool {
    self.mode == Mode::Container
}
/// Main loop: consumes the whole input and fills `self.tokens`.
///
/// Each character is dispatched to a sub-scanner (or mapped directly to a
/// token kind). A sub-scanner that returns `Syntax::None` has either
/// pushed its own token(s) or produced nothing, so no token is added for
/// it here. Otherwise a token is emitted that spans from the dispatch
/// character up to and including the last consumed character.
///
/// After the input is exhausted the stream is normalised: a trailing
/// `Semi` is appended unless the last token is already a `Semi` or
/// `Dedent`, every still-open indentation level is closed with a
/// `Dedent`, and any leading `Semi` tokens are dropped.
///
/// # Errors
/// Propagates the first error returned by a sub-scanner.
fn scan(&mut self) -> Result<()> {
    while let Some(c) = self.next() {
        let start = self.pos;
        let kind = match c {
            '\n' => self.scan_newline()?,
            ';' => Syntax::Semi,
            ',' => Syntax::Comma,
            '"' | '\'' | '`' => self.scan_string(c)?,
            // `!!` is emitted as two separate one-character operators.
            '!' if self.peek_is('!') => Syntax::Op,
            '!' => self.scan_op()?,
            ':' if self.peek_is('=') || self.in_tag() => self.scan_op()?,
            ':' => Syntax::Colon,
            // A minus directly followed by a digit starts a negative number.
            '-' if matches!(self.peek(), Some(d) if d.is_numeric()) => self.scan_number()?,
            '-' => self.scan_op()?,
            // `#{` and `#<letter>` are operators; any other `#` starts a comment.
            '#' if matches!(self.peek(), Some(&n) if n == '{' || n.is_alphabetic()) => {
                self.scan_op()?
            }
            '#' => self.scan_comment()?,
            '[' => {
                self.set_mode(Mode::Container);
                Syntax::LStaple
            }
            ']' => {
                self.pop_mode();
                Syntax::RStaple
            }
            '(' if self.in_tag() => self.scan_open_paren_in_tag()?,
            '(' => {
                self.set_mode(Mode::Container);
                Syntax::LParen
            }
            ')' => {
                if !self.in_tag() {
                    self.pop_mode();
                }
                Syntax::RParen
            }
            // Inside a tag, `{` opens embedded code that is scanned as one word.
            '{' if self.in_tag() => self.scan_word(true)?,
            '{' => {
                self.set_mode(Mode::Container);
                Syntax::LCurly
            }
            '}' => {
                self.pop_mode();
                Syntax::RCurly
            }
            '<' => self.scan_left_arrow()?,
            '>' if self.in_tag() => {
                self.mode = Mode::None;
                Syntax::RCaret
            }
            '=' if self.in_tag() => Syntax::Equal,
            '/' if self.in_tag() => Syntax::Slash,
            '>' | '=' | '/' => self.scan_op()?,
            _ if c.is_whitespace() => {
                self.eat(|c| c.is_whitespace());
                continue;
            }
            _ if c.is_numeric() => self.scan_number()?,
            _ if c.is_alphabetic() || c == '_' => self.scan_word(false)?,
            _ => self.scan_op()?,
        };
        if kind == Syntax::None {
            continue;
        }
        // `pos`/`cur` now describe the last consumed character of the token.
        let len = self.pos - start + self.cur.len_utf8();
        self.tokens.push(Token::new(
            kind,
            start,
            len,
            &self.source[start..start + len],
        ));
    }

    if !self.prev_is(Syntax::Semi) && !self.prev_is(Syntax::Dedent) {
        self.append(Syntax::Semi)?;
    }
    while self.indents.pop().is_some() {
        self.append(Syntax::Dedent)?;
    }

    // Drop leading separators in a single pass instead of repeatedly
    // shifting the whole vector with `remove(0)`.
    let leading = self
        .tokens
        .iter()
        .take_while(|t| t.kind == Syntax::Semi)
        .count();
    self.tokens.drain(..leading);
    Ok(())
}
/// Skips the rest of the current line; the newline itself is left
/// unconsumed. Produces no token (`Syntax::None`).
fn scan_comment(&mut self) -> Result<Syntax> {
    let not_newline = |c: char| c != '\n';
    self.eat(not_newline);
    Ok(Syntax::None)
}
/// Scans the remainder of a numeric literal and returns `Syntax::Number`.
///
/// Called with `self.cur` on the first digit, or on a leading `-` that is
/// directly followed by a digit.
///
/// Recognised forms:
/// - `0b…` binary, `0o…` octal and `0x…` hexadecimal literals. Hex digits
///   may be upper- or lower-case, and `_` separators are accepted in all
///   three radixes.
/// - decimal digits with `_` separators and at most one `.`, which is
///   only consumed when a digit follows it (so `1.foo` stops before `.`).
///
/// NOTE(review): when entered on a leading `-`, `self.cur` is `-`, so a
/// negative radix literal such as `-0x1f` is not recognised as one —
/// confirm whether that is intended.
fn scan_number(&mut self) -> Result<Syntax> {
    if self.cur == '0' {
        let marker = self.peek().copied();
        if matches!(marker, Some('b') | Some('o') | Some('x')) {
            // Consume the radix marker, then every digit valid for it.
            self.next();
            match marker {
                Some('b') => self.eat(|c| matches!(c, '0' | '1' | '_')),
                Some('o') => self.eat(|c| matches!(c, '0'..='7' | '_')),
                _ => self.eat(|c| c.is_ascii_hexdigit() || c == '_'),
            };
            return Ok(Syntax::Number);
        }
    }

    let mut saw_dot = false;
    while let Some(&c) = self.peek() {
        if c.is_numeric() || c == '_' {
            self.next();
        } else if c == '.' && !saw_dot {
            // Look one character past the dot without consuming anything.
            let after_dot = self.chars.clone().nth(1);
            if !matches!(after_dot, Some((_, d)) if d.is_numeric()) {
                break;
            }
            saw_dot = true;
            self.next();
        } else {
            break;
        }
    }
    Ok(Syntax::Number)
}
/// Scans a string literal whose opening `delimiter` (`"`, `'` or a
/// backtick) has just been consumed.
///
/// Handles single-delimiter strings, the empty string (`""`), and
/// triple-delimited strings (`"""…"""`). The string token is pushed
/// directly onto `self.tokens`, with the text between the delimiters as
/// its literal, so the return value is always `Syntax::None` on success.
/// A double-quoted, non-triple string containing `{` is flagged via
/// `Syntax::String(true)`; every other string gets `Syntax::String(false)`.
///
/// A delimiter preceded by an unescaped backslash does not close the
/// string. Escape state is tracked explicitly, so an escaped backslash
/// right before the closing delimiter (`"a\\"`) terminates correctly
/// instead of being mistaken for an escaped quote.
///
/// # Errors
/// Returns a scan error if the input ends before the closing delimiter.
fn scan_string(&mut self, delimiter: char) -> Result<Syntax> {
    // Byte offset of the first character of the string's contents.
    let mut start = self.pos + 1;
    let mut triple = false;
    if self.peek_is(delimiter) {
        self.next();
        if self.peek_is(delimiter) {
            self.next();
            triple = true;
            start += 2;
        } else {
            // Two delimiters in a row: the empty string.
            self.tokens
                .push(Token::new(Syntax::String(false), start - 1, 2, ""));
            return Ok(Syntax::None);
        }
    }

    // True when the previous character was a backslash that itself was not
    // escaped, i.e. the current character is escaped.
    let mut escaped = false;
    while let Some(c) = self.next() {
        if c == delimiter && !escaped {
            if !triple {
                let len = self.pos - start - 1;
                let lit = &self.source[start..=start + len];
                self.tokens.push(Token::new(
                    Syntax::String(delimiter == '"' && lit.contains('{')),
                    start,
                    len,
                    lit,
                ));
                return Ok(Syntax::None);
            } else if self.peek_is(delimiter) {
                self.next();
                if self.peek_is(delimiter) {
                    self.next();
                    if self.pos == start + 2 {
                        // Closing triple directly after the opening one.
                        self.tokens
                            .push(Token::new(Syntax::String(false), start - 3, 6, ""));
                        return Ok(Syntax::None);
                    }
                    let len = self.pos - start - 3;
                    self.tokens.push(Token::new(
                        Syntax::String(false),
                        start,
                        len,
                        &self.source[start..=start + len],
                    ));
                    return Ok(Syntax::None);
                }
            }
        }
        escaped = !escaped && c == '\\';
    }
    scan_error!(
        start,
        1,
        "Unclosed string. Expected closing quote: {} ",
        delimiter
    )
}
fn scan_op(&mut self) -> Result<Syntax> {
self.eat(|c| c.is_op());
Ok(Syntax::Op)
}
/// Scans a word starting at the current character and classifies it as a
/// keyword, a boolean literal, or a plain `Syntax::Word`.
///
/// A word is a run of word characters (per `is_word_char`) that may embed
/// `{ … }` code sections; inside a section every character is accepted
/// until the matching `}`. Nested braces are tracked so an inner `}` does
/// not end the section early. Pass `in_code = true` when the current
/// character is itself the opening `{`. A single trailing `?` is treated
/// as part of the word.
fn scan_word(&mut self, mut in_code: bool) -> Result<Syntax> {
    let start = self.pos;
    // Depth of `{` opened inside the current code section.
    let mut curlies = 0;
    while let Some(&c) = self.peek() {
        if in_code {
            if c == '}' {
                if curlies == 0 {
                    in_code = false;
                } else {
                    curlies -= 1;
                }
            } else if c == '{' {
                curlies += 1;
            }
        } else if c == '{' {
            in_code = true;
        } else if !c.is_word_char() {
            break;
        }
        self.next();
    }
    if self.peek_is('?') {
        self.next();
    }
    // End on a character boundary: the last consumed character may be
    // wider than one byte.
    let end = self.pos + self.cur.len_utf8();
    Ok(match &self.source[start..end] {
        "def" => Syntax::Def,
        "do" => Syntax::Do,
        "return" => Syntax::Return,
        "if" => Syntax::If,
        "then" => Syntax::Then,
        "else" => Syntax::Else,
        "for" => Syntax::For,
        "while" => Syntax::While,
        "in" => Syntax::In,
        "fn" => Syntax::Fn,
        "true" => Syntax::Bool(true),
        "false" => Syntax::Bool(false),
        _ => Syntax::Word,
    })
}
/// Decides what a just-consumed `<` means.
///
/// - Inside a tag, or when the next character neither opens a tag nor is
///   `!`, it is scanned as an operator.
/// - Before a tag-opener character (per `is_tag_opener`) the lexer
///   switches to `Mode::Tag` and returns `Syntax::LCaret`.
/// - `<!` starts a declaration (`<!DOCTYPE html>`) or, with `<!--`, a
///   comment. The whole construct, up to and including its terminator, is
///   returned as a single `Syntax::String(true)` token. A declaration ends
///   at the first `>`; a comment ends only at `-->`, so a `>` inside the
///   comment body does not cut it short. Nothing after the terminator is
///   consumed. If the input ends first, the token simply runs to the end.
fn scan_left_arrow(&mut self) -> Result<Syntax> {
    let p = *self.peek().unwrap_or(&'0');
    if self.in_tag() {
        return self.scan_op();
    }
    if p.is_tag_opener() {
        self.mode = Mode::Tag;
        return Ok(Syntax::LCaret);
    }
    if p != '!' {
        return self.scan_op();
    }

    self.next(); // the `!`
    let mut comment = false;
    if self.peek_is('-') {
        self.next();
        if self.peek_is('-') {
            self.next();
            comment = true;
        }
    }

    if comment {
        // Number of consecutive `-` just seen. The opening `--` counts, so
        // `<!-->` still closes immediately.
        let mut dashes = 2usize;
        while let Some(c) = self.next() {
            match c {
                '-' => dashes += 1,
                '>' if dashes >= 2 => break,
                _ => dashes = 0,
            }
        }
    } else {
        while let Some(c) = self.next() {
            if c == '>' {
                break;
            }
        }
    }
    Ok(Syntax::String(true))
}
/// Handles a `(` encountered while in tag mode.
///
/// Directly after an `=` token the parenthesised text is consumed whole,
/// with nested parentheses balanced, and returned as `Syntax::JS`.
/// Otherwise the paren opens an ordinary container and `Syntax::LParen`
/// is returned.
fn scan_open_paren_in_tag(&mut self) -> Result<Syntax> {
    if !self.prev_is_equal_sign() {
        self.set_mode(Mode::Container);
        return Ok(Syntax::LParen);
    }

    // Count of inner `(` still waiting for their `)`.
    let mut depth = 0;
    while let Some(&c) = self.peek() {
        self.next();
        match c {
            ')' if depth == 0 => break,
            '(' => depth += 1,
            ')' => depth -= 1,
            _ => {}
        }
    }
    Ok(Syntax::JS)
}
/// Handles a just-consumed newline and turns indentation changes into
/// `Indent` / `Dedent` tokens.
///
/// Inside a tag or container a newline is simply a `Semi`. Otherwise
/// blank lines and lines that start with `#` are skipped, the
/// indentation of the next remaining line is measured (each space or tab
/// counts as one), and:
/// - at end of input, or when that line starts with an operator
///   character other than `<`, nothing is emitted;
/// - deeper indentation pushes an `Indent` token;
/// - shallower indentation pushes a `Semi` followed by one `Dedent` per
///   closed level;
/// - equal indentation returns `Semi`.
///
/// # Errors
/// Propagates the tabs-vs-spaces error from `check_indent_style`.
fn scan_newline(&mut self) -> Result<Syntax> {
    if self.in_tag() || self.in_container() {
        return Ok(Syntax::Semi);
    }

    let start = self.pos;
    let indent = loop {
        self.eat(|c| c == '\n');
        self.check_indent_style()?;
        let mut width = 0;
        while self.peek_is(' ') || self.peek_is('\t') {
            width += 1;
            self.next();
        }
        match self.peek() {
            // Whitespace-only line: measure the next one instead.
            Some('\n') => {}
            Some('#') => {
                self.scan_comment()?;
            }
            _ => break width,
        }
    };

    match self.peek() {
        None => return Ok(Syntax::None),
        Some(&c) if c.is_op() && c != '<' => return Ok(Syntax::None),
        _ => {}
    }

    let last = self.indents.last().copied().unwrap_or(0);
    if indent > last {
        self.tokens.push(Token::new(Syntax::Indent, start, 1, ""));
        self.indents.push(indent);
        return Ok(Syntax::None);
    }
    if indent == last {
        return Ok(Syntax::Semi);
    }

    self.append(Syntax::Semi)?;
    while let Some(&top) = self.indents.last() {
        if indent >= top {
            break;
        }
        self.indents.pop();
        self.append(Syntax::Dedent)?;
    }
    Ok(Syntax::None)
}
/// Classifies the next character as space, tab or neither, and checks it
/// against the indentation style recorded so far.
///
/// If no style has been recorded yet, the observed one is stored.
///
/// # Errors
/// Returns a scan error when a style is already recorded and the next
/// character uses the other one.
fn check_indent_style(&mut self) -> Result<()> {
    let seen = match self.peek() {
        Some(' ') => Style::Spaces,
        Some('\t') => Style::Tabs,
        _ => Style::None,
    };

    let conflicting =
        self.style != Style::None && seen != Style::None && self.style != seen;
    if conflicting {
        return scan_error!(
            self.pos,
            1,
            "Can't mix tabs and spaces. Expected {:?}, found {:?}",
            self.style,
            seen
        );
    }

    if self.style == Style::None {
        self.style = seen;
    }
    Ok(())
}
}