// fexplib 0.3.1
//
// A simple lower-house syntax for programming language experimentation.
// Documentation
use crate::token::*;
use std::{collections::HashSet, iter::Peekable, str::Chars};
use tattle::{declare_error, Loc, Reporter};

/// Characters that may appear in an operator token. `run` dispatches to `op`
/// when it sees one of these, and `op` consumes a maximal run of them.
// NOTE(review): the final entry `''` is an empty character literal, which is
// not valid Rust. A character (possibly a non-ASCII one) appears to have been
// lost from this line; restore it from the upstream source before building.
const OPERATOR_CHARS: &'static [char] =
    &['+', '*', '-', '/', '<', '>', '&', '|', '!', '=', ':', ''];

// Declares `LEX_ERROR`, the error code that `Lexer::error` passes to the
// reporter for every diagnostic produced in this file.
declare_error!(LEX_ERROR, "lex", "an error during the lexing phase");

/// Mutable state of a single lexing pass over one source string.
struct Lexer<'a> {
    /// Character iterator over `source`, with one character of lookahead.
    iter: Peekable<Chars<'a>>,
    /// The full text being lexed; `slice` indexes into it with `prev..cur`.
    source: &'a str,
    /// Sink that receives lexing errors.
    reporter: Reporter,
    /// Spellings that `op` classifies as keywords instead of plain operators.
    keywords: HashSet<String>,
    /// Tokens produced so far, in source order.
    out: Vec<Token>,
    /// Set when whitespace has been skipped since the last emitted token;
    /// recorded on the next token and cleared by `emit`.
    preceding_whitespace: bool,
    /// Byte offset in `source` where the pending (not yet emitted) span starts.
    prev: usize,
    /// Byte offset in `source` just past the last character consumed.
    cur: usize,
}

impl<'a> Lexer<'a> {
    /// Creates a lexer positioned at the start of `source`, with no tokens
    /// produced yet.
    ///
    /// `keywords` is the set of spellings treated as keywords and `reporter`
    /// receives any lexing errors.
    pub fn new(source: &'a str, keywords: HashSet<String>, reporter: Reporter) -> Self {
        Lexer {
            iter: source.chars().peekable(),
            source,
            reporter,
            keywords,
            out: Vec::new(),
            preceding_whitespace: false,
            prev: 0,
            cur: 0,
        }
    }

    /// Reports `message` against the pending span (`prev..cur`) and pushes an
    /// `ERROR` token covering that span.
    fn error(&mut self, message: String) {
        let l = Loc::new(self.prev, self.cur, None);
        self.out
            .push(Token::new(self.preceding_whitespace, ERROR, l));
        self.reporter.error(l, LEX_ERROR, message.into());
        // Consume the offending span exactly as `emit` does. Without this the
        // next token would start at the bad character, so its location would
        // overlap the error token, and it would inherit a stale whitespace flag.
        self.preceding_whitespace = false;
        self.prev = self.cur;
    }

    /// Returns the next character without consuming it, or `None` at the end
    /// of the input.
    fn peek(&mut self) -> Option<char> {
        self.iter.peek().copied()
    }

    /// Consumes and returns the next character, moving `cur` forward by the
    /// character's UTF-8 length so that offsets stay valid byte indices into
    /// `source`.
    fn advance(&mut self) -> Option<char> {
        let c = self.iter.next()?;
        self.cur += c.len_utf8();
        Some(c)
    }

    /// Pushes a token of `kind` covering the pending span, then starts a new
    /// empty span and clears the whitespace flag.
    fn emit(&mut self, kind: Kind) {
        let loc = Loc::new(self.prev, self.cur, None);
        self.out
            .push(Token::new(self.preceding_whitespace, kind, loc));
        self.preceding_whitespace = false;
        self.prev = self.cur;
    }

    /// Drops the pending span without producing a token.
    fn skip(&mut self) {
        self.prev = self.cur;
    }

    /// Returns the source text of the pending span.
    fn slice(&self) -> &'a str {
        &self.source[self.prev..self.cur]
    }

    /// Consumes characters for as long as `f` accepts the next one; stops at
    /// the first rejected character or at the end of the input.
    fn many<F: Fn(char) -> bool>(&mut self, f: F) {
        while let Some(c) = self.peek() {
            if !f(c) {
                break;
            }
            self.advance();
        }
    }
}

/// Formats a message with `format!` syntax and reports it through the
/// `error` method of `$m`.
///
/// The first argument is the receiver expression, the second a literal format
/// string, followed by any number of format arguments.
macro_rules! error {
    ($lexer:expr, $fmt:literal $(, $param:expr)*) => {{
        $lexer.error(format!($fmt $(, $param)*));
    }};
}

/// Extends the pending span over a run of word characters (alphanumerics and
/// underscores). Does not emit a token.
fn word(l: &mut Lexer) {
    let is_word_char = |ch: char| ch == '_' || ch.is_alphanumeric();
    l.many(is_word_char);
}

fn keyword(l: &mut Lexer, kind: Kind) {
    word(l);
    l.emit(kind);
}

/// Consumes a maximal run of operator characters and emits it as one token.
///
/// The token kind depends on whether the pending span is a registered keyword
/// and on `as_var`:
/// keyword → `KEYWORD` (`as_var`) or `KEYWORD_OP`;
/// otherwise → `VAR` (`as_var`) or `OP`.
fn op(l: &mut Lexer, as_var: bool) {
    l.many(|ch| OPERATOR_CHARS.contains(&ch));
    let is_keyword = l.keywords.contains(l.slice());
    let kind = match (is_keyword, as_var) {
        (true, true) => KEYWORD,
        (true, false) => KEYWORD_OP,
        (false, true) => VAR,
        (false, false) => OP,
    };
    l.emit(kind);
}

/// Lexes the rest of a numeric literal whose first digit has already been
/// consumed.
///
/// Emits `INT` for a plain digit run. If the digits are followed by `.`, the
/// dot and any digits after it are consumed as well and `FLOAT` is emitted.
fn num(l: &mut Lexer) {
    l.many(|ch| ch.is_ascii_digit());
    if !matches!(l.peek(), Some('.')) {
        l.emit(INT);
        return;
    }
    // Consume the decimal point, then the (possibly empty) fractional digits.
    l.advance();
    l.many(|ch| ch.is_ascii_digit());
    l.emit(FLOAT);
}

fn string(l: &mut Lexer) {
    l.many(|c| c != '"');
    l.advance();
    l.emit(STRING);
}

/// Main lexing loop: consumes the whole input, dispatching on the first
/// character of each token and appending the resulting tokens to `l.out`.
///
/// Whitespace and `#` line comments produce no tokens; they only mark the
/// next token as preceded by whitespace. Any character that starts no known
/// token is reported as a lex error.
fn run(l: &mut Lexer) {
    while let Some(c) = l.advance() {
        match c {
            _ if c.is_whitespace() => {
                l.skip();
                l.preceding_whitespace = true;
            }
            _ if c.is_alphabetic() || c == '_' => {
                word(l);
                l.emit(VAR);
            }
            _ if c.is_digit(10) => num(l),
            _ if OPERATOR_CHARS.contains(&c) => op(l, false),
            '#' => {
                // Line comment: consume through the end of the line (or input).
                while let Some(c) = l.advance() {
                    if c == '\n' {
                        break;
                    }
                }
                // Drop the comment text from the pending span; otherwise the
                // next token's location (and the slice used for keyword
                // lookup) would start at the `#` and include the comment.
                l.skip();
                l.preceding_whitespace = true;
            }
            '`' => {
                // The backtick itself is not part of the token's span.
                l.skip();
                op(l, true)
            }
            '@' => keyword(l, PRIM),
            '%' => keyword(l, SPECIAL),
            '.' => keyword(l, FIELD),
            '\'' => keyword(l, TAG),
            '"' => string(l),
            ';' => l.emit(SEMICOLON),
            ',' => l.emit(COMMA),
            '{' => l.emit(LCURLY),
            '}' => l.emit(RCURLY),
            '(' => l.emit(LPAREN),
            ')' => l.emit(RPAREN),
            '[' => l.emit(LBRACK),
            ']' => l.emit(RBRACK),
            _ => {
                error!(l, "unexpected character");
            }
        }
    }
}

pub fn lex(source: &str, keywords: HashSet<String>, reporter: Reporter) -> Vec<Token> {
    let mut lexer = Lexer::new(source, keywords, reporter);
    run(&mut lexer);
    lexer.out
}