elmx 0.1.0

Elm compiler and runtime
Documentation
use super::{Token, TokenKind::{self, *}};

/// Lexer state over a borrowed source string.
///
/// The lexer is driven through its `Iterator` implementation, which yields
/// `Token<'a>` values whose literals borrow from `input`.
#[derive(Debug)]
pub struct Lexer<'a> {
    /// The complete source text being tokenized.
    input: &'a str,
    /// Cursor position within `input`; starts at 0 and only moves forward.
    offset: usize,
    /// Line of the cursor, starting at 1; bumped each time a `'\n'` is passed.
    line: usize,
    /// Column of the cursor, starting at 1; reset to 1 after each `'\n'`.
    column: usize,
}

impl<'a> Iterator for Lexer<'a> {
    type Item = Token<'a>;

    /// Skips leading whitespace, then produces the next token.
    ///
    /// The first non-whitespace character selects the sub-lexer: `'` starts a
    /// character literal, `"` a string, an alphabetic character an identifier
    /// or keyword, a decimal digit a number, and anything else a glyph.
    /// Returns `None` once the cursor has reached the end of the input.
    fn next(&mut self) -> Option<Self::Item> {
        self.whitespace();

        let produced = match self.curr() {
            None => None,
            Some('\'') => Some(self.char()),
            Some('"') => Some(self.string()),
            Some(first) if first.is_alphabetic() => Some(self.ident()),
            Some(first) if first.is_digit(10) => Some(self.number()),
            Some(_) => Some(self.glyph()),
        };

        // Step past the character the sub-lexer stopped on (a no-op at the
        // end of the input).
        self.advance();

        produced
    }
}

impl<'a> Lexer<'a> {
    pub fn new(input: &'a str) -> Self {
        Self {
            input,
            offset: 0,
            line: 1,
            column: 1,
        }
    }

    fn curr(&self) -> Option<char> {
        self.input.chars().nth(self.offset)
    }

    fn peek(&self) -> Option<char> {
        self.input.chars().nth(self.offset + 1)
    }

    fn advance(&mut self) {
        self.debug("before advance");
        if let Some(curr) = self.curr() {
            self.offset += 1;

            if curr == '\n' {
                self.line += 1;
                self.column = 1;
            } else {
                self.column += 1;
            }
        }
        self.debug("before advance");
    }

    fn whitespace(&mut self) {
        self.debug("before skip");
        while let Some(curr) = self.curr() {
            if curr.is_whitespace() {
                self.advance();
            } else {
                break;
            }
        }
        self.debug("after skip");
    }

    fn char(&mut self) -> Token<'a> {
        let (offset, line, column) = (self.offset, self.line, self.column);

        self.advance();
        self.advance();
        self.advance();
        
        Token {
            literal: &self.input[offset..self.offset],
            kind: Char,
            line,
            column,
        }
    }

    fn string(&mut self) -> Token<'a> {
        let (offset, line, column) = (self.offset, self.line, self.column);

        self.advance();

        while let Some(curr) = self.curr() {
            if curr == '"' {
                break;
            } else {
                self.advance();
            }
        }

        Token {
            literal: &self.input[offset..self.offset + 1],
            kind: Str,
            line,
            column,
        }
    }

    fn ident(&mut self) -> Token<'a> {
        self.debug("before ident");
        let (offset, line, column) = (self.offset, self.line, self.column);

        while let Some(peek) = self.peek() {
            if peek.is_alphabetic() {
                self.advance();
            } else {
                break;
            }
        }

        let literal = &self.input[offset..self.offset + 1];

        let kind = match literal {
            "let" => Let,
            "in" => In,
            "import" => Import,
            "module" => Module,
            "exposing" => Exposing,
            "type" => Type,
            "case" => Case,
            "of" => Of,
            "not" => Not,
            "True" => True,
            "False" => False,
            "if" => If,
            "then" => Then,
            "else" => Else,
            _ => Ident,
        };

        self.debug("after ident");

        Token {
            literal,
            kind,
            line,
            column,
        }
    }

    fn number(&mut self) -> Token<'a> {
        self.debug("before int");
        let (offset, line, column) = (self.offset, self.line, self.column);

        let mut kind = Int;

        while let Some(curr) = self.curr() {
            if curr.is_digit(10) {
                self.advance();
            } else if curr == '.' {
                self.advance();
                kind = Float;
            } else {
                break;
            }
        }

        self.debug("after int");
        Token {
            literal: &self.input[offset..self.offset],
            kind,
            line,
            column,
        }
    }

    fn glyph(&mut self) -> Token<'a> {
        self.debug("before glyph");

        let (offset, line, column) = (self.offset, self.line, self.column);

        let kind = match self.curr().unwrap() {
            '=' => self.expect_or('=', Equal, Assign),
            ':' => self.expect_or(':', DoubleColon, Colon),
            '.' => self.expect_or('.', DoubleDot, Dot),
            '&' => self.expect_or('&', And, Unknown),
            '|' => {
                let next = self.expect_or('>', Pipeline, Pipe);
                self.expect_or('|', Or, next)
            }
            '+' => self.expect_or('+', DoublePlus, Plus),
            '>' => self.expect_or('=', GreaterEq, Greater),
            '<' => self.expect_or('=', LessEq, Less),
            '*' => Multiply,
            '^' => Power,
            '%' => Modulo,
            '(' => OpenParen,
            ')' => CloseParen,
            '[' => OpenBracket,
            ']' => CloseBracket,
            '}' => CloseBrace,
            ',' => Comma,
            '\\' => Backslash,
            '-' => if self.peek() == Some('-') {
                return self.single_comment();
            } else {
                self.expect_or('>', Arrow, Minus)
            },
            '{' => if self.peek() == Some('-') {
                return self.multi_comment();
            } else {
                OpenBrace
            },
            _ => unreachable!(),
        };

        self.debug("after glyph");

        Token {
            literal: &self.input[offset..self.offset + 1],
            kind,
            line,
            column,
        }
    }

    fn expect_or(&mut self, expect: char, this: TokenKind, other: TokenKind) -> TokenKind {
        self.debug("before expect_or");
        let token = if self.peek() == Some(expect) {
            self.advance();
            this
        } else {
            other
        };
        self.debug("before expect_or");
        token
    }

    fn single_comment(&mut self) -> Token<'a> {
        let (offset, line, column) = (self.offset, self.line, self.column);

        while let Some(curr) = self.curr() {
            if curr == '\n' {
                break;
            } else {
                self.advance();
            }
        }

        Token {
            literal: &self.input[offset..self.offset],
            kind: SingleComment,
            line,
            column,
        }
    }

    fn multi_comment(&mut self) -> Token<'a> {
        let (offset, line, column) = (self.offset, self.line, self.column);

        while let Some(curr) = self.curr() {
            if curr == '-' && self.peek() == Some('}') {
                self.advance();
                self.advance();
                break;
            } else {
                self.advance();
            }
        }

        Token {
            literal: &self.input[offset..self.offset],
            kind: MultiComment,
            line,
            column,
        }
    }

    fn debug(&self, op: &str) {
        println!(
            "[{op}] {} pos: `{}`, curr: `{}`, peek: `{}`",
            " ".repeat(20 - op.len()),
            self.offset,
            self.curr().unwrap_or('\0'),
            self.peek().unwrap_or('\0')
        );
    }
}