//! tml_parser 1.0.6
//!
//! The official parser for the TML language.
use std::vec::Vec;
use std::string::String;

/// Token categories produced by the TML lexer.
///
/// Fieldless and `repr(u8)`, so variants have stable discriminants 0..=8
/// in declaration order. `Eq`, `Hash`, and `Copy` are derived so token
/// types can be compared, used as map/set keys, and passed by value
/// without cloning.
#[repr(u8)]
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Keyword {
    /// End of input; the lexer always emits this as the final token.
    EOF,
    /// A `#` character.
    HASH,
    /// An `@` character.
    AT,
    /// An `x` character.
    CROSS,
    /// A run of free text.
    TEXT,
    /// An identifier line (follows a HASH token).
    IDENT,
    /// A label (follows a NUMBER token).
    LABEL,
    /// A numeric literal (digits and `.`).
    NUMBER,
    /// A newline acting as a statement delimiter.
    DELIMITER,
}

impl Keyword {
    pub fn to_str(&self) -> &'static str {
        match self {
            Keyword::EOF => "EOF",
            Keyword::HASH => "HASH",
            Keyword::AT => "AT",
            Keyword::CROSS => "CROSS",
            Keyword::TEXT => "TEXT",
            Keyword::IDENT => "IDENT",
            Keyword::LABEL => "LABEL",
            Keyword::NUMBER => "NUMBER",
            Keyword::DELIMITER => "DELIMITER",
        }
    }
}

/// A single lexed token: its category, raw text, and source location.
#[derive(Debug, Clone)]
pub struct Token {
    /// Lexical category of this token.
    pub token_type: Keyword,
    /// Raw text captured from the input (empty for the final EOF token).
    pub token_literal: String,
    /// Half-open byte range `(start, end)` into the source input: `start`
    /// is the offset of the first byte, `end` is one past the last byte.
    pub position: (usize, usize),
}

/// Cursor-style access to a lexed token stream.
pub trait LexerTrait {
    /// Returns the token under the cursor and advances past it.
    fn next_token(&mut self) -> Token;
    /// Returns the token under the cursor without advancing.
    fn peek_token(&self) -> Token;
}

/// Eager lexer for TML source: construction tokenizes the entire input up
/// front, and the `LexerTrait` impl walks the stored token list.
pub struct Lexer {
    /// Owned copy of the full source text being lexed.
    input: String,
    /// Byte index of the character currently under the cursor.
    cur_pos: usize,
    /// Byte index one ahead of `cur_pos`, clamped back onto `cur_pos`
    /// near the end of input so it never goes out of bounds.
    peek_pos: usize,
    /// All tokens produced by lexing, terminated by an EOF token.
    tokens: Vec<Token>,
    /// Cursor into `tokens` used by `next_token`/`peek_token`.
    tok_pos: usize,
}

impl LexerTrait for Lexer {
    /// Hands out the token under the cursor and steps the cursor forward.
    /// Once the cursor has run past the end, it keeps yielding a clone of
    /// the final token in the stream.
    fn next_token(&mut self) -> Token {
        match self.tokens.get(self.tok_pos) {
            Some(tok) => {
                let out = tok.clone();
                self.tok_pos += 1;
                out
            }
            None => self.tokens[self.tokens.len() - 1].clone(),
        }
    }

    /// Same as `next_token`, but leaves the cursor where it is.
    fn peek_token(&self) -> Token {
        match self.tokens.get(self.tok_pos) {
            Some(tok) => tok.clone(),
            None => self.tokens[self.tokens.len() - 1].clone(),
        }
    }
}

impl Lexer {
    pub fn new(input: &str) -> impl LexerTrait {
        let mut l = Lexer {input: String::from(input), cur_pos: 0, peek_pos: 0, tokens: Vec::new(), tok_pos: 0};
        l.init();
        l.lex();

        l
    }

    fn init(&mut self) {
        if self.input.len() <= 1 {
            self.peek_pos = self.cur_pos;
        } else {
            self.peek_pos = self.cur_pos + 1;
        }
    }

    #[inline(always)]
    fn get_char(&self) -> String {
        String::from(&self.input[self.cur_pos..self.cur_pos+1])
    }

    #[inline(always)]
    fn get_peek(&self) -> String {
        String::from(&self.input[self.peek_pos..self.peek_pos+1])
    }

    fn read_char(&mut self) {
        if self.cur_pos+1 > self.input.len() {
            return
        }

        self.cur_pos += 1;

        if self.peek_pos+1 >= self.input.len() {
            self.peek_pos = self.cur_pos;
        } else {
            self.peek_pos += 1;
        }
    }

    #[inline(always)]
    fn eof(&self) -> bool {
        return self.cur_pos >= self.input.len();
    }

    fn eat_whitespace(&mut self) {
        while !self.eof() {
            let ch = self.get_char();

            if ch.as_str() != " " && ch.as_str() != "\t" {
                break
            }

            self.read_char();
        }
    }

    fn push(&mut self, tok: Token) {
        self.tokens.push(tok);
    }

    fn parse_text(&mut self) -> Token {
        let mut literal = String::new();
        let start = self.cur_pos;

        while !self.eof() {
            let ch = self.get_char();

            if ch.as_str() == "\n" && (self.get_peek().as_str() == "\n" || self.get_peek().as_str() == "#") {
                literal += ch.as_str();
                self.read_char();
                break
            }

            literal += ch.as_str();

            self.read_char();
        }

        return Token{token_type: Keyword::TEXT, token_literal: literal, position: (start, self.cur_pos)};
    }

    fn parse_ident(&mut self) -> Token {
        let mut literal = String::new();
        let start = self.cur_pos;

        while !self.eof() {
            let ch = self.get_char();

            if ch.as_str() == "\n" {
                break
            }

            literal += ch.as_str();

            self.read_char();
        }

        return Token{token_type: Keyword::IDENT, token_literal: literal, position: (start, self.cur_pos)};
    }

    fn parse_label(&mut self) -> Token {
        let mut literal = String::new();
        let start = self.cur_pos;

        while !self.eof() {
            let ch = self.get_char();

            if ch.as_str() == " " {
                break
            }

            literal += ch.as_str();

            self.read_char();
        }

        return Token{token_type: Keyword::LABEL, token_literal: literal, position: (start, self.cur_pos)};
    }

    fn parse_number(&mut self) -> Token {
        let mut literal = String::new();
        let start = self.cur_pos;

        while !self.eof() {
            let ch = self.get_char();

            if !is_digit(ch.as_str()) && ch.as_str() != "." {
                break
            }

            literal += ch.as_str();

            self.read_char();
        }

        return Token{token_type: Keyword::NUMBER, token_literal: literal, position: (start, self.cur_pos)};
    }

    fn lex(&mut self){
        while !self.eof() {
            self.eat_whitespace();

            let ch = self.get_char();
            let start = self.cur_pos;

            let tok = match ch.as_str() {
                "#" => {
                    self.read_char();
                    Token{token_type: Keyword::HASH, token_literal: String::from(ch), position: (start, self.cur_pos)}
                }

                "x" => {
                    self.read_char();
                    Token{token_type: Keyword::CROSS, token_literal: String::from(ch), position: (start, self.cur_pos)}
                }

                "@" => {
                    self.read_char();
                    Token{token_type: Keyword::AT, token_literal: String::from(ch), position: (start, self.cur_pos)}
                }

                "\n" => {
                    self.read_char();
                    Token{token_type: Keyword::DELIMITER, token_literal: String::from(ch), position: (start, self.cur_pos)}
                }

                _ if self.tokens.len() > 0 && self.tokens[self.tokens.len()-1].token_type == Keyword::HASH => self.parse_ident(),
                _ if self.tokens.len() > 0 && self.tokens[self.tokens.len()-1].token_type == Keyword::NUMBER => self.parse_label(),
                _ if is_digit(ch.as_str()) => self.parse_number(),
                _ => self.parse_text(),
            };

            self.push(tok);
        }

        self.push(Token{token_type: Keyword::EOF, token_literal: String::from(""), position: (self.cur_pos, self.cur_pos)});
    }
}

/// True only when `s` is exactly one ASCII digit (`"0"`..=`"9"`).
///
/// The previous lexicographic comparison (`s >= "0" && s <= "9"`) wrongly
/// accepted multi-character strings such as `"12"` or `"5a"`, since they
/// sort between `"0"` and `"9"`. Callers pass single-byte slices, so
/// tightening to exactly one digit is backward-compatible.
fn is_digit(s: &str) -> bool {
    s.len() == 1 && s.as_bytes()[0].is_ascii_digit()
}