// lualexer 0.1.2
//
// Read Lua code and produce tokens.
// Documentation
use crate::token::Token;
use crate::lexer::{LexerError, LexerErrorType};
use crate::utils::{
    ParseResult,
    is_digit,
    is_identifier_start,
    is_valid_symbol,
    identifier::parse_identifier_or_keyword,
    number::parse_number,
    string::parse_quoted_string,
    symbols::parse_symbols,
};

/// Turns source text into a flat list of tokens (`Vec<Token>`).
///
/// Implementors supply construction ([`Tokenizer::new`]) and the whitespace policy
/// ([`Tokenizer::process_whitespace`]); the token-by-token driving loop is provided by the
/// default [`Tokenizer::parse`] method.
pub trait Tokenizer {
    /// Builds a new instance of the implementing type.
    fn new() -> Self;

    /// Tokenizes `input` from start to end.
    ///
    /// Whitespace handling is delegated to `process_whitespace`, which runs once before the
    /// first token and again after every token. Each remaining chunk is dispatched on its
    /// first character to the matching sub-parser.
    ///
    /// # Errors
    ///
    /// Stops at the first sub-parser failure and returns the error kind paired with the
    /// complete original `input` (not the slice at which lexing stopped).
    fn parse<'a>(&self, input: &'a str) -> Result<Vec<Token<'a>>, LexerError<'a>> {
        let mut tokens = Vec::new();
        let mut remaining = self.process_whitespace(&mut tokens, input);

        loop {
            // Running out of characters means the whole input has been consumed.
            let first = match remaining.chars().next() {
                Some(first) => first,
                None => break,
            };

            let (token, rest) = branch_parser(first, remaining)
                .map_err(|error_type| (error_type, input))?;

            tokens.push(token);
            remaining = self.process_whitespace(&mut tokens, rest);
        }

        Ok(tokens)
    }

    /// Advances `input` to its first non-whitespace character and returns that remainder.
    ///
    /// Implementations must tolerate being called on input that does not begin with
    /// whitespace; in that situation the input should be returned unchanged.
    ///
    /// The token vector is handed over mutably so that an implementation which needs to keep
    /// whitespace information can record it there.
    fn process_whitespace<'a>(&self, tokens: &mut Vec<Token<'a>>, input: &'a str) -> &'a str;
}

/// Picks the sub-parser for `input` based on `character` and runs it.
///
/// `character` is expected to be the first character of `input`. The checks are tried in
/// this order: identifier start, quote (`"` or `'`), digit, valid symbol. Anything else
/// yields `LexerErrorType::UnexpectedSymbol`.
fn branch_parser<'a>(character: char, input: &'a str) -> ParseResult<'a> {
    match character {
        c if is_identifier_start(c) => parse_identifier_or_keyword(input),
        '"' | '\'' => parse_quoted_string(input),
        c if is_digit(c) => parse_number(input),
        c if is_valid_symbol(c) => {
            // The leading character is already known, so only look at the two after it.
            let mut lookahead = input.chars().skip(1);
            let second = lookahead.next();
            let third = lookahead.next();
            parse_symbols(input, (c, second, third))
        }
        _ => Err(LexerErrorType::UnexpectedSymbol),
    }
}