// lualexer 0.1.2
//
// Read Lua code and produce tokens.
// Documentation
use crate::token::Token;
use crate::lexer::LexerErrorType;
use crate::utils::{
    ParseResult,
    accumulate_while,
    is_digit,
    is_hex_digit,
    is_identifier_start,
};

/// Parses a Lua numeric literal located at the very beginning of `input`.
///
/// Two shapes are recognised:
///
/// * hexadecimal: a single `0` followed by `x` or `X`, at least one
///   hexadecimal digit, then whatever `try_parse_hex_exponent` accepts;
/// * decimal: leading digits and/or a fractional part introduced by `.`,
///   then whatever `try_parse_decimal_exponent` accepts.
///
/// On success the number token is built from the exact slice of `input` that
/// was matched and is returned together with the unconsumed remainder.
///
/// # Errors
///
/// Returns `LexerErrorType::MalformedNumber` when
///
/// * the hexadecimal prefix is not followed by any hexadecimal digit,
/// * the decimal mantissa holds no digit at all (for example `""`, `"."` or
///   `".e5"`), or
/// * the character right after the literal is rejected by
///   `is_invalid_after_number`.
pub fn parse_number<'a>(input: &'a str) -> ParseResult<'a> {
    let (first_digits, after_first_digits) = accumulate_while(input, is_digit);

    let has_hex_prefix = first_digits == "0"
        && (after_first_digits.starts_with('x') || after_first_digits.starts_with('X'));

    if has_hex_prefix {
        // The returned slice still carries the `x`/`X` marker, so a length of
        // one means that no hexadecimal digit follows the prefix.
        let (hex_digits, after_hex_digits) = skip_and_accumulate_hex_digits(after_first_digits);

        if hex_digits.len() == 1 {
            return Err(LexerErrorType::MalformedNumber);
        }

        let mut number_length = first_digits.len() + hex_digits.len();
        let mut rest = after_hex_digits;

        if let Some((exponent_length, after_exponent)) = try_parse_hex_exponent(rest) {
            number_length += exponent_length;
            rest = after_exponent;
        }

        return finish_number(input, number_length, rest);
    }

    let mut number_length = first_digits.len();
    let mut rest = after_first_digits;
    let mut has_mantissa_digit = !first_digits.is_empty();

    if let Some((decimal_length, after_float)) = try_parse_float_part(rest) {
        // `decimal_length` counts the dot itself, so anything above one means
        // that at least one digit follows it.
        has_mantissa_digit |= decimal_length > 1;
        number_length += decimal_length;
        rest = after_float;
    }

    // A lone dot (optionally followed by an exponent) or an empty input is
    // not a number: a literal needs at least one digit in its mantissa.
    if !has_mantissa_digit {
        return Err(LexerErrorType::MalformedNumber);
    }

    if let Some((exponent_length, after_exponent)) = try_parse_decimal_exponent(rest) {
        number_length += exponent_length;
        rest = after_exponent;
    }

    finish_number(input, number_length, rest)
}

/// Builds the number token from the first `number_length` bytes of `input`,
/// unless `rest` starts with a character that cannot directly follow a number.
fn finish_number<'a>(input: &'a str, number_length: usize, rest: &'a str) -> ParseResult<'a> {
    match rest.chars().next() {
        Some(character) if is_invalid_after_number(character) => {
            Err(LexerErrorType::MalformedNumber)
        }
        _ => {
            // `number_length` is a sum of byte lengths of consecutive slices
            // taken from the start of `input`, so it is always in range and
            // on a character boundary.
            let slice = input.get(..number_length)
                .unwrap_or_else(|| unreachable!());

            Ok((Token::new_number(slice), rest))
        }
    }
}

/// Tells whether `character` may not directly follow a numeric literal.
///
/// A `.` is always rejected; any other character is rejected when
/// `is_identifier_start` accepts it.
fn is_invalid_after_number(character: char) -> bool {
    match character {
        '.' => true,
        other => is_identifier_start(other),
    }
}

/// Splits `input` right after its first character and the run of digits that
/// follows it.
///
/// The first character is kept unconditionally (callers pass a marker such as
/// `.` or an exponent letter there); the left slice therefore contains that
/// character plus every consecutive character accepted by `is_digit`. The
/// right slice is whatever remains, possibly empty.
fn skip_and_accumulate_digits<'a>(input: &'a str) -> (&'a str, &'a str) {
    let mut characters = input.char_indices();

    // The leading character always belongs to the left slice.
    characters.next();

    let split_index = characters
        .find(|&(_, character)| !is_digit(character))
        .map_or(input.len(), |(index, _)| index);

    input.split_at(split_index)
}

/// Splits `input` right after its first character and the run of hexadecimal
/// digits that follows it.
///
/// The first character is kept unconditionally (callers pass the `x`/`X`
/// marker there); the left slice therefore contains that character plus every
/// consecutive character accepted by `is_hex_digit`. The right slice is
/// whatever remains, possibly empty.
fn skip_and_accumulate_hex_digits<'a>(input: &'a str) -> (&'a str, &'a str) {
    let mut characters = input.char_indices();

    // The leading character always belongs to the left slice.
    characters.next();

    let split_index = characters
        .find(|&(_, character)| !is_hex_digit(character))
        .map_or(input.len(), |(index, _)| index);

    input.split_at(split_index)
}

/// Tries to read a decimal exponent at the start of `input`.
///
/// The accepted shape is `e` or `E`, an optional `+` or `-` sign, then at
/// least one character accepted by `is_digit`. On success the byte length of
/// the exponent and the remaining input are returned. `None` is returned when
/// `input` does not start with the marker or when no digit follows it.
///
/// The character following the exponent is not inspected here.
fn try_parse_decimal_exponent<'a>(input: &'a str) -> Option<(usize, &'a str)> {
    if !(input.starts_with('e') || input.starts_with('E')) {
        return None;
    }

    // The marker is a single ASCII byte, so byte offset 1 is the only place
    // where a sign may appear.
    let (_, after_marker) = input.split_at(1);
    let sign_length = match after_marker.chars().next() {
        Some('+') | Some('-') => 1,
        _ => 0,
    };

    let (_, after_sign) = after_marker.split_at(sign_length);
    let digits_length: usize = after_sign.chars()
        .take_while(|character| is_digit(*character))
        .map(char::len_utf8)
        .sum();

    // A marker (with or without sign) that has no digits is not an exponent.
    if digits_length == 0 {
        return None;
    }

    let (exponent, after_exponent) = input.split_at(1 + sign_length + digits_length);

    Some((exponent.len(), after_exponent))
}

/// Tries to read the exponent of a hexadecimal literal at the start of
/// `input`.
///
/// This simply forwards to `try_parse_exponent` with the `p`/`P` markers and
/// returns its result unchanged.
fn try_parse_hex_exponent<'a>(input: &'a str) -> Option<(usize, &'a str)> {
    const LOWERCASE_MARKER: char = 'p';
    const UPPERCASE_MARKER: char = 'P';

    try_parse_exponent(LOWERCASE_MARKER, UPPERCASE_MARKER, input)
}

fn try_parse_exponent<'a>(lowercase_symbol: char, uppercase_symbol: char, input: &'a str) -> Option<(usize, &'a str)> {
    input.chars().next()
        .filter(|character| *character == lowercase_symbol || *character == uppercase_symbol)
        .and_then(|_| {
            let (exponent, rest) = skip_and_accumulate_digits(input);

            if exponent.len() == 1 {
                None
            } else {
                rest.chars().next()
                    .filter(|character| is_invalid_after_number(*character))
                    .map_or_else(
                        || Some((exponent.len(), rest)),
                        |_| None,
                    )
            }
        })
}

/// Tries to read the fractional part of a decimal literal at the start of
/// `input`.
///
/// When `input` starts with `.`, returns the byte length of the dot plus the
/// digits gathered by `skip_and_accumulate_digits`, together with the
/// remaining input. The digits are optional, so a lone `.` yields a length of
/// one. Returns `None` when `input` does not start with `.`.
fn try_parse_float_part<'a>(input: &'a str) -> Option<(usize, &'a str)> {
    if !input.starts_with('.') {
        return None;
    }

    let (decimals, rest) = skip_and_accumulate_digits(input);

    Some((decimals.len(), rest))
}

// Unit tests for `parse_number`, split into literals that must be accepted
// (`ok`) and literals that must be reported as malformed (`err`).
#[cfg(test)]
mod tests {
    use super::*;

    // Literals that must be consumed entirely and produce a number token.
    mod ok {
        use super::*;

        // Generates one `#[test]` per `name: "literal"` pair. Each test checks
        // that the whole literal becomes the token content and that nothing
        // is left unconsumed.
        macro_rules! test_number {
            ($($name:ident : $input:literal),+) => {
                $(
                    #[test]
                    fn $name() {
                        let (token, rest) = parse_number($input).unwrap();
                        assert_eq!(token, Token::new_number($input));
                        assert_eq!(rest, "");
                    }
                )+
            };
        }

        test_number!(
            digit: "1",
            integer: "123",
            trailing_dot: "10.",
            single_decimal: "10.0",
            multiple_decimal: "123.123",
            starting_with_dot: ".123",
            digit_with_exponent: "1e10",
            number_with_exponent: "123e456",
            number_with_exponent_with_plus: "123e+456",
            number_with_negative_exponent: "123e-456",
            number_with_upper_exponent: "123E4",
            float_with_exponent: "10.12e8",
            trailing_dot_with_exponent: "10.e8",
            hex_number: "0x12",
            hex_number_with_lowercase: "0x12a",
            hex_number_with_uppercase: "0x12A",
            hex_number_with_mixed_case: "0x1bF2A",
            hex_with_exponent: "0x12p4",
            hex_with_exponent_uppercase: "0xABP3"
        );
    }

    // Literals that must be rejected with `LexerErrorType::MalformedNumber`.
    mod err {
        use super::*;

        // Generates one `#[test]` per `name: "literal"` pair. Each test checks
        // that parsing fails with `LexerErrorType::MalformedNumber`.
        macro_rules! test_malformed_number {
            ($($name:ident : $input:literal),+) => {
                $(
                    #[test]
                    fn $name() {
                        let lexer_error = parse_number($input).unwrap_err();
                        assert_eq!(lexer_error, LexerErrorType::MalformedNumber);
                    }
                )+
            };
        }

        test_malformed_number!(
            end_with_letter: "12a",
            end_with_underscore: "12_",
            unfinish_exponent: "10e",
            unfinish_negative_exponent: "10e-",
            unfinish_exponent_with_plus: "10e+",
            unfinish_exponent_uppercase: "10E",
            unfinish_negative_exponent_uppercase: "10E-",
            unfinish_exponent_uppercase_with_plus: "10E+",
            float_after_exponent: "10e2.12",
            trailing_dot_after_exponent: "10e2.",
            multiple_exponent: "10e8e6",
            two_dots: "1..",
            leading_dot_with_other_dot: ".5.1",
            dot_after_decimals: "12.34.",
            hex_with_trailing_dot: "0x12.",
            hex_with_decimal: "0x1.2",
            hex_with_invalid_letter: "0x12u",
            hex_end_with_underscore: "0x12_",
            unfinish_hex_number: "0x",
            unfinish_hex_number_uppercase: "0X",
            multiple_hex_exponent: "0x8p6p1",
            hex_trailing_dot_after_exponent: "0x1CP2.",
            hex_with_float_after_exponent: "0x1CP2.5",
            hex_with_invalid_exponent: "0xAAp2A"
        );
    }
}