use crate::token::Token;
use crate::lexer::LexerErrorType;
use crate::utils::{
ParseResult,
accumulate_while,
is_digit,
is_hex_digit,
is_identifier_start,
};
/// Parses a number literal located at the very beginning of `input`.
///
/// The literal is either a hexadecimal number (a single `0`, an `x` or `X`,
/// at least one hexadecimal digit and an optional exponent) or a decimal
/// number (digits, an optional fractional part and an optional exponent).
///
/// On success, returns the number token built from the consumed slice of
/// `input` together with the remaining, unconsumed input.
///
/// # Errors
///
/// Returns `LexerErrorType::MalformedNumber` when the hexadecimal prefix is
/// not followed by any hexadecimal digit, or when the character right after
/// the number cannot follow a number (see `is_invalid_after_number`).
pub fn parse_number<'a>(input: &'a str) -> ParseResult<'a> {
    let (leading_digits, after_digits) = accumulate_while(input, is_digit);

    // Only a lone `0` followed by `x` or `X` introduces a hexadecimal number.
    let has_hex_prefix = leading_digits == "0"
        && match after_digits.chars().next() {
            Some('x') | Some('X') => true,
            _ => false,
        };

    let (length, remaining) = if has_hex_prefix {
        let (prefixed_digits, after_hex) = skip_and_accumulate_hex_digits(after_digits);

        // The accumulated slice includes the `x`: a length of one means that
        // no hexadecimal digit follows the prefix.
        if prefixed_digits.len() == 1 {
            return Err(LexerErrorType::MalformedNumber);
        }

        let mantissa_length = leading_digits.len() + prefixed_digits.len();

        match try_parse_hex_exponent(after_hex) {
            Some((exponent_length, after_exponent)) => {
                (mantissa_length + exponent_length, after_exponent)
            }
            None => (mantissa_length, after_hex),
        }
    } else {
        let (fraction_length, after_fraction) = try_parse_float_part(after_digits)
            .unwrap_or((0, after_digits));
        let (exponent_length, after_exponent) = try_parse_decimal_exponent(after_fraction)
            .unwrap_or((0, after_fraction));

        (
            leading_digits.len() + fraction_length + exponent_length,
            after_exponent,
        )
    };

    match remaining.chars().next() {
        Some(character) if is_invalid_after_number(character) => {
            Err(LexerErrorType::MalformedNumber)
        }
        _ => {
            // `length` is the sum of the byte lengths of consecutive slices
            // taken from the start of `input`, so the range is always valid.
            let slice = input.get(0..length)
                .unwrap_or_else(|| unreachable!());
            Ok((Token::new_number(slice), remaining))
        }
    }
}
/// Tells if `character` makes a number malformed when it immediately follows
/// it: either a dot or any character accepted by `is_identifier_start`.
fn is_invalid_after_number(character: char) -> bool {
    match character {
        '.' => true,
        other => is_identifier_start(other),
    }
}
/// Splits `input` after its first character and the run of digits that
/// directly follows it.
///
/// The first character is always kept in the first slice, whatever it is.
/// Returns `(first character + digits, remaining input)`.
fn skip_and_accumulate_digits<'a>(input: &'a str) -> (&'a str, &'a str) {
    let split_index = input.char_indices()
        .skip(1)
        .find(|&(_, character)| !is_digit(character))
        .map_or(input.len(), |(index, _)| index);

    input.split_at(split_index)
}
/// Splits `input` after its first character and the run of hexadecimal
/// digits that directly follows it.
///
/// The first character is always kept in the first slice, whatever it is.
/// Returns `(first character + hexadecimal digits, remaining input)`.
fn skip_and_accumulate_hex_digits<'a>(input: &'a str) -> (&'a str, &'a str) {
    let split_index = input.char_indices()
        .skip(1)
        .find(|&(_, character)| !is_hex_digit(character))
        .map_or(input.len(), |(index, _)| index);

    input.split_at(split_index)
}
/// Tries to parse a decimal exponent at the beginning of `input`.
///
/// An exponent is an `e` or `E`, an optional `+` or `-` sign and at least
/// one digit. Returns the byte length of the exponent with the input that
/// follows it, or `None` when `input` does not start with a complete
/// exponent.
fn try_parse_decimal_exponent<'a>(input: &'a str) -> Option<(usize, &'a str)> {
    // Both markers are one byte long.
    let marker_length = match input.chars().next() {
        Some('e') | Some('E') => 1,
        _ => return None,
    };

    let sign_length = match input[marker_length..].chars().next() {
        Some('+') | Some('-') => 1,
        _ => 0,
    };

    let digits_start = marker_length + sign_length;
    let digits_length: usize = input[digits_start..].chars()
        .take_while(|character| is_digit(*character))
        .map(char::len_utf8)
        .sum();

    if digits_length == 0 {
        // A marker (and maybe a sign) without any digit is not an exponent.
        return None;
    }

    let (exponent, after_exponent) = input.split_at(digits_start + digits_length);
    Some((exponent.len(), after_exponent))
}
/// Tries to parse the exponent of a hexadecimal number, which is introduced
/// by a `p` or a `P`, at the beginning of `input`.
///
/// Delegates to `try_parse_exponent` and returns its result unchanged.
fn try_parse_hex_exponent<'a>(input: &'a str) -> Option<(usize, &'a str)> {
    const LOWERCASE_MARKER: char = 'p';
    const UPPERCASE_MARKER: char = 'P';

    try_parse_exponent(LOWERCASE_MARKER, UPPERCASE_MARKER, input)
}
fn try_parse_exponent<'a>(lowercase_symbol: char, uppercase_symbol: char, input: &'a str) -> Option<(usize, &'a str)> {
input.chars().next()
.filter(|character| *character == lowercase_symbol || *character == uppercase_symbol)
.and_then(|_| {
let (exponent, rest) = skip_and_accumulate_digits(input);
if exponent.len() == 1 {
None
} else {
rest.chars().next()
.filter(|character| is_invalid_after_number(*character))
.map_or_else(
|| Some((exponent.len(), rest)),
|_| None,
)
}
})
}
/// Tries to parse the fractional part of a number at the beginning of
/// `input`: a dot followed by any amount of digits (possibly none).
///
/// Returns the byte length of the fractional part (dot included) with the
/// input that follows it, or `None` when `input` does not start with a dot.
fn try_parse_float_part<'a>(input: &'a str) -> Option<(usize, &'a str)> {
    if input.starts_with('.') {
        let (decimals, rest) = skip_and_accumulate_digits(input);
        Some((decimals.len(), rest))
    } else {
        None
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Inputs that must be entirely consumed as a single number token.
    mod ok {
        use super::*;

        // Generates one test per `name: "input"` pair. Each test checks that
        // the whole input becomes the number token and that nothing is left.
        macro_rules! test_number {
            ($($test_name:ident : $source:literal),+) => {
                $(
                    #[test]
                    fn $test_name() {
                        match parse_number($source) {
                            Ok((token, rest)) => {
                                assert_eq!(token, Token::new_number($source));
                                assert_eq!(rest, "");
                            }
                            Err(error) => panic!(
                                "expected `{}` to be a valid number but got {:?}",
                                $source,
                                error
                            ),
                        }
                    }
                )+
            };
        }

        test_number!(
            digit: "1",
            integer: "123",
            trailing_dot: "10.",
            single_decimal: "10.0",
            multiple_decimal: "123.123",
            starting_with_dot: ".123",
            digit_with_exponent: "1e10",
            number_with_exponent: "123e456",
            number_with_exponent_with_plus: "123e+456",
            number_with_negative_exponent: "123e-456",
            number_with_upper_exponent: "123E4",
            float_with_exponent: "10.12e8",
            trailing_dot_with_exponent: "10.e8",
            hex_number: "0x12",
            hex_number_with_lowercase: "0x12a",
            hex_number_with_uppercase: "0x12A",
            hex_number_with_mixed_case: "0x1bF2A",
            hex_with_exponent: "0x12p4",
            hex_with_exponent_uppercase: "0xABP3"
        );
    }

    /// Inputs that must be rejected as malformed numbers.
    mod err {
        use super::*;

        // Generates one test per `name: "input"` pair. Each test checks that
        // parsing the input fails with `LexerErrorType::MalformedNumber`.
        macro_rules! test_malformed_number {
            ($($test_name:ident : $source:literal),+) => {
                $(
                    #[test]
                    fn $test_name() {
                        assert_eq!(
                            parse_number($source).err(),
                            Some(LexerErrorType::MalformedNumber)
                        );
                    }
                )+
            };
        }

        test_malformed_number!(
            end_with_letter: "12a",
            end_with_underscore: "12_",
            unfinish_exponent: "10e",
            unfinish_negative_exponent: "10e-",
            unfinish_exponent_with_plus: "10e+",
            unfinish_exponent_uppercase: "10E",
            unfinish_negative_exponent_uppercase: "10E-",
            unfinish_exponent_uppercase_with_plus: "10E+",
            float_after_exponent: "10e2.12",
            trailing_dot_after_exponent: "10e2.",
            multiple_exponent: "10e8e6",
            two_dots: "1..",
            leading_dot_with_other_dot: ".5.1",
            dot_after_decimals: "12.34.",
            hex_with_trailing_dot: "0x12.",
            hex_with_decimal: "0x1.2",
            hex_with_invalid_letter: "0x12u",
            hex_end_with_underscore: "0x12_",
            unfinish_hex_number: "0x",
            unfinish_hex_number_uppercase: "0X",
            multiple_hex_exponent: "0x8p6p1",
            hex_trailing_dot_after_exponent: "0x1CP2.",
            hex_with_float_after_exponent: "0x1CP2.5",
            hex_with_invalid_exponent: "0xAAp2A"
        );
    }
}