use crate::token::{
cursor::Cursor,
identifier::is_identifier_char,
token::{Literal, Token, TokenKind},
};
/// Radix selected by a two-character `0x` / `0b` / `0o` prefix.
#[derive(Clone, Copy)]
enum Radix {
    Hex,
    Binary,
    Octal,
}

/// Scans a radix-prefixed integer literal whose two-character prefix sits at
/// the cursor (the caller has already matched it).
///
/// The digit run after the prefix may contain `_` separators, but it must be
/// non-empty, must not start or end with `_`, and must not contain `__`.
///
/// Returns the number token on success. On failure the cursor is restored to
/// where it was on entry, so the caller can try another scanner on the same
/// input.
fn scan_radix_literal<'b>(cursor: &mut Cursor<'b>, radix: Radix) -> Option<Token<'b>> {
    let state = cursor.save_state();
    let start_pos = cursor.pos();
    let start_line = cursor.line();
    let start_column = cursor.column();

    // Skip the two prefix characters.
    cursor.consume();
    cursor.consume();

    let digits = match radix {
        Radix::Hex => cursor.consume_while(|c| c.is_ascii_hexdigit() || c == '_'),
        Radix::Binary => cursor.consume_while(|c| c == '0' || c == '1' || c == '_'),
        Radix::Octal => cursor.consume_while(|c| ('0'..='7').contains(&c) || c == '_'),
    };
    let well_formed = !digits.is_empty()
        && !digits.starts_with('_')
        && !digits.ends_with('_')
        && !digits.contains("__");

    if !well_formed {
        // Undo the prefix/digit consumption so a rejected literal does not
        // leave the cursor in the middle of the input.
        cursor.restore_state(state);
        return None;
    }

    Some(Token {
        kind: TokenKind::Literal(Literal::Number),
        fragment: cursor.make_fragment(start_pos, start_line, start_column),
    })
}

/// Scans a numeric literal starting at the cursor.
///
/// Recognised forms:
/// - `0x…`, `0b…`, `0o…` (prefix matched case-insensitively) followed by
///   digits of that radix with optional single `_` separators between digits;
/// - decimal digits (with `_` allowed in the run), optionally followed by
///   `.` + digits when a digit directly follows the dot;
/// - a leading-dot decimal such as `.5`, which is rejected when an identifier
///   character directly follows it (e.g. `.5sec`);
/// - an optional exponent `e`/`E`, optional sign, and a run that contains at
///   least one digit.
///
/// Returns `Some(token)` with kind `Literal::Number` covering the consumed
/// text, or `None` when no number starts here. Whenever `None` is returned
/// the cursor is left at the position it had on entry.
pub fn scan_number<'b>(cursor: &mut Cursor<'b>) -> Option<Token<'b>> {
    let radix = {
        let prefix = cursor.peek_str(2);
        if prefix.eq_ignore_ascii_case("0x") {
            Some(Radix::Hex)
        } else if prefix.eq_ignore_ascii_case("0b") {
            Some(Radix::Binary)
        } else if prefix.eq_ignore_ascii_case("0o") {
            Some(Radix::Octal)
        } else {
            None
        }
    };
    if let Some(radix) = radix {
        return scan_radix_literal(cursor, radix);
    }

    let start_pos = cursor.pos();
    let start_line = cursor.line();
    let start_column = cursor.column();
    let state = cursor.save_state();

    let has_leading_dot = cursor.peek() == Some('.');
    if has_leading_dot {
        cursor.consume();
        // A lone `.` (or `.x`) is not a number; hand the dot back.
        if !cursor.peek().is_some_and(|c| c.is_ascii_digit()) {
            cursor.restore_state(state);
            return None;
        }
        cursor.consume_while(|c| c.is_ascii_digit() || c == '_');
    } else {
        // Nothing has been consumed yet, so no restore is needed here.
        if !cursor.peek().is_some_and(|c| c.is_ascii_digit()) {
            return None;
        }
        cursor.consume_while(|c| c.is_ascii_digit() || c == '_');

        // Only take the dot when a digit follows it, so `1.` leaves the dot
        // for whatever scanner runs next.
        if cursor.peek() == Some('.') && cursor.peek_ahead(1).is_some_and(|c| c.is_ascii_digit()) {
            cursor.consume();
            cursor.consume_while(|c| c.is_ascii_digit() || c == '_');
        }
    }

    if matches!(cursor.peek(), Some('e' | 'E')) {
        cursor.consume();
        if matches!(cursor.peek(), Some('+' | '-')) {
            cursor.consume();
        }
        let exponent = cursor.consume_while(|c| c.is_ascii_digit() || c == '_');
        // An exponent made only of `_` separators (or nothing) has no value.
        if !exponent.chars().any(|c| c.is_ascii_digit()) {
            cursor.restore_state(state);
            return None;
        }
    }

    // `.5sec` must not be split into the number `.5` and `sec`; this check is
    // applied to the leading-dot form only.
    if has_leading_dot && cursor.peek().is_some_and(is_identifier_char) {
        cursor.restore_state(state);
        return None;
    }

    Some(Token {
        kind: TokenKind::Literal(Literal::Number),
        fragment: cursor.make_fragment(start_pos, start_line, start_column),
    })
}
#[cfg(test)]
pub mod tests {
    use Literal::Number;
    use super::*;
    use crate::{
        bump::Bump,
        token::{operator::Operator, tokenize},
    };

    /// Tokenizes `input` and checks that the first token is a number literal
    /// whose fragment text equals `expected`.
    fn expect_first_number(bump: &Bump, input: &str, expected: &str) {
        let tokens = tokenize(bump, input).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Literal(Number));
        assert_eq!(tokens[0].fragment.text(), expected);
    }

    // Plain run of decimal digits.
    #[test]
    fn test_decimal_integer() {
        let arena = Bump::new();
        expect_first_number(&arena, "42", "42");
    }

    // Integer part, dot, fractional part.
    #[test]
    fn test_decimal_float() {
        let arena = Bump::new();
        expect_first_number(&arena, "3.14", "3.14");
    }

    // Underscore separators stay part of the literal text.
    #[test]
    fn test_decimal_with_underscores() {
        let arena = Bump::new();
        expect_first_number(&arena, "1_234_567", "1_234_567");
    }

    // Lower- and upper-case exponent markers, with and without a sign.
    #[test]
    fn test_scientific_notation() {
        let arena = Bump::new();
        for literal in ["1.23e10", "5E-3"] {
            expect_first_number(&arena, literal, literal);
        }
    }

    // `0x` prefix, with and without separators.
    #[test]
    fn test_hex_number() {
        let arena = Bump::new();
        for literal in ["0x2A", "0xDEAD_BEEF"] {
            expect_first_number(&arena, literal, literal);
        }
    }

    // `0b` prefix, with and without separators.
    #[test]
    fn test_binary_number() {
        let arena = Bump::new();
        for literal in ["0b1010", "0b1111_0000"] {
            expect_first_number(&arena, literal, literal);
        }
    }

    // `0o` prefix, with and without separators.
    #[test]
    fn test_octal_number() {
        let arena = Bump::new();
        for literal in ["0o777", "0o12_34"] {
            expect_first_number(&arena, literal, literal);
        }
    }

    // A decimal may start with the dot.
    #[test]
    fn test_leading_dot() {
        let arena = Bump::new();
        expect_first_number(&arena, ".5", ".5");
    }

    // `.5sec` is a dot operator followed by the identifier `5sec`, not a number.
    #[test]
    fn test_leading_dot_decimal_with_identifier() {
        let arena = Bump::new();
        let tokens = tokenize(&arena, ".5sec").unwrap();
        assert_eq!(tokens.len(), 2);

        let dot = &tokens[0];
        assert_eq!(dot.kind, TokenKind::Operator(Operator::Dot));

        let ident = &tokens[1];
        assert_eq!(ident.kind, TokenKind::Identifier);
        assert_eq!(ident.fragment.text(), "5sec");
    }

    // `.5` on its own is exactly one number token.
    #[test]
    fn test_leading_dot_decimal_standalone() {
        let arena = Bump::new();
        let tokens = tokenize(&arena, ".5").unwrap();
        assert_eq!(tokens.len(), 1);

        let only = &tokens[0];
        assert_eq!(only.kind, TokenKind::Literal(Number));
        assert_eq!(only.fragment.text(), ".5");
    }

    // Digits glued to letters form one identifier; separated by a space the
    // digits are a number.
    #[test]
    fn test_number_with_trailing() {
        let arena = Bump::new();

        let glued = tokenize(&arena, "42abc").unwrap();
        assert_eq!(glued.len(), 1);
        assert_eq!(glued[0].kind, TokenKind::Identifier);
        assert_eq!(glued[0].fragment.text(), "42abc");

        expect_first_number(&arena, "42 abc", "42");
    }

    // Malformed prefixed literals.
    #[test]
    fn test_invalid_numbers() {
        let arena = Bump::new();

        // A separator directly after the prefix must not yield `0x_FF` as the
        // first token; an outright tokenize error is also acceptable.
        if let Ok(tokens) = tokenize(&arena, "0x_FF") {
            assert_ne!(tokens[0].fragment.text(), "0x_FF");
        }

        // A non-binary digit ends the binary literal and starts a new token.
        let split = tokenize(&arena, "0b102");
        assert!(split.is_ok());
        let split = split.unwrap();
        assert_eq!(split[0].fragment.text(), "0b10");
        assert_eq!(split[1].fragment.text(), "2");
    }
}