mago_syntax_core/
utils.rs

1use crate::input::Input;
2use crate::number_separator;
3
/// Parses the text of a float literal into an `f64`.
///
/// Every `_` in `value` is dropped, and the remaining text is handed to
/// `str::parse::<f64>`.
///
/// # Returns
///
/// `Some(number)` when the remaining text is accepted by the `f64` parser,
/// `None` otherwise.
#[inline]
pub fn parse_literal_float(value: &str) -> Option<f64> {
    let digits: String = value.chars().filter(|&c| c != '_').collect();

    digits.parse().ok()
}
10
/// Parses the text of an integer literal into a `u64`, saturating at `u64::MAX`.
///
/// Every `_` in `value` is removed first. The prefix then selects the radix:
///
/// * `0x` / `0X` - hexadecimal
/// * `0o` / `0O` - octal
/// * `0b` / `0B` - binary
/// * anything else - decimal
///
/// NOTE(review): a literal such as `0123` takes the decimal path here; confirm
/// that this is the intended reading for the language being lexed.
///
/// # Returns
///
/// * `Some(n)` when the literal fits in a `u64`.
/// * `Some(u64::MAX)` when the literal is too large for a `u64`, including
///   literals that are too large for the `u128` used as an intermediate.
/// * `None` when the digits are missing or are not valid for the radix.
#[inline]
pub fn parse_literal_integer(value: &str) -> Option<u64> {
    let source = value.replace('_', "");

    // The two leading bytes matched below are ASCII, so slicing at index 2
    // always lands on a character boundary.
    let parsed = match source.as_bytes() {
        [b'0', b'x' | b'X', ..] => u128::from_str_radix(&source[2..], 16),
        [b'0', b'o' | b'O', ..] => u128::from_str_radix(&source[2..], 8),
        [b'0', b'b' | b'B', ..] => u128::from_str_radix(&source[2..], 2),
        _ => source.parse::<u128>(),
    };

    match parsed {
        // Clamp into the `u64` range before narrowing, so the cast cannot truncate.
        Ok(wide) => Some(wide.min(u128::from(u64::MAX)) as u64),
        // Overflowing the intermediate is still "too large", not "invalid":
        // saturate exactly like the values that merely overflow `u64`.
        Err(error) if matches!(error.kind(), std::num::IntErrorKind::PosOverflow) => Some(u64::MAX),
        Err(_) => None,
    }
}
24
/// Reports whether `byte` may begin an identifier.
///
/// Returns `true` for the ASCII letters `a`-`z` and `A`-`Z` and for `_`;
/// every other byte yields `false`.
#[inline]
pub fn is_start_of_identifier(byte: &u8) -> bool {
    matches!(*byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
29
/// Reports whether `byte` may appear inside an identifier.
///
/// Returns `true` for ASCII digits, ASCII letters, `_`, and any byte with a
/// value of `0x80` or above; every other byte yields `false`.
#[inline]
pub fn is_part_of_identifier(byte: &u8) -> bool {
    match *byte {
        b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true,
        // Everything outside the ASCII range is accepted as well.
        other => other >= 0x80,
    }
}
38
39/// Reads a sequence of bytes representing digits in a specific numerical base.
40///
41/// This utility function iterates through the input byte slice, consuming bytes
42/// as long as they represent valid digits for the given `base`. It handles
43/// decimal digits ('0'-'9') and hexadecimal digits ('a'-'f', 'A'-'F').
44///
45/// It stops consuming at the first byte that is not a valid digit character,
46/// or is a digit character whose value is greater than or equal to the specified `base`
47/// (e.g., '8' in base 8, or 'A' in base 10).
48///
49/// This function is primarily intended as a helper for lexer implementations
50/// when tokenizing the digit part of number literals (binary, octal, decimal, hexadecimal).
51///
52/// # Arguments
53///
54/// * `input` - A byte slice starting at the potential first digit of the number.
55/// * `base` - The numerical base (e.g., 2, 8, 10, 16) to use for validating digits.
56///   Must be between 2 and 36 (inclusive) for hex characters to be potentially valid.
57///
58/// # Returns
59///
60/// The number of bytes (`usize`) consumed from the beginning of the `input` slice
61/// that constitute a valid sequence of digits for the specified `base`. Returns 0 if
62/// the first byte is not a valid digit for the base.
63#[inline]
64pub fn read_digits_of_base(input: &Input, offset: usize, base: u8) -> usize {
65    if base == 16 {
66        read_digits_with(input, offset, u8::is_ascii_hexdigit)
67    } else {
68        let max = b'0' + base;
69
70        read_digits_with(input, offset, |b| b >= &b'0' && b < &max)
71    }
72}
73
74#[inline]
75fn read_digits_with<F: Fn(&u8) -> bool>(input: &Input, offset: usize, is_digit: F) -> usize {
76    let bytes = input.bytes;
77    let total = input.length;
78    let start = input.offset;
79    let mut pos = start + offset; // Compute the absolute position.
80
81    while pos < total {
82        let current = bytes[pos];
83        if is_digit(&current) {
84            pos += 1;
85        } else if pos + 1 < total && bytes[pos] == number_separator!() && is_digit(&bytes[pos + 1]) {
86            pos += 2; // Skip the separator and the digit.
87        } else {
88            break;
89        }
90    }
91
92    // Return the relative length from the start of the current position.
93    pos - start
94}