mago_syntax_core/
utils.rs

1use crate::input::Input;
2use crate::number_separator;
3
/// Decodes a quoted PHP string literal into the text it denotes.
///
/// `s` is the complete literal, including its surrounding quotes.
///
/// # Returns
///
/// * `Some(String::new())` when `s` is empty.
/// * `None` when `s` is not wrapped in a matching pair of `'` or `"` quotes.
/// * `Some(decoded)` otherwise.
///
/// # Escape handling
///
/// * Single-quoted literals: only `\\` and `\'` are escapes.
/// * Double-quoted literals: `\\`, `\"`, `\$`, `\n`, `\t`, `\r`, `\v`, `\e`, `\f`,
///   octal `\[0-7]{1,3}` (values above `0o377` wrap to a single byte, so `\400`
///   equals `\000`) and hexadecimal `\x[0-9A-Fa-f]{1,2}`.
/// * Any other backslash sequence, including a trailing backslash, `\8`, `\9` and a
///   `\x` that is not followed by a hex digit, is kept verbatim.
///
/// # Limitations
///
/// * `\u{...}` is not decoded; it is copied through unchanged.
/// * Because the result is a `String`, an escape that denotes a byte `>= 0x80` is
///   stored as the code point U+0080..=U+00FF rather than as a raw byte.
#[inline]
pub fn parse_literal_string(s: &str) -> Option<String> {
    // Reads at most `limit` digits of `radix` and folds them into a number.
    // Yields `None` when the next character is not a digit of that radix.
    fn take_digits(
        chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
        radix: u32,
        limit: usize,
    ) -> Option<u32> {
        let mut value: Option<u32> = None;
        for _ in 0..limit {
            let Some(digit) = chars.peek().and_then(|c| c.to_digit(radix)) else {
                break;
            };

            value = Some(value.unwrap_or(0) * radix + digit);
            chars.next();
        }

        value
    }

    if s.is_empty() {
        return Some(String::new());
    }

    // A lone quote character cannot be both the opening and the closing delimiter.
    if s.len() < 2 {
        return None;
    }

    let is_double_quoted = if s.starts_with('"') && s.ends_with('"') {
        true
    } else if s.starts_with('\'') && s.ends_with('\'') {
        false
    } else {
        return None;
    };

    // Both delimiters are single-byte ASCII, so these offsets are char boundaries.
    let content = &s[1..s.len() - 1];

    // Decoding never produces more bytes than the escaped source contains for
    // ASCII escapes, so this avoids most reallocation.
    let mut result = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        // A backslash at the very end of the literal stands for itself.
        let Some(&next_char) = chars.peek() else {
            result.push('\\');
            continue;
        };

        if !is_double_quoted {
            if matches!(next_char, '\\' | '\'') {
                result.push(next_char);
                chars.next();
            } else {
                // Not an escape: keep the backslash; the following character is
                // copied by the next loop iteration.
                result.push('\\');
            }

            continue;
        }

        let simple = match next_char {
            '\\' | '"' | '$' => Some(next_char),
            'n' => Some('\n'),
            't' => Some('\t'),
            'r' => Some('\r'),
            'v' => Some('\x0B'),
            'e' => Some('\x1B'),
            'f' => Some('\x0C'),
            _ => None,
        };

        if let Some(decoded) = simple {
            result.push(decoded);
            chars.next();
            continue;
        }

        match next_char {
            'x' => {
                chars.next();

                match take_digits(&mut chars, 16, 2) {
                    // Two hex digits never exceed 0xFF, so the cast is lossless.
                    Some(value) => result.push(char::from(value as u8)),
                    // `\x` without a hex digit is not an escape sequence.
                    None => result.push_str("\\x"),
                }
            }
            // `\0` is covered here too, so `\012` is read as one octal escape
            // instead of NUL followed by the text `12`.
            '0'..='7' => {
                if let Some(value) = take_digits(&mut chars, 8, 3) {
                    // Three octal digits reach 0o777; keep only the low byte.
                    result.push(char::from((value & 0xFF) as u8));
                }
            }
            // Unknown escape: keep the backslash; the following character is
            // copied by the next loop iteration.
            _ => result.push('\\'),
        }
    }

    Some(result)
}
135
/// Parses the text of a float literal into an `f64`.
///
/// Every `_` in `value` is dropped before the remaining text is handed to
/// `str::parse::<f64>`. Returns `None` when that parse fails.
#[inline]
pub fn parse_literal_float(value: &str) -> Option<f64> {
    let digits: String = value.chars().filter(|&c| c != '_').collect();

    digits.parse().ok()
}
142
/// Parses the text of a PHP integer literal into a `u64`.
///
/// Every `_` in `value` is dropped first. The radix is then chosen from the prefix:
///
/// * `0x` / `0X`: hexadecimal
/// * `0o` / `0O`: octal
/// * `0b` / `0B`: binary
/// * a leading `0` followed by more characters: legacy octal (`0123` is 83)
/// * anything else: decimal
///
/// # Returns
///
/// * `Some(n)` when the digits fit in a `u64`.
/// * `Some(u64::MAX)` when the value is too large, however large it is.
/// * `None` when the text is empty or contains a character that is not a digit
///   of the selected radix (for example `09`).
///
/// Overflow is reported as soon as it happens, so characters after the point of
/// overflow are not validated.
#[inline]
pub fn parse_literal_integer(value: &str) -> Option<u64> {
    let source = value.replace('_', "");

    let (digits, radix) = match source.as_bytes() {
        [b'0', b'x' | b'X', ..] => (&source[2..], 16),
        [b'0', b'o' | b'O', ..] => (&source[2..], 8),
        [b'0', b'b' | b'B', ..] => (&source[2..], 2),
        // A bare `0` does not match this arm and is parsed as decimal zero.
        [b'0', _, ..] => (&source[1..], 8),
        _ => (source.as_str(), 10),
    };

    match u64::from_str_radix(digits, radix) {
        Ok(parsed) => Some(parsed),
        // Saturate instead of failing so that arbitrarily long literals behave
        // the same as ones that only just exceed `u64::MAX`.
        Err(error) if matches!(error.kind(), std::num::IntErrorKind::PosOverflow) => Some(u64::MAX),
        Err(_) => None,
    }
}
156
/// Reports whether `byte` may begin an identifier.
///
/// Accepted bytes are ASCII letters, `_`, and any byte `>= 0x80`. The last group
/// matches what `is_part_of_identifier` accepts, so an identifier made of
/// non-ASCII (multi-byte UTF-8) characters can also start with one.
/// ASCII digits are rejected.
#[inline]
pub fn is_start_of_identifier(byte: &u8) -> bool {
    byte.is_ascii_alphabetic() || *byte == b'_' || *byte >= 0x80
}
161
/// Reports whether `byte` may appear inside an identifier.
///
/// Accepted bytes are ASCII digits, ASCII letters, `_`, and any byte `>= 0x80`.
#[inline]
pub fn is_part_of_identifier(byte: &u8) -> bool {
    matches!(*byte, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | 0x80..=0xFF)
}
170
171/// Reads a sequence of bytes representing digits in a specific numerical base.
172///
173/// This utility function iterates through the input byte slice, consuming bytes
174/// as long as they represent valid digits for the given `base`. It handles
175/// decimal digits ('0'-'9') and hexadecimal digits ('a'-'f', 'A'-'F').
176///
177/// It stops consuming at the first byte that is not a valid digit character,
178/// or is a digit character whose value is greater than or equal to the specified `base`
179/// (e.g., '8' in base 8, or 'A' in base 10).
180///
181/// This function is primarily intended as a helper for lexer implementations
182/// when tokenizing the digit part of number literals (binary, octal, decimal, hexadecimal).
183///
184/// # Arguments
185///
186/// * `input` - A byte slice starting at the potential first digit of the number.
187/// * `base` - The numerical base (e.g., 2, 8, 10, 16) to use for validating digits.
188///   Must be between 2 and 36 (inclusive) for hex characters to be potentially valid.
189///
190/// # Returns
191///
192/// The number of bytes (`usize`) consumed from the beginning of the `input` slice
193/// that constitute a valid sequence of digits for the specified `base`. Returns 0 if
194/// the first byte is not a valid digit for the base.
195#[inline]
196pub fn read_digits_of_base(input: &Input, offset: usize, base: u8) -> usize {
197    if base == 16 {
198        read_digits_with(input, offset, u8::is_ascii_hexdigit)
199    } else {
200        let max = b'0' + base;
201
202        read_digits_with(input, offset, |b| b >= &b'0' && b < &max)
203    }
204}
205
206#[inline]
207fn read_digits_with<F: Fn(&u8) -> bool>(input: &Input, offset: usize, is_digit: F) -> usize {
208    let bytes = input.bytes;
209    let total = input.length;
210    let start = input.offset;
211    let mut pos = start + offset; // Compute the absolute position.
212
213    while pos < total {
214        let current = bytes[pos];
215        if is_digit(&current) {
216            pos += 1;
217        } else if pos + 1 < total && bytes[pos] == number_separator!() && is_digit(&bytes[pos + 1]) {
218            pos += 2; // Skip the separator and the digit.
219        } else {
220            break;
221        }
222    }
223
224    // Return the relative length from the start of the current position.
225    pos - start
226}