mago_syntax_core/
utils.rs1use bumpalo::Bump;
2use bumpalo::collections::Vec;
3
4use crate::input::Input;
5use crate::number_separator;
6
7pub fn parse_literal_string_in<'arena>(
18 arena: &'arena Bump,
19 s: &'arena [u8],
20 quote_char: Option<u8>,
21 has_quote: bool,
22) -> Option<&'arena [u8]> {
23 if s.is_empty() {
24 return Some(b"");
25 }
26
27 let s = if has_quote
28 && (s.starts_with(b"b\"") || s.starts_with(b"b'") || s.starts_with(b"B\"") || s.starts_with(b"B'"))
29 {
30 &s[1..]
31 } else {
32 s
33 };
34
35 let (quote_char, content) = if let Some(quote_char) = quote_char {
36 (Some(quote_char), s)
37 } else if !has_quote {
38 (None, s)
39 } else if s.starts_with(b"\"") && s.ends_with(b"\"") && s.len() >= 2 {
40 (Some(b'"'), &s[1..s.len() - 1])
41 } else if s.starts_with(b"'") && s.ends_with(b"'") && s.len() >= 2 {
42 (Some(b'\''), &s[1..s.len() - 1])
43 } else {
44 return None;
45 };
46
47 let needs_processing = content.contains(&b'\\') || quote_char.is_some_and(|q| content.contains(&q));
48 if !needs_processing {
49 return Some(content);
50 }
51
52 let mut result = Vec::with_capacity_in(content.len(), arena);
53 let mut i = 0;
54
55 while i < content.len() {
56 let b = content[i];
57 if b != b'\\' {
58 result.push(b);
59 i += 1;
60 continue;
61 }
62
63 let next_index = i + 1;
64 let Some(&next) = content.get(next_index) else {
65 result.push(b'\\');
66 i += 1;
67 continue;
68 };
69
70 let mut consumed = 2;
73
74 match next {
75 b'\\' => result.push(b'\\'),
76 b'\'' if quote_char == Some(b'\'') => result.push(b'\''),
77 b'"' if quote_char == Some(b'"') => result.push(b'"'),
78 b'$' if quote_char == Some(b'"') => result.push(b'$'),
79 b'n' if quote_char == Some(b'"') => result.push(b'\n'),
80 b't' if quote_char == Some(b'"') => result.push(b'\t'),
81 b'r' if quote_char == Some(b'"') => result.push(b'\r'),
82 b'v' if quote_char == Some(b'"') => result.push(0x0B),
83 b'e' if quote_char == Some(b'"') => result.push(0x1B),
84 b'f' if quote_char == Some(b'"') => result.push(0x0C),
85 b'x' if quote_char == Some(b'"') => {
86 let mut hex_val = 0u8;
87 let mut hex_len = 0;
88 let mut j = i + 2;
89 while hex_len < 2 && j < content.len() {
90 let c = content[j];
91 let digit = if c.is_ascii_digit() {
92 c - b'0'
93 } else if (b'a'..=b'f').contains(&c) {
94 c - b'a' + 10
95 } else if (b'A'..=b'F').contains(&c) {
96 c - b'A' + 10
97 } else {
98 break;
99 };
100 hex_val = hex_val * 16 + digit;
101 hex_len += 1;
102 j += 1;
103 }
104 if hex_len > 0 {
105 result.push(hex_val);
106 consumed = 2 + hex_len;
107 } else {
108 result.push(b'\\');
110 result.push(b'x');
111 }
112 }
113 c if quote_char == Some(b'"') && c.is_ascii_digit() => {
114 let mut octal_val = 0u16;
115 let mut octal_len = 0;
116 let mut j = i + 1;
117 while octal_len < 3 && j < content.len() {
118 let d = content[j];
119 if d.is_ascii_digit() && d <= b'7' {
120 octal_val = octal_val * 8 + u16::from(d - b'0');
121 octal_len += 1;
122 j += 1;
123 } else {
124 break;
125 }
126 }
127 if octal_len > 0 {
128 result.push(octal_val as u8);
130 consumed = 1 + octal_len;
131 } else {
132 result.push(b'\\');
133 result.push(next);
134 }
135 }
136 _ => {
137 result.push(b'\\');
139 result.push(next);
140 }
141 }
142
143 i += consumed;
144 }
145
146 Some(result.into_bump_slice())
147}
148
/// Parses a float literal, ignoring any `_` bytes it contains.
///
/// Literals without underscores are parsed directly. Otherwise the
/// underscores are stripped into a 64-byte stack buffer, falling back to a
/// heap allocation only when more than 64 bytes remain after stripping.
///
/// Returns `None` when the (stripped) bytes are not valid UTF-8 or are not
/// accepted by `str::parse::<f64>`.
#[inline]
#[must_use]
pub fn parse_literal_float(value: &[u8]) -> Option<f64> {
    const STACK_CAPACITY: usize = 64;

    fn parse(digits: &[u8]) -> Option<f64> {
        std::str::from_utf8(digits).ok()?.parse().ok()
    }

    if !value.contains(&b'_') {
        return parse(value);
    }

    let mut stack = [0u8; STACK_CAPACITY];
    let mut used = 0;

    for byte in value.iter().copied().filter(|&byte| byte != b'_') {
        let Some(slot) = stack.get_mut(used) else {
            // Stack buffer exhausted: redo the stripping into a heap buffer.
            let heap: std::vec::Vec<u8> = value.iter().copied().filter(|&byte| byte != b'_').collect();
            return parse(&heap);
        };

        *slot = byte;
        used += 1;
    }

    parse(&stack[..used])
}
174
/// Parses an integer literal into a `u64`, saturating at `u64::MAX`.
///
/// Recognised forms:
///
/// - `0x` / `0X` prefix: hexadecimal.
/// - `0o` / `0O` prefix: octal.
/// - `0b` / `0B` prefix: binary.
/// - A leading `0` followed only by octal digits and `_`: octal.
/// - Anything else: decimal.
///
/// `_` bytes after the prefix are skipped.
///
/// Returns `None` when the input is empty, contains no digit, or contains a
/// byte that is not a digit of the selected radix. Values that do not fit in
/// a `u64` yield `Some(u64::MAX)`.
#[inline]
#[must_use]
pub fn parse_literal_integer(bytes: &[u8]) -> Option<u64> {
    let (radix, digits): (u128, &[u8]) = match bytes {
        [] => return None,
        [b'0', b'x' | b'X', rest @ ..] => (16, rest),
        [b'0', b'o' | b'O', rest @ ..] => (8, rest),
        [b'0', b'b' | b'B', rest @ ..] => (2, rest),
        // Legacy octal: a leading zero followed solely by octal digits / separators.
        [b'0', rest @ ..] if !rest.is_empty() && rest.iter().all(|b| matches!(b, b'_' | b'0'..=b'7')) => {
            (8, rest)
        }
        _ => (10, bytes),
    };

    let mut value: u128 = 0;
    let mut seen_digit = false;

    for &byte in digits.iter().filter(|&&byte| byte != b'_') {
        let digit = u128::from(match byte {
            b'0'..=b'9' => byte - b'0',
            b'a'..=b'f' => byte - b'a' + 10,
            b'A'..=b'F' => byte - b'A' + 10,
            _ => return None,
        });

        if digit >= radix {
            return None;
        }

        seen_digit = true;

        // Overflowing even the 128-bit accumulator saturates immediately.
        let Some(next) = value.checked_mul(radix).and_then(|scaled| scaled.checked_add(digit)) else {
            return Some(u64::MAX);
        };
        value = next;
    }

    if !seen_digit {
        return None;
    }

    Some(u64::try_from(value).unwrap_or(u64::MAX))
}
235
/// Lookup table indexed by byte value: `true` for bytes that may begin an
/// identifier, i.e. ASCII letters and `_`.
///
/// NOTE(review): bytes `0x80..=0xFF` are accepted by `IS_IDENT_PART` but not
/// here — confirm this asymmetry is intentional.
static IS_IDENT_START: [bool; 256] = {
    let mut table = [false; 256];
    let mut index = 0usize;
    while index < 256 {
        let byte = index as u8;
        table[index] = byte.is_ascii_alphabetic() || byte == b'_';
        index += 1;
    }

    table
};
251
/// Lookup table indexed by byte value: `true` for bytes that may continue an
/// identifier, i.e. ASCII letters, ASCII digits, `_`, and every byte in
/// `0x80..=0xFF`.
static IS_IDENT_PART: [bool; 256] = {
    let mut table = [false; 256];
    let mut index = 0usize;
    while index < 256 {
        let byte = index as u8;
        table[index] = byte.is_ascii_alphanumeric() || byte == b'_' || byte >= 0x80;
        index += 1;
    }

    table
};
266
267#[inline(always)]
269#[must_use]
270pub const fn is_start_of_identifier(byte: &u8) -> bool {
271 IS_IDENT_START[*byte as usize]
272}
273
274#[inline(always)]
276#[must_use]
277pub const fn is_part_of_identifier(byte: &u8) -> bool {
278 IS_IDENT_PART[*byte as usize]
279}
280
281#[inline(always)]
287#[must_use]
288pub fn scan_identifier_length(bytes: &[u8], offset: usize) -> usize {
289 let mut len = 1;
290 let remaining = &bytes[offset + 1..];
291
292 for &b in remaining {
293 if IS_IDENT_PART[b as usize] {
294 len += 1;
295 } else {
296 break;
297 }
298 }
299
300 len
301}
302
303#[inline]
328pub fn read_digits_of_base(input: &Input, offset: usize, base: u8) -> usize {
329 if base == 16 {
330 read_digits_with(input, offset, u8::is_ascii_hexdigit)
331 } else {
332 let max = b'0' + base;
333
334 read_digits_with(input, offset, |b| b >= &b'0' && b < &max)
335 }
336}
337
338#[inline]
339fn read_digits_with<F>(input: &Input, offset: usize, is_digit: F) -> usize
340where
341 F: Fn(&u8) -> bool,
342{
343 let bytes = input.bytes;
344 let total = input.length;
345 let start = input.offset;
346 let mut pos = start + offset; while pos < total {
349 let current = bytes[pos];
350 if is_digit(¤t) {
351 pos += 1;
352 } else if pos + 1 < total && bytes[pos] == number_separator!() && is_digit(&bytes[pos + 1]) {
353 pos += 2; } else {
355 break;
356 }
357 }
358
359 pos - start
361}
362
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `parse_literal_integer(input)` returns `expected`,
    /// reporting failures at the caller's line.
    #[track_caller]
    fn assert_integer(input: &[u8], expected: Option<u64>) {
        assert_eq!(parse_literal_integer(input), expected);
    }

    #[test]
    fn test_parse_literal_integer() {
        // Plain literals in every supported radix.
        assert_integer(b"123", Some(123));
        assert_integer(b"0", Some(0));
        assert_integer(b"0b1010", Some(10));
        assert_integer(b"0o17", Some(15));
        assert_integer(b"0x1A3F", Some(6719));
        assert_integer(b"0XFF", Some(255));

        // The same radices with `_` separators.
        assert_integer(b"0_1_2_3", Some(83));
        assert_integer(b"0b1_0_1_0", Some(10));
        assert_integer(b"0o1_7", Some(15));
        assert_integer(b"0x1_A_3_F", Some(6719));

        // Empty input and digits outside the radix are rejected.
        assert_integer(b"", None);
        assert_integer(b"0xGHI", None);
        assert_integer(b"0b102", None);
        assert_integer(b"0o89", None);
    }
}