// mago_syntax_core/utils.rs
use bumpalo::Bump;
use bumpalo::collections::Vec;

use crate::input::Input;
use crate::number_separator;
6
7pub fn parse_literal_string_in<'arena>(
18 arena: &'arena Bump,
19 s: &'arena str,
20 quote_char: Option<char>,
21 has_quote: bool,
22) -> Option<&'arena str> {
23 if s.is_empty() {
24 return Some("");
25 }
26
27 let (quote_char, content) = if let Some(quote_char) = quote_char {
28 (Some(quote_char), s)
29 } else if !has_quote {
30 (None, s)
31 } else if s.starts_with('"') && s.ends_with('"') && s.len() >= 2 {
32 (Some('"'), &s[1..s.len() - 1])
33 } else if s.starts_with('\'') && s.ends_with('\'') && s.len() >= 2 {
34 (Some('\''), &s[1..s.len() - 1])
35 } else {
36 return None;
37 };
38
39 let needs_processing = content.contains('\\') || quote_char.is_some_and(|q| content.contains(q));
40 if !needs_processing {
41 return Some(content);
42 }
43
44 let mut result = Vec::with_capacity_in(content.len(), arena);
45 let mut chars = content.chars().peekable();
46 let mut buf = [0; 4];
47
48 while let Some(c) = chars.next() {
49 if c != '\\' {
50 result.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
51 continue;
52 }
53
54 let Some(&next_char) = chars.peek() else {
55 result.push(b'\\');
56 continue;
57 };
58
59 let mut consumed = true;
60
61 match next_char {
62 '\\' => result.push(b'\\'),
63 '\'' if quote_char == Some('\'') => result.push(b'\''),
64 '"' if quote_char == Some('"') => result.push(b'"'),
65 '$' if quote_char == Some('"') => result.push(b'$'),
66 'n' if quote_char == Some('"') => result.push(b'\n'),
67 't' if quote_char == Some('"') => result.push(b'\t'),
68 'r' if quote_char == Some('"') => result.push(b'\r'),
69 'v' if quote_char == Some('"') => result.push(0x0B),
70 'e' if quote_char == Some('"') => result.push(0x1B),
71 'f' if quote_char == Some('"') => result.push(0x0C),
72 '0' if quote_char == Some('"') => result.push(0x00),
73 'x' if quote_char == Some('"') => {
74 chars.next(); let mut hex_val = 0u8;
76 let mut hex_len = 0;
77 while let Some(peeked) = chars.peek() {
79 if hex_len < 2 && peeked.is_ascii_hexdigit() {
80 hex_val = hex_val * 16 + peeked.to_digit(16).unwrap() as u8;
81 hex_len += 1;
82 chars.next(); } else {
84 break;
85 }
86 }
87 if hex_len > 0 {
88 result.push(hex_val);
89 } else {
90 result.push(b'\\');
92 result.push(b'x');
93 }
94
95 consumed = false;
96 }
97 c if quote_char == Some('"') && c.is_ascii_digit() => {
98 let mut octal_val = 0u8;
99 let mut octal_len = 0;
100
101 while let Some(peeked) = chars.peek() {
102 if octal_len < 3 && peeked.is_ascii_digit() && *peeked <= '7' {
103 octal_val = octal_val * 8 + peeked.to_digit(8).unwrap() as u8;
104 octal_len += 1;
105 chars.next(); } else {
107 break;
108 }
109 }
110 if octal_len > 0 {
111 result.push(octal_val);
112 } else {
113 result.push(b'\\');
114 result.push(b'0');
115 }
116
117 consumed = false;
118 }
119 _ => {
120 if quote_char == Some('\'') {
122 result.push(b'\\');
124 result.extend_from_slice(next_char.encode_utf8(&mut buf).as_bytes());
125 } else {
126 result.extend_from_slice(next_char.encode_utf8(&mut buf).as_bytes());
128 }
129 }
130 }
131
132 if consumed {
133 chars.next(); }
135 }
136
137 std::str::from_utf8(result.into_bump_slice()).ok()
138}
139
/// Decodes the escape sequences of a string literal into an owned `String`.
///
/// # Quote handling
///
/// - `quote_char = Some(q)`: `s` is used as-is (no quotes are stripped) and `q` selects the
///   escape rules.
/// - `quote_char = None` and `has_quote = false`: `s` is used as-is and only `\\` is decoded.
/// - `quote_char = None` and `has_quote = true`: `s` must be wrapped in a matching pair of `"`
///   or `'`; the pair is stripped and selects the escape rules. Anything else yields `None`.
///
/// # Escapes
///
/// - Always: `\\` becomes a single backslash.
/// - Single-quoted: `\'` becomes `'`.
/// - Double-quoted: `\"`, `\$`, `\n`, `\t`, `\r`, `\v`, `\e`, `\f`, `\xH` / `\xHH` (hex) and
///   `\O` .. `\OOO` (octal, wrapped to 8 bits the way PHP does, so `\400` equals `\000`).
/// - Every other backslash (unknown escape, `\x` without hex digits, `\8`, a trailing `\`) is
///   kept literally. `\u{...}` is not decoded.
///
/// Because the result is a `String`, a hex/octal value `b` is stored as the code point
/// `char::from(b)`; values `0x80..=0xFF` therefore become U+0080..U+00FF rather than raw bytes.
///
/// # Returns
///
/// `Some(decoded)` on success (`Some(String::new())` for an empty `s`), or `None` when
/// `has_quote` is set but `s` is not wrapped in matching quotes.
#[inline]
#[must_use]
pub fn parse_literal_string(s: &str, quote_char: Option<char>, has_quote: bool) -> Option<String> {
    if s.is_empty() {
        return Some(String::new());
    }

    let (quote_char, content) = if let Some(quote_char) = quote_char {
        (Some(quote_char), s)
    } else if !has_quote {
        (None, s)
    } else if s.len() >= 2 && s.starts_with('"') && s.ends_with('"') {
        (Some('"'), &s[1..s.len() - 1])
    } else if s.len() >= 2 && s.starts_with('\'') && s.ends_with('\'') {
        (Some('\''), &s[1..s.len() - 1])
    } else {
        return None;
    };

    let double_quoted = quote_char == Some('"');
    let single_quoted = quote_char == Some('\'');

    // Decoding never produces more bytes than the input holds for ASCII escapes.
    let mut result = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        // A backslash at the very end of the content has nothing to escape.
        let Some(&next_char) = chars.peek() else {
            result.push('\\');
            continue;
        };

        // Escapes that map one character to exactly one character.
        let simple = match next_char {
            '\\' => Some('\\'),
            '\'' if single_quoted => Some('\''),
            '"' if double_quoted => Some('"'),
            '$' if double_quoted => Some('$'),
            'n' if double_quoted => Some('\n'),
            't' if double_quoted => Some('\t'),
            'r' if double_quoted => Some('\r'),
            'v' if double_quoted => Some('\x0B'),
            'e' if double_quoted => Some('\x1B'),
            'f' if double_quoted => Some('\x0C'),
            _ => None,
        };

        if let Some(decoded) = simple {
            result.push(decoded);
            chars.next();
            continue;
        }

        match next_char {
            'x' if double_quoted => {
                // Consume the `x`, then up to two hex digits.
                chars.next();

                let mut value = 0u8;
                let mut digits = 0;
                while digits < 2 {
                    let Some(digit) = chars.peek().and_then(|p| p.to_digit(16)) else {
                        break;
                    };

                    // Two hex digits top out at 0xFF, so this cannot overflow.
                    value = value * 16 + digit as u8;
                    digits += 1;
                    chars.next();
                }

                if digits > 0 {
                    result.push(char::from(value));
                } else {
                    // `\x` without digits is not an escape; keep it verbatim.
                    result.push_str("\\x");
                }
            }
            '0'..='7' if double_quoted => {
                // Up to three octal digits; the first one is `next_char` itself.
                let mut value = 0u8;
                let mut digits = 0;
                while digits < 3 {
                    let Some(digit) = chars.peek().and_then(|p| p.to_digit(8)) else {
                        break;
                    };

                    // Values above 0o377 silently wrap to one byte, as in PHP.
                    value = value.wrapping_mul(8).wrapping_add(digit as u8);
                    digits += 1;
                    chars.next();
                }

                result.push(char::from(value));
            }
            _ => {
                // Not an escape: keep the backslash. The following character is left in the
                // iterator and copied by the next loop iteration (it is never a backslash here).
                result.push('\\');
            }
        }
    }

    Some(result)
}
289
/// Parses a float literal, ignoring every `_` digit separator.
///
/// All underscores are removed from `value` and the remainder is handed to `str::parse::<f64>`.
/// Returns `None` when that remainder is not something `f64`'s parser accepts (including the
/// empty string).
#[inline]
#[must_use]
pub fn parse_literal_float(value: &str) -> Option<f64> {
    let without_separators: String = value.chars().filter(|&ch| ch != '_').collect();

    without_separators.parse().ok()
}
297
/// Parses an integer literal into a `u64`, saturating at `u64::MAX`.
///
/// The radix is chosen from the prefix:
///
/// - `0x` / `0X`: hexadecimal
/// - `0o` / `0O`: octal
/// - `0b` / `0B`: binary
/// - a leading `0` followed by at least one more character: octal
/// - anything else: decimal
///
/// `_` characters after the prefix are skipped. Returns `None` for an empty input, for input
/// without any digit after the prefix, or when a character is not a digit of the chosen radix.
/// Values larger than `u64::MAX` yield `Some(u64::MAX)`; if the running value even exceeds
/// `u128`, that result is returned immediately without inspecting the remaining characters.
#[inline]
#[must_use]
pub fn parse_literal_integer(value: &str) -> Option<u64> {
    if value.is_empty() {
        return None;
    }

    // Every prefix byte matched here is ASCII, so the slice offsets are char boundaries.
    let (radix, digits) = match value.as_bytes() {
        [b'0', b'x' | b'X', ..] => (16u32, &value[2..]),
        [b'0', b'o' | b'O', ..] => (8, &value[2..]),
        [b'0', b'b' | b'B', ..] => (2, &value[2..]),
        [b'0', _, ..] => (8, &value[1..]),
        _ => (10, value),
    };

    let base = u128::from(radix);
    let mut accumulator: u128 = 0;
    let mut seen_digit = false;

    for ch in digits.chars().filter(|&ch| ch != '_') {
        let digit = u128::from(ch.to_digit(radix)?);
        seen_digit = true;

        // Overflowing even the u128 accumulator means the value is far beyond u64::MAX.
        let Some(next) = accumulator.checked_mul(base).and_then(|scaled| scaled.checked_add(digit)) else {
            return Some(u64::MAX);
        };
        accumulator = next;
    }

    if !seen_digit {
        return None;
    }

    // Clamp first, so the narrowing cast below is lossless.
    Some(accumulator.min(u128::from(u64::MAX)) as u64)
}
354
/// Returns `true` when `byte` is an ASCII letter (`a-z`, `A-Z`) or an underscore.
///
/// Digits and non-ASCII bytes (`0x80` and above) are rejected.
#[inline]
#[must_use]
pub fn is_start_of_identifier(byte: &u8) -> bool {
    matches!(*byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
360
/// Returns `true` when `byte` is an ASCII letter, an ASCII digit, an underscore, or any
/// non-ASCII byte (`0x80..=0xFF`).
#[inline]
#[must_use]
pub fn is_part_of_identifier(byte: &u8) -> bool {
    matches!(*byte, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | 0x80..=0xFF)
}
370
371#[inline]
396pub fn read_digits_of_base(input: &Input, offset: usize, base: u8) -> usize {
397 if base == 16 {
398 read_digits_with(input, offset, u8::is_ascii_hexdigit)
399 } else {
400 let max = b'0' + base;
401
402 read_digits_with(input, offset, |b| b >= &b'0' && b < &max)
403 }
404}
405
406#[inline]
407fn read_digits_with<F: Fn(&u8) -> bool>(input: &Input, offset: usize, is_digit: F) -> usize {
408 let bytes = input.bytes;
409 let total = input.length;
410 let start = input.offset;
411 let mut pos = start + offset; while pos < total {
414 let current = bytes[pos];
415 if is_digit(¤t) {
416 pos += 1;
417 } else if pos + 1 < total && bytes[pos] == number_separator!() && is_digit(&bytes[pos + 1]) {
418 pos += 2; } else {
420 break;
421 }
422 }
423
424 pos - start
426}
427
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `parse_literal_integer`: every supported radix prefix, digit
    /// separators, and inputs that must be rejected.
    #[test]
    fn test_parse_literal_integer() {
        let cases: &[(&str, Option<u64>)] = &[
            // Plain decimal.
            ("123", Some(123)),
            ("0", Some(0)),
            // Prefixed radixes.
            ("0b1010", Some(10)),
            ("0o17", Some(15)),
            ("0x1A3F", Some(6719)),
            ("0XFF", Some(255)),
            // Digit separators, including the legacy leading-zero octal form.
            ("0_1_2_3", Some(83)),
            ("0b1_0_1_0", Some(10)),
            ("0o1_7", Some(15)),
            ("0x1_A_3_F", Some(6719)),
            // Rejected inputs.
            ("", None),
            ("0xGHI", None),
            ("0b102", None),
            ("0o89", None),
        ];

        for (input, expected) in cases.iter() {
            assert_eq!(parse_literal_integer(input), *expected, "input: {:?}", input);
        }
    }
}