1use bumpalo::Bump;
2use bumpalo::collections::Vec;
3
4use crate::input::Input;
5use crate::number_separator;
6
7pub fn parse_literal_string_in<'arena>(
18 arena: &'arena Bump,
19 s: &'arena str,
20 quote_char: Option<char>,
21 has_quote: bool,
22) -> Option<&'arena str> {
23 if s.is_empty() {
24 return Some("");
25 }
26
27 let s = if has_quote && (s.starts_with("b\"") || s.starts_with("b'") || s.starts_with("B\"") || s.starts_with("B'"))
28 {
29 &s[1..]
30 } else {
31 s
32 };
33
34 let (quote_char, content) = if let Some(quote_char) = quote_char {
35 (Some(quote_char), s)
36 } else if !has_quote {
37 (None, s)
38 } else if s.starts_with('"') && s.ends_with('"') && s.len() >= 2 {
39 (Some('"'), &s[1..s.len() - 1])
40 } else if s.starts_with('\'') && s.ends_with('\'') && s.len() >= 2 {
41 (Some('\''), &s[1..s.len() - 1])
42 } else {
43 return None;
44 };
45
46 let needs_processing = content.contains('\\') || quote_char.is_some_and(|q| content.contains(q));
47 if !needs_processing {
48 return Some(content);
49 }
50
51 let mut result = Vec::with_capacity_in(content.len(), arena);
52 let mut chars = content.chars().peekable();
53 let mut buf = [0; 4];
54
55 while let Some(c) = chars.next() {
56 if c != '\\' {
57 result.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
58 continue;
59 }
60
61 let Some(&next_char) = chars.peek() else {
62 result.push(b'\\');
63 continue;
64 };
65
66 let mut consumed = true;
67
68 match next_char {
69 '\\' => result.push(b'\\'),
70 '\'' if quote_char == Some('\'') => result.push(b'\''),
71 '"' if quote_char == Some('"') => result.push(b'"'),
72 '$' if quote_char == Some('"') => result.push(b'$'),
73 'n' if quote_char == Some('"') => result.push(b'\n'),
74 't' if quote_char == Some('"') => result.push(b'\t'),
75 'r' if quote_char == Some('"') => result.push(b'\r'),
76 'v' if quote_char == Some('"') => result.push(0x0B),
77 'e' if quote_char == Some('"') => result.push(0x1B),
78 'f' if quote_char == Some('"') => result.push(0x0C),
79 'x' if quote_char == Some('"') => {
80 chars.next(); let mut hex_val = 0u8;
82 let mut hex_len = 0;
83 while let Some(peeked) = chars.peek() {
85 if hex_len < 2 && peeked.is_ascii_hexdigit() {
86 hex_val = hex_val * 16 + peeked.to_digit(16).unwrap() as u8;
87 hex_len += 1;
88 chars.next(); } else {
90 break;
91 }
92 }
93 if hex_len > 0 {
94 result.push(hex_val);
95 } else {
96 result.push(b'\\');
98 result.push(b'x');
99 }
100
101 consumed = false;
102 }
103 c if quote_char == Some('"') && c.is_ascii_digit() => {
104 let mut octal_val = 0u16;
105 let mut octal_len = 0;
106
107 while let Some(peeked) = chars.peek() {
108 if octal_len < 3 && peeked.is_ascii_digit() && *peeked <= '7' {
109 octal_val = octal_val * 8 + peeked.to_digit(8).unwrap() as u16;
110 octal_len += 1;
111 chars.next(); } else {
113 break;
114 }
115 }
116 if octal_len > 0 {
117 result.push(octal_val as u8);
119 } else {
120 result.push(b'\\');
121 result.extend_from_slice(next_char.encode_utf8(&mut buf).as_bytes());
122 chars.next();
123 }
124
125 consumed = false;
126 }
127 _ => {
128 result.push(b'\\');
130 result.extend_from_slice(next_char.encode_utf8(&mut buf).as_bytes());
131 }
132 }
133
134 if consumed {
135 chars.next(); }
137 }
138
139 std::str::from_utf8(result.into_bump_slice()).ok()
140}
141
/// Resolves the escape sequences of a string literal into an owned `String`.
///
/// # Parameters
///
/// - `s`: the literal text.
/// - `quote_char`: when `Some`, `s` is used as the content as-is and this character selects the
///   escape rules. When `None` and `has_quote` is set, the delimiter (`"` or `'`) is detected
///   from `s` and stripped.
/// - `has_quote`: whether `s` is expected to carry its surrounding quotes.
///
/// # Returns
///
/// `Some(String)` with the resolved content (an empty string for an empty input), or `None` when
/// `has_quote` is set, no `quote_char` was given, and `s` is not wrapped in a matching pair of
/// `"` or `'`.
///
/// # Escape rules
///
/// - `\\` always yields a single backslash.
/// - `\'` yields `'` only for single-quoted content.
/// - For double-quoted content: `\"`, `\$`, `\n`, `\t`, `\r`, `\v`, `\e`, `\f`, `\x` followed by
///   one or two hex digits, and one to three octal digits.
/// - `\x` without any hex digit is kept literally, and an octal value above `0o377` wraps to
///   its low byte; both match what `parse_literal_string_in` does.
/// - Anything else keeps the backslash and the following character verbatim.
///
/// Note: the byte produced by a hex or octal escape is pushed as the `char` with the same code
/// point, so values of `0x80` and above become the code points U+0080..=U+00FF rather than raw
/// bytes.
#[inline]
#[must_use]
pub fn parse_literal_string(s: &str, quote_char: Option<char>, has_quote: bool) -> Option<String> {
    if s.is_empty() {
        return Some(String::new());
    }

    let (quote_char, content) = if let Some(quote_char) = quote_char {
        (Some(quote_char), s)
    } else if !has_quote {
        (None, s)
    } else if s.len() >= 2 && s.starts_with('"') && s.ends_with('"') {
        (Some('"'), &s[1..s.len() - 1])
    } else if s.len() >= 2 && s.starts_with('\'') && s.ends_with('\'') {
        (Some('\''), &s[1..s.len() - 1])
    } else {
        return None;
    };

    let single_quoted = quote_char == Some('\'');
    let double_quoted = quote_char == Some('"');

    // The resolved text is never longer than the raw content.
    let mut result = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        // A trailing backslash has nothing to escape and is kept as-is.
        let Some(&next_char) = chars.peek() else {
            result.push('\\');
            continue;
        };

        // Escapes that map one character to exactly one character.
        let simple = match next_char {
            '\\' => Some('\\'),
            '\'' if single_quoted => Some('\''),
            '"' if double_quoted => Some('"'),
            '$' if double_quoted => Some('$'),
            'n' if double_quoted => Some('\n'),
            't' if double_quoted => Some('\t'),
            'r' if double_quoted => Some('\r'),
            'v' if double_quoted => Some('\x0B'),
            'e' if double_quoted => Some('\x1B'),
            'f' if double_quoted => Some('\x0C'),
            _ => None,
        };

        if let Some(resolved) = simple {
            result.push(resolved);
            chars.next();
            continue;
        }

        if double_quoted && next_char == 'x' {
            // Consume the `x`, then at most two hex digits.
            chars.next();

            let mut value = 0u8;
            let mut digits = 0;
            while digits < 2 {
                let Some(digit) = chars.peek().and_then(|ch| ch.to_digit(16)) else {
                    break;
                };

                value = value * 16 + digit as u8;
                digits += 1;
                chars.next();
            }

            if digits > 0 {
                result.push(value as char);
            } else {
                // `\x` without digits is not an escape: keep it verbatim instead of rejecting
                // the whole literal.
                result.push_str("\\x");
            }

            continue;
        }

        if double_quoted && next_char.is_ascii_digit() {
            // At most three octal digits; `8` and `9` read none and fall through below.
            let mut value = 0u16;
            let mut digits = 0;
            while digits < 3 {
                let Some(digit) = chars.peek().and_then(|ch| ch.to_digit(8)) else {
                    break;
                };

                value = value * 8 + digit as u16;
                digits += 1;
                chars.next();
            }

            if digits > 0 {
                // Deliberate truncation: values above 0o377 wrap to their low byte.
                result.push((value as u8) as char);
                continue;
            }
        }

        // Unrecognised escape: keep the backslash and the character that follows it.
        result.push('\\');
        result.push(next_char);
        chars.next();
    }

    Some(result)
}
289
/// Parses a float literal that may contain `_` digit separators.
///
/// Underscores are removed and the remainder is handed to `str::parse::<f64>`, so the accepted
/// syntax is whatever that parser accepts. Returns `None` when the stripped text does not parse.
///
/// Literals whose stripped form fits in 64 bytes are assembled in a stack buffer; longer ones
/// fall back to a heap-allocated copy.
#[inline]
#[must_use]
pub fn parse_literal_float(value: &str) -> Option<f64> {
    const STACK_CAPACITY: usize = 64;

    let bytes = value.as_bytes();

    // Fast path: no separator, parse the input directly.
    if !bytes.contains(&b'_') {
        return value.parse::<f64>().ok();
    }

    let mut buf = [0u8; STACK_CAPACITY];
    let mut len = 0;

    for &byte in bytes.iter().filter(|&&byte| byte != b'_') {
        let Some(slot) = buf.get_mut(len) else {
            // Too long for the stack buffer: strip the separators on the heap instead.
            return value.replace('_', "").parse::<f64>().ok();
        };

        *slot = byte;
        len += 1;
    }

    // `_` is a single ASCII byte, so removing it from valid UTF-8 leaves valid UTF-8 and the
    // checked conversion cannot fail here; using it avoids an `unsafe` block.
    std::str::from_utf8(&buf[..len]).ok()?.parse::<f64>().ok()
}
317
/// Parses an integer literal, with optional radix prefix and `_` separators, into a `u64`.
///
/// Radix selection:
///
/// - `0x` / `0X` selects hexadecimal, `0o` / `0O` octal, `0b` / `0B` binary.
/// - A leading `0` followed by at least one more byte, all of which are octal digits or `_`,
///   selects octal.
/// - Everything else is read as decimal.
///
/// Returns `None` for an empty input, for a literal without any digit, and when a character is
/// not a digit of the selected radix. Values above `u64::MAX` saturate to `u64::MAX`.
///
/// Note: the value is accumulated in a `u128`; if that accumulator itself overflows, the
/// function returns `Some(u64::MAX)` right away without looking at the remaining characters.
#[inline]
#[must_use]
pub fn parse_literal_integer(value: &str) -> Option<u64> {
    let bytes = value.as_bytes();

    let (radix, digits): (u32, &[u8]) = match bytes {
        [] => return None,
        [b'0', b'x' | b'X', rest @ ..] => (16, rest),
        [b'0', b'o' | b'O', rest @ ..] => (8, rest),
        [b'0', b'b' | b'B', rest @ ..] => (2, rest),
        [b'0', rest @ ..] if !rest.is_empty() && rest.iter().all(|b| matches!(b, b'_' | b'0'..=b'7')) => {
            (8, rest)
        }
        _ => (10, bytes),
    };

    let wide_radix = u128::from(radix);
    let mut accumulator: u128 = 0;
    let mut seen_digit = false;

    for &byte in digits.iter().filter(|&&byte| byte != b'_') {
        // `to_digit` rejects both non-alphanumeric bytes and digits outside the radix.
        let digit = char::from(byte).to_digit(radix)?;
        seen_digit = true;

        let Some(next) = accumulator.checked_mul(wide_radix).and_then(|scaled| scaled.checked_add(u128::from(digit)))
        else {
            return Some(u64::MAX);
        };

        accumulator = next;
    }

    // Saturate anything that does not fit in a `u64`.
    seen_digit.then(|| u64::try_from(accumulator).unwrap_or(u64::MAX))
}
379
/// Byte-indexed lookup table: `true` for the bytes that may start an identifier, i.e. ASCII
/// letters (`a-z`, `A-Z`) and `_`.
///
/// NOTE(review): unlike `IS_IDENT_PART`, bytes `0x80..=0xFF` are not accepted here; confirm
/// that this asymmetry is intentional.
static IS_IDENT_START: [bool; 256] = {
    let mut table = [false; 256];
    let mut index = 0usize;

    while index < 256 {
        let byte = index as u8;
        table[index] = byte.is_ascii_alphabetic() || byte == b'_';
        index += 1;
    }

    table
};
395
/// Byte-indexed lookup table: `true` for the bytes that may continue an identifier, i.e. ASCII
/// letters, ASCII digits, `_`, and every byte in `0x80..=0xFF`.
static IS_IDENT_PART: [bool; 256] = {
    let mut table = [false; 256];
    let mut index = 0usize;

    while index < 256 {
        let byte = index as u8;
        table[index] = byte.is_ascii_alphanumeric() || byte == b'_' || byte >= 0x80;
        index += 1;
    }

    table
};
410
411#[inline(always)]
413#[must_use]
414pub const fn is_start_of_identifier(byte: &u8) -> bool {
415 IS_IDENT_START[*byte as usize]
416}
417
418#[inline(always)]
420#[must_use]
421pub const fn is_part_of_identifier(byte: &u8) -> bool {
422 IS_IDENT_PART[*byte as usize]
423}
424
425#[inline(always)]
431#[must_use]
432pub fn scan_identifier_length(bytes: &[u8], offset: usize) -> usize {
433 let mut len = 1;
434 let remaining = &bytes[offset + 1..];
435
436 for &b in remaining {
437 if IS_IDENT_PART[b as usize] {
438 len += 1;
439 } else {
440 break;
441 }
442 }
443
444 len
445}
446
447#[inline]
472pub fn read_digits_of_base(input: &Input, offset: usize, base: u8) -> usize {
473 if base == 16 {
474 read_digits_with(input, offset, u8::is_ascii_hexdigit)
475 } else {
476 let max = b'0' + base;
477
478 read_digits_with(input, offset, |b| b >= &b'0' && b < &max)
479 }
480}
481
482#[inline]
483fn read_digits_with<F: Fn(&u8) -> bool>(input: &Input, offset: usize, is_digit: F) -> usize {
484 let bytes = input.bytes;
485 let total = input.length;
486 let start = input.offset;
487 let mut pos = start + offset; while pos < total {
490 let current = bytes[pos];
491 if is_digit(¤t) {
492 pos += 1;
493 } else if pos + 1 < total && bytes[pos] == number_separator!() && is_digit(&bytes[pos + 1]) {
494 pos += 2; } else {
496 break;
497 }
498 }
499
500 pos - start
502}
503
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `parse_literal_integer` against a table of `(input, expected)` pairs covering
    /// every radix prefix, `_` separators, and invalid inputs.
    #[test]
    fn test_parse_literal_integer() {
        let cases: &[(&str, Option<u64>)] = &[
            ("123", Some(123)),
            ("0", Some(0)),
            ("0b1010", Some(10)),
            ("0o17", Some(15)),
            ("0x1A3F", Some(6719)),
            ("0XFF", Some(255)),
            ("0_1_2_3", Some(83)),
            ("0b1_0_1_0", Some(10)),
            ("0o1_7", Some(15)),
            ("0x1_A_3_F", Some(6719)),
            ("", None),
            ("0xGHI", None),
            ("0b102", None),
            ("0o89", None),
        ];

        for &(input, expected) in cases {
            assert_eq!(parse_literal_integer(input), expected, "input: {input:?}");
        }
    }
}
531}