1use bumpalo::Bump;
2use bumpalo::collections::Vec;
3
4use crate::input::Input;
5use crate::number_separator;
6
7pub fn parse_literal_string_in<'arena>(
18 arena: &'arena Bump,
19 s: &'arena str,
20 quote_char: Option<char>,
21 has_quote: bool,
22) -> Option<&'arena str> {
23 if s.is_empty() {
24 return Some("");
25 }
26
27 let s = if has_quote && (s.starts_with("b\"") || s.starts_with("b'") || s.starts_with("B\"") || s.starts_with("B'"))
28 {
29 &s[1..]
30 } else {
31 s
32 };
33
34 let (quote_char, content) = if let Some(quote_char) = quote_char {
35 (Some(quote_char), s)
36 } else if !has_quote {
37 (None, s)
38 } else if s.starts_with('"') && s.ends_with('"') && s.len() >= 2 {
39 (Some('"'), &s[1..s.len() - 1])
40 } else if s.starts_with('\'') && s.ends_with('\'') && s.len() >= 2 {
41 (Some('\''), &s[1..s.len() - 1])
42 } else {
43 return None;
44 };
45
46 let needs_processing = content.contains('\\') || quote_char.is_some_and(|q| content.contains(q));
47 if !needs_processing {
48 return Some(content);
49 }
50
51 let mut result = Vec::with_capacity_in(content.len(), arena);
52 let mut chars = content.chars().peekable();
53 let mut buf = [0; 4];
54
55 while let Some(c) = chars.next() {
56 if c != '\\' {
57 result.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
58 continue;
59 }
60
61 let Some(&next_char) = chars.peek() else {
62 result.push(b'\\');
63 continue;
64 };
65
66 let mut consumed = true;
67
68 match next_char {
69 '\\' => result.push(b'\\'),
70 '\'' if quote_char == Some('\'') => result.push(b'\''),
71 '"' if quote_char == Some('"') => result.push(b'"'),
72 '$' if quote_char == Some('"') => result.push(b'$'),
73 'n' if quote_char == Some('"') => result.push(b'\n'),
74 't' if quote_char == Some('"') => result.push(b'\t'),
75 'r' if quote_char == Some('"') => result.push(b'\r'),
76 'v' if quote_char == Some('"') => result.push(0x0B),
77 'e' if quote_char == Some('"') => result.push(0x1B),
78 'f' if quote_char == Some('"') => result.push(0x0C),
79 'x' if quote_char == Some('"') => {
80 chars.next(); let mut hex_val = 0u8;
82 let mut hex_len = 0;
83 while let Some(peeked) = chars.peek() {
85 if hex_len < 2
86 && peeked.is_ascii_hexdigit()
87 && let Some(digit) = peeked.to_digit(16)
88 {
89 hex_val = hex_val * 16 + digit as u8;
90 hex_len += 1;
91 chars.next(); } else {
93 break;
94 }
95 }
96 if hex_len > 0 {
97 result.push(hex_val);
98 } else {
99 result.push(b'\\');
101 result.push(b'x');
102 }
103
104 consumed = false;
105 }
106 c if quote_char == Some('"') && c.is_ascii_digit() => {
107 let mut octal_val = 0u16;
108 let mut octal_len = 0;
109
110 while let Some(peeked) = chars.peek() {
111 if octal_len < 3
112 && peeked.is_ascii_digit()
113 && *peeked <= '7'
114 && let Some(digit) = peeked.to_digit(8)
115 {
116 octal_val = octal_val * 8 + digit as u16;
117 octal_len += 1;
118 chars.next(); } else {
120 break;
121 }
122 }
123 if octal_len > 0 {
124 result.push(octal_val as u8);
126 } else {
127 result.push(b'\\');
128 result.extend_from_slice(next_char.encode_utf8(&mut buf).as_bytes());
129 chars.next();
130 }
131
132 consumed = false;
133 }
134 _ => {
135 result.push(b'\\');
137 result.extend_from_slice(next_char.encode_utf8(&mut buf).as_bytes());
138 }
139 }
140
141 if consumed {
142 chars.next(); }
144 }
145
146 std::str::from_utf8(result.into_bump_slice()).ok()
147}
148
/// Decodes a PHP string literal into an owned `String`.
///
/// # Parameters
///
/// - `s`: the literal text. When `quote_char` is `None` and `has_quote` is
///   `true`, it must include its surrounding quotes and may carry a `b`/`B`
///   binary-string prefix.
/// - `quote_char`: when `Some`, `s` is treated as already-unquoted content and
///   this value selects which escape sequences are recognised.
/// - `has_quote`: when `true` (and `quote_char` is `None`), the quote style is
///   detected from the first and last character of `s`.
///
/// # Returns
///
/// `Some(decoded)` on success, or `None` when quote detection fails (missing or
/// mismatched quotes).
///
/// # Escapes
///
/// `\\` is always decoded. `\'` is decoded only for single-quoted content. For
/// double-quoted content `\"`, `\$`, `\n`, `\t`, `\r`, `\v`, `\e`, `\f`,
/// `\xH`/`\xHH` and one-to-three digit octal escapes are decoded. Hex and octal
/// escapes yield the `char` whose code point equals the byte value, and octal
/// values above `0o377` wrap to a single byte, as PHP does (`"\400" === "\000"`).
/// Any other backslash sequence, including `\x` without hex digits, is copied
/// through unchanged.
#[inline]
#[must_use]
pub fn parse_literal_string(s: &str, quote_char: Option<char>, has_quote: bool) -> Option<String> {
    // Consumes up to `max_digits` digits of `radix` and returns their value, or
    // `None` (consuming nothing) when the next character is not such a digit.
    fn take_digits(
        chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
        radix: u32,
        max_digits: usize,
    ) -> Option<u32> {
        let mut value = 0u32;
        let mut taken = 0usize;
        while taken < max_digits {
            let Some(digit) = chars.peek().and_then(|c| c.to_digit(radix)) else {
                break;
            };
            value = value * radix + digit;
            taken += 1;
            chars.next();
        }

        (taken > 0).then_some(value)
    }

    if s.is_empty() {
        return Some(String::new());
    }

    let (quote_char, content) = match quote_char {
        Some(quote) => (Some(quote), s),
        None if !has_quote => (None, s),
        None => {
            // Accept binary-string literals (`b"..."`, `B'...'`) by skipping the prefix.
            let unprefixed = match s.as_bytes() {
                [b'b' | b'B', b'"' | b'\'', ..] => &s[1..],
                _ => s,
            };

            let quote = unprefixed.chars().next().filter(|&c| c == '"' || c == '\'')?;
            // Both ends must carry the same quote; a lone quote character fails here.
            let inner = unprefixed.strip_prefix(quote)?.strip_suffix(quote)?;

            (Some(quote), inner)
        }
    };

    let single_quoted = quote_char == Some('\'');
    let double_quoted = quote_char == Some('"');

    // The decoded text is never longer than the raw content.
    let mut result = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        // A backslash at the very end of the content stays literal.
        let Some(&next_char) = chars.peek() else {
            result.push('\\');
            continue;
        };

        // Escapes that map one character to exactly one character.
        let simple = match next_char {
            '\\' => Some('\\'),
            '\'' if single_quoted => Some('\''),
            '"' | '$' if double_quoted => Some(next_char),
            'n' if double_quoted => Some('\n'),
            't' if double_quoted => Some('\t'),
            'r' if double_quoted => Some('\r'),
            'v' if double_quoted => Some('\x0B'),
            'e' if double_quoted => Some('\x1B'),
            'f' if double_quoted => Some('\x0C'),
            _ => None,
        };
        if let Some(unescaped) = simple {
            result.push(unescaped);
            chars.next();
            continue;
        }

        match next_char {
            'x' if double_quoted => {
                chars.next();
                match take_digits(&mut chars, 16, 2) {
                    Some(value) => result.push(char::from((value & 0xFF) as u8)),
                    // `\x` without hex digits is not an escape: keep it verbatim.
                    None => result.push_str("\\x"),
                }
            }
            digit if double_quoted && digit.is_ascii_digit() => match take_digits(&mut chars, 8, 3) {
                // Octal escapes silently overflow to fit in one byte.
                Some(value) => result.push(char::from((value & 0xFF) as u8)),
                // `\8` and `\9` are not octal escapes: keep them verbatim.
                None => {
                    result.push('\\');
                    result.push(digit);
                    chars.next();
                }
            },
            _ => {
                // Unknown escape: keep both the backslash and the following character.
                result.push('\\');
                result.push(next_char);
                chars.next();
            }
        }
    }

    Some(result)
}
302
303#[inline]
305#[must_use]
306pub fn parse_literal_float(value: &str) -> Option<f64> {
307 if memchr::memchr(b'_', value.as_bytes()).is_none() {
308 return value.parse::<f64>().ok();
309 }
310
311 let mut buf = [0u8; 64];
312 let mut len = 0;
313
314 for &b in value.as_bytes() {
315 if b != b'_' {
316 if len < 64 {
317 buf[len] = b;
318 len += 1;
319 } else {
320 let source = value.replace('_', "");
321 return source.parse::<f64>().ok();
322 }
323 }
324 }
325
326 let s = unsafe { std::str::from_utf8_unchecked(&buf[..len]) };
328 s.parse::<f64>().ok()
329}
330
/// Parses an integer literal into a `u64`, saturating at `u64::MAX` on overflow.
///
/// Recognised forms:
///
/// - `0x` / `0X` prefix: hexadecimal
/// - `0o` / `0O` prefix: octal
/// - `0b` / `0B` prefix: binary
/// - a leading `0` followed only by octal digits and `_`: legacy octal
/// - anything else: decimal
///
/// `_` separators are skipped wherever they appear after the prefix.
///
/// Returns `None` when the input is empty, contains no digits after the prefix,
/// or contains any byte that is not a digit of the selected radix. Every byte is
/// validated even after the value has saturated, so an over-long literal with a
/// trailing invalid character is still rejected.
#[inline]
#[must_use]
pub fn parse_literal_integer(value: &str) -> Option<u64> {
    let bytes = value.as_bytes();

    let (radix, digits): (u32, &[u8]) = match bytes {
        [] => return None,
        [b'0', b'x' | b'X', rest @ ..] => (16, rest),
        [b'0', b'o' | b'O', rest @ ..] => (8, rest),
        [b'0', b'b' | b'B', rest @ ..] => (2, rest),
        // Legacy octal (`0755`): only when everything after the zero is octal.
        [b'0', rest @ ..]
            if !rest.is_empty() && rest.iter().all(|&b| b == b'_' || matches!(b, b'0'..=b'7')) =>
        {
            (8, rest)
        }
        _ => (10, bytes),
    };

    let mut result: u64 = 0;
    let mut has_digits = false;

    for &b in digits {
        if b == b'_' {
            continue;
        }

        // `to_digit` rejects both non-alphanumeric bytes and digits outside the radix.
        let digit = char::from(b).to_digit(radix)?;
        has_digits = true;

        // Saturating arithmetic pins the value at `u64::MAX` once it overflows
        // while still letting the loop validate the remaining bytes.
        result = result.saturating_mul(u64::from(radix)).saturating_add(u64::from(digit));
    }

    has_digits.then_some(result)
}
392
/// Byte-indexed lookup table: `true` for bytes that may start an identifier,
/// i.e. ASCII letters and `_`.
static IS_IDENT_START: [bool; 256] = {
    let mut lookup = [false; 256];
    let mut index = 0usize;

    while index < 256 {
        let byte = index as u8;
        lookup[index] = byte == b'_' || byte.is_ascii_alphabetic();
        index += 1;
    }

    lookup
};
408
/// Byte-indexed lookup table: `true` for bytes that may continue an identifier,
/// i.e. ASCII letters, ASCII digits, `_`, and every byte in `0x80..=0xFF`.
static IS_IDENT_PART: [bool; 256] = {
    let mut lookup = [false; 256];
    let mut index = 0usize;

    while index < 256 {
        let byte = index as u8;
        lookup[index] = byte == b'_' || byte.is_ascii_alphanumeric() || byte >= 0x80;
        index += 1;
    }

    lookup
};
423
424#[inline(always)]
426#[must_use]
427pub const fn is_start_of_identifier(byte: &u8) -> bool {
428 IS_IDENT_START[*byte as usize]
429}
430
431#[inline(always)]
433#[must_use]
434pub const fn is_part_of_identifier(byte: &u8) -> bool {
435 IS_IDENT_PART[*byte as usize]
436}
437
438#[inline(always)]
444#[must_use]
445pub fn scan_identifier_length(bytes: &[u8], offset: usize) -> usize {
446 let mut len = 1;
447 let remaining = &bytes[offset + 1..];
448
449 for &b in remaining {
450 if IS_IDENT_PART[b as usize] {
451 len += 1;
452 } else {
453 break;
454 }
455 }
456
457 len
458}
459
460#[inline]
485pub fn read_digits_of_base(input: &Input, offset: usize, base: u8) -> usize {
486 if base == 16 {
487 read_digits_with(input, offset, u8::is_ascii_hexdigit)
488 } else {
489 let max = b'0' + base;
490
491 read_digits_with(input, offset, |b| b >= &b'0' && b < &max)
492 }
493}
494
495#[inline]
496fn read_digits_with<F: Fn(&u8) -> bool>(input: &Input, offset: usize, is_digit: F) -> usize {
497 let bytes = input.bytes;
498 let total = input.length;
499 let start = input.offset;
500 let mut pos = start + offset; while pos < total {
503 let current = bytes[pos];
504 if is_digit(¤t) {
505 pos += 1;
506 } else if pos + 1 < total && bytes[pos] == number_separator!() && is_digit(&bytes[pos + 1]) {
507 pos += 2; } else {
509 break;
510 }
511 }
512
513 pos - start
515}
516
#[cfg(test)]
mod tests {
    use super::*;

    // Checks a batch of `literal => expected` pairs against `parse_literal_integer`.
    macro_rules! assert_integers {
        ($($literal:expr => $expected:expr),+ $(,)?) => {
            $(assert_eq!(parse_literal_integer($literal), $expected);)+
        };
    }

    #[test]
    fn test_parse_literal_integer() {
        // Plain decimal and prefixed literals.
        assert_integers! {
            "123" => Some(123),
            "0" => Some(0),
            "0b1010" => Some(10),
            "0o17" => Some(15),
            "0x1A3F" => Some(6719),
            "0XFF" => Some(255),
        }

        // Digit separators, including the legacy leading-zero octal form.
        assert_integers! {
            "0_1_2_3" => Some(83),
            "0b1_0_1_0" => Some(10),
            "0o1_7" => Some(15),
            "0x1_A_3_F" => Some(6719),
        }

        // Empty input and digits outside the selected radix.
        assert_integers! {
            "" => None,
            "0xGHI" => None,
            "0b102" => None,
            "0o89" => None,
        }
    }
}