1use bumpalo::Bump;
2use bumpalo::collections::Vec;
3
4use crate::input::Input;
5use crate::number_separator;
6
7pub fn parse_literal_string_in<'arena>(
18 arena: &'arena Bump,
19 s: &'arena str,
20 quote_char: Option<char>,
21 has_quote: bool,
22) -> Option<&'arena str> {
23 if s.is_empty() {
24 return Some("");
25 }
26
27 let s = if has_quote && (s.starts_with("b\"") || s.starts_with("b'") || s.starts_with("B\"") || s.starts_with("B'"))
28 {
29 &s[1..]
30 } else {
31 s
32 };
33
34 let (quote_char, content) = if let Some(quote_char) = quote_char {
35 (Some(quote_char), s)
36 } else if !has_quote {
37 (None, s)
38 } else if s.starts_with('"') && s.ends_with('"') && s.len() >= 2 {
39 (Some('"'), &s[1..s.len() - 1])
40 } else if s.starts_with('\'') && s.ends_with('\'') && s.len() >= 2 {
41 (Some('\''), &s[1..s.len() - 1])
42 } else {
43 return None;
44 };
45
46 let needs_processing = content.contains('\\') || quote_char.is_some_and(|q| content.contains(q));
47 if !needs_processing {
48 return Some(content);
49 }
50
51 let mut result = Vec::with_capacity_in(content.len(), arena);
52 let mut chars = content.chars().peekable();
53 let mut buf = [0; 4];
54
55 while let Some(c) = chars.next() {
56 if c != '\\' {
57 result.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
58 continue;
59 }
60
61 let Some(&next_char) = chars.peek() else {
62 result.push(b'\\');
63 continue;
64 };
65
66 let mut consumed = true;
67
68 match next_char {
69 '\\' => result.push(b'\\'),
70 '\'' if quote_char == Some('\'') => result.push(b'\''),
71 '"' if quote_char == Some('"') => result.push(b'"'),
72 '$' if quote_char == Some('"') => result.push(b'$'),
73 'n' if quote_char == Some('"') => result.push(b'\n'),
74 't' if quote_char == Some('"') => result.push(b'\t'),
75 'r' if quote_char == Some('"') => result.push(b'\r'),
76 'v' if quote_char == Some('"') => result.push(0x0B),
77 'e' if quote_char == Some('"') => result.push(0x1B),
78 'f' if quote_char == Some('"') => result.push(0x0C),
79 '0' if quote_char == Some('"') => result.push(0x00),
80 'x' if quote_char == Some('"') => {
81 chars.next(); let mut hex_val = 0u8;
83 let mut hex_len = 0;
84 while let Some(peeked) = chars.peek() {
86 if hex_len < 2 && peeked.is_ascii_hexdigit() {
87 hex_val = hex_val * 16 + peeked.to_digit(16).unwrap() as u8;
88 hex_len += 1;
89 chars.next(); } else {
91 break;
92 }
93 }
94 if hex_len > 0 {
95 result.push(hex_val);
96 } else {
97 result.push(b'\\');
99 result.push(b'x');
100 }
101
102 consumed = false;
103 }
104 c if quote_char == Some('"') && c.is_ascii_digit() => {
105 let mut octal_val = 0u16;
106 let mut octal_len = 0;
107
108 while let Some(peeked) = chars.peek() {
109 if octal_len < 3 && peeked.is_ascii_digit() && *peeked <= '7' {
110 octal_val = octal_val * 8 + peeked.to_digit(8).unwrap() as u16;
111 octal_len += 1;
112 chars.next(); } else {
114 break;
115 }
116 }
117 if octal_len > 0 {
118 result.push(octal_val as u8);
120 } else {
121 result.push(b'\\');
122 result.push(b'0');
123 }
124
125 consumed = false;
126 }
127 _ => {
128 if quote_char == Some('\'') {
130 result.push(b'\\');
132 result.extend_from_slice(next_char.encode_utf8(&mut buf).as_bytes());
133 } else {
134 result.extend_from_slice(next_char.encode_utf8(&mut buf).as_bytes());
136 }
137 }
138 }
139
140 if consumed {
141 chars.next(); }
143 }
144
145 std::str::from_utf8(result.into_bump_slice()).ok()
146}
147
/// Parses a string literal into an owned, unescaped `String`.
///
/// # Parameters
///
/// - `s`: the literal text.
/// - `quote_char`: the quote style to assume. When `Some`, `s` is used as the content verbatim;
///   nothing is trimmed.
/// - `has_quote`: when `quote_char` is `None`, tells whether `s` still carries its surrounding
///   quotes. If `true`, an optional `b`/`B` prefix is skipped and the quotes are detected and
///   removed; if `false`, `s` is the content and no quote style is assumed.
///
/// # Escape handling
///
/// The rules mirror PHP's quoted-string syntax:
///
/// - `\\` always becomes a single backslash.
/// - Single-quoted: `\'` becomes `'`.
/// - Double-quoted: `\"`, `\$`, `\n`, `\t`, `\r`, `\v`, `\e`, `\f`, `\x` followed by one or two
///   hex digits, and `\` followed by one to three octal digits (the value wraps to one byte).
/// - Any other backslash sequence is kept literally, backslash included.
///
/// Bytes produced by hex/octal escapes are stored as the code point of the same value
/// (`0x80..=0xFF` become `U+0080..=U+00FF`), because a `String` cannot hold raw non-UTF-8 bytes.
///
/// # Returns
///
/// `None` when quotes had to be detected but `s` is not wrapped in a matching pair.
#[inline]
#[must_use]
pub fn parse_literal_string(s: &str, quote_char: Option<char>, has_quote: bool) -> Option<String> {
    if s.is_empty() {
        return Some(String::new());
    }

    let (quote_char, content) = if let Some(quote_char) = quote_char {
        (Some(quote_char), s)
    } else if !has_quote {
        (None, s)
    } else {
        // Skip the binary-string prefix (`b"..."`, `B'...'`). Both matched bytes are ASCII, so
        // slicing at index 1 always lands on a character boundary.
        let literal = match s.as_bytes() {
            [b'b' | b'B', b'"' | b'\'', ..] => &s[1..],
            _ => s,
        };

        // The slice patterns require at least two bytes, so a lone quote is rejected.
        match literal.as_bytes() {
            [b'"', .., b'"'] => (Some('"'), &literal[1..literal.len() - 1]),
            [b'\'', .., b'\''] => (Some('\''), &literal[1..literal.len() - 1]),
            _ => return None,
        }
    };

    let single_quoted = quote_char == Some('\'');
    let double_quoted = quote_char == Some('"');

    // Unescaping never produces more bytes than the raw content for ASCII escapes, so the raw
    // length is a good capacity hint.
    let mut result = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        // A trailing backslash has nothing to escape and is kept literally.
        let Some(&next_char) = chars.peek() else {
            result.push('\\');
            continue;
        };

        // Escapes that map one character to exactly one character.
        let simple = match next_char {
            '\\' => Some('\\'),
            '\'' if single_quoted => Some('\''),
            '"' if double_quoted => Some('"'),
            '$' if double_quoted => Some('$'),
            'n' if double_quoted => Some('\n'),
            't' if double_quoted => Some('\t'),
            'r' if double_quoted => Some('\r'),
            'v' if double_quoted => Some('\x0B'),
            'e' if double_quoted => Some('\x1B'),
            'f' if double_quoted => Some('\x0C'),
            _ => None,
        };

        if let Some(unescaped) = simple {
            result.push(unescaped);
            chars.next();
            continue;
        }

        match next_char {
            'x' if double_quoted => {
                chars.next(); // consume the `x`

                // Up to two hex digits; two digits always fit in a `u8`.
                let mut value = 0u8;
                let mut digits = 0;
                while digits < 2 {
                    let Some(digit) = chars.peek().and_then(|ch| ch.to_digit(16)) else {
                        break;
                    };

                    value = value * 16 + digit as u8;
                    digits += 1;
                    chars.next();
                }

                if digits > 0 {
                    result.push(char::from(value));
                } else {
                    // `\x` without hex digits is not an escape; keep it literally.
                    result.push_str("\\x");
                }
            }
            // Matching only `0..=7` keeps `\8` and `\9` out of the octal path. `\0` needs no
            // special case: it is simply the one-digit octal escape for NUL, and handling it
            // here lets `\012` be read as a whole.
            '0'..='7' if double_quoted => {
                let mut value = 0u16;
                let mut digits = 0;
                while digits < 3 {
                    let Some(digit) = chars.peek().and_then(|ch| ch.to_digit(8)) else {
                        break;
                    };

                    value = value * 8 + digit as u16;
                    digits += 1;
                    chars.next();
                }

                // Values above 0o377 intentionally wrap to a single byte.
                result.push(char::from(value as u8));
            }
            _ => {
                // Unrecognised escape: keep the backslash and the character as written.
                result.push('\\');
                result.push(next_char);
                chars.next();
            }
        }
    }

    Some(result)
}
297
/// Parses a float literal, ignoring `_` digit separators.
///
/// Literals without an underscore are parsed directly. Otherwise the underscores are removed
/// first: into a small stack buffer when the remaining text fits, or into a heap-allocated
/// `String` when it does not.
///
/// Returns `None` when the text (after removing underscores) is not accepted by
/// `str::parse::<f64>`.
#[inline]
#[must_use]
pub fn parse_literal_float(value: &str) -> Option<f64> {
    /// Number of non-separator bytes that can be handled without allocating.
    const STACK_CAPACITY: usize = 64;

    let bytes = value.as_bytes();
    if !bytes.contains(&b'_') {
        return value.parse().ok();
    }

    let mut buf = [0u8; STACK_CAPACITY];
    let mut len = 0;

    for &byte in bytes {
        if byte == b'_' {
            continue;
        }

        if len == STACK_CAPACITY {
            // Too long for the stack buffer: fall back to an allocating cleanup.
            return value.replace('_', "").parse().ok();
        }

        buf[len] = byte;
        len += 1;
    }

    // `_` is ASCII, so removing it from valid UTF-8 leaves valid UTF-8 and this conversion
    // cannot fail in practice; the checked form avoids an `unsafe` block at negligible cost.
    std::str::from_utf8(&buf[..len]).ok()?.parse().ok()
}
325
/// Parses an integer literal, ignoring `_` digit separators.
///
/// Supported forms:
///
/// - `0x…` / `0X…`: hexadecimal
/// - `0o…` / `0O…`: octal
/// - `0b…` / `0B…`: binary
/// - `0` followed only by octal digits and underscores: octal (e.g. `0123`)
/// - anything else: decimal (this includes a leading `0` followed by `8` or `9`)
///
/// Values that do not fit in a `u64` saturate to `u64::MAX`.
///
/// Returns `None` when the literal is empty, contains no digits, or contains a character that
/// is not a digit of the selected radix. Every character is validated, even after the value
/// has saturated.
#[inline]
#[must_use]
pub fn parse_literal_integer(value: &str) -> Option<u64> {
    let bytes = value.as_bytes();

    let (radix, digits): (u32, &[u8]) = match bytes {
        [b'0', b'x' | b'X', rest @ ..] => (16, rest),
        [b'0', b'o' | b'O', rest @ ..] => (8, rest),
        [b'0', b'b' | b'B', rest @ ..] => (2, rest),
        // Legacy octal: a leading zero followed only by octal digits and separators.
        [b'0', rest @ ..] if !rest.is_empty() && rest.iter().all(|b| matches!(b, b'_' | b'0'..=b'7')) => (8, rest),
        _ => (10, bytes),
    };

    let mut result = 0u64;
    let mut has_digits = false;

    for &byte in digits {
        if byte == b'_' {
            continue;
        }

        // `to_digit` rejects anything that is not a digit of `radix`, including non-ASCII bytes.
        let digit = char::from(byte).to_digit(radix)?;
        has_digits = true;

        // Once saturated the value stays at `u64::MAX`, so no early return is needed and the
        // remaining characters still get validated.
        result = result.saturating_mul(u64::from(radix)).saturating_add(u64::from(digit));
    }

    has_digits.then_some(result)
}
387
/// Byte-indexed lookup table: `true` for the bytes `a-z`, `A-Z` and `_`.
///
/// Unlike `IS_IDENT_PART`, bytes `0x80..=0xFF` are not marked here.
/// NOTE(review): presumably non-ASCII identifier starts are handled by the caller — confirm.
static IS_IDENT_START: [bool; 256] = {
    let mut table = [false; 256];
    let mut index = 0usize;

    while index < table.len() {
        let byte = index as u8;
        table[index] = byte.is_ascii_alphabetic() || byte == b'_';
        index += 1;
    }

    table
};
403
/// Byte-indexed lookup table: `true` for the bytes `a-z`, `A-Z`, `0-9`, `_` and every byte in
/// `0x80..=0xFF`.
static IS_IDENT_PART: [bool; 256] = {
    let mut table = [false; 256];
    let mut index = 0usize;

    while index < table.len() {
        let byte = index as u8;
        table[index] = byte.is_ascii_alphanumeric() || byte == b'_' || byte >= 0x80;
        index += 1;
    }

    table
};
418
419#[inline(always)]
421#[must_use]
422pub const fn is_start_of_identifier(byte: &u8) -> bool {
423 IS_IDENT_START[*byte as usize]
424}
425
426#[inline(always)]
428#[must_use]
429pub const fn is_part_of_identifier(byte: &u8) -> bool {
430 IS_IDENT_PART[*byte as usize]
431}
432
433#[inline(always)]
439#[must_use]
440pub fn scan_identifier_length(bytes: &[u8], offset: usize) -> usize {
441 let mut len = 1;
442 let remaining = &bytes[offset + 1..];
443
444 for &b in remaining {
445 if IS_IDENT_PART[b as usize] {
446 len += 1;
447 } else {
448 break;
449 }
450 }
451
452 len
453}
454
455#[inline]
480pub fn read_digits_of_base(input: &Input, offset: usize, base: u8) -> usize {
481 if base == 16 {
482 read_digits_with(input, offset, u8::is_ascii_hexdigit)
483 } else {
484 let max = b'0' + base;
485
486 read_digits_with(input, offset, |b| b >= &b'0' && b < &max)
487 }
488}
489
490#[inline]
491fn read_digits_with<F: Fn(&u8) -> bool>(input: &Input, offset: usize, is_digit: F) -> usize {
492 let bytes = input.bytes;
493 let total = input.length;
494 let start = input.offset;
495 let mut pos = start + offset; while pos < total {
498 let current = bytes[pos];
499 if is_digit(¤t) {
500 pos += 1;
501 } else if pos + 1 < total && bytes[pos] == number_separator!() && is_digit(&bytes[pos + 1]) {
502 pos += 2; } else {
504 break;
505 }
506 }
507
508 pos - start
510}
511
#[cfg(test)]
mod tests {
    use super::*;

    /// Integer literals paired with the value `parse_literal_integer` must return for them.
    const INTEGER_CASES: &[(&str, Option<u64>)] = &[
        // Plain decimal.
        ("123", Some(123)),
        ("0", Some(0)),
        // Prefixed radixes.
        ("0b1010", Some(10)),
        ("0o17", Some(15)),
        ("0x1A3F", Some(6719)),
        ("0XFF", Some(255)),
        // Digit separators.
        ("0_1_2_3", Some(83)),
        ("0b1_0_1_0", Some(10)),
        ("0o1_7", Some(15)),
        ("0x1_A_3_F", Some(6719)),
        // Rejected input.
        ("", None),
        ("0xGHI", None),
        ("0b102", None),
        ("0o89", None),
    ];

    #[test]
    fn test_parse_literal_integer() {
        for &(literal, expected) in INTEGER_CASES {
            assert_eq!(parse_literal_integer(literal), expected, "literal: {:?}", literal);
        }
    }
}