mago_syntax_core/
utils.rs1use bumpalo::Bump;
2use bumpalo::collections::Vec;
3
4use crate::input::Input;
5use crate::number_separator;
6
7pub fn parse_literal_string_in<'arena>(
13 arena: &'arena Bump,
14 s: &'arena str,
15 quote_char: Option<char>,
16 has_quote: bool,
17) -> Option<&'arena str> {
18 if s.is_empty() {
19 return Some("");
20 }
21
22 let (quote_char, content) = if let Some(quote_char) = quote_char {
23 (Some(quote_char), s)
24 } else if !has_quote {
25 (None, s)
26 } else if s.starts_with('"') && s.ends_with('"') && s.len() >= 2 {
27 (Some('"'), &s[1..s.len() - 1])
28 } else if s.starts_with('\'') && s.ends_with('\'') && s.len() >= 2 {
29 (Some('\''), &s[1..s.len() - 1])
30 } else {
31 return None;
32 };
33
34 let needs_processing = content.contains('\\') || quote_char.is_some_and(|q| content.contains(q));
35 if !needs_processing {
36 return Some(content);
37 }
38
39 let mut result = Vec::with_capacity_in(content.len(), arena);
40 let mut chars = content.chars().peekable();
41 let mut buf = [0; 4];
42
43 while let Some(c) = chars.next() {
44 if c != '\\' {
45 result.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
46 continue;
47 }
48
49 let Some(&next_char) = chars.peek() else {
50 result.push(b'\\');
51 continue;
52 };
53
54 let mut consumed = true;
55
56 match next_char {
57 '\\' => result.push(b'\\'),
58 '\'' if quote_char == Some('\'') => result.push(b'\''),
59 '"' if quote_char == Some('"') => result.push(b'"'),
60 '$' if quote_char == Some('"') => result.push(b'$'),
61 'n' if quote_char == Some('"') => result.push(b'\n'),
62 't' if quote_char == Some('"') => result.push(b'\t'),
63 'r' if quote_char == Some('"') => result.push(b'\r'),
64 'v' if quote_char == Some('"') => result.push(0x0B),
65 'e' if quote_char == Some('"') => result.push(0x1B),
66 'f' if quote_char == Some('"') => result.push(0x0C),
67 '0' if quote_char == Some('"') => result.push(0x00),
68 'x' if quote_char == Some('"') => {
69 chars.next(); let mut hex_val = 0u8;
71 let mut hex_len = 0;
72 while let Some(peeked) = chars.peek() {
74 if hex_len < 2 && peeked.is_ascii_hexdigit() {
75 hex_val = hex_val * 16 + peeked.to_digit(16).unwrap() as u8;
76 hex_len += 1;
77 chars.next(); } else {
79 break;
80 }
81 }
82 if hex_len > 0 {
83 result.push(hex_val);
84 } else {
85 result.push(b'\\');
87 result.push(b'x');
88 }
89
90 consumed = false;
91 }
92 c if quote_char == Some('"') && c.is_ascii_digit() => {
93 let mut octal_val = 0u8;
94 let mut octal_len = 0;
95
96 while let Some(peeked) = chars.peek() {
97 if octal_len < 3 && peeked.is_ascii_digit() && *peeked <= '7' {
98 octal_val = octal_val * 8 + peeked.to_digit(8).unwrap() as u8;
99 octal_len += 1;
100 chars.next(); } else {
102 break;
103 }
104 }
105 if octal_len > 0 {
106 result.push(octal_val);
107 } else {
108 result.push(b'\\');
109 result.push(b'0');
110 }
111
112 consumed = false;
113 }
114 _ => {
115 if quote_char == Some('\'') {
117 result.push(b'\\');
119 result.extend_from_slice(next_char.encode_utf8(&mut buf).as_bytes());
120 } else {
121 result.extend_from_slice(next_char.encode_utf8(&mut buf).as_bytes());
123 }
124 }
125 }
126
127 if consumed {
128 chars.next(); }
130 }
131
132 std::str::from_utf8(result.into_bump_slice()).ok()
133}
134
/// Decodes the escape sequences of a PHP string literal into an owned `String`.
///
/// # Parameters
///
/// * `s` - the literal text, with or without its surrounding quotes.
/// * `quote_char` - when `Some`, `s` is taken to be the already-unquoted content
///   and this value selects the escape rules (`'"'` or `'\''`).
/// * `has_quote` - consulted only when `quote_char` is `None`: if `true`, `s`
///   must be wrapped in a matching pair of `"` or `'`, which is stripped; if
///   `false`, `s` is treated as unquoted content where only `\\` is an escape.
///
/// # Returns
///
/// `None` when `has_quote` is set but `s` is not wrapped in matching quotes;
/// otherwise the decoded text (an empty string for empty input).
///
/// # Escape rules
///
/// * `\\` is always decoded; `\'` only in single-quoted mode.
/// * Double-quoted mode additionally decodes `\"`, `\$`, `\n`, `\t`, `\r`,
///   `\v`, `\e`, `\f`, `\x` followed by 1-2 hex digits and `\` followed by 1-3
///   octal digits (the octal value wraps modulo 256, as in PHP).
/// * The byte produced by a `\x..` / octal escape is pushed as the `char` with
///   the same code point (so `\xFF` becomes U+00FF).
/// * Any other backslash sequence is kept verbatim, backslash included.
#[inline]
pub fn parse_literal_string(s: &str, quote_char: Option<char>, has_quote: bool) -> Option<String> {
    if s.is_empty() {
        return Some(String::new());
    }

    let (quote_char, content) = if let Some(quote_char) = quote_char {
        (Some(quote_char), s)
    } else if !has_quote {
        (None, s)
    } else if s.starts_with('"') && s.ends_with('"') && s.len() >= 2 {
        (Some('"'), &s[1..s.len() - 1])
    } else if s.starts_with('\'') && s.ends_with('\'') && s.len() >= 2 {
        (Some('\''), &s[1..s.len() - 1])
    } else {
        return None;
    };

    let single_quoted = quote_char == Some('\'');
    let double_quoted = quote_char == Some('"');

    let mut result = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        // A trailing backslash has nothing to escape: keep it as-is.
        let Some(&next_char) = chars.peek() else {
            result.push('\\');
            continue;
        };

        // Escapes that map one character to exactly one character.
        let simple = match next_char {
            '\\' => Some('\\'),
            '\'' if single_quoted => Some('\''),
            '"' if double_quoted => Some('"'),
            '$' if double_quoted => Some('$'),
            'n' if double_quoted => Some('\n'),
            't' if double_quoted => Some('\t'),
            'r' if double_quoted => Some('\r'),
            'v' if double_quoted => Some('\x0B'),
            'e' if double_quoted => Some('\x1B'),
            'f' if double_quoted => Some('\x0C'),
            _ => None,
        };

        if let Some(decoded) = simple {
            result.push(decoded);
            chars.next();
            continue;
        }

        match next_char {
            'x' if double_quoted => {
                chars.next(); // consume the `x`

                let mut hex_val = 0u8;
                let mut hex_len = 0;
                while hex_len < 2 {
                    let Some(digit) = chars.peek().and_then(|p| p.to_digit(16)) else {
                        break;
                    };

                    // Two hex digits never exceed 0xFF, so this cannot overflow.
                    hex_val = hex_val * 16 + digit as u8;
                    hex_len += 1;
                    chars.next();
                }

                if hex_len > 0 {
                    result.push(char::from(hex_val));
                } else {
                    // `\x` without hex digits is not an escape; keep it literally
                    // instead of rejecting the whole string.
                    result.push_str("\\x");
                }
            }
            // `\0` is simply a one-digit octal escape, so it is handled here;
            // a dedicated `\0` arm would cut `\012` short.
            '0'..='7' if double_quoted => {
                let mut octal_val = 0u8;
                let mut octal_len = 0;
                while octal_len < 3 {
                    let Some(digit) = chars.peek().and_then(|p| p.to_digit(8)) else {
                        break;
                    };

                    // PHP silently wraps `\400`..`\777` to a single byte.
                    octal_val = octal_val.wrapping_mul(8).wrapping_add(digit as u8);
                    octal_len += 1;
                    chars.next();
                }

                result.push(char::from(octal_val));
            }
            _ => {
                // Not a recognised escape (this includes `\8` and `\9`): keep the
                // backslash and let the next iteration copy the following
                // character unchanged.
                result.push('\\');
            }
        }
    }

    Some(result)
}
279
/// Parses a float literal that may contain `_` digit separators.
///
/// Every underscore is dropped and the remaining text is handed to
/// `str::parse::<f64>`; `None` is returned when that parse fails.
#[inline]
pub fn parse_literal_float(value: &str) -> Option<f64> {
    let without_separators: String = value.chars().filter(|&ch| ch != '_').collect();

    without_separators.parse().ok()
}
286
/// Parses a PHP integer literal into a `u64`, saturating at `u64::MAX`.
///
/// Supported notations:
///
/// * `0x` / `0X` - hexadecimal
/// * `0o` / `0O` - explicit octal
/// * `0b` / `0B` - binary
/// * a leading `0` followed only by octal digits (and `_`) - legacy octal,
///   e.g. `0755`
/// * anything else - decimal
///
/// Underscores are skipped wherever they appear among the digits.
///
/// A literal that starts with `0` but contains `8` or `9` (invalid in PHP) is
/// still read as decimal, which keeps the previous lenient behaviour for such
/// input.
///
/// # Returns
///
/// `None` when `value` is empty, contains no digit at all, or contains a
/// character that is not a digit of the selected radix. Values that do not fit
/// in a `u64` are clamped to `u64::MAX`.
#[inline]
pub fn parse_literal_integer(value: &str) -> Option<u64> {
    if value.is_empty() {
        return None;
    }

    let (radix, digits): (u32, &str) = if value.starts_with("0x") || value.starts_with("0X") {
        (16, &value[2..])
    } else if value.starts_with("0o") || value.starts_with("0O") {
        (8, &value[2..])
    } else if value.starts_with("0b") || value.starts_with("0B") {
        (2, &value[2..])
    } else if value.len() > 1
        && value.starts_with('0')
        && value[1..].chars().all(|c| c == '_' || c.is_digit(8))
    {
        // Legacy octal. The leading `0` is kept in `digits` on purpose: it adds
        // nothing to the value but still counts as a digit, so input such as
        // `0_` keeps yielding `Some(0)`.
        (8, value)
    } else {
        (10, value)
    };

    let mut result: u128 = 0;
    let mut has_digits = false;

    for c in digits.chars() {
        if c == '_' {
            continue;
        }

        let digit = u128::from(c.to_digit(radix)?);
        has_digits = true;

        result = match result.checked_mul(u128::from(radix)).and_then(|r| r.checked_add(digit)) {
            Some(r) => r,
            // Even the 128-bit accumulator overflowed: far beyond `u64::MAX`.
            None => return Some(u64::MAX),
        };
    }

    if !has_digits {
        return None;
    }

    Some(u64::try_from(result).unwrap_or(u64::MAX))
}
339
/// Returns `true` when `byte` is an ASCII letter (`a`-`z`, `A`-`Z`) or `_`.
///
/// NOTE(review): unlike `is_part_of_identifier`, bytes `>= 0x80` are rejected
/// here; confirm against the callers whether that asymmetry is intended.
#[inline]
pub fn is_start_of_identifier(byte: &u8) -> bool {
    matches!(*byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
344
/// Returns `true` when `byte` may appear inside an identifier: an ASCII digit,
/// an ASCII letter, `_`, or any byte in `0x80..=0xFF`.
#[inline]
pub fn is_part_of_identifier(byte: &u8) -> bool {
    matches!(*byte, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | 0x80..=0xFF)
}
353
354#[inline]
379pub fn read_digits_of_base(input: &Input, offset: usize, base: u8) -> usize {
380 if base == 16 {
381 read_digits_with(input, offset, u8::is_ascii_hexdigit)
382 } else {
383 let max = b'0' + base;
384
385 read_digits_with(input, offset, |b| b >= &b'0' && b < &max)
386 }
387}
388
389#[inline]
390fn read_digits_with<F: Fn(&u8) -> bool>(input: &Input, offset: usize, is_digit: F) -> usize {
391 let bytes = input.bytes;
392 let total = input.length;
393 let start = input.offset;
394 let mut pos = start + offset; while pos < total {
397 let current = bytes[pos];
398 if is_digit(¤t) {
399 pos += 1;
400 } else if pos + 1 < total && bytes[pos] == number_separator!() && is_digit(&bytes[pos + 1]) {
401 pos += 2; } else {
403 break;
404 }
405 }
406
407 pos - start
409}