1#![allow(clippy::zero_prefixed_literal)]
2
3use crate::alloc::Vec;
4use crate::{Allocator, Context};
5
/// Lookup table, indexed by byte value, flagging the bytes that interrupt the
/// fast scan over a JSON string body: the control characters `0x00..=0x1F`,
/// the double quote and the backslash. Every other byte maps to `false`.
static ESCAPE: [bool; 256] = {
    let mut table = [false; 256];

    // Control characters.
    let mut byte = 0usize;

    while byte < 0x20 {
        table[byte] = true;
        byte += 1;
    }

    // String terminator and escape introducer.
    table[b'"' as usize] = true;
    table[b'\\' as usize] = true;
    table
};
38
/// A parsed string that is either a direct view into the input or a view into
/// a scratch buffer.
///
/// The two variants carry distinct lifetimes so that the input-backed case can
/// outlive the scratch buffer.
#[doc(hidden)]
pub enum StringReference<'de, 'scratch> {
    /// The string is a sub-slice of the input with lifetime `'de`.
    Borrowed(&'de str),
    /// The string lives in the scratch buffer with lifetime `'scratch`.
    Scratch(&'scratch str),
}
45
46impl StringReference<'_, '_> {
47 #[inline]
49 pub(crate) fn as_str(&self) -> &str {
50 match self {
51 Self::Borrowed(s) => s,
52 Self::Scratch(s) => s,
53 }
54 }
55}
56
/// Cursor over a borrowed byte slice used to parse and skip JSON strings.
pub(crate) struct SliceAccess<'de, C> {
    // Context used to build errors and to record how far parsing has advanced.
    cx: C,
    // The input being parsed.
    slice: &'de [u8],
    // Offset into `slice` of the next unread byte.
    pub(crate) index: usize,
}
63
64impl<'de, C> SliceAccess<'de, C>
65where
66 C: Context,
67{
68 #[inline]
69 pub(crate) fn new(cx: C, slice: &'de [u8], index: usize) -> Self {
70 Self { cx, slice, index }
71 }
72
73 #[inline]
74 fn next(&mut self) -> Result<u8, C::Error> {
75 let Some(b) = self.slice.get(self.index) else {
76 return Err(self.cx.message("End of input"));
77 };
78
79 self.cx.advance(1);
80 self.index += 1;
81 Ok(*b)
82 }
83
84 #[inline]
85 fn parse_hex_escape(&mut self) -> Result<u16, C::Error> {
86 let &[a, b, c, d, ..] = &self.slice[self.index..] else {
87 return Err(self.cx.message("Unexpected end of hex escape"));
88 };
89
90 let mut n = 0;
91 let start = self.cx.mark();
92
93 for b in [a, b, c, d] {
94 let Some(val) = decode_hex_val(b) else {
95 return Err(self
96 .cx
97 .message_at(&start, "Non-hex digit in escape sequence"));
98 };
99
100 n = (n << 4) + val;
101 }
102
103 self.index += 4;
104 self.cx.advance(4);
105 Ok(n)
106 }
107
108 pub(crate) fn parse_escape(
111 &mut self,
112 validate: bool,
113 scratch: &mut Vec<u8, C::Allocator>,
114 ) -> Result<bool, C::Error> {
115 let start = self.cx.mark();
116 let b = self.next()?;
117
118 let extend = match b {
119 b'"' => scratch.push(b'"').is_ok(),
120 b'\\' => scratch.push(b'\\').is_ok(),
121 b'/' => scratch.push(b'/').is_ok(),
122 b'b' => scratch.push(b'\x08').is_ok(),
123 b'f' => scratch.push(b'\x0c').is_ok(),
124 b'n' => scratch.push(b'\n').is_ok(),
125 b'r' => scratch.push(b'\r').is_ok(),
126 b't' => scratch.push(b'\t').is_ok(),
127 b'u' => {
128 fn encode_surrogate(scratch: &mut Vec<u8, impl Allocator>, n: u16) -> bool {
129 scratch
130 .extend_from_slice(&[
131 ((n >> 12) & 0b0000_1111) as u8 | 0b1110_0000,
132 ((n >> 6) & 0b0011_1111) as u8 | 0b1000_0000,
133 (n & 0b0011_1111) as u8 | 0b1000_0000,
134 ])
135 .is_ok()
136 }
137
138 let c = match self.parse_hex_escape()? {
139 n @ 0xDC00..=0xDFFF => {
140 return if validate {
141 Err(self
142 .cx
143 .message_at(&start, "Lone leading surrogate in hex escape"))
144 } else {
145 Ok(encode_surrogate(scratch, n))
146 };
147 }
148
149 n1 @ 0xD800..=0xDBFF => {
154 let pos = self.cx.mark();
155
156 if self.next()? != b'\\' {
157 return if validate {
158 Err(self.cx.message_at(&pos, "Unexpected end of hex escape"))
159 } else {
160 Ok(encode_surrogate(scratch, n1))
161 };
162 }
163
164 if self.next()? != b'u' {
165 return if validate {
166 Err(self.cx.message_at(&pos, "Unexpected end of hex escape"))
167 } else {
168 if !encode_surrogate(scratch, n1) {
169 return Ok(false);
170 }
171
172 self.parse_escape(validate, scratch)
178 };
179 }
180
181 let n2 = self.parse_hex_escape()?;
182
183 if !(0xDC00..=0xDFFF).contains(&n2) {
184 return Err(self
185 .cx
186 .message_at(&start, "Lone leading surrogate in hex escape"));
187 }
188
189 let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32) + 0x1_0000;
190
191 match char::from_u32(n) {
192 Some(c) => c,
193 None => {
194 return Err(self.cx.message_at(&start, "Invalid unicode"));
195 }
196 }
197 }
198
199 n => char::from_u32(n as u32).unwrap(),
202 };
203
204 scratch
205 .extend_from_slice(c.encode_utf8(&mut [0u8; 4]).as_bytes())
206 .is_ok()
207 }
208 _ => {
209 return Err(self.cx.message_at(&start, "Invalid string escape"));
210 }
211 };
212
213 Ok(extend)
214 }
215
216 fn skip_escape(&mut self, validate: bool) -> Result<(), C::Error> {
219 let start = self.cx.mark();
220 let b = self.next()?;
221
222 match b {
223 b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => (),
224 b'u' => {
225 match self.parse_hex_escape()? {
226 0xDC00..=0xDFFF => {
227 return if validate {
228 Err(self
229 .cx
230 .message_at(&start, "Lone leading surrogate in hex escape"))
231 } else {
232 Ok(())
233 };
234 }
235
236 n1 @ 0xD800..=0xDBFF => {
241 let pos = self.cx.mark();
242
243 if self.next()? != b'\\' {
244 return if validate {
245 Err(self.cx.message_at(&pos, "Unexpected end of hex escape"))
246 } else {
247 Ok(())
248 };
249 }
250
251 if self.next()? != b'u' {
252 return if validate {
253 Err(self.cx.message_at(&pos, "Unexpected end of hex escape"))
254 } else {
255 self.skip_escape(validate)
261 };
262 }
263
264 let n2 = self.parse_hex_escape()?;
265
266 if !(0xDC00..=0xDFFF).contains(&n2) {
267 return Err(self
268 .cx
269 .message_at(&start, "Lone leading surrogate in hex escape"));
270 }
271
272 let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32) + 0x1_0000;
273
274 if char::from_u32(n).is_none() {
275 return Err(self.cx.message_at(&start, "Invalid unicode"));
276 }
277 }
278
279 _ => (),
282 }
283 }
284 _ => {
285 return Err(self.cx.message_at(&start, "Invalid string escape"));
286 }
287 };
288
289 Ok(())
290 }
291
292 pub(crate) fn parse_string<'scratch>(
294 &mut self,
295 validate: bool,
296 start: &C::Mark,
297 scratch: &'scratch mut Vec<u8, C::Allocator>,
298 ) -> Result<StringReference<'de, 'scratch>, C::Error> {
299 let mut open_mark = self.cx.mark();
301 let mut open = self.index;
302
303 loop {
304 while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
305 self.index = self.index.wrapping_add(1);
306 self.cx.advance(1);
307 }
308
309 if self.index == self.slice.len() {
310 return Err(self.cx.message("End of input"));
311 }
312
313 match self.slice[self.index] {
314 b'"' => {
315 if scratch.is_empty() {
316 let borrowed = &self.slice[open..self.index];
319
320 self.index = self.index.wrapping_add(1);
321 self.cx.advance(1);
322
323 self.check_utf8(borrowed, start)?;
324
325 let borrowed = unsafe { core::str::from_utf8_unchecked(borrowed) };
327 return Ok(StringReference::Borrowed(borrowed));
328 } else {
329 let slice = &self.slice[open..self.index];
330 self.check_utf8(slice, start)?;
331
332 if scratch.extend_from_slice(slice).is_err() {
333 return Err(self.cx.message("Scratch buffer overflow"));
334 }
335
336 self.index = self.index.wrapping_add(1);
337 self.cx.advance(1);
338
339 let scratch = unsafe { core::str::from_utf8_unchecked(scratch.as_slice()) };
341 return Ok(StringReference::Scratch(scratch));
342 }
343 }
344 b'\\' => {
345 let slice = &self.slice[open..self.index];
346 self.check_utf8(slice, start)?;
347
348 if scratch.extend_from_slice(slice).is_err() {
349 return Err(self.cx.message("Scratch buffer overflow"));
350 }
351
352 self.index = self.index.wrapping_add(1);
353 self.cx.advance(1);
354
355 if !self.parse_escape(validate, scratch)? {
356 return Err(self.cx.message_at(&open_mark, "Buffer overflow"));
357 }
358
359 open = self.index;
360 open_mark = self.cx.mark();
361 }
362 _ => {
363 if validate {
364 return Err(self
365 .cx
366 .message_at(&open_mark, "Control character while parsing string"));
367 }
368
369 self.index = self.index.wrapping_add(1);
370 self.cx.advance(1);
371 }
372 }
373 }
374 }
375
376 pub(crate) fn skip_string(&mut self) -> Result<(), C::Error> {
378 loop {
379 while let Some(b) = self.slice.get(self.index) {
380 if ESCAPE[*b as usize] {
381 break;
382 }
383
384 self.index = self.index.wrapping_add(1);
385 self.cx.advance(1);
386 }
387
388 let b = self.next()?;
389
390 match b {
391 b'"' => {
392 return Ok(());
393 }
394 b'\\' => {
395 self.skip_escape(true)?;
396 }
397 _ => {
398 return Err(self.cx.message("Control character while parsing string"));
399 }
400 }
401 }
402 }
403
404 #[inline]
406 fn check_utf8(&self, bytes: &[u8], start: &C::Mark) -> Result<(), C::Error> {
407 if crate::str::from_utf8(bytes).is_err() {
408 Err(self.cx.message_at(start, "Invalid unicode string"))
409 } else {
410 Ok(())
411 }
412 }
413}
414
/// Lookup table, indexed by byte value, giving the numeric value of an ASCII
/// hexadecimal digit (`0-9`, `a-f`, `A-F`). Every other byte maps to the
/// sentinel `255`.
static HEX: [u8; 256] = {
    let mut table = [255u8; 256];

    let mut digit = 0u8;

    while digit < 10 {
        table[(b'0' + digit) as usize] = digit;
        digit += 1;
    }

    // Both letter cases map to 10..=15.
    let mut letter = 0u8;

    while letter < 6 {
        table[(b'A' + letter) as usize] = 10 + letter;
        table[(b'a' + letter) as usize] = 10 + letter;
        letter += 1;
    }

    table
};
437
438#[inline]
439pub(crate) fn decode_hex_val(val: u8) -> Option<u16> {
440 let n = HEX[val as usize] as u16;
441
442 if n == 255 {
443 None
444 } else {
445 Some(n)
446 }
447}