1#![allow(clippy::zero_prefixed_literal)]
2
3use musli::{Buf, Context};
4
5use crate::parser::{Parser, SliceParser};
6
/// Lookup table of bytes that interrupt the fast scan over a JSON string.
///
/// `ESCAPE[b]` is `true` for the control characters `0x00..=0x1F`, for the
/// double quote (`0x22`) and for the backslash (`0x5C`). Every other byte maps
/// to `false` and may appear unescaped.
static ESCAPE: [bool; 256] = {
    let mut table = [false; 256];

    // Control characters 0x00..=0x1F.
    let mut byte = 0usize;
    while byte < 0x20 {
        table[byte] = true;
        byte += 1;
    }

    // String terminator and escape introducer.
    table[b'"' as usize] = true;
    table[b'\\' as usize] = true;

    table
};
39
/// A string produced by the string parser.
///
/// The two variants distinguish where the string data lives: in the input
/// (lifetime `'de`) or in the scratch buffer handed to the parser (lifetime
/// `'scratch`).
#[doc(hidden)]
pub enum StringReference<'de, 'scratch> {
    /// The string is a slice of the original input.
    Borrowed(&'de str),
    /// The string was assembled in the scratch buffer.
    Scratch(&'scratch str),
}
46
47pub(crate) fn parse_string_slice_reader<'de, 'scratch, C, S>(
49 cx: &C,
50 reader: &mut SliceParser<'de>,
51 validate: bool,
52 start: C::Mark,
53 scratch: &'scratch mut S,
54) -> Result<StringReference<'de, 'scratch>, C::Error>
55where
56 C: ?Sized + Context,
57 S: ?Sized + Buf,
58{
59 let mut open_mark = cx.mark();
61 let mut open = reader.index;
62
63 loop {
64 while reader.index < reader.slice.len() && !ESCAPE[reader.slice[reader.index] as usize] {
65 reader.index = reader.index.wrapping_add(1);
66 cx.advance(1);
67 }
68
69 if reader.index == reader.slice.len() {
70 return Err(cx.message("End of input"));
71 }
72
73 match reader.slice[reader.index] {
74 b'"' => {
75 if scratch.is_empty() {
76 let borrowed = &reader.slice[open..reader.index];
79 reader.index = reader.index.wrapping_add(1);
80 cx.advance(1);
81 check_utf8(cx, borrowed, start)?;
82 let borrowed = unsafe { core::str::from_utf8_unchecked(borrowed) };
84 return Ok(StringReference::Borrowed(borrowed));
85 } else {
86 let slice = &reader.slice[open..reader.index];
87 check_utf8(cx, slice, start)?;
88
89 if !scratch.write(slice) {
90 return Err(cx.message("Scratch buffer overflow"));
91 }
92
93 reader.index = reader.index.wrapping_add(1);
94 cx.advance(1);
95 let scratch = unsafe { core::str::from_utf8_unchecked(scratch.as_slice()) };
97 return Ok(StringReference::Scratch(scratch));
98 }
99 }
100 b'\\' => {
101 let slice = &reader.slice[open..reader.index];
102 check_utf8(cx, slice, start)?;
103
104 if !scratch.write(slice) {
105 return Err(cx.message("Scratch buffer overflow"));
106 }
107
108 reader.index = reader.index.wrapping_add(1);
109 cx.advance(1);
110
111 if !parse_escape(cx, reader, validate, scratch)? {
112 return Err(cx.marked_message(open_mark, "Buffer overflow"));
113 }
114
115 open = reader.index;
116 open_mark = cx.mark();
117 }
118 _ => {
119 if validate {
120 return Err(
121 cx.marked_message(open_mark, "Control character while parsing string")
122 );
123 }
124
125 reader.index = reader.index.wrapping_add(1);
126 cx.advance(1);
127 }
128 }
129 }
130}
131
132#[inline]
134fn check_utf8<C>(cx: &C, bytes: &[u8], start: C::Mark) -> Result<(), C::Error>
135where
136 C: ?Sized + Context,
137{
138 if crate::str::from_utf8(bytes).is_err() {
139 Err(cx.marked_message(start, "Invalid unicode string"))
140 } else {
141 Ok(())
142 }
143}
144
145fn parse_escape<C, B>(
148 cx: &C,
149 parser: &mut SliceParser<'_>,
150 validate: bool,
151 scratch: &mut B,
152) -> Result<bool, C::Error>
153where
154 C: ?Sized + Context,
155 B: ?Sized + Buf,
156{
157 let start = cx.mark();
158 let b = parser.read_byte(cx)?;
159
160 let extend = match b {
161 b'"' => scratch.push(b'"'),
162 b'\\' => scratch.push(b'\\'),
163 b'/' => scratch.push(b'/'),
164 b'b' => scratch.push(b'\x08'),
165 b'f' => scratch.push(b'\x0c'),
166 b'n' => scratch.push(b'\n'),
167 b'r' => scratch.push(b'\r'),
168 b't' => scratch.push(b'\t'),
169 b'u' => {
170 fn encode_surrogate<B>(scratch: &mut B, n: u16) -> bool
171 where
172 B: ?Sized + Buf,
173 {
174 scratch.write(&[
175 (n >> 12 & 0b0000_1111) as u8 | 0b1110_0000,
176 (n >> 6 & 0b0011_1111) as u8 | 0b1000_0000,
177 (n & 0b0011_1111) as u8 | 0b1000_0000,
178 ])
179 }
180
181 let c = match parser.parse_hex_escape(cx)? {
182 n @ 0xDC00..=0xDFFF => {
183 return if validate {
184 Err(cx.marked_message(start, "Lone leading surrogate in hex escape"))
185 } else {
186 Ok(encode_surrogate(scratch, n))
187 };
188 }
189
190 n1 @ 0xD800..=0xDBFF => {
195 let pos = cx.mark();
196
197 if parser.read_byte(cx)? != b'\\' {
198 return if validate {
199 Err(cx.marked_message(pos, "Unexpected end of hex escape"))
200 } else {
201 Ok(encode_surrogate(scratch, n1))
202 };
203 }
204
205 if parser.read_byte(cx)? != b'u' {
206 return if validate {
207 Err(cx.marked_message(pos, "Unexpected end of hex escape"))
208 } else {
209 if !encode_surrogate(scratch, n1) {
210 return Ok(false);
211 }
212
213 parse_escape(cx, parser, validate, scratch)
219 };
220 }
221
222 let n2 = parser.parse_hex_escape(cx)?;
223
224 if !(0xDC00..=0xDFFF).contains(&n2) {
225 return Err(
226 cx.marked_message(start, "Lone leading surrogate in hex escape")
227 );
228 }
229
230 let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
231
232 match char::from_u32(n) {
233 Some(c) => c,
234 None => {
235 return Err(cx.marked_message(start, "Invalid unicode"));
236 }
237 }
238 }
239
240 n => char::from_u32(n as u32).unwrap(),
243 };
244
245 scratch.write(c.encode_utf8(&mut [0u8; 4]).as_bytes())
246 }
247 _ => {
248 return Err(cx.marked_message(start, "Invalid string escape"));
249 }
250 };
251
252 Ok(extend)
253}
254
/// Lookup table mapping a byte to the value of the hexadecimal digit it
/// spells.
///
/// `b'0'..=b'9'` map to `0..=9`, `b'a'..=b'f'` and `b'A'..=b'F'` map to
/// `10..=15`. Every other byte maps to the sentinel `255`.
static HEX: [u8; 256] = {
    let mut table = [255u8; 256];

    // Decimal digits.
    let mut digit = 0u8;
    while digit < 10 {
        table[(b'0' + digit) as usize] = digit;
        digit += 1;
    }

    // Letters, in both cases.
    let mut offset = 0u8;
    while offset < 6 {
        table[(b'A' + offset) as usize] = 10 + offset;
        table[(b'a' + offset) as usize] = 10 + offset;
        offset += 1;
    }

    table
};
277
278pub(crate) fn decode_hex_val(val: u8) -> Option<u16> {
279 let n = HEX[val as usize] as u16;
280
281 if n == 255 {
282 None
283 } else {
284 Some(n)
285 }
286}
287
288pub(crate) fn skip_string<'de, P, C>(cx: &C, mut p: P, validate: bool) -> Result<(), C::Error>
290where
291 P: Parser<'de>,
292 C: ?Sized + Context,
293{
294 loop {
295 while let Some(b) = p.peek_byte(cx)? {
296 if ESCAPE[b as usize] {
297 break;
298 }
299
300 p.skip(cx, 1)?;
301 }
302
303 let b = p.read_byte(cx)?;
304
305 match b {
306 b'"' => {
307 return Ok(());
308 }
309 b'\\' => {
310 skip_escape(cx, p.borrow_mut(), validate)?;
311 }
312 _ => {
313 if validate {
314 return Err(cx.message("Control character while parsing string"));
315 }
316 }
317 }
318 }
319}
320
321fn skip_escape<'de, P, C>(cx: &C, mut p: P, validate: bool) -> Result<(), C::Error>
324where
325 P: Parser<'de>,
326 C: ?Sized + Context,
327{
328 let start = cx.mark();
329 let b = p.read_byte(cx)?;
330
331 match b {
332 b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => (),
333 b'u' => {
334 match p.parse_hex_escape(cx)? {
335 0xDC00..=0xDFFF => {
336 return if validate {
337 Err(cx.marked_message(start, "Lone leading surrogate in hex escape"))
338 } else {
339 Ok(())
340 };
341 }
342
343 n1 @ 0xD800..=0xDBFF => {
348 let pos = cx.mark();
349
350 if p.read_byte(cx)? != b'\\' {
351 return if validate {
352 Err(cx.marked_message(pos, "Unexpected end of hex escape"))
353 } else {
354 Ok(())
355 };
356 }
357
358 if p.read_byte(cx)? != b'u' {
359 return if validate {
360 Err(cx.marked_message(pos, "Unexpected end of hex escape"))
361 } else {
362 skip_escape(cx, p, validate)
368 };
369 }
370
371 let n2 = p.parse_hex_escape(cx)?;
372
373 if !(0xDC00..=0xDFFF).contains(&n2) {
374 return Err(
375 cx.marked_message(start, "Lone leading surrogate in hex escape")
376 );
377 }
378
379 let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
380
381 if char::from_u32(n).is_none() {
382 return Err(cx.marked_message(start, "Invalid unicode"));
383 }
384 }
385
386 _ => (),
389 }
390 }
391 _ => {
392 return Err(cx.marked_message(start, "Invalid string escape"));
393 }
394 };
395
396 Ok(())
397}