1use crate::{simd, Error};
2
3#[cold]
4#[inline]
5fn err_eof() -> Error { Error::UnexpectedEof }
6
7#[cold]
8#[inline]
9fn err_token() -> Error { Error::UnexpectedToken }
10
/// A decoded JSON string: either a slice of the input or a freshly built `String`.
pub enum JsonStr<'de> {
    /// The string is a direct slice of the input buffer.
    Borrowed(&'de str),
    /// The string was materialised into its own allocation.
    Owned(String),
}

impl<'de> JsonStr<'de> {
    /// Returns the input-lifetime slice for the `Borrowed` variant, `None` for `Owned`.
    #[inline]
    pub fn as_borrowed(&self) -> Option<&'de str> {
        match *self {
            JsonStr::Borrowed(slice) => Some(slice),
            JsonStr::Owned(_) => None,
        }
    }

    /// Views the string contents regardless of which variant holds them.
    #[inline]
    pub fn as_str(&self) -> &str {
        match *self {
            JsonStr::Borrowed(slice) => slice,
            JsonStr::Owned(ref text) => text.as_str(),
        }
    }

    /// Consumes the value and returns an owned `String`.
    ///
    /// The `Borrowed` variant is copied; the `Owned` variant is moved out as-is.
    #[inline]
    pub fn into_owned(self) -> String {
        match self {
            JsonStr::Owned(text) => text,
            JsonStr::Borrowed(slice) => String::from(slice),
        }
    }
}
42
/// A cursor over a borrowed byte buffer, used to scan JSON tokens.
pub struct Scanner<'de> {
    /// The complete input being scanned.
    input: &'de [u8],
    /// Index into `input` of the next unread byte.
    pos: usize,
    /// Scan counters; the field only exists when the `stats` feature is enabled.
    #[cfg(feature = "stats")]
    pub stats: crate::stats::ScannerStats,
}
49
50impl<'de> Scanner<'de> {
51 #[inline]
52 pub fn new(input: &'de [u8]) -> Self {
53 Scanner {
54 input,
55 pos: 0,
56 #[cfg(feature = "stats")]
57 stats: crate::stats::ScannerStats::default(),
58 }
59 }
60
61 #[inline]
62 pub fn new_str(s: &'de str) -> Self {
63 Self::new(s.as_bytes())
64 }
65
66 #[inline]
67 pub fn peek_byte(&self) -> Result<u8, Error> {
68 self.input.get(self.pos).copied().ok_or_else(err_eof)
69 }
70
71 #[inline]
72 pub fn advance(&mut self) {
73 self.pos += 1;
74 }
75
76 #[inline]
78 pub fn pos(&self) -> usize { self.pos }
79
80 #[inline]
81 pub fn set_pos(&mut self, saved_pos: usize) { self.pos = saved_pos; }
82
83 #[inline]
84 pub fn advance_by(&mut self, n: usize) {
85 self.pos += n;
86 }
87
88 #[inline]
90 pub fn remaining_input(&self) -> &'de [u8] {
91 &self.input[self.pos..]
92 }
93
94 #[inline]
95 pub fn expect_byte(&mut self, expected: u8) -> Result<(), Error> {
96 match self.input.get(self.pos) {
97 Some(&b) if b == expected => { self.pos += 1; Ok(()) }
98 _ => Err(err_token()),
99 }
100 }
101
102 pub fn expect_bytes(&mut self, expected: &[u8]) -> Result<(), Error> {
103 let end = self.pos + expected.len();
104 if self.input.get(self.pos..end) == Some(expected) {
105 self.pos = end;
106 Ok(())
107 } else {
108 Err(err_token())
109 }
110 }
111
112 #[inline(always)]
113 pub fn skip_whitespace(&mut self) {
114 if let Some(&b) = self.input.get(self.pos) {
117 if b > b' ' { return; }
118 } else {
119 return;
120 }
121 self.skip_whitespace_swar();
122 }
123
124 #[inline]
133 fn skip_whitespace_swar(&mut self) {
134 while self.pos + 8 <= self.input.len() {
135 let chunk = u64::from_le_bytes(
136 self.input[self.pos..self.pos + 8].try_into().unwrap(),
137 );
138 let sub = chunk.wrapping_sub(0x2121_2121_2121_2121_u64);
139 if (sub & 0x8080_8080_8080_8080_u64) == 0x8080_8080_8080_8080_u64 {
140 self.pos += 8;
141 } else {
142 break;
143 }
144 }
145 while let Some(&b) = self.input.get(self.pos) {
146 if b > b' ' { break; }
147 self.pos += 1;
148 }
149 }
150
151 #[inline(always)]
152 pub fn peek_byte_after_ws(&mut self) -> Result<u8, Error> {
153 self.skip_whitespace();
154 self.peek_byte()
155 }
156
157 pub fn read_key(&mut self) -> Result<&'de [u8], Error> {
160 self.skip_whitespace();
161 self.expect_byte(b'"')?;
162 let start = self.pos;
163 let stop = simd::find(self.input, self.pos);
164 match self.input.get(stop) {
165 Some(&b'"') => {
166 let k = &self.input[start..stop];
167 self.pos = stop + 1;
168 Ok(k)
169 }
170 Some(&b'\\') => Err(Error::EscapedKey),
171 _ => Err(err_eof()),
172 }
173 }
174
175 #[inline]
177 pub fn read_key_colon(&mut self) -> Result<&'de [u8], Error> {
178 let key = self.read_key()?;
179 if self.input.get(self.pos) == Some(&b':') {
181 self.pos += 1;
182 } else {
183 self.skip_whitespace();
184 self.expect_byte(b':')?;
185 }
186 Ok(key)
187 }
188
189 pub fn read_str(&mut self) -> Result<JsonStr<'de>, Error> {
194 self.skip_whitespace();
195 self.expect_byte(b'"')?;
196 let start = self.pos;
197 let stop = simd::find(self.input, start);
198
199 match self.input.get(stop) {
200 Some(&b'"') => {
201 let s = core::str::from_utf8(&self.input[start..stop])
202 .map_err(|_| Error::InvalidUtf8)?;
203 self.pos = stop + 1;
204
205 #[cfg(feature = "stats")]
206 { self.stats.zero_copy_borrows += 1; }
207
208 Ok(JsonStr::Borrowed(s))
209 }
210 Some(&b'\\') => {
211 self.pos = stop;
212 let owned = self.unescape_from(start)?;
213
214 #[cfg(feature = "stats")]
215 { self.stats.heap_allocations += 1; }
216
217 Ok(JsonStr::Owned(owned))
218 }
219 _ => Err(err_eof()),
220 }
221 }
222
223 pub fn read_number_bytes(&mut self) -> Result<&'de [u8], Error> {
225 self.skip_whitespace();
226 let start = self.pos;
227 if self.input.get(self.pos) == Some(&b'-') { self.pos += 1; }
228
229 #[inline(always)]
233 fn swar_all_digits(chunk: u64) -> bool {
234 let sub = chunk.wrapping_sub(0x3030_3030_3030_3030_u64);
235 if (sub & 0x8080_8080_8080_8080_u64) != 0 { return false; }
236 let check = sub.wrapping_add(0x7676_7676_7676_7676_u64);
237 (check & 0x8080_8080_8080_8080_u64) == 0
238 }
239 while self.pos + 8 <= self.input.len() {
240 let chunk = u64::from_le_bytes(
241 self.input[self.pos..self.pos + 8].try_into().unwrap(),
242 );
243 if swar_all_digits(chunk) { self.pos += 8; } else { break; }
244 }
245 while let Some(&b) = self.input.get(self.pos) { if b.is_ascii_digit() { self.pos += 1; } else { break; } }
246 if self.input.get(self.pos) == Some(&b'.') {
247 self.pos += 1;
248 while self.pos + 8 <= self.input.len() {
249 let chunk = u64::from_le_bytes(
250 self.input[self.pos..self.pos + 8].try_into().unwrap(),
251 );
252 if swar_all_digits(chunk) { self.pos += 8; } else { break; }
253 }
254 while let Some(&b) = self.input.get(self.pos) { if b.is_ascii_digit() { self.pos += 1; } else { break; } }
255 }
256 if matches!(self.input.get(self.pos), Some(b'e') | Some(b'E')) {
257 self.pos += 1;
258 if matches!(self.input.get(self.pos), Some(b'+') | Some(b'-')) { self.pos += 1; }
259 while let Some(&b) = self.input.get(self.pos) { if b.is_ascii_digit() { self.pos += 1; } else { break; } }
260 }
261 let end = self.pos;
262 if end == start || (end == start + 1 && self.input[start] == b'-') {
263 return Err(Error::InvalidNumber);
264 }
265
266 #[cfg(feature = "stats")]
267 { self.stats.bytes_scanned += (end - start) as u64; }
268
269 Ok(&self.input[start..end])
270 }
271
272 #[inline]
274 pub fn peek_null(&mut self) -> bool {
275 self.skip_whitespace();
276 self.input.get(self.pos..self.pos + 4) == Some(b"null")
277 }
278
279 pub fn read_null(&mut self) -> Result<(), Error> {
280 self.skip_whitespace();
281 self.expect_bytes(b"null")
282 }
283
284 pub fn read_bool(&mut self) -> Result<bool, Error> {
285 self.skip_whitespace();
286 match self.input.get(self.pos) {
287 Some(&b't') => {
288 self.pos += 4;
289 if self.input.get(self.pos - 3..self.pos) == Some(b"rue") {
290 Ok(true)
291 } else {
292 self.pos -= 4;
293 Err(err_token())
294 }
295 }
296 Some(&b'f') => {
297 self.pos += 5;
298 if self.input.get(self.pos - 4..self.pos) == Some(b"alse") {
299 Ok(false)
300 } else {
301 self.pos -= 5;
302 Err(err_token())
303 }
304 }
305 _ => Err(err_token()),
306 }
307 }
308
309 pub fn skip_value(&mut self) -> Result<(), Error> {
311 self.skip_whitespace();
312 match self.peek_byte()? {
313 b'"' => self.skip_string(),
314 b'{' => self.skip_object(),
315 b'[' => self.skip_array(),
316 b't' => self.expect_bytes(b"true"),
317 b'f' => self.expect_bytes(b"false"),
318 b'n' => self.expect_bytes(b"null"),
319 b'-' | b'0'..=b'9' => { self.read_number_bytes()?; Ok(()) }
320 _ => Err(err_token()),
321 }
322 }
323
324 fn skip_string(&mut self) -> Result<(), Error> {
325 self.expect_byte(b'"')?;
326 loop {
327 match self.input.get(self.pos) {
328 Some(&b'"') => { self.pos += 1; return Ok(()); }
329 Some(&b'\\') => { self.pos += 2; }
330 Some(_) => { self.pos += 1; }
331 None => return Err(err_eof()),
332 }
333 }
334 }
335
336 pub fn skip_object_tail(&mut self) -> Result<(), Error> {
339 loop {
340 self.skip_whitespace();
341 match self.peek_byte()? {
342 b'}' => { self.pos += 1; return Ok(()); }
343 b'"' => {
344 self.skip_string()?;
345 self.skip_whitespace();
346 self.expect_byte(b':')?;
347 self.skip_value()?;
348 self.skip_whitespace();
349 match self.peek_byte()? {
350 b',' => { self.pos += 1; }
351 b'}' => { self.pos += 1; return Ok(()); }
352 _ => return Err(err_token()),
353 }
354 }
355 _ => return Err(err_token()),
356 }
357 }
358 }
359
360 fn skip_object(&mut self) -> Result<(), Error> {
361 self.expect_byte(b'{')?;
362 self.skip_whitespace();
363 if self.input.get(self.pos) == Some(&b'}') { self.pos += 1; return Ok(()); }
364 loop {
365 self.skip_string()?;
366 self.skip_whitespace();
367 self.expect_byte(b':')?;
368 self.skip_value()?;
369 self.skip_whitespace();
370 match self.peek_byte()? {
371 b',' => { self.pos += 1; self.skip_whitespace(); }
372 b'}' => { self.pos += 1; break; }
373 _ => return Err(err_token()),
374 }
375 }
376 Ok(())
377 }
378
379 fn skip_array(&mut self) -> Result<(), Error> {
380 self.expect_byte(b'[')?;
381 self.skip_whitespace();
382 if self.input.get(self.pos) == Some(&b']') { self.pos += 1; return Ok(()); }
383 loop {
384 self.skip_value()?;
385 self.skip_whitespace();
386 match self.peek_byte()? {
387 b',' => { self.pos += 1; self.skip_whitespace(); }
388 b']' => { self.pos += 1; break; }
389 _ => return Err(err_token()),
390 }
391 }
392 Ok(())
393 }
394
395 fn unescape_from(&mut self, content_start: usize) -> Result<String, Error> {
398 let mut buf: Vec<u8> =
401 Vec::with_capacity(self.input.len().saturating_sub(content_start));
402 buf.extend_from_slice(&self.input[content_start..self.pos]);
403
404 loop {
405 match self.input.get(self.pos) {
406 Some(&b'"') => { self.pos += 1; break; }
407 Some(&b'\\') => {
408 self.pos += 1;
409 let esc = self.input.get(self.pos).copied().ok_or_else(err_eof)?;
410 self.pos += 1;
411 match esc {
412 b'"' => buf.push(b'"'),
413 b'\\' => buf.push(b'\\'),
414 b'/' => buf.push(b'/'),
415 b'n' => buf.push(b'\n'),
416 b't' => buf.push(b'\t'),
417 b'r' => buf.push(b'\r'),
418 b'b' => buf.push(0x08),
419 b'f' => buf.push(0x0C),
420 b'u' => {
421 let hex = self.input.get(self.pos..self.pos + 4).ok_or(Error::InvalidEscape)?;
422 let s = core::str::from_utf8(hex).map_err(|_| Error::InvalidEscape)?;
423 let code = u32::from_str_radix(s, 16).map_err(|_| Error::InvalidEscape)?;
424 let c = if (0xD800..=0xDBFF).contains(&code) {
425 self.pos += 4;
426 if self.input.get(self.pos..self.pos + 2) != Some(b"\\u") {
427 return Err(Error::InvalidEscape);
428 }
429 self.pos += 2;
430 let lo_hex = self.input.get(self.pos..self.pos + 4).ok_or(Error::InvalidEscape)?;
431 let lo_s = core::str::from_utf8(lo_hex).map_err(|_| Error::InvalidEscape)?;
432 let lo = u32::from_str_radix(lo_s, 16).map_err(|_| Error::InvalidEscape)?;
433 self.pos += 4;
434 let combined = 0x10000 + ((code - 0xD800) << 10) + (lo - 0xDC00);
435 char::from_u32(combined).ok_or(Error::InvalidEscape)?
436 } else {
437 self.pos += 4;
438 char::from_u32(code).ok_or(Error::InvalidEscape)?
439 };
440 let mut tmp = [0u8; 4];
441 buf.extend_from_slice(c.encode_utf8(&mut tmp).as_bytes());
442 continue;
443 }
444 _ => return Err(Error::InvalidEscape),
445 }
446 }
447 Some(_) => {
448 let seg_start = self.pos;
449 let stop = simd::find(self.input, self.pos);
450 buf.extend_from_slice(&self.input[seg_start..stop]);
451 self.pos = stop;
452 }
453 None => return Err(err_eof()),
454 }
455 }
456
457 String::from_utf8(buf).map_err(|_| Error::InvalidUtf8)
458 }
459}