1use std::{marker::PhantomData, pin::Pin, ptr::NonNull};
2
3use faststr::FastStr;
4
5use crate::{
6 error::ErrorCode,
7 input::JsonSlice,
8 parser::as_str,
9 util::{private::Sealed, utf8::from_utf8},
10 Error, JsonInput, Result,
11};
12
/// A 1-based line/column location inside a byte buffer.
pub(crate) struct Position {
    /// Line number; the first line is `1`.
    pub line: usize,
    /// Column number; the first byte of a line is column `1`.
    pub column: usize,
}

impl Position {
    /// Computes the position of byte offset `i` within `data`.
    ///
    /// `i` is clamped to `data.len()`. Every `\n` before the offset starts a
    /// new line; every other byte (not character) advances the column by one.
    pub(crate) fn from_index(i: usize, data: &[u8]) -> Self {
        let i = i.min(data.len());
        let consumed = &data[..i];

        let line = 1 + consumed.iter().filter(|&&byte| byte == b'\n').count();
        // The column is the distance from the last newline before `i`, or
        // from the start of the buffer when no newline has been seen yet.
        let column = match consumed.iter().rposition(|&byte| byte == b'\n') {
            Some(last_newline) => i - last_newline,
            None => i + 1,
        };

        Position { line, column }
    }
}
37
38#[doc(hidden)]
41pub trait Reader<'de>: Sealed {
42 fn remain(&self) -> usize;
43 fn eat(&mut self, n: usize);
44 fn backward(&mut self, n: usize);
45 fn peek_n(&self, n: usize) -> Option<&'de [u8]>;
46 fn peek(&self) -> Option<u8>;
47 fn index(&self) -> usize;
48 fn at(&self, index: usize) -> u8;
49 fn set_index(&mut self, index: usize);
50 fn next_n(&mut self, n: usize) -> Option<&'de [u8]>;
51
52 #[inline(always)]
53 fn next(&mut self) -> Option<u8> {
54 self.peek().inspect(|_| {
55 self.eat(1);
56 })
57 }
58 fn cur_ptr(&mut self) -> *mut u8;
59
60 unsafe fn set_ptr(&mut self, cur: *mut u8);
63 fn slice_unchecked(&self, start: usize, end: usize) -> &'de [u8];
64
65 fn as_u8_slice(&self) -> &'de [u8];
66
67 fn check_utf8_final(&self) -> Result<()>;
68
69 fn next_invalid_utf8(&self) -> usize;
70
71 fn check_invalid_utf8(&mut self);
72
73 fn slice_ref(&self, subset: &'de [u8]) -> JsonSlice<'de>;
74
75 fn origin_input(&self) -> &'de [u8] {
76 self.as_u8_slice()
77 }
78}
79
80enum PinnedInput<'a> {
81 FastStr(Pin<Box<FastStr>>),
82 Slice(&'a [u8]),
83}
84
85impl<'a> PinnedInput<'a> {
86 unsafe fn as_ptr(&self) -> NonNull<[u8]> {
87 match self {
88 Self::FastStr(f) => f.as_bytes().into(),
89 Self::Slice(slice) => (*slice).into(),
90 }
91 }
92
93 fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
94 match self {
95 Self::FastStr(f) => JsonSlice::FastStr(f.slice_ref(as_str(subset))),
96 Self::Slice(_) => JsonSlice::Raw(subset),
97 }
98 }
99}
100
101impl<'a> From<JsonSlice<'a>> for PinnedInput<'a> {
102 fn from(input: JsonSlice<'a>) -> Self {
103 match input {
104 JsonSlice::Raw(slice) => Self::Slice(slice),
105 JsonSlice::FastStr(f) => Self::FastStr(Pin::new(Box::new(f))),
106 }
107 }
108}
109
110pub struct Read<'a> {
135 input: PinnedInput<'a>,
137 pub(crate) index: usize,
138 next_invalid_utf8: usize,
140}
141
142impl<'a> Read<'a> {
143 pub fn from<I: JsonInput<'a>>(input: I) -> Self {
145 let need = input.need_utf8_valid();
146 Self::new_in(input.to_json_slice(), need)
147 }
148
149 pub(crate) fn new(slice: &'a [u8], validate_utf8: bool) -> Self {
150 Self::new_in(slice.to_json_slice(), validate_utf8)
151 }
152
153 pub(crate) fn new_in(input: JsonSlice<'a>, validate_utf8: bool) -> Self {
154 let input: PinnedInput<'a> = input.into();
155 let slice: NonNull<[u8]> = unsafe { input.as_ptr() };
157
158 let next_invalid_utf8 = validate_utf8
160 .then(|| {
161 from_utf8(unsafe { slice.as_ref() })
162 .err()
163 .map(|e| e.offset())
164 })
165 .flatten()
166 .unwrap_or(usize::MAX);
167
168 Self {
169 input,
170 index: 0,
171 next_invalid_utf8,
172 }
173 }
174
175 #[inline(always)]
176 fn slice(&self) -> &'a [u8] {
177 unsafe { self.input.as_ptr().as_ref() }
178 }
179}
180
181impl<'a> Reader<'a> for Read<'a> {
182 #[inline(always)]
183 fn remain(&self) -> usize {
184 self.slice().len() - self.index
185 }
186
187 #[inline(always)]
188 fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
189 self.input.slice_ref(subset)
190 }
191
192 #[inline(always)]
193 fn peek_n(&self, n: usize) -> Option<&'a [u8]> {
194 let end = self.index + n;
195 (end <= self.slice().len()).then(|| {
196 let ptr = self.slice()[self.index..].as_ptr();
197 unsafe { std::slice::from_raw_parts(ptr, n) }
198 })
199 }
200
201 #[inline(always)]
202 fn set_index(&mut self, index: usize) {
203 self.index = index
204 }
205
206 #[inline(always)]
207 fn peek(&self) -> Option<u8> {
208 if self.index < self.slice().len() {
209 Some(self.slice()[self.index])
210 } else {
211 None
212 }
213 }
214
215 #[inline(always)]
216 fn at(&self, index: usize) -> u8 {
217 self.slice()[index]
218 }
219
220 #[inline(always)]
221 fn next_n(&mut self, n: usize) -> Option<&'a [u8]> {
222 let new_index = self.index + n;
223 if new_index <= self.slice().len() {
224 let ret = &self.slice()[self.index..new_index];
225 self.index = new_index;
226 Some(ret)
227 } else {
228 None
229 }
230 }
231
232 #[inline(always)]
233 fn cur_ptr(&mut self) -> *mut u8 {
234 panic!("should only used in PaddedSliceRead");
235 }
236
237 #[inline(always)]
238 unsafe fn set_ptr(&mut self, _cur: *mut u8) {
239 panic!("should only used in PaddedSliceRead");
240 }
241
242 #[inline(always)]
243 fn index(&self) -> usize {
244 self.index
245 }
246
247 #[inline(always)]
248 fn eat(&mut self, n: usize) {
249 self.index += n;
250 }
251
252 #[inline(always)]
253 fn backward(&mut self, n: usize) {
254 self.index -= n;
255 }
256
257 #[inline(always)]
258 fn slice_unchecked(&self, start: usize, end: usize) -> &'a [u8] {
259 &self.slice()[start..end]
260 }
261
262 #[inline(always)]
263 fn as_u8_slice(&self) -> &'a [u8] {
264 self.slice()
265 }
266
267 #[inline(always)]
268 fn check_utf8_final(&self) -> Result<()> {
269 if self.next_invalid_utf8 == usize::MAX {
270 Ok(())
271 } else {
272 Err(Error::syntax(
273 ErrorCode::InvalidUTF8,
274 self.origin_input(),
275 self.next_invalid_utf8,
276 ))
277 }
278 }
279
280 fn check_invalid_utf8(&mut self) {
281 self.next_invalid_utf8 = match from_utf8(&self.origin_input()[self.index..]) {
282 Ok(_) => usize::MAX,
283 Err(e) => self.index + e.offset(),
284 };
285 }
286
287 fn next_invalid_utf8(&self) -> usize {
288 self.next_invalid_utf8
289 }
290}
291
/// A pointer-based reader over a mutable buffer whose trailing
/// `PADDING_SIZE` bytes are treated as padding and excluded from the
/// logical length.
pub(crate) struct PaddedSliceRead<'a> {
    /// Start of the buffer.
    base: NonNull<u8>,
    /// Current cursor position.
    cur: NonNull<u8>,
    /// Logical length: the buffer length minus the padding.
    len: usize,
    /// The original input, returned by `origin_input`.
    origin: &'a [u8],
    /// Ties the raw pointers to the mutable borrow of the buffer.
    _life: PhantomData<&'a mut [u8]>,
}

impl<'a> PaddedSliceRead<'a> {
    /// Number of trailing bytes of the buffer reserved as padding.
    const PADDING_SIZE: usize = 64;

    /// Creates a reader positioned at the start of `buffer`.
    ///
    /// `json` is kept only as the original input.
    ///
    /// # Panics
    ///
    /// Panics if `buffer` is shorter than `PADDING_SIZE`. An unchecked
    /// subtraction would wrap in release builds and produce a bogus length
    /// that later feeds raw slice construction.
    pub fn new(buffer: &'a mut [u8], json: &'a [u8]) -> Self {
        let len = buffer
            .len()
            .checked_sub(Self::PADDING_SIZE)
            .expect("buffer must be at least PaddedSliceRead::PADDING_SIZE (64) bytes long");
        // SAFETY: a pointer obtained from a slice reference is never null.
        let base = unsafe { NonNull::new_unchecked(buffer.as_mut_ptr()) };
        Self {
            base,
            cur: base,
            len,
            origin: json,
            _life: PhantomData,
        }
    }
}
313
314impl<'a> Reader<'a> for PaddedSliceRead<'a> {
315 #[inline(always)]
316 fn as_u8_slice(&self) -> &'a [u8] {
317 unsafe { std::slice::from_raw_parts(self.base.as_ptr(), self.len) }
318 }
319
320 #[inline(always)]
321 fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
322 subset.into()
323 }
324
325 #[inline(always)]
326 fn remain(&self) -> usize {
327 let remain = self.len as isize - self.index() as isize;
328 std::cmp::max(remain, 0) as usize
329 }
330
331 #[inline(always)]
332 fn peek_n(&self, n: usize) -> Option<&'a [u8]> {
333 unsafe { Some(std::slice::from_raw_parts(self.cur.as_ptr(), n)) }
334 }
335
336 #[inline(always)]
337 fn set_index(&mut self, index: usize) {
338 unsafe { self.cur = NonNull::new_unchecked(self.base.as_ptr().add(index)) }
339 }
340
341 #[inline(always)]
342 fn peek(&self) -> Option<u8> {
343 unsafe { Some(*self.cur.as_ptr()) }
344 }
345
346 #[inline(always)]
347 fn at(&self, index: usize) -> u8 {
348 unsafe { *(self.base.as_ptr().add(index)) }
349 }
350
351 #[inline(always)]
352 fn next_n(&mut self, n: usize) -> Option<&'a [u8]> {
353 unsafe {
354 let ptr = self.cur.as_ptr();
355 self.cur = NonNull::new_unchecked(ptr.add(n));
356 Some(std::slice::from_raw_parts(ptr, n))
357 }
358 }
359
360 #[inline(always)]
361 fn index(&self) -> usize {
362 unsafe { self.cur.as_ptr().offset_from(self.base.as_ptr()) as usize }
363 }
364
365 fn eat(&mut self, n: usize) {
366 unsafe {
367 self.cur = NonNull::new_unchecked(self.cur.as_ptr().add(n));
368 }
369 }
370
371 #[inline(always)]
372 fn cur_ptr(&mut self) -> *mut u8 {
373 self.cur.as_ptr()
374 }
375
376 #[inline(always)]
377 unsafe fn set_ptr(&mut self, cur: *mut u8) {
378 self.cur = NonNull::new_unchecked(cur);
379 }
380
381 #[inline(always)]
382 fn backward(&mut self, n: usize) {
383 unsafe {
384 self.cur = NonNull::new_unchecked(self.cur.as_ptr().sub(n));
385 }
386 }
387
388 #[inline(always)]
389 fn slice_unchecked(&self, start: usize, end: usize) -> &'a [u8] {
390 unsafe {
391 let ptr = self.base.as_ptr().add(start);
392 let n = end - start;
393 std::slice::from_raw_parts(ptr, n)
394 }
395 }
396
397 #[inline(always)]
398 fn check_invalid_utf8(&mut self) {
399 }
401
402 #[inline(always)]
403 fn next_invalid_utf8(&self) -> usize {
404 usize::MAX
405 }
406
407 #[inline(always)]
408 fn check_utf8_final(&self) -> Result<()> {
409 Ok(())
410 }
411
412 #[inline(always)]
413 fn origin_input(&self) -> &'a [u8] {
414 self.origin
415 }
416}
417
#[cfg(test)]
mod test {
    use bytes::Bytes;
    use faststr::FastStr;

    use super::*;
    use crate::{Deserialize, Deserializer};

    // Peeking returns the upcoming bytes and leaves the cursor in place.
    fn test_peek() {
        let input = b"1234567890";
        let rd = Read::new(input, false);
        assert_eq!(rd.peek(), Some(b'1'));
        assert_eq!(rd.peek_n(4).unwrap(), &b"1234"[..]);
    }

    // `next` / `next_n` return bytes and advance the cursor past them.
    fn test_next() {
        let input = b"1234567890";
        let mut rd = Read::new(input, false);
        assert_eq!(rd.next(), Some(b'1'));
        assert_eq!(rd.peek(), Some(b'2'));
        assert_eq!(rd.next_n(4).unwrap(), &b"2345"[..]);
        assert_eq!(rd.peek(), Some(b'6'));
    }

    // `index` reflects how many bytes have been consumed so far.
    fn test_index() {
        let input = b"1234567890";
        let mut rd = Read::new(input, false);
        assert_eq!(rd.index(), 0);

        rd.next().unwrap();
        assert_eq!(rd.index(), 1);

        rd.next_n(4).unwrap();
        assert_eq!(rd.index(), 5);
    }

    // Runs the cursor checks above as a single test case.
    #[test]
    fn test_reader() {
        test_peek();
        test_next();
        test_index();
    }

    // Deserializes `$json` through `Read::from` and expects the integer 123.
    macro_rules! test_deserialize_reader {
        ($json:expr) => {
            let mut de = Deserializer::new(Read::from($json));
            let num: i32 = Deserialize::deserialize(&mut de).unwrap();
            assert_eq!(num, 123);
        };
    }

    // `Read::from` must accept every supported input flavor.
    #[test]
    fn test_deserialize() {
        let b = Bytes::from(r#"123"#);
        let f = FastStr::from(r#"123"#);
        let s = String::from(r#"123"#);
        test_deserialize_reader!(r#"123"#);
        test_deserialize_reader!(r#"123"#.as_bytes());
        test_deserialize_reader!(&b);
        test_deserialize_reader!(&f);
        test_deserialize_reader!(&s);
    }
}