1use std::{marker::PhantomData, pin::Pin, ptr::NonNull};
2
3use faststr::FastStr;
4
5use crate::{
6 error::ErrorCode,
7 input::JsonSlice,
8 parser::as_str,
9 util::{private::Sealed, utf8::from_utf8},
10 Error, JsonInput, Result,
11};
12
/// A 1-based line/column location inside a byte buffer.
pub(crate) struct Position {
    /// Line number, starting at 1.
    pub line: usize,
    /// Column number, starting at 1 and counted in bytes.
    pub column: usize,
}

impl Position {
    /// Translates the byte offset `i` within `data` into a line/column pair.
    ///
    /// Offsets past the end of `data` are clamped to `data.len()`. Each `\n`
    /// located before the offset starts a new line and resets the column to 1;
    /// every other byte moves the column forward by one.
    pub(crate) fn from_index(i: usize, data: &[u8]) -> Self {
        let prefix = &data[..i.min(data.len())];
        let (line, column) = prefix.iter().fold((1, 1), |(line, column), &byte| {
            if byte == b'\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        });
        Position { line, column }
    }
}
37
38#[doc(hidden)]
41pub trait Reader<'de>: Sealed {
42 fn remain(&self) -> usize;
43 fn eat(&mut self, n: usize);
44 fn backward(&mut self, n: usize);
45 fn peek_n(&self, n: usize) -> Option<&'de [u8]>;
46 fn peek(&self) -> Option<u8>;
47 fn index(&self) -> usize;
48 fn at(&self, index: usize) -> u8;
49 fn set_index(&mut self, index: usize);
50 fn next_n(&mut self, n: usize) -> Option<&'de [u8]>;
51
52 #[inline(always)]
53 fn next(&mut self) -> Option<u8> {
54 self.peek().inspect(|_| {
55 self.eat(1);
56 })
57 }
58 fn cur_ptr(&mut self) -> *mut u8;
59
60 unsafe fn set_ptr(&mut self, cur: *mut u8);
63 fn slice_unchecked(&self, start: usize, end: usize) -> &'de [u8];
64
65 fn as_u8_slice(&self) -> &'de [u8];
66
67 fn check_utf8_final(&self) -> Result<()>;
68
69 fn next_invalid_utf8(&self) -> usize;
70
71 fn check_invalid_utf8(&mut self);
72
73 fn slice_ref(&self, subset: &'de [u8]) -> JsonSlice<'de>;
74
75 fn origin_input(&self) -> &'de [u8] {
76 self.as_u8_slice()
77 }
78}
79
/// Backing storage of a `Read`: either an owned `FastStr` or borrowed bytes.
enum PinnedInput<'a> {
    /// An owned `FastStr` kept in a pinned heap allocation.
    // NOTE(review): presumably boxed and pinned so that the raw pointer taken
    // by `as_ptr` stays valid when the enclosing value is moved — confirm.
    FastStr(Pin<Box<FastStr>>),
    /// Bytes borrowed from the caller for `'a`.
    Slice(&'a [u8]),
}
84
85impl<'a> PinnedInput<'a> {
86 unsafe fn as_ptr(&self) -> NonNull<[u8]> {
87 match self {
88 Self::FastStr(f) => f.as_bytes().into(),
89 Self::Slice(slice) => (*slice).into(),
90 }
91 }
92
93 fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
94 match self {
95 Self::FastStr(f) => JsonSlice::FastStr(f.slice_ref(as_str(subset))),
96 Self::Slice(_) => JsonSlice::Raw(subset),
97 }
98 }
99}
100
101impl<'a> From<JsonSlice<'a>> for PinnedInput<'a> {
102 fn from(input: JsonSlice<'a>) -> Self {
103 match input {
104 JsonSlice::Raw(slice) => Self::Slice(slice),
105 JsonSlice::FastStr(f) => Self::FastStr(Pin::new(Box::new(f))),
106 }
107 }
108}
109
/// Bounds-checked `Reader` over a borrowed byte slice or an owned `FastStr`.
pub struct Read<'a> {
    /// Backing storage; keeps an owned `FastStr` alive for as long as the
    /// reader exists.
    input: PinnedInput<'a>,
    /// Raw pointer to the bytes of `input`, captured once in `new_in`.
    slice: NonNull<[u8]>,
    /// Current cursor offset into the input.
    pub(crate) index: usize,
    /// Offset of the recorded invalid UTF-8 sequence, or `usize::MAX` when
    /// none is recorded (including when validation was not requested).
    next_invalid_utf8: usize,
}
142
143impl<'a> Read<'a> {
144 pub fn from<I: JsonInput<'a>>(input: I) -> Self {
146 let need = input.need_utf8_valid();
147 Self::new_in(input.to_json_slice(), need)
148 }
149
150 pub(crate) fn new(slice: &'a [u8], validate_utf8: bool) -> Self {
151 Self::new_in(slice.to_json_slice(), validate_utf8)
152 }
153
154 pub(crate) fn new_in(input: JsonSlice<'a>, validate_utf8: bool) -> Self {
155 let input: PinnedInput<'a> = input.into();
156 let slice = unsafe { input.as_ptr() };
158
159 let next_invalid_utf8 = validate_utf8
161 .then(|| {
162 from_utf8(unsafe { slice.as_ref() })
163 .err()
164 .map(|e| e.offset())
165 })
166 .flatten()
167 .unwrap_or(usize::MAX);
168
169 Self {
170 input,
171 slice,
172 index: 0,
173 next_invalid_utf8,
174 }
175 }
176
177 #[inline(always)]
178 fn slice(&self) -> &'a [u8] {
179 unsafe { self.slice.as_ref() }
180 }
181}
182
183impl<'a> Reader<'a> for Read<'a> {
184 #[inline(always)]
185 fn remain(&self) -> usize {
186 self.slice().len() - self.index
187 }
188
189 #[inline(always)]
190 fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
191 self.input.slice_ref(subset)
192 }
193
194 #[inline(always)]
195 fn peek_n(&self, n: usize) -> Option<&'a [u8]> {
196 let end = self.index + n;
197 (end <= self.slice().len()).then(|| {
198 let ptr = self.slice()[self.index..].as_ptr();
199 unsafe { std::slice::from_raw_parts(ptr, n) }
200 })
201 }
202
203 #[inline(always)]
204 fn set_index(&mut self, index: usize) {
205 self.index = index
206 }
207
208 #[inline(always)]
209 fn peek(&self) -> Option<u8> {
210 if self.index < self.slice().len() {
211 Some(self.slice()[self.index])
212 } else {
213 None
214 }
215 }
216
217 #[inline(always)]
218 fn at(&self, index: usize) -> u8 {
219 self.slice()[index]
220 }
221
222 #[inline(always)]
223 fn next_n(&mut self, n: usize) -> Option<&'a [u8]> {
224 let new_index = self.index + n;
225 if new_index <= self.slice().len() {
226 let ret = &self.slice()[self.index..new_index];
227 self.index = new_index;
228 Some(ret)
229 } else {
230 None
231 }
232 }
233
234 #[inline(always)]
235 fn cur_ptr(&mut self) -> *mut u8 {
236 panic!("should only used in PaddedSliceRead");
237 }
238
239 #[inline(always)]
240 unsafe fn set_ptr(&mut self, _cur: *mut u8) {
241 panic!("should only used in PaddedSliceRead");
242 }
243
244 #[inline(always)]
245 fn index(&self) -> usize {
246 self.index
247 }
248
249 #[inline(always)]
250 fn eat(&mut self, n: usize) {
251 self.index += n;
252 }
253
254 #[inline(always)]
255 fn backward(&mut self, n: usize) {
256 self.index -= n;
257 }
258
259 #[inline(always)]
260 fn slice_unchecked(&self, start: usize, end: usize) -> &'a [u8] {
261 &self.slice()[start..end]
262 }
263
264 #[inline(always)]
265 fn as_u8_slice(&self) -> &'a [u8] {
266 self.slice()
267 }
268
269 #[inline(always)]
270 fn check_utf8_final(&self) -> Result<()> {
271 if self.next_invalid_utf8 == usize::MAX {
272 Ok(())
273 } else {
274 Err(Error::syntax(
275 ErrorCode::InvalidUTF8,
276 self.origin_input(),
277 self.next_invalid_utf8,
278 ))
279 }
280 }
281
282 fn check_invalid_utf8(&mut self) {
283 self.next_invalid_utf8 = match from_utf8(&self.origin_input()[self.index..]) {
284 Ok(_) => usize::MAX,
285 Err(e) => self.index + e.offset(),
286 };
287 }
288
289 fn next_invalid_utf8(&self) -> usize {
290 self.next_invalid_utf8
291 }
292}
293
/// Pointer-based reader over a mutable buffer that ends with
/// `PADDING_SIZE` bytes of padding.
pub(crate) struct PaddedSliceRead<'a> {
    /// Start of the buffer.
    base: NonNull<u8>,
    /// Current cursor position; starts out equal to `base`.
    cur: NonNull<u8>,
    /// Buffer length with the trailing padding excluded.
    len: usize,
    /// The JSON bytes handed to `new`, kept alongside the buffer.
    origin: &'a [u8],
    /// Ties the raw pointers to the mutable borrow of the buffer.
    _life: PhantomData<&'a mut [u8]>,
}

impl<'a> PaddedSliceRead<'a> {
    /// Number of trailing bytes of `buffer` that are not counted in `len`.
    const PADDING_SIZE: usize = 64;

    /// Creates a reader positioned at the start of `buffer`.
    ///
    /// `json` is stored as the original input. NOTE(review): `buffer`
    /// presumably holds a copy of `json` followed by the padding — confirm
    /// against the callers.
    ///
    /// # Panics
    ///
    /// Panics when `buffer` is shorter than `PADDING_SIZE`. The subtraction
    /// used to wrap around silently in release builds, producing a huge `len`.
    pub fn new(buffer: &'a mut [u8], json: &'a [u8]) -> Self {
        assert!(
            buffer.len() >= Self::PADDING_SIZE,
            "PaddedSliceRead buffer must include PADDING_SIZE bytes of padding"
        );
        let len = buffer.len() - Self::PADDING_SIZE;
        // A reference is never null, so no unsafe constructor is needed.
        let base = NonNull::from(buffer).cast::<u8>();
        Self {
            base,
            cur: base,
            len,
            origin: json,
            _life: PhantomData,
        }
    }
}
315
316impl<'a> Reader<'a> for PaddedSliceRead<'a> {
317 #[inline(always)]
318 fn as_u8_slice(&self) -> &'a [u8] {
319 unsafe { std::slice::from_raw_parts(self.base.as_ptr(), self.len) }
320 }
321
322 #[inline(always)]
323 fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
324 subset.into()
325 }
326
327 #[inline(always)]
328 fn remain(&self) -> usize {
329 let remain = self.len as isize - self.index() as isize;
330 std::cmp::max(remain, 0) as usize
331 }
332
333 #[inline(always)]
334 fn peek_n(&self, n: usize) -> Option<&'a [u8]> {
335 unsafe { Some(std::slice::from_raw_parts(self.cur.as_ptr(), n)) }
336 }
337
338 #[inline(always)]
339 fn set_index(&mut self, index: usize) {
340 unsafe { self.cur = NonNull::new_unchecked(self.base.as_ptr().add(index)) }
341 }
342
343 #[inline(always)]
344 fn peek(&self) -> Option<u8> {
345 unsafe { Some(*self.cur.as_ptr()) }
346 }
347
348 #[inline(always)]
349 fn at(&self, index: usize) -> u8 {
350 unsafe { *(self.base.as_ptr().add(index)) }
351 }
352
353 #[inline(always)]
354 fn next_n(&mut self, n: usize) -> Option<&'a [u8]> {
355 unsafe {
356 let ptr = self.cur.as_ptr();
357 self.cur = NonNull::new_unchecked(ptr.add(n));
358 Some(std::slice::from_raw_parts(ptr, n))
359 }
360 }
361
362 #[inline(always)]
363 fn index(&self) -> usize {
364 unsafe { self.cur.as_ptr().offset_from(self.base.as_ptr()) as usize }
365 }
366
367 fn eat(&mut self, n: usize) {
368 unsafe {
369 self.cur = NonNull::new_unchecked(self.cur.as_ptr().add(n));
370 }
371 }
372
373 #[inline(always)]
374 fn cur_ptr(&mut self) -> *mut u8 {
375 self.cur.as_ptr()
376 }
377
378 #[inline(always)]
379 unsafe fn set_ptr(&mut self, cur: *mut u8) {
380 self.cur = NonNull::new_unchecked(cur);
381 }
382
383 #[inline(always)]
384 fn backward(&mut self, n: usize) {
385 unsafe {
386 self.cur = NonNull::new_unchecked(self.cur.as_ptr().sub(n));
387 }
388 }
389
390 #[inline(always)]
391 fn slice_unchecked(&self, start: usize, end: usize) -> &'a [u8] {
392 unsafe {
393 let ptr = self.base.as_ptr().add(start);
394 let n = end - start;
395 std::slice::from_raw_parts(ptr, n)
396 }
397 }
398
399 #[inline(always)]
400 fn check_invalid_utf8(&mut self) {
401 }
403
404 #[inline(always)]
405 fn next_invalid_utf8(&self) -> usize {
406 usize::MAX
407 }
408
409 #[inline(always)]
410 fn check_utf8_final(&self) -> Result<()> {
411 Ok(())
412 }
413
414 #[inline(always)]
415 fn origin_input(&self) -> &'a [u8] {
416 self.origin
417 }
418}
419
#[cfg(test)]
mod test {
    use bytes::Bytes;
    use faststr::FastStr;

    use super::*;
    use crate::{Deserialize, Deserializer};

    /// `peek` and `peek_n` expose upcoming bytes and leave the cursor alone.
    fn test_peek() {
        let reader = Read::new(b"1234567890", false);
        assert_eq!(reader.peek(), Some(b'1'));
        assert_eq!(reader.peek_n(4), Some(&b"1234"[..]));
    }

    /// `next` and `next_n` hand out bytes and move the cursor past them.
    fn test_next() {
        let mut reader = Read::new(b"1234567890", false);
        assert_eq!(reader.next(), Some(b'1'));
        assert_eq!(reader.peek(), Some(b'2'));
        assert_eq!(reader.next_n(4), Some(&b"2345"[..]));
        assert_eq!(reader.peek(), Some(b'6'));
    }

    /// `index` reflects how many bytes have been consumed so far.
    fn test_index() {
        let mut reader = Read::new(b"1234567890", false);
        assert_eq!(reader.index(), 0);

        assert!(reader.next().is_some());
        assert_eq!(reader.index(), 1);

        assert!(reader.next_n(4).is_some());
        assert_eq!(reader.index(), 5);
    }

    /// Runs the cursor checks above as one test case.
    #[test]
    fn test_reader() {
        test_peek();
        test_next();
        test_index();
    }

    /// Deserializes `$json` through a `Read` and expects the integer 123.
    macro_rules! test_deserialize_reader {
        ($json:expr) => {{
            let mut de = Deserializer::new(Read::from($json));
            let num: i32 = Deserialize::deserialize(&mut de).unwrap();
            assert_eq!(num, 123);
        }};
    }

    /// Every supported input type must produce the same value.
    #[test]
    fn test_deserialize() {
        let bytes = Bytes::from("123");
        let fast = FastStr::from("123");
        let owned = String::from("123");
        test_deserialize_reader!("123");
        test_deserialize_reader!("123".as_bytes());
        test_deserialize_reader!(&bytes);
        test_deserialize_reader!(&fast);
        test_deserialize_reader!(&owned);
    }
}