Skip to main content

sonic_rs/
reader.rs

1use std::{marker::PhantomData, pin::Pin, ptr::NonNull};
2
3use faststr::FastStr;
4
5use crate::{
6    error::ErrorCode,
7    input::JsonSlice,
8    parser::as_str,
9    util::{private::Sealed, utf8::from_utf8},
10    Error, JsonInput, Result,
11};
12
/// One-based line/column location inside a byte buffer.
pub(crate) struct Position {
    pub line: usize,
    pub column: usize,
}

impl Position {
    /// Computes the line and column reached after walking the first `i` bytes of `data`.
    ///
    /// `i` is clamped to `data.len()`. Both counters start at 1; every `b'\n'` starts a new
    /// line and resets the column, every other byte advances the column by one (columns are
    /// counted in bytes, not characters).
    pub(crate) fn from_index(i: usize, data: &[u8]) -> Self {
        // Never walk past the end of the buffer.
        let end = i.min(data.len());
        let (line, column) = data[..end]
            .iter()
            .fold((1usize, 1usize), |(line, column), &byte| {
                if byte == b'\n' {
                    (line + 1, 1)
                } else {
                    (line, column + 1)
                }
            });
        Position { line, column }
    }
}
37
38/// Trait is used by the deserializer for iterating over input. And it is sealed and cannot be
39/// implemented for types outside of sonic_rs.
40#[doc(hidden)]
41pub trait Reader<'de>: Sealed {
42    fn remain(&self) -> usize;
43    fn eat(&mut self, n: usize);
44    fn backward(&mut self, n: usize);
45    fn peek_n(&self, n: usize) -> Option<&'de [u8]>;
46    fn peek(&self) -> Option<u8>;
47    fn index(&self) -> usize;
48    fn at(&self, index: usize) -> u8;
49    fn set_index(&mut self, index: usize);
50    fn next_n(&mut self, n: usize) -> Option<&'de [u8]>;
51
52    #[inline(always)]
53    fn next(&mut self) -> Option<u8> {
54        self.peek().inspect(|_| {
55            self.eat(1);
56        })
57    }
58    fn cur_ptr(&mut self) -> *mut u8;
59
60    /// # Safety
61    /// cur must be a valid pointer in the slice
62    unsafe fn set_ptr(&mut self, cur: *mut u8);
63    fn slice_unchecked(&self, start: usize, end: usize) -> &'de [u8];
64
65    fn as_u8_slice(&self) -> &'de [u8];
66
67    fn check_utf8_final(&self) -> Result<()>;
68
69    fn next_invalid_utf8(&self) -> usize;
70
71    fn check_invalid_utf8(&mut self);
72
73    fn slice_ref(&self, subset: &'de [u8]) -> JsonSlice<'de>;
74
75    fn origin_input(&self) -> &'de [u8] {
76        self.as_u8_slice()
77    }
78}
79
80enum PinnedInput<'a> {
81    FastStr(Pin<Box<FastStr>>),
82    Slice(&'a [u8]),
83}
84
85impl<'a> PinnedInput<'a> {
86    unsafe fn as_ptr(&self) -> NonNull<[u8]> {
87        match self {
88            Self::FastStr(f) => f.as_bytes().into(),
89            Self::Slice(slice) => (*slice).into(),
90        }
91    }
92
93    fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
94        match self {
95            Self::FastStr(f) => JsonSlice::FastStr(f.slice_ref(as_str(subset))),
96            Self::Slice(_) => JsonSlice::Raw(subset),
97        }
98    }
99}
100
101impl<'a> From<JsonSlice<'a>> for PinnedInput<'a> {
102    fn from(input: JsonSlice<'a>) -> Self {
103        match input {
104            JsonSlice::Raw(slice) => Self::Slice(slice),
105            JsonSlice::FastStr(f) => Self::FastStr(Pin::new(Box::new(f))),
106        }
107    }
108}
109
110/// JSON input source that reads from a string/bytes-like JSON input.
111///
112/// Support most common types: &str, &[u8], &FastStr, &Bytes and &String
113///
114/// # Examples
115/// ```
116/// use bytes::Bytes;
117/// use faststr::FastStr;
118/// use serde::de::Deserialize;
119/// use sonic_rs::{Deserializer, Read};
120///
121/// let mut de = Deserializer::new(Read::from(r#"123"#));
122/// let num: i32 = Deserialize::deserialize(&mut de).unwrap();
123/// assert_eq!(num, 123);
124///
125/// let mut de = Deserializer::new(Read::from(r#"123"#.as_bytes()));
126/// let num: i32 = Deserialize::deserialize(&mut de).unwrap();
127/// assert_eq!(num, 123);
128///
129/// let f = FastStr::new("123");
130/// let mut de = Deserializer::new(Read::from(&f));
131/// let num: i32 = Deserialize::deserialize(&mut de).unwrap();
132/// assert_eq!(num, 123);
133/// ```
134pub struct Read<'a> {
135    // pin the input JSON, because `slice` will reference it
136    input: PinnedInput<'a>,
137    pub(crate) index: usize,
138    // next invalid utf8 position, if not found, will be usize::MAX
139    next_invalid_utf8: usize,
140}
141
142impl<'a> Read<'a> {
143    /// Make a `Read` from string/bytes-like JSON input.
144    pub fn from<I: JsonInput<'a>>(input: I) -> Self {
145        let need = input.need_utf8_valid();
146        Self::new_in(input.to_json_slice(), need)
147    }
148
149    pub(crate) fn new(slice: &'a [u8], validate_utf8: bool) -> Self {
150        Self::new_in(slice.to_json_slice(), validate_utf8)
151    }
152
153    pub(crate) fn new_in(input: JsonSlice<'a>, validate_utf8: bool) -> Self {
154        let input: PinnedInput<'a> = input.into();
155        // #safety: we pinned the input json
156        let slice: NonNull<[u8]> = unsafe { input.as_ptr() };
157
158        // validate the utf-8 at first for slice
159        let next_invalid_utf8 = validate_utf8
160            .then(|| {
161                from_utf8(unsafe { slice.as_ref() })
162                    .err()
163                    .map(|e| e.offset())
164            })
165            .flatten()
166            .unwrap_or(usize::MAX);
167
168        Self {
169            input,
170            index: 0,
171            next_invalid_utf8,
172        }
173    }
174
175    #[inline(always)]
176    fn slice(&self) -> &'a [u8] {
177        unsafe { self.input.as_ptr().as_ref() }
178    }
179}
180
181impl<'a> Reader<'a> for Read<'a> {
182    #[inline(always)]
183    fn remain(&self) -> usize {
184        self.slice().len() - self.index
185    }
186
187    #[inline(always)]
188    fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
189        self.input.slice_ref(subset)
190    }
191
192    #[inline(always)]
193    fn peek_n(&self, n: usize) -> Option<&'a [u8]> {
194        let end = self.index + n;
195        (end <= self.slice().len()).then(|| {
196            let ptr = self.slice()[self.index..].as_ptr();
197            unsafe { std::slice::from_raw_parts(ptr, n) }
198        })
199    }
200
201    #[inline(always)]
202    fn set_index(&mut self, index: usize) {
203        self.index = index
204    }
205
206    #[inline(always)]
207    fn peek(&self) -> Option<u8> {
208        if self.index < self.slice().len() {
209            Some(self.slice()[self.index])
210        } else {
211            None
212        }
213    }
214
215    #[inline(always)]
216    fn at(&self, index: usize) -> u8 {
217        self.slice()[index]
218    }
219
220    #[inline(always)]
221    fn next_n(&mut self, n: usize) -> Option<&'a [u8]> {
222        let new_index = self.index + n;
223        if new_index <= self.slice().len() {
224            let ret = &self.slice()[self.index..new_index];
225            self.index = new_index;
226            Some(ret)
227        } else {
228            None
229        }
230    }
231
232    #[inline(always)]
233    fn cur_ptr(&mut self) -> *mut u8 {
234        panic!("should only used in PaddedSliceRead");
235    }
236
237    #[inline(always)]
238    unsafe fn set_ptr(&mut self, _cur: *mut u8) {
239        panic!("should only used in PaddedSliceRead");
240    }
241
242    #[inline(always)]
243    fn index(&self) -> usize {
244        self.index
245    }
246
247    #[inline(always)]
248    fn eat(&mut self, n: usize) {
249        self.index += n;
250    }
251
252    #[inline(always)]
253    fn backward(&mut self, n: usize) {
254        self.index -= n;
255    }
256
257    #[inline(always)]
258    fn slice_unchecked(&self, start: usize, end: usize) -> &'a [u8] {
259        &self.slice()[start..end]
260    }
261
262    #[inline(always)]
263    fn as_u8_slice(&self) -> &'a [u8] {
264        self.slice()
265    }
266
267    #[inline(always)]
268    fn check_utf8_final(&self) -> Result<()> {
269        if self.next_invalid_utf8 == usize::MAX {
270            Ok(())
271        } else {
272            Err(Error::syntax(
273                ErrorCode::InvalidUTF8,
274                self.origin_input(),
275                self.next_invalid_utf8,
276            ))
277        }
278    }
279
280    fn check_invalid_utf8(&mut self) {
281        self.next_invalid_utf8 = match from_utf8(&self.origin_input()[self.index..]) {
282            Ok(_) => usize::MAX,
283            Err(e) => self.index + e.offset(),
284        };
285    }
286
287    fn next_invalid_utf8(&self) -> usize {
288        self.next_invalid_utf8
289    }
290}
291
/// Pointer-based reader over a mutable buffer that ends with `PADDING_SIZE` extra bytes.
pub(crate) struct PaddedSliceRead<'a> {
    // Start of the buffer.
    base: NonNull<u8>,
    // Current cursor position.
    cur: NonNull<u8>,
    // Buffer length with the trailing padding excluded.
    len: usize,
    // Slice handed back by `origin_input`.
    // NOTE(review): presumably the unpadded JSON the buffer was copied from — confirm at call sites.
    origin: &'a [u8],
    // Ties the raw pointers to the lifetime of the mutable buffer borrow.
    _life: PhantomData<&'a mut [u8]>,
}

impl<'a> PaddedSliceRead<'a> {
    /// Number of trailing bytes of the buffer that are not counted as input.
    const PADDING_SIZE: usize = 64;

    /// Creates a reader positioned at the start of `buffer`.
    ///
    /// The readable length is `buffer.len() - PADDING_SIZE`, so `buffer` must hold at least
    /// `PADDING_SIZE` bytes. `json` is only stored and returned by `origin_input`.
    pub fn new(buffer: &'a mut [u8], json: &'a [u8]) -> Self {
        let len = buffer.len() - Self::PADDING_SIZE;
        // A slice reference is never null, so the conversion needs no `unsafe`.
        let base = NonNull::from(buffer).cast::<u8>();
        Self {
            base,
            cur: base,
            len,
            origin: json,
            _life: PhantomData,
        }
    }
}
313
314impl<'a> Reader<'a> for PaddedSliceRead<'a> {
315    #[inline(always)]
316    fn as_u8_slice(&self) -> &'a [u8] {
317        unsafe { std::slice::from_raw_parts(self.base.as_ptr(), self.len) }
318    }
319
320    #[inline(always)]
321    fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
322        subset.into()
323    }
324
325    #[inline(always)]
326    fn remain(&self) -> usize {
327        let remain = self.len as isize - self.index() as isize;
328        std::cmp::max(remain, 0) as usize
329    }
330
331    #[inline(always)]
332    fn peek_n(&self, n: usize) -> Option<&'a [u8]> {
333        unsafe { Some(std::slice::from_raw_parts(self.cur.as_ptr(), n)) }
334    }
335
336    #[inline(always)]
337    fn set_index(&mut self, index: usize) {
338        unsafe { self.cur = NonNull::new_unchecked(self.base.as_ptr().add(index)) }
339    }
340
341    #[inline(always)]
342    fn peek(&self) -> Option<u8> {
343        unsafe { Some(*self.cur.as_ptr()) }
344    }
345
346    #[inline(always)]
347    fn at(&self, index: usize) -> u8 {
348        unsafe { *(self.base.as_ptr().add(index)) }
349    }
350
351    #[inline(always)]
352    fn next_n(&mut self, n: usize) -> Option<&'a [u8]> {
353        unsafe {
354            let ptr = self.cur.as_ptr();
355            self.cur = NonNull::new_unchecked(ptr.add(n));
356            Some(std::slice::from_raw_parts(ptr, n))
357        }
358    }
359
360    #[inline(always)]
361    fn index(&self) -> usize {
362        unsafe { self.cur.as_ptr().offset_from(self.base.as_ptr()) as usize }
363    }
364
365    fn eat(&mut self, n: usize) {
366        unsafe {
367            self.cur = NonNull::new_unchecked(self.cur.as_ptr().add(n));
368        }
369    }
370
371    #[inline(always)]
372    fn cur_ptr(&mut self) -> *mut u8 {
373        self.cur.as_ptr()
374    }
375
376    #[inline(always)]
377    unsafe fn set_ptr(&mut self, cur: *mut u8) {
378        self.cur = NonNull::new_unchecked(cur);
379    }
380
381    #[inline(always)]
382    fn backward(&mut self, n: usize) {
383        unsafe {
384            self.cur = NonNull::new_unchecked(self.cur.as_ptr().sub(n));
385        }
386    }
387
388    #[inline(always)]
389    fn slice_unchecked(&self, start: usize, end: usize) -> &'a [u8] {
390        unsafe {
391            let ptr = self.base.as_ptr().add(start);
392            let n = end - start;
393            std::slice::from_raw_parts(ptr, n)
394        }
395    }
396
397    #[inline(always)]
398    fn check_invalid_utf8(&mut self) {
399        /* need to nothing here */
400    }
401
402    #[inline(always)]
403    fn next_invalid_utf8(&self) -> usize {
404        usize::MAX
405    }
406
407    #[inline(always)]
408    fn check_utf8_final(&self) -> Result<()> {
409        Ok(())
410    }
411
412    #[inline(always)]
413    fn origin_input(&self) -> &'a [u8] {
414        self.origin
415    }
416}
417
#[cfg(test)]
mod test {
    use bytes::Bytes;
    use faststr::FastStr;

    use super::*;
    use crate::{Deserialize, Deserializer};

    // Peeking returns upcoming bytes without consuming them.
    fn test_peek() {
        let reader = Read::new(b"1234567890", false);
        assert_eq!(reader.peek(), Some(b'1'));
        assert_eq!(reader.peek_n(4).unwrap(), &b"1234"[..]);
    }

    // `next` / `next_n` consume bytes, so a following `peek` sees what comes after them.
    fn test_next() {
        let mut reader = Read::new(b"1234567890", false);
        assert_eq!(reader.next(), Some(b'1'));
        assert_eq!(reader.peek(), Some(b'2'));
        assert_eq!(reader.next_n(4).unwrap(), &b"2345"[..]);
        assert_eq!(reader.peek(), Some(b'6'));
    }

    // The index tracks how many bytes have been consumed so far.
    fn test_index() {
        let mut reader = Read::new(b"1234567890", false);
        assert_eq!(reader.index(), 0);

        reader.next().unwrap();
        assert_eq!(reader.index(), 1);

        reader.next_n(4).unwrap();
        assert_eq!(reader.index(), 5);
    }

    // Single entry point that runs the cursor checks above.
    #[test]
    fn test_reader() {
        test_peek();
        test_next();
        test_index();
    }

    // Deserializes `$json` through `Read::from` and expects the integer 123.
    macro_rules! test_deserialize_reader {
        ($json:expr) => {
            let mut deserializer = Deserializer::new(Read::from($json));
            let value: i32 = Deserialize::deserialize(&mut deserializer).unwrap();
            assert_eq!(value, 123);
        };
    }

    // Every supported input type yields the same value.
    #[test]
    fn test_deserialize() {
        let bytes = Bytes::from(r#"123"#);
        let fast = FastStr::from(r#"123"#);
        let owned = String::from(r#"123"#);
        test_deserialize_reader!(r#"123"#);
        test_deserialize_reader!(r#"123"#.as_bytes());
        test_deserialize_reader!(&bytes);
        test_deserialize_reader!(&fast);
        test_deserialize_reader!(&owned);
    }
}