Skip to main content

sonic_rs/
reader.rs

1use std::{marker::PhantomData, pin::Pin, ptr::NonNull};
2
3use faststr::FastStr;
4
5use crate::{
6    error::ErrorCode,
7    input::JsonSlice,
8    parser::as_str,
9    util::{private::Sealed, utf8::from_utf8},
10    Error, JsonInput, Result,
11};
12
/// A 1-based line/column location inside a byte buffer.
pub(crate) struct Position {
    /// Line number; the first line is 1.
    pub line: usize,
    /// Column number; the first column of every line is 1.
    pub column: usize,
}

impl Position {
    /// Computes the position reached after scanning the first `i` bytes of `data`.
    ///
    /// `i` is clamped to `data.len()`. Each `\n` byte starts a new line and resets the
    /// column to 1; every other byte advances the column by one, so columns count bytes
    /// rather than characters.
    pub(crate) fn from_index(i: usize, data: &[u8]) -> Self {
        // i must not exceed the length of data
        let scanned = &data[..i.min(data.len())];

        // One line to start with, plus one per newline that was passed.
        let line = 1 + scanned.iter().filter(|&&byte| byte == b'\n').count();

        // The column is 1 plus the number of bytes following the last newline
        // (or following the start of the buffer when there is no newline).
        let column = match scanned.iter().rposition(|&byte| byte == b'\n') {
            Some(newline_at) => scanned.len() - newline_at,
            None => scanned.len() + 1,
        };

        Position { line, column }
    }
}
37
38/// Trait is used by the deserializer for iterating over input. And it is sealed and cannot be
39/// implemented for types outside of sonic_rs.
40#[doc(hidden)]
41pub trait Reader<'de>: Sealed {
42    fn remain(&self) -> usize;
43    fn eat(&mut self, n: usize);
44    fn backward(&mut self, n: usize);
45    fn peek_n(&self, n: usize) -> Option<&'de [u8]>;
46    fn peek(&self) -> Option<u8>;
47    fn index(&self) -> usize;
48    fn at(&self, index: usize) -> u8;
49    fn set_index(&mut self, index: usize);
50    fn next_n(&mut self, n: usize) -> Option<&'de [u8]>;
51
52    #[inline(always)]
53    fn next(&mut self) -> Option<u8> {
54        self.peek().inspect(|_| {
55            self.eat(1);
56        })
57    }
58    fn cur_ptr(&mut self) -> *mut u8;
59
60    /// # Safety
61    /// cur must be a valid pointer in the slice
62    unsafe fn set_ptr(&mut self, cur: *mut u8);
63    fn slice_unchecked(&self, start: usize, end: usize) -> &'de [u8];
64
65    fn as_u8_slice(&self) -> &'de [u8];
66
67    fn check_utf8_final(&self) -> Result<()>;
68
69    fn next_invalid_utf8(&self) -> usize;
70
71    fn check_invalid_utf8(&mut self);
72
73    fn slice_ref(&self, subset: &'de [u8]) -> JsonSlice<'de>;
74
75    fn origin_input(&self) -> &'de [u8] {
76        self.as_u8_slice()
77    }
78}
79
80enum PinnedInput<'a> {
81    FastStr(Pin<Box<FastStr>>),
82    Slice(&'a [u8]),
83}
84
85impl<'a> PinnedInput<'a> {
86    unsafe fn as_ptr(&self) -> NonNull<[u8]> {
87        match self {
88            Self::FastStr(f) => f.as_bytes().into(),
89            Self::Slice(slice) => (*slice).into(),
90        }
91    }
92
93    fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
94        match self {
95            Self::FastStr(f) => JsonSlice::FastStr(f.slice_ref(as_str(subset))),
96            Self::Slice(_) => JsonSlice::Raw(subset),
97        }
98    }
99}
100
101impl<'a> From<JsonSlice<'a>> for PinnedInput<'a> {
102    fn from(input: JsonSlice<'a>) -> Self {
103        match input {
104            JsonSlice::Raw(slice) => Self::Slice(slice),
105            JsonSlice::FastStr(f) => Self::FastStr(Pin::new(Box::new(f))),
106        }
107    }
108}
109
110/// JSON input source that reads from a string/bytes-like JSON input.
111///
112/// Support most common types: &str, &[u8], &FastStr, &Bytes and &String
113///
114/// # Examples
115/// ```
116/// use bytes::Bytes;
117/// use faststr::FastStr;
118/// use serde::de::Deserialize;
119/// use sonic_rs::{Deserializer, Read};
120///
121/// let mut de = Deserializer::new(Read::from(r#"123"#));
122/// let num: i32 = Deserialize::deserialize(&mut de).unwrap();
123/// assert_eq!(num, 123);
124///
125/// let mut de = Deserializer::new(Read::from(r#"123"#.as_bytes()));
126/// let num: i32 = Deserialize::deserialize(&mut de).unwrap();
127/// assert_eq!(num, 123);
128///
129/// let f = FastStr::new("123");
130/// let mut de = Deserializer::new(Read::from(&f));
131/// let num: i32 = Deserialize::deserialize(&mut de).unwrap();
132/// assert_eq!(num, 123);
133/// ```
134pub struct Read<'a> {
135    // pin the input JSON, because `slice` will reference it
136    input: PinnedInput<'a>,
137    pub(crate) index: usize,
138    // next invalid utf8 position, if not found, will be usize::MAX
139    next_invalid_utf8: usize,
140}
141
142impl<'a> Read<'a> {
143    /// Make a `Read` from string/bytes-like JSON input.
144    pub fn from<I: JsonInput<'a>>(input: I) -> Self {
145        let need = input.need_utf8_valid();
146        Self::new_in(input.to_json_slice(), need)
147    }
148
149    pub(crate) fn new(slice: &'a [u8], validate_utf8: bool) -> Self {
150        Self::new_in(slice.to_json_slice(), validate_utf8)
151    }
152
153    pub(crate) fn new_in(input: JsonSlice<'a>, validate_utf8: bool) -> Self {
154        let input: PinnedInput<'a> = input.into();
155        // #safety: we pinned the input json
156        let slice: NonNull<[u8]> = unsafe { input.as_ptr() };
157
158        // validate the utf-8 at first for slice
159        let next_invalid_utf8 = validate_utf8
160            .then(|| {
161                from_utf8(unsafe { slice.as_ref() })
162                    .err()
163                    .map(|e| e.offset())
164            })
165            .flatten()
166            .unwrap_or(usize::MAX);
167
168        Self {
169            input,
170            index: 0,
171            next_invalid_utf8,
172        }
173    }
174
175    #[inline(always)]
176    fn slice(&self) -> &'a [u8] {
177        unsafe { self.input.as_ptr().as_ref() }
178    }
179}
180
181impl<'a> Reader<'a> for Read<'a> {
182    #[inline(always)]
183    fn remain(&self) -> usize {
184        self.slice().len() - self.index
185    }
186
187    #[inline(always)]
188    fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
189        self.input.slice_ref(subset)
190    }
191
192    #[inline(always)]
193    fn peek_n(&self, n: usize) -> Option<&'a [u8]> {
194        let end = self.index + n;
195        (end <= self.slice().len()).then(|| &self.slice()[self.index..end])
196    }
197
198    #[inline(always)]
199    fn set_index(&mut self, index: usize) {
200        self.index = index
201    }
202
203    #[inline(always)]
204    fn peek(&self) -> Option<u8> {
205        if self.index < self.slice().len() {
206            Some(self.slice()[self.index])
207        } else {
208            None
209        }
210    }
211
212    #[inline(always)]
213    fn at(&self, index: usize) -> u8 {
214        self.slice()[index]
215    }
216
217    #[inline(always)]
218    fn next_n(&mut self, n: usize) -> Option<&'a [u8]> {
219        let new_index = self.index + n;
220        if new_index <= self.slice().len() {
221            let ret = &self.slice()[self.index..new_index];
222            self.index = new_index;
223            Some(ret)
224        } else {
225            None
226        }
227    }
228
229    #[inline(always)]
230    fn cur_ptr(&mut self) -> *mut u8 {
231        panic!("should only used in PaddedSliceRead");
232    }
233
234    #[inline(always)]
235    unsafe fn set_ptr(&mut self, _cur: *mut u8) {
236        panic!("should only used in PaddedSliceRead");
237    }
238
239    #[inline(always)]
240    fn index(&self) -> usize {
241        self.index
242    }
243
244    #[inline(always)]
245    fn eat(&mut self, n: usize) {
246        self.index += n;
247    }
248
249    #[inline(always)]
250    fn backward(&mut self, n: usize) {
251        self.index -= n;
252    }
253
254    #[inline(always)]
255    fn slice_unchecked(&self, start: usize, end: usize) -> &'a [u8] {
256        &self.slice()[start..end]
257    }
258
259    #[inline(always)]
260    fn as_u8_slice(&self) -> &'a [u8] {
261        self.slice()
262    }
263
264    #[inline(always)]
265    fn check_utf8_final(&self) -> Result<()> {
266        if self.next_invalid_utf8 == usize::MAX {
267            Ok(())
268        } else {
269            Err(Error::syntax(
270                ErrorCode::InvalidUTF8,
271                self.origin_input(),
272                self.next_invalid_utf8,
273            ))
274        }
275    }
276
277    fn check_invalid_utf8(&mut self) {
278        self.next_invalid_utf8 = match from_utf8(&self.origin_input()[self.index..]) {
279            Ok(_) => usize::MAX,
280            Err(e) => self.index + e.offset(),
281        };
282    }
283
284    fn next_invalid_utf8(&self) -> usize {
285        self.next_invalid_utf8
286    }
287}
288
/// Raw-pointer cursor over a mutable byte buffer that ends with `PADDING_SIZE`
/// padding bytes.
pub(crate) struct PaddedSliceRead<'a> {
    // Start of the padded buffer.
    base: NonNull<u8>,
    // Current cursor; starts at `base`.
    cur: NonNull<u8>,
    // Length of the buffer with the trailing padding excluded.
    len: usize,
    // Slice handed back by `origin_input()`.
    origin: &'a [u8],
    // Ties the raw pointers to the exclusive borrow of the buffer.
    _life: PhantomData<&'a mut [u8]>,
}

impl<'a> PaddedSliceRead<'a> {
    const PADDING_SIZE: usize = 64;

    /// Creates a reader positioned at the start of `buffer`.
    ///
    /// `buffer` must end with `PADDING_SIZE` padding bytes; the logical length of the
    /// reader is `buffer.len() - PADDING_SIZE`. `json` is stored unchanged and returned
    /// by `origin_input()` (presumably the unpadded source of `buffer` — confirm).
    ///
    /// # Panics
    /// Panics when `buffer` is shorter than `PADDING_SIZE`. A plain subtraction would
    /// wrap around in release builds and leave a bogus, huge length behind the raw
    /// pointers.
    pub fn new(buffer: &'a mut [u8], json: &'a [u8]) -> Self {
        let len = buffer
            .len()
            .checked_sub(Self::PADDING_SIZE)
            .expect("buffer must include the trailing padding bytes");
        // Use as_mut_ptr() to preserve provenance over the entire buffer slice.
        // NonNull::from(&mut buffer[0]) would narrow provenance to a single byte.
        let base = NonNull::new(buffer.as_mut_ptr()).expect("slice pointer is non-null");
        Self {
            base,
            cur: base,
            len,
            origin: json,
            _life: PhantomData,
        }
    }
}
312
313impl<'a> Reader<'a> for PaddedSliceRead<'a> {
314    #[inline(always)]
315    fn as_u8_slice(&self) -> &'a [u8] {
316        unsafe { std::slice::from_raw_parts(self.base.as_ptr(), self.len) }
317    }
318
319    #[inline(always)]
320    fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
321        subset.into()
322    }
323
324    #[inline(always)]
325    fn remain(&self) -> usize {
326        let remain = self.len as isize - self.index() as isize;
327        std::cmp::max(remain, 0) as usize
328    }
329
330    #[inline(always)]
331    fn peek_n(&self, n: usize) -> Option<&'a [u8]> {
332        debug_assert!(self.index() + n <= self.len + Self::PADDING_SIZE);
333        unsafe { Some(std::slice::from_raw_parts(self.cur.as_ptr(), n)) }
334    }
335
336    #[inline(always)]
337    fn set_index(&mut self, index: usize) {
338        debug_assert!(index <= self.len + Self::PADDING_SIZE);
339        unsafe { self.cur = NonNull::new_unchecked(self.base.as_ptr().add(index)) }
340    }
341
342    #[inline(always)]
343    fn peek(&self) -> Option<u8> {
344        unsafe { Some(*self.cur.as_ptr()) }
345    }
346
347    #[inline(always)]
348    fn at(&self, index: usize) -> u8 {
349        unsafe { *(self.base.as_ptr().add(index)) }
350    }
351
352    #[inline(always)]
353    fn next_n(&mut self, n: usize) -> Option<&'a [u8]> {
354        debug_assert!(self.index() + n <= self.len + Self::PADDING_SIZE);
355        unsafe {
356            let ptr = self.cur.as_ptr();
357            self.cur = NonNull::new_unchecked(ptr.add(n));
358            Some(std::slice::from_raw_parts(ptr, n))
359        }
360    }
361
362    #[inline(always)]
363    fn index(&self) -> usize {
364        unsafe { self.cur.as_ptr().offset_from(self.base.as_ptr()) as usize }
365    }
366
367    fn eat(&mut self, n: usize) {
368        debug_assert!(self.index() + n <= self.len + Self::PADDING_SIZE);
369        unsafe {
370            self.cur = NonNull::new_unchecked(self.cur.as_ptr().add(n));
371        }
372    }
373
374    #[inline(always)]
375    fn cur_ptr(&mut self) -> *mut u8 {
376        self.cur.as_ptr()
377    }
378
379    #[inline(always)]
380    unsafe fn set_ptr(&mut self, cur: *mut u8) {
381        self.cur = NonNull::new_unchecked(cur);
382    }
383
384    #[inline(always)]
385    fn backward(&mut self, n: usize) {
386        debug_assert!(n <= self.index());
387        unsafe {
388            self.cur = NonNull::new_unchecked(self.cur.as_ptr().sub(n));
389        }
390    }
391
392    #[inline(always)]
393    fn slice_unchecked(&self, start: usize, end: usize) -> &'a [u8] {
394        unsafe {
395            let ptr = self.base.as_ptr().add(start);
396            let n = end - start;
397            std::slice::from_raw_parts(ptr, n)
398        }
399    }
400
401    #[inline(always)]
402    fn check_invalid_utf8(&mut self) {
403        /* need to nothing here */
404    }
405
406    #[inline(always)]
407    fn next_invalid_utf8(&self) -> usize {
408        usize::MAX
409    }
410
411    #[inline(always)]
412    fn check_utf8_final(&self) -> Result<()> {
413        Ok(())
414    }
415
416    #[inline(always)]
417    fn origin_input(&self) -> &'a [u8] {
418        self.origin
419    }
420}
421
#[cfg(test)]
mod test {
    use bytes::Bytes;
    use faststr::FastStr;

    use super::*;
    use crate::{Deserialize, Deserializer};

    // Shared fixture for the cursor tests below.
    const DIGITS: &[u8] = b"1234567890";

    // Peeking must not move the cursor.
    fn test_peek() {
        let reader = Read::new(DIGITS, false);
        assert_eq!(reader.peek(), Some(b'1'));
        assert_eq!(reader.peek_n(4).unwrap(), &b"1234"[..]);
    }

    // `next` and `next_n` consume what they return.
    fn test_next() {
        let mut reader = Read::new(DIGITS, false);
        assert_eq!(reader.next(), Some(b'1'));
        assert_eq!(reader.peek(), Some(b'2'));
        assert_eq!(reader.next_n(4).unwrap(), &b"2345"[..]);
        assert_eq!(reader.peek(), Some(b'6'));
    }

    // The index follows the number of consumed bytes.
    fn test_index() {
        let mut reader = Read::new(DIGITS, false);
        assert_eq!(reader.index(), 0);

        reader.next().unwrap();
        assert_eq!(reader.index(), 1);

        reader.next_n(4).unwrap();
        assert_eq!(reader.index(), 5);
    }

    #[test]
    fn test_reader() {
        test_peek();
        test_next();
        test_index();
    }

    // Deserializes `$json` through a `Read` and expects the integer 123.
    macro_rules! test_deserialize_reader {
        ($json:expr) => {{
            let mut de = Deserializer::new(Read::from($json));
            let parsed: i32 = Deserialize::deserialize(&mut de).unwrap();
            assert_eq!(parsed, 123);
        }};
    }

    #[test]
    fn test_deserialize() {
        let as_bytes = Bytes::from(r#"123"#);
        let as_faststr = FastStr::from(r#"123"#);
        let as_string = String::from(r#"123"#);
        test_deserialize_reader!(r#"123"#);
        test_deserialize_reader!(r#"123"#.as_bytes());
        test_deserialize_reader!(&as_bytes);
        test_deserialize_reader!(&as_faststr);
        test_deserialize_reader!(&as_string);
    }
}
483}