sonic_rs/
reader.rs

1use std::{marker::PhantomData, pin::Pin, ptr::NonNull};
2
3use faststr::FastStr;
4
5use crate::{
6    error::ErrorCode,
7    input::JsonSlice,
8    parser::as_str,
9    util::{private::Sealed, utf8::from_utf8},
10    Error, JsonInput, Result,
11};
12
/// One-based line/column location of a byte offset inside the input.
pub(crate) struct Position {
    /// Line number, starting at 1.
    pub line: usize,
    /// Column within the line, starting at 1 and counted in bytes.
    pub column: usize,
}

impl Position {
    /// Computes the position reached after walking over the first `i` bytes of `data`.
    ///
    /// `i` is clamped to `data.len()`. Each `b'\n'` moves to the next line and resets the
    /// column to 1; any other byte advances the column by one.
    pub(crate) fn from_index(i: usize, data: &[u8]) -> Self {
        let end = i.min(data.len());
        let (line, column) = data[..end]
            .iter()
            .fold((1, 1), |(line, column), &byte| {
                if byte == b'\n' {
                    (line + 1, 1)
                } else {
                    (line, column + 1)
                }
            });
        Position { line, column }
    }
}
37
38/// Trait is used by the deserializer for iterating over input. And it is sealed and cannot be
39/// implemented for types outside of sonic_rs.
40#[doc(hidden)]
41pub trait Reader<'de>: Sealed {
42    fn remain(&self) -> usize;
43    fn eat(&mut self, n: usize);
44    fn backward(&mut self, n: usize);
45    fn peek_n(&self, n: usize) -> Option<&'de [u8]>;
46    fn peek(&self) -> Option<u8>;
47    fn index(&self) -> usize;
48    fn at(&self, index: usize) -> u8;
49    fn set_index(&mut self, index: usize);
50    fn next_n(&mut self, n: usize) -> Option<&'de [u8]>;
51
52    #[inline(always)]
53    fn next(&mut self) -> Option<u8> {
54        self.peek().inspect(|_| {
55            self.eat(1);
56        })
57    }
58    fn cur_ptr(&mut self) -> *mut u8;
59
60    /// # Safety
61    /// cur must be a valid pointer in the slice
62    unsafe fn set_ptr(&mut self, cur: *mut u8);
63    fn slice_unchecked(&self, start: usize, end: usize) -> &'de [u8];
64
65    fn as_u8_slice(&self) -> &'de [u8];
66
67    fn check_utf8_final(&self) -> Result<()>;
68
69    fn next_invalid_utf8(&self) -> usize;
70
71    fn check_invalid_utf8(&mut self);
72
73    fn slice_ref(&self, subset: &'de [u8]) -> JsonSlice<'de>;
74
75    fn origin_input(&self) -> &'de [u8] {
76        self.as_u8_slice()
77    }
78}
79
80enum PinnedInput<'a> {
81    FastStr(Pin<Box<FastStr>>),
82    Slice(&'a [u8]),
83}
84
85impl<'a> PinnedInput<'a> {
86    unsafe fn as_ptr(&self) -> NonNull<[u8]> {
87        match self {
88            Self::FastStr(f) => f.as_bytes().into(),
89            Self::Slice(slice) => (*slice).into(),
90        }
91    }
92
93    fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
94        match self {
95            Self::FastStr(f) => JsonSlice::FastStr(f.slice_ref(as_str(subset))),
96            Self::Slice(_) => JsonSlice::Raw(subset),
97        }
98    }
99}
100
101impl<'a> From<JsonSlice<'a>> for PinnedInput<'a> {
102    fn from(input: JsonSlice<'a>) -> Self {
103        match input {
104            JsonSlice::Raw(slice) => Self::Slice(slice),
105            JsonSlice::FastStr(f) => Self::FastStr(Pin::new(Box::new(f))),
106        }
107    }
108}
109
110/// JSON input source that reads from a string/bytes-like JSON input.
111///
112/// Support most common types: &str, &[u8], &FastStr, &Bytes and &String
113///
114/// # Examples
115/// ```
116/// use bytes::Bytes;
117/// use faststr::FastStr;
118/// use serde::de::Deserialize;
119/// use sonic_rs::{Deserializer, Read};
120///
121/// let mut de = Deserializer::new(Read::from(r#"123"#));
122/// let num: i32 = Deserialize::deserialize(&mut de).unwrap();
123/// assert_eq!(num, 123);
124///
125/// let mut de = Deserializer::new(Read::from(r#"123"#.as_bytes()));
126/// let num: i32 = Deserialize::deserialize(&mut de).unwrap();
127/// assert_eq!(num, 123);
128///
129/// let f = FastStr::new("123");
130/// let mut de = Deserializer::new(Read::from(&f));
131/// let num: i32 = Deserialize::deserialize(&mut de).unwrap();
132/// assert_eq!(num, 123);
133/// ```
134pub struct Read<'a> {
135    // pin the input JSON, because `slice` will reference it
136    input: PinnedInput<'a>,
137    slice: NonNull<[u8]>,
138    pub(crate) index: usize,
139    // next invalid utf8 position, if not found, will be usize::MAX
140    next_invalid_utf8: usize,
141}
142
143impl<'a> Read<'a> {
144    /// Make a `Read` from string/bytes-like JSON input.
145    pub fn from<I: JsonInput<'a>>(input: I) -> Self {
146        let need = input.need_utf8_valid();
147        Self::new_in(input.to_json_slice(), need)
148    }
149
150    pub(crate) fn new(slice: &'a [u8], validate_utf8: bool) -> Self {
151        Self::new_in(slice.to_json_slice(), validate_utf8)
152    }
153
154    pub(crate) fn new_in(input: JsonSlice<'a>, validate_utf8: bool) -> Self {
155        let input: PinnedInput<'a> = input.into();
156        // #safety: we pinned the input json
157        let slice = unsafe { input.as_ptr() };
158
159        // validate the utf-8 at first for slice
160        let next_invalid_utf8 = validate_utf8
161            .then(|| {
162                from_utf8(unsafe { slice.as_ref() })
163                    .err()
164                    .map(|e| e.offset())
165            })
166            .flatten()
167            .unwrap_or(usize::MAX);
168
169        Self {
170            input,
171            slice,
172            index: 0,
173            next_invalid_utf8,
174        }
175    }
176
177    #[inline(always)]
178    fn slice(&self) -> &'a [u8] {
179        unsafe { self.slice.as_ref() }
180    }
181}
182
183impl<'a> Reader<'a> for Read<'a> {
184    #[inline(always)]
185    fn remain(&self) -> usize {
186        self.slice().len() - self.index
187    }
188
189    #[inline(always)]
190    fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
191        self.input.slice_ref(subset)
192    }
193
194    #[inline(always)]
195    fn peek_n(&self, n: usize) -> Option<&'a [u8]> {
196        let end = self.index + n;
197        (end <= self.slice().len()).then(|| {
198            let ptr = self.slice()[self.index..].as_ptr();
199            unsafe { std::slice::from_raw_parts(ptr, n) }
200        })
201    }
202
203    #[inline(always)]
204    fn set_index(&mut self, index: usize) {
205        self.index = index
206    }
207
208    #[inline(always)]
209    fn peek(&self) -> Option<u8> {
210        if self.index < self.slice().len() {
211            Some(self.slice()[self.index])
212        } else {
213            None
214        }
215    }
216
217    #[inline(always)]
218    fn at(&self, index: usize) -> u8 {
219        self.slice()[index]
220    }
221
222    #[inline(always)]
223    fn next_n(&mut self, n: usize) -> Option<&'a [u8]> {
224        let new_index = self.index + n;
225        if new_index <= self.slice().len() {
226            let ret = &self.slice()[self.index..new_index];
227            self.index = new_index;
228            Some(ret)
229        } else {
230            None
231        }
232    }
233
234    #[inline(always)]
235    fn cur_ptr(&mut self) -> *mut u8 {
236        panic!("should only used in PaddedSliceRead");
237    }
238
239    #[inline(always)]
240    unsafe fn set_ptr(&mut self, _cur: *mut u8) {
241        panic!("should only used in PaddedSliceRead");
242    }
243
244    #[inline(always)]
245    fn index(&self) -> usize {
246        self.index
247    }
248
249    #[inline(always)]
250    fn eat(&mut self, n: usize) {
251        self.index += n;
252    }
253
254    #[inline(always)]
255    fn backward(&mut self, n: usize) {
256        self.index -= n;
257    }
258
259    #[inline(always)]
260    fn slice_unchecked(&self, start: usize, end: usize) -> &'a [u8] {
261        &self.slice()[start..end]
262    }
263
264    #[inline(always)]
265    fn as_u8_slice(&self) -> &'a [u8] {
266        self.slice()
267    }
268
269    #[inline(always)]
270    fn check_utf8_final(&self) -> Result<()> {
271        if self.next_invalid_utf8 == usize::MAX {
272            Ok(())
273        } else {
274            Err(Error::syntax(
275                ErrorCode::InvalidUTF8,
276                self.origin_input(),
277                self.next_invalid_utf8,
278            ))
279        }
280    }
281
282    fn check_invalid_utf8(&mut self) {
283        self.next_invalid_utf8 = match from_utf8(&self.origin_input()[self.index..]) {
284            Ok(_) => usize::MAX,
285            Err(e) => self.index + e.offset(),
286        };
287    }
288
289    fn next_invalid_utf8(&self) -> usize {
290        self.next_invalid_utf8
291    }
292}
293
/// Pointer-based reader over a caller-provided mutable buffer.
// NOTE(review): presumably the buffer holds the JSON followed by `PADDING_SIZE` spare bytes —
// confirm against the callers that build it.
pub(crate) struct PaddedSliceRead<'a> {
    // start of the buffer
    base: NonNull<u8>,
    // current read position inside the buffer
    cur: NonNull<u8>,
    // buffer length with the trailing padding excluded
    len: usize,
    // bytes reported by `origin_input`
    origin: &'a [u8],
    // ties the raw pointers to the mutable borrow of the buffer
    _life: PhantomData<&'a mut [u8]>,
}

impl<'a> PaddedSliceRead<'a> {
    /// Number of trailing bytes of the buffer that are excluded from `len`.
    const PADDING_SIZE: usize = 64;

    /// Creates a reader positioned at the start of `buffer`; `json` is kept as the original
    /// input.
    ///
    /// `buffer` must be at least `PADDING_SIZE` bytes long, otherwise the length computation
    /// underflows.
    pub fn new(buffer: &'a mut [u8], json: &'a [u8]) -> Self {
        let len = buffer.len() - Self::PADDING_SIZE;
        // A reference is never null, so the pointer to the first byte is non-null as well.
        let base = NonNull::from(buffer).cast::<u8>();
        Self {
            base,
            cur: base,
            len,
            origin: json,
            _life: PhantomData,
        }
    }
}
315
316impl<'a> Reader<'a> for PaddedSliceRead<'a> {
317    #[inline(always)]
318    fn as_u8_slice(&self) -> &'a [u8] {
319        unsafe { std::slice::from_raw_parts(self.base.as_ptr(), self.len) }
320    }
321
322    #[inline(always)]
323    fn slice_ref(&self, subset: &'a [u8]) -> JsonSlice<'a> {
324        subset.into()
325    }
326
327    #[inline(always)]
328    fn remain(&self) -> usize {
329        let remain = self.len as isize - self.index() as isize;
330        std::cmp::max(remain, 0) as usize
331    }
332
333    #[inline(always)]
334    fn peek_n(&self, n: usize) -> Option<&'a [u8]> {
335        unsafe { Some(std::slice::from_raw_parts(self.cur.as_ptr(), n)) }
336    }
337
338    #[inline(always)]
339    fn set_index(&mut self, index: usize) {
340        unsafe { self.cur = NonNull::new_unchecked(self.base.as_ptr().add(index)) }
341    }
342
343    #[inline(always)]
344    fn peek(&self) -> Option<u8> {
345        unsafe { Some(*self.cur.as_ptr()) }
346    }
347
348    #[inline(always)]
349    fn at(&self, index: usize) -> u8 {
350        unsafe { *(self.base.as_ptr().add(index)) }
351    }
352
353    #[inline(always)]
354    fn next_n(&mut self, n: usize) -> Option<&'a [u8]> {
355        unsafe {
356            let ptr = self.cur.as_ptr();
357            self.cur = NonNull::new_unchecked(ptr.add(n));
358            Some(std::slice::from_raw_parts(ptr, n))
359        }
360    }
361
362    #[inline(always)]
363    fn index(&self) -> usize {
364        unsafe { self.cur.as_ptr().offset_from(self.base.as_ptr()) as usize }
365    }
366
367    fn eat(&mut self, n: usize) {
368        unsafe {
369            self.cur = NonNull::new_unchecked(self.cur.as_ptr().add(n));
370        }
371    }
372
373    #[inline(always)]
374    fn cur_ptr(&mut self) -> *mut u8 {
375        self.cur.as_ptr()
376    }
377
378    #[inline(always)]
379    unsafe fn set_ptr(&mut self, cur: *mut u8) {
380        self.cur = NonNull::new_unchecked(cur);
381    }
382
383    #[inline(always)]
384    fn backward(&mut self, n: usize) {
385        unsafe {
386            self.cur = NonNull::new_unchecked(self.cur.as_ptr().sub(n));
387        }
388    }
389
390    #[inline(always)]
391    fn slice_unchecked(&self, start: usize, end: usize) -> &'a [u8] {
392        unsafe {
393            let ptr = self.base.as_ptr().add(start);
394            let n = end - start;
395            std::slice::from_raw_parts(ptr, n)
396        }
397    }
398
399    #[inline(always)]
400    fn check_invalid_utf8(&mut self) {
401        /* need to nothing here */
402    }
403
404    #[inline(always)]
405    fn next_invalid_utf8(&self) -> usize {
406        usize::MAX
407    }
408
409    #[inline(always)]
410    fn check_utf8_final(&self) -> Result<()> {
411        Ok(())
412    }
413
414    #[inline(always)]
415    fn origin_input(&self) -> &'a [u8] {
416        self.origin
417    }
418}
419
#[cfg(test)]
mod test {
    use bytes::Bytes;
    use faststr::FastStr;

    use super::*;
    use crate::{Deserialize, Deserializer};

    // Shared fixture for the cursor tests below.
    const DIGITS: &[u8] = b"1234567890";

    // Peeking must not move the cursor.
    fn test_peek() {
        let reader = Read::new(DIGITS, false);
        assert_eq!(reader.peek(), Some(b'1'));
        assert_eq!(reader.peek_n(4).unwrap(), &b"1234"[..]);
    }

    // `next`/`next_n` return the bytes under the cursor and advance past them.
    fn test_next() {
        let mut reader = Read::new(DIGITS, false);
        assert_eq!(reader.next(), Some(b'1'));
        assert_eq!(reader.peek(), Some(b'2'));
        assert_eq!(reader.next_n(4).unwrap(), &b"2345"[..]);
        assert_eq!(reader.peek(), Some(b'6'));
    }

    // `index` tracks the number of consumed bytes.
    fn test_index() {
        let mut reader = Read::new(DIGITS, false);
        assert_eq!(reader.index(), 0);

        reader.next().unwrap();
        assert_eq!(reader.index(), 1);

        reader.next_n(4).unwrap();
        assert_eq!(reader.index(), 5);
    }

    #[test]
    fn test_reader() {
        test_peek();
        test_next();
        test_index();
    }

    // Deserializes `$input` through `Read::from` and expects the number 123.
    macro_rules! test_deserialize_reader {
        ($input:expr) => {
            let mut deserializer = Deserializer::new(Read::from($input));
            let parsed: i32 = Deserialize::deserialize(&mut deserializer).unwrap();
            assert_eq!(parsed, 123);
        };
    }

    #[test]
    fn test_deserialize() {
        let bytes = Bytes::from(r#"123"#);
        let fast_str = FastStr::from(r#"123"#);
        let string = String::from(r#"123"#);
        test_deserialize_reader!(r#"123"#);
        test_deserialize_reader!(r#"123"#.as_bytes());
        test_deserialize_reader!(&bytes);
        test_deserialize_reader!(&fast_str);
        test_deserialize_reader!(&string);
    }
}