rsonpath/input/
borrowed.rs

1//! Borrows a slice of bytes of the input document.
2//!
3//! Choose this implementation if:
4//!
5//! 1. You already have the data loaded in-memory and can borrow it while
6//!    using the engine.
7//!
8//! ## Performance characteristics
9//!
10//! This type of input is the fastest to process for the engine,
11//! since there is no additional overhead from loading anything to memory.
12//! It is on par with [`OwnedBytes`](`super::OwnedBytes`), but doesn't take ownership
13//! of the bytes.
14
15use super::{
16    align_to,
17    error::Infallible,
18    padding::{EndPaddedInput, PaddedBlock, TwoSidesPaddedInput},
19    Input, InputBlockIterator, SliceSeekable, MAX_BLOCK_SIZE,
20};
21use crate::{debug, result::InputRecorder, string_pattern::StringPattern};
22
23/// Input wrapping a borrowed [`[u8]`] buffer.
24pub struct BorrowedBytes<'a> {
25    middle_bytes: &'a [u8],
26    first_block: PaddedBlock,
27    last_block: PaddedBlock,
28}
29
30/// Iterator over blocks of [`BorrowedBytes`] of size exactly `N`.
31pub struct BorrowedBytesBlockIterator<'r, I, R, const N: usize> {
32    input: I,
33    idx: usize,
34    recorder: &'r R,
35}
36
37impl<'a> BorrowedBytes<'a> {
38    /// Create a new instance of [`BorrowedBytes`] wrapping the given buffer.
39    ///
40    /// The input will be automatically padded internally, incurring at most
41    /// two times [`MAX_BLOCK_SIZE`] of memory overhead.
42    #[must_use]
43    #[inline(always)]
44    pub fn new(bytes: &'a [u8]) -> Self {
45        let (first, middle, last) = align_to::<MAX_BLOCK_SIZE>(bytes);
46        let first_block = PaddedBlock::pad_first_block(first);
47        let last_block = PaddedBlock::pad_last_block(last);
48
49        Self {
50            middle_bytes: middle,
51            first_block,
52            last_block,
53        }
54    }
55
56    pub(super) fn as_padded_input(&self) -> TwoSidesPaddedInput {
57        TwoSidesPaddedInput::new(&self.first_block, self.middle_bytes, &self.last_block)
58    }
59}
60
61impl<'a> From<&'a [u8]> for BorrowedBytes<'a> {
62    #[inline(always)]
63    fn from(value: &'a [u8]) -> Self {
64        BorrowedBytes::new(value)
65    }
66}
67
68impl<'a> From<&'a str> for BorrowedBytes<'a> {
69    #[inline(always)]
70    fn from(value: &'a str) -> Self {
71        BorrowedBytes::new(value.as_bytes())
72    }
73}
74
75impl<'a, 'r, I, R, const N: usize> BorrowedBytesBlockIterator<'r, I, R, N>
76where
77    R: InputRecorder<&'a [u8]>,
78{
79    #[must_use]
80    #[inline(always)]
81    pub(super) fn new(input: I, recorder: &'r R) -> Self {
82        Self {
83            idx: 0,
84            input,
85            recorder,
86        }
87    }
88}
89
90impl Input for BorrowedBytes<'_> {
91    type BlockIterator<'b, 'r, R, const N: usize>
92        = BorrowedBytesBlockIterator<'r, TwoSidesPaddedInput<'b>, R, N>
93    where
94        Self: 'b,
95        R: InputRecorder<&'b [u8]> + 'r;
96
97    type Error = Infallible;
98    type Block<'b, const N: usize>
99        = &'b [u8]
100    where
101        Self: 'b;
102
103    #[inline(always)]
104    fn leading_padding_len(&self) -> usize {
105        self.first_block.padding_len()
106    }
107
108    #[inline(always)]
109    fn trailing_padding_len(&self) -> usize {
110        self.last_block.padding_len()
111    }
112
113    #[inline(always)]
114    fn len_hint(&self) -> Option<usize> {
115        Some(self.middle_bytes.len() + self.first_block.len() + self.last_block.len())
116    }
117
118    #[inline(always)]
119    fn iter_blocks<'b, 'r, R, const N: usize>(&'b self, recorder: &'r R) -> Self::BlockIterator<'b, 'r, R, N>
120    where
121        R: InputRecorder<&'b [u8]>,
122    {
123        let padded_input = TwoSidesPaddedInput::new(&self.first_block, self.middle_bytes, &self.last_block);
124
125        Self::BlockIterator {
126            idx: 0,
127            input: padded_input,
128            recorder,
129        }
130    }
131
132    #[inline]
133    fn seek_backward(&self, from: usize, needle: u8) -> Option<usize> {
134        return if from >= MAX_BLOCK_SIZE && from < self.middle_bytes.len() + MAX_BLOCK_SIZE {
135            match self.middle_bytes.seek_backward(from - MAX_BLOCK_SIZE, needle) {
136                Some(x) => Some(x + MAX_BLOCK_SIZE),
137                None => handle_first(&self.first_block, needle),
138            }
139        } else {
140            self.as_padded_input().seek_backward(from, needle)
141        };
142
143        #[cold]
144        #[inline(never)]
145        fn handle_first(first_block: &PaddedBlock, needle: u8) -> Option<usize> {
146            first_block.bytes().seek_backward(first_block.len() - 1, needle)
147        }
148    }
149
150    #[inline]
151    fn seek_forward<const N: usize>(&self, from: usize, needles: [u8; N]) -> Result<Option<(usize, u8)>, Infallible> {
152        return Ok(
153            if from >= MAX_BLOCK_SIZE && from < self.middle_bytes.len() + MAX_BLOCK_SIZE {
154                match self.middle_bytes.seek_forward(from - MAX_BLOCK_SIZE, needles) {
155                    Some((x, y)) => Some((x + MAX_BLOCK_SIZE, y)),
156                    None => handle_last(&self.last_block, MAX_BLOCK_SIZE + self.middle_bytes.len(), needles),
157                }
158            } else {
159                self.as_padded_input().seek_forward(from, needles)
160            },
161        );
162
163        #[cold]
164        #[inline(never)]
165        fn handle_last<const N: usize>(
166            last_block: &PaddedBlock,
167            offset: usize,
168            needles: [u8; N],
169        ) -> Option<(usize, u8)> {
170            last_block
171                .bytes()
172                .seek_forward(0, needles)
173                .map(|(x, y)| (x + offset, y))
174        }
175    }
176
177    #[inline]
178    fn seek_non_whitespace_forward(&self, from: usize) -> Result<Option<(usize, u8)>, Infallible> {
179        return Ok(
180            // The hot path is when we start and end within the middle section.
181            // We use the regular slice path for that scenario, and fall back to the very expensive
182            // TwoSidesPaddedInput with all bells and whistles only when that doesn't work.
183            if from >= MAX_BLOCK_SIZE && from < self.middle_bytes.len() + MAX_BLOCK_SIZE {
184                match self.middle_bytes.seek_non_whitespace_forward(from - MAX_BLOCK_SIZE) {
185                    Some((x, y)) => Some((x + MAX_BLOCK_SIZE, y)),
186                    None => handle_last(&self.last_block, MAX_BLOCK_SIZE + self.middle_bytes.len()),
187                }
188            } else {
189                self.as_padded_input().seek_non_whitespace_forward(from)
190            },
191        );
192
193        #[cold]
194        #[inline(never)]
195        fn handle_last(last_block: &PaddedBlock, offset: usize) -> Option<(usize, u8)> {
196            last_block
197                .bytes()
198                .seek_non_whitespace_forward(0)
199                .map(|(x, y)| (x + offset, y))
200        }
201    }
202
203    #[inline]
204    fn seek_non_whitespace_backward(&self, from: usize) -> Option<(usize, u8)> {
205        return if from >= MAX_BLOCK_SIZE && from < self.middle_bytes.len() + MAX_BLOCK_SIZE {
206            match self.middle_bytes.seek_non_whitespace_backward(from - MAX_BLOCK_SIZE) {
207                Some((x, y)) => Some((x + MAX_BLOCK_SIZE, y)),
208                None => handle_first(&self.first_block),
209            }
210        } else {
211            self.as_padded_input().seek_non_whitespace_backward(from)
212        };
213
214        #[cold]
215        #[inline(never)]
216        fn handle_first(first_block: &PaddedBlock) -> Option<(usize, u8)> {
217            first_block.bytes().seek_non_whitespace_backward(first_block.len() - 1)
218        }
219    }
220
221    #[inline(always)]
222    fn is_member_match(&self, from: usize, to: usize, member: &StringPattern) -> Result<bool, Self::Error> {
223        debug_assert!(from < to);
224        // The hot path is when we're checking fully within the middle section.
225        // This has to be as fast as possible, so the "cold" path referring to the TwoSidesPaddedInput
226        // impl is explicitly marked with #[cold].
227        if from > MAX_BLOCK_SIZE && to < self.middle_bytes.len() + MAX_BLOCK_SIZE {
228            // This is the hot path -- do the bounds check and memcmp.
229            let bytes = self.middle_bytes;
230            let from = from - MAX_BLOCK_SIZE;
231            let to = to - MAX_BLOCK_SIZE;
232            let slice = &bytes[from..to];
233            Ok(member.quoted() == slice && (from == 0 || bytes[from - 1] != b'\\'))
234        } else {
235            // This is a very expensive, cold path.
236            Ok(self.as_padded_input().is_member_match(from, to, member))
237        }
238    }
239}
240
241impl<'a, 'r, R, const N: usize> InputBlockIterator<'a, N>
242    for BorrowedBytesBlockIterator<'r, TwoSidesPaddedInput<'a>, R, N>
243where
244    R: InputRecorder<&'a [u8]> + 'r,
245{
246    type Block = &'a [u8];
247    type Error = Infallible;
248
249    #[inline(always)]
250    fn next(&mut self) -> Result<Option<Self::Block>, Self::Error> {
251        debug!("next!");
252        return if self.idx >= MAX_BLOCK_SIZE && self.idx < self.input.middle().len() + MAX_BLOCK_SIZE {
253            let start = self.idx - MAX_BLOCK_SIZE;
254            // SAFETY: Bounds check above.
255            // self.idx >= MBS => start >= 0, and self.idx < middle.len + MBS => self.idx < middle.len
256            // By construction, middle has length divisible by N.
257            let block = unsafe { self.input.middle().get_unchecked(start..start + N) };
258            self.recorder.record_block_start(block);
259            self.idx += N;
260            Ok(Some(block))
261        } else {
262            Ok(cold_path(self))
263        };
264
265        #[cold]
266        fn cold_path<'a, 'r, R, const N: usize>(
267            iter: &mut BorrowedBytesBlockIterator<'r, TwoSidesPaddedInput<'a>, R, N>,
268        ) -> Option<&'a [u8]>
269        where
270            R: InputRecorder<&'a [u8]>,
271        {
272            let block = iter.input.try_slice(iter.idx, N);
273
274            if let Some(b) = block {
275                iter.recorder.record_block_start(b);
276                iter.idx += N;
277            }
278
279            block
280        }
281    }
282
283    #[inline(always)]
284    fn offset(&mut self, count: isize) {
285        assert!(count >= 0);
286        debug!("offsetting input iter by {count}");
287        self.idx += count as usize * N;
288    }
289
290    #[inline(always)]
291    fn get_offset(&self) -> usize {
292        debug!("getting input iter {}", self.idx);
293        self.idx
294    }
295}
296
297impl<'a, 'r, R, const N: usize> InputBlockIterator<'a, N> for BorrowedBytesBlockIterator<'r, EndPaddedInput<'a>, R, N>
298where
299    R: InputRecorder<&'a [u8]> + 'r,
300{
301    type Block = &'a [u8];
302    type Error = Infallible;
303
304    #[inline(always)]
305    fn next(&mut self) -> Result<Option<Self::Block>, Self::Error> {
306        debug!("next!");
307        return if self.idx < self.input.middle().len() {
308            let start = self.idx;
309            // SAFETY: Bounds check above.
310            // self.idx >= MBS => start >= 0, and self.idx < middle.len + MBS => self.idx < middle.len
311            // By construction, middle has length divisible by N.
312            let block = unsafe { self.input.middle().get_unchecked(start..start + N) };
313            self.recorder.record_block_start(block);
314            self.idx += N;
315            Ok(Some(block))
316        } else {
317            Ok(cold_path(self))
318        };
319
320        #[cold]
321        fn cold_path<'a, 'r, R, const N: usize>(
322            iter: &mut BorrowedBytesBlockIterator<'r, EndPaddedInput<'a>, R, N>,
323        ) -> Option<&'a [u8]>
324        where
325            R: InputRecorder<&'a [u8]>,
326        {
327            let block = iter.input.try_slice(iter.idx, N);
328
329            if let Some(b) = block {
330                iter.recorder.record_block_start(b);
331                iter.idx += N;
332            }
333
334            block
335        }
336    }
337
338    #[inline(always)]
339    fn offset(&mut self, count: isize) {
340        assert!(count >= 0);
341        debug!("offsetting input iter by {count}");
342        self.idx += count as usize * N;
343    }
344
345    #[inline(always)]
346    fn get_offset(&self) -> usize {
347        debug!("getting input iter {}", self.idx);
348        self.idx
349    }
350}