rsonpath/input/
owned.rs

1//! Takes ownership of bytes of the input document.
2//!
3//! Choose this implementation if:
4//!
5//! 1. You already have the data loaded in-memory.
6//! 2. You don't want to deal with ownership and would rather have the input
7//!    take ownership of the bytes.
8//!
9//! ## Performance characteristics
10//!
11//! This is as fast as [`BorrowedBytes`](`super::BorrowedBytes`), unless
12//! the [`Borrow`] implementation of the underlying byte structure is weird
13//! and costly.
14// === Design note ===
15// This struct appears to be basically the same as BorrowedBytes, just with different
16// ownership mechanics. It appears that it should be possible to have a single struct
17// that achieves the API of both, taking either ownership or a borrow, but this leads to
18// lifetime issues around the current padding impl.
19
20use super::{
21    align_to,
22    borrowed::BorrowedBytesBlockIterator,
23    error::Infallible,
24    padding::{PaddedBlock, TwoSidesPaddedInput},
25    Input, SliceSeekable, MAX_BLOCK_SIZE,
26};
27use crate::{result::InputRecorder, string_pattern::StringPattern};
28use std::borrow::Borrow;
29
30/// Input wrapping a buffer borrowable as a slice of bytes.
31pub struct OwnedBytes<B> {
32    bytes: B,
33    middle_len: usize,
34    first_block: PaddedBlock,
35    last_block: PaddedBlock,
36}
37
38impl<B> OwnedBytes<B>
39where
40    B: Borrow<[u8]>,
41{
42    /// Create a new instance of [`OwnedBytes`] taking over the given buffer.
43    ///
44    /// The input will be automatically padded internally, incurring at most
45    /// two times [`MAX_BLOCK_SIZE`] of memory overhead.
46    #[inline(always)]
47    pub fn new(bytes: B) -> Self {
48        let (first, middle, last) = align_to::<MAX_BLOCK_SIZE>(bytes.borrow());
49        let first_block = PaddedBlock::pad_first_block(first);
50        let last_block = PaddedBlock::pad_last_block(last);
51
52        Self {
53            middle_len: middle.len(),
54            bytes,
55            first_block,
56            last_block,
57        }
58    }
59}
60
61impl<B> From<B> for OwnedBytes<B>
62where
63    B: Borrow<[u8]>,
64{
65    #[inline(always)]
66    fn from(value: B) -> Self {
67        Self::new(value)
68    }
69}
70
71impl From<String> for OwnedBytes<Vec<u8>> {
72    #[inline(always)]
73    fn from(value: String) -> Self {
74        Self::new(value.into_bytes())
75    }
76}
77
78impl<B> Input for OwnedBytes<B>
79where
80    B: Borrow<[u8]>,
81{
82    type BlockIterator<'i, 'r, R, const N: usize>
83        = BorrowedBytesBlockIterator<'r, TwoSidesPaddedInput<'i>, R, N>
84    where
85        Self: 'i,
86        R: InputRecorder<Self::Block<'i, N>> + 'r;
87
88    type Error = Infallible;
89
90    type Block<'i, const N: usize>
91        = &'i [u8]
92    where
93        Self: 'i;
94
95    #[inline(always)]
96    fn leading_padding_len(&self) -> usize {
97        self.first_block.padding_len()
98    }
99
100    #[inline(always)]
101    fn trailing_padding_len(&self) -> usize {
102        self.last_block.padding_len()
103    }
104
105    #[inline]
106    fn iter_blocks<'i, 'r, R, const N: usize>(&'i self, recorder: &'r R) -> Self::BlockIterator<'i, 'r, R, N>
107    where
108        R: InputRecorder<Self::Block<'i, N>>,
109    {
110        let (_, middle, _) = align_to::<MAX_BLOCK_SIZE>(self.bytes.borrow());
111        assert_eq!(middle.len(), self.middle_len);
112
113        let padded = TwoSidesPaddedInput::new(&self.first_block, middle, &self.last_block);
114
115        BorrowedBytesBlockIterator::new(padded, recorder)
116    }
117
118    #[inline]
119    fn seek_backward(&self, from: usize, needle: u8) -> Option<usize> {
120        let offset = self.leading_padding_len();
121        let from = from.checked_sub(offset)?;
122
123        self.bytes.borrow().seek_backward(from, needle).map(|x| x + offset)
124    }
125
126    #[inline]
127    fn seek_forward<const N: usize>(&self, from: usize, needles: [u8; N]) -> Result<Option<(usize, u8)>, Self::Error> {
128        let offset = self.leading_padding_len();
129        let from = from.saturating_sub(offset);
130
131        Ok(self
132            .bytes
133            .borrow()
134            .seek_forward(from, needles)
135            .map(|(x, y)| (x + self.leading_padding_len(), y)))
136    }
137
138    #[inline]
139    fn seek_non_whitespace_forward(&self, from: usize) -> Result<Option<(usize, u8)>, Self::Error> {
140        let offset = self.leading_padding_len();
141        let from = from.saturating_sub(offset);
142
143        Ok(self
144            .bytes
145            .borrow()
146            .seek_non_whitespace_forward(from)
147            .map(|(x, y)| (x + self.leading_padding_len(), y)))
148    }
149
150    #[inline]
151    fn seek_non_whitespace_backward(&self, from: usize) -> Option<(usize, u8)> {
152        let offset = self.leading_padding_len();
153        let from = from.checked_sub(offset)?;
154
155        self.bytes
156            .borrow()
157            .seek_non_whitespace_backward(from)
158            .map(|(x, y)| (x + self.leading_padding_len(), y))
159    }
160
161    #[inline]
162    fn is_member_match(&self, from: usize, to: usize, member: &StringPattern) -> Result<bool, Self::Error> {
163        let offset = self.leading_padding_len();
164        let Some(from) = from.checked_sub(offset) else {
165            return Ok(false);
166        };
167
168        Ok(self.bytes.borrow().is_member_match(from, to - offset, member))
169    }
170}