perf_event_data/
parsebuf.rs

1use std::borrow::Cow;
2use std::io::{BufRead, BufReader, Read};
3use std::ops::Deref;
4
5use crate::parse::{ParseError, ParseResult, Parser};
6
7used_in_docs!(Parser);
8
/// A contiguous run of bytes handed out by a [`ParseBuf`].
///
/// The two variants record who owns the bytes being referenced:
/// - [`Temporary`]: the bytes live inside the [`ParseBuf`] instance itself.
///   Such a chunk cannot be held on to, so parsing never keeps it in borrowed
///   form.
/// - [`External`]: the bytes are borrowed from somewhere else. Record parsing
///   can then skip copying the data, which should be slightly faster where it
///   is possible.
///
/// When implementing a [`ParseBuf`] instance, prefer returning [`External`]
/// whenever you can.
///
/// [`Temporary`]: ParseBufChunk::Temporary
/// [`External`]: ParseBufChunk::External
#[derive(Clone, Copy, Debug)]
pub enum ParseBufChunk<'tmp, 'ext>
where
    'ext: 'tmp,
{
    /// Bytes owned by the current [`ParseBuf`] instance. They stay valid only
    /// until [`ParseBuf::advance`] is called.
    Temporary(&'tmp [u8]),

    /// Bytes that the [`ParseBuf`] instance does not own. They stay valid even
    /// once the [`ParseBuf`] has been dropped.
    External(&'ext [u8]),
}
34
35impl<'tmp, 'ext: 'tmp> ParseBufChunk<'tmp, 'ext> {
36    #[inline]
37    pub(crate) fn to_cow(self) -> Cow<'ext, [u8]> {
38        match self {
39            Self::Temporary(data) => Cow::Owned(data.to_vec()),
40            Self::External(data) => Cow::Borrowed(data),
41        }
42    }
43
44    #[inline]
45    pub(crate) fn truncate(&mut self, len: usize) {
46        if self.len() <= len {
47            return;
48        }
49
50        match self {
51            Self::Temporary(data) => *data = data.split_at(len).0,
52            Self::External(data) => *data = data.split_at(len).0,
53        }
54    }
55}
56
57impl<'tmp, 'ext: 'tmp> Deref for ParseBufChunk<'tmp, 'ext> {
58    type Target = [u8];
59
60    #[inline]
61    fn deref(&self) -> &Self::Target {
62        match *self {
63            Self::Temporary(bytes) => bytes,
64            Self::External(bytes) => bytes,
65        }
66    }
67}
68
69/// A data source from which [`Parser`] can parse data.
70///
71/// A [`ParseBuf`] has two main components:
72/// - An internal buffer that stores some amount of data. [`chunk`] returns a
73///   view into this buffer.
74/// - A position, [`advance`] moves this forward.
75///
76/// # Safety
77/// - If [`remaining_hint`] returns `Some` then the returned value must be
78///   accurate.
79///
80/// [`chunk`]: ParseBuf::chunk
81/// [`advance`]: ParseBuf::advance
82/// [`remaining_hint`]: ParseBuf::remaining_hint
83pub unsafe trait ParseBuf<'p> {
84    /// Returns a chunk starting at the current position.
85    ///
86    /// This method must never return an empty chunk. If an empty chunk would be
87    /// returned, it should return an error instead. [`ParseError::eof`] has
88    /// been provided for this, though it is not required to use it.
89    ///
90    /// This method must keep returning the same data until [`advance`] has been
91    /// called to move past it.
92    ///
93    /// See the documentation for [`ParseBufChunk`] for an explanation on when
94    /// to use [`ParseBufChunk::Temporary`] vs [`ParseBufChunk::External`].
95    ///
96    /// [`advance`]: ParseBuf::advance
97    fn chunk(&mut self) -> ParseResult<ParseBufChunk<'_, 'p>>;
98
99    /// Advance this buffer past `count` bytes.
100    fn advance(&mut self, count: usize);
101
102    /// An indicator of how many bytes are left, if supported.
103    ///
104    /// This is used for some optimizations within [`Parser`], if `Some` is
105    /// returned then the value must be accurate.
106    fn remaining_hint(&self) -> Option<usize> {
107        None
108    }
109}
110
111unsafe impl<'p> ParseBuf<'p> for &'p [u8] {
112    #[inline]
113    fn chunk(&mut self) -> ParseResult<ParseBufChunk<'_, 'p>> {
114        if self.is_empty() {
115            return Err(ParseError::eof());
116        }
117
118        Ok(ParseBufChunk::External(self))
119    }
120
121    #[inline]
122    fn advance(&mut self, count: usize) {
123        *self = self.split_at(count).1;
124    }
125
126    #[inline]
127    fn remaining_hint(&self) -> Option<usize> {
128        Some(self.len())
129    }
130}
131
132// This impl would work for any type that implements BufRead. Unfortunately,
133// that conflicts with the implementation of ParseBuf for &[u8]
134unsafe impl<'p, R> ParseBuf<'p> for BufReader<R>
135where
136    R: Read,
137{
138    #[inline]
139    fn chunk(&mut self) -> ParseResult<ParseBufChunk<'_, 'p>> {
140        let buf = self.fill_buf()?;
141
142        if buf.is_empty() {
143            Err(ParseError::eof())
144        } else {
145            Ok(ParseBufChunk::Temporary(buf))
146        }
147    }
148
149    #[inline]
150    fn advance(&mut self, count: usize) {
151        self.consume(count)
152    }
153}
154
155pub(crate) struct ParseBufCursor<'p> {
156    chunks: Vec<Cow<'p, [u8]>>,
157    offset: usize,
158    len: usize,
159}
160
161impl<'p> ParseBufCursor<'p> {
162    pub(crate) fn new<B>(buf: &mut B, mut len: usize) -> ParseResult<Self>
163    where
164        B: ParseBuf<'p>,
165    {
166        let mut chunks = Vec::with_capacity(2);
167        let total_len = len;
168
169        while len > 0 {
170            let mut chunk = buf.chunk()?;
171            chunk.truncate(len);
172
173            if chunk.len() > 0 {
174                chunks.push(chunk.to_cow());
175            }
176
177            let chunk_len = chunk.len();
178            len -= chunk_len;
179            buf.advance(chunk_len);
180        }
181
182        chunks.reverse();
183
184        Ok(Self {
185            chunks,
186            offset: 0,
187            len: total_len,
188        })
189    }
190
191    pub(crate) fn as_slice(&self) -> Option<&'p [u8]> {
192        if self.chunks.len() != 1 {
193            return None;
194        }
195
196        match &self.chunks[0] {
197            Cow::Borrowed(data) => Some(*data),
198            _ => None,
199        }
200    }
201}
202
203impl<'p> ParseBufCursor<'p> {
204    #[cold]
205    fn advance_slow(&mut self) {
206        while let Some(chunk) = self.chunks.last() {
207            if self.offset < chunk.len() {
208                break;
209            }
210
211            self.offset -= chunk.len();
212            self.chunks.pop();
213        }
214
215        if self.chunks.is_empty() {
216            assert_eq!(self.offset, 0, "advanced past the end of the buffer");
217        }
218    }
219}
220
221unsafe impl<'p> ParseBuf<'p> for ParseBufCursor<'p> {
222    #[inline]
223    fn chunk(&mut self) -> ParseResult<ParseBufChunk<'_, 'p>> {
224        match self.chunks.last().ok_or_else(ParseError::eof)? {
225            Cow::Borrowed(data) => Ok(ParseBufChunk::External(&data[self.offset..])),
226            Cow::Owned(data) => Ok(ParseBufChunk::Temporary(&data[self.offset..])),
227        }
228    }
229
230    #[inline]
231    fn advance(&mut self, count: usize) {
232        self.offset = self
233            .offset
234            .checked_add(count)
235            .expect("advanced past the end of the buffer");
236
237        self.len
238            .checked_sub(count)
239            .expect("advanced past the end of the buffer");
240
241        match self.chunks.last() {
242            Some(chunk) if chunk.len() > self.offset => (),
243            _ => self.advance_slow(),
244        }
245    }
246
247    #[inline]
248    fn remaining_hint(&self) -> Option<usize> {
249        Some(self.len)
250    }
251}
252
/// A [`ParseBuf`] wrapper that keeps count of how many bytes it has been
/// advanced by.
#[derive(Clone)]
pub(crate) struct TrackingParseBuf<B> {
    /// The wrapped buffer.
    buf: B,
    /// Running total of the byte counts passed to `advance`.
    offset: usize,
}

impl<B> TrackingParseBuf<B> {
    /// Wraps `buf`, with the tracked offset starting at zero.
    pub fn new(buf: B) -> Self {
        TrackingParseBuf { offset: 0, buf }
    }

    /// Returns the tracked offset.
    pub fn offset(&self) -> usize {
        let Self { offset, .. } = self;
        *offset
    }
}
269
270impl<'p> TrackingParseBuf<ParseBufCursor<'p>> {
271    pub(crate) fn as_slice(&self) -> Option<&'p [u8]> {
272        self.buf.as_slice()
273    }
274}
275
276unsafe impl<'p, B> ParseBuf<'p> for TrackingParseBuf<B>
277where
278    B: ParseBuf<'p>,
279{
280    fn chunk(&mut self) -> ParseResult<ParseBufChunk<'_, 'p>> {
281        self.buf.chunk()
282    }
283
284    fn advance(&mut self, count: usize) {
285        self.offset += count;
286        self.buf.advance(count);
287    }
288
289    fn remaining_hint(&self) -> Option<usize> {
290        self.buf.remaining_hint()
291    }
292}
293
#[cfg(test)]
mod tests {
    use super::*;

    /// A [`ParseBuf`] made of several separately borrowed chunks.
    ///
    /// It hands out its first chunk as-is, even when that chunk is empty; the
    /// tests below use that on purpose.
    struct ChunkBuf<'a>(Vec<&'a [u8]>);

    unsafe impl<'p> ParseBuf<'p> for ChunkBuf<'p> {
        fn chunk(&mut self) -> ParseResult<ParseBufChunk<'_, 'p>> {
            self.0
                .first()
                .copied()
                .map(ParseBufChunk::External)
                .ok_or_else(ParseError::eof)
        }

        fn advance(&mut self, mut count: usize) {
            while let Some(chunk) = self.0.first_mut() {
                if count < chunk.len() {
                    chunk.advance(count);
                    break;
                } else {
                    // The whole chunk (possibly an empty one) is consumed.
                    count -= chunk.len();
                    self.0.remove(0);
                }
            }
        }
    }

    /// Builds a cursor over the next `len` bytes of `buf`, failing the test if
    /// construction returns an error.
    fn cursor_over<'p>(buf: &mut ChunkBuf<'p>, len: usize) -> ParseBufCursor<'p> {
        match ParseBufCursor::new(buf, len) {
            Ok(cursor) => cursor,
            Err(_) => panic!("failed to build a cursor over {} bytes", len),
        }
    }

    /// Copies out the cursor's current chunk, or returns `None` when `chunk`
    /// reports an error.
    fn current_chunk(cursor: &mut ParseBufCursor<'_>) -> Option<Vec<u8>> {
        cursor.chunk().ok().map(|chunk| chunk.to_vec())
    }

    #[test]
    fn cursor_over_split() {
        let mut buf = ChunkBuf(vec![b"abcdef", b"012456789"]);
        let mut cursor = cursor_over(&mut buf, 8);

        assert_eq!(cursor.remaining_hint(), Some(8));
        // The data spans two chunks, so no single slice is available.
        assert!(cursor.as_slice().is_none());

        assert_eq!(current_chunk(&mut cursor), Some(b"abcdef".to_vec()));
        cursor.advance(6);
        assert_eq!(current_chunk(&mut cursor), Some(b"01".to_vec()));
        cursor.advance(2);
        assert_eq!(current_chunk(&mut cursor), None);

        // Only the 8 requested bytes were taken from the source.
        assert_eq!(buf.0, vec![&b"2456789"[..]]);
    }

    #[test]
    fn cursor_zero_split() {
        let mut buf = ChunkBuf(vec![b"", b"01234"]);
        let mut cursor = cursor_over(&mut buf, 4);

        // The empty leading chunk is skipped, leaving one borrowed chunk.
        assert_eq!(cursor.as_slice(), Some(&b"0123"[..]));
        assert_eq!(current_chunk(&mut cursor), Some(b"0123".to_vec()));

        assert_eq!(buf.0, vec![&b"4"[..]]);
    }
}
333}