// markable_reader/io/markable_reader.rs

1use std::io::Write;
2
3use super::{buffer::Buffer, MarkerStream, DEFAULT_MARKER_BUFFER_SIZE};
4
5/// Reads bytes from the inner source with the additional ability
6/// to `mark` a stream at a point that can be returned to later
7/// using the a call to `reset()`. Whlie the stream is marked all
8/// subsequent reads are returned as usual, but are also buffered,
9///
10/// which is what allows for returning to a previous part of the
11/// the stream.
12///
13/// If the inner stream should also be buffered, use `BufferedMarkableStream`,
14/// which may offer a slight optimization over passing a `std::io::BufReader`
15/// as the inner reader to this stream.
16pub struct MarkableReader<R> {
17    inner: R,
18    inner_complete: bool,
19    is_marked: bool,
20    mark_buffer: Buffer,
21}
22
23impl<R> MarkableReader<R>
24where
25    R: std::io::Read,
26{
27    /// Creates a new reader with an unbounded marked buffer
28    ///
29    /// # Example
30    // ```
31    // //create a new reader
32    // let file = std::fs::File::open("path.bin").unwrap();
33    // let mut reader = MarkableReader::new(reader);
34    // // now use anywhere you would use a standard reader
35    // ```
36    pub fn new(inner: R) -> MarkableReader<R> {
37        MarkableReader {
38            inner,
39            inner_complete: false,
40            is_marked: false,
41            mark_buffer: Buffer::new(DEFAULT_MARKER_BUFFER_SIZE, None),
42        }
43    }
44
45    /// Creates a new reader with an limited marked buffer
46    /// Any reads that exceed the provided limit will result in an `std::io::Error(ErrorKind::OutOfMemory)` error
47    /// The use of this is very similar to that of the `std::io::BufReader`
48    ///
49    /// # Example
50    // ```
51    // //create a new reader
52    // let file = std::fs::File::open("path.bin").unwrap();
53    // let mut reader = MarkableReader::new_with_limited_back_buffer(reader, 1024 /*1KB back buffer*/);
54    // // now use anywhere you would use a standard reader
55    // ```
56    pub fn new_with_limited_back_buffer(inner: R, limit: usize) -> MarkableReader<R> {
57        MarkableReader {
58            inner,
59            inner_complete: false,
60            is_marked: false,
61            mark_buffer: Buffer::new(DEFAULT_MARKER_BUFFER_SIZE, Some(limit)),
62        }
63    }
64
65    /// Creates a new reader using the provided capacities as the initial capacity and limit.
66    /// Any reads that exceed the provided limit will result in an `std::io::Error(ErrorKind::OutOfMemory)` error
67    ///
68    /// # Example
69    // ```
70    // //create a new reader
71    // let file = std::fs::File::open("path.bin").unwrap();
72    // let mut reader = MarkableReader::new_with_capacity_and_limit(reader, 1024 /*1KB back buffer capacity and limit */, 1024 /* 1KB reader buffer capacity */);
73    // // now use anywhere you would use a standard reader
74    // ```
75    pub fn new_with_capacity_and_limit(
76        inner: R,
77        capacity: usize,
78        limit: usize,
79    ) -> MarkableReader<R> {
80        MarkableReader {
81            inner,
82            inner_complete: false,
83            is_marked: false,
84            mark_buffer: Buffer::new(capacity, Some(limit)),
85        }
86    }
87
88    /// Returns the inner reader. **IMPORTANT** this will likely result in data loss
89    /// of whatever data has been read into the buffer
90    pub fn into_inner(self) -> R {
91        self.inner
92    }
93
94    /// Reads at most `buf.len()` bytes from the underlying buffers to fill the provided buffer.
95    fn read_into_buf(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
96        // If marked, then we only read from the read buffer and all
97        // read bytes go in the mark buffer.
98        // If not marked, we read what we can from the mark buffer and then read the remaining
99        // bytes from the underlying reader.
100        if self.is_marked {
101            // First grab what we can from the mark buffer
102            let buffer_bytes_read = self.mark_buffer.read_into(buf, 0);
103            // Then fill and retain remaining from the inner reader
104            let inner_bytes_read =
105                self.read_data_into_buf_and_marked_stream(buf, buffer_bytes_read)?;
106            Ok(inner_bytes_read + buffer_bytes_read)
107        } else {
108            // Otherwise, read what we can from the mark buffer and then go to inner reader
109            // for any remaining bytes
110            let mut bytes_read = self.mark_buffer.read_into(buf, 0);
111            bytes_read += self.fill_from_inner(buf, bytes_read)?;
112
113            if bytes_read == 0 {
114                Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof))
115            } else {
116                Ok(bytes_read)
117            }
118        }
119    }
120
121    /// Fills the provided buffer with bytes from the underlying stream and also places those
122    /// bytes into the mark buffer
123    fn read_data_into_buf_and_marked_stream(
124        &mut self,
125        buf: &mut [u8],
126        offset: usize,
127    ) -> std::io::Result<usize> {
128        let inner_bytes_read = self.fill_from_inner(buf, offset)?;
129        if inner_bytes_read > 0 {
130            // Inner the inner bytes read will be last n bytes that were read from into the buffer
131            let inner_bytes = &buf[buf.len() - inner_bytes_read..buf.len()];
132            self.mark_buffer.write(inner_bytes)?;
133        }
134
135        Ok(inner_bytes_read)
136    }
137
138    /// Fills the provided buffer with bytes from the read buffer starting with at the provided offset
139    fn fill_from_inner(&mut self, buf: &mut [u8], offset: usize) -> std::io::Result<usize> {
140        if self.inner_complete {
141            return Ok(0);
142        }
143
144        let mut read = 0;
145        let mut single_byte_buf = vec![0; 1];
146        while read + offset < buf.len() {
147            let current_read = self.inner.read(&mut single_byte_buf)?;
148            if current_read > 0 {
149                buf[read + offset] = single_byte_buf[0];
150                read += 1;
151            } else {
152                return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof));
153            }
154        }
155
156        Ok(read)
157    }
158}
159
160impl<R> MarkerStream for MarkableReader<R> {
161    /// Marks the location of the inner stream. From tis point forward
162    /// reads will be cached. If the stream was marked prior to this call
163    /// the current buffer will be discarded.
164    ///
165    /// Returns the number of bytes that were discarded as a result of this operation
166    fn mark(&mut self) -> usize {
167        self.is_marked = true;
168        self.mark_buffer.purge_read()
169    }
170
171    /// Resets the stream previously marked position, if it is set.
172    /// If the reader was not previously marked, this has no affect.
173    ///
174    fn reset(&mut self) {
175        self.is_marked = false;
176        self.mark_buffer.restart();
177    }
178
179    fn clear_buffer(&mut self) {
180        self.is_marked = false;
181        self.mark_buffer.clear();
182    }
183}
184
185impl<R> std::io::Read for MarkableReader<R>
186where
187    R: std::io::Read,
188{
189    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
190        self.read_into_buf(buf)
191    }
192}
193
194impl<R> From<R> for MarkableReader<R>
195where
196    R: std::io::Read,
197{
198    fn from(value: R) -> Self {
199        MarkableReader::new(value)
200    }
201}
202
#[cfg(test)]
mod tests {
    use std::io::{Cursor, ErrorKind, Read};

    use crate::io::MarkerStream;

    use super::MarkableReader;

    /// An unmarked reader hands back exactly the bytes of the inner source.
    #[test]
    fn test_basic_read() {
        let input_data = vec![0, 1, 2, 3];
        let data = Cursor::new(input_data.clone());
        let mut reader = MarkableReader::new(data);

        let mut read_buf = vec![0; input_data.len()];
        reader
            .read_exact(&mut read_buf)
            .expect("should be able to read bytes back");
        assert_eq!(
            input_data, read_buf,
            "read buffer and input buffer should match"
        );
    }

    /// Bytes read after `mark()` can be read a second time after `reset()`.
    #[test]
    fn test_marked_read() {
        let input_data = vec![0, 1, 2, 3];
        let data = Cursor::new(input_data.clone());
        let mut reader = MarkableReader::new(data);

        let mut single_byte_buf = vec![0];
        reader
            .read_exact(&mut single_byte_buf)
            .expect("should be able to read single byte");

        assert_eq!(0, reader.mark(), "no bytes should be wasted");

        let mut rest_of_buf = vec![0; input_data.len() - 1];
        reader
            .read_exact(&mut rest_of_buf)
            .expect("should be able to read rest of buffer");

        reader.reset();
        rest_of_buf = vec![0; input_data.len() - 1];

        reader
            .read_exact(&mut rest_of_buf)
            .expect("should be able to read rest of buffer again after reset");

        assert_eq!(
            input_data[1..],
            rest_of_buf,
            "buffer should be last 3 bytes"
        );
    }

    /// A single read after `reset()` can span the mark buffer and the inner
    /// reader.
    #[test]
    fn test_back_buffer_and_read_buffer_read() {
        let input_data = vec![0, 1, 2, 3];
        let data = Cursor::new(input_data.clone());
        let mut reader = MarkableReader::new(data);

        let mut half_buf = vec![0; input_data.len() / 2];
        reader.mark();
        reader
            .read_exact(&mut half_buf)
            .expect("should be able to read half the buffer");

        reader.reset();
        let mut whole_buf = vec![0; input_data.len()];

        reader
            .read_exact(&mut whole_buf)
            .expect("should be able to whole buffer");

        assert_eq!(
            input_data, whole_buf,
            "input data and whole buf should match"
        );
    }

    /// Repeated mark / peek two bytes / reset / consume one byte walks the
    /// stream one byte at a time.
    #[test]
    fn test_read_with_popping_bytes() {
        let input_data = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
        let data = Cursor::new(input_data.clone());
        let mut reader = MarkableReader::new(data);
        let mut single_byte_buffer = vec![0_u8; 1];

        for i in 0..input_data.len() - 1 {
            reader.mark();
            let expected = input_data[i..i + 2].to_vec();
            let mut actual = [0_u8; 2];
            reader
                .read_exact(&mut actual)
                .expect("should always be able to read 2 bytes");
            assert_eq!(
                expected, actual,
                "bytes at index {i} should be {expected:?} but were {actual:?}"
            );

            reader.reset();
            reader
                .read_exact(&mut single_byte_buffer)
                .expect("should be able to read single byte");
            assert_eq!(
                single_byte_buffer[0], input_data[i],
                "popped byte at index {i} should be {i} but was {}",
                single_byte_buffer[0]
            );
        }
    }

    /// Asking `read_exact` for more bytes than the source holds fails with
    /// `UnexpectedEof`.
    #[test]
    fn test_read_exact_past_end_reports_eof() {
        let data = Cursor::new(vec![0_u8, 1]);
        let mut reader = MarkableReader::new(data);

        let mut too_big = vec![0_u8; 3];
        let err = reader
            .read_exact(&mut too_big)
            .expect_err("reading past the end of the source should fail");
        assert_eq!(
            ErrorKind::UnexpectedEof,
            err.kind(),
            "running out of data should be reported as an unexpected EOF"
        );
    }
}