1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
//! Buffered Random Access (BRA) provides easy random memory access to a
//! sequential source of data. This is achieved by greedily retaining all
//! memory read from a given source, or by keeping a way to reset the
//! source to the beginning for multiple passes.
//!
//! # Examples
//! 
//! [`GreedyBufReader`] can be either used as a buffered reader or as
//! a random memory access descriptor. The amount of data read does
//! not influence the relative index of the data unless the method
//! [`clear`] is called.
//! 
//! [`GreedyBufReader`]: ./struct.GreedyBufReader.html
//! [`clear`]: ./struct.GreedyBufReader.html#method.clear
//!
//! ```
//! # use bra::GreedyBufReader;
//! # use std::io::Read;
//! # fn get_reader() -> impl Read {
//! #     std::io::repeat(1)
//! # }
//! # fn run() -> Result<(), Box<dyn std::error::Error>> {
//! let reader = get_reader();
//! let mut reader = GreedyBufReader::new(reader);
//! 
//! // random access to bytes!
//! let k: u8 = reader.get(12)?;
//! // random slicing!
//! let s: &[u8] = reader.slice(20..48)?;
//! assert_eq!(s.len(), 28);
//! // also functions as a buffered reader
//! let mut chunk = [0; 20];
//! reader.read_exact(&mut chunk)?;
//! # Ok(())
//! # }
//! # run().unwrap();
//! ```

use std::io::{BufRead, Error as IoError, ErrorKind as IoErrorKind, Read, Result as IoResult};
use std::ops::Bound;
use std::ops::RangeBounds;

/// A buffered reader that greedily retains all memory read into a buffer.
/// 
/// Like [`std::io::BufReader`], it fetches bytes from the source in bulk to
/// reduce the number of actual reads. Moreover, it provides methods for
/// reading a byte or slice of bytes at an arbitrary position, reading as many
/// bytes as required to reach that position of the data stream, if they are
/// not in memory already. The position indices are always relative to the
/// position of the data source when it was passed to this construct via
/// [`new`] or [`with_capacity`].
/// 
/// [`std::io::BufReader`]: https://doc.rust-lang.org/std/io/struct.BufReader.html
/// [`new`]: ./struct.GreedyBufReader.html#method.new
/// [`with_capacity`]: ./struct.GreedyBufReader.html#method.with_capacity
pub struct GreedyBufReader<R> {
    inner: R,
    buf: Vec<u8>,
    consumed: usize,
}

impl<R> GreedyBufReader<R>
where
    R: Read,
{
    /// Creates a new greedy buffered reader with the given byte source.
    pub fn new(src: R) -> Self {
        GreedyBufReader {
            inner: src,
            buf: Vec::new(),
            consumed: 0,
        }
    }

    /// Creates a new greedy buffered reader with the given byte source and
    /// the specified buffer capacity.
    /// 
    /// The buffer will be able to read approximately `capacity` bytes without
    /// reallocating.
    pub fn with_capacity(src: R, capacity: usize) -> Self {
        GreedyBufReader {
            inner: src,
            buf: Vec::with_capacity(capacity),
            consumed: 0,
        }
    }

    /// Retrieves the internal reader, discarding the buffer in the process.
    ///
    /// Note that any leftover data in the internal buffer is lost.
    pub fn into_inner(self) -> R {
        self.inner
    }

    /// Retrieves the internal buffer in its current state, discarding the
    /// reader in the process.
    pub fn into_buffer(self) -> Vec<u8> {
        self.buf
    }

    /// Fetches a single byte from the buffered data source. 
    pub fn get(&mut self, index: usize) -> IoResult<u8> {
        if let Some(v) = self.buf.get(index) {
            Ok(*v)
        } else {
            self.prefetch_up_to(index + 1)?;

            self.buf
                .get(index)
                .cloned()
                .ok_or_else(|| IoError::new(IoErrorKind::Other, "Index out of bounds"))
        }
    }

    /// Obtains a slice of bytes.
    /// 
    /// The range's end must be bound (e.g. `5..` is not supported).
    /// 
    /// # Error
    /// 
    /// Returns an I/O error if the range is out of the boundaries
    /// 
    /// # Panics
    /// 
    /// Panics if the range is not end bounded.
    pub fn slice<T>(&mut self, range: T) -> IoResult<&[u8]>
    where
        T: Clone,
        T: RangeBounds<usize>,
    {
        let end = range.end_bound();
        let e = match end {
            Bound::Unbounded => {
                unimplemented!("Unbounded end is currently not supported");
            }
            Bound::Excluded(&e) => e,
            Bound::Included(&e) => e + 1,
        };

        let b = match range.start_bound() {
            Bound::Unbounded => 0,
            Bound::Excluded(&b) | Bound::Included(&b) => b,
        };

        self.prefetch_up_to(e)?;

        if b > e || e > self.buf.len() {
            Err(IoError::new(IoErrorKind::Other, "Index out of bounds"))
        } else {
            Ok(&self.buf[b..e])
        }
    }

    /// Clears all memory of past reads, shrinking or freeing the buffer in the
    /// process. The reader will behave as if freshly constructed, save for
    /// already prefetched data, so that no bytes are lost. The following byte
    /// being read becomes the byte at index `#0`.
    pub fn clear(&mut self) {
        if self.consumed < self.buf.len() {
            self.buf = self.buf[self.consumed..].to_vec();
        } else {
            self.buf = Vec::new();
        }
        self.consumed = 0;
    }

    /// Shrinks the internal buffer to minimal capacity.
    pub fn shrink_to_fit(&mut self) {
        self.buf.shrink_to_fit()
    }

    fn reserve_up_to(&mut self, index: usize) {
        let mut new_size = 16;
        while new_size < index || new_size < self.buf.capacity() {
            new_size *= 2;
        }
        let additional = new_size - self.buf.capacity();
        if additional > 0 {
            self.buf.reserve(additional);
        }
    }

    fn data_to_read(&self) -> &[u8] {
        &self.buf[self.consumed..]
    }

    fn prefetch_up_to(&mut self, i: usize) -> IoResult<()> {
        self.reserve_up_to(i);
        let mut l = 0;
        while self.buf.len() <= i {
            let b = self.fill_buf()?;
            if b.len() == l {
                // no extra data since last call, retreat
                break;
            } else {
                // record length, continue fetching
                l = b.len();
            }
        }
        Ok(())
    }
}

impl<R> Read for GreedyBufReader<R>
where
    R: Read,
{
    fn read(&mut self, buf: &mut [u8]) -> IoResult<usize> {
        // we'll be reading from the buffer
        let mut to_read = self.data_to_read();
        if to_read.is_empty() {
            self.fill_buf()?;
            to_read = self.data_to_read();
        }

        let len = usize::min(to_read.len(), buf.len());
        &mut buf[..len].copy_from_slice(&self.buf[self.consumed..self.consumed + len]);
        self.consume(len);
        Ok(len)
    }
}

impl<R> BufRead for GreedyBufReader<R>
where
    R: Read,
{
    fn fill_buf(&mut self) -> IoResult<&[u8]> {
        if self.buf.capacity() == self.consumed {
            self.reserve_up_to(self.buf.capacity() + 16);
        }

        let b = self.buf.len();
        unsafe {
            // safe because it's within the buffer's limits
            // and we won't be reading uninitialized memory
            self.buf.set_len(self.buf.capacity());
        }
        let buf = &mut self.buf[b..];

        match self.inner.read(buf) {
            Ok(o) => {
                // take off the unwritten portion
                self.buf.truncate(b + o);

                Ok(&self.buf[self.consumed..])
            }
            Err(e) => Err(e),
        }
    }

    fn consume(&mut self, amt: usize) {
        self.consumed += amt;
    }
}

#[cfg(test)]
mod tests {
    use super::GreedyBufReader;
    use std::io::Read;
    #[test]
    fn smoke_test() {
        let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 50];

        let mut read = GreedyBufReader::new(&data[..]);
        let mut o = Vec::new();
        read.read_to_end(&mut o).unwrap();

        assert_eq!(o, &data);
    }

    #[test]
    fn test_get() {
        let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 50];

        let mut read = GreedyBufReader::new(&data[..]);

        assert_eq!(read.get(1).unwrap(), 2);
        assert_eq!(read.get(2).unwrap(), 3);
        assert_eq!(read.get(16).unwrap(), 50);
        assert_eq!(read.get(10).unwrap(), 11);
        assert!(read.get(17).is_err());
    }

    #[test]
    fn test_slice() {
        let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 50];

        let mut read = GreedyBufReader::new(&data[..]);

        assert_eq!(read.slice(0..0).unwrap(), &[]);
        assert_eq!(read.slice(1..2).unwrap(), &[2]);
        assert_eq!(read.slice(..=5).unwrap(), &[1, 2, 3, 4, 5, 6]);
        assert_eq!(read.slice(14..=16).unwrap(), &[15, 16, 50]);
        assert_eq!(read.slice(10..12).unwrap(), &[11, 12]);
        assert!(read.slice(7..18).is_err());
        assert!(read.slice(6..5).is_err());
    }

    #[test]
    fn arbitrary_get_infinite() {
        const B: u8 = 0x33;
        let mut read = GreedyBufReader::new(std::io::repeat(B));

        assert_eq!(read.get(4).unwrap(), B);
        assert_eq!(read.get(13).unwrap(), B);
        assert_eq!(read.get(24389).unwrap(), B);
        assert_eq!(read.get(156).unwrap(), B);
        assert_eq!(read.get(9006).unwrap(), B);
        assert_eq!(read.get(2019).unwrap(), B);
        assert_eq!(read.get(100000).unwrap(), B);
    }

    #[test]
    fn test_clear() {
        let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 50];
        let mut read = GreedyBufReader::new(&data[..]);

        assert_eq!(read.get(0).unwrap(), 1);
        assert_eq!(read.get(8).unwrap(), 9);
        assert_eq!(read.get(16).unwrap(), 50);

        let mut chunk = [0; 8];
        read.read_exact(&mut chunk).unwrap();
        assert_eq!(chunk, [1, 2, 3, 4, 5, 6, 7, 8]);

        assert_eq!(read.get(0).unwrap(), 1);
        assert_eq!(read.get(8).unwrap(), 9);
        assert_eq!(read.get(16).unwrap(), 50);

        read.clear();

        assert_eq!(read.get(0).unwrap(), 9);
        assert_eq!(read.get(8).unwrap(), 50);
        assert!(read.get(16).is_err());

        read.read_exact(&mut chunk).unwrap();
        assert_eq!(chunk, [9, 10, 11, 12, 13, 14, 15, 16]);

        assert_eq!(read.get(0).unwrap(), 9);
        assert_eq!(read.get(8).unwrap(), 50);
        assert!(read.get(16).is_err());
    }
}