read_byte_slice/
lib.rs

1//! [`ByteSliceIter`] reads bytes from a reader and allows iterating over them as slices with a
2//! maximum length, similar to the [`chunks`] method on slices.
3//!
4//! It is implemented as a [`FallibleStreamingIterator`] so that it can reuse its buffer and not
5//! allocate for each chunk. (That trait is re-exported here for convenience.)
6//!
7//! # Examples
8//! ```
9//! use read_byte_slice::{ByteSliceIter, FallibleStreamingIterator};
10//! use std::fs::File;
11//! # use std::io;
12//! # fn foo() -> io::Result<()> {
13//! let f = File::open("src/lib.rs")?;
14//! // Iterate over the file's contents in 8-byte chunks.
15//! let mut iter = ByteSliceIter::new(f, 8);
16//! while let Some(chunk) = iter.next()? {
17//!     println!("{:?}", chunk);
18//! }
19//! # Ok(())
20//! # }
21//! ```
22//!
23//! [`ByteSliceIter`]: struct.ByteSliceIter.html
24//! [`chunks`]: https://doc.rust-lang.org/std/primitive.slice.html#method.chunks
25//! [`FallibleStreamingIterator`]: ../fallible_streaming_iterator/trait.FallibleStreamingIterator.html
26
27extern crate fallible_streaming_iterator;
28
29// re-export this so callers don't have to explicitly depend on fallible-streaming-iterator.
30pub use fallible_streaming_iterator::FallibleStreamingIterator;
31use std::cmp;
32use std::io::{self, BufRead, BufReader, Read};
33
// Mirrors a buffer-size constant that is internal to the standard library and therefore cannot
// be imported, so the value is duplicated here:
// https://github.com/rust-lang/rust/blob/6ccfe68076abc78392ab9e1d81b5c1a2123af657/src/libstd/sys_common/io.rs#L10
// It is used as the minimum capacity of the `BufReader` created by `ByteSliceIter::new`.
const DEFAULT_BUF_SIZE: usize = 8 * 1024;
37
38/// An iterator over byte slices from a `Read` that reuses the same buffer instead of allocating.
39///
40/// See the [crate documentation] for example usage.
41///
42/// [crate documentation]: index.html
43pub struct ByteSliceIter<R>
44where
45    R: Read,
46{
47    inner: BufReader<R>,
48    buf: Vec<u8>,
49}
50
51impl<R> ByteSliceIter<R>
52where
53    R: Read,
54{
55    /// Create a new `ByteSliceIter` that reads from `inner` and produces slices of length
56    /// `chunk_len`. If `size` does not divide the total number of bytes read evenly the last
57    /// chunk will not have length `size`.
58    pub fn new(inner: R, size: usize) -> ByteSliceIter<R> {
59        ByteSliceIter {
60            inner: BufReader::with_capacity(cmp::max(size, DEFAULT_BUF_SIZE), inner),
61            // It would be nice to not need the extra buffer here, but there isn't an API to
62            // ask BufReader for its current buffer without reading more, and
63            // `FallibleStreamingIterator::get` doesn't return a `Result`.
64            buf: Vec::with_capacity(size),
65        }
66    }
67}
68
69impl<'a, R> FallibleStreamingIterator for ByteSliceIter<R>
70where
71    R: Read,
72{
73    type Item = [u8];
74    type Error = io::Error;
75
76    fn advance(&mut self) -> Result<(), io::Error> {
77        if self.buf.len() > 0 {
78            self.inner.consume(self.buf.len());
79            self.buf.clear();
80        }
81        let buf = self.inner.fill_buf()?;
82        let cap = self.buf.capacity();
83        self.buf.extend_from_slice(
84            &buf[..cmp::min(buf.len(), cap)],
85        );
86        Ok(())
87    }
88
89    fn get(&self) -> Option<&[u8]> {
90        if self.buf.len() > 0 {
91            Some(self.buf.as_slice())
92        } else {
93            None
94        }
95    }
96}
97
#[cfg(test)]
mod tests {
    use super::*;

    use std::env;
    use std::env::consts::EXE_EXTENSION;
    use std::path::Path;
    use std::process::Command;

    /// Runs `rustdoc --test` over the crate's README.md so that its code samples are checked,
    /// passing the directory of the current test executable as a library search path.
    #[test]
    fn readme_test() {
        let rustdoc = Path::new("rustdoc").with_extension(EXE_EXTENSION);
        // `file!()` names this source file; README.md is looked up two directory levels up.
        let crate_root = Path::new(file!()).parent().unwrap().parent().unwrap();
        let readme = crate_root.join("README.md");
        let exe = env::current_exe().unwrap();
        let outdir = exe.parent().unwrap();

        let mut cmd = Command::new(rustdoc);
        cmd.arg("--verbose")
            .arg("--test")
            .arg("-L")
            .arg(outdir)
            .arg(&readme);
        println!("{:?}", cmd);

        let mut child = cmd.spawn().expect("Failed to spawn process");
        let status = child.wait().expect("Failed to run process");
        assert!(status.success(), "Failed to run rustdoc tests on README.md!");
    }

    /// Collects every chunk that `ByteSliceIter` produces for `b` into owned vectors.
    fn sliced(b: &[u8], size: usize) -> Vec<Vec<u8>> {
        let mut chunks = Vec::new();
        let mut iter = ByteSliceIter::new(b, size);
        loop {
            match iter.next().unwrap() {
                Some(chunk) => chunks.push(chunk.to_vec()),
                None => break,
            }
        }
        chunks
    }

    /// Checks that `ByteSliceIter` splits `bytes` exactly like `slice::chunks` does, panicking
    /// with the chunk counts and lengths from both sides when they differ.
    fn test<T: AsRef<[u8]>>(bytes: T, size: usize) {
        let bytes = bytes.as_ref();
        let actual = sliced(bytes, size);
        let expected: Vec<&[u8]> = bytes.chunks(size).collect();
        if actual == expected {
            return;
        }
        let actual_lens: Vec<usize> = actual.iter().map(|c| c.len()).collect();
        let expected_lens: Vec<usize> = expected.iter().map(|c| c.len()).collect();
        panic!("chunks are not equal!
read-byte-slice produced {} chunks with lengths: {:?}
slice.chunks produced {} chunks with lengths: {:?}",
               actual.len(),
               actual_lens,
               expected.len(),
               expected_lens);
    }

    #[test]
    fn test_simple() {
        test(b"0123456789abcdef", 4);
    }

    #[test]
    fn test_non_even() {
        test(b"0123456789abcd", 4);
    }

    #[test]
    fn test_chunks_larger_than_bufread_default_buffer() {
        let size = DEFAULT_BUF_SIZE * 2;
        let bytes: Vec<u8> = (0..DEFAULT_BUF_SIZE * 4).map(|i| (i % 256) as u8).collect();
        test(bytes, size);
    }
}