xfind/
buffer.rs

1use std::cmp;
2use std::io;
3use std::ptr;
4
5/// The default buffer capacity for the stream buffer is 8KB.
6pub const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10);
7
8/// A fairly simple roll buffer for supporting stream searching.
9#[derive(Debug)]
10pub struct Buffer {
11    /// A fixed-size raw buffer.
12    buf: Vec<u8>,
13    /// The minimum size of the buffer, which is equivalent to the length of the search string.
14    min: usize,
15    /// The end of the contents of this buffer.
16    end: usize,
17}
18
19impl Buffer {
20    /// Creates a new buffer for stream searching.
21    pub fn new(min_buffer_len: usize) -> Buffer {
22        let min = cmp::max(1, min_buffer_len);
23        // The minimum buffer capacity is at least 1 byte bigger than our search string, but for
24        // performance reasons we choose a lower bound of `8 * min`.
25        let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
26        Buffer { buf: vec![0; capacity], min, end: 0 }
27    }
28
29    /// Returns the minimum size of the buffer.
30    #[inline]
31    pub fn min_buffer_len(&self) -> usize {
32        self.min
33    }
34
35    /// Returns the contents of this buffer.
36    #[inline]
37    pub fn buffer(&self) -> &[u8] {
38        &self.buf[..self.end]
39    }
40
41    /// Returns the total length of the contents in this buffer.
42    #[inline]
43    pub fn len(&self) -> usize {
44        self.end
45    }
46
47    /// Returns all free capactiy in this buffer.
48    fn free_buffer(&mut self) -> &mut [u8] {
49        &mut self.buf[self.end..]
50    }
51
52    /// Refill the contents of this buffer by reading as much as possible into this buffer's free
53    /// capacity. If no more bytes could be read, then this returns false. Otherwise, this reads
54    /// until it has filled the buffer past the minimum amount.
55    pub fn fill<R: io::Read>(&mut self, mut rdr: R) -> io::Result<bool> {
56        let mut readany = false;
57        loop {
58            let bytes_read = rdr.read(self.free_buffer())?;
59            if bytes_read == 0 {
60                return Ok(readany);
61            }
62            readany = true;
63            self.end += bytes_read;
64            if self.len() >= self.min {
65                return Ok(true);
66            }
67        }
68    }
69
70    /// Rolls the contents of the buffer so that the suffix of this buffer is moved to the front
71    /// and all other contents are dropped. The size of the suffix corresponds precisely to the
72    /// minimum buffer length.
73    ///
74    /// This should only be called when the entire contents of this buffer have been searched.
75    pub fn roll(&mut self) {
76        let roll_start = self
77            .end
78            .checked_sub(self.min)
79            .expect("buffer capacity should be bigger than minimum amount.");
80        let roll_len = self.min;
81
82        assert!(roll_start + roll_len <= self.end);
83        unsafe {
84            // SAFETY: A buffer contains Copy data, so there's no problem moving it around. Safety
85            // also depends on our indices being in bounds, which they always should be, given the
86            // assert above.
87            ptr::copy(
88                self.buf[roll_start..].as_ptr(),
89                self.buf.as_mut_ptr(),
90                roll_len,
91            );
92        }
93        self.end = roll_len;
94    }
95}
96
97/// A fairly simple roll buffer for supporting stream searching from the end of a stream.
98#[derive(Debug)]
99pub struct BufferRev {
100    /// A fixed-size raw buffer.
101    buf: Vec<u8>,
102    /// The minimum size of the buffer, which is equivalent to the length of the search string.
103    min: usize,
104    /// The end of the contents of this buffer.
105    end: usize,
106}
107
108impl BufferRev {
109    /// Creates a new buffer for stream searching.
110    pub fn new(min_buffer_len: usize) -> Self {
111        let min = cmp::max(1, min_buffer_len);
112        // The minimum buffer capacity is at least 1 byte bigger than our search string, but for
113        // performance reasons we choose a lower bound of `8 * min`.
114        let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
115        BufferRev { buf: vec![0; capacity], min, end: 0 }
116    }
117
118    /// Returns the minimum size of the buffer.
119    #[inline]
120    pub fn min_buffer_len(&self) -> usize {
121        self.min
122    }
123
124    /// Returns the capacity of the buffer.
125    #[inline]
126    pub fn capacity(&self) -> usize {
127        self.buf.capacity()
128    }
129
130    /// Returns the contents of this buffer.
131    #[inline]
132    pub fn buffer(&self) -> &[u8] {
133        &self.buf[self.capacity() - self.end..]
134    }
135
136    /// Returns the total length of the contents in this buffer.
137    #[inline]
138    pub fn len(&self) -> usize {
139        self.end
140    }
141
142    /// Returns all free capactiy in this buffer.
143    pub fn free_buffer(&mut self) -> &mut [u8] {
144        let capacity = self.capacity();
145        &mut self.buf[..capacity - self.end]
146    }
147
148    /// Fill the contents of this buffer by reading exactly the given amount into this buffer. If
149    /// there are no more than the given amount of bytes left to read, then this returns false.
150    /// Otherwise, this reads until it has filled the buffer with the given amount of bytes.
151    pub fn fill_exact<R: io::Read>(
152        &mut self,
153        mut rdr: R,
154        amount: usize,
155    ) -> io::Result<bool> {
156        let free_buffer_len = self.free_buffer().len();
157        match rdr
158            .read_exact(&mut self.free_buffer()[free_buffer_len - amount..])
159        {
160            Ok(_) => {
161                self.end += amount;
162                Ok(true)
163            }
164            Err(e) => match e.kind() {
165                io::ErrorKind::UnexpectedEof => Ok(false),
166                _ => Err(e),
167            },
168        }
169    }
170
171    /// Rolls the contents of the buffer so that the prefix of this buffer is moved to the end
172    /// and all other contents are dropped. The size of the prefix corresponds precisely to the
173    /// minimum buffer length.
174    ///
175    /// This should only be called when the entire contents of this buffer have been searched. And
176    /// this should only be called when it cooperates with `fill_exact`.
177    pub fn roll_right(&mut self) {
178        let roll_start = self
179            .end
180            .checked_sub(self.min)
181            .expect("buffer capacity should be bigger than minimum amount.");
182        let roll_len = self.min;
183
184        assert!(roll_start + roll_len <= self.end);
185        unsafe {
186            // SAFETY: A buffer contains Copy data, so there's no problem moving it around. Safety
187            // also depends on our indices being in bounds, which they always should be, given the
188            // assert above.
189            ptr::copy(
190                self.buffer()[..roll_len].as_ptr(),
191                self.buf.as_mut_ptr().add(self.capacity() - roll_len),
192                roll_len,
193            );
194        }
195        self.end = roll_len;
196    }
197}
198
199#[cfg(test)]
200mod tests {
201    use super::*;
202    use std::io::prelude::*;
203    use std::io::{Cursor, SeekFrom};
204
205    #[test]
206    fn test_buffer() {
207        let mut haystack = Cursor::new("0123456789".as_bytes());
208        let mut buf = Buffer::new(2);
209        assert_eq!(buf.min_buffer_len(), 2);
210
211        while buf.fill(&mut haystack).unwrap() {}
212        assert_eq!(buf.buffer(), b"0123456789");
213        assert_eq!(buf.len(), 10);
214
215        buf.roll();
216        assert_eq!(buf.buffer(), "89".as_bytes());
217        assert_eq!(buf.len(), 2);
218    }
219
220    #[test]
221    fn test_buffer_rev() {
222        let mut haystack = Cursor::new("0123456789".as_bytes());
223        let mut buf = BufferRev::new(2);
224        assert_eq!(buf.min_buffer_len(), 2);
225
226        haystack.seek(SeekFrom::End(-4)).unwrap();
227        buf.fill_exact(&mut haystack, 4).unwrap();
228        assert_eq!(buf.buffer(), "6789".as_bytes());
229        assert_eq!(buf.len(), 4);
230
231        buf.roll_right();
232        assert_eq!(buf.buffer(), "67".as_bytes());
233        assert_eq!(buf.len(), 2);
234
235        haystack.seek(SeekFrom::End(-10)).unwrap();
236        buf.fill_exact(&mut haystack, 6).unwrap();
237        assert_eq!(buf.buffer(), "01234567".as_bytes());
238        assert_eq!(buf.len(), 8);
239    }
240}