xfind/buffer.rs
1use std::cmp;
2use std::io;
3use std::ptr;
4
5/// The default buffer capacity for the stream buffer is 8KB.
6pub const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10);
7
8/// A fairly simple roll buffer for supporting stream searching.
9#[derive(Debug)]
10pub struct Buffer {
11 /// A fixed-size raw buffer.
12 buf: Vec<u8>,
13 /// The minimum size of the buffer, which is equivalent to the length of the search string.
14 min: usize,
15 /// The end of the contents of this buffer.
16 end: usize,
17}
18
19impl Buffer {
20 /// Creates a new buffer for stream searching.
21 pub fn new(min_buffer_len: usize) -> Buffer {
22 let min = cmp::max(1, min_buffer_len);
23 // The minimum buffer capacity is at least 1 byte bigger than our search string, but for
24 // performance reasons we choose a lower bound of `8 * min`.
25 let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
26 Buffer { buf: vec![0; capacity], min, end: 0 }
27 }
28
29 /// Returns the minimum size of the buffer.
30 #[inline]
31 pub fn min_buffer_len(&self) -> usize {
32 self.min
33 }
34
35 /// Returns the contents of this buffer.
36 #[inline]
37 pub fn buffer(&self) -> &[u8] {
38 &self.buf[..self.end]
39 }
40
41 /// Returns the total length of the contents in this buffer.
42 #[inline]
43 pub fn len(&self) -> usize {
44 self.end
45 }
46
47 /// Returns all free capactiy in this buffer.
48 fn free_buffer(&mut self) -> &mut [u8] {
49 &mut self.buf[self.end..]
50 }
51
52 /// Refill the contents of this buffer by reading as much as possible into this buffer's free
53 /// capacity. If no more bytes could be read, then this returns false. Otherwise, this reads
54 /// until it has filled the buffer past the minimum amount.
55 pub fn fill<R: io::Read>(&mut self, mut rdr: R) -> io::Result<bool> {
56 let mut readany = false;
57 loop {
58 let bytes_read = rdr.read(self.free_buffer())?;
59 if bytes_read == 0 {
60 return Ok(readany);
61 }
62 readany = true;
63 self.end += bytes_read;
64 if self.len() >= self.min {
65 return Ok(true);
66 }
67 }
68 }
69
70 /// Rolls the contents of the buffer so that the suffix of this buffer is moved to the front
71 /// and all other contents are dropped. The size of the suffix corresponds precisely to the
72 /// minimum buffer length.
73 ///
74 /// This should only be called when the entire contents of this buffer have been searched.
75 pub fn roll(&mut self) {
76 let roll_start = self
77 .end
78 .checked_sub(self.min)
79 .expect("buffer capacity should be bigger than minimum amount.");
80 let roll_len = self.min;
81
82 assert!(roll_start + roll_len <= self.end);
83 unsafe {
84 // SAFETY: A buffer contains Copy data, so there's no problem moving it around. Safety
85 // also depends on our indices being in bounds, which they always should be, given the
86 // assert above.
87 ptr::copy(
88 self.buf[roll_start..].as_ptr(),
89 self.buf.as_mut_ptr(),
90 roll_len,
91 );
92 }
93 self.end = roll_len;
94 }
95}
96
97/// A fairly simple roll buffer for supporting stream searching from the end of a stream.
98#[derive(Debug)]
99pub struct BufferRev {
100 /// A fixed-size raw buffer.
101 buf: Vec<u8>,
102 /// The minimum size of the buffer, which is equivalent to the length of the search string.
103 min: usize,
104 /// The end of the contents of this buffer.
105 end: usize,
106}
107
108impl BufferRev {
109 /// Creates a new buffer for stream searching.
110 pub fn new(min_buffer_len: usize) -> Self {
111 let min = cmp::max(1, min_buffer_len);
112 // The minimum buffer capacity is at least 1 byte bigger than our search string, but for
113 // performance reasons we choose a lower bound of `8 * min`.
114 let capacity = cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
115 BufferRev { buf: vec![0; capacity], min, end: 0 }
116 }
117
118 /// Returns the minimum size of the buffer.
119 #[inline]
120 pub fn min_buffer_len(&self) -> usize {
121 self.min
122 }
123
124 /// Returns the capacity of the buffer.
125 #[inline]
126 pub fn capacity(&self) -> usize {
127 self.buf.capacity()
128 }
129
130 /// Returns the contents of this buffer.
131 #[inline]
132 pub fn buffer(&self) -> &[u8] {
133 &self.buf[self.capacity() - self.end..]
134 }
135
136 /// Returns the total length of the contents in this buffer.
137 #[inline]
138 pub fn len(&self) -> usize {
139 self.end
140 }
141
142 /// Returns all free capactiy in this buffer.
143 pub fn free_buffer(&mut self) -> &mut [u8] {
144 let capacity = self.capacity();
145 &mut self.buf[..capacity - self.end]
146 }
147
148 /// Fill the contents of this buffer by reading exactly the given amount into this buffer. If
149 /// there are no more than the given amount of bytes left to read, then this returns false.
150 /// Otherwise, this reads until it has filled the buffer with the given amount of bytes.
151 pub fn fill_exact<R: io::Read>(
152 &mut self,
153 mut rdr: R,
154 amount: usize,
155 ) -> io::Result<bool> {
156 let free_buffer_len = self.free_buffer().len();
157 match rdr
158 .read_exact(&mut self.free_buffer()[free_buffer_len - amount..])
159 {
160 Ok(_) => {
161 self.end += amount;
162 Ok(true)
163 }
164 Err(e) => match e.kind() {
165 io::ErrorKind::UnexpectedEof => Ok(false),
166 _ => Err(e),
167 },
168 }
169 }
170
171 /// Rolls the contents of the buffer so that the prefix of this buffer is moved to the end
172 /// and all other contents are dropped. The size of the prefix corresponds precisely to the
173 /// minimum buffer length.
174 ///
175 /// This should only be called when the entire contents of this buffer have been searched. And
176 /// this should only be called when it cooperates with `fill_exact`.
177 pub fn roll_right(&mut self) {
178 let roll_start = self
179 .end
180 .checked_sub(self.min)
181 .expect("buffer capacity should be bigger than minimum amount.");
182 let roll_len = self.min;
183
184 assert!(roll_start + roll_len <= self.end);
185 unsafe {
186 // SAFETY: A buffer contains Copy data, so there's no problem moving it around. Safety
187 // also depends on our indices being in bounds, which they always should be, given the
188 // assert above.
189 ptr::copy(
190 self.buffer()[..roll_len].as_ptr(),
191 self.buf.as_mut_ptr().add(self.capacity() - roll_len),
192 roll_len,
193 );
194 }
195 self.end = roll_len;
196 }
197}
198
199#[cfg(test)]
200mod tests {
201 use super::*;
202 use std::io::prelude::*;
203 use std::io::{Cursor, SeekFrom};
204
205 #[test]
206 fn test_buffer() {
207 let mut haystack = Cursor::new("0123456789".as_bytes());
208 let mut buf = Buffer::new(2);
209 assert_eq!(buf.min_buffer_len(), 2);
210
211 while buf.fill(&mut haystack).unwrap() {}
212 assert_eq!(buf.buffer(), b"0123456789");
213 assert_eq!(buf.len(), 10);
214
215 buf.roll();
216 assert_eq!(buf.buffer(), "89".as_bytes());
217 assert_eq!(buf.len(), 2);
218 }
219
220 #[test]
221 fn test_buffer_rev() {
222 let mut haystack = Cursor::new("0123456789".as_bytes());
223 let mut buf = BufferRev::new(2);
224 assert_eq!(buf.min_buffer_len(), 2);
225
226 haystack.seek(SeekFrom::End(-4)).unwrap();
227 buf.fill_exact(&mut haystack, 4).unwrap();
228 assert_eq!(buf.buffer(), "6789".as_bytes());
229 assert_eq!(buf.len(), 4);
230
231 buf.roll_right();
232 assert_eq!(buf.buffer(), "67".as_bytes());
233 assert_eq!(buf.len(), 2);
234
235 haystack.seek(SeekFrom::End(-10)).unwrap();
236 buf.fill_exact(&mut haystack, 6).unwrap();
237 assert_eq!(buf.buffer(), "01234567".as_bytes());
238 assert_eq!(buf.len(), 8);
239 }
240}