1use std::cmp::min;
9use std::io::{self, BufRead, ErrorKind, Read, Seek, SeekFrom};
10
/// Buffer size, in bytes, used when no explicit size is requested (64 KiB).
const DEFAULT_BUFFER_SIZE: usize = 1 << 16;
13
/// Buffered reader over a seekable source that steps over bytes whose read fails with
/// [`ErrorKind::PermissionDenied`] and optionally stops at a maximum offset.
// NOTE(review): the lint is presumably silenced because the type name repeats the name of
// the enclosing module; the module name is not visible from here — confirm.
#[allow(clippy::module_name_repetitions)]
pub struct SkippingBufReader<F>
where
    F: Read + Seek,
{
    /// The underlying source that is read from and seeked in.
    file: F,

    /// Absolute offset in `file` at which reading stops (exclusive); `None` reads until EOF.
    max_offset: Option<usize>,

    /// Storage for the bytes fetched by the most recent read of `file`.
    buffer: Vec<u8>,
    /// Number of bytes requested by the next read attempt. It shrinks while reads fail with
    /// a permission error and is restored to `initial_buffer_size` after a successful read.
    buffer_size: usize,
    /// The buffer size requested at construction time.
    initial_buffer_size: usize,

    /// Number of bytes at the start of `buffer` that were filled by the last successful read.
    valid_bytes_in_buffer: usize,

    /// Index into `buffer` of the next byte that has not yet been handed to the caller.
    read_position_in_buffer: usize,
}
53
54impl<F: Read + Seek> SkippingBufReader<F> {
55 pub fn new(file: F, start_offset: usize, max_offset: Option<usize>) -> Self {
56 Self::with_buffer_size(DEFAULT_BUFFER_SIZE, file, start_offset, max_offset)
57 }
58
59 pub fn with_buffer_size(
66 buffer_size: usize,
67 mut file: F,
68 start_offset: usize,
69 max_offset: Option<usize>,
70 ) -> Self {
71 file.seek(SeekFrom::Start(start_offset as u64))
72 .expect("failed to seek to given start offset");
73
74 Self {
75 file,
76 max_offset,
77
78 buffer: Vec::with_capacity(buffer_size),
79 buffer_size,
80 initial_buffer_size: buffer_size,
81
82 valid_bytes_in_buffer: 0,
83 read_position_in_buffer: 0,
84 }
85 }
86
87 fn refill_buffer(&mut self) -> io::Result<usize> {
88 loop {
89 if let Some(max_offset) = self.max_offset {
93 let seek_position = usize::try_from(self.file.stream_position()?).unwrap();
94 let bytes_to_max_offset = max_offset - seek_position;
95 self.buffer_size = min(self.buffer_size, bytes_to_max_offset);
96 }
97
98 self.buffer.resize(self.buffer_size, 0);
99
100 match self.file.read(&mut self.buffer) {
101 Ok(0) => {
102 log::debug!("Reached EOF or maximum offset.");
103 return Ok(0);
104 }
105 Ok(bytes_read) => {
106 self.buffer_size = self.initial_buffer_size;
107
108 self.valid_bytes_in_buffer = bytes_read;
109 self.read_position_in_buffer = 0;
110
111 return Ok(bytes_read);
112 }
113 Err(e) if e.kind() == ErrorKind::PermissionDenied => {
114 if self.buffer_size > 1 {
115 self.buffer_size /= 2;
116 } else {
117 self.file.seek_relative(1)?;
119 }
120 }
121 Err(e) => return Err(e),
122 }
123 }
124 }
125
126 #[must_use]
140 pub fn position_in_file(&mut self) -> usize {
141 let seek_position = self
142 .file
143 .stream_position()
144 .expect("obtaining the current seek position should always succeed");
145 let unread_bytes_in_buffer = self.valid_bytes_in_buffer - self.read_position_in_buffer;
146
147 usize::try_from(seek_position).unwrap() - unread_bytes_in_buffer
148 }
149}
150
151impl<F: Read + Seek> BufRead for SkippingBufReader<F> {
152 fn fill_buf(&mut self) -> io::Result<&[u8]> {
153 if self.read_position_in_buffer >= self.valid_bytes_in_buffer {
154 log::trace!("No unread bytes in buffer; refilling it...");
155 let bytes_read = self.refill_buffer()?;
156 log::trace!("Refilled buffer with {bytes_read} bytes.");
157
158 if bytes_read == 0 {
159 return Ok(&[]);
160 }
161 }
162
163 let unread_bytes = &self.buffer[self.read_position_in_buffer..self.valid_bytes_in_buffer];
164 Ok(unread_bytes)
165 }
166
167 fn consume(&mut self, amt: usize) {
168 self.read_position_in_buffer += amt;
169 }
170}
171
172impl<F: Read + Seek> Read for SkippingBufReader<F> {
173 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
174 let mut available_bytes = self.fill_buf()?;
177 let no_of_bytes_read = available_bytes.read(buf)?;
179 self.consume(no_of_bytes_read);
182
183 Ok(no_of_bytes_read)
184 }
185}
186
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Twenty bytes of sample data used as the file contents in every test.
    const SAMPLE: &str = "abcdefghijklmnopqrst";

    #[test]
    fn position_in_file_returns_expected_position() {
        let mut reader = SkippingBufReader::with_buffer_size(8, Cursor::new(SAMPLE), 0, None);

        // 18 bytes do not align with the 8-byte buffer, so some buffered bytes stay unread.
        let mut sink = [0; 18];
        reader.read_exact(&mut sink).unwrap();

        assert_eq!(reader.position_in_file(), 18);
    }

    #[test]
    fn stops_reading_at_max_offset_if_specified() {
        let mut reader = SkippingBufReader::new(Cursor::new(SAMPLE), 0, Some(10));

        let mut collected = Vec::new();
        reader.read_to_end(&mut collected).unwrap();

        assert_eq!(collected, b"abcdefghij");
    }

    /// In-memory file whose reads fail whenever they would touch one particular byte.
    struct CursorWithError {
        inner: Cursor<Vec<u8>>,
        bad_byte_position: usize,
        error_kind: ErrorKind,
    }

    impl Read for CursorWithError {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            let start = usize::try_from(self.inner.position()).unwrap();
            let end = start + buf.len();

            // Fail the whole read if the requested range covers the bad byte.
            let touches_bad_byte = start <= self.bad_byte_position && self.bad_byte_position < end;
            if touches_bad_byte {
                Err(self.error_kind.into())
            } else {
                self.inner.read(buf)
            }
        }
    }

    impl Seek for CursorWithError {
        fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
            self.inner.seek(pos)
        }
    }

    /// Builds a reader over the sample data whose byte at index 4 fails with `error_kind`.
    fn reader_with_bad_byte(error_kind: ErrorKind) -> SkippingBufReader<CursorWithError> {
        let file = CursorWithError {
            inner: Cursor::new(SAMPLE.as_bytes().to_vec()),
            bad_byte_position: 4,
            error_kind,
        };
        SkippingBufReader::new(file, 0, None)
    }

    #[test]
    fn skips_byte_for_which_read_returns_a_permission_error() {
        let mut reader = reader_with_bad_byte(ErrorKind::PermissionDenied);

        let mut collected = Vec::new();
        reader.read_to_end(&mut collected).unwrap();

        // The byte at index 4 (`e`) is missing from the output.
        assert_eq!(collected, b"abcdfghijklmnopqrst");
    }

    #[test]
    fn does_not_ignore_error_kinds_other_than_permission_denied() {
        let mut reader = reader_with_bad_byte(ErrorKind::ResourceBusy);

        let mut collected = Vec::new();
        let outcome = reader.read_to_end(&mut collected).map_err(|err| err.kind());

        assert_eq!(outcome, Err(ErrorKind::ResourceBusy));
    }
}