use std::cmp::min;
use std::io::{self, BufRead, ErrorKind, Read, Seek, SeekFrom};
/// Buffer size in bytes (64 KiB) that `SkippingBufReader::new` passes to
/// `SkippingBufReader::with_buffer_size`.
const DEFAULT_BUFFER_SIZE: usize = 64 * 1024;
/// Buffered reader over a seekable source that keeps track of how much of its
/// internal buffer has been handed out and how large the next read request
/// should be.
// NOTE(review): the lint is presumably silenced because the type name repeats
// part of the enclosing module's name -- confirm against the module path.
#[allow(clippy::module_name_repetitions)]
pub struct SkippingBufReader<F>
where
    F: Read + Seek,
{
    /// Underlying source that data is read from.
    file: F,
    /// Absolute offset in `file` at which reading stops; `None` means no limit.
    max_offset: Option<usize>,
    /// Storage for the chunk most recently read from `file`.
    buffer: Vec<u8>,
    /// Number of bytes requested by the next read; may temporarily drop below
    /// `initial_buffer_size`.
    buffer_size: usize,
    /// Request size that `buffer_size` is restored to after a successful read.
    initial_buffer_size: usize,
    /// Number of bytes at the front of `buffer` that were filled by the last read.
    valid_bytes_in_buffer: usize,
    /// Index into `buffer` of the next byte that has not been consumed yet.
    read_position_in_buffer: usize,
}
impl<F: Read + Seek> SkippingBufReader<F> {
    /// Creates a reader that uses the default buffer size
    /// (`DEFAULT_BUFFER_SIZE` bytes).
    ///
    /// The arguments have the same meaning as in [`Self::with_buffer_size`].
    ///
    /// # Panics
    ///
    /// Panics if seeking `file` to `start_offset` fails.
    pub fn new(file: F, start_offset: usize, max_offset: Option<usize>) -> Self {
        Self::with_buffer_size(DEFAULT_BUFFER_SIZE, file, start_offset, max_offset)
    }

    /// Creates a reader that requests at most `buffer_size` bytes from `file`
    /// per read.
    ///
    /// * `start_offset` - absolute position `file` is moved to before reading.
    /// * `max_offset` - absolute position at which reading stops (the byte at
    ///   that position is not read); `None` reads until the end of `file`.
    ///
    /// # Panics
    ///
    /// Panics if seeking `file` to `start_offset` fails.
    pub fn with_buffer_size(
        buffer_size: usize,
        mut file: F,
        start_offset: usize,
        max_offset: Option<usize>,
    ) -> Self {
        file.seek(SeekFrom::Start(start_offset as u64))
            .expect("failed to seek to given start offset");
        Self {
            file,
            max_offset,
            buffer: Vec::with_capacity(buffer_size),
            buffer_size,
            initial_buffer_size: buffer_size,
            valid_bytes_in_buffer: 0,
            read_position_in_buffer: 0,
        }
    }

    /// Replaces the buffer contents with the next chunk from the file.
    ///
    /// Returns the number of bytes now available in the buffer; `0` signals
    /// that the end of the file or `max_offset` has been reached.
    ///
    /// A read that fails with [`ErrorKind::PermissionDenied`] is retried with
    /// half the request size. Once a single-byte read still fails, that byte
    /// is skipped by seeking one position forward. After a successful read the
    /// request size is restored to its initial value.
    ///
    /// # Errors
    ///
    /// Returns any error from querying or moving the file position, and any
    /// read error other than [`ErrorKind::PermissionDenied`].
    fn refill_buffer(&mut self) -> io::Result<usize> {
        loop {
            if let Some(max_offset) = self.max_offset {
                let seek_position = self.file.stream_position()?;
                // The position may already be at or past `max_offset` (for
                // example when the start offset lies behind it), so subtract
                // with saturation instead of underflowing. A position that
                // does not even fit in `usize` is necessarily past the limit.
                let bytes_to_max_offset = usize::try_from(seek_position)
                    .map_or(0, |position| max_offset.saturating_sub(position));
                if bytes_to_max_offset == 0 {
                    // Nothing may be read anymore; do not issue an empty read.
                    log::debug!("Reached EOF or maximum offset.");
                    return Ok(0);
                }
                self.buffer_size = min(self.buffer_size, bytes_to_max_offset);
            }
            self.buffer.resize(self.buffer_size, 0);
            match self.file.read(&mut self.buffer) {
                Ok(0) => {
                    log::debug!("Reached EOF or maximum offset.");
                    return Ok(0);
                }
                Ok(bytes_read) => {
                    // The request size may have been shrunk while working
                    // around an unreadable region; start big again next time.
                    self.buffer_size = self.initial_buffer_size;
                    self.valid_bytes_in_buffer = bytes_read;
                    self.read_position_in_buffer = 0;
                    return Ok(bytes_read);
                }
                Err(e) if e.kind() == ErrorKind::PermissionDenied => {
                    if self.buffer_size > 1 {
                        // Narrow the request to get closer to the bad byte.
                        self.buffer_size /= 2;
                    } else {
                        // Even a single byte cannot be read: step over it.
                        self.file.seek_relative(1)?;
                    }
                }
                Err(e) => return Err(e),
            }
        }
    }

    /// Returns the file position of the next byte this reader will hand out.
    ///
    /// This is the position of the underlying file minus the bytes that are
    /// buffered but not consumed yet.
    ///
    /// # Panics
    ///
    /// Panics if the current position of the underlying file cannot be
    /// obtained or does not fit in a `usize`.
    #[must_use]
    pub fn position_in_file(&mut self) -> usize {
        let seek_position = self
            .file
            .stream_position()
            .expect("obtaining the current seek position should always succeed");
        // Saturate so that a read position advanced past the valid bytes
        // counts as "nothing unread" instead of underflowing.
        let unread_bytes_in_buffer = self
            .valid_bytes_in_buffer
            .saturating_sub(self.read_position_in_buffer);
        usize::try_from(seek_position).unwrap() - unread_bytes_in_buffer
    }
}
impl<F: Read + Seek> BufRead for SkippingBufReader<F> {
fn fill_buf(&mut self) -> io::Result<&[u8]> {
if self.read_position_in_buffer >= self.valid_bytes_in_buffer {
log::trace!("No unread bytes in buffer; refilling it...");
let bytes_read = self.refill_buffer()?;
log::trace!("Refilled buffer with {bytes_read} bytes.");
if bytes_read == 0 {
return Ok(&[]);
}
}
let unread_bytes = &self.buffer[self.read_position_in_buffer..self.valid_bytes_in_buffer];
Ok(unread_bytes)
}
fn consume(&mut self, amt: usize) {
self.read_position_in_buffer += amt;
}
}
impl<F: Read + Seek> Read for SkippingBufReader<F> {
    /// Copies as many buffered bytes as fit into `buf` and marks them as
    /// consumed, returning how many bytes were copied.
    ///
    /// # Errors
    ///
    /// Propagates any error from filling the internal buffer.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let pending = self.fill_buf()?;
        // Hand out no more than the caller has room for.
        let count = pending.len().min(buf.len());
        buf[..count].copy_from_slice(&pending[..count]);
        self.consume(count);
        Ok(count)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Twenty ASCII bytes used as the file contents in every test.
    const CONTENTS: &str = "abcdefghijklmnopqrst";

    #[test]
    fn position_in_file_returns_expected_position() {
        // The buffer is smaller than the input, so several refills are needed
        // and the last one leaves unread bytes behind in the buffer.
        let mut reader = SkippingBufReader::with_buffer_size(8, Cursor::new(CONTENTS), 0, None);
        let mut sink = [0_u8; 18];

        reader.read_exact(&mut sink).unwrap();

        assert_eq!(reader.position_in_file(), 18);
    }

    #[test]
    fn stops_reading_at_max_offset_if_specified() {
        let mut reader = SkippingBufReader::new(Cursor::new(CONTENTS), 0, Some(10));
        let mut collected = Vec::new();

        reader.read_to_end(&mut collected).unwrap();

        assert_eq!(collected, b"abcdefghij");
    }

    /// In-memory file whose reads fail with `error_kind` whenever the requested
    /// range includes the byte at `bad_byte_position`.
    struct CursorWithError {
        inner: Cursor<Vec<u8>>,
        bad_byte_position: usize,
        error_kind: ErrorKind,
    }

    impl Read for CursorWithError {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            let start = usize::try_from(self.inner.position()).unwrap();
            let end = start + buf.len();
            let touches_bad_byte = start <= self.bad_byte_position && self.bad_byte_position < end;
            if touches_bad_byte {
                Err(io::Error::from(self.error_kind))
            } else {
                self.inner.read(buf)
            }
        }
    }

    impl Seek for CursorWithError {
        fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
            self.inner.seek(pos)
        }
    }

    #[test]
    fn skips_byte_for_which_read_returns_a_permission_error() {
        let source = CursorWithError {
            inner: Cursor::new(CONTENTS.as_bytes().to_vec()),
            bad_byte_position: 4,
            error_kind: ErrorKind::PermissionDenied,
        };
        let mut reader = SkippingBufReader::new(source, 0, None);
        let mut collected = Vec::new();

        reader.read_to_end(&mut collected).unwrap();

        // The byte at index 4 ("e") is missing from the output.
        assert_eq!(collected, b"abcdfghijklmnopqrst");
    }

    #[test]
    fn does_not_ignore_error_kinds_other_than_permission_denied() {
        let source = CursorWithError {
            inner: Cursor::new(CONTENTS.as_bytes().to_vec()),
            bad_byte_position: 4,
            error_kind: ErrorKind::ResourceBusy,
        };
        let mut reader = SkippingBufReader::new(source, 0, None);
        let mut collected = Vec::new();

        let error = reader.read_to_end(&mut collected).unwrap_err();

        assert_eq!(error.kind(), ErrorKind::ResourceBusy);
    }
}