//! Buffering wrappers for I/O traits.
//!
//! This is copied from <https://doc.rust-lang.org/src/std/io/buffered.rs.html>.
//! It is modified to allow progress indicators like progress bars, and to
//! convert input that is not valid UTF-8 from CP437 to UTF-8 while buffering.
use std::io::prelude::*;
use std::cmp;
use std::fmt;
use std::io::{self, IoSliceMut, SeekFrom};
#[allow(unused_imports)]
use log::{debug, error, info, trace, warn};
/// Default capacity of the raw read buffer used by `BufReader::new`: 20 KiB.
/// (The Rust standard library's default is 8 KiB.)
const DEFAULT_BUF_SIZE: usize = 20 * 1024;
/// A buffered reader that reports progress and serves decoded data.
///
/// Raw bytes are read from `inner` into `buf`. `fill_buf` then copies them
/// (or, when they are not valid UTF-8, their CP437-to-UTF-8 conversion) into
/// `dec_buf`, which is the buffer callers actually read from.
pub struct BufReader<R, T> {
    /// The wrapped reader that raw bytes are pulled from.
    inner: R,
    /// Raw bytes as delivered by the most recent read of `inner`.
    buf: Box<[u8]>,
    /// Decoded bytes handed out to callers; allocated at three times the
    /// length of `buf`.
    dec_buf: Box<[u8]>,
    /// Index into `dec_buf` of the next byte that has not been consumed.
    pos: usize,
    /// Number of valid bytes currently held in `dec_buf`.
    cap: usize,
    /// Notified with the number of decoded bytes after every buffer refill.
    progress: Progress<T>,
}
/// Wrapper around the value that is notified about reading progress.
#[derive(Debug)]
pub struct Progress<T> {
    /// The object whose `update` method `BufReader` calls after refilling
    /// its buffer.
    pub updater: T,
}
/// Callback interface for reporting reading progress.
pub trait ProgressUpdater {
    /// Reports progress.
    ///
    /// `BufReader` calls this once per buffer refill, passing the number of
    /// decoded bytes that the refill produced.
    fn update(&mut self, value: u64);
}
impl<R: Read, T> BufReader<R, T> {
    /// Creates a new `BufReader<R, T>` with the default buffer capacity
    /// (`DEFAULT_BUF_SIZE`, currently 20 KiB).
    ///
    /// `inner` is the reader to wrap; `progress` carries the updater that is
    /// notified whenever the internal buffer is refilled.
    pub fn new(inner: R, progress: Progress<T>) -> BufReader<R, T> {
        Self::with_capacity(DEFAULT_BUF_SIZE, inner, progress)
    }

    /// Creates a new `BufReader<R, T>` whose raw read buffer holds `capacity`
    /// bytes.
    ///
    /// A second, zero-filled buffer of `capacity * 3` bytes is allocated for
    /// the decoded data, because `fill_buf` may emit up to three output bytes
    /// per input byte when converting from CP437.
    pub fn with_capacity(capacity: usize, inner: R, progress: Progress<T>) -> BufReader<R, T> {
        let raw = vec![0x00u8; capacity].into_boxed_slice();
        let decoded = vec![0x00u8; capacity * 3].into_boxed_slice();
        Self {
            inner,
            buf: raw,
            dec_buf: decoded,
            // Both cursors start at zero: nothing is buffered yet.
            pos: 0,
            cap: 0,
            progress,
        }
    }
}
impl<R, T> BufReader<R, T> {
    /// Gets a reference to the underlying reader.
    ///
    /// It is inadvisable to directly read from the underlying reader.
    pub fn get_ref(&self) -> &R {
        &self.inner
    }

    /// Gets a mutable reference to the underlying reader.
    ///
    /// It is inadvisable to directly read from the underlying reader.
    pub fn get_mut(&mut self) -> &mut R {
        &mut self.inner
    }

    /// Returns a reference to the internally buffered data.
    ///
    /// This is the decoded data that has been buffered but not yet consumed,
    /// i.e. exactly what the next `fill_buf` call would return without
    /// touching the underlying reader. Unlike `fill_buf`, this will not
    /// attempt to fill the buffer if it is empty.
    pub fn buffer(&self) -> &[u8] {
        // `pos` and `cap` index the decoded buffer, not the raw one: `cap` may
        // exceed `buf.len()` after a CP437 conversion expanded the data.
        &self.dec_buf[self.pos..self.cap]
    }

    /// Returns the number of raw bytes the internal read buffer can hold at
    /// once.
    ///
    /// The decoded data returned by `fill_buf` or `buffer` can be longer than
    /// this, because the decoded buffer is allocated at three times this size.
    pub fn capacity(&self) -> usize {
        self.buf.len()
    }

    /// Unwraps this `BufReader<R, T>`, returning the underlying reader.
    ///
    /// Note that any leftover data in the internal buffer is lost. Therefore,
    /// a following read from the underlying reader may lead to data loss.
    pub fn into_inner(self) -> R {
        self.inner
    }

    /// Invalidates all data in the internal buffer.
    #[inline]
    fn discard_buffer(&mut self) {
        self.pos = 0;
        self.cap = 0;
    }
}
// impl<R: Seek, T> BufReader<R, T> {
// // Not Implemented
// }
impl<R: Read, T: ProgressUpdater> Read for BufReader<R, T> {
    /// Reads decoded bytes into `buf`, refilling the internal buffer first if
    /// it is empty.
    ///
    /// Every read goes through `fill_buf`, regardless of how large `buf` is.
    /// The standard library's `BufReader` bypasses its buffer for large reads,
    /// but doing so here would hand out raw, undecoded bytes and skip the
    /// progress update, making the result depend on the caller's buffer size.
    ///
    /// Returns the number of bytes copied into `buf`; `Ok(0)` means the
    /// underlying reader is exhausted (or `buf` is empty). Errors from the
    /// underlying reader are propagated.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let nread = {
            let mut rem = self.fill_buf()?;
            rem.read(buf)?
        };
        self.consume(nread);
        Ok(nread)
    }

    /// Vectored variant of `read`: fills `bufs` in order from the decoded
    /// buffer and returns the total number of bytes copied.
    ///
    /// Like `read`, this always goes through `fill_buf` so that the data is
    /// decoded and progress is reported.
    fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
        let nread = {
            let mut rem = self.fill_buf()?;
            rem.read_vectored(bufs)?
        };
        self.consume(nread);
        Ok(nread)
    }
}
impl<R: Read, T: ProgressUpdater> BufRead for BufReader<R, T> {
    /// Returns the decoded data that is buffered but not yet consumed,
    /// reading and decoding a new chunk from the underlying reader first if
    /// the buffer is empty.
    ///
    /// A freshly read chunk that is valid UTF-8 is copied unchanged. Otherwise
    /// a warning is logged and every byte of the chunk is converted from CP437
    /// to UTF-8, which can make the decoded chunk longer than the raw one.
    /// After each refill the progress updater is called with the number of
    /// decoded bytes.
    ///
    /// Errors from the underlying reader are propagated; the buffer state is
    /// left untouched in that case.
    ///
    /// NOTE(review): validation is done per chunk, so a multi-byte UTF-8
    /// sequence that is split across two reads makes that chunk fail
    /// validation and be converted as CP437. Fixing this needs carry-over
    /// state between refills.
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        // If we've reached the end of our internal buffer then we need to fetch
        // some more data from the underlying reader.
        // Branch using `>=` instead of the more correct `==`
        // to tell the compiler that the pos..cap slice is always valid.
        if self.pos >= self.cap {
            debug_assert!(self.pos == self.cap);
            let raw_len = self.inner.read(&mut self.buf)?;
            self.pos = 0;

            // Only look at the bytes delivered by this read. Everything past
            // `raw_len` is stale data from earlier reads and must not take
            // part in the UTF-8 check.
            let raw = &self.buf[..raw_len];

            // Only use CP437 if the chunk cannot be interpreted as UTF-8.
            let decoded_len = if std::str::from_utf8(raw).is_ok() {
                self.dec_buf[..raw_len].copy_from_slice(raw);
                raw_len
            } else {
                // Convert all bytes from CP437 to UTF-8
                warn!(
                    "Found some non-UTF-8 characters, trying to convert them from CP437 to UTF-8. \
                    This might give incorrect results. But this might be better then an error."
                );
                let mut dec_pos = 0;
                for byte in raw {
                    let newchar = df_cp437::convert_cp437_byte_to_utf8_bytes(byte);
                    // The converter yields three bytes; 0x00 in the first two
                    // positions is skipped (presumably padding for shorter
                    // encodings), the third byte is always emitted.
                    if newchar[0] != 0x00 {
                        self.dec_buf[dec_pos] = newchar[0];
                        dec_pos += 1;
                    }
                    if newchar[1] != 0x00 {
                        self.dec_buf[dec_pos] = newchar[1];
                        dec_pos += 1;
                    }
                    self.dec_buf[dec_pos] = newchar[2];
                    dec_pos += 1;
                }
                dec_pos
            };

            // The decoded length can be larger than the number of raw bytes read.
            self.cap = decoded_len;
            self.progress.updater.update(decoded_len as u64);
        }
        Ok(&self.dec_buf[self.pos..self.cap])
    }

    /// Marks `amt` bytes of the decoded buffer as consumed.
    ///
    /// The position is clamped to the end of the buffered data, so passing a
    /// value that is too large simply empties the buffer.
    fn consume(&mut self, amt: usize) {
        self.pos = cmp::min(self.pos + amt, self.cap);
    }
}
impl<R, T> fmt::Debug for BufReader<R, T>
where
    R: fmt::Debug,
{
    /// Formats the reader as `BufReader { reader: .., buffer: unread/capacity }`,
    /// where `unread` is the number of buffered bytes not yet consumed and
    /// `capacity` is the length of the raw read buffer.
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        let unread = self.cap - self.pos;
        let raw_capacity = self.buf.len();
        let mut out = fmt.debug_struct("BufReader");
        out.field("reader", &self.inner);
        out.field("buffer", &format_args!("{}/{}", unread, raw_capacity));
        out.finish()
    }
}
impl<R: Seek, T> Seek for BufReader<R, T> {
    /// Seek to an offset, in bytes, in the underlying reader.
    ///
    /// For `SeekFrom::Current(_)` the offset is adjusted by the number of
    /// bytes that are buffered but not yet consumed, so that the seek is
    /// relative to the position the caller has read up to rather than the
    /// position the underlying reader has advanced to.
    ///
    /// Seeking always discards the internal buffer, even if the seek position
    /// would otherwise fall within it.
    ///
    /// If `n` minus the number of buffered bytes overflows an `i64`, two seeks
    /// are performed instead of one: first back by the buffered amount, then
    /// by `n`. If the second seek fails, the underlying reader is left where
    /// the first seek put it.
    ///
    /// NOTE(review): the buffered amount is counted in decoded bytes. After a
    /// CP437 conversion expanded the data in `fill_buf`, it no longer matches
    /// the number of raw bytes in the underlying reader, so relative seeks can
    /// land at the wrong offset in that case.
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        let new_position = match pos {
            SeekFrom::Current(delta) => {
                // It is safe to assume the buffered amount fits within an i64:
                // the alternative is an 8 exbibyte allocation.
                let buffered = (self.cap - self.pos) as i64;
                match delta.checked_sub(buffered) {
                    Some(adjusted) => self.inner.seek(SeekFrom::Current(adjusted))?,
                    // Some unusual reader might support seeking by
                    // `i64::MIN`, so underflow is handled by seeking in two
                    // steps: back by the buffered amount, then by the offset.
                    None => {
                        self.inner.seek(SeekFrom::Current(-buffered))?;
                        self.discard_buffer();
                        self.inner.seek(SeekFrom::Current(delta))?
                    }
                }
            }
            // Seeking with Start/End doesn't care about our buffer length.
            absolute => self.inner.seek(absolute)?,
        };
        self.discard_buffer();
        Ok(new_position)
    }
}