df_st_core 0.3.0-development-2

Core structures for the DF Storyteller project.
Documentation
//! Buffering wrappers for I/O traits.
//!
//! This is copied from <https://doc.rust-lang.org/src/std/io/buffered.rs.html>.
//! It is modified to allow progress indicators like progress bars, and to
//! convert chunks that are not valid UTF-8 from CP437 to UTF-8 while buffering.

use std::io::prelude::*;

use std::cmp;
use std::fmt;
use std::io::{self, IoSliceMut, SeekFrom};

#[allow(unused_imports)]
use log::{debug, error, info, trace, warn};

// Size in bytes of the raw read buffer that `BufReader::new` allocates.
// Rust's default is 8 KiB; the default here is 20 KiB.
const DEFAULT_BUF_SIZE: usize = 20 * 1024;

/// Buffered reader that reports progress while it refills its buffer.
///
/// `fill_buf` reads raw bytes from `inner` into `buf`, then either copies them
/// (when they are valid UTF-8) or converts them from CP437 into `dec_buf`.
/// The slice handed out by `fill_buf` always comes from `dec_buf`.
pub struct BufReader<R, T> {
    /// The wrapped reader.
    inner: R,
    /// Raw bytes exactly as they were read from `inner`.
    buf: Box<[u8]>,
    /// Decoded bytes served to callers; `with_capacity` allocates it at three
    /// times the size of `buf`.
    dec_buf: Box<[u8]>,
    /// Offset of the next unread byte; `fill_buf` uses it to index `dec_buf`.
    pos: usize,
    /// End of the valid data; `fill_buf` sets it to the decoded length, so it
    /// can be larger than the number of raw bytes that were read.
    cap: usize,
    /// Progress sink that `fill_buf` notifies after every refill.
    progress: Progress<T>,
}

/// Wrapper around the progress sink that is handed to a `BufReader`.
#[derive(Debug)]
pub struct Progress<T> {
    /// Receiver of the progress notifications. The `Read`/`BufRead`
    /// implementations of `BufReader` require it to implement
    /// `ProgressUpdater`.
    pub updater: T,
}

/// Callback interface used to report reading progress.
pub trait ProgressUpdater {
    /// Called with the amount of newly available data.
    ///
    /// In this file it is invoked from `BufRead::fill_buf` after each refill,
    /// with the number of decoded bytes produced by that refill (a per-refill
    /// amount, not a running total).
    fn update(&mut self, value: u64);
}

impl<R: Read, T> BufReader<R, T> {
    /// Creates a new `BufReader<R, T>` whose raw buffer holds
    /// `DEFAULT_BUF_SIZE` bytes.
    ///
    /// `progress` is stored in the reader; it is notified by `fill_buf`
    /// every time the buffer is refilled.
    pub fn new(inner: R, progress: Progress<T>) -> BufReader<R, T> {
        Self::with_capacity(DEFAULT_BUF_SIZE, inner, progress)
    }

    /// Creates a new `BufReader<R, T>` whose raw buffer holds `capacity` bytes.
    ///
    /// Two zero-filled buffers are allocated: the raw buffer of `capacity`
    /// bytes and a decoded buffer of `capacity * 3` bytes, so that every raw
    /// byte may expand to up to three bytes when it is converted to UTF-8.
    /// The reader starts out with no buffered data.
    pub fn with_capacity(capacity: usize, inner: R, progress: Progress<T>) -> BufReader<R, T> {
        let raw = vec![0x00_u8; capacity].into_boxed_slice();
        let decoded = vec![0x00_u8; capacity * 3].into_boxed_slice();
        Self {
            inner,
            buf: raw,
            dec_buf: decoded,
            pos: 0,
            cap: 0,
            progress,
        }
    }
}

impl<R, T> BufReader<R, T> {
    /// Gets a reference to the underlying reader.
    ///
    /// It is inadvisable to directly read from the underlying reader.
    pub fn get_ref(&self) -> &R {
        &self.inner
    }

    /// Gets a mutable reference to the underlying reader.
    ///
    /// It is inadvisable to directly read from the underlying reader.
    pub fn get_mut(&mut self) -> &mut R {
        &mut self.inner
    }

    /// Returns a reference to the internally buffered data that has not been
    /// consumed yet.
    ///
    /// This is the same (already decoded) data `fill_buf` would return, but
    /// unlike `fill_buf` this will not attempt to fill the buffer if it is
    /// empty.
    pub fn buffer(&self) -> &[u8] {
        // `pos` and `cap` are set by `fill_buf` as offsets into the decoded
        // buffer. `cap` may exceed `buf.len()` after a CP437 conversion, so
        // slicing the raw buffer here would return the wrong bytes or panic.
        &self.dec_buf[self.pos..self.cap]
    }

    /// Returns the number of raw bytes the internal buffer can read from the
    /// underlying reader at once.
    ///
    /// The decoded data returned by `buffer` and `fill_buf` can be longer than
    /// this, because the decoded buffer is allocated at three times this size.
    pub fn capacity(&self) -> usize {
        self.buf.len()
    }

    /// Unwraps this `BufReader<R, T>`, returning the underlying reader.
    ///
    /// Note that any leftover data in the internal buffer is lost. Therefore,
    /// a following read from the underlying reader may lead to data loss.
    pub fn into_inner(self) -> R {
        self.inner
    }

    /// Invalidates all data in the internal buffer.
    #[inline]
    fn discard_buffer(&mut self) {
        self.pos = 0;
        self.cap = 0;
    }
}

// impl<R: Seek, T> BufReader<R, T> {
//     // Not Implemented
// }

impl<R: Read, T: ProgressUpdater> Read for BufReader<R, T> {
    /// Reads buffered data into `buf` and returns the number of bytes copied.
    ///
    /// Every read is served through `fill_buf`, including reads that are
    /// larger than the internal buffer. Reading straight from the underlying
    /// reader would skip the CP437 conversion and the progress notification
    /// that `fill_buf` performs.
    ///
    /// # Errors
    ///
    /// Returns any error reported by `fill_buf`.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let nread = {
            let mut rem = self.fill_buf()?;
            rem.read(buf)?
        };
        self.consume(nread);
        Ok(nread)
    }

    /// Vectored variant of `read`; it is likewise always served through
    /// `fill_buf`.
    ///
    /// # Errors
    ///
    /// Returns any error reported by `fill_buf`.
    fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
        let nread = {
            let mut rem = self.fill_buf()?;
            rem.read_vectored(bufs)?
        };
        self.consume(nread);
        Ok(nread)
    }
}

impl<R: Read, T: ProgressUpdater> BufRead for BufReader<R, T> {
    /// Returns the unread part of the decoded buffer, refilling it from the
    /// underlying reader when it is empty.
    ///
    /// A refill reads one chunk of raw bytes. If that chunk is valid UTF-8 it
    /// is copied unchanged; otherwise every byte of the chunk is converted
    /// from CP437 to UTF-8. After each refill the progress updater is called
    /// with the number of decoded bytes that became available.
    ///
    /// # Errors
    ///
    /// Returns any error reported by the underlying reader.
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        // If we've reached the end of our internal buffer then we need to fetch
        // some more data from the underlying reader.
        // Branch using `>=` instead of the more correct `==`
        // to tell the compiler that the pos..cap slice is always valid.
        if self.pos >= self.cap {
            debug_assert!(self.pos == self.cap);
            let raw_len = self.inner.read(&mut self.buf)?;
            // Only look at the bytes that were just read. The rest of `buf`
            // holds stale data from earlier reads, which must not influence
            // the UTF-8 check.
            let raw = &self.buf[..raw_len];
            // Only use CP437 if it can not be interpreted as UTF-8.
            // NOTE(review): the check is per chunk, so a multi-byte UTF-8
            // sequence that straddles two refills still sends both chunks down
            // the CP437 path; fixing that needs carry-over state.
            let decoded_len = if std::str::from_utf8(raw).is_ok() {
                self.dec_buf[..raw_len].copy_from_slice(raw);
                raw_len
            } else {
                // Convert all bytes from CP437 to UTF-8
                warn!(
                    "Found some non-UTF-8 characters, trying to convert them from CP437 to UTF-8. \
                    This might give incorrect results. But this might be better then an error."
                );
                let mut written = 0;
                for byte in raw {
                    let utf8 = df_cp437::convert_cp437_byte_to_utf8_bytes(byte);
                    // The first two positions are skipped when they are 0x00;
                    // the last position is always emitted.
                    if utf8[0] != 0x00 {
                        self.dec_buf[written] = utf8[0];
                        written += 1;
                    }
                    if utf8[1] != 0x00 {
                        self.dec_buf[written] = utf8[1];
                        written += 1;
                    }
                    self.dec_buf[written] = utf8[2];
                    written += 1;
                }
                written
            };
            self.pos = 0;
            // The decoded length can be larger than the number of raw bytes.
            self.cap = decoded_len;
            self.progress.updater.update(decoded_len as u64);
        }
        Ok(&self.dec_buf[self.pos..self.cap])
    }

    /// Marks `amt` bytes of the decoded buffer as read, clamped to the amount
    /// that is actually buffered.
    fn consume(&mut self, amt: usize) {
        self.pos = cmp::min(self.pos + amt, self.cap);
    }
}

impl<R: fmt::Debug, T> fmt::Debug for BufReader<R, T> {
    /// Formats the reader as `BufReader { reader: .., buffer: unread/capacity }`,
    /// where `unread` is `cap - pos` and `capacity` is the length of the raw
    /// buffer.
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        let unread = self.cap - self.pos;
        let raw_capacity = self.buf.len();
        let mut out = fmt.debug_struct("BufReader");
        out.field("reader", &self.inner);
        out.field("buffer", &format_args!("{}/{}", unread, raw_capacity));
        out.finish()
    }
}

impl<R: Seek, T> Seek for BufReader<R, T> {
    /// Seek to an offset, in bytes, in the underlying reader.
    ///
    /// For `SeekFrom::Current(_)` the offset is first reduced by the amount of
    /// buffered data that has not been consumed (`cap - pos`), so the seek is
    /// relative to the position the caller has read up to, not to where the
    /// underlying reader currently is.
    ///
    /// Seeking always discards the internal buffer, even if the seek position
    /// would otherwise fall within it.
    ///
    /// Note: when `n` minus the buffered amount overflows an `i64`, two seeks
    /// are performed instead of one: first backwards by the buffered amount,
    /// then by `n`. If the second seek returns `Err`, the underlying reader is
    /// left where the first seek put it and the buffer is already discarded.
    ///
    /// NOTE(review): `cap - pos` counts decoded bytes. After a CP437
    /// conversion a chunk's decoded length can differ from the number of raw
    /// bytes read, so the rewind may not match the raw position exactly.
    /// Confirm whether callers rely on relative seeks.
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        let new_position = match pos {
            SeekFrom::Current(delta) => {
                // Fits in an i64: the alternative would mean an 8 EiB buffer.
                let buffered = (self.cap - self.pos) as i64;
                match delta.checked_sub(buffered) {
                    Some(adjusted) => self.inner.seek(SeekFrom::Current(adjusted))?,
                    None => {
                        // `delta - buffered` underflows: rewind over the
                        // buffered data first, then apply the requested offset.
                        self.inner.seek(SeekFrom::Current(-buffered))?;
                        self.discard_buffer();
                        self.inner.seek(SeekFrom::Current(delta))?
                    }
                }
            }
            // Seeking with Start/End doesn't care about our buffer length.
            absolute => self.inner.seek(absolute)?,
        };
        self.discard_buffer();
        Ok(new_position)
    }
}