utf8-bufread 1.0.0

Provides alternatives to BufRead's read_line & lines that do not stop on newlines
Documentation
use std::io::ErrorKind;
use std::str::Utf8Error;
use std::string::FromUtf8Error;
use std::{error, fmt, io};

/// The error type for operations of the [`BufRead`] trait and associated iterators.
///
/// It can be created from an [`io::Error`], an [`ErrorKind`], a [`Utf8Error`] or a
/// [`FromUtf8Error`].
///
/// Instances of this error may contain some bytes that would have been lost otherwise, see
/// [`leftovers`](Error::leftovers).
///
/// # Examples
///
/// ```
/// use utf8_bufread::Error;
/// use std::io::ErrorKind;
///
/// let error = Error::from(ErrorKind::InvalidData);
/// ```
///
/// [`Utf8Error`]: std::str::Utf8Error
/// [`FromUtf8Error`]: std::string::FromUtf8Error
/// [`BufRead`]: crate::BufRead
// TODO: Add tests for Error
pub struct Error {
    /// Bytes attached through `with_leftovers`, if any.
    leftovers: Option<Vec<u8>>,
    /// The kind of this error, along with the wrapped error value when there is one.
    repr: Repr,
}

/// Internal representation of an [`Error`].
enum Repr {
    /// An error described only by its [`ErrorKind`], with no wrapped error value.
    Simple(ErrorKind),
    /// An error wrapping another error value.
    Custom {
        /// The [`ErrorKind`] reported by [`Error::kind`].
        kind: ErrorKind,
        /// The wrapped error value, handed back by the `into_inner*` methods of [`Error`].
        inner: Box<dyn error::Error + Send + Sync>,
    },
}

impl Error {
    /// Attaches "leftover" bytes to this error and returns it.
    ///
    /// See [`leftovers`](Error::leftovers) for more info about what we call leftovers.
    ///
    /// `bytes` is expected to be non-empty; this is only checked when debug assertions are
    /// enabled.
    ///
    /// # Panics
    ///
    /// Panics if leftover bytes were already attached to this error, rather than silently
    /// dropping them.
    pub(crate) fn with_leftovers(mut self, bytes: Vec<u8>) -> Self {
        debug_assert!(!bytes.is_empty());
        if self.leftovers.is_some() {
            panic!("This error already had leftover bytes assigned, we won't drop them !")
        }
        self.leftovers = Some(bytes);
        self
    }

    /// Get the "leftover" bytes stored in this error.
    ///
    /// Leftover bytes are bytes that were read from the inner reader of a type implementing
    /// [`io::BufRead`], before clearing the buffer and filling it again, in a call to one of
    /// [`BufRead`]'s functions that returned an error. This means that they form an invalid or
    /// incomplete codepoint but would be lost if not returned with this error, as the call cleared
    /// the buffer they were coming from.
    ///
    /// If this error wraps a [`FromUtf8Error`], the bytes held by that error are returned.
    /// Otherwise the bytes attached to this error while reading are returned, whether or not
    /// another error is wrapped. If there are none, the returned slice is empty.
    ///
    /// It is guaranteed that, if the error contains a non-zero amount of leftover bytes, the
    /// following read operation on the reader that returned the error will not return any of those
    /// bytes, nor "skip" bytes from the reader.
    ///
    /// It is also guaranteed that, if the error contains a non-zero amount of leftover bytes,
    /// their amount is of the expected length of a codepoint, based on the first invalid byte
    /// read, i.e. the first of the leftover bytes.
    ///
    /// If you want to be sure not to lose any bytes from the inner reader, you should check if
    /// the error is holding "leftovers" with `error.leftovers().`[`is_empty`]`()`.
    ///
    /// # Examples
    ///
    /// The following example plays with buffer capacity to purposefully trigger a read that will
    /// return an error holding leftover bytes. The user should not bother thinking about buffer
    /// capacity in most cases, so this example may be a bit harder to follow along.
    ///
    /// ```
    /// use std::io::{BufReader, Read};
    /// use utf8_bufread::BufRead;
    ///
    /// let input = "💖💖";
    /// assert_eq!(input.len(), 8);
    /// // The reader will successfully read the first codepoint, but trying to read the second one
    /// // will result in an error since '💖' is 4 byte long, and we only take the first 7 bytes.
    /// // Since the reader has a buffer capacity of 6, it will have to clear and refill its buffer
    /// // to attempt reading the incomplete codepoint, then fail.
    /// let mut reader = BufReader::with_capacity(6, &input.as_bytes()[..7]);
    /// // First read is successful
    /// let s = reader.read_str().unwrap();
    /// assert_eq!(s.as_ref(), "💖");
    /// // Storing how many bytes were read with the first call for later use
    /// let first_read_len = s.len();
    /// // Second read gives us an error
    /// let err = reader.read_str();
    /// assert!(err.is_err());
    /// let err = err.unwrap_err();
    /// // Since the reader had to clear and re-fill its buffer, the error will contain leftover
    /// // bytes
    /// assert!(!err.leftovers().is_empty());
    /// // We can still "manually" read from the reader, but any bytes read before clearing the
    /// // inner buffer are "lost" (they are stored as leftovers in previously returned error)
    /// let mut buf: [u8; 8] = Default::default();
    /// // If the reader didn't have to clear its buffer, we should have read 3 bytes.
    /// // But since it did, we have 2 bytes stored in the error, hence why we only read 1 byte
    /// assert_eq!(1, reader.read(&mut buf).unwrap());
    /// // The input was truncated to 7 bytes, and we did read all 7 bytes
    /// assert_eq!(7, first_read_len + err.leftovers().len() + 1)
    /// ```
    ///
    /// [`is_empty`]: slice::is_empty
    /// [`BufRead`]: crate::BufRead
    /// [`FromUtf8Error`]: std::string::FromUtf8Error
    pub fn leftovers(&self) -> &[u8] {
        // A wrapped `FromUtf8Error` owns the bytes it failed to convert.
        if let Repr::Custom { inner, .. } = &self.repr {
            if let Some(e) = inner.downcast_ref::<FromUtf8Error>() {
                return e.as_bytes();
            }
        }
        // Bytes attached with `with_leftovers` must be reported for every representation:
        // `into_inner_checked` already refuses to drop them for `Custom` errors too.
        self.leftovers.as_deref().unwrap_or(&[])
    }

    /// Returns the corresponding [`ErrorKind`] for this error.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::io::ErrorKind;
    /// use utf8_bufread::Error;
    ///
    /// fn print_error(err: Error) {
    ///     println!("{:?}", err.kind());
    /// }
    ///
    /// fn main() {
    ///     // Will print "AddrInUse".
    ///     print_error(Error::from(ErrorKind::AddrInUse));
    /// }
    /// ```
    pub fn kind(&self) -> ErrorKind {
        match &self.repr {
            Repr::Simple(kind) | Repr::Custom { kind, .. } => *kind,
        }
    }

    /// Consumes the `Error`, returning its inner error (if any).
    ///
    /// If this [`Error`] was constructed from an [`ErrorKind`], then this function will
    /// return [`None`], otherwise it will return [`Some`].
    ///
    /// See [`into_inner_checked`](Error::into_inner_checked) for a non-panicking variant and
    /// [`into_inner_lossy`](Error::into_inner_lossy) for a variant dropping leftover bytes.
    ///
    /// # Panics
    ///
    /// This function will panic if this error is holding "leftover" bytes. This includes errors
    /// wrapping a [`FromUtf8Error`], as the bytes are stored in the inner error itself.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::io::{self, ErrorKind};
    /// use utf8_bufread::Error;
    ///
    /// fn print_error(err: Error) {
    ///     if let Some(inner_err) = err.into_inner() {
    ///         println!("Inner error: {}", inner_err);
    ///     } else {
    ///         println!("No inner error");
    ///     }
    /// }
    ///
    /// fn main() {
    ///     // Will print "No inner error".
    ///     print_error(Error::from(ErrorKind::Other));
    ///     // Will print "Inner error: ...".
    ///     print_error(Error::from(io::Error::from(ErrorKind::AddrInUse)));
    /// }
    /// ```
    ///
    /// [`FromUtf8Error`]: std::string::FromUtf8Error
    pub fn into_inner(self) -> Option<Box<dyn error::Error + Send + Sync>> {
        match self.into_inner_checked() {
            Ok(inner) => inner,
            Err(_) => panic!("This error is holding leftover bytes, we won't drop them !"),
        }
    }

    /// Consumes the `Error`, returning its inner error (if any).
    ///
    /// If this [`Error`] is holding "leftover" bytes, it is handed back untouched as
    /// [`Err`] so that no bytes are lost. This includes errors wrapping a [`FromUtf8Error`], as
    /// the bytes are stored in the inner error itself.
    ///
    /// Otherwise, if this [`Error`] was constructed from an [`ErrorKind`], then this function
    /// will return `Ok(None)`, else it will return `Ok(Some(_))`.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::io::{self, ErrorKind};
    /// use utf8_bufread::Error;
    ///
    /// fn print_error(err: Error) {
    ///     if let Some(inner_err) = err.into_inner_checked().ok().flatten() {
    ///         println!("Inner error: {}", inner_err);
    ///     } else {
    ///         println!("No inner error, or transforming the error would cause data loss");
    ///     }
    /// }
    ///
    /// fn main() {
    ///     // Will print "No inner error, or transforming the error would cause data loss".
    ///     print_error(Error::from(ErrorKind::Other));
    ///     // Will print "Inner error: ...".
    ///     print_error(Error::from(io::Error::from(ErrorKind::AddrInUse)));
    /// }
    /// ```
    ///
    /// [`FromUtf8Error`]: std::string::FromUtf8Error
    pub fn into_inner_checked(
        self,
    ) -> std::result::Result<Option<Box<dyn error::Error + Send + Sync>>, Self> {
        if self.leftovers.is_some() {
            return Err(self);
        }
        match self.repr {
            Repr::Simple(_) => Ok(None),
            Repr::Custom { inner, .. } if !inner.is::<FromUtf8Error>() => Ok(Some(inner)),
            // inner must be a `FromUtf8Error`, which has leftovers stored in it
            Repr::Custom { .. } => Err(self),
        }
    }

    /// Consumes the `Error`, returning its inner error (if any).
    ///
    /// If this [`Error`] was constructed from an [`ErrorKind`], then this function will
    /// return [`None`], otherwise it will return [`Some`]. Any leftover bytes attached to this
    /// error are lost in the process.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::io::{self, ErrorKind};
    /// use utf8_bufread::Error;
    ///
    /// fn print_error(err: Error) {
    ///     if let Some(inner_err) = err.into_inner_lossy() {
    ///         println!("Inner error: {}", inner_err);
    ///     } else {
    ///         println!("No inner error");
    ///     }
    /// }
    ///
    /// fn main() {
    ///     // Will print "No inner error".
    ///     print_error(Error::from(ErrorKind::Other));
    ///     // Will print "Inner error: ...".
    ///     print_error(Error::from(io::Error::from(ErrorKind::AddrInUse)));
    /// }
    /// ```
    pub fn into_inner_lossy(self) -> Option<Box<dyn error::Error + Send + Sync>> {
        match self.repr {
            Repr::Custom { inner, .. } => Some(inner),
            Repr::Simple(_) => None,
        }
    }
}

impl fmt::Debug for Error {
    /// Formats wrapped errors exactly like the error they wrap; errors made of a bare kind are
    /// shown as a struct listing their leftover bytes and kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let kind = match &self.repr {
            // Wrapped errors speak for themselves.
            Repr::Custom { inner, .. } => return fmt::Debug::fmt(inner, f),
            Repr::Simple(kind) => kind,
        };
        let mut out = f.debug_struct("Error");
        out.field("leftover bytes", &self.leftovers);
        out.field("kind", kind);
        out.finish()
    }
}

impl fmt::Display for Error {
    /// Displays the wrapped error when there is one, and otherwise the message an [`io::Error`]
    /// of the same kind would display.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.repr {
            Repr::Custom { inner, .. } => fmt::Display::fmt(inner, f),
            Repr::Simple(kind) => {
                let as_io = io::Error::from(*kind);
                fmt::Display::fmt(&as_io, f)
            }
        }
    }
}

impl From<ErrorKind> for Error {
    /// Builds an error made of a bare [`ErrorKind`], with no wrapped error and no leftover
    /// bytes.
    fn from(kind: ErrorKind) -> Self {
        let repr = Repr::Simple(kind);
        Self {
            leftovers: None,
            repr,
        }
    }
}

impl From<io::Error> for Error {
    /// Wraps an [`io::Error`], keeping its [`ErrorKind`]; no leftover bytes are attached.
    fn from(err: io::Error) -> Self {
        // Read the kind before the error is moved into the box.
        let kind = err.kind();
        let repr = Repr::Custom {
            kind,
            inner: Box::new(err),
        };
        Self {
            leftovers: None,
            repr,
        }
    }
}

impl From<Utf8Error> for Error {
    /// Wraps a [`Utf8Error`] as an error of kind [`ErrorKind::InvalidData`]; no leftover bytes
    /// are attached.
    fn from(err: Utf8Error) -> Self {
        let repr = Repr::Custom {
            kind: ErrorKind::InvalidData,
            inner: Box::new(err),
        };
        Self {
            leftovers: None,
            repr,
        }
    }
}

impl From<std::string::FromUtf8Error> for Error {
    fn from(err: std::string::FromUtf8Error) -> Self {
        Error {
            leftovers: None,
            repr: Repr::Custom {
                kind: ErrorKind::InvalidData,
                inner: err.into(),
            },
        }
    }
}

impl error::Error for Error {
    /// Returns the source of the wrapped error, if any.
    ///
    /// The wrapped error itself is not reported as the source: it is already what this error
    /// displays.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        if let Repr::Custom { inner, .. } = &self.repr {
            inner.source()
        } else {
            None
        }
    }
}

/// Shorthand for a [`std::result::Result`] whose error type is this module's [`Error`].
pub(crate) type Result<T> = std::result::Result<T, Error>;

#[cfg(test)]
mod with_leftovers_tests {
    use crate::error::Repr;
    use crate::Error;
    use std::io::ErrorKind;

    /// Attaching leftovers twice must panic rather than drop the first batch of bytes.
    ///
    /// Non-empty vectors are used on purpose: with empty ones, the `debug_assert!` of the first
    /// call panics in debug builds and the double-assignment check is never reached.
    #[test]
    #[should_panic(expected = "already had leftover bytes assigned")]
    fn double_call_with_leftovers() {
        Error {
            leftovers: None,
            repr: Repr::Simple(ErrorKind::Interrupted),
        }
        .with_leftovers(vec![0xF0])
        .with_leftovers(vec![0x9F]);
    }

    /// A single call stores the bytes, which are then exposed by `leftovers`.
    #[test]
    fn single_call_stores_leftovers() {
        let err = Error {
            leftovers: None,
            repr: Repr::Simple(ErrorKind::Interrupted),
        }
        .with_leftovers(vec![0xF0, 0x9F]);
        assert_eq!(err.leftovers(), &[0xF0u8, 0x9F][..]);
    }
}