//! This crate provides a trait, [`BufRead`], providing functions to read utf-8 text streams
//! using an [`io::BufRead`] without waiting for newline delimiters.
//!
//! # Quick Start
//!
//! The simplest way to read a whole file with a [`BufRead`] type is to repeatedly call its
//! [`read_utf8`] method and store the data it returns:
//!
//! ```
//! use utf8_bufread::BufRead;
//! use std::io::BufReader;
//!
//! // Reader may be any type implementing io::BufRead
//! // We'll just use a BufReader wrapping a slice for this example
//! let mut reader = BufReader::<&[u8]>::new("💖".as_ref());
//! // The string we'll use to store the text of the read file
//! let mut text = String::new();
//! loop { // Loop until EOF
//!     match reader.read_utf8() {
//!         Ok(s) => {
//!             if s.len() == 0 { break; } // EOF
//!             text.push_str(s.as_str()) // Append read chunk to text
//!         }
//!         Err(e) => panic!("{}", e), // io::Error wrapping the underlying failure
//!     }
//! }
//! assert_eq!("💖", text.as_str());
//! ```
//!
//! See [`BufRead`]'s documentation for more.
//!
//! [`BufRead`]: self::BufRead
//! [`io::BufRead`]: std::io::BufRead
//! [`read_utf8`]: self::BufRead::read_utf8

use std::io::{self, Error, ErrorKind};
use std::str::{from_utf8, from_utf8_unchecked};

// NOTE(review): as an outer attribute this only covers the trait below; the lint names suggest
// a crate-wide `#![deny(...)]` was intended — confirm and move it to the top of the crate root.
#[deny(missing_crate_level_docs, missing_docs, missing_doc_code_examples)]

/// A trait implemented for all types implementing [`io::BufRead`], providing functions to
/// read utf-8 text streams without waiting for newline delimiters.
///
/// [`io::BufRead`]: std::io::BufRead
pub trait BufRead: io::BufRead {
    /// Reads the utf-8 text currently available in the reader's buffer and returns it as an
    /// [`io::Result`]`<`[`String`]`>`.
    ///
    /// This function calls [`fill_buf`] once and decodes the bytes it exposes. It returns the
    /// longest valid utf-8 prefix of those bytes and consumes exactly that prefix, leaving any
    /// following invalid or incomplete codepoint in the buffer for the next call.
    ///
    /// If the buffer *starts* with an incomplete codepoint (a codepoint straddling two refills
    /// of the buffer, or a buffer smaller than the codepoint), the missing bytes are pulled from
    /// the reader with further [`fill_buf`] calls and that single codepoint is returned on its
    /// own.
    ///
    /// If this function returns [`Ok`] with an empty string, the stream has reached EOF.
    ///
    /// This function avoids the usual issue of using [`io::BufRead`]`::`[`read_line`] or
    /// [`io::BufRead`]`::`[`lines`] on big text files without newline delimiters: it will not
    /// load the whole file in memory.
    ///
    /// [`io::Result`]: std::io::Result
    /// [`io::BufRead`]: std::io::BufRead
    /// [`read_line`]: std::io::BufRead::read_line
    /// [`lines`]: std::io::BufRead::lines
    ///
    /// # Errors
    ///
    /// Any error returned by the first call to [`fill_buf`] is returned immediately, including
    /// [`ErrorKind`]`::`[`Interrupted`]; nothing has been consumed at that point, so the call can
    /// simply be retried.
    ///
    /// If [`from_utf8`] reports an [`Utf8Error`] after at least one valid codepoint, the valid
    /// codepoints are returned and no error is raised. This avoids losing valid data; the error
    /// is reported by the next call.
    ///
    /// If the very first codepoint is invalid, an [`ErrorKind`]`::`[`InvalidData`] error wrapping
    /// the [`Utf8Error`] is returned and nothing is consumed.
    ///
    /// If the very first codepoint is incomplete, its bytes are consumed while the rest of the
    /// codepoint is fetched. An [`ErrorKind`]`::`[`InvalidData`] error wrapping the
    /// [`Utf8Error`] is returned if EOF is reached first or if the next byte cannot continue the
    /// codepoint (that byte is left in the buffer). While fetching, [`Interrupted`] errors from
    /// [`fill_buf`] are retried; any other I/O error is returned and the already consumed bytes
    /// of the partial codepoint are lost.
    ///
    /// [`fill_buf`]: std::io::BufRead::fill_buf
    /// [`Utf8Error`]: std::str::Utf8Error
    /// [`from_utf8`]: std::str::from_utf8
    /// [`ErrorKind`]: std::io::ErrorKind
    /// [`InvalidData`]: std::io::ErrorKind::InvalidData
    /// [`Interrupted`]: std::io::ErrorKind::Interrupted
    ///
    /// # Examples
    ///
    /// ```
    /// use utf8_bufread::BufRead;
    /// use std::io::{BufReader, ErrorKind};
    ///
    /// // "foo\nbar" + some invalid bytes
    /// // We give the buffer more than enough capacity to be able to read all the bytes in one
    /// // call
    /// let mut reader = BufReader::with_capacity(
    ///     16,
    ///     [0x66u8, 0x6f, 0x6f, 0xa, 0x62, 0x61, 0x72, 0x9f, 0x92, 0x96].as_ref(),
    /// );
    ///
    /// // On the first read_utf8() call, we will read up to the first byte of the invalid
    /// // codepoint (ie "foo\nbar")
    /// let read_str = reader
    ///     .read_utf8()
    ///     .expect("We will get all the valid bytes without error");
    /// assert_eq!("foo\nbar", read_str);
    ///
    /// // Then on the second call we will get the InvalidData error caused by the Utf8Error error,
    /// // as there is no bytes forming valid codepoints left
    /// let read_err = reader.read_utf8().expect_err("We will get an error");
    /// assert_eq!(ErrorKind::InvalidData, read_err.kind())
    /// ```
    fn read_utf8(&mut self) -> io::Result<String> {
        // `ErrorKind::Interrupted` is deliberately not retried here: nothing has been consumed
        // yet, so the caller can retry the whole call.
        let available = self.fill_buf()?;
        let utf8_err = match from_utf8(available) {
            Ok(text) => {
                // Everything exposed by the reader is valid (this is also the EOF case, where
                // the buffer is empty and the returned string is empty).
                let text = text.to_owned();
                let used = available.len();
                self.consume(used);
                return Ok(text);
            }
            Err(e) => e,
        };

        let valid_len = utf8_err.valid_up_to();
        if valid_len > 0 {
            // Return the valid prefix now; the offending bytes stay at the front of the buffer
            // and are dealt with by the next call.
            // SAFETY: `Utf8Error::valid_up_to` guarantees that `available[..valid_len]` is
            // valid utf-8.
            let text = unsafe { from_utf8_unchecked(&available[..valid_len]) }.to_owned();
            self.consume(valid_len);
            return Ok(text);
        }
        if utf8_err.error_len().is_some() {
            // The buffer starts with bytes that can never form a codepoint.
            return Err(Error::new(ErrorKind::InvalidData, utf8_err));
        }

        // The buffer holds nothing but the beginning of a codepoint (at most 3 bytes, since
        // `valid_up_to() == 0` and `error_len() == None`). A reader such as `io::BufReader`
        // only refills once its buffer has been fully consumed, so the partial bytes are
        // stashed and consumed, and the continuation bytes are fetched one at a time.
        let mut pending = [0u8; 4];
        let mut pending_len = available.len();
        pending[..pending_len].copy_from_slice(available);
        self.consume(pending_len);
        let mut last_err = utf8_err;
        loop {
            let next_byte = match self.fill_buf() {
                Ok(more) => match more.first() {
                    Some(&byte) => byte,
                    // EOF in the middle of a codepoint.
                    None => return Err(Error::new(ErrorKind::InvalidData, last_err)),
                },
                // The stashed bytes would be lost by returning, so transient interruptions are
                // retried instead.
                Err(e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };
            // `pending_len` never exceeds 3 here: a 4-byte sequence is either a complete
            // codepoint or invalid, never "incomplete".
            pending[pending_len] = next_byte;
            match from_utf8(&pending[..=pending_len]) {
                Ok(text) => {
                    let text = text.to_owned();
                    self.consume(1);
                    return Ok(text);
                }
                Err(e) if e.error_len().is_none() => {
                    // Still a valid but unfinished prefix: accept the byte and keep going.
                    self.consume(1);
                    pending_len += 1;
                    last_err = e;
                }
                // The peeked byte cannot continue the codepoint; it is left in the buffer as
                // it may start a new one.
                Err(e) => return Err(Error::new(ErrorKind::InvalidData, e)),
            }
        }
    }
}

/// Blanket implementation: every [`io::BufRead`] type gets the default methods of [`BufRead`].
///
/// [`io::BufRead`]: std::io::BufRead
impl<T> BufRead for T where T: io::BufRead {}

#[cfg(test)]
mod tests {
    use crate::BufRead;
    use std::io::BufReader;

    /// Mirrors the simple example: a single `read_utf8` call on a reader wrapping a short
    /// utf-8 byte slice yields the whole text.
    #[test]
    fn readme_simple_example() {
        let expected = "💖";
        let mut reader = BufReader::<&[u8]>::new(expected.as_bytes());

        let read = reader.read_utf8().unwrap();

        assert_eq!(expected, read);
    }

    // TODO more / other ?
}