// cesu8str 0.2.4
//
// Convert to and from CESU-8 or Modified UTF-8 encodings, only when necessary.
// Documentation
use std::io::{self, BufReader, BufWriter};
use std::collections::VecDeque;

use super::prelude::*;

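// Stream contract: a write() of `chunk` is accepted when from_bytes(buffer + chunk) either
// validates or fails only because the final codepoint is still incomplete. read() only ever
// yields complete, valid codepoints.
//
// match from_bytes(buffer + chunk) {        // None / Some(_) below refer to the error's error_len()
//   Ok(_)        => valid write()
//   Err(None)    => valid write(), data ends in a partial codepoint
//   Err(Some(_)) => invalid write()
// }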

/// Stream adapter wrapping a [`StreamState`]. In this file, written bytes are validated as
/// `Cesu8Str` (see its `StreamConfig` impl) and its `io::Read` impl converts them with `to_str()`.
struct CesuEncoder(StreamState);
/// Stream adapter wrapping a [`StreamState`]. No trait impls for it are visible in this part of
/// the file. NOTE(review): presumably the counterpart of `CesuEncoder` — confirm.
struct CesuDecoder(StreamState);
/// Stream adapter wrapping a [`StreamState`]. In this file, written bytes are validated as
/// `Mutf8Str` (see its `StreamConfig` impl).
struct MutfEncoder(StreamState);
/// Stream adapter wrapping a [`StreamState`]. No trait impls for it are visible in this part of
/// the file. NOTE(review): presumably the counterpart of `MutfEncoder` — confirm.
struct MutfDecoder(StreamState);

/// Byte buffer plus bookkeeping shared by the stream adapters in this file.
struct StreamState {
    /// The stream's current cache. This should always hold data that is valid, except for the
    /// last few bytes, which may be a partial (not yet complete) codepoint.
    ///
    /// The bytes are stored exactly as they were written, so any en/decoding has to be done
    /// on read.
    buffer: VecDeque<u8>,

    /// Number of bytes at the end of `buffer` that are not (yet) valid, i.e. the length of a
    /// trailing partial codepoint.
    ///
    /// In other words, the first `buffer.len() - invalid_end` bytes contain no partial
    /// codepoints.
    invalid_end: usize,
}

/// Shared write-side behaviour for the stream adapters in this file.
///
/// `BaseStr` is the borrowed string type whose `TryFrom<&[u8]>` impl is used to validate
/// incoming bytes; validation failures are reported as `super::EncodingError`.
trait StreamConfig
where
    for<'b> &'b Self::BaseStr: TryFrom<&'b [u8], Error = super::EncodingError>
{
    /// String type used to validate the buffered bytes.
    type BaseStr: ?Sized;

    /// Returns the adapter's buffer state.
    fn state(&mut self) -> &mut StreamState;

    /// Appends as much of `chunk` to the buffer as is valid and returns how many bytes of
    /// `chunk` were accepted.
    ///
    /// A trailing partial codepoint left over from an earlier call is re-validated together
    /// with `chunk`, so a codepoint may be split across writes.
    ///
    /// * Everything valid: the whole chunk is accepted.
    /// * Data ends in an incomplete codepoint: the whole chunk is accepted and the incomplete
    ///   tail is recorded in `invalid_end`.
    /// * Invalid bytes after some valid chunk bytes: only the valid prefix is accepted.
    ///
    /// # Errors
    ///
    /// Returns `io::ErrorKind::InvalidData` (wrapping the encoding error) when no byte of
    /// `chunk` can be accepted; the buffer is left exactly as it was before the call.
    fn try_append_chunk(&mut self, chunk: &[u8]) -> io::Result<usize> {
        let state = self.state();
        let orig_len = state.buffer.len();

        // Everything before `pending_start` was fully validated by earlier calls. The bytes
        // from there to `orig_len` are a partial codepoint that must be checked again together
        // with the new chunk. `saturating_sub` falls back to re-validating the whole buffer
        // should the bookkeeping ever be out of sync.
        let pending_start = orig_len.saturating_sub(state.invalid_end);

        state.buffer.extend(chunk);
        let contents = state.buffer.make_contiguous();
        let total_len = contents.len();

        let err = match <&Self::BaseStr>::try_from(&contents[pending_start..]) {
            Ok(_) => {
                // whole buffer valid, nothing pending any more
                state.invalid_end = 0;
                return Ok(chunk.len()); // whole chunk written
            }
            Err(e) => e,
        };

        // `valid_up_to()` is relative to the validated slice; convert it to an absolute
        // offset into the buffer before using it for any length arithmetic.
        // NOTE(review): assumes `valid_up_to`/`error_len` follow the `std::str::Utf8Error`
        // conventions — confirm against `EncodingError`.
        let invalid_at = pending_start + err.valid_up_to();

        match err.error_len() {
            None => {
                // need more bytes: keep everything, remember the incomplete tail
                state.invalid_end = total_len - invalid_at;
                Ok(chunk.len()) // whole chunk written
            }
            Some(_) if invalid_at <= orig_len => {
                // invalid bytes and not a single byte of the chunk is usable:
                // drop the chunk, keep the previously buffered data (and its bookkeeping)
                state.buffer.truncate(orig_len);

                Err(io::Error::new(io::ErrorKind::InvalidData, err))
            }
            Some(_) => {
                // invalid bytes after a valid prefix of the chunk: keep the good data only
                state.buffer.truncate(invalid_at);
                state.invalid_end = 0;

                Ok(invalid_at - orig_len)
            }
        }
    }
}
impl StreamConfig for CesuEncoder {
    /// Bytes written to this adapter are validated as `Cesu8Str`.
    type BaseStr = Cesu8Str;

    /// Hands out the wrapped buffer state.
    fn state(&mut self) -> &mut StreamState {
        let Self(inner) = self;
        inner
    }
}
impl StreamConfig for MutfEncoder {
    /// Bytes written to this adapter are validated as `Mutf8Str`.
    type BaseStr = Mutf8Str;

    /// Hands out the wrapped buffer state.
    fn state(&mut self) -> &mut StreamState {
        let Self(inner) = self;
        inner
    }
}

impl io::Write for CesuEncoder {
    /// Validates `buf` and appends the accepted bytes to the internal buffer, returning how
    /// many bytes were taken (see `StreamConfig::try_append_chunk`).
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let accepted = self.try_append_chunk(buf)?;
        Ok(accepted)
    }

    /// Nothing to flush: written data only ever lives in the in-memory buffer.
    fn flush(&mut self) -> io::Result<()> {
        io::Result::Ok(())
    }
}
impl io::Read for CesuEncoder {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // find valid part of buffer
        let state = self.state();
        let valid = state.buffer.len() - state.invalid_end;
        let cesu = state.buffer.make_contiguous();
        
        // encode, copy it into user buf
        let cesu = Cesu8Str::try_from_bytes(&cesu[..valid]).unwrap();
        let utf8 = cesu.to_str();
        let safe_len = utf8.floor_char_boundary(buf.len());
        buf[..safe_len].copy_from_slice(&utf8.as_bytes()[..safe_len]);
        
        // track written portion
        let unused_utf8_chars = utf8[safe_len..].chars().count();
        todo!("figure out how to subtract characters (not code points) from the end of a string");

        Ok(safe_len)
    }
}