cesu8str/ngstr/
stream.rs

1use std::io::{self, BufReader, BufWriter};
2use std::collections::VecDeque;
3
4use super::prelude::*;
5
// A chunk may be write()n whenever from_bytes(buffer + chunk) either succeeds or fails only
// because the data ends in a partial codepoint; read() yields only valid codepoints.
// match from_bytes(buffer + chunk) {
//   Ok(_) -> valid write()
//   Err(e), e.error_len() == None -> valid write(), ends in a partial codepoint
//   Err(e), e.error_len() == Some(_) -> invalid write()
// }
12
/// Stream wrapper around a [`StreamState`] for CESU-8 data (encoder side).
#[derive(Debug, Default)]
struct CesuEncoder(StreamState);

/// Stream wrapper around a [`StreamState`] for CESU-8 data (decoder side).
// NOTE(review): no behavior is attached to this type in the visible code yet.
#[derive(Debug, Default)]
struct CesuDecoder(StreamState);

/// Stream wrapper around a [`StreamState`] for MUTF-8 data (encoder side).
#[derive(Debug, Default)]
struct MutfEncoder(StreamState);

/// Stream wrapper around a [`StreamState`] for MUTF-8 data (decoder side).
// NOTE(review): no behavior is attached to this type in the visible code yet.
#[derive(Debug, Default)]
struct MutfDecoder(StreamState);

/// Byte cache shared by the stream wrappers above.
///
/// `Default` yields an empty buffer with no pending partial codepoint, which is the natural
/// starting state for a fresh stream.
#[derive(Debug, Default)]
struct StreamState {
    /// The stream's current cache. This should always hold data that is valid, except for the
    /// last few bytes, in the case of a partial codepoint.
    /// Note that this data is unencoded, so data en/decoding must be done on read.
    buffer: VecDeque<u8>,

    /// Number of trailing bytes in `buffer` that form a not-yet-complete codepoint.
    ///
    /// (ie: the first `self.buffer.len() - self.invalid_end` bytes do not contain partial
    /// codepoints)
    invalid_end: usize,
}
29
30trait StreamConfig
31where
32    for<'b> &'b Self::BaseStr: TryFrom<&'b [u8], Error = super::EncodingError>
33{
34    type BaseStr: ?Sized;
35
36    fn state(&mut self) -> &mut StreamState;
37    fn try_append_chunk(&mut self, chunk: &[u8]) -> io::Result<usize> {
38        let state = self.state();
39        let orig_len = state.buffer.len();
40        state.buffer.extend(chunk);
41        let contents = state.buffer.make_contiguous();
42        match <&Self::BaseStr>::try_from(&contents[orig_len..]) {
43            Ok(_) => {
44
45                // whole string valid
46                state.invalid_end = 0;
47                Ok(chunk.len()) // whole chunk written
48            },
49            Err(e) => {
50                let invalid_at = orig_len + e.valid_up_to();
51                match e.error_len() {
52                    None => { // need more bytes
53
54                        // track invalid bit
55                        state.invalid_end = contents.len() - e.valid_up_to();
56                        Ok(chunk.len()) // whole chunk written
57                    },
58                    Some(_) if invalid_at == 0 => { // invalid bytes, at beginning
59                        
60                        // keep good data
61                        state.buffer.truncate(orig_len);
62
63                        Err(io::Error::new(io::ErrorKind::InvalidData, e))
64                    },
65                    Some(_) => { // invalid bytes, some good
66
67                        // keep good data
68                        state.buffer.truncate(e.valid_up_to());
69
70                        let wrote = state.buffer.len() - orig_len;
71
72                        Ok(wrote)
73                    }
74                }
75            }
76        }
77    }
78}
79impl StreamConfig for CesuEncoder {
80    type BaseStr = Cesu8Str;
81    fn state(&mut self) -> &mut StreamState {
82        &mut self.0
83    }
84}
85impl StreamConfig for MutfEncoder {
86    type BaseStr = Mutf8Str;
87    fn state(&mut self) -> &mut StreamState {
88        &mut self.0
89    }
90}
91
92impl io::Write for CesuEncoder {
93    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
94        self.try_append_chunk(buf)
95    }
96
97    fn flush(&mut self) -> io::Result<()> {
98        Ok(())
99    }
100}
101impl io::Read for CesuEncoder {
102    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
103        // find valid part of buffer
104        let state = self.state();
105        let valid = state.buffer.len() - state.invalid_end;
106        let cesu = state.buffer.make_contiguous();
107        
108        // encode, copy it into user buf
109        let cesu = Cesu8Str::try_from_bytes(&cesu[..valid]).unwrap();
110        let utf8 = cesu.to_str();
111        let safe_len = utf8.floor_char_boundary(buf.len());
112        buf[..safe_len].copy_from_slice(&utf8.as_bytes()[..safe_len]);
113        
114        // track written portion
115        let unused_utf8_chars = utf8[safe_len..].chars().count();
116        todo!("figure out how to subtract characters (not code points) from the end of a string");
117
118        Ok(safe_len)
119    }
120}