1use std::io::{self, Read};
8
9use encoding_rs::{Decoder, Encoding};
10
/// A reader adapter that pulls raw bytes from `inner`, runs them through an
/// `encoding_rs` [`Decoder`], and hands out the resulting UTF-8 bytes.
pub struct DecodingReader<R> {
    /// Underlying source of still-encoded bytes.
    inner: R,
    /// Streaming decoder; it is stateful, so it is kept for the reader's lifetime.
    decoder: Decoder,
    /// Staging buffer for bytes read from `inner` that the decoder has not consumed yet.
    raw_buf: Vec<u8>,
    /// Number of valid bytes at the front of `raw_buf`.
    raw_len: usize,
    /// Buffer receiving the decoder's UTF-8 output.
    decoded_buf: Vec<u8>,
    /// Offset of the next byte in `decoded_buf` to hand to the caller.
    decoded_pos: usize,
    /// Number of valid bytes in `decoded_buf`; `decoded_pos..decoded_len` is the unread part.
    decoded_len: usize,
    /// Set once `inner` has reported end of input (a read returned 0 bytes).
    eof: bool,
}
44
/// Size in bytes of the raw input buffer; the decoded-output buffer is
/// allocated at four times this size.
const CHUNK_SIZE: usize = 8192;
47
48impl<R: Read> DecodingReader<R> {
49 pub fn new(inner: R, encoding: &'static Encoding) -> Self {
51 Self {
52 inner,
53 decoder: encoding.new_decoder(),
54 raw_buf: vec![0u8; CHUNK_SIZE],
55 raw_len: 0,
56 decoded_buf: vec![0u8; CHUNK_SIZE * 4], decoded_pos: 0,
58 decoded_len: 0,
59 eof: false,
60 }
61 }
62
63 fn fill_decoded(&mut self) -> io::Result<()> {
65 if self.decoded_pos < self.decoded_len {
67 return Ok(());
68 }
69
70 self.decoded_pos = 0;
72 self.decoded_len = 0;
73
74 if self.eof && self.raw_len == 0 {
75 return Ok(());
76 }
77
78 if self.raw_len == 0 && !self.eof {
80 let n = self.inner.read(&mut self.raw_buf)?;
81 if n == 0 {
82 self.eof = true;
83 } else {
84 self.raw_len = n;
85 }
86 }
87
88 let (result, read, written, _had_errors) = self.decoder.decode_to_utf8(
90 &self.raw_buf[..self.raw_len],
91 &mut self.decoded_buf,
92 self.eof,
93 );
94
95 if read < self.raw_len {
97 self.raw_buf.copy_within(read..self.raw_len, 0);
98 self.raw_len -= read;
99 } else {
100 self.raw_len = 0;
101 }
102
103 self.decoded_len = written;
104
105 if let encoding_rs::CoderResult::OutputFull = result {
107 }
109
110 Ok(())
111 }
112}
113
114impl<R: Read> Read for DecodingReader<R> {
115 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
116 if self.decoded_pos >= self.decoded_len {
117 self.fill_decoded()?;
118 }
119
120 if self.decoded_pos >= self.decoded_len {
121 return Ok(0); }
123
124 let available = self.decoded_len - self.decoded_pos;
125 let to_copy = available.min(buf.len());
126 buf[..to_copy]
127 .copy_from_slice(&self.decoded_buf[self.decoded_pos..self.decoded_pos + to_copy]);
128 self.decoded_pos += to_copy;
129 Ok(to_copy)
130 }
131}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `bytes` through a `DecodingReader` for `encoding` and collects
    /// everything it yields via `read_to_string`.
    fn decode_all(bytes: &[u8], encoding: &'static encoding_rs::Encoding) -> String {
        let mut text = String::new();
        DecodingReader::new(bytes, encoding)
            .read_to_string(&mut text)
            .unwrap();
        text
    }

    /// ASCII input passes through a UTF-8 decoder unchanged.
    #[test]
    fn stream_utf8() {
        let decoded = decode_all(b"Hello, world!", encoding_rs::UTF_8);
        assert_eq!(decoded, "Hello, world!");
    }

    /// Three single-byte windows-1254 code units decode to the expected
    /// Turkish code points.
    #[test]
    fn stream_windows_1254_turkish() {
        let decoded = decode_all(&[0xFE, 0xF0, 0xFD], encoding_rs::WINDOWS_1254);
        assert_eq!(decoded, "\u{015F}\u{011F}\u{0131}");
    }

    /// An empty source yields an empty string.
    #[test]
    fn stream_empty() {
        let decoded = decode_all(b"", encoding_rs::UTF_8);
        assert_eq!(decoded, "");
    }

    /// Input larger than one internal chunk round-trips intact.
    #[test]
    fn stream_large_input() {
        let expected = "abcdefgh".repeat(2000);
        let decoded = decode_all(expected.as_bytes(), encoding_rs::UTF_8);
        assert_eq!(decoded, expected);
    }

    /// Reading through a caller buffer smaller than the content still yields
    /// the complete text across repeated `read` calls.
    #[test]
    fn stream_small_read_buf() {
        let source = b"Hello, world!";
        let mut reader = DecodingReader::new(&source[..], encoding_rs::UTF_8);
        let mut chunk = [0u8; 3];
        let mut collected = Vec::new();
        loop {
            match reader.read(&mut chunk).unwrap() {
                0 => break,
                n => collected.extend_from_slice(&chunk[..n]),
            }
        }
        assert_eq!(String::from_utf8(collected).unwrap(), "Hello, world!");
    }
}