use std::io::{self, Read};
use encoding_rs::{Decoder, Encoding};
/// A [`Read`] adapter that pulls raw bytes from an inner reader, runs them
/// through an `encoding_rs` [`Decoder`], and hands out the resulting UTF-8.
pub struct DecodingReader<R> {
/// Underlying source of raw (not yet decoded) bytes.
inner: R,
/// Streaming decoder created from the encoding passed to `new`.
decoder: Decoder,
/// Holds raw bytes read from `inner` that still have to be decoded.
raw_buf: Vec<u8>,
/// Number of valid bytes at the front of `raw_buf`.
raw_len: usize,
/// Holds the UTF-8 bytes produced by the most recent decode call.
decoded_buf: Vec<u8>,
/// Index of the next byte in `decoded_buf` to hand to the caller.
decoded_pos: usize,
/// Number of valid bytes in `decoded_buf`.
decoded_len: usize,
/// Set once `inner` has returned a zero-length read.
eof: bool,
}
/// Size in bytes of the raw input buffer; the decoded-output buffer is
/// allocated at four times this size.
const CHUNK_SIZE: usize = 8192;
impl<R: Read> DecodingReader<R> {
    /// Creates a reader that decodes the bytes of `inner` from `encoding`
    /// into UTF-8.
    ///
    /// Both internal buffers are allocated up front: `CHUNK_SIZE` bytes for
    /// raw input and `CHUNK_SIZE * 4` bytes for decoded output.
    pub fn new(inner: R, encoding: &'static Encoding) -> Self {
        Self {
            inner,
            decoder: encoding.new_decoder(),
            raw_buf: vec![0u8; CHUNK_SIZE],
            raw_len: 0,
            decoded_buf: vec![0u8; CHUNK_SIZE * 4],
            decoded_pos: 0,
            decoded_len: 0,
            eof: false,
        }
    }

    /// Refills `decoded_buf` once every previously decoded byte has been
    /// handed out.
    ///
    /// On return, `decoded_len == 0` if and only if the inner reader has
    /// reached end-of-input and the decoder has been flushed. A decode step
    /// that consumes input without emitting anything (for example a chunk
    /// that ends in the middle of a multi-byte sequence) does not end the
    /// call; more input is read until output appears or the stream ends.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the inner reader. The buffers are
    /// left in a consistent state, so the call may be retried.
    fn fill_decoded(&mut self) -> io::Result<()> {
        if self.decoded_pos < self.decoded_len {
            return Ok(());
        }
        self.decoded_pos = 0;
        self.decoded_len = 0;

        loop {
            // End of input was already signalled to the decoder and nothing
            // is left to decode: the stream is finished. Returning here also
            // ensures the decoder is never called again after its final
            // (`last == true`) call.
            if self.eof && self.raw_len == 0 {
                return Ok(());
            }

            // Only pull fresh input once the previous chunk is fully consumed.
            if self.raw_len == 0 {
                let n = self.inner.read(&mut self.raw_buf)?;
                if n == 0 {
                    self.eof = true;
                } else {
                    self.raw_len = n;
                }
            }

            // The coder result is not inspected: if the output buffer filled
            // up, the unread tail stays in `raw_buf` (see below) and is
            // decoded by the next call.
            let (_result, read, written, _had_errors) = self.decoder.decode_to_utf8(
                &self.raw_buf[..self.raw_len],
                &mut self.decoded_buf,
                self.eof,
            );

            if read < self.raw_len {
                // Move the undecoded tail to the front for the next round.
                self.raw_buf.copy_within(read..self.raw_len, 0);
                self.raw_len -= read;
            } else {
                self.raw_len = 0;
            }
            self.decoded_len = written;

            if written > 0 {
                return Ok(());
            }
            // Nothing was emitted: the decoder is buffering a partial
            // sequence (or has just been flushed at end of input). Go round
            // again instead of reporting a premature end-of-stream.
        }
    }
}
impl<R: Read> Read for DecodingReader<R> {
    /// Copies already-decoded UTF-8 bytes into `buf`, refilling the internal
    /// decoded buffer first when it has been drained.
    ///
    /// Returns the number of bytes copied, which is `0` when the refill left
    /// no decoded bytes available. Output is handed out bytewise, so a
    /// multi-byte UTF-8 sequence may be split across two calls.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let drained = self.decoded_pos >= self.decoded_len;
        if drained {
            self.fill_decoded()?;
            if self.decoded_pos >= self.decoded_len {
                return Ok(0);
            }
        }

        let pending = &self.decoded_buf[self.decoded_pos..self.decoded_len];
        let count = pending.len().min(buf.len());
        buf[..count].copy_from_slice(&pending[..count]);
        self.decoded_pos += count;
        Ok(count)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes all of `bytes` from `encoding` through a `DecodingReader`
    /// and returns the text collected by `read_to_string`.
    fn decode_all(bytes: &[u8], encoding: &'static encoding_rs::Encoding) -> String {
        let mut reader = DecodingReader::new(bytes, encoding);
        let mut text = String::new();
        reader.read_to_string(&mut text).unwrap();
        text
    }

    /// Plain ASCII passes through a UTF-8 decoder unchanged.
    #[test]
    fn stream_utf8() {
        let data = b"Hello, world!";
        assert_eq!(decode_all(&data[..], encoding_rs::UTF_8), "Hello, world!");
    }

    /// Single-byte windows-1254 input maps to the expected Turkish letters.
    #[test]
    fn stream_windows_1254_turkish() {
        let data: &[u8] = &[0xFE, 0xF0, 0xFD];
        assert_eq!(
            decode_all(data, encoding_rs::WINDOWS_1254),
            "\u{015F}\u{011F}\u{0131}"
        );
    }

    /// Empty input produces empty output.
    #[test]
    fn stream_empty() {
        let data: &[u8] = b"";
        assert_eq!(decode_all(data, encoding_rs::UTF_8), "");
    }

    /// Input longer than one internal chunk is decoded in full.
    #[test]
    fn stream_large_input() {
        let data = "abcdefgh".repeat(2000);
        assert_eq!(decode_all(data.as_bytes(), encoding_rs::UTF_8), data);
    }

    /// Reading through a caller buffer much smaller than the input still
    /// yields every byte.
    #[test]
    fn stream_small_read_buf() {
        let data = b"Hello, world!";
        let mut reader = DecodingReader::new(&data[..], encoding_rs::UTF_8);
        let mut output = Vec::new();
        let mut buf = [0u8; 3];
        loop {
            match reader.read(&mut buf).unwrap() {
                0 => break,
                n => output.extend_from_slice(&buf[..n]),
            }
        }
        assert_eq!(String::from_utf8(output).unwrap(), "Hello, world!");
    }
}