use crate::{Utf8Duplexer, Utf8Reader};
use duplex::Duplex;
use std::cmp::min;
use std::io::{self, copy, repeat, Cursor, Read, Write};
use std::str;
#[cfg(feature = "layered-io")]
use {
layered_io::{HalfDuplexLayered, ReadLayered, Status},
std::cmp::max,
};
/// Crate-internal accessors that the `Utf8Input` functions use to reach a
/// wrapper's inner stream and the `Utf8Input` state stored next to it.
///
/// Implementors must also implement `Read`.
pub(crate) trait Utf8ReaderInternals<Inner: Read>: Read {
/// Returns the wrapper's `Utf8Input` state.
fn impl_(&mut self) -> &mut Utf8Input;
/// Borrows the wrapped stream.
fn inner(&self) -> &Inner;
/// Mutably borrows the wrapped stream.
fn inner_mut(&mut self) -> &mut Inner;
/// Consumes the wrapper and returns the wrapped stream.
fn into_inner(self) -> Inner;
}
/// Marker trait for wrappers whose inner stream is a `ReadLayered` and which
/// are themselves `ReadLayered`; it adds no methods of its own.
///
/// Only available with the `layered-io` feature.
#[cfg(feature = "layered-io")]
pub(crate) trait Utf8ReaderInternalsLayered<Inner: ReadLayered>:
Utf8ReaderInternals<Inner> + ReadLayered
{
}
// Exposes `Utf8Reader`'s `input` and `inner` fields through the internal
// accessor trait.
impl<Inner: Read> Utf8ReaderInternals<Inner> for Utf8Reader<Inner> {
// Returns the `input` field.
fn impl_(&mut self) -> &mut Utf8Input {
&mut self.input
}
// Borrows the `inner` field.
fn inner(&self) -> &Inner {
&self.inner
}
// Mutably borrows the `inner` field.
fn inner_mut(&mut self) -> &mut Inner {
&mut self.inner
}
// Moves the `inner` field out of the reader; the `Utf8Input` state is dropped.
fn into_inner(self) -> Inner {
self.inner
}
}
// Marker impl: opts `Utf8Reader` into the layered internals whenever its inner
// stream is `ReadLayered` (`layered-io` feature only).
#[cfg(feature = "layered-io")]
impl<Inner: ReadLayered> Utf8ReaderInternalsLayered<Inner> for Utf8Reader<Inner> {}
// Exposes `Utf8Duplexer`'s `input` and `inner` fields through the internal
// accessor trait, mirroring the `Utf8Reader` implementation.
impl<Inner: Duplex + Read + Write> Utf8ReaderInternals<Inner> for Utf8Duplexer<Inner> {
// Returns the `input` field.
fn impl_(&mut self) -> &mut Utf8Input {
&mut self.input
}
// Borrows the `inner` field.
fn inner(&self) -> &Inner {
&self.inner
}
// Mutably borrows the `inner` field.
fn inner_mut(&mut self) -> &mut Inner {
&mut self.inner
}
// Moves the `inner` field out of the duplexer; its other state is dropped.
fn into_inner(self) -> Inner {
self.inner
}
}
// Marker impl: opts `Utf8Duplexer` into the layered internals whenever its
// inner stream is `HalfDuplexLayered` (`layered-io` feature only).
#[cfg(feature = "layered-io")]
impl<Inner: HalfDuplexLayered> Utf8ReaderInternalsLayered<Inner> for Utf8Duplexer<Inner> {}
/// Reader-side state for the UTF-8 wrappers.
pub(crate) struct Utf8Input {
/// Bytes already taken from the inner stream that could not be handed to the
/// caller yet: input starting at an invalid or incomplete sequence, kept
/// until a later read can emit it (possibly with U+FFFD substitutions).
overflow: Vec<u8>,
}
impl Utf8Input {
/// Creates a `Utf8Input` with an empty overflow buffer.
#[inline]
pub(crate) const fn new() -> Self {
Self {
overflow: Vec::new(),
}
}
#[inline]
pub(crate) fn read_str<Inner: Read>(
internals: &mut impl Utf8ReaderInternals<Inner>,
buf: &mut str,
) -> io::Result<usize> {
internals.read(unsafe { buf.as_bytes_mut() })
}
/// Fills all of `buf` with UTF-8 data read from `internals`.
///
/// On success the buffer is left exactly as `internals.read_exact` filled it.
/// On failure the longest valid UTF-8 prefix of `buf` is kept and every byte
/// after it is overwritten with NUL, so `buf` is still a valid `str`.
///
/// # Errors
///
/// Propagates any error returned by `internals.read_exact`.
#[inline]
pub(crate) fn read_exact_str<Inner: Read>(
    internals: &mut impl Utf8ReaderInternals<Inner>,
    buf: &mut str,
) -> io::Result<()> {
    // SAFETY: on success the reader is expected to have filled `buf` with
    // valid UTF-8. On failure the contents are unspecified and may include raw
    // input bytes, so anything from the first invalid byte onwards is scrubbed
    // below before `buf` is used as a `str` again.
    let bytes = unsafe { buf.as_bytes_mut() };
    let result = internals.read_exact(bytes);

    if result.is_err() {
        let first_invalid = str::from_utf8(&*bytes)
            .err()
            .map(|error| error.valid_up_to());
        if let Some(keep) = first_invalid {
            for byte in &mut bytes[keep..] {
                *byte = 0;
            }
        }
    }

    result
}
/// Reads UTF-8 data from `internals` directly into the bytes of `buf`,
/// returning the byte count together with the stream `Status`.
///
/// Everything in `buf` past the reported length is overwritten with NUL
/// bytes; if the read fails, the longest valid UTF-8 prefix of `buf` is kept
/// and the rest is overwritten with NUL bytes.
///
/// # Errors
///
/// Propagates any error returned by `internals.read_with_status`.
#[cfg(feature = "layered-io")]
#[inline]
pub(crate) fn read_str_with_status<Inner: ReadLayered>(
    internals: &mut impl Utf8ReaderInternalsLayered<Inner>,
    buf: &mut str,
) -> io::Result<(usize, Status)> {
    // SAFETY: the reader is expected to report only bytes that form valid
    // UTF-8, but the lower layers can leave raw (invalid or incomplete) input
    // bytes sitting past the reported length. Those are scrubbed below before
    // `buf` is used as a `str` again, so it holds valid UTF-8 on every path,
    // including the early return on error.
    let bytes = unsafe { buf.as_bytes_mut() };
    let outcome = internals.read_with_status(bytes);

    // First index that is not known to be valid UTF-8.
    let keep = match &outcome {
        Ok((size, _)) => min(*size, bytes.len()),
        Err(_) => match str::from_utf8(&*bytes) {
            Ok(text) => text.len(),
            Err(error) => error.valid_up_to(),
        },
    };
    for byte in &mut bytes[keep..] {
        *byte = 0;
    }

    let (size, status) = outcome?;
    // Without the scrubbing above, a stray continuation byte left at `size`
    // would make this assertion fail on malformed input.
    debug_assert!(buf.is_char_boundary(size));
    Ok((size, status))
}
/// Fills all of `buf` with UTF-8 data read from `internals`, returning the
/// `Status` reported by `read_exact_using_status`.
///
/// On failure the longest valid UTF-8 prefix of `buf` is kept and every byte
/// after it is overwritten with NUL, so `buf` is still a valid `str`.
///
/// # Errors
///
/// Propagates any error returned by `internals.read_exact_using_status`.
#[cfg(feature = "layered-io")]
#[inline]
pub(crate) fn read_exact_str_using_status<Inner: ReadLayered>(
    internals: &mut impl Utf8ReaderInternalsLayered<Inner>,
    buf: &mut str,
) -> io::Result<Status> {
    // SAFETY: on success the reader is expected to have filled `buf` with
    // valid UTF-8. On failure the contents may include raw input bytes, so
    // anything from the first invalid byte onwards is scrubbed below before
    // `buf` is used as a `str` again.
    let bytes = unsafe { buf.as_bytes_mut() };
    let outcome = internals.read_exact_using_status(bytes);

    if outcome.is_err() {
        let first_invalid = str::from_utf8(&*bytes)
            .err()
            .map(|error| error.valid_up_to());
        if let Some(keep) = first_invalid {
            for byte in &mut bytes[keep..] {
                *byte = 0;
            }
        }
    }

    outcome
}
/// Layered counterpart of `read`: fills `buf` with validated UTF-8 and reports
/// the stream `Status` next to the byte count.
///
/// Bytes carried over from an earlier call are emitted first. The inner
/// stream's status is only passed on once nothing is left in the overflow
/// buffer; until then `Status::active()` is reported.
#[cfg(feature = "layered-io")]
pub(crate) fn read_with_status<Inner: ReadLayered>(
    internals: &mut impl Utf8ReaderInternalsLayered<Inner>,
    buf: &mut [u8],
) -> io::Result<(usize, Status)> {
    // Drain pending overflow; stop here if some of it is still pending.
    let carried = match Self::process_old_data(internals, buf)? {
        (count, true) => return Ok((count, Status::active())),
        (count, false) => count,
    };

    // Append fresh input behind the carried-over bytes.
    let (fresh, status) = internals
        .inner_mut()
        .read_with_status(&mut buf[carried..])?;
    let at_end = status.is_end();

    let (total, settled) = Self::process_new_data(internals, buf, carried, fresh, at_end)?;
    let reported = if settled { status } else { Status::active() };
    Ok((total, reported))
}
/// Emits bytes left in the overflow buffer by an earlier read into `buf`.
///
/// Returns `(written, done)`. `done` is `true` when overflow bytes are still
/// pending after this call, meaning the caller should return `written`
/// without reading from the inner stream.
///
/// # Errors
///
/// Returns `InvalidInput` if `buf` is shorter than 4 bytes.
fn process_old_data<Inner: Read>(
    internals: &mut impl Utf8ReaderInternals<Inner>,
    buf: &mut [u8],
) -> io::Result<(usize, bool)> {
    if buf.len() < 4 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "buffer for reading from Utf8Reader must be at least 4 bytes long",
        ));
    }

    let input = internals.impl_();
    if input.overflow.is_empty() {
        // Nothing carried over from a previous call.
        return Ok((0, false));
    }

    // `process_overflow` only returns `None` on its `Replace` path, so the
    // `Include` request here always yields a count.
    let written = input
        .process_overflow(&mut buf[0..], IncompleteHow::Include)
        .unwrap();
    let still_pending = !input.overflow.is_empty();
    Ok((written, still_pending))
}
/// Validates the bytes now present in `buf[..nread + size]` and returns how
/// many of them may be handed to the caller.
///
/// `nread` is the number of bytes already in `buf` from the overflow buffer,
/// `size` the number just read from the inner stream, and `is_end` whether the
/// inner stream has ended. Input from the first invalid byte onwards is moved
/// into the overflow buffer and re-emitted through `process_overflow`.
///
/// Returns `(count, done)`, where `done` is `true` when nothing is left in the
/// overflow buffer.
///
/// # Errors
///
/// Returns an `Other` error ("invalid UTF-8") if `process_overflow` yields
/// `None`.
///
/// # Panics
///
/// Panics if the overflow buffer is not empty on entry to the invalid-input
/// path.
pub(crate) fn process_new_data<Inner: Read>(
    internals: &mut impl Utf8ReaderInternals<Inner>,
    buf: &mut [u8],
    mut nread: usize,
    size: usize,
    is_end: bool,
) -> io::Result<(usize, bool)> {
    nread += size;

    // Zero everything past the data so no stale bytes remain behind it.
    let unused = (buf.len() - nread) as u64;
    let mut zeros = repeat(b'\0').take(unused);
    let mut tail = Cursor::new(&mut buf[nread..]);
    copy(&mut zeros, &mut tail).unwrap();

    // Fast path: everything read so far is valid UTF-8.
    let error = match str::from_utf8(&buf[..nread]) {
        Ok(_) => return Ok((nread, true)),
        Err(error) => error,
    };

    // Stash the input from the first invalid byte onwards in the overflow.
    let valid_len = error.valid_up_to();
    let input = internals.impl_();
    assert!(input.overflow.is_empty());
    input.overflow.extend_from_slice(&buf[valid_len..nread]);

    // A trailing incomplete sequence is replaced at end of stream and held
    // back otherwise.
    let incomplete_how = if is_end {
        IncompleteHow::Replace
    } else {
        IncompleteHow::Exclude
    };

    nread = valid_len;
    nread += input
        .process_overflow(&mut buf[nread..], incomplete_how)
        .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "invalid UTF-8"))?;

    Ok((nread, input.overflow.is_empty()))
}
/// Returns the smallest buffer size accepted for reads: the inner stream's
/// own minimum, but never less than 4 bytes.
#[cfg(feature = "layered-io")]
#[inline]
pub(crate) fn minimum_buffer_size<Inner: ReadLayered>(
    internals: &impl Utf8ReaderInternals<Inner>,
) -> usize {
    let inner_minimum = internals.inner().minimum_buffer_size();
    max(4, inner_minimum)
}
/// Moves as much of `self.overflow` into `buf` as fits, replacing each
/// invalid sequence with U+FFFD, and returns the number of bytes written.
///
/// `incomplete_how` selects what happens to an incomplete sequence that forms
/// the very end of the overflow buffer:
///
/// - `Include`: its bytes are copied into `buf` unchanged.
/// - `Exclude`: it stays in the overflow buffer.
/// - `Replace`: it is replaced by U+FFFD if `buf` has room, otherwise it stays
///   in the overflow buffer.
///
/// A sequence that only looks incomplete because the remaining room in `buf`
/// cut the view of the overflow short is always left in place for a later
/// call.
///
/// Returns `None` only from the `Replace` path, when no replacement could be
/// written and the overflow buffer is empty.
#[cold]
fn process_overflow(&mut self, buf: &mut [u8], incomplete_how: IncompleteHow) -> Option<usize> {
    let mut nread = 0;

    loop {
        // Only look at as much of the overflow as could fit in the rest of `buf`.
        let num = min(buf[nread..].len(), self.overflow.len());
        match str::from_utf8(&self.overflow[..num]) {
            Ok(_) => {
                // The whole view is valid: copy it out and drop it from the front.
                buf[nread..nread + num].copy_from_slice(&self.overflow[..num]);
                self.overflow.copy_within(num.., 0);
                self.overflow.resize(self.overflow.len() - num, 0);
                nread += num;
            }
            Err(error) => {
                // Emit the valid prefix first.
                let (valid, after_valid) = self.overflow[..num].split_at(error.valid_up_to());
                let valid_len = valid.len();
                let after_valid_len = after_valid.len();
                buf[nread..nread + valid_len].copy_from_slice(valid);
                self.overflow.copy_within(valid_len.., 0);
                self.overflow.resize(self.overflow.len() - valid_len, 0);
                nread += valid_len;

                if let Some(invalid_sequence_length) = error.error_len() {
                    // Definitely invalid: substitute U+FFFD if it fits and keep
                    // going; otherwise leave it for the next call.
                    if '\u{fffd}'.len_utf8() <= buf[nread..].len() {
                        nread += '\u{fffd}'.encode_utf8(&mut buf[nread..]).len();
                        self.overflow.copy_within(invalid_sequence_length.., 0);
                        self.overflow
                            .resize(self.overflow.len() - invalid_sequence_length, 0);
                        continue;
                    }
                } else if after_valid_len == self.overflow.len() {
                    // The incomplete sequence really is the end of the overflow.
                    // When the equality fails, the view was truncated by the room
                    // left in `buf` and more bytes follow in the overflow;
                    // replacing and clearing there would destroy valid pending
                    // data, so that case is left untouched.
                    match incomplete_how {
                        IncompleteHow::Replace => {
                            if '\u{fffd}'.len_utf8() <= buf[nread..].len() {
                                nread += '\u{fffd}'.encode_utf8(&mut buf[nread..]).len();
                                self.overflow.clear();
                            } else if self.overflow.is_empty() {
                                return None;
                            }
                        }
                        IncompleteHow::Include => {
                            if !buf[nread..].is_empty() {
                                let num = min(buf[nread..].len(), after_valid_len);
                                buf[nread..nread + num].copy_from_slice(&self.overflow[..num]);
                                nread += num;
                                self.overflow.copy_within(num.., 0);
                                self.overflow.resize(self.overflow.len() - num, 0);
                            }
                        }
                        IncompleteHow::Exclude => {}
                    }
                }
            }
        }
        break;
    }

    Some(nread)
}
/// Discards any bytes held in the overflow buffer, then calls `abandon` on
/// the inner stream.
#[cfg(feature = "layered-io")]
#[inline]
pub(crate) fn abandon<Inner: ReadLayered>(internals: &mut impl Utf8ReaderInternals<Inner>) {
    let input = internals.impl_();
    input.overflow.clear();

    let inner = internals.inner_mut();
    inner.abandon()
}
/// Returns the inner stream's suggested buffer size, raised to
/// `minimum_buffer_size` when the suggestion is smaller.
#[cfg(feature = "layered-io")]
#[inline]
pub(crate) fn suggested_buffer_size<Inner: ReadLayered>(
    internals: &impl Utf8ReaderInternals<Inner>,
) -> usize {
    let floor = Self::minimum_buffer_size(internals);
    let suggestion = internals.inner().suggested_buffer_size();
    max(floor, suggestion)
}
/// Reads from the inner stream into `buf`, returning only validated UTF-8.
///
/// Bytes carried over from an earlier call are emitted first; fresh input is
/// then appended and validated by `process_new_data`. `Ok(0)` is returned only
/// when the inner stream reported end of input and nothing is left pending.
///
/// # Errors
///
/// - `InvalidInput` if `buf` is shorter than 4 bytes.
/// - `Interrupted` if the call produced no output although the stream has not
///   ended; the caller should retry.
/// - Any other error from the inner stream is passed through.
#[inline]
pub(crate) fn read<Inner: Read>(
    internals: &mut impl Utf8ReaderInternals<Inner>,
    buf: &mut [u8],
) -> io::Result<usize> {
    let (nread, done) = Self::process_old_data(internals, buf)?;
    if done {
        return Ok(nread);
    }

    let (size, is_end) = if nread == buf.len() {
        // The carried-over bytes filled `buf` completely. Reading into an
        // empty slice returns `Ok(0)` without the stream being at its end, and
        // treating that as end of input would replace a still-pending partial
        // code point with U+FFFD. Skip the read and report "no new data".
        (0, false)
    } else {
        match internals.inner_mut().read(&mut buf[nread..]) {
            Ok(0) => (0, true),
            Ok(size) => (size, false),
            // Let the carried-over bytes be validated and returned.
            Err(err) if err.kind() == io::ErrorKind::Interrupted => (0, false),
            Err(err) => return Err(err),
        }
    };

    let (nread, done) = Self::process_new_data(internals, buf, nread, size, is_end)?;
    match (nread, done) {
        (0, true) => Ok(0),
        // Returning `Ok(0)` here would look like end of stream to the caller.
        (0, false) => Err(io::Error::new(
            io::ErrorKind::Interrupted,
            "read zero bytes from stream",
        )),
        (_, _) => Ok(nread),
    }
}
/// Reads from `internals` until end of stream, appending the data to `buf`,
/// and returns the result of `read_to_end` (the number of bytes appended).
#[inline]
pub(crate) fn read_to_string<Inner: Read>(
    internals: &mut impl Utf8ReaderInternals<Inner>,
    buf: &mut String,
) -> io::Result<usize> {
    // SAFETY: relies on the reader yielding only valid UTF-8, so the bytes
    // appended to the string's backing vector keep it a valid `String`.
    let bytes = unsafe { buf.as_mut_vec() };
    internals.read_to_end(bytes)
}
}
/// How `process_overflow` treats an incomplete (truncated but so far valid)
/// sequence at the end of the overflow buffer.
enum IncompleteHow {
/// Copy the incomplete bytes into the output buffer unchanged.
Include,
/// Leave the incomplete bytes in the overflow buffer.
Exclude,
/// Emit U+FFFD in place of the incomplete bytes and discard them.
Replace,
}