use std::error::Error;
use std::fmt;
use std::io;
use std::str;
use smallvec::SmallVec;
pub struct CodePoints<R: Iterator<Item = io::Result<u8>>> {
input: R,
buffer: SmallVec<[u8; 4]>,
}
impl<R: Iterator<Item = io::Result<u8>>> Iterator for CodePoints<R> {
type Item = io::Result<char>;
fn next(&mut self) -> Option<Self::Item> {
loop {
if !self.buffer.is_empty() {
match str::from_utf8(&self.buffer) {
Ok(s) => {
let mut chars = s.chars();
let c = chars.next().unwrap();
if c.len_utf8() < self.buffer.len() {
self.buffer = SmallVec::from_slice(&self.buffer[c.len_utf8()..]);
} else {
self.buffer.clear();
}
return Some(Ok(c));
}
Err(e) => {
if self.buffer.len() - e.valid_up_to() >= 4 {
let mut split_point = 1;
let mut badbytes = vec![];
loop {
let (bad, rest) = self.buffer.split_at(split_point);
if rest.is_empty() || str::from_utf8(rest).is_ok() {
badbytes.extend_from_slice(bad);
self.buffer = SmallVec::from_slice(rest);
break;
}
split_point += 1;
}
return Some(Err(io::Error::new(io::ErrorKind::InvalidData,
BadUtf8Error { bytes: badbytes })));
}
}
}
}
match self.input.next() {
Some(Ok(byte)) => {
self.buffer.push(byte);
}
None => {
if self.buffer.is_empty() {
return None;
} else {
let bytes = self.buffer.to_vec();
self.buffer = SmallVec::new();
return Some(Err(io::Error::new(io::ErrorKind::UnexpectedEof,
BadUtf8Error { bytes })));
}
}
Some(Err(e)) => {
return Some(Err(e));
}
}
}
}
}
impl<R: Iterator<Item = io::Result<u8>>> From<R> for CodePoints<R> {
fn from(input: R) -> CodePoints<R> {
CodePoints {
input,
buffer: SmallVec::new(),
}
}
}
/// Error payload describing a run of bytes that could not be decoded as
/// UTF-8.
///
/// `Clone`, `PartialEq` and `Eq` are derived so callers can copy the payload
/// out of an `io::Error` and compare it directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BadUtf8Error {
    /// The undecodable bytes, in the order they were read.
    pub bytes: Vec<u8>,
}

impl Error for BadUtf8Error {
    // `Error::description` is deprecated in favour of `Display`; the override
    // is kept so code still calling the legacy method gets the same text.
    fn description(&self) -> &str {
        "BadUtf8Error"
    }
}

impl fmt::Display for BadUtf8Error {
    /// Writes `Bad UTF-8: ` followed by the debug rendering of the bytes.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Bad UTF-8: {:?}", self.bytes)
    }
}