use std::io::{Error, ErrorKind, Read};
use std::{error, fmt, result, str::from_utf8};
pub type Result<T> = result::Result<T, Error>;
#[derive(Debug)]
pub struct Chars<R> {
pub inner: R,
}
impl<R: Read> Iterator for Chars<R> {
type Item = result::Result<char, CharsError>;
fn next(&mut self) -> Option<result::Result<char, CharsError>> {
let first_byte = match read_one_byte(&mut self.inner)? {
Ok(b) => b,
Err(e) => return Some(Err(CharsError::Other(e))),
};
let width = utf8_char_width(first_byte);
if width == 1 {
return Some(Ok(first_byte as char));
}
if width == 0 {
return Some(Err(CharsError::NotUtf8));
}
let mut buf = [first_byte, 0, 0, 0];
{
let mut start = 1;
while start < width {
match self.inner.read(&mut buf[start..width]) {
Ok(0) => return Some(Err(CharsError::NotUtf8)),
Ok(n) => start += n,
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
Err(e) => return Some(Err(CharsError::Other(e))),
}
}
}
Some(match from_utf8(&buf[..width]).ok() {
Some(s) => Ok(s.chars().next().unwrap()),
None => Err(CharsError::NotUtf8),
})
}
}
fn read_one_byte(reader: &mut dyn Read) -> Option<Result<u8>> {
let mut buf = [0];
loop {
return match reader.read(&mut buf) {
Ok(0) => None,
Ok(..) => Some(Ok(buf[0])),
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
Err(e) => Some(Err(e)),
};
}
}
pub fn utf8_char_width(b: u8) -> usize {
UTF8_CHAR_WIDTH[b as usize] as usize
}
static UTF8_CHAR_WIDTH: [u8; 256] = [
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ];
#[derive(Debug)]
pub enum CharsError {
NotUtf8,
Other(Error),
}
impl fmt::Display for CharsError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
Self::NotUtf8 => write!(f, "byte stream did not contain valid utf8"),
Self::Other(ref e) => e.fmt(f),
}
}
}
impl error::Error for CharsError {
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
match *self {
Self::NotUtf8 => None,
Self::Other(ref e) => Some(e),
}
}
}