use std::char::from_u32_unchecked;
use std::error::Error;
use std::fmt;
use std::io::{self, Read};
// Lookup table driving the UTF-8 decoding state machine used by `decode`.
//
// Layout (364 entries):
//   * entries 0..256   map each possible byte value to a character class (0..=11);
//   * entries 256..364 are the state transition table: the next state is found at
//     index `256 + state + class`. States are multiples of 12, so each state owns
//     a row of 12 entries, one per character class.
//
// NOTE(review): the values appear to match the table from Bjoern Hoehrmann's
// "Flexible and Economical UTF-8 Decoder" -- confirm before modifying any entry.
const UTF8D: [u8; 364] = [
// Character classes for bytes 0x00..=0x7F.
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
// Character classes for bytes 0x80..=0x9F.
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
// Character classes for bytes 0xA0..=0xBF.
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
// Character classes for bytes 0xC0..=0xDF.
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
// Character classes for bytes 0xE0..=0xFF.
10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
// Transition rows for states 0 and 12.
0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
// Transition rows for states 24 and 36.
12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
// Transition rows for states 48 and 60.
12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
// Transition rows for states 72 and 84.
12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
// Transition row for state 96.
12,36,12,12,12,12,12,12,12,12,12,12,
];
/// Decoder state meaning a complete code point has just been decoded.
/// It is also the state a fresh decoder starts in.
pub const UTF8_ACCEPT: u32 = 0;
/// Decoder state meaning the bytes seen so far are not valid UTF-8.
/// Every transition out of this state in the table leads back to it.
pub const UTF8_REJECT: u32 = 12;
/// Feeds one byte into the UTF-8 state machine.
///
/// * `state` - the state returned by the previous call, or `UTF8_ACCEPT`
///   when starting a new sequence.
/// * `byte`  - the next input byte. It is used directly as an index into the
///   class portion of the table, so it is expected to be in `0..=255`.
/// * `codep` - accumulator for the code point being assembled. It holds the
///   finished code point once the returned state is `UTF8_ACCEPT`.
///
/// Returns the new state: `UTF8_ACCEPT` when a code point is complete,
/// `UTF8_REJECT` when the input is invalid, any other value when more
/// bytes are needed.
#[inline]
pub fn decode(state: u32, byte: u32, codep: &mut u32) -> u32 {
    let class = u32::from(UTF8D[byte as usize]);
    *codep = match state {
        // First byte of a sequence: the class tells how many leading marker
        // bits to strip from the byte.
        UTF8_ACCEPT => (0xff_u32 >> class) & byte,
        // Subsequent byte: shift in its six payload bits.
        _ => (byte & 0x3f) | (*codep << 6),
    };
    let next = 256 + state + class;
    u32::from(UTF8D[next as usize])
}
/// Iterator adaptor that decodes the bytes of a reader as UTF-8 and yields
/// one `char` (or an error) per item.
pub struct Chars<R> {
/// Underlying byte source.
reader: R,
/// Current decoder state, carried over between bytes; starts at `UTF8_ACCEPT`.
state: u32,
/// Code point accumulated so far for the sequence being decoded.
codep: u32
}
impl<R: Read> Chars<R> {
    /// Wraps `r` in a character iterator whose decoder starts in the
    /// `UTF8_ACCEPT` state with an empty code point accumulator.
    pub fn new(r: R) -> Self {
        Chars { codep: 0, state: UTF8_ACCEPT, reader: r }
    }
}
impl<R: Read> Iterator for Chars<R> {
type Item = Result<char, ReadError>;
fn next(&mut self) -> Option<Result<char, ReadError>> {
loop {
match read_byte(&mut self.reader) {
Some(Ok(b)) => {
self.state = decode(self.state, b as u32, &mut self.codep);
match self.state {
UTF8_ACCEPT => unsafe { return Some(Ok(from_u32_unchecked(self.codep))) },
UTF8_REJECT => return Some(Err(ReadError::InvalidUtf8)),
_ => {}
}
}
Some(Err(e)) => return Some(Err(e.into())),
None => return None
}
}
}
}
/// Reads a single byte from `r`.
///
/// Returns `None` when the reader reports end of input (a read of zero
/// bytes), `Some(Ok(byte))` on success and `Some(Err(_))` on failure.
/// Reads that fail with `ErrorKind::Interrupted` are retried transparently.
fn read_byte<R: Read>(r: &mut R) -> Option<io::Result<u8>> {
    let mut buf = [0u8; 1];
    loop {
        return match r.read(&mut buf) {
            Ok(0) => None,
            Ok(_) => Some(Ok(buf[0])),
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => Some(Err(e)),
        };
    }
}
/// Errors that can occur while reading characters from a byte source.
#[derive(Debug)]
pub enum ReadError {
/// The bytes read do not form valid UTF-8.
InvalidUtf8,
/// The underlying reader reported an I/O error.
Io(io::Error)
}
/// User-facing rendering of a `ReadError`: a fixed message for invalid
/// UTF-8, and the wrapped error's own message prefixed with `i/o: ` for
/// I/O failures.
impl fmt::Display for ReadError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            ReadError::Io(inner) => write!(f, "i/o: {}", inner),
            ReadError::InvalidUtf8 => f.write_str("invalid utf-8 encoding"),
        }
    }
}
impl Error for ReadError {
fn description(&self) -> &str {
match *self {
ReadError::InvalidUtf8 => "invalid utf-8 encoding",
ReadError::Io(_) => "i/o error"
}
}
fn cause(&self) -> Option<&Error> {
match *self {
ReadError::Io(ref e) => Some(e),
_ => None
}
}
}
/// Lets `io::Error` values be converted into `ReadError` (for example via
/// `.into()` or `?`) by wrapping them in the `Io` variant.
impl From<io::Error> for ReadError {
    fn from(e: io::Error) -> Self {
        ReadError::Io(e)
    }
}