1use std::char::from_u32_unchecked;
29use std::error::Error;
30use std::fmt;
31use std::io::{self, Read};
32
/// Lookup table driving the byte-at-a-time UTF-8 state machine used by `decode`.
///
/// Layout:
/// * entries `0..256` map every possible byte value to a character class
///   in `0..=11`;
/// * entries `256..364` are the transition table, read at index
///   `256 + state + class`. State values are multiples of 12 (the number of
///   classes), so each 12-entry row below belongs to exactly one state.
const UTF8D: [u8; 364] = [
    // ---- byte -> character class, sixteen bytes per row ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00..=0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10..=0x1F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20..=0x2F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30..=0x3F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40..=0x4F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50..=0x5F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60..=0x6F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70..=0x7F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80..=0x8F
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 0x90..=0x9F
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xA0..=0xAF
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xB0..=0xBF
    8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0..=0xCF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xD0..=0xDF
    10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, // 0xE0..=0xEF
    11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0xF0..=0xFF
    // ---- (state, class) -> next state, one row per state ----
    0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, // state 0 (accept)
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, // state 12 (reject)
    12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, // state 24
    12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, // state 36
    12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, // state 48
    12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, // state 60
    12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, // state 72
    12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, // state 84
    12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, // state 96
];
53
/// Decoder state in which a complete code point has just been produced; also
/// the state a fresh decoder starts in.
pub const UTF8_ACCEPT: u32 = 0;
/// Decoder state signalling that the bytes seen so far are not valid UTF-8.
pub const UTF8_REJECT: u32 = 12;
56
57#[inline]
58pub fn decode(state: u32, byte: u32, codep: &mut u32) -> u32 {
59 let typ = UTF8D[byte as usize] as u32;
60
61 *codep = if state != UTF8_ACCEPT {
62 (byte & 0x3f) | (*codep << 6)
63 } else {
64 (0xff >> typ) & byte
65 };
66
67 let ix = 256 + state + typ;
68 UTF8D[ix as usize] as u32
69}
70
/// Iterator adapter that decodes the bytes of a reader as UTF-8 and yields
/// them as `char`s.
///
/// Bytes are requested from the reader one at a time, so an unbuffered source
/// may benefit from being wrapped in `std::io::BufReader` first.
pub struct Chars<R> {
    /// Underlying byte source.
    reader: R,
    /// Current decoder state, as returned by `decode`.
    state: u32,
    /// Code point accumulated so far for the sequence in progress.
    codep: u32,
}
76
77impl<R: Read> Chars<R> {
78 pub fn new(r: R) -> Chars<R> {
79 Chars {
80 reader: r,
81 state: UTF8_ACCEPT,
82 codep: 0,
83 }
84 }
85}
86
87impl<R: Read> Iterator for Chars<R> {
88 type Item = Result<char, ReadError>;
89
90 fn next(&mut self) -> Option<Result<char, ReadError>> {
91 loop {
92 match read_byte(&mut self.reader) {
93 Some(Ok(b)) => {
94 self.state = decode(self.state, b as u32, &mut self.codep);
95 match self.state {
96 UTF8_ACCEPT => unsafe { return Some(Ok(from_u32_unchecked(self.codep))) },
97 UTF8_REJECT => return Some(Err(ReadError::InvalidUtf8)),
98 _ => {}
99 }
100 }
101 Some(Err(e)) => return Some(Err(e.into())),
102 None => return None,
103 }
104 }
105 }
106}
107
/// Reads exactly one byte from `r`.
///
/// Returns `None` when the reader reports end of input (a zero-length read),
/// `Some(Ok(byte))` on success and `Some(Err(_))` for any I/O error other than
/// `ErrorKind::Interrupted`, which is retried transparently.
fn read_byte<R: Read>(r: &mut R) -> Option<io::Result<u8>> {
    let mut buf = [0u8; 1];
    loop {
        match r.read(&mut buf) {
            Ok(0) => return None,
            Ok(_) => return Some(Ok(buf[0])),
            // An interrupted read is not a failure; go round the loop again.
            Err(ref err) if err.kind() == io::ErrorKind::Interrupted => {}
            Err(err) => return Some(Err(err)),
        }
    }
}
124
/// Error produced while decoding a UTF-8 byte stream.
#[derive(Debug)]
pub enum ReadError {
    /// The input bytes do not form valid UTF-8.
    InvalidUtf8,
    /// The underlying reader failed with the wrapped I/O error.
    Io(io::Error),
}

impl fmt::Display for ReadError {
    /// Writes a short human-readable message; for `Io` the wrapped error's own
    /// message is appended.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        match *self {
            ReadError::InvalidUtf8 => f.write_str("invalid utf-8 encoding"),
            ReadError::Io(ref e) => write!(f, "i/o: {}", e),
        }
    }
}

impl Error for ReadError {
    /// Legacy static description, kept so existing callers of the deprecated
    /// `Error::description` still receive the same text.
    fn description(&self) -> &str {
        match *self {
            ReadError::InvalidUtf8 => "invalid utf-8 encoding",
            ReadError::Io(_) => "i/o error",
        }
    }

    /// Returns the wrapped `io::Error` for the `Io` variant and `None`
    /// otherwise.
    ///
    /// Implemented as `source` rather than the deprecated `cause` so that
    /// error-chain walkers can reach the I/O error; the trait's default
    /// `cause` forwards to `source`, so older callers see the same value.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match *self {
            ReadError::Io(ref e) => Some(e),
            ReadError::InvalidUtf8 => None,
        }
    }
}

impl From<io::Error> for ReadError {
    /// Wraps an I/O error in `ReadError::Io`, enabling `?` and `.into()`.
    fn from(e: io::Error) -> ReadError {
        ReadError::Io(e)
    }
}