1use std::char::from_u32_unchecked;
29use std::error::Error;
30use std::fmt;
31use std::io::{self, Read};
32
/// Lookup data for the byte-at-a-time UTF-8 automaton driven by `decode`.
///
/// The array has two parts:
///
/// * entries `0..256` map every byte value to a character class (`0..=11`);
/// * entries `256..364` are the transition table, read as
///   `UTF8D[256 + state + class]`. States are multiples of 12, so each state
///   owns one row of twelve entries.
const UTF8D: [u8; 364] = [
    // ---- Part 1: byte value -> character class ----
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00..=0x0f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10..=0x1f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20..=0x2f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30..=0x3f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40..=0x4f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50..=0x5f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60..=0x6f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70..=0x7f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80..=0x8f
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 0x90..=0x9f
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xa0..=0xaf
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xb0..=0xbf
    8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0..=0xcf
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0..=0xdf
    10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, // 0xe0..=0xef
    11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0xf0..=0xff

    // ---- Part 2: (state, class) -> next state, one row per state ----
    0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, // state 0 (UTF8_ACCEPT)
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, // state 12 (UTF8_REJECT)
    12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, // state 24
    12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, // state 36
    12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, // state 48
    12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, // state 60
    12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, // state 72
    12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, // state 84
    12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, // state 96
];
53
/// Decoder state at a character boundary: the initial state, and the state
/// `decode` returns once a complete code point has been assembled.
pub const UTF8_ACCEPT: u32 = 0;

/// Decoder state returned by `decode` when the input is not valid UTF-8.
pub const UTF8_REJECT: u32 = 12;
56
57#[inline]
58pub fn decode(state: u32, byte: u32, codep: &mut u32) -> u32 {
59 let typ = UTF8D[byte as usize] as u32;
60
61 *codep =
62 if state != UTF8_ACCEPT {
63 (byte & 0x3f) | (*codep << 6)
64 } else {
65 (0xff >> typ) & byte
66 };
67
68 let ix = 256 + state + typ;
69 UTF8D[ix as usize] as u32
70}
71
/// Iterator adapter that decodes the bytes of a reader into `char`s.
pub struct Chars<R> {
    /// Source of the raw bytes.
    reader: R,
    /// Current decoder state, carried across calls to `next`.
    state: u32,
    /// Code point accumulated so far for the character being decoded.
    codep: u32,
}
77
78impl<R: Read> Chars<R> {
79 pub fn new(r: R) -> Chars<R> {
80 Chars {
81 reader: r,
82 state: UTF8_ACCEPT,
83 codep: 0
84 }
85 }
86}
87
88impl<R: Read> Iterator for Chars<R> {
89 type Item = Result<char, ReadError>;
90
91 fn next(&mut self) -> Option<Result<char, ReadError>> {
92 loop {
93 match read_byte(&mut self.reader) {
94 Some(Ok(b)) => {
95 self.state = decode(self.state, b as u32, &mut self.codep);
96 match self.state {
97 UTF8_ACCEPT => unsafe { return Some(Ok(from_u32_unchecked(self.codep))) },
98 UTF8_REJECT => return Some(Err(ReadError::InvalidUtf8)),
99 _ => {}
100 }
101 }
102 Some(Err(e)) => return Some(Err(e.into())),
103 None => return None
104 }
105 }
106 }
107}
108
/// Reads a single byte from `r`.
///
/// Returns `None` when the reader reports end of input (a zero-length read),
/// `Some(Ok(byte))` on success and `Some(Err(_))` on failure. Reads that fail
/// with `ErrorKind::Interrupted` are retried.
fn read_byte<R: Read>(r: &mut R) -> Option<io::Result<u8>> {
    let mut buf = [0u8; 1];
    loop {
        return match r.read(&mut buf) {
            Ok(0) => None,
            Ok(_) => Some(Ok(buf[0])),
            // A signal interrupted the call before any data arrived: retry.
            Err(ref err) if err.kind() == io::ErrorKind::Interrupted => continue,
            Err(err) => Some(Err(err)),
        };
    }
}
124
/// Errors produced while decoding characters from a reader.
#[derive(Debug)]
pub enum ReadError {
    /// The bytes read do not form valid UTF-8.
    InvalidUtf8,
    /// The underlying reader returned an error.
    Io(io::Error),
}
130
131impl fmt::Display for ReadError {
132 fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
133 match *self {
134 ReadError::InvalidUtf8 => write!(f, "invalid utf-8 encoding"),
135 ReadError::Io(ref e) => write!(f, "i/o: {}", e)
136 }
137 }
138}
139
140impl Error for ReadError {
141 fn description(&self) -> &str {
142 match *self {
143 ReadError::InvalidUtf8 => "invalid utf-8 encoding",
144 ReadError::Io(_) => "i/o error"
145 }
146
147 }
148
149 fn cause(&self) -> Option<&Error> {
150 match *self {
151 ReadError::Io(ref e) => Some(e),
152 _ => None
153 }
154 }
155}
156
157impl From<io::Error> for ReadError {
158 fn from(e: io::Error) -> ReadError {
159 ReadError::Io(e)
160 }
161}
162