1use std::io;
8use std::io::Read;
9
/// Character substituted for any byte sequence that is not well-formed UTF-8
/// (U+FFFD REPLACEMENT CHARACTER).
static BADCHAR: char = '\u{FFFD}';

// Evaluates `$e`, an expression of type `Result<Option<V>, E>`.
//  * `Err(e)`     -> returned from the enclosing function via `?`
//  * `Ok(Some(v))` -> the macro evaluates to `v`
//  * `Ok(None)`   -> the macro evaluates `$fail` (typically a `return ...`)
macro_rules! try_some_or {
    ($e:expr, $fail:expr) => {
        match $e? {
            Some(v) => v,
            None => $fail,
        }
    };
}

/// Decodes the bytes of an underlying [`Read`] stream into `char`s, one
/// UTF-8 sequence at a time.
///
/// Bytes are requested from the stream one at a time, so wrapping slow
/// sources (files, sockets) in a `std::io::BufReader` before handing them to
/// [`UTF8Reader::new`] avoids one `read` call per byte.
pub struct UTF8Reader<T: Read> {
    stream: T,
}

/// Converts a numeric code point to a `char`, substituting [`BADCHAR`] when
/// the value is not a Unicode scalar value (a surrogate, or above U+10FFFF).
fn tochar(codepoint: u32) -> char {
    ::std::char::from_u32(codepoint).unwrap_or(BADCHAR)
}

impl<T: Read> UTF8Reader<T> {
    /// Wraps `reader`. Nothing is read from it until [`getc`](Self::getc)
    /// is called.
    pub fn new(reader: T) -> UTF8Reader<T> {
        UTF8Reader { stream: reader }
    }

    /// Reads a single byte from the stream.
    ///
    /// Returns `Ok(None)` when the stream reports end of input (a read of
    /// zero bytes). `ErrorKind::Interrupted` is retried, as the `Read::read`
    /// contract recommends; every other error is returned to the caller.
    fn getb(&mut self) -> io::Result<Option<u8>> {
        let mut b = [0u8];
        loop {
            match self.stream.read(&mut b) {
                Ok(0) => return Ok(None),
                Ok(_) => return Ok(Some(b[0])),
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
    }

    /// Decodes and returns the next character from the stream.
    ///
    /// # Returns
    ///
    /// * `Ok(None)` - the stream was already at end of input.
    /// * `Ok(Some(c))` - the next well-formed UTF-8 sequence decoded to `c`.
    /// * `Ok(Some('\u{FFFD}'))` - the input was malformed. This covers:
    ///   a stray continuation byte, an invalid lead byte (`0xF8..=0xFF`),
    ///   end of input in the middle of a sequence, a non-continuation byte
    ///   where a continuation byte was required (that byte is consumed and
    ///   is *not* re-examined as the start of a new sequence), an overlong
    ///   encoding, a surrogate, or a value above U+10FFFF.
    ///
    /// # Errors
    ///
    /// Any I/O error reported by the underlying stream, other than
    /// `ErrorKind::Interrupted`, is returned unchanged.
    pub fn getc(&mut self) -> io::Result<Option<char>> {
        let ch1 = try_some_or!(self.getb(), return Ok(None)) as u32;

        // Classify the lead byte:
        //   (continuation bytes to read,
        //    smallest code point that legitimately needs this length,
        //    payload bits carried by the lead byte)
        let (extra, min, mut codepoint) = if ch1 & 0x80 == 0x00 {
            // 0xxxxxxx: plain ASCII, nothing more to read.
            return Ok(Some(tochar(ch1)));
        } else if ch1 & 0xE0 == 0xC0 {
            (1, 0x80, ch1 & 0x1F)
        } else if ch1 & 0xF0 == 0xE0 {
            (2, 0x800, ch1 & 0x0F)
        } else if ch1 & 0xF8 == 0xF0 {
            (3, 0x1_0000, ch1 & 0x07)
        } else {
            // 10xxxxxx (stray continuation byte) or 11111xxx (invalid lead).
            return Ok(Some(BADCHAR));
        };

        for _ in 0..extra {
            let next = try_some_or!(self.getb(), return Ok(Some(BADCHAR))) as u32;
            if next & 0xC0 != 0x80 {
                return Ok(Some(BADCHAR));
            }
            // Mask first, then shift: `<<` binds tighter than `&` in Rust,
            // so the parentheses are required.
            codepoint = (codepoint << 6) | (next & 0x3F);
        }

        // Reject overlong forms (a code point encoded with more bytes than
        // necessary); they are ill-formed UTF-8.
        if codepoint < min {
            return Ok(Some(BADCHAR));
        }

        Ok(Some(tochar(codepoint)))
    }
}
116
117impl<T: Read> Iterator for UTF8Reader<T>
119{
120 type Item = io::Result<char>;
121 fn next(&mut self) -> Option<io::Result<char>>
122 {
123 match self.getc()
125 {
126 Ok(None) => None,
128 Ok(Some(c)) => Some( Ok(c) ),
129 Err(e) => Some( Err( e ) ),
130 }
131 }
132}
133
// Placeholder test: the body is empty, so it makes no assertions about the
// decoder. TODO: replace with real decoding cases.
#[test]
fn it_works() {
}
137
138