1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
use crate::grind::Grinder;
use std;
/// Adapter that sits on top of a byte-producing [`Grinder`] and keeps one
/// byte of lookahead plus a running position counter.
pub struct Utf8<T>
where
    T: Grinder<Item = Option<u8>>,
{
    /// The wrapped byte source.
    inner: T,
    /// Number of bytes taken out of the lookahead slot so far; this is the
    /// position reported for the byte currently held in `peek`.
    current: usize,
    /// The most recent item pulled from `inner` that has not been consumed
    /// yet (`None` once `inner` yielded `None`).
    peek: Option<u8>,
}
impl<T> Utf8<T>
where
    T: Grinder<Item = Option<u8>>,
{
    /// Wraps `inner`, immediately pulling its first item into the lookahead
    /// slot and starting the position counter at zero.
    pub fn new(mut inner: T) -> Utf8<T> {
        let first = inner.next();
        Self {
            inner,
            current: 0,
            peek: first,
        }
    }

    /// Consumes the byte in the lookahead slot: refills the slot from the
    /// wrapped source and advances the position counter by one.
    fn bump(&mut self) {
        let upcoming = self.inner.next();
        self.peek = upcoming;
        self.current += 1;
    }
}
/// Decodes the bytes produced by the wrapped grinder as UTF-8.
///
/// Every call to `next` yields `Some((offset, ch, len))`, where `offset` is
/// the position of the first byte of the sequence, `ch` is the decoded
/// character and `len` is the number of bytes that were consumed for it.
/// Malformed input never aborts decoding: it is reported as U+FFFD
/// (REPLACEMENT CHARACTER) together with the number of bytes swallowed.
/// `None` is returned once the byte source is exhausted.
impl<T> Grinder for Utf8<T>
where
    T: Grinder<Item = Option<u8>>,
{
    type Item = Option<(usize, char, u8)>;
    type Error = T::Error;

    /// Decodes the next character from the byte stream.
    ///
    /// U+FFFD is produced for:
    /// * a stray continuation byte (`10xxxxxx`) or a lead byte announcing
    ///   more than four bytes (`len` is 1);
    /// * a sequence cut short by a non-continuation byte or by the end of
    ///   input (`len` is the number of bytes consumed up to that point; the
    ///   offending byte is left for the next call);
    /// * an overlong encoding, i.e. a value that would have fit in a shorter
    ///   sequence (`len` is the full sequence length);
    /// * a value that is not a Unicode scalar value, such as a surrogate or
    ///   anything above U+10FFFF (`len` is the full sequence length).
    fn next(&mut self) -> Self::Item {
        const REPLACEMENT: char = '\u{FFFD}';

        let offset = self.current;
        let lead = self.peek?;
        self.bump();

        // The count of leading one bits in the lead byte encodes the
        // sequence length (0 = ASCII, 1 = continuation byte, 2..=4 = lead).
        let indicator = (!lead).leading_zeros() as u8;
        match indicator {
            0 => Some((offset, lead as char, 1)),
            2..=4 => {
                // Payload bits of the lead byte: everything after the length prefix.
                let mut v = u32::from(lead) & (0xFF_u32 >> indicator);
                for consumed in 1..indicator {
                    match self.peek {
                        Some(c) if c & 0xC0 == 0x80 => v = (v << 6) | (u32::from(c) & 0x3F),
                        // Not a continuation byte (or end of input): report what was
                        // consumed so far and leave the current byte untouched.
                        _ => return Some((offset, REPLACEMENT, consumed)),
                    }
                    self.bump();
                }
                // Smallest value that legitimately needs `indicator` bytes. Anything
                // below it is an overlong encoding, which UTF-8 forbids: accepting it
                // lets e.g. `C0 80` smuggle a NUL past byte-level filters.
                let min = match indicator {
                    2 => 0x80,
                    3 => 0x800,
                    _ => 0x1_0000,
                };
                let ch = if v < min {
                    REPLACEMENT
                } else {
                    // Rejects surrogates and values above U+10FFFF.
                    std::char::from_u32(v).unwrap_or(REPLACEMENT)
                };
                Some((offset, ch, indicator))
            }
            // 1: stray continuation byte; 5 and up: not a valid UTF-8 lead byte.
            _ => Some((offset, REPLACEMENT, 1)),
        }
    }

    /// Forwards `err` unchanged to the wrapped grinder.
    fn emit(&mut self, err: Self::Error) {
        self.inner.emit(err)
    }
}