moore_common/grind/
utf8.rs1use crate::grind::Grinder;
8use std;
9
10pub struct Utf8<T: Grinder<Item = Option<u8>>> {
11 inner: T,
12 current: usize,
13 peek: Option<u8>,
14}
15
16impl<T> Utf8<T>
17where
18 T: Grinder<Item = Option<u8>>,
19{
20 pub fn new(mut inner: T) -> Utf8<T> {
22 let peek = inner.next();
23 Utf8 {
24 inner: inner,
25 current: 0,
26 peek: peek,
27 }
28 }
29
30 fn bump(&mut self) {
32 self.peek = self.inner.next();
33 self.current += 1;
34 }
35}
36
37impl<T> Grinder for Utf8<T>
38where
39 T: Grinder<Item = Option<u8>>,
40{
41 type Item = Option<(usize, char, u8)>;
42 type Error = T::Error;
43
44 fn next(&mut self) -> Self::Item {
45 let offset = self.current;
46 let lead = match self.peek {
47 Some(c) => c,
48 None => return None,
49 };
50 self.bump();
51 let indicator = (!lead).leading_zeros() as u8;
52 match indicator {
53 0 => Some((offset, lead as char, 1)),
55
56 1 => Some((offset, '�', 1)),
58
59 2 | 3 | 4 => {
61 let mut v = lead as u32 & (0xFF >> indicator);
62 for i in 1..indicator {
63 match self.peek {
64 Some(c) if (c >> 6 == 2) => v = (v << 6) | (c as u32 & 0x3F),
65 _ => return Some((offset, '�', i)),
66 }
67 self.bump();
68 }
69 Some((offset, std::char::from_u32(v).unwrap_or('�'), indicator))
70 }
71
72 _ => Some((offset, '�', 1)),
74 }
75 }
76
77 fn emit(&mut self, err: Self::Error) {
78 self.inner.emit(err)
79 }
80}