1use crate::{
2 base::{
3 octet,
4 BaseAtom,
5 },
6 utils::Utils,
7 Contexting,
8 CoreAtom,
9 Parse,
10 Parsed,
11 Streaming,
12 Success,
13};
14
15fn raw<Stream, Context>(stream: Stream) -> Parsed<u32, Stream, Context>
16where
17 Stream: Streaming,
18 Context: Contexting<CoreAtom<Stream>>,
19 Context: Contexting<BaseAtom<u8>>,
20 Stream::Item: Into<u8>,
21{
22 let Success { token: a, stream } = octet.parse(stream)?;
23 if a & 0x80 == 0 {
24 Parsed::Success {
25 token: a as u32,
26 stream,
27 }
28 } else if a & 0xE0 == 0xC0 {
29 let Success { token: b, stream } = octet.parse(stream)?;
30
31 Parsed::Success {
32 token: (a as u32 & 0x1F) << 6 | (b as u32 & 0x3F),
33 stream,
34 }
35 } else if a & 0xF0 == 0xE0 {
36 let Success { token: b, stream } = octet.parse(stream)?;
37 let Success { token: c, stream } = octet.parse(stream)?;
38
39 Parsed::Success {
40 token: (a as u32 & 0x0F) << 12 | (b as u32 & 0x3F) << 6 | (c as u32 & 0x3F),
41 stream,
42 }
43 } else if a & 0xF8 == 0xF0 {
44 let Success { token: b, stream } = octet.parse(stream)?;
45 let Success { token: c, stream } = octet.parse(stream)?;
46 let Success { token: d, stream } = octet.parse(stream)?;
47
48 Parsed::Success {
49 token: (a as u32 & 0x07) << 18
50 | (b as u32 & 0x3F) << 12
51 | (c as u32 & 0x3F) << 6
52 | (d as u32 & 0x3F),
53 stream,
54 }
55 } else {
56 Parsed::Failure(Contexting::new(BaseAtom::Utf8 {}))
57 }
58}
59
60#[cfg_attr(
64 feature = "tracing",
65 tracing::instrument(level = "trace", skip_all, ret(Display))
66)]
67pub fn utf8<Stream, Context>(stream: Stream) -> Parsed<char, Stream, Context>
68where
69 Stream: Streaming,
70 Context: Contexting<CoreAtom<Stream>>,
71 Context: Contexting<BaseAtom<u8>>,
72 Stream::Item: Into<u8>,
73{
74 raw
75 .try_map(|raw| char::from_u32(raw).ok_or_else(|| Contexting::new(BaseAtom::Utf8 {})))
76 .parse(stream)
77}
78
#[cfg(test)]
mod tests {
  use crate::{
    context::Ignore,
    Parsed,
  };

  /// Checks that a well-formed sequence of each encoded length (1 to 4 bytes)
  /// decodes to the expected `char` and leaves nothing in the stream.
  #[test]
  fn utf8() {
    let samples = [("a", 'a'), ("é", 'é'), ("❤", '❤'), ("😀", '😀')];

    for &(encoded, expected) in samples.iter() {
      assert_eq!(
        super::utf8::<_, Ignore>(encoded.as_bytes()),
        Parsed::Success {
          token: expected,
          stream: "".as_bytes(),
        }
      );
    }
  }
}