1use somen::prelude::*;
2
3pub fn utf8<'a, I>() -> impl Parser<I, Output = char>
22where
23 I: Positioned<Ok = u8> + ?Sized + 'a,
24{
25 is_some(|b1| match b1 {
27 0x00..=0x7F => Some(Ok(b1 as u32)),
28 0xC2..=0xF4 => Some(Err(b1)),
29 _ => None,
30 })
31 .then(|res| match res {
33 Ok(c) => value(Ok(c)).left(),
34 Err(b1) => is_some(move |b2| {
35 if b2 & 0xC0 != 0x80
36 || (b1 == 0xE0 && b2 < 0xA0)
37 || (b1 == 0xED && b2 >= 0xA0)
38 || (b1 == 0xF0 && b2 < 0x90)
39 || (b1 == 0xF4 && b2 >= 0x90)
40 {
41 None
42 } else if b1 & 0xE0 == 0xC0 {
43 Some(Ok(((b1 & 0x1F) as u32) << 6 | (b2 & 0x3F) as u32))
44 } else {
45 Some(Err((b1, b2)))
46 }
47 })
48 .right(),
49 })
50 .then(|res| match res {
52 Ok(c) => value(Ok(c)).left(),
53 Err((b1, b2)) => is_some(move |b3| {
54 if b3 & 0xC0 != 0x80 {
55 None
56 } else if b1 & 0xF0 == 0xE0 {
57 Some(Ok(((b1 & 0x0F) as u32) << 12
58 | ((b2 & 0x3F) as u32) << 6
59 | (b3 & 0x3F) as u32))
60 } else {
61 Some(Err((b1, b2, b3)))
62 }
63 })
64 .right(),
65 })
66 .then(|res| match res {
68 Ok(c) => value(c).left(),
69 Err((b1, b2, b3)) => is_some(move |b4| {
70 if b4 & 0xC0 != 0x80 {
71 None
72 } else {
73 Some(
74 ((b1 & 0x07) as u32) << 18
75 | ((b2 & 0x3F) as u32) << 12
76 | ((b3 & 0x3F) as u32) << 6
77 | (b4 & 0x3F) as u32,
78 )
79 }
80 })
81 .right(),
82 })
83 .map(|c| unsafe { char::from_u32_unchecked(c) })
84 .expect("UTF-8 character")
85}