use std::char::from_u32;
/// Decodes a single UTF-8 encoded character from its leading byte and a
/// source of follow-up bytes.
///
/// `first` is the leading byte of the sequence. `next` is called once for
/// every continuation byte announced by the leading byte and must return
/// `None` when no more input is available. Any `FnMut` closure is accepted,
/// so a stateful reader such as `|| iter.next()` can be passed directly.
///
/// The sequence length (two to six bytes) is taken from the number of leading
/// one bits in `first`. Overlong encodings are not rejected.
///
/// # Errors
///
/// Returns a descriptive message when
/// * `first` is a continuation byte (`0b10xx_xxxx`),
/// * `first` is `0xFE` or `0xFF`,
/// * `next` returns `None` before the sequence is complete,
/// * a follow-up byte is not a continuation byte, or
/// * the decoded value is not a Unicode scalar value (a surrogate or a value
///   above `U+10FFFF`).
#[allow(dead_code)]
pub fn read_char<F>(first: u8, mut next: F) -> Result<char, String>
where
    F: FnMut() -> Option<u8>,
{
    // Single-byte (ASCII) fast path.
    if first < 0x80 {
        return Ok(char::from(first));
    }

    // The count of leading one bits in the first byte is the sequence length.
    let n_bytes = match (!first).leading_zeros() {
        n @ 2..=6 => n as usize,
        1 => return Err("First byte is continuation byte.".to_string()),
        7..=8 => return Err("WTF is this byte??".to_string()),
        // `first >= 0x80` guarantees at least one leading one bit, and a `u8`
        // cannot have more than eight.
        _ => unreachable!(),
    };

    // The leading byte carries its payload in the low `7 - n_bytes` bits.
    let mut res = u32::from(first & (0x7F_u8 >> n_bytes));
    for _ in 1..n_bytes {
        let byte = next().ok_or_else(|| "Missing UTF-8 byte".to_string())?;
        if byte & 0xC0 != 0x80 {
            return Err(format!(
                "Found non-continuation byte after leading: {}",
                byte
            ));
        }
        // Each continuation byte contributes six payload bits.
        res = (res << 6) | u32::from(byte & 0x3F);
    }

    // Well-framed input can still encode a surrogate or a value beyond
    // U+10FFFF; report that as an error instead of panicking.
    from_u32(res).ok_or_else(|| format!("Invalid Unicode scalar value: {:#X}", res))
}
/// Returns a byte whose `n` least-significant bits are set.
///
/// `n == 0` yields `0`. Any `n` of eight or more yields `0xFF`, since a `u8`
/// has no further bits to set; this also avoids the shift overflow that
/// `1 << 8` would cause.
#[allow(dead_code)]
fn make_mask(n: usize) -> u8 {
    if n >= 8 {
        u8::MAX
    } else {
        // `n < 8`, so the shift cannot overflow and the result is at least 1
        // before the subtraction.
        (1_u8 << n) - 1
    }
}