#[cfg(windows)]
/// Returns `true` if `bytes` is well-formed WTF-8.
///
/// WTF-8 is UTF-8 extended so that unpaired surrogate code points
/// (U+D800..=U+DFFF) may appear as ordinary three-byte sequences. The input
/// is rejected when it contains any of the following:
///
/// * a stray continuation byte or an impossible lead byte
///   (`0x80..=0xBF`, `0xC0`, `0xC1`, `0xF5..=0xFF`);
/// * a sequence that is truncated or whose trailing bytes are not
///   continuation bytes (`0x80..=0xBF`);
/// * an overlong encoding (`E0 80..9F ..`, `F0 80..8F ..`);
/// * a code point above U+10FFFF (`F4 90.. ..`);
/// * a lead surrogate immediately followed by a trail surrogate. Such a pair
///   denotes a supplementary code point, which well-formed WTF-8 must encode
///   as a single four-byte sequence.
///
/// The empty slice is valid.
pub(super) fn valid_wtf8(bytes: &[u8]) -> bool {
    let mut rest = bytes;
    while !rest.is_empty() {
        // Each arm matches one complete, well-formed sequence at the front of
        // `rest` and yields its length. Because the pattern itself proves the
        // bytes are present, the slicing below can never go out of bounds.
        let width = match rest {
            // ASCII.
            [0x00..=0x7f, ..] => 1,

            // Two bytes: U+0080..=U+07FF. Leads C0/C1 would be overlong.
            [0xc2..=0xdf, 0x80..=0xbf, ..] => 2,

            // Encoded surrogate pair (lead D800..=DBFF, then trail
            // DC00..=DFFF). Must come before the general three-byte arm,
            // which would otherwise accept each half on its own.
            [0xed, 0xa0..=0xaf, 0x80..=0xbf, 0xed, 0xb0..=0xbf, 0x80..=0xbf, ..] => {
                return false;
            }

            // Three bytes: U+0800..=U+FFFF. With lead E0 the second byte
            // must be at least A0, otherwise the encoding is overlong.
            // Unlike strict UTF-8, ED A0..BF (lone surrogates) is allowed.
            [0xe0, 0xa0..=0xbf, 0x80..=0xbf, ..] => 3,
            [0xe1..=0xef, 0x80..=0xbf, 0x80..=0xbf, ..] => 3,

            // Four bytes: U+10000..=U+10FFFF. F0 needs a second byte of at
            // least 90 (else overlong); F4 needs at most 8F (else the code
            // point exceeds U+10FFFF).
            [0xf0, 0x90..=0xbf, 0x80..=0xbf, 0x80..=0xbf, ..] => 4,
            [0xf1..=0xf3, 0x80..=0xbf, 0x80..=0xbf, 0x80..=0xbf, ..] => 4,
            [0xf4, 0x80..=0x8f, 0x80..=0xbf, 0x80..=0xbf, ..] => 4,

            // Stray continuation byte, invalid lead byte, bad continuation,
            // or a sequence cut short by the end of the input.
            _ => return false,
        };
        rest = &rest[width..];
    }
    true
}
#[cfg(windows)]
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `valid_wtf8`: each entry pairs an input byte
    /// string with the verdict the validator is expected to return.
    #[test]
    fn test_valid_wtf8() {
        let cases: &[(&[u8], bool)] = &[
            // Plain ASCII.
            (b"hello", true),
            // Two-byte sequences; leads 0xC0 and 0xC1 are overlong.
            (&[0xc2, 0xa0], true),
            (&[0xc0, 0x80], false),
            (&[0xc1, 0xbf], false),
            // Three-byte sequences; 0xE0 followed by a byte below 0xA0 is overlong.
            (&[0xe0, 0xa0, 0x80], true),
            (&[0xe0, 0x9f, 0xbf], false),
            // Encoded surrogates U+D800 and U+DFFF are accepted.
            (&[0xed, 0xa0, 0x80], true),
            (&[0xed, 0xbf, 0xbf], true),
            // Four-byte sequences: smallest valid, overlong, above U+10FFFF.
            (&[0xf0, 0x90, 0x80, 0x80], true),
            (&[0xf0, 0x8f, 0xbf, 0xbf], false),
            (&[0xf4, 0x90, 0x80, 0x80], false),
            // Second byte is not a continuation byte.
            (&[0xc2, 0x00], false),
            // Sequences cut short by the end of the input.
            (&[0xc2], false),
            (&[0xe0, 0xa0], false),
            (&[0xf0, 0x90, 0x80], false),
        ];

        for (bytes, want) in cases.iter().copied() {
            assert_eq!(valid_wtf8(bytes), want, "input: {:?}", bytes);
        }
    }
}