/// Outcome of checking a byte slice for well-formed UTF-8.
///
/// Returned by [`validate`]. The `Invalid` payload is copied verbatim from the
/// error reported by `simdutf8::compat::from_utf8`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Validity {
    /// The whole input is well-formed UTF-8.
    Valid,
    /// The input contains at least one malformed sequence.
    Invalid {
        /// Number of leading bytes that were verified as valid UTF-8, i.e. the
        /// offset at which the first problem was found.
        valid_up_to: usize,
        /// Length of the offending sequence as reported by the underlying
        /// error. `None` is reported when the input stops in the middle of a
        /// multi-byte sequence (for example a trailing `0xF0` lead byte).
        error_len: Option<usize>,
    },
}
impl Validity {
    /// Returns `true` when this value is [`Validity::Valid`] and `false` for
    /// any [`Validity::Invalid`] report.
    #[must_use]
    pub const fn is_valid(&self) -> bool {
        // Exhaustive on purpose: a new variant must be classified explicitly.
        match self {
            Self::Valid => true,
            Self::Invalid { .. } => false,
        }
    }
}
/// Reports whether `bytes` is well-formed UTF-8.
///
/// Delegates to `simdutf8::basic::from_utf8` and keeps only the success flag;
/// the decoded `&str` and any error value are discarded. Use [`validate`] when
/// the position of the first malformed sequence is needed.
#[must_use]
pub fn is_valid(bytes: &[u8]) -> bool {
    let outcome = simdutf8::basic::from_utf8(bytes);
    outcome.is_ok()
}
/// Checks `bytes` for well-formed UTF-8 and describes the first problem found.
///
/// Delegates to `simdutf8::compat::from_utf8`. On success the decoded `&str`
/// is discarded and [`Validity::Valid`] is returned; on failure the error's
/// `valid_up_to()` and `error_len()` values are copied into
/// [`Validity::Invalid`].
#[must_use]
pub fn validate(bytes: &[u8]) -> Validity {
    simdutf8::compat::from_utf8(bytes).map_or_else(
        |failure| Validity::Invalid {
            valid_up_to: failure.valid_up_to(),
            error_len: failure.error_len(),
        },
        |_| Validity::Valid,
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every entry point accepts `bytes` as well-formed UTF-8.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather
    /// than at this helper.
    #[track_caller]
    fn assert_valid(bytes: &[u8]) {
        assert!(is_valid(bytes));
        let report = validate(bytes);
        assert_eq!(report, Validity::Valid);
        assert!(report.is_valid());
    }

    /// Asserts that every entry point rejects `bytes` and that `validate`
    /// reports exactly the given `valid_up_to` / `error_len`.
    ///
    /// Checking `is_valid` here as well keeps the `basic` and `compat` code
    /// paths covered by the same fixtures.
    #[track_caller]
    fn assert_invalid(bytes: &[u8], valid_up_to: usize, error_len: Option<usize>) {
        assert!(!is_valid(bytes));
        let report = validate(bytes);
        assert!(!report.is_valid());
        assert_eq!(
            report,
            Validity::Invalid {
                valid_up_to,
                error_len,
            }
        );
    }

    #[test]
    fn empty_input_is_valid() {
        assert_valid(b"");
    }

    #[test]
    fn ascii_is_valid() {
        assert_valid(b"The quick brown fox");
    }

    #[test]
    fn multilingual_utf8_is_valid() {
        let sample = "Grüße — 日本語 — 😊 — Здравствуйте".as_bytes();
        assert_valid(sample);
    }

    #[test]
    fn lone_continuation_byte_is_invalid() {
        assert_invalid(b"\x80", 0, Some(1));
    }

    #[test]
    fn lone_ff_byte_is_invalid() {
        // The valid prefix is the single ASCII byte `a`.
        assert_invalid(b"a\xFFb", 1, Some(1));
    }

    #[test]
    fn truncated_multibyte_reports_no_error_len() {
        // `0xF0` starts a four-byte sequence that the input never finishes.
        assert_invalid(b"abc\xF0", 3, None);
    }

    #[test]
    fn utf16le_bytes_are_not_valid_utf8() {
        // UTF-16LE byte-order mark followed by "Hi"; only rejection is
        // asserted here, not the exact error report.
        let utf16 = b"\xFF\xFE\x48\x00\x69\x00";
        assert!(!is_valid(utf16));
        assert!(!validate(utf16).is_valid());
    }
}