/// Validates `bytes` as UTF-8 and returns them as a borrowed `&str`.
///
/// When the `simd-utf8` feature is enabled the happy path is validated by
/// `simdutf8::basic::from_utf8`; otherwise `std::str::from_utf8` is used
/// directly.
///
/// # Errors
///
/// Returns [`std::str::Utf8Error`] when `bytes` is not valid UTF-8. The error
/// always describes the caller's input (its `valid_up_to()` / `error_len()`
/// are accurate) regardless of which feature set is compiled in.
#[inline]
pub fn from_utf8(bytes: &[u8]) -> Result<&str, std::str::Utf8Error> {
    #[cfg(feature = "simd-utf8")]
    {
        // The simdutf8 error cannot be turned into a `std::str::Utf8Error`, and a
        // std error cannot be constructed by hand. On the (cold) failure path we
        // therefore re-validate with std so the returned error reports the real
        // position of the invalid sequence instead of a fabricated one.
        simdutf8::basic::from_utf8(bytes).or_else(|_| std::str::from_utf8(bytes))
    }
    #[cfg(not(feature = "simd-utf8"))]
    {
        std::str::from_utf8(bytes)
    }
}
/// Converts an owned byte buffer into a `String`, validating it as UTF-8.
///
/// With the `simd-utf8` feature enabled, validation is done by
/// `simdutf8::basic::from_utf8` and the buffer is then converted without a
/// second check; without the feature this is a plain `String::from_utf8`.
///
/// # Errors
///
/// Returns [`std::string::FromUtf8Error`] when `bytes` is not valid UTF-8. In
/// both configurations the error value is produced by `String::from_utf8`.
#[inline]
pub fn string_from_utf8(bytes: Vec<u8>) -> Result<String, std::string::FromUtf8Error> {
    #[cfg(feature = "simd-utf8")]
    {
        if simdutf8::basic::from_utf8(&bytes).is_err() {
            // Hand the buffer to std so the caller receives std's own error value.
            return String::from_utf8(bytes);
        }

        #[allow(unsafe_code)]
        // SAFETY: `bytes` was accepted by the UTF-8 validator just above and has
        // not been modified since, so it holds valid UTF-8.
        let text = unsafe { String::from_utf8_unchecked(bytes) };
        Ok(text)
    }
    #[cfg(not(feature = "simd-utf8"))]
    {
        String::from_utf8(bytes)
    }
}
/// Reports whether `bytes` is a well-formed UTF-8 sequence.
///
/// Uses `simdutf8::basic::from_utf8` when the `simd-utf8` feature is enabled
/// and `std::str::from_utf8` otherwise; only the success/failure outcome is
/// observed.
#[inline]
pub fn is_valid_utf8(bytes: &[u8]) -> bool {
    // Select the validator at compile time, then call it through one alias.
    #[cfg(feature = "simd-utf8")]
    use simdutf8::basic::from_utf8 as validate;
    #[cfg(not(feature = "simd-utf8"))]
    use std::str::from_utf8 as validate;

    validate(bytes).is_ok()
}
/// Unit tests for the UTF-8 helpers defined in the parent module.
#[cfg(test)]
mod tests {
    use super::*;

    // --- from_utf8 -------------------------------------------------------

    #[test]
    fn test_valid_ascii() {
        assert_eq!(from_utf8(b"Hello, world!"), Ok("Hello, world!"));
    }

    #[test]
    fn test_valid_utf8_multibyte() {
        let expected = "Hello, 世界! 🌍";
        assert_eq!(from_utf8(expected.as_bytes()), Ok(expected));
    }

    #[test]
    fn test_empty_bytes() {
        assert_eq!(from_utf8(b""), Ok(""));
    }

    #[test]
    fn test_invalid_utf8() {
        // 0xFF can never appear in well-formed UTF-8.
        let invalid = [0xFF_u8, 0xFE];
        assert!(from_utf8(&invalid).is_err());
    }

    // --- is_valid_utf8 ---------------------------------------------------

    #[test]
    fn test_is_valid_utf8_true() {
        assert!(is_valid_utf8("Valid UTF-8 text".as_bytes()));
    }

    #[test]
    fn test_is_valid_utf8_false() {
        // Overlong encoding of NUL.
        let overlong = [0xC0_u8, 0x80];
        assert!(!is_valid_utf8(&overlong));
    }

    // --- string_from_utf8 ------------------------------------------------

    #[test]
    fn test_string_from_utf8_valid() {
        let owned = string_from_utf8(b"Test string".to_vec()).unwrap();
        assert_eq!(owned, "Test string");
    }

    #[test]
    fn test_string_from_utf8_invalid() {
        let invalid = vec![0xFF_u8, 0xFE];
        assert!(string_from_utf8(invalid).is_err());
    }

    // --- larger / mixed inputs ---------------------------------------------

    #[test]
    fn test_large_valid_text() {
        let text = "a".repeat(100_000);
        assert_eq!(from_utf8(text.as_bytes()).map(str::len), Ok(100_000));
    }

    #[test]
    fn test_mixed_unicode() {
        let text = "Latin αλφα 中文 العربية עברית Кириллица";
        assert_eq!(from_utf8(text.as_bytes()), Ok(text));
    }
}