#![cfg(feature = "simdutf")]
use v8::simdutf;
/// Checks `simdutf::validate_utf8` against well-formed and malformed input.
#[test]
fn validate_utf8() {
  // ASCII, a 2-byte sequence, 3-byte sequences and the empty slice.
  let well_formed: [&[u8]; 4] =
    [b"hello world", "café".as_bytes(), "日本語".as_bytes(), b""];
  for bytes in well_formed {
    assert!(simdutf::validate_utf8(bytes), "expected valid: {bytes:?}");
  }

  // A lone continuation byte, and the overlong two-byte encoding of U+0000.
  let malformed: [&[u8]; 2] = [&[0x80], &[0xC0, 0x80]];
  for bytes in malformed {
    assert!(!simdutf::validate_utf8(bytes), "expected invalid: {bytes:?}");
  }
}
/// Checks the result object returned by `simdutf::validate_utf8_with_errors`
/// for one clean input and one input with a stray byte.
#[test]
fn validate_utf8_with_errors() {
  let clean = simdutf::validate_utf8_with_errors(b"hello");
  assert!(clean.is_ok());

  // The stray continuation byte 0x80 sits at index 1, between two ASCII
  // bytes. The test expects `count` to be 1 here — presumably the offset of
  // the first invalid byte; confirm against the binding's documentation.
  let broken = simdutf::validate_utf8_with_errors(&[b'a', 0x80, b'b']);
  assert!(!broken.is_ok());
  assert_eq!(broken.count, 1);
}
/// Checks `simdutf::validate_ascii` on ASCII-only, empty and non-ASCII input.
#[test]
fn validate_ascii() {
  // Every byte is below 0x80 (trivially so for the empty slice).
  let ascii_only: [&[u8]; 2] = [b"hello world 123", b""];
  for bytes in ascii_only {
    assert!(simdutf::validate_ascii(bytes), "expected ASCII: {bytes:?}");
  }

  // Each input holds a byte >= 0x80 ('é' is 0xC3 0xA9 in UTF-8).
  let non_ascii: [&[u8]; 2] = [&[0x80], "café".as_bytes()];
  for bytes in non_ascii {
    assert!(!simdutf::validate_ascii(bytes), "expected non-ASCII: {bytes:?}");
  }
}
/// Checks `simdutf::validate_utf16le` on a well-formed string, an unpaired
/// surrogate and the empty slice.
#[test]
fn validate_utf16le() {
  let well_formed: Vec<u16> = "hello".encode_utf16().collect();
  assert!(simdutf::validate_utf16le(&well_formed));

  // 0xD800 is a high surrogate with no low surrogate following it.
  let unpaired_surrogate = [0xD800u16];
  assert!(!simdutf::validate_utf16le(&unpaired_surrogate));

  let empty: [u16; 0] = [];
  assert!(simdutf::validate_utf16le(&empty));
}
/// Checks `simdutf::validate_utf32` on real scalar values and on two values
/// that are not Unicode scalar values.
#[test]
fn validate_utf32() {
  let scalars: Vec<u32> = "hello 日本語".chars().map(u32::from).collect();
  assert!(simdutf::validate_utf32(&scalars));

  // 0xD800 is a surrogate; 0x110000 is one past the last code point U+10FFFF.
  for rejected in [0xD800u32, 0x110000] {
    assert!(
      !simdutf::validate_utf32(&[rejected]),
      "expected invalid: {rejected:#X}"
    );
  }
}
/// Round-trips a mixed ASCII / accented / CJK string through UTF-16LE and
/// back, checking predicted lengths, reported write counts and payloads.
#[test]
fn utf8_to_utf16le_roundtrip() {
  let text = "hello café 日本語";
  let source = text.as_bytes();
  let reference: Vec<u16> = text.encode_utf16().collect();

  // UTF-8 -> UTF-16LE.
  let predicted_units = simdutf::utf16_length_from_utf8(source);
  assert_eq!(predicted_units, reference.len());
  let mut units = vec![0u16; predicted_units];
  // SAFETY: `source` is borrowed from a `&str`, so it is valid UTF-8, and
  // `units` holds exactly `utf16_length_from_utf8(source)` code units.
  let units_written =
    unsafe { simdutf::convert_utf8_to_utf16le(source, &mut units) };
  assert_eq!(units_written, reference.len());
  assert_eq!(units, reference);

  // UTF-16LE -> UTF-8.
  let predicted_bytes = simdutf::utf8_length_from_utf16le(&units);
  assert_eq!(predicted_bytes, source.len());
  let mut bytes = vec![0u8; predicted_bytes];
  // SAFETY: `units` was just checked equal to `encode_utf16` output, and
  // `bytes` holds exactly `utf8_length_from_utf16le(&units)` bytes.
  let bytes_written =
    unsafe { simdutf::convert_utf16le_to_utf8(&units, &mut bytes) };
  assert_eq!(bytes_written, source.len());
  assert_eq!(&bytes, source);
}
/// Round-trips the Latin-1 bytes of "café" through UTF-8 and back.
#[test]
fn latin1_to_utf8_roundtrip() {
  // 'c', 'a', 'f', then 0xE9, which is 'é' in Latin-1.
  let source = &[0x63u8, 0x61, 0x66, 0xE9];
  let reference = "café";

  // Latin-1 -> UTF-8.
  let predicted_bytes = simdutf::utf8_length_from_latin1(source);
  assert!(predicted_bytes >= source.len());
  let mut utf8_buf = vec![0u8; predicted_bytes];
  // SAFETY: `utf8_buf` holds exactly `utf8_length_from_latin1(source)` bytes.
  let utf8_written =
    unsafe { simdutf::convert_latin1_to_utf8(source, &mut utf8_buf) };
  let utf8 = &utf8_buf[..utf8_written];
  assert_eq!(utf8, reference.as_bytes());

  // UTF-8 -> Latin-1.
  let predicted_latin1 = simdutf::latin1_length_from_utf8(utf8);
  assert_eq!(predicted_latin1, source.len());
  let mut restored = vec![0u8; predicted_latin1];
  // SAFETY: `utf8` was just checked equal to the bytes of a `&str`, and
  // `restored` holds exactly `latin1_length_from_utf8(utf8)` bytes.
  let latin1_written =
    unsafe { simdutf::convert_utf8_to_latin1(utf8, &mut restored) };
  assert_eq!(latin1_written, source.len());
  assert_eq!(&restored, source);
}
/// Round-trips a string containing a 4-byte UTF-8 sequence through UTF-32 and
/// back, checking predicted lengths, reported write counts and payloads in
/// both directions.
#[test]
fn utf8_to_utf32_roundtrip() {
  let input = "hello 🌍";
  let utf8 = input.as_bytes();
  let expected: Vec<u32> = input.chars().map(|c| c as u32).collect();

  // UTF-8 -> UTF-32.
  let len = simdutf::utf32_length_from_utf8(utf8);
  assert_eq!(len, expected.len());
  let mut output = vec![0u32; len];
  // SAFETY: `utf8` is borrowed from a `&str`, so it is valid UTF-8, and
  // `output` holds exactly `utf32_length_from_utf8(utf8)` code points.
  let written = unsafe { simdutf::convert_utf8_to_utf32(utf8, &mut output) };
  assert_eq!(written, expected.len());
  assert_eq!(output, expected);

  // UTF-32 -> UTF-8. The predicted length and the write count are checked
  // against the source, mirroring the UTF-16 round-trip test; without them a
  // wrong length prediction that still fits the buffer would go unnoticed.
  let utf8_len = simdutf::utf8_length_from_utf32(&output);
  assert_eq!(utf8_len, utf8.len());
  let mut utf8_out = vec![0u8; utf8_len];
  // SAFETY: `output` was just checked equal to the `char` values of `input`,
  // and `utf8_out` holds exactly `utf8_length_from_utf32(&output)` bytes.
  let written =
    unsafe { simdutf::convert_utf32_to_utf8(&output, &mut utf8_out) };
  assert_eq!(written, utf8.len());
  assert_eq!(&utf8_out[..written], utf8);
}
/// Checks that `simdutf::count_utf8` counts code points, not bytes, for
/// 1-, 2-, 3- and 4-byte UTF-8 sequences.
#[test]
fn count_utf8_codepoints() {
  // (input text, expected number of code points)
  let cases = [("hello", 5), ("café", 4), ("日本語", 3), ("🌍", 1)];
  for (text, code_points) in cases {
    assert_eq!(
      simdutf::count_utf8(text.as_bytes()),
      code_points,
      "input: {text:?}"
    );
  }
}
/// Checks that `simdutf::count_utf16le` counts a surrogate pair as a single
/// code point.
#[test]
fn count_utf16le_codepoints() {
  // Six BMP characters plus '🌍', which takes two UTF-16 code units:
  // 8 code units in total, but only 7 code points.
  let units: Vec<u16> = "hello 🌍".encode_utf16().collect();
  let code_points = simdutf::count_utf16le(&units);
  assert_eq!(code_points, 7);
}
/// Checks the UTF-8 <-> UTF-16LE length predictions against the lengths the
/// standard library produces for the same text.
#[test]
fn length_calculations() {
  let text = "hello café 日本語";
  let as_utf8 = text.as_bytes();
  let as_utf16: Vec<u16> = text.encode_utf16().collect();

  let predicted_utf16 = simdutf::utf16_length_from_utf8(as_utf8);
  assert_eq!(predicted_utf16, as_utf16.len());

  let predicted_utf8 = simdutf::utf8_length_from_utf16le(&as_utf16);
  assert_eq!(predicted_utf8, as_utf8.len());
}
/// Checks that `simdutf::detect_encodings` reports UTF-8 as a candidate
/// encoding for plain ASCII input.
#[test]
fn detect_encodings_ascii() {
  let ascii = b"hello world";
  let candidates = simdutf::detect_encodings(ascii);
  // Only the UTF-8 bit is checked; other bits in the mask are not examined.
  let utf8_bit = candidates & simdutf::encoding::UTF8;
  assert_ne!(utf8_bit, 0);
}
/// Encodes a short message with the default base64 options, checks the exact
/// encoded text, then decodes it and checks the original bytes come back.
#[test]
fn base64_roundtrip() {
  let message = b"Hello, World!";

  // Encode.
  let encoded_capacity = simdutf::base64_length_from_binary(
    message.len(),
    simdutf::Base64Options::Default,
  );
  let mut encoded_buf = vec![0u8; encoded_capacity];
  // SAFETY: `encoded_buf` holds exactly the number of bytes reported by
  // `base64_length_from_binary` for this input length and these options.
  let encoded_len = unsafe {
    simdutf::binary_to_base64(
      message,
      &mut encoded_buf,
      simdutf::Base64Options::Default,
    )
  };
  let encoded = &encoded_buf[..encoded_len];
  assert_eq!(encoded, b"SGVsbG8sIFdvcmxkIQ==");

  // Decode.
  let decoded_capacity = simdutf::maximal_binary_length_from_base64(encoded);
  let mut decoded_buf = vec![0u8; decoded_capacity];
  // SAFETY: `decoded_buf` holds exactly the number of bytes reported by
  // `maximal_binary_length_from_base64` for `encoded`.
  let outcome = unsafe {
    simdutf::base64_to_binary(
      encoded,
      &mut decoded_buf,
      simdutf::Base64Options::Default,
      simdutf::LastChunkHandling::Loose,
    )
  };
  assert!(outcome.is_ok());
  assert_eq!(&decoded_buf[..outcome.count], message);
}
/// Encodes bytes with the URL-safe base64 options, checks that neither '+'
/// nor '/' appears in the output, then decodes and checks the round trip.
#[test]
fn base64_url_safe() {
  // These three bytes map to base64 indices 62, 63, 63, 62 — the positions
  // that the standard alphabet renders as '+' and '/'.
  let payload = b"\xfb\xff\xfe";

  // Encode.
  let encoded_capacity = simdutf::base64_length_from_binary(
    payload.len(),
    simdutf::Base64Options::Url,
  );
  let mut encoded_buf = vec![0u8; encoded_capacity];
  // SAFETY: `encoded_buf` holds exactly the number of bytes reported by
  // `base64_length_from_binary` for this input length and these options.
  let encoded_len = unsafe {
    simdutf::binary_to_base64(
      payload,
      &mut encoded_buf,
      simdutf::Base64Options::Url,
    )
  };
  let encoded = &encoded_buf[..encoded_len];
  assert!(encoded.iter().all(|&c| c != b'+' && c != b'/'));

  // Decode.
  let decoded_capacity = simdutf::maximal_binary_length_from_base64(encoded);
  let mut decoded_buf = vec![0u8; decoded_capacity];
  // SAFETY: `decoded_buf` holds exactly the number of bytes reported by
  // `maximal_binary_length_from_base64` for `encoded`.
  let outcome = unsafe {
    simdutf::base64_to_binary(
      encoded,
      &mut decoded_buf,
      simdutf::Base64Options::Url,
      simdutf::LastChunkHandling::Loose,
    )
  };
  assert!(outcome.is_ok());
  assert_eq!(&decoded_buf[..outcome.count], payload);
}