/// Reports whether every byte of `s` is an ASCII hexadecimal digit
/// (`0-9`, `a-f`, `A-F`).
///
/// An empty string yields `true`. Inputs long enough to fill at least one
/// vector register are handed to an AVX-512 (64 bytes), AVX2 (32 bytes) or
/// NEON (16 bytes) routine when the running CPU reports the matching feature;
/// everything else goes through the scalar check.
#[inline]
pub fn is_all_hex(s: &str) -> bool {
    let input = s.as_bytes();
    let len = input.len();
    if len == 0 {
        return true;
    }

    #[cfg(target_arch = "x86_64")]
    {
        // Widest vector first: 64-byte AVX-512 blocks, then 32-byte AVX2 blocks.
        if is_x86_feature_detected!("avx512bw") && len >= 64 {
            // SAFETY: AVX-512BW (which builds on AVX-512F) was detected at
            // runtime by the check above.
            return unsafe { is_all_hex_avx512(input) };
        }
        if is_x86_feature_detected!("avx2") && len >= 32 {
            // SAFETY: AVX2 support was detected at runtime by the check above.
            return unsafe { is_all_hex_avx2(input) };
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if std::arch::is_aarch64_feature_detected!("neon") && len >= 16 {
            // SAFETY: NEON support was detected at runtime by the check above.
            return unsafe { is_all_hex_neon(input) };
        }
    }

    is_all_hex_scalar(input)
}
/// Byte-at-a-time hex check: `true` when every element of `bytes` is an ASCII
/// hexadecimal digit (vacuously `true` for an empty slice).
#[inline]
fn is_all_hex_scalar(bytes: &[u8]) -> bool {
    for byte in bytes {
        if !byte.is_ascii_hexdigit() {
            return false;
        }
    }
    true
}
/// AVX2 implementation of the hex-digit check.
///
/// Scans `bytes` in 32-byte blocks and hands whatever is left over (fewer
/// than 32 bytes) to the scalar routine.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn is_all_hex_avx2(bytes: &[u8]) -> bool {
    use std::arch::x86_64::*;

    // Exclusive bounds, because the only ordered comparison used is "greater
    // than". The comparison is signed, so bytes >= 0x80 read as negative and
    // fail every lower-bound test.
    let before_digit = _mm256_set1_epi8((b'0' - 1) as i8);
    let after_digit = _mm256_set1_epi8((b'9' + 1) as i8);
    let before_upper = _mm256_set1_epi8((b'A' - 1) as i8);
    let after_upper = _mm256_set1_epi8((b'F' + 1) as i8);
    let before_lower = _mm256_set1_epi8((b'a' - 1) as i8);
    let after_lower = _mm256_set1_epi8((b'f' + 1) as i8);

    let mut blocks = bytes.chunks_exact(32);
    for block in blocks.by_ref() {
        // Unaligned load of one full 32-byte block.
        let v = _mm256_loadu_si256(block.as_ptr() as *const __m256i);

        let digit = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, before_digit),
            _mm256_cmpgt_epi8(after_digit, v),
        );
        let upper = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, before_upper),
            _mm256_cmpgt_epi8(after_upper, v),
        );
        let lower = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, before_lower),
            _mm256_cmpgt_epi8(after_lower, v),
        );
        let hex = _mm256_or_si256(_mm256_or_si256(digit, upper), lower);

        // movemask collects the top bit of each of the 32 lanes; all 32 bits
        // set is -1 as an i32.
        if _mm256_movemask_epi8(hex) != -1 {
            return false;
        }
    }

    is_all_hex_scalar(blocks.remainder())
}
/// AVX-512 implementation of the hex-digit check.
///
/// Scans `bytes` in 64-byte blocks. The leftover (fewer than 64 bytes) is
/// passed to the AVX2 routine when it is at least 32 bytes long and AVX2 is
/// detected, otherwise to the scalar routine.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX-512F and AVX-512BW.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f", enable = "avx512bw")]
unsafe fn is_all_hex_avx512(bytes: &[u8]) -> bool {
    use std::arch::x86_64::*;

    // Exclusive bounds for signed "greater than" comparisons; bytes >= 0x80
    // read as negative and therefore fail every lower-bound test.
    let digit_lo = _mm512_set1_epi8((b'0' - 1) as i8);
    let digit_hi = _mm512_set1_epi8((b'9' + 1) as i8);
    let upper_lo = _mm512_set1_epi8((b'A' - 1) as i8);
    let upper_hi = _mm512_set1_epi8((b'F' + 1) as i8);
    let lower_lo = _mm512_set1_epi8((b'a' - 1) as i8);
    let lower_hi = _mm512_set1_epi8((b'f' + 1) as i8);

    let mut blocks = bytes.chunks_exact(64);
    for block in blocks.by_ref() {
        // `.cast()` leaves the pointee type to inference, so this line
        // compiles whether the intrinsic is declared with `*const i32` or
        // with `*const __m512i`. The load is unaligned and reads exactly the
        // 64 bytes of `block`.
        let data = _mm512_loadu_si512(block.as_ptr().cast());

        // Each comparison yields a 64-bit mask with one bit per byte lane.
        let is_digit =
            _mm512_cmpgt_epi8_mask(data, digit_lo) & _mm512_cmpgt_epi8_mask(digit_hi, data);
        let is_upper =
            _mm512_cmpgt_epi8_mask(data, upper_lo) & _mm512_cmpgt_epi8_mask(upper_hi, data);
        let is_lower =
            _mm512_cmpgt_epi8_mask(data, lower_lo) & _mm512_cmpgt_epi8_mask(lower_hi, data);

        if (is_digit | is_upper | is_lower) != u64::MAX {
            return false;
        }
    }

    let remainder = blocks.remainder();
    if remainder.len() >= 32 && is_x86_feature_detected!("avx2") {
        is_all_hex_avx2(remainder)
    } else {
        is_all_hex_scalar(remainder)
    }
}
/// NEON implementation of the hex-digit check.
///
/// Scans `bytes` in 16-byte blocks and hands whatever is left over (fewer
/// than 16 bytes) to the scalar routine.
///
/// # Safety
///
/// The caller must ensure the running CPU supports NEON.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn is_all_hex_neon(bytes: &[u8]) -> bool {
    use std::arch::aarch64::*;

    // Inclusive range bounds, broadcast once; the comparisons are unsigned.
    let digit_first = vdupq_n_u8(b'0');
    let digit_last = vdupq_n_u8(b'9');
    let upper_first = vdupq_n_u8(b'A');
    let upper_last = vdupq_n_u8(b'F');
    let lower_first = vdupq_n_u8(b'a');
    let lower_last = vdupq_n_u8(b'f');

    let mut blocks = bytes.chunks_exact(16);
    for block in blocks.by_ref() {
        let v = vld1q_u8(block.as_ptr());

        let digit = vandq_u8(vcgeq_u8(v, digit_first), vcleq_u8(v, digit_last));
        let upper = vandq_u8(vcgeq_u8(v, upper_first), vcleq_u8(v, upper_last));
        let lower = vandq_u8(vcgeq_u8(v, lower_first), vcleq_u8(v, lower_last));
        let hex = vorrq_u8(vorrq_u8(digit, upper), lower);

        // Matching lanes are 0xFF; the horizontal minimum drops below 0xFF
        // as soon as one lane failed.
        if vminvq_u8(hex) != 0xFF {
            return false;
        }
    }

    is_all_hex_scalar(blocks.remainder())
}
/// Reports whether every byte of `s` belongs to the standard base64 alphabet
/// (`A-Z`, `a-z`, `0-9`, `+`, `/`) or is the padding character `=`.
///
/// An empty string yields `true`. Only the character set is checked, not
/// length or padding placement. Inputs of at least 32 bytes (x86_64 with
/// AVX2) or 16 bytes (aarch64 with NEON) are routed to the vectorized routine
/// when the CPU feature is detected at runtime.
#[inline]
pub fn is_all_base64_chars(s: &str) -> bool {
    let input = s.as_bytes();
    let len = input.len();
    if len == 0 {
        return true;
    }

    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") && len >= 32 {
            // SAFETY: AVX2 support was detected at runtime by the check above.
            return unsafe { is_all_base64_avx2(input) };
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if std::arch::is_aarch64_feature_detected!("neon") && len >= 16 {
            // SAFETY: NEON support was detected at runtime by the check above.
            return unsafe { is_all_base64_neon(input) };
        }
    }

    is_all_base64_scalar(input)
}
/// Byte-at-a-time base64 character check: `true` when every element of
/// `bytes` is an ASCII letter, an ASCII digit, `+`, `/` or `=`.
///
/// `=` is accepted at any position; padding placement is not validated.
#[inline]
fn is_all_base64_scalar(bytes: &[u8]) -> bool {
    for &byte in bytes {
        let accepted = match byte {
            b'+' | b'/' | b'=' => true,
            other => other.is_ascii_alphanumeric(),
        };
        if !accepted {
            return false;
        }
    }
    true
}
/// AVX2 implementation of the base64 character check.
///
/// Scans `bytes` in 32-byte blocks and hands whatever is left over (fewer
/// than 32 bytes) to the scalar routine.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn is_all_base64_avx2(bytes: &[u8]) -> bool {
    use std::arch::x86_64::*;

    // Exclusive bounds for the signed "greater than" comparisons; bytes
    // >= 0x80 read as negative and fail every lower-bound test.
    let before_upper = _mm256_set1_epi8((b'A' - 1) as i8);
    let after_upper = _mm256_set1_epi8((b'Z' + 1) as i8);
    let before_lower = _mm256_set1_epi8((b'a' - 1) as i8);
    let after_lower = _mm256_set1_epi8((b'z' + 1) as i8);
    let before_digit = _mm256_set1_epi8((b'0' - 1) as i8);
    let after_digit = _mm256_set1_epi8((b'9' + 1) as i8);
    // The three single characters are matched by equality.
    let plus_sign = _mm256_set1_epi8(b'+' as i8);
    let slash_sign = _mm256_set1_epi8(b'/' as i8);
    let pad_sign = _mm256_set1_epi8(b'=' as i8);

    let mut blocks = bytes.chunks_exact(32);
    for block in blocks.by_ref() {
        // Unaligned load of one full 32-byte block.
        let v = _mm256_loadu_si256(block.as_ptr() as *const __m256i);

        let upper = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, before_upper),
            _mm256_cmpgt_epi8(after_upper, v),
        );
        let lower = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, before_lower),
            _mm256_cmpgt_epi8(after_lower, v),
        );
        let digit = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, before_digit),
            _mm256_cmpgt_epi8(after_digit, v),
        );
        let symbol = _mm256_or_si256(
            _mm256_or_si256(
                _mm256_cmpeq_epi8(v, plus_sign),
                _mm256_cmpeq_epi8(v, slash_sign),
            ),
            _mm256_cmpeq_epi8(v, pad_sign),
        );

        let alnum = _mm256_or_si256(_mm256_or_si256(upper, lower), digit);
        let accepted = _mm256_or_si256(alnum, symbol);

        // All 32 lane bits set is -1 as an i32; anything else means at least
        // one byte was rejected.
        if _mm256_movemask_epi8(accepted) != -1 {
            return false;
        }
    }

    is_all_base64_scalar(blocks.remainder())
}
/// NEON implementation of the base64 character check.
///
/// Scans `bytes` in 16-byte blocks and hands whatever is left over (fewer
/// than 16 bytes) to the scalar routine.
///
/// # Safety
///
/// The caller must ensure the running CPU supports NEON.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn is_all_base64_neon(bytes: &[u8]) -> bool {
    use std::arch::aarch64::*;

    // Inclusive range bounds and the three single characters, broadcast once.
    let upper_first = vdupq_n_u8(b'A');
    let upper_last = vdupq_n_u8(b'Z');
    let lower_first = vdupq_n_u8(b'a');
    let lower_last = vdupq_n_u8(b'z');
    let digit_first = vdupq_n_u8(b'0');
    let digit_last = vdupq_n_u8(b'9');
    let plus_sign = vdupq_n_u8(b'+');
    let slash_sign = vdupq_n_u8(b'/');
    let pad_sign = vdupq_n_u8(b'=');

    let mut blocks = bytes.chunks_exact(16);
    for block in blocks.by_ref() {
        let v = vld1q_u8(block.as_ptr());

        let upper = vandq_u8(vcgeq_u8(v, upper_first), vcleq_u8(v, upper_last));
        let lower = vandq_u8(vcgeq_u8(v, lower_first), vcleq_u8(v, lower_last));
        let digit = vandq_u8(vcgeq_u8(v, digit_first), vcleq_u8(v, digit_last));
        let symbol = vorrq_u8(
            vorrq_u8(vceqq_u8(v, plus_sign), vceqq_u8(v, slash_sign)),
            vceqq_u8(v, pad_sign),
        );

        let alnum = vorrq_u8(vorrq_u8(upper, lower), digit);
        let accepted = vorrq_u8(alnum, symbol);

        // Matching lanes are 0xFF; the horizontal minimum drops below 0xFF
        // as soon as one lane failed.
        if vminvq_u8(accepted) != 0xFF {
            return false;
        }
    }

    is_all_base64_scalar(blocks.remainder())
}
/// Reports whether `s` has the textual UUID shape `8-4-4-4-12`: exactly 36
/// bytes, with `-` at byte offsets 8, 13, 18 and 23 and an ASCII hexadecimal
/// digit (either case) at every other offset.
///
/// Only the layout is checked; version and variant bits are not inspected.
#[inline]
pub fn is_uuid_format(s: &str) -> bool {
    let raw = s.as_bytes();
    if raw.len() != 36 {
        return false;
    }

    raw.iter().enumerate().all(|(offset, &byte)| match offset {
        // Separator positions between the five groups.
        8 | 13 | 18 | 23 => byte == b'-',
        _ => byte.is_ascii_hexdigit(),
    })
}
/// Reports whether `s` contains at least one ASCII uppercase letter (`A-Z`).
///
/// Inputs of at least 32 bytes (x86_64 with AVX2) or 16 bytes (aarch64 with
/// NEON) are routed to the vectorized routine when the CPU feature is
/// detected at runtime; shorter inputs are scanned byte by byte.
#[inline]
pub fn has_uppercase(s: &str) -> bool {
    let input = s.as_bytes();

    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") && input.len() >= 32 {
            // SAFETY: AVX2 support was detected at runtime by the check above.
            return unsafe { has_uppercase_avx2(input) };
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if std::arch::is_aarch64_feature_detected!("neon") && input.len() >= 16 {
            // SAFETY: NEON support was detected at runtime by the check above.
            return unsafe { has_uppercase_neon(input) };
        }
    }

    input.iter().any(u8::is_ascii_uppercase)
}
/// AVX2 search for an ASCII uppercase letter.
///
/// Scans `bytes` in 32-byte blocks, returning `true` at the first block that
/// contains a byte in `A-Z`; the leftover (fewer than 32 bytes) is scanned
/// byte by byte.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn has_uppercase_avx2(bytes: &[u8]) -> bool {
    use std::arch::x86_64::*;

    // Exclusive bounds for the signed "greater than" comparisons; bytes
    // >= 0x80 read as negative and never pass the lower-bound test.
    let before_a = _mm256_set1_epi8((b'A' - 1) as i8);
    let after_z = _mm256_set1_epi8((b'Z' + 1) as i8);

    let mut blocks = bytes.chunks_exact(32);
    for block in blocks.by_ref() {
        // Unaligned load of one full 32-byte block.
        let v = _mm256_loadu_si256(block.as_ptr() as *const __m256i);
        let upper = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, before_a),
            _mm256_cmpgt_epi8(after_z, v),
        );
        // Any set bit in the lane mask means at least one byte matched.
        if _mm256_movemask_epi8(upper) != 0 {
            return true;
        }
    }

    blocks.remainder().iter().any(u8::is_ascii_uppercase)
}
/// NEON search for an ASCII uppercase letter.
///
/// Scans `bytes` in 16-byte blocks, returning `true` at the first block that
/// contains a byte in `A-Z`; the leftover (fewer than 16 bytes) is scanned
/// byte by byte.
///
/// # Safety
///
/// The caller must ensure the running CPU supports NEON.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn has_uppercase_neon(bytes: &[u8]) -> bool {
    use std::arch::aarch64::*;

    // Inclusive range bounds, broadcast once; the comparisons are unsigned.
    let first = vdupq_n_u8(b'A');
    let last = vdupq_n_u8(b'Z');

    let mut blocks = bytes.chunks_exact(16);
    for block in blocks.by_ref() {
        let v = vld1q_u8(block.as_ptr());
        let upper = vandq_u8(vcgeq_u8(v, first), vcleq_u8(v, last));
        // Matching lanes are 0xFF, so a non-zero horizontal maximum means a hit.
        if vmaxvq_u8(upper) != 0 {
            return true;
        }
    }

    blocks.remainder().iter().any(u8::is_ascii_uppercase)
}
/// Reports whether `s` contains at least one ASCII lowercase letter (`a-z`).
///
/// Inputs of at least 32 bytes (x86_64 with AVX2) or 16 bytes (aarch64 with
/// NEON) are routed to the vectorized routine when the CPU feature is
/// detected at runtime; shorter inputs are scanned byte by byte.
#[inline]
pub fn has_lowercase(s: &str) -> bool {
    let input = s.as_bytes();

    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") && input.len() >= 32 {
            // SAFETY: AVX2 support was detected at runtime by the check above.
            return unsafe { has_lowercase_avx2(input) };
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if std::arch::is_aarch64_feature_detected!("neon") && input.len() >= 16 {
            // SAFETY: NEON support was detected at runtime by the check above.
            return unsafe { has_lowercase_neon(input) };
        }
    }

    input.iter().any(u8::is_ascii_lowercase)
}
/// AVX2 search for an ASCII lowercase letter.
///
/// Scans `bytes` in 32-byte blocks, returning `true` at the first block that
/// contains a byte in `a-z`; the leftover (fewer than 32 bytes) is scanned
/// byte by byte.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn has_lowercase_avx2(bytes: &[u8]) -> bool {
    use std::arch::x86_64::*;

    // Exclusive bounds for the signed "greater than" comparisons; bytes
    // >= 0x80 read as negative and never pass the lower-bound test.
    let before_a = _mm256_set1_epi8((b'a' - 1) as i8);
    let after_z = _mm256_set1_epi8((b'z' + 1) as i8);

    let mut blocks = bytes.chunks_exact(32);
    for block in blocks.by_ref() {
        // Unaligned load of one full 32-byte block.
        let v = _mm256_loadu_si256(block.as_ptr() as *const __m256i);
        let lower = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, before_a),
            _mm256_cmpgt_epi8(after_z, v),
        );
        // Any set bit in the lane mask means at least one byte matched.
        if _mm256_movemask_epi8(lower) != 0 {
            return true;
        }
    }

    blocks.remainder().iter().any(u8::is_ascii_lowercase)
}
/// NEON search for an ASCII lowercase letter.
///
/// Scans `bytes` in 16-byte blocks, returning `true` at the first block that
/// contains a byte in `a-z`; the leftover (fewer than 16 bytes) is scanned
/// byte by byte.
///
/// # Safety
///
/// The caller must ensure the running CPU supports NEON.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn has_lowercase_neon(bytes: &[u8]) -> bool {
    use std::arch::aarch64::*;

    // Inclusive range bounds, broadcast once; the comparisons are unsigned.
    let first = vdupq_n_u8(b'a');
    let last = vdupq_n_u8(b'z');

    let mut blocks = bytes.chunks_exact(16);
    for block in blocks.by_ref() {
        let v = vld1q_u8(block.as_ptr());
        let lower = vandq_u8(vcgeq_u8(v, first), vcleq_u8(v, last));
        // Matching lanes are 0xFF, so a non-zero horizontal maximum means a hit.
        if vmaxvq_u8(lower) != 0 {
            return true;
        }
    }

    blocks.remainder().iter().any(u8::is_ascii_lowercase)
}
/// Reports whether every byte of `s` is an ASCII letter or ASCII digit.
///
/// An empty string yields `true`. Inputs of at least 32 bytes (x86_64 with
/// AVX2) or 16 bytes (aarch64 with NEON) are routed to the vectorized routine
/// when the CPU feature is detected at runtime; shorter inputs are checked
/// byte by byte.
#[inline]
pub fn is_all_alphanumeric(s: &str) -> bool {
    let input = s.as_bytes();
    let len = input.len();
    if len == 0 {
        return true;
    }

    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") && len >= 32 {
            // SAFETY: AVX2 support was detected at runtime by the check above.
            return unsafe { is_all_alphanumeric_avx2(input) };
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if std::arch::is_aarch64_feature_detected!("neon") && len >= 16 {
            // SAFETY: NEON support was detected at runtime by the check above.
            return unsafe { is_all_alphanumeric_neon(input) };
        }
    }

    input.iter().all(u8::is_ascii_alphanumeric)
}
/// AVX2 implementation of the ASCII-alphanumeric check.
///
/// Scans `bytes` in 32-byte blocks, returning `false` at the first block that
/// contains a byte outside `A-Z`, `a-z` and `0-9`; the leftover (fewer than
/// 32 bytes) is checked byte by byte.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn is_all_alphanumeric_avx2(bytes: &[u8]) -> bool {
    use std::arch::x86_64::*;

    // Exclusive bounds for the signed "greater than" comparisons; bytes
    // >= 0x80 read as negative and fail every lower-bound test.
    let before_upper = _mm256_set1_epi8((b'A' - 1) as i8);
    let after_upper = _mm256_set1_epi8((b'Z' + 1) as i8);
    let before_lower = _mm256_set1_epi8((b'a' - 1) as i8);
    let after_lower = _mm256_set1_epi8((b'z' + 1) as i8);
    let before_digit = _mm256_set1_epi8((b'0' - 1) as i8);
    let after_digit = _mm256_set1_epi8((b'9' + 1) as i8);

    let mut blocks = bytes.chunks_exact(32);
    for block in blocks.by_ref() {
        // Unaligned load of one full 32-byte block.
        let v = _mm256_loadu_si256(block.as_ptr() as *const __m256i);

        let upper = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, before_upper),
            _mm256_cmpgt_epi8(after_upper, v),
        );
        let lower = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, before_lower),
            _mm256_cmpgt_epi8(after_lower, v),
        );
        let digit = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, before_digit),
            _mm256_cmpgt_epi8(after_digit, v),
        );
        let accepted = _mm256_or_si256(_mm256_or_si256(upper, lower), digit);

        // All 32 lane bits set is -1 as an i32; anything else means at least
        // one byte was rejected.
        if _mm256_movemask_epi8(accepted) != -1 {
            return false;
        }
    }

    blocks.remainder().iter().all(u8::is_ascii_alphanumeric)
}
/// NEON implementation of the ASCII-alphanumeric check.
///
/// Scans `bytes` in 16-byte blocks, returning `false` at the first block that
/// contains a byte outside `A-Z`, `a-z` and `0-9`; the leftover (fewer than
/// 16 bytes) is checked byte by byte.
///
/// # Safety
///
/// The caller must ensure the running CPU supports NEON.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn is_all_alphanumeric_neon(bytes: &[u8]) -> bool {
    use std::arch::aarch64::*;

    // Inclusive range bounds, broadcast once; the comparisons are unsigned.
    let upper_first = vdupq_n_u8(b'A');
    let upper_last = vdupq_n_u8(b'Z');
    let lower_first = vdupq_n_u8(b'a');
    let lower_last = vdupq_n_u8(b'z');
    let digit_first = vdupq_n_u8(b'0');
    let digit_last = vdupq_n_u8(b'9');

    let mut blocks = bytes.chunks_exact(16);
    for block in blocks.by_ref() {
        let v = vld1q_u8(block.as_ptr());

        let upper = vandq_u8(vcgeq_u8(v, upper_first), vcleq_u8(v, upper_last));
        let lower = vandq_u8(vcgeq_u8(v, lower_first), vcleq_u8(v, lower_last));
        let digit = vandq_u8(vcgeq_u8(v, digit_first), vcleq_u8(v, digit_last));
        let accepted = vorrq_u8(vorrq_u8(upper, lower), digit);

        // Matching lanes are 0xFF; the horizontal minimum drops below 0xFF
        // as soon as one lane failed.
        if vminvq_u8(accepted) != 0xFF {
            return false;
        }
    }

    blocks.remainder().iter().all(u8::is_ascii_alphanumeric)
}
/// Unit tests for the character-class helpers, including inputs long enough
/// to reach the vectorized code paths on CPUs that support them.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_all_hex() {
        assert!(is_all_hex("0123456789abcdef"));
        assert!(is_all_hex("ABCDEF"));
        assert!(is_all_hex("aAbBcCdDeEfF"));
        assert!(is_all_hex(""));
        assert!(!is_all_hex("0123456789abcdefg"));
        assert!(!is_all_hex("hello"));
        assert!(!is_all_hex("0123456789abcdef "));
    }

    #[test]
    fn test_is_all_hex_long() {
        let hex = "a".repeat(1000);
        assert!(is_all_hex(&hex));

        // The only invalid byte is the final one.
        let mut not_hex = "a".repeat(999);
        not_hex.push('g');
        assert!(!is_all_hex(&not_hex));
    }

    #[test]
    fn test_is_all_base64_chars() {
        assert!(is_all_base64_chars(
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
        ));
        assert!(is_all_base64_chars("SGVsbG8gV29ybGQh"));
        assert!(is_all_base64_chars("dGVzdA=="));
        assert!(is_all_base64_chars(""));
        assert!(!is_all_base64_chars("invalid!"));
        assert!(!is_all_base64_chars("has space "));
    }

    #[test]
    fn test_is_uuid_format() {
        assert!(is_uuid_format("550e8400-e29b-41d4-a716-446655440000"));
        assert!(is_uuid_format("AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE"));
        assert!(is_uuid_format("00000000-0000-0000-0000-000000000000"));
        assert!(!is_uuid_format("not-a-uuid"));
        // One character too short.
        assert!(!is_uuid_format("550e8400-e29b-41d4-a716-44665544000"));
        // One character too long.
        assert!(!is_uuid_format("550e8400-e29b-41d4-a716-4466554400000"));
        // First hyphen missing.
        assert!(!is_uuid_format("550e8400e29b-41d4-a716-446655440000"));
        // Non-hex character in the last group.
        assert!(!is_uuid_format("550e8400-e29b-41d4-a716-44665544000g"));
    }

    #[test]
    fn test_has_uppercase() {
        assert!(has_uppercase("Hello"));
        assert!(has_uppercase("HELLO"));
        assert!(has_uppercase("helloA"));
        assert!(!has_uppercase("hello"));
        assert!(!has_uppercase("123"));
        assert!(!has_uppercase(""));
    }

    #[test]
    fn test_has_lowercase() {
        assert!(has_lowercase("Hello"));
        assert!(has_lowercase("hello"));
        assert!(has_lowercase("HELLOa"));
        assert!(!has_lowercase("HELLO"));
        assert!(!has_lowercase("123"));
        assert!(!has_lowercase(""));
    }

    #[test]
    fn test_is_all_alphanumeric() {
        assert!(is_all_alphanumeric("Hello123"));
        assert!(is_all_alphanumeric("HELLO"));
        assert!(is_all_alphanumeric("12345"));
        assert!(is_all_alphanumeric(""));
        assert!(!is_all_alphanumeric("Hello World"));
        assert!(!is_all_alphanumeric("hello-world"));
        assert!(!is_all_alphanumeric("test_123"));
    }

    #[test]
    fn test_long_strings() {
        let long_hex = "a".repeat(1000);
        assert!(is_all_hex(&long_hex));

        let long_alpha = "A".repeat(500) + &"a".repeat(500);
        assert!(is_all_alphanumeric(&long_alpha));
        assert!(has_uppercase(&long_alpha));
        assert!(has_lowercase(&long_alpha));
    }

    #[test]
    fn test_simd_scalar_equivalence() {
        // Lengths on either side of the 16-, 32- and 64-byte block sizes.
        for len in [0, 1, 15, 16, 31, 32, 63, 64, 100, 1000] {
            let s: String = (0..len).map(|i| (b'a' + (i % 6) as u8) as char).collect();
            assert_eq!(
                is_all_hex(&s),
                is_all_hex_scalar(s.as_bytes()),
                "Mismatch at length {} for hex",
                len
            );

            // Same length with the final byte replaced by a non-hex
            // character, so the rejecting path is compared as well.
            if len > 0 {
                let mut invalid = s.clone();
                invalid.pop();
                invalid.push('g');
                assert_eq!(
                    is_all_hex(&invalid),
                    is_all_hex_scalar(invalid.as_bytes()),
                    "Mismatch at length {} for non-hex input",
                    len
                );
                assert!(!is_all_hex(&invalid));
            }
        }
    }
}