use std::sync::atomic::{AtomicBool, Ordering};

/// Cached result of runtime AVX2 detection; valid once `FEATURES_DETECTED` is set.
static AVX2_SUPPORTED: AtomicBool = AtomicBool::new(false);
/// Cached result of runtime SSE2 detection; valid once `FEATURES_DETECTED` is set.
static SSE2_SUPPORTED: AtomicBool = AtomicBool::new(false);
/// Publication flag: stored with Release ordering after the two flags above.
static FEATURES_DETECTED: AtomicBool = AtomicBool::new(false);

/// Populates the CPU-feature flags on first call; cheap early-return afterwards.
///
/// Ordering fix: the original used `Relaxed` everywhere, which allowed a thread
/// to observe `FEATURES_DETECTED == true` while the capability stores were not
/// yet visible (a stale-`false` read and a needless scalar fallback). The
/// Acquire load / Release store pair guarantees that a reader that sees the
/// flag also sees the capability values written before it. Redundant detection
/// by racing threads is harmless: both write identical values.
fn detect_features() {
    // Acquire pairs with the Release store at the bottom of this function.
    if FEATURES_DETECTED.load(Ordering::Acquire) {
        return;
    }
    #[cfg(target_arch = "x86_64")]
    {
        AVX2_SUPPORTED.store(is_x86_feature_detected!("avx2"), Ordering::Relaxed);
        SSE2_SUPPORTED.store(is_x86_feature_detected!("sse2"), Ordering::Relaxed);
    }
    #[cfg(target_arch = "aarch64")]
    {
        // NOTE(review): on aarch64 the SSE2 flag is reused as a generic
        // "baseline SIMD present" marker; the x86-only call sites never read it.
        SSE2_SUPPORTED.store(true, Ordering::Relaxed);
    }
    // Release: publishes the capability stores above to Acquire readers.
    FEATURES_DETECTED.store(true, Ordering::Release);
}
/// Returns `true` if the running CPU supports AVX2 (256-bit SIMD).
/// Detection runs once; subsequent calls are a flag load.
#[inline]
pub fn has_avx2() -> bool {
    detect_features();
    AVX2_SUPPORTED.load(Ordering::Relaxed)
}
/// Returns `true` if the running CPU supports SSE2 (128-bit SIMD).
/// Detection runs once; subsequent calls are a flag load.
#[inline]
pub fn has_sse2() -> bool {
    detect_features();
    SSE2_SUPPORTED.load(Ordering::Relaxed)
}
/// Returns `true` when every byte of `s` is ASCII (< 0x80), using SIMD
/// for inputs long enough to fill at least one vector register.
pub fn is_ascii_simd(s: &str) -> bool {
    let data = s.as_bytes();
    #[cfg(target_arch = "x86_64")]
    {
        if data.len() >= 32 && has_avx2() {
            return unsafe { is_ascii_avx2(data) };
        }
        if data.len() >= 16 && has_sse2() {
            return unsafe { is_ascii_sse2(data) };
        }
    }
    // Scalar fallback: `is_ascii` performs the same per-byte < 0x80 test.
    data.is_ascii()
}
/// AVX2 ASCII scan: 32-byte blocks via `movemask` of the sign bits, then a
/// scalar pass over the remainder.
///
/// # Safety
/// Caller must ensure the CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn is_ascii_avx2(bytes: &[u8]) -> bool {
    use std::arch::x86_64::*;
    let mut blocks = bytes.chunks_exact(32);
    for block in blocks.by_ref() {
        let v = _mm256_loadu_si256(block.as_ptr() as *const __m256i);
        // movemask collects each byte's high bit: non-zero means non-ASCII.
        if _mm256_movemask_epi8(v) != 0 {
            return false;
        }
    }
    blocks.remainder().iter().all(|&b| b < 128)
}
/// SSE2 ASCII scan: 16-byte blocks via `movemask` of the sign bits, then a
/// scalar pass over the remainder.
///
/// # Safety
/// Caller must ensure the CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn is_ascii_sse2(bytes: &[u8]) -> bool {
    use std::arch::x86_64::*;
    let mut blocks = bytes.chunks_exact(16);
    for block in blocks.by_ref() {
        let v = _mm_loadu_si128(block.as_ptr() as *const __m128i);
        // movemask collects each byte's high bit: non-zero means non-ASCII.
        if _mm_movemask_epi8(v) != 0 {
            return false;
        }
    }
    blocks.remainder().iter().all(|&b| b < 128)
}
/// ASCII-aware uppercase: SIMD fast path when the whole string is ASCII,
/// falling back to the full Unicode `str::to_uppercase` otherwise.
pub fn to_uppercase_simd(s: &str) -> String {
    if !is_ascii_simd(s) {
        return s.to_uppercase();
    }
    let data = s.as_bytes();
    #[cfg(target_arch = "x86_64")]
    {
        if data.len() >= 32 && has_avx2() {
            return unsafe { to_uppercase_avx2(data) };
        }
        if data.len() >= 16 && has_sse2() {
            return unsafe { to_uppercase_sse2(data) };
        }
    }
    to_uppercase_ascii_scalar(data)
}
/// ASCII-aware lowercase: SIMD fast path when the whole string is ASCII,
/// falling back to the full Unicode `str::to_lowercase` otherwise.
pub fn to_lowercase_simd(s: &str) -> String {
    if !is_ascii_simd(s) {
        return s.to_lowercase();
    }
    let data = s.as_bytes();
    #[cfg(target_arch = "x86_64")]
    {
        if data.len() >= 32 && has_avx2() {
            return unsafe { to_lowercase_avx2(data) };
        }
        if data.len() >= 16 && has_sse2() {
            return unsafe { to_lowercase_sse2(data) };
        }
    }
    to_lowercase_ascii_scalar(data)
}
/// Scalar uppercase for byte slices already known to be valid UTF-8
/// (all call sites check `is_ascii_simd` first).
///
/// Uses the standard library's `[u8]::to_ascii_uppercase` instead of the
/// hand-rolled `b - 32` range check — identical mapping (`a`-`z` → `A`-`Z`,
/// everything else untouched), less code to audit.
fn to_uppercase_ascii_scalar(bytes: &[u8]) -> String {
    // SAFETY: ASCII case mapping only rewrites bytes `a`-`z` into `A`-`Z`,
    // so a valid-UTF-8 input buffer stays valid UTF-8.
    unsafe { String::from_utf8_unchecked(bytes.to_ascii_uppercase()) }
}
/// Scalar lowercase for byte slices already known to be valid UTF-8
/// (all call sites check `is_ascii_simd` first).
///
/// Uses the standard library's `[u8]::to_ascii_lowercase` instead of the
/// hand-rolled `b + 32` range check — identical mapping (`A`-`Z` → `a`-`z`,
/// everything else untouched), less code to audit.
fn to_lowercase_ascii_scalar(bytes: &[u8]) -> String {
    // SAFETY: ASCII case mapping only rewrites bytes `A`-`Z` into `a`-`z`,
    // so a valid-UTF-8 input buffer stays valid UTF-8.
    unsafe { String::from_utf8_unchecked(bytes.to_ascii_lowercase()) }
}
/// Uppercases `bytes` in 32-byte AVX2 blocks plus a scalar tail.
///
/// Fix: the original did `Vec::with_capacity(len)` + `set_len(len)` and then
/// wrote through `result[i]` — indexing into uninitialized memory is undefined
/// behavior. A zero-initialized buffer costs one memset and is sound.
///
/// # Safety
/// Caller must ensure the CPU supports AVX2 and that `bytes` is valid UTF-8
/// (all call sites pass ASCII-only data).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn to_uppercase_avx2(bytes: &[u8]) -> String {
    use std::arch::x86_64::*;
    let len = bytes.len();
    let mut result = vec![0u8; len];
    // Range bounds hoisted out of the loop: ('a'-1, 'z'+1) turn the inclusive
    // range test into two signed byte compares.
    let below_a = _mm256_set1_epi8(b'a' as i8 - 1);
    let above_z = _mm256_set1_epi8(b'z' as i8 + 1);
    let case_bit = _mm256_set1_epi8(0x20);
    let mut i = 0;
    while i + 32 <= len {
        let chunk = _mm256_loadu_si256(bytes.as_ptr().add(i) as *const __m256i);
        let is_lower = _mm256_and_si256(
            _mm256_cmpgt_epi8(chunk, below_a),
            _mm256_cmpgt_epi8(above_z, chunk),
        );
        // Subtract 0x20 only where the lowercase mask is set.
        let delta = _mm256_and_si256(is_lower, case_bit);
        _mm256_storeu_si256(
            result.as_mut_ptr().add(i) as *mut __m256i,
            _mm256_sub_epi8(chunk, delta),
        );
        i += 32;
    }
    while i < len {
        result[i] = bytes[i].to_ascii_uppercase();
        i += 1;
    }
    // SAFETY: input is valid UTF-8 and ASCII case mapping keeps every
    // rewritten byte in the ASCII range, so `result` is valid UTF-8.
    String::from_utf8_unchecked(result)
}
/// Uppercases `bytes` in 16-byte SSE2 blocks plus a scalar tail.
///
/// Fix: the original did `Vec::with_capacity(len)` + `set_len(len)` and then
/// wrote through `result[i]` — indexing into uninitialized memory is undefined
/// behavior. A zero-initialized buffer costs one memset and is sound.
///
/// # Safety
/// Caller must ensure the CPU supports SSE2 and that `bytes` is valid UTF-8
/// (all call sites pass ASCII-only data).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn to_uppercase_sse2(bytes: &[u8]) -> String {
    use std::arch::x86_64::*;
    let len = bytes.len();
    let mut result = vec![0u8; len];
    // Range bounds hoisted out of the loop.
    let below_a = _mm_set1_epi8(b'a' as i8 - 1);
    let above_z = _mm_set1_epi8(b'z' as i8 + 1);
    let case_bit = _mm_set1_epi8(0x20);
    let mut i = 0;
    while i + 16 <= len {
        let chunk = _mm_loadu_si128(bytes.as_ptr().add(i) as *const __m128i);
        let is_lower = _mm_and_si128(
            _mm_cmpgt_epi8(chunk, below_a),
            _mm_cmpgt_epi8(above_z, chunk),
        );
        // Subtract 0x20 only where the lowercase mask is set.
        let delta = _mm_and_si128(is_lower, case_bit);
        _mm_storeu_si128(
            result.as_mut_ptr().add(i) as *mut __m128i,
            _mm_sub_epi8(chunk, delta),
        );
        i += 16;
    }
    while i < len {
        result[i] = bytes[i].to_ascii_uppercase();
        i += 1;
    }
    // SAFETY: input is valid UTF-8 and ASCII case mapping keeps every
    // rewritten byte in the ASCII range, so `result` is valid UTF-8.
    String::from_utf8_unchecked(result)
}
/// Lowercases `bytes` in 32-byte AVX2 blocks plus a scalar tail.
///
/// Fix: the original did `Vec::with_capacity(len)` + `set_len(len)` and then
/// wrote through `result[i]` — indexing into uninitialized memory is undefined
/// behavior. A zero-initialized buffer costs one memset and is sound.
///
/// # Safety
/// Caller must ensure the CPU supports AVX2 and that `bytes` is valid UTF-8
/// (all call sites pass ASCII-only data).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn to_lowercase_avx2(bytes: &[u8]) -> String {
    use std::arch::x86_64::*;
    let len = bytes.len();
    let mut result = vec![0u8; len];
    // Range bounds hoisted out of the loop.
    let below_a = _mm256_set1_epi8(b'A' as i8 - 1);
    let above_z = _mm256_set1_epi8(b'Z' as i8 + 1);
    let case_bit = _mm256_set1_epi8(0x20);
    let mut i = 0;
    while i + 32 <= len {
        let chunk = _mm256_loadu_si256(bytes.as_ptr().add(i) as *const __m256i);
        let is_upper = _mm256_and_si256(
            _mm256_cmpgt_epi8(chunk, below_a),
            _mm256_cmpgt_epi8(above_z, chunk),
        );
        // Add 0x20 only where the uppercase mask is set.
        let delta = _mm256_and_si256(is_upper, case_bit);
        _mm256_storeu_si256(
            result.as_mut_ptr().add(i) as *mut __m256i,
            _mm256_add_epi8(chunk, delta),
        );
        i += 32;
    }
    while i < len {
        result[i] = bytes[i].to_ascii_lowercase();
        i += 1;
    }
    // SAFETY: input is valid UTF-8 and ASCII case mapping keeps every
    // rewritten byte in the ASCII range, so `result` is valid UTF-8.
    String::from_utf8_unchecked(result)
}
/// Lowercases `bytes` in 16-byte SSE2 blocks plus a scalar tail.
///
/// Fix: the original did `Vec::with_capacity(len)` + `set_len(len)` and then
/// wrote through `result[i]` — indexing into uninitialized memory is undefined
/// behavior. A zero-initialized buffer costs one memset and is sound.
///
/// # Safety
/// Caller must ensure the CPU supports SSE2 and that `bytes` is valid UTF-8
/// (all call sites pass ASCII-only data).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn to_lowercase_sse2(bytes: &[u8]) -> String {
    use std::arch::x86_64::*;
    let len = bytes.len();
    let mut result = vec![0u8; len];
    // Range bounds hoisted out of the loop.
    let below_a = _mm_set1_epi8(b'A' as i8 - 1);
    let above_z = _mm_set1_epi8(b'Z' as i8 + 1);
    let case_bit = _mm_set1_epi8(0x20);
    let mut i = 0;
    while i + 16 <= len {
        let chunk = _mm_loadu_si128(bytes.as_ptr().add(i) as *const __m128i);
        let is_upper = _mm_and_si128(
            _mm_cmpgt_epi8(chunk, below_a),
            _mm_cmpgt_epi8(above_z, chunk),
        );
        // Add 0x20 only where the uppercase mask is set.
        let delta = _mm_and_si128(is_upper, case_bit);
        _mm_storeu_si128(
            result.as_mut_ptr().add(i) as *mut __m128i,
            _mm_add_epi8(chunk, delta),
        );
        i += 16;
    }
    while i < len {
        result[i] = bytes[i].to_ascii_lowercase();
        i += 1;
    }
    // SAFETY: input is valid UTF-8 and ASCII case mapping keeps every
    // rewritten byte in the ASCII range, so `result` is valid UTF-8.
    String::from_utf8_unchecked(result)
}
/// Counts ASCII digits (`0`-`9`) in `s`, using SIMD for long inputs.
/// Non-ASCII bytes are never counted (they read as negative `i8` in the
/// SIMD compares and fall outside the digit range in the scalar path).
pub fn count_digits_simd(s: &str) -> usize {
    let data = s.as_bytes();
    #[cfg(target_arch = "x86_64")]
    {
        if data.len() >= 32 && has_avx2() {
            return unsafe { count_digits_avx2(data) };
        }
        if data.len() >= 16 && has_sse2() {
            return unsafe { count_digits_sse2(data) };
        }
    }
    data.iter().filter(|b| b.is_ascii_digit()).count()
}
/// Counts ASCII letters (`a`-`z`, `A`-`Z`) in `s`, using SIMD for long inputs.
pub fn count_alpha_simd(s: &str) -> usize {
    let data = s.as_bytes();
    #[cfg(target_arch = "x86_64")]
    {
        if data.len() >= 32 && has_avx2() {
            return unsafe { count_alpha_avx2(data) };
        }
        if data.len() >= 16 && has_sse2() {
            return unsafe { count_alpha_sse2(data) };
        }
    }
    data.iter().filter(|b| b.is_ascii_alphabetic()).count()
}
/// Counts whitespace bytes in `s`, using SIMD for long inputs.
/// Whitespace here is exactly space, tab, LF, and CR — deliberately NOT
/// `is_ascii_whitespace`, which would also count form feed (0x0C).
pub fn count_whitespace_simd(s: &str) -> usize {
    let data = s.as_bytes();
    #[cfg(target_arch = "x86_64")]
    {
        if data.len() >= 32 && has_avx2() {
            return unsafe { count_whitespace_avx2(data) };
        }
        if data.len() >= 16 && has_sse2() {
            return unsafe { count_whitespace_sse2(data) };
        }
    }
    data.iter()
        .filter(|&&b| matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
        .count()
}
/// AVX2 digit count: range test per 32-byte block, popcount of the hit mask,
/// then an iterator pass over the remainder.
///
/// # Safety
/// Caller must ensure the CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_digits_avx2(bytes: &[u8]) -> usize {
    use std::arch::x86_64::*;
    // ('0'-1, '9'+1) turn the inclusive range into two signed compares.
    let below_0 = _mm256_set1_epi8(b'0' as i8 - 1);
    let above_9 = _mm256_set1_epi8(b'9' as i8 + 1);
    let mut total = 0usize;
    let mut blocks = bytes.chunks_exact(32);
    for block in blocks.by_ref() {
        let v = _mm256_loadu_si256(block.as_ptr() as *const __m256i);
        let digit = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, below_0),
            _mm256_cmpgt_epi8(above_9, v),
        );
        total += (_mm256_movemask_epi8(digit) as u32).count_ones() as usize;
    }
    total + blocks.remainder().iter().filter(|b| b.is_ascii_digit()).count()
}
/// SSE2 digit count: range test per 16-byte block, popcount of the hit mask,
/// then an iterator pass over the remainder.
///
/// # Safety
/// Caller must ensure the CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn count_digits_sse2(bytes: &[u8]) -> usize {
    use std::arch::x86_64::*;
    // ('0'-1, '9'+1) turn the inclusive range into two signed compares.
    let below_0 = _mm_set1_epi8(b'0' as i8 - 1);
    let above_9 = _mm_set1_epi8(b'9' as i8 + 1);
    let mut total = 0usize;
    let mut blocks = bytes.chunks_exact(16);
    for block in blocks.by_ref() {
        let v = _mm_loadu_si128(block.as_ptr() as *const __m128i);
        let digit = _mm_and_si128(_mm_cmpgt_epi8(v, below_0), _mm_cmpgt_epi8(above_9, v));
        total += (_mm_movemask_epi8(digit) as u32).count_ones() as usize;
    }
    total + blocks.remainder().iter().filter(|b| b.is_ascii_digit()).count()
}
/// AVX2 letter count: two range tests (lower, upper) per 32-byte block,
/// OR-combined, popcount of the hit mask, then an iterator remainder pass.
///
/// # Safety
/// Caller must ensure the CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_alpha_avx2(bytes: &[u8]) -> usize {
    use std::arch::x86_64::*;
    // Exclusive bounds hoisted out of the loop.
    let below_lo = _mm256_set1_epi8(b'a' as i8 - 1);
    let above_lo = _mm256_set1_epi8(b'z' as i8 + 1);
    let below_up = _mm256_set1_epi8(b'A' as i8 - 1);
    let above_up = _mm256_set1_epi8(b'Z' as i8 + 1);
    let mut total = 0usize;
    let mut blocks = bytes.chunks_exact(32);
    for block in blocks.by_ref() {
        let v = _mm256_loadu_si256(block.as_ptr() as *const __m256i);
        let lower = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, below_lo),
            _mm256_cmpgt_epi8(above_lo, v),
        );
        let upper = _mm256_and_si256(
            _mm256_cmpgt_epi8(v, below_up),
            _mm256_cmpgt_epi8(above_up, v),
        );
        let alpha = _mm256_or_si256(lower, upper);
        total += (_mm256_movemask_epi8(alpha) as u32).count_ones() as usize;
    }
    total
        + blocks
            .remainder()
            .iter()
            .filter(|b| b.is_ascii_alphabetic())
            .count()
}
/// SSE2 letter count: two range tests (lower, upper) per 16-byte block,
/// OR-combined, popcount of the hit mask, then an iterator remainder pass.
///
/// # Safety
/// Caller must ensure the CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn count_alpha_sse2(bytes: &[u8]) -> usize {
    use std::arch::x86_64::*;
    // Exclusive bounds hoisted out of the loop.
    let below_lo = _mm_set1_epi8(b'a' as i8 - 1);
    let above_lo = _mm_set1_epi8(b'z' as i8 + 1);
    let below_up = _mm_set1_epi8(b'A' as i8 - 1);
    let above_up = _mm_set1_epi8(b'Z' as i8 + 1);
    let mut total = 0usize;
    let mut blocks = bytes.chunks_exact(16);
    for block in blocks.by_ref() {
        let v = _mm_loadu_si128(block.as_ptr() as *const __m128i);
        let lower = _mm_and_si128(_mm_cmpgt_epi8(v, below_lo), _mm_cmpgt_epi8(above_lo, v));
        let upper = _mm_and_si128(_mm_cmpgt_epi8(v, below_up), _mm_cmpgt_epi8(above_up, v));
        let alpha = _mm_or_si128(lower, upper);
        total += (_mm_movemask_epi8(alpha) as u32).count_ones() as usize;
    }
    total
        + blocks
            .remainder()
            .iter()
            .filter(|b| b.is_ascii_alphabetic())
            .count()
}
/// AVX2 whitespace count (space, tab, LF, CR): four equality masks per
/// 32-byte block, OR-combined, popcount, then an iterator remainder pass.
///
/// # Safety
/// Caller must ensure the CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_whitespace_avx2(bytes: &[u8]) -> usize {
    use std::arch::x86_64::*;
    let space = _mm256_set1_epi8(b' ' as i8);
    let tab = _mm256_set1_epi8(b'\t' as i8);
    let lf = _mm256_set1_epi8(b'\n' as i8);
    let cr = _mm256_set1_epi8(b'\r' as i8);
    let mut total = 0usize;
    let mut blocks = bytes.chunks_exact(32);
    for block in blocks.by_ref() {
        let v = _mm256_loadu_si256(block.as_ptr() as *const __m256i);
        let ws = _mm256_or_si256(
            _mm256_or_si256(_mm256_cmpeq_epi8(v, space), _mm256_cmpeq_epi8(v, tab)),
            _mm256_or_si256(_mm256_cmpeq_epi8(v, lf), _mm256_cmpeq_epi8(v, cr)),
        );
        total += (_mm256_movemask_epi8(ws) as u32).count_ones() as usize;
    }
    total
        + blocks
            .remainder()
            .iter()
            .filter(|&&b| matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
            .count()
}
/// SSE2 whitespace count (space, tab, LF, CR): four equality masks per
/// 16-byte block, OR-combined, popcount, then an iterator remainder pass.
///
/// # Safety
/// Caller must ensure the CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn count_whitespace_sse2(bytes: &[u8]) -> usize {
    use std::arch::x86_64::*;
    let space = _mm_set1_epi8(b' ' as i8);
    let tab = _mm_set1_epi8(b'\t' as i8);
    let lf = _mm_set1_epi8(b'\n' as i8);
    let cr = _mm_set1_epi8(b'\r' as i8);
    let mut total = 0usize;
    let mut blocks = bytes.chunks_exact(16);
    for block in blocks.by_ref() {
        let v = _mm_loadu_si128(block.as_ptr() as *const __m128i);
        let ws = _mm_or_si128(
            _mm_or_si128(_mm_cmpeq_epi8(v, space), _mm_cmpeq_epi8(v, tab)),
            _mm_or_si128(_mm_cmpeq_epi8(v, lf), _mm_cmpeq_epi8(v, cr)),
        );
        total += (_mm_movemask_epi8(ws) as u32).count_ones() as usize;
    }
    total
        + blocks
            .remainder()
            .iter()
            .filter(|&&b| matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
            .count()
}
/// Returns the index of the first occurrence of `needle` in `haystack`,
/// or `None` if absent; SIMD-accelerated for long inputs.
pub fn find_byte_simd(haystack: &[u8], needle: u8) -> Option<usize> {
    let n = haystack.len();
    #[cfg(target_arch = "x86_64")]
    {
        if n >= 32 && has_avx2() {
            return unsafe { find_byte_avx2(haystack, needle) };
        }
        if n >= 16 && has_sse2() {
            return unsafe { find_byte_sse2(haystack, needle) };
        }
    }
    let _ = n;
    haystack.iter().position(|&b| b == needle)
}
/// Counts occurrences of `needle` in `haystack`; SIMD-accelerated for
/// long inputs.
pub fn count_byte_simd(haystack: &[u8], needle: u8) -> usize {
    let n = haystack.len();
    #[cfg(target_arch = "x86_64")]
    {
        if n >= 32 && has_avx2() {
            return unsafe { count_byte_avx2(haystack, needle) };
        }
        if n >= 16 && has_sse2() {
            return unsafe { count_byte_sse2(haystack, needle) };
        }
    }
    let _ = n;
    haystack.iter().filter(|&&b| b == needle).count()
}
/// AVX2 first-occurrence search: equality mask per 32-byte block; the
/// trailing-zero count of the first non-zero mask gives the in-block offset.
/// The tail is handled with `position` on the remaining slice.
///
/// # Safety
/// Caller must ensure the CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn find_byte_avx2(haystack: &[u8], needle: u8) -> Option<usize> {
    use std::arch::x86_64::*;
    let target = _mm256_set1_epi8(needle as i8);
    let len = haystack.len();
    let mut offset = 0;
    while offset + 32 <= len {
        let v = _mm256_loadu_si256(haystack.as_ptr().add(offset) as *const __m256i);
        let hits = _mm256_movemask_epi8(_mm256_cmpeq_epi8(v, target)) as u32;
        if hits != 0 {
            return Some(offset + hits.trailing_zeros() as usize);
        }
        offset += 32;
    }
    haystack[offset..]
        .iter()
        .position(|&b| b == needle)
        .map(|p| offset + p)
}
/// SSE2 first-occurrence search: equality mask per 16-byte block; the
/// trailing-zero count of the first non-zero mask gives the in-block offset.
/// The tail is handled with `position` on the remaining slice.
///
/// # Safety
/// Caller must ensure the CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn find_byte_sse2(haystack: &[u8], needle: u8) -> Option<usize> {
    use std::arch::x86_64::*;
    let target = _mm_set1_epi8(needle as i8);
    let len = haystack.len();
    let mut offset = 0;
    while offset + 16 <= len {
        let v = _mm_loadu_si128(haystack.as_ptr().add(offset) as *const __m128i);
        let hits = _mm_movemask_epi8(_mm_cmpeq_epi8(v, target)) as u32;
        if hits != 0 {
            return Some(offset + hits.trailing_zeros() as usize);
        }
        offset += 16;
    }
    haystack[offset..]
        .iter()
        .position(|&b| b == needle)
        .map(|p| offset + p)
}
/// AVX2 byte count: equality mask per 32-byte block, popcount of each mask,
/// then an iterator pass over the remainder.
///
/// # Safety
/// Caller must ensure the CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_byte_avx2(haystack: &[u8], needle: u8) -> usize {
    use std::arch::x86_64::*;
    let target = _mm256_set1_epi8(needle as i8);
    let mut total = 0usize;
    let mut blocks = haystack.chunks_exact(32);
    for block in blocks.by_ref() {
        let v = _mm256_loadu_si256(block.as_ptr() as *const __m256i);
        let hits = _mm256_movemask_epi8(_mm256_cmpeq_epi8(v, target)) as u32;
        total += hits.count_ones() as usize;
    }
    total + blocks.remainder().iter().filter(|&&b| b == needle).count()
}
/// SSE2 byte count: equality mask per 16-byte block, popcount of each mask,
/// then an iterator pass over the remainder.
///
/// # Safety
/// Caller must ensure the CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn count_byte_sse2(haystack: &[u8], needle: u8) -> usize {
    use std::arch::x86_64::*;
    let target = _mm_set1_epi8(needle as i8);
    let mut total = 0usize;
    let mut blocks = haystack.chunks_exact(16);
    for block in blocks.by_ref() {
        let v = _mm_loadu_si128(block.as_ptr() as *const __m128i);
        let hits = _mm_movemask_epi8(_mm_cmpeq_epi8(v, target)) as u32;
        total += hits.count_ones() as usize;
    }
    total + blocks.remainder().iter().filter(|&&b| b == needle).count()
}
pub fn batch_uppercase(strings: &[String]) -> Vec<String> {
strings.iter().map(|s| to_uppercase_simd(s)).collect()
}
pub fn batch_lowercase(strings: &[String]) -> Vec<String> {
strings.iter().map(|s| to_lowercase_simd(s)).collect()
}
pub fn batch_is_ascii(strings: &[String]) -> Vec<bool> {
strings.iter().map(|s| is_ascii_simd(s)).collect()
}
pub fn batch_count_digits(strings: &[String]) -> Vec<usize> {
strings.iter().map(|s| count_digits_simd(s)).collect()
}
pub fn batch_count_alpha(strings: &[String]) -> Vec<usize> {
strings.iter().map(|s| count_alpha_simd(s)).collect()
}
pub fn batch_count_whitespace(strings: &[String]) -> Vec<usize> {
strings.iter().map(|s| count_whitespace_simd(s)).collect()
}
/// Parallel variant of `batch_uppercase`, distributing strings across
/// rayon's work-stealing thread pool.
pub fn parallel_batch_uppercase(strings: &[String]) -> Vec<String> {
    use rayon::prelude::*;
    strings.par_iter().map(|s| to_uppercase_simd(s.as_str())).collect()
}

/// Parallel variant of `batch_lowercase`.
pub fn parallel_batch_lowercase(strings: &[String]) -> Vec<String> {
    use rayon::prelude::*;
    strings.par_iter().map(|s| to_lowercase_simd(s.as_str())).collect()
}

/// Parallel variant of `batch_is_ascii`.
pub fn parallel_batch_is_ascii(strings: &[String]) -> Vec<bool> {
    use rayon::prelude::*;
    strings.par_iter().map(|s| is_ascii_simd(s.as_str())).collect()
}
/// Snapshot of detected SIMD capability plus per-path operation counters.
#[derive(Debug, Clone, Default)]
pub struct SimdStringStats {
    /// Runtime-detected AVX2 (256-bit) support.
    pub avx2_available: bool,
    /// Runtime-detected SSE2 (128-bit) support.
    pub sse2_available: bool,
    // Operations served by a SIMD path.
    // NOTE(review): never incremented anywhere in this file — confirm whether
    // callers update it or it is dead weight.
    pub simd_operations: u64,
    // Operations served by the scalar fallback.
    // NOTE(review): never incremented anywhere in this file — confirm intent.
    pub scalar_operations: u64,
}
impl SimdStringStats {
    /// Snapshots the detected CPU capabilities with zeroed counters.
    pub fn new() -> Self {
        detect_features();
        Self {
            avx2_available: AVX2_SUPPORTED.load(Ordering::Relaxed),
            sse2_available: SSE2_SUPPORTED.load(Ordering::Relaxed),
            ..Self::default()
        }
    }

    /// Human-readable name of the widest SIMD level available.
    pub fn simd_level(&self) -> &'static str {
        match (self.avx2_available, self.sse2_available) {
            (true, _) => "AVX2 (256-bit)",
            (false, true) => "SSE2 (128-bit)",
            (false, false) => "Scalar (no SIMD)",
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Each test mixes short inputs (scalar path) with long inputs so the
    // AVX2/SSE2 block loops are exercised on hardware that supports them.

    #[test]
    fn test_is_ascii_simd() {
        assert!(is_ascii_simd("Hello, World!"));
        assert!(is_ascii_simd("12345"));
        assert!(is_ascii_simd(""));
        assert!(!is_ascii_simd("Hello, 世界!"));
        assert!(!is_ascii_simd("café"));
        let long_ascii = "a".repeat(1000);
        assert!(is_ascii_simd(&long_ascii));
        // Non-ASCII suffix forces a rejection after a long ASCII prefix.
        let long_mixed = format!("{}世界", "a".repeat(100));
        assert!(!is_ascii_simd(&long_mixed));
    }

    #[test]
    fn test_to_uppercase_simd() {
        assert_eq!(to_uppercase_simd("hello"), "HELLO");
        assert_eq!(to_uppercase_simd("Hello World"), "HELLO WORLD");
        assert_eq!(to_uppercase_simd("123abc"), "123ABC");
        assert_eq!(to_uppercase_simd(""), "");
        let long = "hello ".repeat(100);
        let expected = "HELLO ".repeat(100);
        assert_eq!(to_uppercase_simd(&long), expected);
    }

    #[test]
    fn test_to_lowercase_simd() {
        assert_eq!(to_lowercase_simd("HELLO"), "hello");
        assert_eq!(to_lowercase_simd("Hello World"), "hello world");
        assert_eq!(to_lowercase_simd("123ABC"), "123abc");
        assert_eq!(to_lowercase_simd(""), "");
        let long = "HELLO ".repeat(100);
        let expected = "hello ".repeat(100);
        assert_eq!(to_lowercase_simd(&long), expected);
    }

    #[test]
    fn test_count_digits_simd() {
        assert_eq!(count_digits_simd("abc123def456"), 6);
        assert_eq!(count_digits_simd("no digits"), 0);
        assert_eq!(count_digits_simd("12345678901234567890"), 20);
        assert_eq!(count_digits_simd(""), 0);
        let long = "a1b2c3d4e5".repeat(50);
        assert_eq!(count_digits_simd(&long), 250);
    }

    #[test]
    fn test_count_alpha_simd() {
        assert_eq!(count_alpha_simd("abc123DEF"), 6);
        assert_eq!(count_alpha_simd("12345"), 0);
        assert_eq!(count_alpha_simd("AbCdEfGh"), 8);
        assert_eq!(count_alpha_simd(""), 0);
        let long = "a1b2c3".repeat(100);
        assert_eq!(count_alpha_simd(&long), 300);
    }

    #[test]
    fn test_count_whitespace_simd() {
        assert_eq!(count_whitespace_simd("hello world"), 1);
        assert_eq!(count_whitespace_simd("a\tb\nc\rd"), 3);
        assert_eq!(count_whitespace_simd("no_whitespace"), 0);
        // NOTE(review): the literal must contain six spaces in total (plus
        // \t, \n, \r) for the expected count of 9.
        assert_eq!(count_whitespace_simd("   \t\n\r   "), 9);
        let long = "a b ".repeat(100);
        assert_eq!(count_whitespace_simd(&long), 200);
    }

    #[test]
    fn test_find_byte_simd() {
        assert_eq!(find_byte_simd(b"hello world", b'o'), Some(4));
        assert_eq!(find_byte_simd(b"hello world", b'x'), None);
        assert_eq!(find_byte_simd(b"", b'a'), None);
        let long = "a".repeat(100) + "b";
        assert_eq!(find_byte_simd(long.as_bytes(), b'b'), Some(100));
    }

    #[test]
    fn test_count_byte_simd() {
        assert_eq!(count_byte_simd(b"hello world", b'o'), 2);
        assert_eq!(count_byte_simd(b"hello world", b'l'), 3);
        assert_eq!(count_byte_simd(b"hello world", b'x'), 0);
        let long = "aba".repeat(100);
        assert_eq!(count_byte_simd(long.as_bytes(), b'a'), 200);
    }

    #[test]
    fn test_batch_operations() {
        let strings = vec![
            "hello".to_string(),
            "WORLD".to_string(),
            "Test123".to_string(),
        ];
        let upper = batch_uppercase(&strings);
        assert_eq!(upper, vec!["HELLO", "WORLD", "TEST123"]);
        let lower = batch_lowercase(&strings);
        assert_eq!(lower, vec!["hello", "world", "test123"]);
        let ascii = batch_is_ascii(&strings);
        assert_eq!(ascii, vec![true, true, true]);
        let digits = batch_count_digits(&strings);
        assert_eq!(digits, vec![0, 0, 3]);
    }

    #[test]
    fn test_simd_stats() {
        // Capability values are hardware-dependent, so only consistency is
        // asserted here.
        let stats = SimdStringStats::new();
        println!("SIMD Level: {}", stats.simd_level());
        println!("AVX2: {}", stats.avx2_available);
        println!("SSE2: {}", stats.sse2_available);
        assert!(
            stats.avx2_available
                || stats.sse2_available
                || stats.simd_level() == "Scalar (no SIMD)"
        );
    }

    #[test]
    fn test_non_ascii_fallback() {
        // Non-ASCII input must route through the Unicode-aware fallback.
        assert_eq!(to_uppercase_simd("café"), "CAFÉ");
        assert_eq!(to_lowercase_simd("CAFÉ"), "café");
        assert_eq!(to_uppercase_simd("日本語"), "日本語");
    }
}