#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use tracing::trace;
/// Zero-sized helper that compares strings with SSE4.2 SIMD when the CPU
/// supports it, falling back to scalar `==` comparison otherwise.
#[derive(Default)]
pub struct SimdStringComparator;

impl SimdStringComparator {
    /// Creates a new comparator. Equivalent to `Self::default()`.
    pub fn new() -> Self {
        Self
    }

    /// Returns `true` if `a` and `b` are byte-for-byte equal.
    pub fn equals(&self, a: &str, b: &str) -> bool {
        self.equals_impl(a, b)
    }

    #[inline(always)]
    fn equals_impl(&self, a: &str, b: &str) -> bool {
        // Different lengths can never be equal; this also guarantees the
        // SIMD path below only ever sees equal-length slices.
        if a.len() != b.len() {
            return false;
        }
        // Same pointer and length: trivially equal, skip all byte work.
        if std::ptr::eq(a, b) {
            return true;
        }
        #[cfg(target_arch = "x86_64")]
        {
            if is_x86_feature_detected!("sse4.2") {
                // SAFETY: the runtime check above proves SSE4.2 is
                // available, and the length check above guarantees
                // `a.len() == b.len()` — the two preconditions of
                // `equals_simd`.
                return unsafe { self.equals_simd(a.as_bytes(), b.as_bytes()) };
            }
        }
        a == b
    }

    /// Compares two equal-length byte slices 16 bytes at a time.
    ///
    /// # Safety
    ///
    /// The caller must ensure the CPU supports SSE4.2 and that
    /// `a.len() == b.len()`.
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "sse4.2")]
    unsafe fn equals_simd(&self, a: &[u8], b: &[u8]) -> bool {
        let len = a.len();
        let mut i = 0;
        while i + 16 <= len {
            // SAFETY: `i + 16 <= len` keeps both 16-byte unaligned loads
            // in bounds; `_mm_loadu_si128` has no alignment requirement.
            let chunk_a = _mm_loadu_si128(a.as_ptr().add(i) as *const __m128i);
            let chunk_b = _mm_loadu_si128(b.as_ptr().add(i) as *const __m128i);
            let cmp = _mm_cmpeq_epi8(chunk_a, chunk_b);
            // Each of the 16 byte lanes contributes one mask bit; an
            // all-equal chunk yields exactly 0xFFFF.
            if _mm_movemask_epi8(cmp) != 0xFFFF {
                return false;
            }
            i += 16;
        }
        // Compare the sub-16-byte tail with a plain slice comparison.
        a[i..] == b[i..]
    }

    /// Returns the indices of every entry in `candidates` that equals
    /// `reference`.
    pub fn find_matches(&self, reference: &str, candidates: &[&str]) -> Vec<usize> {
        candidates
            .iter()
            .enumerate()
            .filter(|&(_, s)| self.equals(reference, s))
            .map(|(idx, _)| idx)
            .collect()
    }
}
/// Zero-sized helper providing hardware-accelerated bitmap operations
/// (population count via POPCNT, bytewise AND/OR/XOR via SSE4.2) with
/// scalar fallbacks for other CPUs and architectures.
#[derive(Default)]
pub struct SimdBitmapOps;

impl SimdBitmapOps {
    /// Creates a new instance. Equivalent to `Self::default()`.
    pub fn new() -> Self {
        Self
    }

    /// Counts the number of set bits in `bitmap`.
    pub fn popcount(&self, bitmap: &[u8]) -> usize {
        self.popcount_impl(bitmap)
    }

    #[inline(always)]
    fn popcount_impl(&self, bitmap: &[u8]) -> usize {
        #[cfg(target_arch = "x86_64")]
        {
            if is_x86_feature_detected!("popcnt") {
                // SAFETY: the runtime check proves POPCNT is available,
                // the only precondition of `popcount_simd`.
                return unsafe { self.popcount_simd(bitmap) };
            }
        }
        bitmap.iter().map(|b| b.count_ones() as usize).sum()
    }

    /// Counts set bits eight bytes at a time; with POPCNT enabled,
    /// `u64::count_ones` compiles to the hardware instruction.
    ///
    /// # Safety
    ///
    /// The caller must ensure the CPU supports POPCNT.
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "popcnt")]
    unsafe fn popcount_simd(&self, bitmap: &[u8]) -> usize {
        // `chunks_exact` guarantees every chunk is exactly 8 bytes, so the
        // conversion below cannot fail and no raw-pointer reads are needed.
        let chunks = bitmap.chunks_exact(8);
        let tail: usize = chunks
            .remainder()
            .iter()
            .map(|b| b.count_ones() as usize)
            .sum();
        let body: usize = chunks
            .map(|c| u64::from_le_bytes(c.try_into().unwrap()).count_ones() as usize)
            .sum();
        body + tail
    }

    /// Writes the bytewise AND of `a` and `b` into `output`.
    ///
    /// # Panics
    ///
    /// Panics if `a`, `b` and `output` do not all have the same length.
    pub fn bitmap_and(&self, a: &[u8], b: &[u8], output: &mut [u8]) {
        self.bitmap_and_impl(a, b, output);
    }

    #[inline(always)]
    fn bitmap_and_impl(&self, a: &[u8], b: &[u8], output: &mut [u8]) {
        assert_eq!(a.len(), b.len());
        assert_eq!(a.len(), output.len());
        #[cfg(target_arch = "x86_64")]
        {
            if is_x86_feature_detected!("sse4.2") {
                // SAFETY: SSE4.2 availability was just verified, and the
                // asserts above guarantee all three slices are equal length.
                unsafe { self.bitmap_and_simd(a, b, output) };
                return;
            }
        }
        // Zipped iteration avoids a bounds check per element.
        for (out, (&x, &y)) in output.iter_mut().zip(a.iter().zip(b)) {
            *out = x & y;
        }
    }

    /// SSE4.2 implementation of bytewise AND, 16 bytes per iteration.
    ///
    /// # Safety
    ///
    /// The caller must ensure the CPU supports SSE4.2 and that `a`, `b`
    /// and `output` all have the same length.
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "sse4.2")]
    unsafe fn bitmap_and_simd(&self, a: &[u8], b: &[u8], output: &mut [u8]) {
        let len = a.len();
        let mut i = 0;
        while i + 16 <= len {
            // SAFETY: `i + 16 <= len` keeps every unaligned load/store in
            // bounds of all three equal-length slices.
            let chunk_a = _mm_loadu_si128(a.as_ptr().add(i) as *const __m128i);
            let chunk_b = _mm_loadu_si128(b.as_ptr().add(i) as *const __m128i);
            _mm_storeu_si128(
                output.as_mut_ptr().add(i) as *mut __m128i,
                _mm_and_si128(chunk_a, chunk_b),
            );
            i += 16;
        }
        // Scalar handling of the final sub-16-byte remainder.
        for j in i..len {
            output[j] = a[j] & b[j];
        }
    }

    /// Writes the bytewise OR of `a` and `b` into `output`.
    ///
    /// # Panics
    ///
    /// Panics if `a`, `b` and `output` do not all have the same length.
    pub fn bitmap_or(&self, a: &[u8], b: &[u8], output: &mut [u8]) {
        self.bitmap_or_impl(a, b, output);
    }

    #[inline(always)]
    fn bitmap_or_impl(&self, a: &[u8], b: &[u8], output: &mut [u8]) {
        assert_eq!(a.len(), b.len());
        assert_eq!(a.len(), output.len());
        #[cfg(target_arch = "x86_64")]
        {
            if is_x86_feature_detected!("sse4.2") {
                // SAFETY: SSE4.2 availability was just verified, and the
                // asserts above guarantee all three slices are equal length.
                unsafe { self.bitmap_or_simd(a, b, output) };
                return;
            }
        }
        // Zipped iteration avoids a bounds check per element.
        for (out, (&x, &y)) in output.iter_mut().zip(a.iter().zip(b)) {
            *out = x | y;
        }
    }

    /// SSE4.2 implementation of bytewise OR, 16 bytes per iteration.
    ///
    /// # Safety
    ///
    /// The caller must ensure the CPU supports SSE4.2 and that `a`, `b`
    /// and `output` all have the same length.
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "sse4.2")]
    unsafe fn bitmap_or_simd(&self, a: &[u8], b: &[u8], output: &mut [u8]) {
        let len = a.len();
        let mut i = 0;
        while i + 16 <= len {
            // SAFETY: `i + 16 <= len` keeps every unaligned load/store in
            // bounds of all three equal-length slices.
            let chunk_a = _mm_loadu_si128(a.as_ptr().add(i) as *const __m128i);
            let chunk_b = _mm_loadu_si128(b.as_ptr().add(i) as *const __m128i);
            _mm_storeu_si128(
                output.as_mut_ptr().add(i) as *mut __m128i,
                _mm_or_si128(chunk_a, chunk_b),
            );
            i += 16;
        }
        // Scalar handling of the final sub-16-byte remainder.
        for j in i..len {
            output[j] = a[j] | b[j];
        }
    }

    /// Writes the bytewise XOR of `a` and `b` into `output`.
    ///
    /// # Panics
    ///
    /// Panics if `a`, `b` and `output` do not all have the same length.
    pub fn bitmap_xor(&self, a: &[u8], b: &[u8], output: &mut [u8]) {
        self.bitmap_xor_impl(a, b, output);
    }

    #[inline(always)]
    fn bitmap_xor_impl(&self, a: &[u8], b: &[u8], output: &mut [u8]) {
        assert_eq!(a.len(), b.len());
        assert_eq!(a.len(), output.len());
        #[cfg(target_arch = "x86_64")]
        {
            if is_x86_feature_detected!("sse4.2") {
                // SAFETY: SSE4.2 availability was just verified, and the
                // asserts above guarantee all three slices are equal length.
                unsafe { self.bitmap_xor_simd(a, b, output) };
                return;
            }
        }
        // Zipped iteration avoids a bounds check per element.
        for (out, (&x, &y)) in output.iter_mut().zip(a.iter().zip(b)) {
            *out = x ^ y;
        }
    }

    /// SSE4.2 implementation of bytewise XOR, 16 bytes per iteration.
    ///
    /// # Safety
    ///
    /// The caller must ensure the CPU supports SSE4.2 and that `a`, `b`
    /// and `output` all have the same length.
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "sse4.2")]
    unsafe fn bitmap_xor_simd(&self, a: &[u8], b: &[u8], output: &mut [u8]) {
        let len = a.len();
        let mut i = 0;
        while i + 16 <= len {
            // SAFETY: `i + 16 <= len` keeps every unaligned load/store in
            // bounds of all three equal-length slices.
            let chunk_a = _mm_loadu_si128(a.as_ptr().add(i) as *const __m128i);
            let chunk_b = _mm_loadu_si128(b.as_ptr().add(i) as *const __m128i);
            _mm_storeu_si128(
                output.as_mut_ptr().add(i) as *mut __m128i,
                _mm_xor_si128(chunk_a, chunk_b),
            );
            i += 16;
        }
        // Scalar handling of the final sub-16-byte remainder.
        for j in i..len {
            output[j] = a[j] ^ b[j];
        }
    }
}
#[derive(Default)]
pub struct SimdCompressionAnalyzer;
impl SimdCompressionAnalyzer {
pub fn new() -> Self {
Self
}
pub fn analyze_distribution(&self, data: &[u8]) -> (usize, f64) {
let mut histogram = [0u32; 256];
#[cfg(target_arch = "x86_64")]
{
if is_x86_feature_detected!("sse4.2") {
unsafe { self.build_histogram_simd(data, &mut histogram) };
} else {
self.build_histogram_scalar(data, &mut histogram);
}
}
#[cfg(not(target_arch = "x86_64"))]
{
self.build_histogram_scalar(data, &mut histogram);
}
let unique_bytes = histogram.iter().filter(|&&count| count > 0).count();
let total = data.len() as f64;
let max_freq = *histogram.iter().max().unwrap_or(&0) as f64;
let repetition_score = if total > 0.0 { max_freq / total } else { 0.0 };
trace!(
"Distribution analysis: {} unique bytes, {:.2}% repetition",
unique_bytes,
repetition_score * 100.0
);
(unique_bytes, repetition_score)
}
fn build_histogram_scalar(&self, data: &[u8], histogram: &mut [u32; 256]) {
for &byte in data {
histogram[byte as usize] += 1;
}
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn build_histogram_simd(&self, data: &[u8], histogram: &mut [u32; 256]) {
let mut i = 0;
while i + 16 <= data.len() {
let chunk = _mm_loadu_si128(data.as_ptr().add(i) as *const __m128i);
let bytes: [u8; 16] = std::mem::transmute(chunk);
for &byte in &bytes {
histogram[byte as usize] += 1;
}
i += 16;
}
for &byte in &data[i..] {
histogram[byte as usize] += 1;
}
}
pub fn recommend_compression_level(&self, data: &[u8]) -> u8 {
let (unique_bytes, repetition_score) = self.analyze_distribution(data);
if unique_bytes > 200 && repetition_score < 0.2 {
3
} else if repetition_score > 0.5 {
9
} else if unique_bytes < 100 {
7
} else {
6
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // Equality must agree with `==` for empty, short, and >16-byte inputs
    // (the latter exercise the SIMD chunk loop plus its scalar tail).
    #[test]
    fn test_simd_string_comparator_equals() {
        let cmp = SimdStringComparator::new();
        let cases = [
            ("hello", "hello", true),
            ("", "", true),
            ("hello", "world", false),
            ("hello", "hello2", false),
            ("short", "longer_string", false),
        ];
        for (lhs, rhs, expected) in cases {
            assert_eq!(cmp.equals(lhs, rhs), expected);
        }
        let long_a = "this_is_a_very_long_string_that_exceeds_16_bytes";
        let long_b = "this_is_a_very_long_string_that_exceeds_16_bytes";
        let long_c = "this_is_a_different_long_string_that_exceeds_16";
        assert!(cmp.equals(long_a, long_b));
        assert!(!cmp.equals(long_a, long_c));
    }

    // find_matches returns the index of every exact match, in order.
    #[test]
    fn test_simd_string_comparator_find_matches() {
        let cmp = SimdStringComparator::new();
        let pool = vec!["target", "other", "target", "nope", "target"];
        let hits = cmp.find_matches("target", &pool);
        assert_eq!(hits, vec![0, 2, 4]);
    }

    // Popcount over all-zero, all-one, and alternating-bit buffers.
    #[test]
    fn test_simd_bitmap_popcount() {
        let ops = SimdBitmapOps::new();
        assert_eq!(ops.popcount(&vec![0u8; 32]), 0);
        assert_eq!(ops.popcount(&vec![0xFFu8; 32]), 32 * 8);
        assert_eq!(ops.popcount(&vec![0b10101010u8; 16]), 16 * 4);
    }

    #[test]
    fn test_simd_bitmap_and() {
        let ops = SimdBitmapOps::new();
        let lhs = vec![0b11110000u8; 32];
        let rhs = vec![0b10101010u8; 32];
        let mut result = vec![0u8; 32];
        ops.bitmap_and(&lhs, &rhs, &mut result);
        assert!(result.iter().all(|&byte| byte == 0b10100000));
    }

    #[test]
    fn test_simd_bitmap_or() {
        let ops = SimdBitmapOps::new();
        let lhs = vec![0b11110000u8; 32];
        let rhs = vec![0b00001111u8; 32];
        let mut result = vec![0u8; 32];
        ops.bitmap_or(&lhs, &rhs, &mut result);
        assert!(result.iter().all(|&byte| byte == 0b11111111));
    }

    #[test]
    fn test_simd_bitmap_xor() {
        let ops = SimdBitmapOps::new();
        let lhs = vec![0b11110000u8; 32];
        let rhs = vec![0b11110000u8; 32];
        let mut result = vec![0u8; 32];
        ops.bitmap_xor(&lhs, &rhs, &mut result);
        assert!(result.iter().all(|&byte| byte == 0b00000000));
    }

    // A constant buffer has one unique byte and ~100% repetition; a full
    // 0..=255 cycle has 256 unique bytes and low repetition.
    #[test]
    fn test_simd_compression_analyzer_distribution() {
        let analyzer = SimdCompressionAnalyzer::new();
        let uniform = vec![b'A'; 1000];
        let (distinct, score) = analyzer.analyze_distribution(&uniform);
        assert_eq!(distinct, 1);
        assert!(score > 0.99);
        let varied: Vec<u8> = (0..=255).cycle().take(1024).collect();
        let (distinct, score) = analyzer.analyze_distribution(&varied);
        assert_eq!(distinct, 256);
        assert!(score < 0.1);
    }

    // Repetitive data should get a high level, random-like data a low one.
    #[test]
    fn test_simd_compression_analyzer_recommend_level() {
        let analyzer = SimdCompressionAnalyzer::new();
        let repetitive = vec![b'A'; 1000];
        assert!(analyzer.recommend_compression_level(&repetitive) >= 7);
        let random_like: Vec<u8> = (0..=255).cycle().take(1024).collect();
        assert!(analyzer.recommend_compression_level(&random_like) <= 5);
    }

    // 1 KiB buffers exercise many full 16-byte SIMD iterations.
    #[test]
    fn test_simd_bitmap_large_operations() {
        let ops = SimdBitmapOps::new();
        let lhs = vec![0b10101010u8; 1024];
        let rhs = vec![0b11001100u8; 1024];
        let mut result = vec![0u8; 1024];
        ops.bitmap_and(&lhs, &rhs, &mut result);
        assert!(result.iter().all(|&byte| byte == 0b10001000));
        ops.bitmap_or(&lhs, &rhs, &mut result);
        assert!(result.iter().all(|&byte| byte == 0b11101110));
        ops.bitmap_xor(&lhs, &rhs, &mut result);
        assert!(result.iter().all(|&byte| byte == 0b01100110));
    }
}