pub mod popcount {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use core::arch::x86_64::*;
/// Returns the total number of set bits across all words of `data`.
///
/// On x86/x86_64 the AVX2 kernel is tried first and the SSE4.2 kernel second;
/// both are additionally gated on the `popcnt` feature. If neither is selected,
/// aarch64 builds use the NEON-named helper and all other targets use the
/// portable scalar helper.
pub fn popcount_u64_slice(data: &[u64]) -> usize {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        if crate::simd_feature_detected!("avx2") && crate::simd_feature_detected!("popcnt") {
            // SAFETY: only reached after the avx2 + popcnt checks above
            // (assumes `simd_feature_detected!` reflects real CPU support).
            return unsafe { popcount_u64_slice_avx2(data) };
        }
        if crate::simd_feature_detected!("sse4.2") && crate::simd_feature_detected!("popcnt") {
            // SAFETY: only reached after the sse4.2 + popcnt checks above.
            return unsafe { popcount_u64_slice_sse42(data) };
        }
    }
    // Exactly one of the two cfg-gated blocks below survives compilation and
    // becomes the function's tail expression.
    #[cfg(target_arch = "aarch64")]
    {
        popcount_u64_slice_neon(data)
    }
    #[cfg(not(target_arch = "aarch64"))]
    {
        popcount_u64_slice_scalar(data)
    }
}
/// Returns the total number of set bits across all words of `data`.
///
/// Dispatch mirrors the 64-bit variant: AVX2 first, then SSE4.2 (both also
/// require `popcnt`) on x86/x86_64; otherwise the NEON-named helper on aarch64
/// or the portable scalar helper elsewhere.
pub fn popcount_u32_slice(data: &[u32]) -> usize {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        if crate::simd_feature_detected!("avx2") && crate::simd_feature_detected!("popcnt") {
            // SAFETY: only reached after the avx2 + popcnt checks above
            // (assumes `simd_feature_detected!` reflects real CPU support).
            return unsafe { popcount_u32_slice_avx2(data) };
        }
        if crate::simd_feature_detected!("sse4.2") && crate::simd_feature_detected!("popcnt") {
            // SAFETY: only reached after the sse4.2 + popcnt checks above.
            return unsafe { popcount_u32_slice_sse42(data) };
        }
    }
    // Exactly one of the two cfg-gated blocks below survives compilation and
    // becomes the function's tail expression.
    #[cfg(target_arch = "aarch64")]
    {
        popcount_u32_slice_neon(data)
    }
    #[cfg(not(target_arch = "aarch64"))]
    {
        popcount_u32_slice_scalar(data)
    }
}
/// Portable fallback: sums `count_ones()` over every 64-bit word.
#[allow(dead_code)] // not called on targets whose dispatcher always picks another helper
fn popcount_u64_slice_scalar(data: &[u64]) -> usize {
    let mut bits = 0usize;
    for word in data {
        bits += word.count_ones() as usize;
    }
    bits
}
/// Portable fallback: sums `count_ones()` over every 32-bit word.
#[allow(dead_code)] // not called on targets whose dispatcher always picks another helper
fn popcount_u32_slice_scalar(data: &[u32]) -> usize {
    let mut bits = 0usize;
    for word in data {
        bits += word.count_ones() as usize;
    }
    bits
}
/// Counts set bits in `data`, loading four words at a time with an unaligned
/// 256-bit load and applying `popcnt` to each extracted lane.
///
/// # Safety
/// The caller must ensure the CPU supports the `avx2` and `popcnt` features.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,popcnt")]
unsafe fn popcount_u64_slice_avx2(data: &[u64]) -> usize {
    let blocks = data.chunks_exact(4);
    let tail = blocks.remainder();
    let mut bits = 0usize;
    for block in blocks {
        // `chunks_exact(4)` guarantees 32 readable bytes behind the pointer.
        let lanes = _mm256_loadu_si256(block.as_ptr() as *const __m256i);
        bits += _popcnt64(_mm256_extract_epi64(lanes, 0)) as usize
            + _popcnt64(_mm256_extract_epi64(lanes, 1)) as usize
            + _popcnt64(_mm256_extract_epi64(lanes, 2)) as usize
            + _popcnt64(_mm256_extract_epi64(lanes, 3)) as usize;
    }
    // Fewer than four words remain; count them one by one.
    for word in tail.iter().copied() {
        bits += _popcnt64(word as i64) as usize;
    }
    bits
}
/// Counts set bits in `data` by applying the `popcnt` instruction to each word.
///
/// # Safety
/// The caller must ensure the CPU supports the `sse4.2` and `popcnt` features.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2,popcnt")]
unsafe fn popcount_u64_slice_sse42(data: &[u64]) -> usize {
    let mut bits = 0usize;
    for word in data.iter().copied() {
        bits += _popcnt64(word as i64) as usize;
    }
    bits
}
/// Counts set bits in `data`, loading eight words at a time with an unaligned
/// 256-bit load and applying `popcnt` to each extracted 32-bit lane.
///
/// # Safety
/// The caller must ensure the CPU supports the `avx2` and `popcnt` features.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,popcnt")]
unsafe fn popcount_u32_slice_avx2(data: &[u32]) -> usize {
    let blocks = data.chunks_exact(8);
    let tail = blocks.remainder();
    let mut bits = 0usize;
    for block in blocks {
        // `chunks_exact(8)` guarantees 32 readable bytes behind the pointer.
        let lanes = _mm256_loadu_si256(block.as_ptr() as *const __m256i);
        bits += _popcnt32(_mm256_extract_epi32(lanes, 0)) as usize
            + _popcnt32(_mm256_extract_epi32(lanes, 1)) as usize
            + _popcnt32(_mm256_extract_epi32(lanes, 2)) as usize
            + _popcnt32(_mm256_extract_epi32(lanes, 3)) as usize
            + _popcnt32(_mm256_extract_epi32(lanes, 4)) as usize
            + _popcnt32(_mm256_extract_epi32(lanes, 5)) as usize
            + _popcnt32(_mm256_extract_epi32(lanes, 6)) as usize
            + _popcnt32(_mm256_extract_epi32(lanes, 7)) as usize;
    }
    // Fewer than eight words remain; count them one by one.
    for word in tail.iter().copied() {
        bits += _popcnt32(word as i32) as usize;
    }
    bits
}
/// Counts set bits in `data` by applying the `popcnt` instruction to each word.
///
/// # Safety
/// The caller must ensure the CPU supports the `sse4.2` and `popcnt` features.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2,popcnt")]
unsafe fn popcount_u32_slice_sse42(data: &[u32]) -> usize {
    let mut bits = 0usize;
    for word in data.iter().copied() {
        bits += _popcnt32(word as i32) as usize;
    }
    bits
}
/// aarch64 helper: sums `count_ones()` over every 64-bit word.
///
/// No explicit NEON intrinsics are used here; the loop relies on
/// `u64::count_ones`.
#[cfg(target_arch = "aarch64")]
fn popcount_u64_slice_neon(data: &[u64]) -> usize {
    data.iter().map(|word| word.count_ones() as usize).sum()
}
/// aarch64 helper: sums `count_ones()` over every 32-bit word.
///
/// No explicit NEON intrinsics are used here; the loop relies on
/// `u32::count_ones`.
#[cfg(target_arch = "aarch64")]
fn popcount_u32_slice_neon(data: &[u32]) -> usize {
    data.iter().map(|word| word.count_ones() as usize).sum()
}
}
pub mod bit_manipulation {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use core::arch::x86_64::*;
#[cfg(feature = "no-std")]
use alloc::vec::Vec;
/// Reverses the bit order of every element of `data` in place.
///
/// On x86/x86_64 the AVX2 kernel is preferred, then the SSE2 kernel; if neither
/// feature is reported (or on other architectures) the scalar loop runs.
pub fn reverse_bits_u32_slice(data: &mut [u32]) {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        if crate::simd_feature_detected!("avx2") {
            // SAFETY: only reached after the avx2 check above
            // (assumes `simd_feature_detected!` reflects real CPU support).
            return unsafe { reverse_bits_u32_slice_avx2(data) };
        }
        if crate::simd_feature_detected!("sse2") {
            // SAFETY: only reached after the sse2 check above.
            return unsafe { reverse_bits_u32_slice_sse2(data) };
        }
    }
    reverse_bits_u32_slice_scalar(data);
}
/// Applies a parallel bit extract (PEXT) with `mask` to every word of `data`.
///
/// For each input word, the bits selected by `mask` are packed contiguously
/// into the low bits of the corresponding output word. The returned vector has
/// the same length as `data`.
///
/// Uses the BMI2 kernel on x86/x86_64 when the feature is reported; otherwise
/// falls back to the portable bit-by-bit implementation.
pub fn parallel_bit_extract(data: &[u64], mask: u64) -> Vec<u64> {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        if crate::simd_feature_detected!("bmi2") {
            // SAFETY: only reached after the bmi2 check above
            // (assumes `simd_feature_detected!` reflects real CPU support).
            return unsafe { parallel_bit_extract_bmi2(data, mask) };
        }
    }
    // The output is only allocated on the path that fills it, so the BMI2
    // path no longer pays for a buffer it immediately discards.
    data.iter()
        .map(|&val| parallel_bit_extract_scalar(val, mask))
        .collect()
}
/// Returns the number of leading zero bits of every element of `data`.
///
/// The returned vector has the same length as `data`. Uses the LZCNT-based
/// kernel on x86/x86_64 when both `avx2` and `lzcnt` are reported; otherwise
/// falls back to `u32::leading_zeros`.
pub fn count_leading_zeros_slice(data: &[u32]) -> Vec<u32> {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        if crate::simd_feature_detected!("avx2") && crate::simd_feature_detected!("lzcnt") {
            // SAFETY: only reached after the avx2 + lzcnt checks above
            // (assumes `simd_feature_detected!` reflects real CPU support).
            return unsafe { count_leading_zeros_slice_avx2(data) };
        }
    }
    // The output is only allocated on the path that fills it, so the
    // accelerated path no longer pays for a buffer it immediately discards.
    data.iter().map(|val| val.leading_zeros()).collect()
}
/// Portable fallback: reverses the bit order of each element in place.
fn reverse_bits_u32_slice_scalar(data: &mut [u32]) {
    data.iter_mut()
        .for_each(|word| *word = word.reverse_bits());
}
/// Reverses the bits of every element in place, eight elements per iteration;
/// the trailing elements that do not fill a full vector go through the scalar
/// helper.
///
/// # Safety
/// The caller must ensure the CPU supports the `avx2` feature.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn reverse_bits_u32_slice_avx2(data: &mut [u32]) {
    let mut blocks = data.chunks_exact_mut(8);
    // Iterate by mutable reference so the iterator survives the loop and can
    // still hand back its remainder afterwards.
    for block in &mut blocks {
        let lanes = _mm256_loadu_si256(block.as_ptr() as *const __m256i);
        let flipped = reverse_bits_avx2(lanes);
        _mm256_storeu_si256(block.as_mut_ptr() as *mut __m256i, flipped);
    }
    reverse_bits_u32_slice_scalar(blocks.into_remainder());
}
/// Reverses the bit order inside each of the eight 32-bit lanes of `vec`.
///
/// Works by swapping progressively larger groups: adjacent bits, bit pairs,
/// nibbles, bytes, and finally the two 16-bit halves of each lane.
///
/// # Safety
/// The caller must ensure the CPU supports the `avx2` feature.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn reverse_bits_avx2(vec: __m256i) -> __m256i {
    // Swap adjacent single bits.
    let every_other_bit = _mm256_set1_epi32(0x55555555u32 as i32);
    let down = _mm256_and_si256(_mm256_srli_epi32(vec, 1), every_other_bit);
    let up = _mm256_slli_epi32(_mm256_and_si256(vec, every_other_bit), 1);
    let bits_swapped = _mm256_or_si256(down, up);

    // Swap adjacent bit pairs.
    let every_other_pair = _mm256_set1_epi32(0x33333333u32 as i32);
    let down = _mm256_and_si256(_mm256_srli_epi32(bits_swapped, 2), every_other_pair);
    let up = _mm256_slli_epi32(_mm256_and_si256(bits_swapped, every_other_pair), 2);
    let pairs_swapped = _mm256_or_si256(down, up);

    // Swap adjacent nibbles.
    let low_nibbles = _mm256_set1_epi32(0x0f0f0f0fu32 as i32);
    let down = _mm256_and_si256(_mm256_srli_epi32(pairs_swapped, 4), low_nibbles);
    let up = _mm256_slli_epi32(_mm256_and_si256(pairs_swapped, low_nibbles), 4);
    let nibbles_swapped = _mm256_or_si256(down, up);

    // Swap adjacent bytes.
    let low_bytes = _mm256_set1_epi32(0x00ff00ffu32 as i32);
    let down = _mm256_and_si256(_mm256_srli_epi32(nibbles_swapped, 8), low_bytes);
    let up = _mm256_slli_epi32(_mm256_and_si256(nibbles_swapped, low_bytes), 8);
    let bytes_swapped = _mm256_or_si256(down, up);

    // Swap the 16-bit halves; the shifts discard the other half, so no mask is needed.
    _mm256_or_si256(
        _mm256_srli_epi32(bytes_swapped, 16),
        _mm256_slli_epi32(bytes_swapped, 16),
    )
}
/// Reverses the bits of every element in place, four elements per iteration;
/// the trailing elements that do not fill a full vector go through the scalar
/// helper.
///
/// # Safety
/// The caller must ensure the CPU supports the `sse2` feature.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse2")]
unsafe fn reverse_bits_u32_slice_sse2(data: &mut [u32]) {
    let mut blocks = data.chunks_exact_mut(4);
    // Iterate by mutable reference so the iterator survives the loop and can
    // still hand back its remainder afterwards.
    for block in &mut blocks {
        let lanes = _mm_loadu_si128(block.as_ptr() as *const __m128i);
        let flipped = reverse_bits_sse2(lanes);
        _mm_storeu_si128(block.as_mut_ptr() as *mut __m128i, flipped);
    }
    reverse_bits_u32_slice_scalar(blocks.into_remainder());
}
/// Reverses the bit order inside each of the four 32-bit lanes of `vec`.
///
/// Works by swapping progressively larger groups: adjacent bits, bit pairs,
/// nibbles, bytes, and finally the two 16-bit halves of each lane.
///
/// # Safety
/// The caller must ensure the CPU supports the `sse2` feature.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse2")]
unsafe fn reverse_bits_sse2(vec: __m128i) -> __m128i {
    // Swap adjacent single bits.
    let every_other_bit = _mm_set1_epi32(0x55555555u32 as i32);
    let down = _mm_and_si128(_mm_srli_epi32(vec, 1), every_other_bit);
    let up = _mm_slli_epi32(_mm_and_si128(vec, every_other_bit), 1);
    let bits_swapped = _mm_or_si128(down, up);

    // Swap adjacent bit pairs.
    let every_other_pair = _mm_set1_epi32(0x33333333u32 as i32);
    let down = _mm_and_si128(_mm_srli_epi32(bits_swapped, 2), every_other_pair);
    let up = _mm_slli_epi32(_mm_and_si128(bits_swapped, every_other_pair), 2);
    let pairs_swapped = _mm_or_si128(down, up);

    // Swap adjacent nibbles.
    let low_nibbles = _mm_set1_epi32(0x0f0f0f0fu32 as i32);
    let down = _mm_and_si128(_mm_srli_epi32(pairs_swapped, 4), low_nibbles);
    let up = _mm_slli_epi32(_mm_and_si128(pairs_swapped, low_nibbles), 4);
    let nibbles_swapped = _mm_or_si128(down, up);

    // Swap adjacent bytes.
    let low_bytes = _mm_set1_epi32(0x00ff00ffu32 as i32);
    let down = _mm_and_si128(_mm_srli_epi32(nibbles_swapped, 8), low_bytes);
    let up = _mm_slli_epi32(_mm_and_si128(nibbles_swapped, low_bytes), 8);
    let bytes_swapped = _mm_or_si128(down, up);

    // Swap the 16-bit halves; the shifts discard the other half, so no mask is needed.
    _mm_or_si128(
        _mm_srli_epi32(bytes_swapped, 16),
        _mm_slli_epi32(bytes_swapped, 16),
    )
}
/// Applies the BMI2 `pext` instruction with `mask` to every word of `data`
/// and returns the results in input order.
///
/// # Safety
/// The caller must ensure the CPU supports the `bmi2` feature.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "bmi2")]
unsafe fn parallel_bit_extract_bmi2(data: &[u64], mask: u64) -> Vec<u64> {
    let mut extracted = Vec::with_capacity(data.len());
    for word in data.iter().copied() {
        let packed = _pext_u64(word, mask);
        extracted.push(packed);
    }
    extracted
}
/// Portable PEXT: gathers the bits of `val` selected by `mask` and packs them,
/// lowest selected bit first, into the low bits of the result.
fn parallel_bit_extract_scalar(val: u64, mask: u64) -> u64 {
    let mut packed = 0u64;
    let mut out_pos = 0u32;
    let mut pending = mask;
    // Visit the set bits of the mask from least to most significant.
    while pending != 0 {
        let src_pos = pending.trailing_zeros();
        packed |= ((val >> src_pos) & 1) << out_pos;
        out_pos += 1;
        // Clear the lowest set bit that was just handled.
        pending &= pending - 1;
    }
    packed
}
/// Computes the leading-zero count of every element with the `lzcnt`
/// instruction and returns the counts in input order.
///
/// # Safety
/// The caller must ensure the CPU supports the `avx2` and `lzcnt` features.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,lzcnt")]
unsafe fn count_leading_zeros_slice_avx2(data: &[u32]) -> Vec<u32> {
    let mut counts = Vec::with_capacity(data.len());
    for word in data.iter().copied() {
        let zeros = _lzcnt_u32(word);
        counts.push(zeros);
    }
    counts
}
}
pub mod hash_functions {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use core::arch::x86_64::*;
#[cfg(feature = "no-std")]
use alloc::vec::Vec;
/// Computes a 32-bit CRC-style checksum of `data`.
///
/// On x86/x86_64 the SSE4.2 kernel is used when the feature is reported;
/// otherwise the portable bitwise implementation runs.
///
/// NOTE(review): the SSE4.2 `crc32` instruction implements the CRC-32C
/// (Castagnoli) polynomial. Confirm the scalar fallback uses the same
/// polynomial, otherwise the result depends on which path is taken.
pub fn crc32_hash(data: &[u8]) -> u32 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if crate::simd_feature_detected!("sse4.2") {
// SAFETY: only reached after the sse4.2 check above
// (assumes `simd_feature_detected!` reflects real CPU support).
return unsafe { crc32_hash_sse42(data) };
}
}
crc32_hash_scalar(data)
}
/// Hashes `data` with the given `seed` by delegating to the scalar
/// MurmurHash3-style 32-bit implementation; there is no SIMD variant.
pub fn murmur3_hash(data: &[u8], seed: u32) -> u32 {
murmur3_hash_scalar(data, seed)
}
/// Mixes a 64-bit value through three rounds of wrapping multiply followed by
/// xor-shift-right.
///
/// The function is deterministic and maps `0` to `0`.
pub fn fast_hash_u64(val: u64) -> u64 {
    // (multiplier, right-shift) for each mixing round, applied in order.
    const ROUNDS: [(u64, u32); 3] = [
        (0x9e3779b97f4a7c15, 30),
        (0xbf58476d1ce4e5b9, 27),
        (0x94d049bb133111eb, 31),
    ];
    let mut state = val;
    for &(multiplier, shift) in ROUNDS.iter() {
        state = state.wrapping_mul(multiplier);
        state ^= state >> shift;
    }
    state
}
pub fn fast_hash_u64_slice(data: &[u64]) -> Vec<u64> {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if crate::simd_feature_detected!("avx2") {
return unsafe { fast_hash_u64_slice_avx2(data) };
}
}
data.iter().map(|&x| fast_hash_u64(x)).collect()
}
/// Portable bitwise CRC-32C (Castagnoli) of `data`.
///
/// Uses an initial value of `0xffff_ffff`, reflected bit order, and a final
/// bitwise inversion. The polynomial matches the one implemented by the
/// SSE4.2 `crc32` instruction used in `crc32_hash_sse42`, so `crc32_hash`
/// yields the same value whichever path is taken. (The previous IEEE
/// polynomial `0xedb88320` produced a different checksum than the hardware
/// path.)
fn crc32_hash_scalar(data: &[u8]) -> u32 {
    /// Reflected form of the Castagnoli polynomial 0x1EDC6F41.
    const CRC32C_POLY_REFLECTED: u32 = 0x82f6_3b78;

    let mut crc = 0xffff_ffffu32;
    for &byte in data {
        crc ^= u32::from(byte);
        for _ in 0..8 {
            // All-ones when the low bit is set, zero otherwise; selects
            // whether the polynomial is folded in for this bit.
            let fold = (crc & 1).wrapping_neg();
            crc = (crc >> 1) ^ (CRC32C_POLY_REFLECTED & fold);
        }
    }
    !crc
}
/// Hardware CRC of `data` using the SSE4.2 `crc32` instruction: eight bytes
/// per step for the bulk, then one byte per step for the tail.
///
/// Starts from `0xffffffff` and inverts the final state.
///
/// # Safety
/// The caller must ensure the CPU supports the `sse4.2` feature.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2")]
unsafe fn crc32_hash_sse42(data: &[u8]) -> u32 {
    let words = data.chunks_exact(8);
    let tail = words.remainder();
    let mut state = 0xffffffffu32;
    for word in words {
        // x86 is little-endian, so this equals a native unaligned u64 read.
        let value = u64::from_le_bytes([
            word[0], word[1], word[2], word[3], word[4], word[5], word[6], word[7],
        ]);
        state = _mm_crc32_u64(u64::from(state), value) as u32;
    }
    for byte in tail.iter().copied() {
        state = _mm_crc32_u8(state, byte);
    }
    !state
}
/// 32-bit MurmurHash3-style hash of `data` starting from `seed`.
///
/// Full 4-byte blocks are read little-endian and mixed into the state; the
/// 1-3 leftover bytes are mixed separately; the length is folded in and the
/// state is run through the final avalanche step.
fn murmur3_hash_scalar(data: &[u8], seed: u32) -> u32 {
    const C1: u32 = 0xcc9e2d51;
    const C2: u32 = 0x1b873593;
    const R1: u32 = 15;
    const R2: u32 = 13;
    const M: u32 = 5;
    const N: u32 = 0xe6546b64;

    // Scrambles one 32-bit block before it is xor-ed into the state.
    let scramble = |k: u32| k.wrapping_mul(C1).rotate_left(R1).wrapping_mul(C2);

    let blocks = data.chunks_exact(4);
    let tail = blocks.remainder();

    let mut state = seed;
    for block in blocks {
        let k = u32::from_le_bytes([block[0], block[1], block[2], block[3]]);
        state = (state ^ scramble(k))
            .rotate_left(R2)
            .wrapping_mul(M)
            .wrapping_add(N);
    }

    if !tail.is_empty() {
        // Assemble the 1-3 trailing bytes little-endian: tail[0] ends up lowest.
        let k1 = tail
            .iter()
            .rev()
            .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte));
        state ^= scramble(k1);
    }

    // Fold in the length (truncated to 32 bits), then avalanche.
    state ^= data.len() as u32;
    state ^= state >> 16;
    state = state.wrapping_mul(0x85ebca6b);
    state ^= state >> 13;
    state = state.wrapping_mul(0xc2b2ae35);
    state ^= state >> 16;
    state
}
/// AVX2 kernel for hashing a slice of `u64`, four values per iteration.
///
/// Produces exactly the same values as `fast_hash_u64` for every element.
/// AVX2 has no 64-bit lane multiply, and `_mm256_mul_epu32` only multiplies
/// the low 32 bits of each lane, so the wrapping 64-bit product is assembled
/// from three 32x32 partial products. Elements that do not fill a full vector
/// are hashed with `fast_hash_u64`.
///
/// # Safety
/// The caller must ensure the CPU supports the `avx2` feature.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn fast_hash_u64_slice_avx2(data: &[u64]) -> Vec<u64> {
    /// Lane-wise `a.wrapping_mul(b)` over four 64-bit lanes.
    ///
    /// With a = aH*2^32 + aL and b = bH*2^32 + bL, the product modulo 2^64 is
    /// aL*bL + ((aL*bH + aH*bL) << 32); the aH*bH term shifts out entirely.
    #[inline]
    #[target_feature(enable = "avx2")]
    unsafe fn wrapping_mul_epi64(a: __m256i, b: __m256i) -> __m256i {
        let a_hi = _mm256_srli_epi64(a, 32);
        let b_hi = _mm256_srli_epi64(b, 32);
        let lo_lo = _mm256_mul_epu32(a, b);
        let lo_hi = _mm256_mul_epu32(a, b_hi);
        let hi_lo = _mm256_mul_epu32(a_hi, b);
        let cross = _mm256_add_epi64(lo_hi, hi_lo);
        _mm256_add_epi64(lo_lo, _mm256_slli_epi64(cross, 32))
    }

    let mut result = Vec::with_capacity(data.len());
    let prime1 = _mm256_set1_epi64x(0x9e3779b97f4a7c15u64 as i64);
    let prime2 = _mm256_set1_epi64x(0xbf58476d1ce4e5b9u64 as i64);
    let prime3 = _mm256_set1_epi64x(0x94d049bb133111ebu64 as i64);
    let chunks = data.chunks_exact(4);
    let remainder = chunks.remainder();
    for chunk in chunks {
        let input = _mm256_loadu_si256(chunk.as_ptr() as *const __m256i);
        let mut x = wrapping_mul_epi64(input, prime1);
        x = _mm256_xor_si256(x, _mm256_srli_epi64(x, 30));
        x = wrapping_mul_epi64(x, prime2);
        x = _mm256_xor_si256(x, _mm256_srli_epi64(x, 27));
        x = wrapping_mul_epi64(x, prime3);
        x = _mm256_xor_si256(x, _mm256_srli_epi64(x, 31));
        let mut lanes = [0u64; 4];
        _mm256_storeu_si256(lanes.as_mut_ptr() as *mut __m256i, x);
        result.extend_from_slice(&lanes);
    }
    for &val in remainder {
        result.push(fast_hash_u64(val));
    }
    result
}
}
pub mod boolean_indexing {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use core::arch::x86_64::*;
#[cfg(feature = "no-std")]
use alloc::vec::Vec;
/// Returns the elements of `data` whose corresponding `mask` entry is `true`,
/// preserving their original order.
///
/// Uses the AVX2-gated kernel on x86/x86_64 when the feature is reported;
/// otherwise the scalar implementation runs.
///
/// # Panics
/// Panics if `data` and `mask` have different lengths.
pub fn compress_by_mask_f32(data: &[f32], mask: &[bool]) -> Vec<f32> {
assert_eq!(data.len(), mask.len());
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if crate::simd_feature_detected!("avx2") {
// SAFETY: only reached after the avx2 check above
// (assumes `simd_feature_detected!` reflects real CPU support).
return unsafe { compress_by_mask_f32_avx2(data, mask) };
}
}
compress_by_mask_f32_scalar(data, mask)
}
/// Builds a boolean mask with one entry per element of `data`, `true` where
/// the element is strictly greater than `threshold`.
///
/// Uses the AVX2 kernel on x86/x86_64 when the feature is reported; otherwise
/// the scalar implementation runs.
pub fn create_mask_greater_than_f32(data: &[f32], threshold: f32) -> Vec<bool> {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if crate::simd_feature_detected!("avx2") {
// SAFETY: only reached after the avx2 check above
// (assumes `simd_feature_detected!` reflects real CPU support).
return unsafe { create_mask_greater_than_f32_avx2(data, threshold) };
}
}
create_mask_greater_than_f32_scalar(data, threshold)
}
/// Returns how many entries of `mask` are `true`.
///
/// Uses the AVX2 kernel on x86/x86_64 when the feature is reported; otherwise
/// counts the entries with a plain iterator.
pub fn count_true_mask(mask: &[bool]) -> usize {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        if crate::simd_feature_detected!("avx2") {
            // SAFETY: only reached after the avx2 check above
            // (assumes `simd_feature_detected!` reflects real CPU support).
            return unsafe { count_true_mask_avx2(mask) };
        }
    }
    mask.iter().filter(|&&flag| flag).count()
}
/// Portable fallback: keeps each value of `data` whose paired `mask` entry is
/// `true`. Pairs are formed with `zip`, so iteration stops at the shorter
/// slice.
fn compress_by_mask_f32_scalar(data: &[f32], mask: &[bool]) -> Vec<f32> {
    let mut kept = Vec::new();
    for (&value, &include) in data.iter().zip(mask) {
        if include {
            kept.push(value);
        }
    }
    kept
}
/// AVX2-gated variant of mask compression.
///
/// Processes the input in groups of eight: the group's mask entries are packed
/// into an 8-bit selector, then each value whose selector bit is set is
/// appended to the output. Leftover elements are handled by the scalar helper.
/// Note that the body uses no vector intrinsics.
///
/// # Safety
/// The caller must ensure the CPU supports the `avx2` feature.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn compress_by_mask_f32_avx2(data: &[f32], mask: &[bool]) -> Vec<f32> {
    let data_blocks = data.chunks_exact(8);
    let mask_blocks = mask.chunks_exact(8);
    let data_tail = data_blocks.remainder();
    let mask_tail = mask_blocks.remainder();

    let mut kept = Vec::new();
    for (values, flags) in data_blocks.zip(mask_blocks) {
        // Bit `lane` of the selector mirrors flags[lane].
        let mut selector = 0u8;
        for (lane, &on) in flags.iter().enumerate() {
            selector |= (on as u8) << lane;
        }
        for (lane, &value) in values.iter().enumerate() {
            if (selector >> lane) & 1 == 1 {
                kept.push(value);
            }
        }
    }
    kept.extend(compress_by_mask_f32_scalar(data_tail, mask_tail));
    kept
}
/// Portable fallback: one flag per element, `true` where the element is
/// strictly greater than `threshold` (so NaN elements yield `false`).
fn create_mask_greater_than_f32_scalar(data: &[f32], threshold: f32) -> Vec<bool> {
    let mut flags = Vec::with_capacity(data.len());
    for &value in data {
        flags.push(value > threshold);
    }
    flags
}
/// AVX2 kernel: compares eight floats per iteration against `threshold` using
/// an ordered, non-signalling greater-than and expands the resulting 8-bit
/// movemask into individual flags. Leftover elements go through the scalar
/// helper.
///
/// # Safety
/// The caller must ensure the CPU supports the `avx2` feature.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn create_mask_greater_than_f32_avx2(data: &[f32], threshold: f32) -> Vec<bool> {
    let blocks = data.chunks_exact(8);
    let tail = blocks.remainder();
    let limit = _mm256_set1_ps(threshold);

    let mut flags = Vec::with_capacity(data.len());
    for block in blocks {
        let values = _mm256_loadu_ps(block.as_ptr());
        let is_greater = _mm256_cmp_ps(values, limit, _CMP_GT_OQ);
        // Bit `lane` of `lane_bits` is the comparison result for block[lane].
        let lane_bits = _mm256_movemask_ps(is_greater);
        for lane in 0..8 {
            flags.push((lane_bits >> lane) & 1 == 1);
        }
    }
    flags.extend(create_mask_greater_than_f32_scalar(tail, threshold));
    flags
}
/// AVX2 kernel: counts the `true` entries of `mask`, 32 entries per iteration.
///
/// Each 32-entry chunk is loaded directly as bytes (a `bool` is one byte with
/// value 0 or 1), so no intermediate copy is needed. `_mm256_sad_epu8` against
/// zero sums each group of eight bytes into a 64-bit lane; those per-lane sums
/// are accumulated across chunks and reduced once at the end. Leftover entries
/// are counted individually.
///
/// # Safety
/// The caller must ensure the CPU supports the `avx2` feature.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn count_true_mask_avx2(mask: &[bool]) -> usize {
    let chunks = mask.chunks_exact(32);
    let remainder = chunks.remainder();

    let zero = _mm256_setzero_si256();
    // Four running 64-bit partial sums, one per 8-byte group.
    let mut lane_sums = zero;
    for chunk in chunks {
        // `chunks_exact(32)` guarantees 32 readable bytes behind the pointer.
        let bytes = _mm256_loadu_si256(chunk.as_ptr() as *const __m256i);
        lane_sums = _mm256_add_epi64(lane_sums, _mm256_sad_epu8(bytes, zero));
    }

    let mut count = _mm256_extract_epi64(lane_sums, 0) as usize
        + _mm256_extract_epi64(lane_sums, 1) as usize
        + _mm256_extract_epi64(lane_sums, 2) as usize
        + _mm256_extract_epi64(lane_sums, 3) as usize;
    for &flag in remainder {
        count += flag as usize;
    }
    count
}
}
/// Unit tests for the public entry points of the sibling modules. They run
/// through whichever implementation the runtime dispatch selects on the host.
#[allow(non_snake_case)]
#[cfg(all(test, not(feature = "no-std")))]
mod tests {
    use super::*;
    #[cfg(feature = "no-std")]
    use alloc::{vec, vec::Vec};

    #[test]
    fn test_popcount_u64() {
        let data = vec![0xFF, 0x00, 0xF0F0F0F0F0F0F0F0, 0x5555555555555555];
        let expected = 8 + 32 + 32;
        let result = popcount::popcount_u64_slice(&data);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_popcount_u32() {
        let data = vec![0xFF, 0x00, 0xF0F0F0F0, 0x55555555];
        let expected = 8 + 16 + 16;
        let result = popcount::popcount_u32_slice(&data);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_reverse_bits() {
        let mut data = vec![0x12345678u32, 0xABCDEF00u32];
        let expected = vec![0x1E6A2C48u32, 0x00F7B3D5u32];
        bit_manipulation::reverse_bits_u32_slice(&mut data);
        assert_eq!(data, expected);
    }

    #[test]
    fn test_parallel_bit_extract() {
        let data = vec![0b11110000u64, 0b10101010u64];
        let mask = 0b11001100u64;
        let result = bit_manipulation::parallel_bit_extract(&data, mask);
        assert_eq!(result.len(), data.len());
        // The mask selects bits 2, 3, 6 and 7; they are packed lowest-first.
        assert_eq!(result, vec![0b1100u64, 0b1010u64]);
        for &val in &result {
            // Four mask bits can never produce more than four output bits.
            assert!(val <= 0b1111);
        }
    }

    #[test]
    fn test_count_leading_zeros() {
        let data = vec![0x00000001u32, 0x00000100u32, 0x80000000u32];
        let expected = vec![31, 23, 0];
        let result = bit_manipulation::count_leading_zeros_slice(&data);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_crc32_hash() {
        let data = b"hello world";
        let hash1 = hash_functions::crc32_hash(data);
        let hash2 = hash_functions::crc32_hash(data);
        assert_eq!(hash1, hash2);
        let different_hash = hash_functions::crc32_hash(b"hello world!");
        assert_ne!(hash1, different_hash);
    }

    #[test]
    fn test_murmur3_hash() {
        let data = b"test data";
        let seed = 42;
        let hash1 = hash_functions::murmur3_hash(data, seed);
        let hash2 = hash_functions::murmur3_hash(data, seed);
        assert_eq!(hash1, hash2);
        let different_hash = hash_functions::murmur3_hash(data, seed + 1);
        assert_ne!(hash1, different_hash);
        // Empty input exercises only the length fold and the final avalanche.
        assert_eq!(hash_functions::murmur3_hash(b"", 0), 0);
        assert_eq!(hash_functions::murmur3_hash(b"", 1), 0x514E_28B7);
    }

    #[test]
    fn test_fast_hash_u64() {
        let data = vec![0, 1, 2, 0x123456789ABCDEF0];
        let result = hash_functions::fast_hash_u64_slice(&data);
        assert_eq!(result.len(), data.len());
        // Distinct inputs in this sample must not collide.
        for i in 0..data.len() {
            for j in i + 1..data.len() {
                if data[i] != data[j] {
                    assert_ne!(result[i], result[j]);
                }
            }
        }
    }

    #[test]
    fn test_compress_by_mask() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let mask = vec![true, false, true, false, true];
        let expected = vec![1.0, 3.0, 5.0];
        let result = boolean_indexing::compress_by_mask_f32(&data, &mask);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_create_mask_greater_than() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let threshold = 3.0;
        let expected = vec![false, false, false, true, true];
        let result = boolean_indexing::create_mask_greater_than_f32(&data, threshold);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_count_true_mask() {
        let mask = vec![true, false, true, true, false, true];
        let expected = 4;
        let result = boolean_indexing::count_true_mask(&mask);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_empty_inputs() {
        assert_eq!(popcount::popcount_u64_slice(&[]), 0);
        assert_eq!(popcount::popcount_u32_slice(&[]), 0);
        let empty_data: Vec<f32> = vec![];
        let empty_mask: Vec<bool> = vec![];
        assert_eq!(
            boolean_indexing::compress_by_mask_f32(&empty_data, &empty_mask),
            vec![] as Vec<f32>
        );
        assert_eq!(boolean_indexing::count_true_mask(&empty_mask), 0);
    }

    #[test]
    fn test_large_inputs() {
        let large_data: Vec<u64> = (0..1000).map(|i| i as u64).collect();
        let count = popcount::popcount_u64_slice(&large_data);
        assert!(count > 0);
        let large_float_data: Vec<f32> = (0..1000).map(|i| i as f32).collect();
        let mask = boolean_indexing::create_mask_greater_than_f32(&large_float_data, 500.0);
        let count_true = boolean_indexing::count_true_mask(&mask);
        // Values 501..=999 are strictly greater than 500.
        assert_eq!(count_true, 499);
    }
}