use crate::succinct::rank_select::bmi2_acceleration::{
Bmi2HashOps, Bmi2Dispatcher, Bmi2Capabilities
};
use std::collections::HashMap;
/// Numerator of the 103/64 ≈ 1.609 fixed-point approximation of the golden
/// ratio used when growing hash-table capacities.
pub const GOLDEN_RATIO_FRAC_NUM: u64 = 103;
/// Denominator of the 103/64 approximation (a power of two, so the division
/// can compile to a shift).
pub const GOLDEN_RATIO_FRAC_DEN: u64 = 64;
/// Numerator of the coarser 13/8 = 1.625 approximation (consecutive Fibonacci
/// numbers).
pub const GOLDEN_RATIO_ALT_NUM: u64 = 13;
/// Denominator of the 13/8 approximation.
pub const GOLDEN_RATIO_ALT_DEN: u64 = 8;
/// Target load factor expressed as a fraction of 256: 158/256 ≈ 0.617,
/// roughly 1/φ.
pub const GOLDEN_LOAD_FACTOR: u8 = 158;
/// Combines `value` into `hash` (32-bit), preferring the BMI2-accelerated
/// combiner when the CPU supports it and falling back to a plain
/// rotate-and-add mix otherwise.
#[inline]
pub fn fabo_hash_combine_u32(hash: u32, value: u32) -> u32 {
    if Bmi2Capabilities::get().has_bmi2 {
        bmi2_hash_combine_u32(hash, value)
    } else {
        value.wrapping_add(hash.rotate_left(5))
    }
}
/// Combines `value` into `hash` (64-bit), preferring the BMI2-accelerated
/// combiner when the CPU supports it and falling back to a plain
/// rotate-and-add mix otherwise.
#[inline]
pub fn fabo_hash_combine_u64(hash: u64, value: u64) -> u64 {
    if Bmi2Capabilities::get().has_bmi2 {
        bmi2_hash_combine_u64(hash, value)
    } else {
        value.wrapping_add(hash.rotate_left(5))
    }
}
/// 32-bit hash combine: dispatches to the PEXT/PDEP hardware path on x86_64
/// when BMI2 is available, otherwise uses a rotate-add mix plus a
/// cross-rotation XOR.
#[inline]
pub fn bmi2_hash_combine_u32(hash: u32, value: u32) -> u32 {
    #[cfg(target_arch = "x86_64")]
    {
        if Bmi2Capabilities::get().has_bmi2 {
            return unsafe { bmi2_hash_combine_u32_hardware(hash, value) };
        }
    }
    // Software fallback.
    hash.rotate_left(5).wrapping_add(value) ^ value.rotate_right(13)
}
/// 64-bit hash combine: dispatches to the PEXT/PDEP hardware path on x86_64
/// when BMI2 is available, otherwise uses a rotate-add mix plus a
/// cross-rotation XOR.
#[inline]
pub fn bmi2_hash_combine_u64(hash: u64, value: u64) -> u64 {
    #[cfg(target_arch = "x86_64")]
    {
        if Bmi2Capabilities::get().has_bmi2 {
            return unsafe { bmi2_hash_combine_u64_hardware(hash, value) };
        }
    }
    // Software fallback.
    hash.rotate_left(5).wrapping_add(value) ^ value.rotate_right(17)
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
#[inline]
unsafe fn bmi2_hash_combine_u32_hardware(hash: u32, value: u32) -> u32 {
use std::arch::x86_64::*;
let effective_hash = if hash == 0 { 0x9e3779b9u32 } else { hash };
let mask1 = 0xAAAAAAAAu32;
let mask2 = 0x55555555u32;
let extracted1 = unsafe { _pext_u32(effective_hash, mask1) };
let extracted2 = unsafe { _pext_u32(value, mask2) };
let combined = extracted1.wrapping_add(extracted2);
let mut result = unsafe { _pdep_u32(combined, 0xFFFFFFFFu32) };
result ^= value.rotate_right(13);
result = result.wrapping_mul(0x9e3779b9u32);
result ^= result >> 16;
if result == 0 { 0x9e3779b9u32 } else { result }
}
/// Hardware path for [`bmi2_hash_combine_u64`]: PEXT-based bit interleaving
/// followed by a multiplicative avalanche.
///
/// # Safety
/// Caller must ensure the CPU supports BMI2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
#[inline]
unsafe fn bmi2_hash_combine_u64_hardware(hash: u64, value: u64) -> u64 {
    use std::arch::x86_64::*;
    // Substitute the 64-bit golden-ratio constant for a zero hash.
    let effective_hash = if hash == 0 { 0x9e3779b97f4a7c15u64 } else { hash };
    // Complementary masks: odd bits of the hash, even bits of the value.
    let mask1 = 0xAAAAAAAAAAAAAAAAu64;
    let mask2 = 0x5555555555555555u64;
    let extracted1 = unsafe { _pext_u64(effective_hash, mask1) };
    let extracted2 = unsafe { _pext_u64(value, mask2) };
    let combined = extracted1.wrapping_add(extracted2);
    // PDEP with an all-ones mask is the identity; kept as-is for exact behavior.
    let mut result = unsafe { _pdep_u64(combined, 0xFFFFFFFFFFFFFFFFu64) };
    // NOTE(review): rotates by 13 here while the software fallback in
    // bmi2_hash_combine_u64 rotates by 17, so the two paths diverge — confirm
    // this asymmetry is intended.
    result ^= value.rotate_right(13);
    result = result.wrapping_mul(0xc2b2ae3d27d4eb4fu64);
    result ^= result >> 29;
    // Never return 0, preserving the zero-substitution invariant for chains.
    if result == 0 { 0x9e3779b97f4a7c15u64 } else { result }
}
/// Returns the next capacity after `current_size`, grown by roughly the
/// golden ratio; an empty table starts at 16.
pub fn golden_ratio_next_size(current_size: usize) -> usize {
    match current_size {
        0 => 16,
        n => bmi2_golden_ratio_next_size(n),
    }
}
/// Golden-ratio capacity growth with optional BMI2 hardware dispatch.
/// Computes `size * 103 / 64 + 1` (≈ size * φ); zero maps to 16.
#[inline]
pub fn bmi2_golden_ratio_next_size(current_size: usize) -> usize {
    if current_size == 0 {
        return 16;
    }
    #[cfg(target_arch = "x86_64")]
    {
        if Bmi2Capabilities::get().has_bmi2 {
            return unsafe { bmi2_golden_ratio_hardware(current_size) };
        }
    }
    // Portable path: 103/64 fixed-point golden-ratio multiply.
    ((current_size as u64 * GOLDEN_RATIO_FRAC_NUM) / GOLDEN_RATIO_FRAC_DEN + 1) as usize
}
/// Hardware path for [`bmi2_golden_ratio_next_size`].
///
/// # Safety
/// Caller must ensure the CPU supports BMI2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
#[inline]
unsafe fn bmi2_golden_ratio_hardware(current_size: usize) -> usize {
    use std::arch::x86_64::*;
    let size_64 = current_size as u64;
    // NOTE(review): _bzhi_u64(x, 63) clears bit 63 and the multiply itself can
    // wrap for very large sizes — confirm callers keep sizes well below 2^56.
    let numerator_bits = unsafe { _bzhi_u64(size_64 * GOLDEN_RATIO_FRAC_NUM, 63) };
    let result = numerator_bits / GOLDEN_RATIO_FRAC_DEN + 1;
    result as usize
}
/// Returns a power-of-two bucket count sized for `desired_capacity` at the
/// golden-ratio load factor; zero capacity maps to the minimum of 16.
pub fn optimal_bucket_count(desired_capacity: usize) -> usize {
    match desired_capacity {
        0 => 16,
        n => bmi2_optimal_bucket_count(n),
    }
}
/// Bucket-count sizing with optional BMI2 hardware dispatch: scales the
/// capacity by 256/158 (the inverse of the golden load factor), rounds up to
/// a power of two, and never returns fewer than 16 buckets.
#[inline]
pub fn bmi2_optimal_bucket_count(desired_capacity: usize) -> usize {
    if desired_capacity == 0 {
        return 16;
    }
    #[cfg(target_arch = "x86_64")]
    {
        if Bmi2Capabilities::get().has_bmi2 {
            return unsafe { bmi2_bucket_count_hardware(desired_capacity) };
        }
    }
    let needed = desired_capacity as u64 * 256 / GOLDEN_LOAD_FACTOR as u64;
    (needed as usize).next_power_of_two().max(16)
}
/// Hardware path for [`bmi2_optimal_bucket_count`]: rounds the required bucket
/// count up to a power of two using LZCNT instead of `next_power_of_two`.
///
/// # Safety
/// Caller must ensure the CPU supports BMI2 (and LZCNT).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
#[inline]
unsafe fn bmi2_bucket_count_hardware(desired_capacity: usize) -> usize {
    use std::arch::x86_64::*;
    let capacity_64 = desired_capacity as u64;
    // NOTE(review): _bzhi_u64(x, 63) clears bit 63, so capacities large enough
    // to set it after the *256 scale are silently truncated — confirm inputs
    // stay below 2^55.
    let scaled_capacity = unsafe { _bzhi_u64(capacity_64 * 256, 63) };
    let required_buckets = scaled_capacity / (GOLDEN_LOAD_FACTOR as u64);
    if required_buckets == 0 {
        return 16;
    }
    // ceil(log2(n)) via LZCNT of n-1; shifting 1 by that gives the next power
    // of two (exact powers of two are preserved).
    let log2_bits = 64 - unsafe { _lzcnt_u64(required_buckets - 1) };
    let bucket_count = 1usize << log2_bits;
    bucket_count.max(16)
}
/// Combines a sequence of hashes into one value: each element after the first
/// is folded in via [`fabo_hash_combine_u64`] plus an extra rotate/multiply
/// mix, then the accumulator is finalized with a splitmix64-style avalanche.
/// An empty slice yields 0.
pub fn advanced_hash_combine(hashes: &[u64]) -> u64 {
    let (&first, rest) = match hashes.split_first() {
        Some(parts) => parts,
        None => return 0,
    };
    let mut acc = first;
    for &h in rest {
        acc = fabo_hash_combine_u64(acc, h);
        acc ^= h.rotate_right(17);
        acc = acc.wrapping_mul(0x9e3779b97f4a7c15);
    }
    // Final avalanche (splitmix64 finalizer constants).
    acc ^= acc >> 30;
    acc = acc.wrapping_mul(0xbf58476d1ce4e5b9);
    acc ^= acc >> 27;
    acc = acc.wrapping_mul(0x94d049bb133111eb);
    acc ^ (acc >> 31)
}
/// Configurable factory for hash-combining closures.
pub struct HashFunctionBuilder {
    // Left-rotation applied to the accumulator before mixing; used by the
    // Addition/Xor/Advanced strategies.
    rotation_amount: u32,
    // Which combining algorithm the built closures use.
    combine_strategy: CombineStrategy,
}
/// Mixing strategy for closures produced by [`HashFunctionBuilder`].
#[derive(Debug, Clone, Copy)]
pub enum CombineStrategy {
    /// Rotate the accumulator, then wrapping-add the value.
    Addition,
    /// Rotate the accumulator, then XOR the value.
    Xor,
    /// Delegate to the runtime-dispatched `fabo_hash_combine_*` functions.
    Fabo,
    /// Delegate to the `bmi2_hash_combine_*` functions.
    Bmi2,
    /// Rotate-add plus an extra rotate/multiply/xor-shift avalanche.
    Advanced,
}
impl HashFunctionBuilder {
pub fn new() -> Self {
Self {
rotation_amount: 5,
combine_strategy: CombineStrategy::Fabo,
}
}
pub fn with_rotation(mut self, amount: u32) -> Self {
self.rotation_amount = amount;
self
}
pub fn with_strategy(mut self, strategy: CombineStrategy) -> Self {
self.combine_strategy = strategy;
self
}
pub fn build_u32(self) -> impl Fn(u32, u32) -> u32 {
let rotation = self.rotation_amount;
move |hash: u32, value: u32| -> u32 {
match self.combine_strategy {
CombineStrategy::Addition => hash.rotate_left(rotation).wrapping_add(value),
CombineStrategy::Xor => hash.rotate_left(rotation) ^ value,
CombineStrategy::Fabo => fabo_hash_combine_u32(hash, value),
CombineStrategy::Bmi2 => bmi2_hash_combine_u32(hash, value),
CombineStrategy::Advanced => {
let mut result = hash.rotate_left(rotation).wrapping_add(value);
result ^= value.rotate_right(17);
result = result.wrapping_mul(0x9e3779b9);
result ^= result >> 16;
result
}
}
}
}
pub fn build_u64(self) -> impl Fn(u64, u64) -> u64 {
let rotation = self.rotation_amount;
move |hash: u64, value: u64| -> u64 {
match self.combine_strategy {
CombineStrategy::Addition => hash.rotate_left(rotation).wrapping_add(value),
CombineStrategy::Xor => hash.rotate_left(rotation) ^ value,
CombineStrategy::Fabo => fabo_hash_combine_u64(hash, value),
CombineStrategy::Bmi2 => bmi2_hash_combine_u64(hash, value),
CombineStrategy::Advanced => {
let mut result = hash.rotate_left(rotation).wrapping_add(value);
result ^= value.rotate_right(17);
result = result.wrapping_mul(0x9e3779b97f4a7c15);
result ^= result >> 30;
result
}
}
}
}
}
impl Default for HashFunctionBuilder {
fn default() -> Self {
Self::new()
}
}
/// Values that can fold themselves into an existing hash accumulator.
pub trait HashCombinable {
    /// Hash accumulator type (matches the implementing integer's width).
    type Output;
    /// Combines `self` into `hash` and returns the new accumulator.
    fn fabo_combine(self, hash: Self::Output) -> Self::Output;
    /// Rotation amount used by the default combining scheme.
    fn rotation_amount() -> u32 {
        5
    }
}
/// 32-bit values combine via [`fabo_hash_combine_u32`].
impl HashCombinable for u32 {
    type Output = u32;
    fn fabo_combine(self, hash: Self::Output) -> Self::Output {
        fabo_hash_combine_u32(hash, self)
    }
}
/// 64-bit values combine via [`fabo_hash_combine_u64`].
impl HashCombinable for u64 {
    type Output = u64;
    fn fabo_combine(self, hash: Self::Output) -> Self::Output {
        fabo_hash_combine_u64(hash, self)
    }
}
/// Extracts a bucket index from `hash` using `bucket_bits` bits, delegating
/// to the BMI2-dispatched helper.
#[inline]
pub fn extract_hash_bucket_bmi2(hash: u64, bucket_bits: u32) -> u32 {
    Bmi2HashOps::hash_bucket_extract(hash, bucket_bits)
}
/// Bulk variant of [`extract_hash_bucket_bmi2`] over a slice of hashes.
pub fn extract_hash_buckets_bulk_bmi2(hashes: &[u64], bucket_bits: u32) -> Vec<u32> {
    Bmi2HashOps::hash_buckets_bulk(hashes, bucket_bits)
}
/// BMI2-aware variant of [`advanced_hash_combine`]: folds `hashes` together
/// with [`bmi2_hash_combine_u64`] plus an extra mixing step, then applies an
/// avalanche finalizer. Returns 0 for an empty slice.
///
/// The BMI2 capability is now queried ONCE before the loop instead of on every
/// iteration (the previous code called `Bmi2Capabilities::get()` per element
/// and again for the finalizer); `get()` returns a `&'static` probe result, so
/// the answer cannot change mid-call and hoisting it is behavior-preserving.
pub fn advanced_hash_combine_bmi2(hashes: &[u64]) -> u64 {
    if hashes.is_empty() {
        return 0;
    }
    let mut result = hashes[0];
    #[cfg(target_arch = "x86_64")]
    {
        // Hoisted loop-invariant capability check.
        let has_bmi2 = Bmi2Capabilities::get().has_bmi2;
        for &hash in &hashes[1..] {
            result = bmi2_hash_combine_u64(result, hash);
            if has_bmi2 {
                result = unsafe { advanced_bmi2_mixing(result, hash) };
            } else {
                result ^= hash.rotate_right(17);
                result = result.wrapping_mul(0x9e3779b97f4a7c15);
            }
        }
        result = if has_bmi2 {
            unsafe { bmi2_avalanche_step(result) }
        } else {
            scalar_avalanche_step(result)
        };
    }
    #[cfg(not(target_arch = "x86_64"))]
    {
        for &hash in &hashes[1..] {
            result = bmi2_hash_combine_u64(result, hash);
            result ^= hash.rotate_right(17);
            result = result.wrapping_mul(0x9e3779b97f4a7c15);
        }
        result = scalar_avalanche_step(result);
    }
    result
}
/// Extra PEXT-based mixing round used by [`advanced_hash_combine_bmi2`] when
/// BMI2 is available.
///
/// # Safety
/// Caller must ensure the CPU supports BMI2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
#[inline]
unsafe fn advanced_bmi2_mixing(result: u64, hash: u64) -> u64 {
    use std::arch::x86_64::*;
    // Gather even bits of `result` and bit-pair groups of `hash`, recombine.
    let pattern1 = 0x5555555555555555u64;
    let pattern2 = 0x3333333333333333u64;
    let extracted1 = unsafe { _pext_u64(result, pattern1) };
    let extracted2 = unsafe { _pext_u64(hash, pattern2) };
    let combined = extracted1.wrapping_add(extracted2);
    // PDEP with an all-ones mask is the identity; the mixing comes from the
    // PEXT gathers plus the final rotation.
    unsafe { _pdep_u64(combined, 0xFFFFFFFFFFFFFFFFu64) }.rotate_right(17)
}
/// BMI2-flavored avalanche finalizer.
///
/// Note: `result >> 30` has at most 34 significant bits, so `_bzhi_u64(_, 34)`
/// keeps all of them (likewise `>> 27` with 37). Both BZHI calls are therefore
/// identities and this computes the same value as [`scalar_avalanche_step`].
///
/// # Safety
/// Caller must ensure the CPU supports BMI2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
#[inline]
unsafe fn bmi2_avalanche_step(mut result: u64) -> u64 {
    use std::arch::x86_64::*;
    result ^= unsafe { _bzhi_u64(result >> 30, 34) };
    result = result.wrapping_mul(0xbf58476d1ce4e5b9);
    result ^= unsafe { _bzhi_u64(result >> 27, 37) };
    result = result.wrapping_mul(0x94d049bb133111eb);
    result ^= result >> 31;
    result
}
/// Portable xor-shift/multiply avalanche using splitmix64's finalizer
/// constants: spreads the entropy of every input bit across the whole word.
#[inline]
fn scalar_avalanche_step(mut result: u64) -> u64 {
    for &(shift, mul) in &[(30, 0xbf58476d1ce4e5b9u64), (27, 0x94d049bb133111eb)] {
        result = (result ^ (result >> shift)).wrapping_mul(mul);
    }
    result ^ (result >> 31)
}
/// Hashes a UTF-8 string starting from `base_hash`, using the BMI2 hardware
/// path for inputs longer than 8 bytes when available. Empty input returns
/// `base_hash` unchanged.
pub fn fast_string_hash_bmi2(s: &str, base_hash: u64) -> u64 {
    let bytes = s.as_bytes();
    match bytes.len() {
        0 => base_hash,
        1..=8 => scalar_string_hash_bmi2(bytes, base_hash),
        _ => {
            #[cfg(target_arch = "x86_64")]
            {
                if Bmi2Capabilities::get().has_bmi2 {
                    return unsafe { bmi2_string_hash_hardware(bytes, base_hash) };
                }
            }
            scalar_string_hash_bmi2(bytes, base_hash)
        }
    }
}
/// Hardware string-hash path for [`fast_string_hash_bmi2`].
///
/// # Safety
/// Caller must ensure the CPU supports BMI2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
unsafe fn bmi2_string_hash_hardware(bytes: &[u8], mut hash: u64) -> u64 {
    use std::arch::x86_64::*;
    let chunks = bytes.len() / 8;
    for i in 0..chunks {
        let offset = i * 8;
        let chunk_bytes = &bytes[offset..offset + 8];
        let val = u64::from_le_bytes(chunk_bytes.try_into().expect("chunk is 8 bytes"));
        // NOTE(review): this PEXT keeps only the high nibble of each byte, so
        // half the bits of every full chunk are discarded — confirm this lossy
        // mixing is intended (it differs from the scalar fallback).
        let mask = 0xF0F0F0F0F0F0F0F0u64;
        let extracted = unsafe { _pext_u64(val, mask) };
        // PDEP with an all-ones mask is the identity.
        hash = unsafe { _pdep_u64(hash.wrapping_add(extracted), 0xFFFFFFFFFFFFFFFFu64) };
        hash = hash.rotate_left(5);
    }
    // Fold trailing bytes; PDEP places bit k of the byte at bit position 8k.
    let remaining_start = chunks * 8;
    for &byte in &bytes[remaining_start..] {
        let byte_extended = unsafe { _pdep_u64(byte as u64, 0x0101010101010101u64) };
        hash = hash.rotate_left(5).wrapping_add(byte_extended);
    }
    hash
}
/// Portable byte hasher: folds 8-byte little-endian words with a rotate-add
/// and cross-rotation XOR, then folds any trailing bytes individually.
fn scalar_string_hash_bmi2(bytes: &[u8], mut hash: u64) -> u64 {
    let mut words = bytes.chunks_exact(8);
    for chunk in words.by_ref() {
        let word = u64::from_le_bytes(chunk.try_into().expect("chunk is 8 bytes"));
        hash = hash.rotate_left(5).wrapping_add(word) ^ word.rotate_right(13);
    }
    for &byte in words.remainder() {
        hash = hash.rotate_left(5).wrapping_add(u64::from(byte));
    }
    hash
}
/// Finds a free slot in a 64-slot occupancy bitmap using the requested
/// probing scheme. Returns `None` when the probe sequence finds no free slot.
pub fn bmi2_collision_resolution(hash: u64, occupied_mask: u64, probe_type: ProbeType) -> Option<u32> {
    match probe_type {
        ProbeType::Linear => bmi2_linear_probe(hash, occupied_mask),
        ProbeType::Quadratic => bmi2_quadratic_probe(hash, occupied_mask),
        ProbeType::DoubleHash => bmi2_double_hash_probe(hash, occupied_mask),
    }
}
/// Probing scheme for [`bmi2_collision_resolution`].
#[derive(Debug, Clone, Copy)]
pub enum ProbeType {
    /// Lowest free bit of the bitmap.
    Linear,
    /// Offsets grow quadratically (i²) from the hash's start slot.
    Quadratic,
    /// A second hash value determines the probe stride.
    DoubleHash,
}
/// Linear probe: returns the index of the lowest free slot in
/// `occupied_mask`, or `None` when the bitmap is full.
///
/// NOTE(review): the `hash` argument is currently ignored by both the
/// software and hardware paths — the scan always starts at slot 0.
fn bmi2_linear_probe(hash: u64, occupied_mask: u64) -> Option<u32> {
    if occupied_mask == u64::MAX {
        return None;
    }
    #[cfg(target_arch = "x86_64")]
    {
        if Bmi2Capabilities::get().has_bmi2 {
            return unsafe { bmi2_linear_probe_hardware(hash, occupied_mask) };
        }
    }
    // At this point at least one zero bit exists in the mask.
    let free_mask = !occupied_mask;
    (free_mask != 0).then(|| free_mask.trailing_zeros())
}
/// Hardware linear probe: TZCNT of the inverted occupancy mask.
///
/// # Safety
/// Caller must ensure the CPU supports the BMI feature set (TZCNT).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
unsafe fn bmi2_linear_probe_hardware(_hash: u64, occupied_mask: u64) -> Option<u32> {
    use std::arch::x86_64::*;
    if occupied_mask == u64::MAX {
        return None;
    }
    let free_mask = !occupied_mask;
    if free_mask != 0 {
        // Lowest set bit of the free mask = first free slot.
        Some(unsafe { _tzcnt_u64(free_mask) } as u32)
    } else {
        None
    }
}
/// Quadratic probe over a 64-slot bitmap: tries `start + i*i (mod 64)`.
///
/// NOTE(review): offsets `i*i mod 64` do not cover all 64 residues, so this
/// can return `None` even when free slots exist — confirm callers accept that
/// (the hardware path behaves the same way).
fn bmi2_quadratic_probe(hash: u64, occupied_mask: u64) -> Option<u32> {
    if occupied_mask == u64::MAX {
        return None;
    }
    #[cfg(target_arch = "x86_64")]
    {
        let caps = Bmi2Capabilities::get();
        if caps.has_bmi2 {
            return unsafe { bmi2_quadratic_probe_hardware(hash, occupied_mask) };
        }
    }
    // Start slot comes from the low 6 bits of the hash.
    let start_pos = (hash & 63) as u32;
    for i in 0..64 {
        let pos = (start_pos + i * i) & 63;
        if (occupied_mask >> pos) & 1 == 0 {
            return Some(pos);
        }
    }
    None
}
/// Hardware quadratic probe: the same scheme as [`bmi2_quadratic_probe`]'s
/// fallback, using BEXTR to read the low 6 hash bits.
///
/// # Safety
/// Caller must ensure the CPU supports the BMI feature set (BEXTR).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
unsafe fn bmi2_quadratic_probe_hardware(hash: u64, occupied_mask: u64) -> Option<u32> {
    use std::arch::x86_64::*;
    if occupied_mask == u64::MAX {
        return None;
    }
    // BEXTR(hash, 0, 6) == hash & 63.
    let start_pos = unsafe { _bextr_u64(hash, 0, 6) } as u32;
    for i in 0..64 {
        let offset = i * i;
        let pos = (start_pos + offset) & 63;
        let slot_mask = 1u64 << pos;
        if (occupied_mask & slot_mask) == 0 {
            return Some(pos);
        }
    }
    None
}
/// Double-hash probe: stride comes from a second hash (bits 32..38), falling
/// back to a stride of 1 when that field is zero.
///
/// NOTE(review): an even stride shares a factor with 64 and therefore visits
/// only a subset of slots, so free slots can be missed — confirm acceptable.
fn bmi2_double_hash_probe(hash: u64, occupied_mask: u64) -> Option<u32> {
    if occupied_mask == u64::MAX {
        return None;
    }
    #[cfg(target_arch = "x86_64")]
    {
        let caps = Bmi2Capabilities::get();
        if caps.has_bmi2 {
            return unsafe { bmi2_double_hash_probe_hardware(hash, occupied_mask) };
        }
    }
    // Start slot from the low 6 bits, stride from bits 32..38.
    let hash1 = (hash & 63) as u32;
    let hash2 = ((hash >> 32) & 63) as u32;
    let step = if hash2 == 0 { 1 } else { hash2 };
    for i in 0..64 {
        let pos = (hash1 + i * step) & 63;
        if (occupied_mask >> pos) & 1 == 0 {
            return Some(pos);
        }
    }
    None
}
/// Hardware double-hash probe: same scheme as the software fallback, using
/// BEXTR for both bit-field reads.
///
/// # Safety
/// Caller must ensure the CPU supports the BMI feature set (BEXTR).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
unsafe fn bmi2_double_hash_probe_hardware(hash: u64, occupied_mask: u64) -> Option<u32> {
    use std::arch::x86_64::*;
    if occupied_mask == u64::MAX {
        return None;
    }
    let hash1 = unsafe { _bextr_u64(hash, 0, 6) } as u32;
    let hash2 = unsafe { _bextr_u64(hash, 32, 6) } as u32;
    let step = if hash2 == 0 { 1 } else { hash2 };
    for i in 0..64 {
        let pos = (hash1 + i * step) & 63;
        let slot_mask = 1u64 << pos;
        if (occupied_mask & slot_mask) == 0 {
            return Some(pos);
        }
    }
    None
}
/// Computes load-factor statistics for a table of `current_size` slots
/// holding `element_count` entries, choosing the BMI2 hardware path when
/// available.
pub fn bmi2_load_factor_calculations(
    current_size: usize,
    element_count: usize,
    target_load_factor: f64
) -> LoadFactorInfo {
    #[cfg(target_arch = "x86_64")]
    {
        if Bmi2Capabilities::get().has_bmi2 {
            return unsafe {
                bmi2_load_factor_hardware(current_size, element_count, target_load_factor)
            };
        }
    }
    scalar_load_factor_calculations(current_size, element_count, target_load_factor)
}
/// Result of a load-factor query for a hash table.
#[derive(Debug, Clone)]
pub struct LoadFactorInfo {
    /// element_count / current_size (0.0 for an empty table).
    pub current_load_factor: f64,
    /// True when the load exceeds the target and the table should grow.
    pub should_resize: bool,
    /// Size to grow to (golden-ratio growth), or the current size otherwise.
    pub suggested_new_size: usize,
    /// Element count at which the suggested size would reach the target load.
    pub resize_threshold: usize,
}
/// Hardware load-factor path: compares load against target in Q32.32 fixed
/// point.
///
/// NOTE(review): `count_64 << 32` silently drops high bits for element counts
/// >= 2^32 and `_bzhi_u64(_, 63)` clears bit 63 — confirm tables stay small
/// enough. The fixed-point `>` comparison can also disagree with the scalar
/// float comparison within rounding error of the target.
///
/// # Safety
/// Caller must ensure the CPU supports BMI2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
unsafe fn bmi2_load_factor_hardware(
    current_size: usize,
    element_count: usize,
    target_load_factor: f64
) -> LoadFactorInfo {
    use std::arch::x86_64::*;
    // Empty table: always suggest growing to the minimum size of 16.
    if current_size == 0 {
        return LoadFactorInfo {
            current_load_factor: 0.0,
            should_resize: true,
            suggested_new_size: 16,
            resize_threshold: (16.0 * target_load_factor) as usize,
        };
    }
    let size_64 = current_size as u64;
    let count_64 = element_count as u64;
    // Fixed-point (Q32.32) load factor and target.
    let precision_bits = 32;
    let scaled_count = unsafe { _bzhi_u64(count_64 << precision_bits, 63) } / size_64;
    let scaled_target = (target_load_factor * (1u64 << precision_bits) as f64) as u64;
    let current_load_factor = element_count as f64 / current_size as f64;
    let should_resize = scaled_count > scaled_target;
    let suggested_new_size = if should_resize {
        bmi2_golden_ratio_next_size(current_size)
    } else {
        current_size
    };
    let resize_threshold = (suggested_new_size as f64 * target_load_factor) as usize;
    LoadFactorInfo {
        current_load_factor,
        should_resize,
        suggested_new_size,
        resize_threshold,
    }
}
/// Portable implementation of [`bmi2_load_factor_calculations`] using plain
/// floating-point arithmetic.
fn scalar_load_factor_calculations(
    current_size: usize,
    element_count: usize,
    target_load_factor: f64
) -> LoadFactorInfo {
    // Empty table: always suggest growing to the minimum size of 16.
    if current_size == 0 {
        return LoadFactorInfo {
            current_load_factor: 0.0,
            should_resize: true,
            suggested_new_size: 16,
            resize_threshold: (16.0 * target_load_factor) as usize,
        };
    }
    let load = element_count as f64 / current_size as f64;
    let needs_growth = load > target_load_factor;
    let next_size = if needs_growth {
        golden_ratio_next_size(current_size)
    } else {
        current_size
    };
    LoadFactorInfo {
        current_load_factor: load,
        should_resize: needs_growth,
        suggested_new_size: next_size,
        resize_threshold: (next_size as f64 * target_load_factor) as usize,
    }
}
/// Runtime dispatcher that picks the best hashing implementation for the
/// detected CPU feature set.
pub struct Bmi2HashDispatcher {
    // Cached CPU feature probe (static; detected once per process).
    capabilities: &'static Bmi2Capabilities,
    // Tier derived from `capabilities`, fixed at construction.
    optimization_tier: HashOptimizationTier,
}
/// CPU-feature tiers the dispatcher can select, from weakest to strongest.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashOptimizationTier {
    /// No relevant bit-manipulation extensions.
    Scalar,
    /// BMI1 only.
    Bmi1,
    /// BMI2 (PEXT/PDEP/BZHI) available.
    Bmi2,
    /// BMI2 plus AVX2.
    Bmi2Avx2,
}
impl Bmi2HashDispatcher {
    /// Probes CPU capabilities once and fixes the optimization tier.
    pub fn new() -> Self {
        let capabilities = Bmi2Capabilities::get();
        let optimization_tier = Self::determine_optimization_tier(capabilities);
        Self {
            capabilities,
            optimization_tier,
        }
    }
    /// Maps a capability set to the highest usable tier.
    fn determine_optimization_tier(caps: &Bmi2Capabilities) -> HashOptimizationTier {
        if caps.has_bmi2 && caps.simd_caps.cpu_features.has_avx2 {
            HashOptimizationTier::Bmi2Avx2
        } else if caps.has_bmi2 {
            HashOptimizationTier::Bmi2
        } else if caps.has_bmi1 {
            HashOptimizationTier::Bmi1
        } else {
            HashOptimizationTier::Scalar
        }
    }
    /// Returns the tier selected at construction.
    pub fn tier(&self) -> HashOptimizationTier {
        self.optimization_tier
    }
    /// Hashes arbitrary bytes with the best available implementation.
    pub fn hash_with_acceleration<T: AsRef<[u8]>>(&self, data: T) -> u64 {
        let bytes = data.as_ref();
        match self.optimization_tier {
            HashOptimizationTier::Bmi2Avx2 | HashOptimizationTier::Bmi2 => {
                if bytes.len() >= 8 {
                    // BUG FIX: this previously used `from_utf8(bytes).unwrap_or("")`,
                    // which hashed the EMPTY string for every non-UTF-8 input and
                    // made all such inputs collide on the same value. Fall back to
                    // the byte-level scalar hasher when the input is not UTF-8;
                    // valid UTF-8 inputs hash exactly as before.
                    match std::str::from_utf8(bytes) {
                        Ok(s) => fast_string_hash_bmi2(s, 0),
                        Err(_) => scalar_string_hash_bmi2(bytes, 0),
                    }
                } else {
                    scalar_string_hash_bmi2(bytes, 0)
                }
            }
            HashOptimizationTier::Bmi1 => {
                self.hash_with_bmi1_acceleration(bytes)
            }
            HashOptimizationTier::Scalar => {
                self.hash_scalar_fallback(bytes)
            }
        }
    }
    /// Combines two 64-bit hash words using the tier's best mixer.
    pub fn hash_combine_optimal(&self, hash: u64, value: u64) -> u64 {
        match self.optimization_tier {
            HashOptimizationTier::Bmi2Avx2 | HashOptimizationTier::Bmi2 => {
                bmi2_hash_combine_u64(hash, value)
            }
            HashOptimizationTier::Bmi1 => {
                // Rotate-add plus a cross-rotation XOR (no BMI2 intrinsics).
                let rotated = hash.rotate_left(5);
                let mixed = rotated.wrapping_add(value);
                mixed ^ (value.rotate_right(17))
            }
            HashOptimizationTier::Scalar => {
                hash.rotate_left(5).wrapping_add(value)
            }
        }
    }
    /// Extracts the low `bucket_bits` bits of `hash` as a bucket index.
    pub fn extract_bucket_optimal(&self, hash: u64, bucket_bits: u32) -> u32 {
        match self.optimization_tier {
            HashOptimizationTier::Bmi2Avx2 | HashOptimizationTier::Bmi2 => {
                extract_hash_bucket_bmi2(hash, bucket_bits)
            }
            HashOptimizationTier::Bmi1 | HashOptimizationTier::Scalar => {
                // Portable mask; guard against shifting a u64 by >= 64 bits.
                if bucket_bits >= 64 {
                    hash as u32
                } else {
                    (hash & ((1u64 << bucket_bits) - 1)) as u32
                }
            }
        }
    }
    /// Finds a free slot in a 64-slot occupancy bitmap.
    pub fn resolve_collision_optimal(
        &self,
        hash: u64,
        occupied_mask: u64,
        probe_type: ProbeType
    ) -> Option<u32> {
        // Both arms currently reach the same probing functions, which perform
        // their own BMI2 dispatch internally.
        match self.optimization_tier {
            HashOptimizationTier::Bmi2Avx2 | HashOptimizationTier::Bmi2 => {
                bmi2_collision_resolution(hash, occupied_mask, probe_type)
            }
            HashOptimizationTier::Bmi1 | HashOptimizationTier::Scalar => {
                match probe_type {
                    ProbeType::Linear => bmi2_linear_probe(hash, occupied_mask),
                    ProbeType::Quadratic => bmi2_quadratic_probe(hash, occupied_mask),
                    ProbeType::DoubleHash => bmi2_double_hash_probe(hash, occupied_mask),
                }
            }
        }
    }
    /// BMI1-tier byte hasher: 8-byte rotate-add/XOR chunks plus a byte tail.
    fn hash_with_bmi1_acceleration(&self, bytes: &[u8]) -> u64 {
        let mut hash = 0u64;
        let chunks = bytes.len() / 8;
        for i in 0..chunks {
            let offset = i * 8;
            let chunk_bytes = &bytes[offset..offset + 8];
            let val = u64::from_le_bytes(chunk_bytes.try_into().expect("chunk is 8 bytes"));
            hash = hash.rotate_left(5).wrapping_add(val);
            hash ^= val.rotate_right(13);
        }
        let remaining_start = chunks * 8;
        for &byte in &bytes[remaining_start..] {
            hash = hash.rotate_left(5).wrapping_add(byte as u64);
        }
        hash
    }
    /// Scalar-tier byte hasher: simple per-byte rotate-add.
    fn hash_scalar_fallback(&self, bytes: &[u8]) -> u64 {
        let mut hash = 0u64;
        for &byte in bytes {
            hash = hash.rotate_left(5).wrapping_add(byte as u64);
        }
        hash
    }
    /// Summarizes detected features and rough expected speedups.
    pub fn performance_report(&self) -> HashPerformanceReport {
        let dispatcher = Bmi2Dispatcher::new();
        let opt_report = dispatcher.optimization_report();
        HashPerformanceReport {
            optimization_tier: self.optimization_tier,
            has_bmi1: self.capabilities.has_bmi1,
            has_bmi2: self.capabilities.has_bmi2,
            has_avx2: self.capabilities.simd_caps.cpu_features.has_avx2,
            // Rough, hard-coded estimates — not measured values.
            estimated_speedups: HashMap::from([
                ("hash_combine".to_string(), if self.capabilities.has_bmi2 { 3.5 } else { 1.0 }),
                ("bucket_extract".to_string(), if self.capabilities.has_bmi2 { 2.5 } else { 1.0 }),
                ("string_hash".to_string(), if self.capabilities.has_bmi2 { 2.8 } else { 1.0 }),
                ("collision_resolve".to_string(), if self.capabilities.has_bmi2 { 2.2 } else { 1.0 }),
            ]),
            available_operations: opt_report.available_operations,
        }
    }
}
/// `Default` delegates to [`Bmi2HashDispatcher::new`].
impl Default for Bmi2HashDispatcher {
    fn default() -> Self {
        Self::new()
    }
}
/// Capability and speedup summary produced by
/// [`Bmi2HashDispatcher::performance_report`].
#[derive(Debug, Clone)]
pub struct HashPerformanceReport {
    /// Tier the dispatcher selected.
    pub optimization_tier: HashOptimizationTier,
    /// BMI1 instruction support detected.
    pub has_bmi1: bool,
    /// BMI2 instruction support detected.
    pub has_bmi2: bool,
    /// AVX2 support detected.
    pub has_avx2: bool,
    /// Hard-coded per-operation speedup estimates (not measurements).
    pub estimated_speedups: HashMap<String, f64>,
    /// Operation names reported by the underlying dispatcher.
    pub available_operations: Vec<&'static str>,
}
/// Convenience hashers for common key shapes, built on the BMI2-dispatched
/// primitives above.
pub mod specialized {
    use super::*;
    /// Hashes any integer convertible to `u64`.
    pub fn hash_integer_bmi2<T>(value: T) -> u64
    where
        T: Into<u64> + Copy
    {
        bmi2_hash_combine_u64(0, value.into())
    }
    /// Hashes a string slice with a zero base hash.
    pub fn hash_string_bmi2(s: &str) -> u64 {
        fast_string_hash_bmi2(s, 0)
    }
    /// Hashes a multi-component key by combining all components.
    pub fn hash_complex_key_bmi2(components: &[u64]) -> u64 {
        advanced_hash_combine_bmi2(components)
    }
    /// Hashes a float by its IEEE-754 bit pattern.
    pub fn hash_float_bmi2(value: f64) -> u64 {
        bmi2_hash_combine_u64(0, value.to_bits())
    }
    /// Hashes a two-component key (order-sensitive).
    pub fn hash_tuple_bmi2<T, U>(first: T, second: U) -> u64
    where
        T: Into<u64> + Copy,
        U: Into<u64> + Copy,
    {
        advanced_hash_combine_bmi2(&[first.into(), second.into()])
    }
}
/// Lazily-initialized process-wide dispatcher shared by the free functions
/// below.
static GLOBAL_BMI2_DISPATCHER: std::sync::OnceLock<Bmi2HashDispatcher> = std::sync::OnceLock::new();
/// Returns the shared global dispatcher, creating it on first use.
pub fn get_global_bmi2_dispatcher() -> &'static Bmi2HashDispatcher {
    GLOBAL_BMI2_DISPATCHER.get_or_init(Bmi2HashDispatcher::new)
}
/// Hashes `data` with the global dispatcher's best available strategy.
pub fn hash_with_bmi2<T: AsRef<[u8]>>(data: T) -> u64 {
    get_global_bmi2_dispatcher().hash_with_acceleration(data)
}
/// Combines two hash words via the global dispatcher.
pub fn hash_combine_with_bmi2(hash: u64, value: u64) -> u64 {
    get_global_bmi2_dispatcher().hash_combine_optimal(hash, value)
}
/// Extracts a bucket index via the global dispatcher.
pub fn extract_bucket_with_bmi2(hash: u64, bucket_bits: u32) -> u32 {
    get_global_bmi2_dispatcher().extract_bucket_optimal(hash, bucket_bits)
}
#[cfg(test)]
mod tests {
use super::*;
// Determinism and basic mixing checks for the 32-bit fabo combiner.
#[test]
fn test_fabo_hash_combine_u32() {
    let hash = 0x12345678u32;
    let value = 0xabcdef00u32;
    let result = fabo_hash_combine_u32(hash, value);
    // The combined hash should differ from both inputs...
    assert_ne!(result, hash);
    assert_ne!(result, value);
    // ...and be reproducible for identical inputs.
    assert_eq!(result, fabo_hash_combine_u32(hash, value));
}
// Determinism and basic mixing checks for the 64-bit fabo combiner.
#[test]
fn test_fabo_hash_combine_u64() {
    let hash = 0x123456789abcdef0u64;
    let value = 0xfedcba9876543210u64;
    let result = fabo_hash_combine_u64(hash, value);
    assert_ne!(result, hash);
    assert_ne!(result, value);
    assert_eq!(result, fabo_hash_combine_u64(hash, value));
}
// Growth behavior: empty -> 16, exact small values, ratio ≈ φ for larger sizes.
#[test]
fn test_golden_ratio_next_size() {
    assert_eq!(golden_ratio_next_size(0), 16);
    // 1 * 103 / 64 + 1 == 2; 64 * 103 / 64 + 1 == 104.
    assert_eq!(golden_ratio_next_size(1), 2);
    assert_eq!(golden_ratio_next_size(64), 104);
    let size1 = 100;
    let size2 = golden_ratio_next_size(size1);
    assert!(size2 > size1);
    // Growth ratio should approximate the golden ratio (~1.618).
    let ratio = size2 as f64 / size1 as f64;
    assert!(ratio > 1.5 && ratio < 1.7);
}
// Bucket counts are powers of two and at least the requested capacity.
#[test]
fn test_optimal_bucket_count() {
    assert_eq!(optimal_bucket_count(0), 16);
    for capacity in [1, 10, 100, 1000] {
        let bucket_count = optimal_bucket_count(capacity);
        assert!(bucket_count.is_power_of_two());
        assert!(bucket_count >= capacity);
    }
}
// Multi-hash combining: result differs from inputs, is deterministic, and
// empty input maps to 0.
#[test]
fn test_advanced_hash_combine() {
    let hashes = [0x123456789abcdef0u64, 0xfedcba9876543210u64, 0x0f0f0f0f0f0f0f0fu64];
    let result = advanced_hash_combine(&hashes);
    for &hash in &hashes {
        assert_ne!(result, hash);
    }
    assert_eq!(result, advanced_hash_combine(&hashes));
    assert_eq!(advanced_hash_combine(&[]), 0);
    // Even a single hash goes through the avalanche finalizer, so it changes.
    let single_result = advanced_hash_combine(&[hashes[0]]);
    assert_ne!(single_result, hashes[0]);
}
// Closures produced by the builder must be deterministic.
#[test]
fn test_hash_function_builder() {
    let builder = HashFunctionBuilder::new()
        .with_rotation(7)
        .with_strategy(CombineStrategy::Fabo);
    let hash_fn = builder.build_u32();
    let result = hash_fn(0x12345678u32, 0xabcdef00u32);
    assert_eq!(result, hash_fn(0x12345678u32, 0xabcdef00u32));
}
// The trait method must agree with the free function it wraps.
#[test]
fn test_hash_combinable_trait() {
    let hash = 0x12345678u32;
    let value = 0xabcdef00u32;
    let result1 = value.fabo_combine(hash);
    let result2 = fabo_hash_combine_u32(hash, value);
    assert_eq!(result1, result2);
}
// Each combine strategy produces nonzero, deterministic output on sample inputs.
#[test]
fn test_combine_strategies() {
    let test_cases = [
        (0x12345678u32, 0xabcdef00u32),
        (0x87654321u32, 0x00fedcbau32),
        (0xfedcba98u32, 0x12345678u32),
    ];
    for (hash, value) in test_cases.iter() {
        let addition = HashFunctionBuilder::new()
            .with_strategy(CombineStrategy::Addition)
            .build_u32()(*hash, *value);
        let xor = HashFunctionBuilder::new()
            .with_strategy(CombineStrategy::Xor)
            .build_u32()(*hash, *value);
        let fabo = HashFunctionBuilder::new()
            .with_strategy(CombineStrategy::Fabo)
            .build_u32()(*hash, *value);
        let advanced = HashFunctionBuilder::new()
            .with_strategy(CombineStrategy::Advanced)
            .build_u32()(*hash, *value);
        assert_ne!(addition, 0);
        assert_ne!(xor, 0);
        assert_ne!(fabo, 0);
        assert_ne!(advanced, 0);
        // Re-running a strategy with the same inputs must reproduce the result.
        let addition2 = HashFunctionBuilder::new()
            .with_strategy(CombineStrategy::Addition)
            .build_u32()(*hash, *value);
        assert_eq!(addition, addition2);
    }
}
// BMI2 combiners: deterministic, and distinct from nonzero distinct inputs.
#[test]
fn test_bmi2_hash_combine_functions() {
    let test_cases = [
        (0x12345678u32, 0xabcdef00u32),
        (0x87654321u32, 0x00fedcbau32),
        (0xfedcba98u32, 0x12345678u32),
        (0u32, 0xffffffffu32),
        (0xffffffffu32, 0u32),
    ];
    for (hash, value) in test_cases.iter() {
        let result1 = bmi2_hash_combine_u32(*hash, *value);
        let result2 = bmi2_hash_combine_u32(*hash, *value);
        assert_eq!(result1, result2);
        // Zero inputs are substituted internally, so only check mixing for
        // nonzero, distinct operands.
        if *hash != *value && *hash != 0 && *value != 0 {
            assert_ne!(result1, *hash);
            assert_ne!(result1, *value);
        }
    }
    let test_cases_u64 = [
        (0x123456789abcdef0u64, 0xfedcba9876543210u64),
        (0x0u64, 0xffffffffffffffffu64),
        (0xffffffffffffffffu64, 0x0u64),
        (0x5555555555555555u64, 0xaaaaaaaaaaaaaaaa_u64),
    ];
    for (hash, value) in test_cases_u64.iter() {
        let result1 = bmi2_hash_combine_u64(*hash, *value);
        let result2 = bmi2_hash_combine_u64(*hash, *value);
        assert_eq!(result1, result2);
        if *hash != *value && *hash != 0 && *value != 0 {
            assert_ne!(result1, *hash);
            assert_ne!(result1, *value);
        }
    }
}
// The Bmi2 strategy is deterministic through the builder for both widths.
#[test]
fn test_bmi2_strategy_in_builder() {
    let test_cases = [
        (0x12345678u32, 0xabcdef00u32),
        (0x87654321u32, 0x00fedcbau32),
        (0xfedcba98u32, 0x12345678u32),
    ];
    for (hash, value) in test_cases.iter() {
        let bmi2_result = HashFunctionBuilder::new()
            .with_strategy(CombineStrategy::Bmi2)
            .build_u32()(*hash, *value);
        let bmi2_result2 = HashFunctionBuilder::new()
            .with_strategy(CombineStrategy::Bmi2)
            .build_u32()(*hash, *value);
        assert_eq!(bmi2_result, bmi2_result2);
        let hash64 = *hash as u64;
        let value64 = *value as u64;
        let bmi2_result64 = HashFunctionBuilder::new()
            .with_strategy(CombineStrategy::Bmi2)
            .build_u64()(hash64, value64);
        let bmi2_result64_2 = HashFunctionBuilder::new()
            .with_strategy(CombineStrategy::Bmi2)
            .build_u64()(hash64, value64);
        assert_eq!(bmi2_result64, bmi2_result64_2);
    }
}
// The BMI2 sizing helpers must match the portable formulas.
#[test]
fn test_bmi2_golden_ratio_functions() {
    assert_eq!(bmi2_golden_ratio_next_size(0), 16);
    assert_eq!(bmi2_golden_ratio_next_size(1), 2);
    assert_eq!(bmi2_golden_ratio_next_size(64), 104);
    let size1 = 100;
    let size2 = bmi2_golden_ratio_next_size(size1);
    assert!(size2 > size1);
    let ratio = size2 as f64 / size1 as f64;
    assert!(ratio > 1.5 && ratio < 1.7);
    assert_eq!(bmi2_optimal_bucket_count(0), 16);
    for capacity in [1, 10, 100, 1000] {
        let bucket_count = bmi2_optimal_bucket_count(capacity);
        assert!(bucket_count.is_power_of_two());
        assert!(bucket_count >= capacity);
    }
}
// Bucket extraction stays within range and matches the bulk variant.
#[test]
fn test_hash_bucket_extraction() {
    let test_hashes = [
        0x123456789abcdef0u64,
        0xfedcba9876543210u64,
        0x0u64,
        0xffffffffffffffffu64,
        0x5555555555555555u64,
        0xaaaaaaaaaaaaaaaa_u64,
    ];
    for &hash in &test_hashes {
        for bucket_bits in [1, 4, 8, 16, 24, 32] {
            let bucket = extract_hash_bucket_bmi2(hash, bucket_bits);
            // A b-bit bucket index must be < 2^b.
            if bucket_bits < 32 {
                assert!(bucket < (1u32 << bucket_bits));
            } else {
                assert!(bucket <= u32::MAX);
            }
            let bucket2 = extract_hash_bucket_bmi2(hash, bucket_bits);
            assert_eq!(bucket, bucket2);
        }
    }
    // Bulk extraction must agree element-wise with single extraction.
    let buckets = extract_hash_buckets_bulk_bmi2(&test_hashes, 8);
    assert_eq!(buckets.len(), test_hashes.len());
    for (i, &bucket) in buckets.iter().enumerate() {
        let individual_bucket = extract_hash_bucket_bmi2(test_hashes[i], 8);
        assert_eq!(bucket, individual_bucket);
        assert!(bucket < 256);
    }
}
// BMI2 multi-hash combine: deterministic, input-sensitive, order-sensitive.
#[test]
fn test_advanced_hash_combine_bmi2() {
    assert_eq!(advanced_hash_combine_bmi2(&[]), 0);
    let single_hash = advanced_hash_combine_bmi2(&[0x123456789abcdef0u64]);
    assert_ne!(single_hash, 0x123456789abcdef0u64);
    let hashes = [
        0x123456789abcdef0u64,
        0xfedcba9876543210u64,
        0x0f0f0f0f0f0f0f0fu64,
    ];
    let combined = advanced_hash_combine_bmi2(&hashes);
    let combined2 = advanced_hash_combine_bmi2(&hashes);
    assert_eq!(combined, combined2);
    for &hash in &hashes {
        assert_ne!(combined, hash);
    }
    // Combining is order-dependent.
    let hashes_reversed = [hashes[2], hashes[1], hashes[0]];
    let combined_reversed = advanced_hash_combine_bmi2(&hashes_reversed);
    assert_ne!(combined, combined_reversed);
}
// String hashing: deterministic, base-hash-sensitive, content-sensitive.
#[test]
fn test_fast_string_hash_bmi2() {
    let test_strings = [
        "",
        "a",
        "hello",
        "hello world",
        "The quick brown fox jumps over the lazy dog",
        &"A".repeat(100),
        "混合UTF-8字符串测试",
    ];
    for test_str in &test_strings {
        let hash1 = fast_string_hash_bmi2(test_str, 0);
        let hash2 = fast_string_hash_bmi2(test_str, 0);
        assert_eq!(hash1, hash2);
        // A different base hash must change the result for non-empty input.
        if !test_str.is_empty() {
            let hash_with_base = fast_string_hash_bmi2(test_str, 0x123456789abcdef0u64);
            assert_ne!(hash1, hash_with_base);
        }
    }
    let hash_hello = fast_string_hash_bmi2("hello", 0);
    let hash_world = fast_string_hash_bmi2("world", 0);
    assert_ne!(hash_hello, hash_world);
}
// Collision probing: empty bitmap yields a slot, full bitmap yields None, and
// a returned slot is actually free.
#[test]
fn test_collision_resolution() {
    let test_hash = 0x123456789abcdef0u64;
    let empty_mask = 0u64;
    let linear_result = bmi2_collision_resolution(test_hash, empty_mask, ProbeType::Linear);
    assert_eq!(linear_result, Some(0));
    let quadratic_result = bmi2_collision_resolution(test_hash, empty_mask, ProbeType::Quadratic);
    assert!(quadratic_result.is_some());
    let double_hash_result = bmi2_collision_resolution(test_hash, empty_mask, ProbeType::DoubleHash);
    assert!(double_hash_result.is_some());
    let full_mask = u64::MAX;
    let linear_full = bmi2_collision_resolution(test_hash, full_mask, ProbeType::Linear);
    assert_eq!(linear_full, None);
    let quadratic_full = bmi2_collision_resolution(test_hash, full_mask, ProbeType::Quadratic);
    assert_eq!(quadratic_full, None);
    let double_hash_full = bmi2_collision_resolution(test_hash, full_mask, ProbeType::DoubleHash);
    assert_eq!(double_hash_full, None);
    let partial_mask = 0x0F0F0F0F0F0F0F0Fu64;
    let linear_partial = bmi2_collision_resolution(test_hash, partial_mask, ProbeType::Linear);
    assert!(linear_partial.is_some());
    if let Some(pos) = linear_partial {
        // The chosen slot must be unoccupied.
        assert_eq!((partial_mask >> pos) & 1, 0);
    }
}
// Load-factor bookkeeping across empty, under-loaded, and over-loaded tables.
#[test]
fn test_load_factor_calculations() {
    let info = bmi2_load_factor_calculations(0, 0, 0.75);
    assert_eq!(info.current_load_factor, 0.0);
    assert!(info.should_resize);
    assert_eq!(info.suggested_new_size, 16);
    let info = bmi2_load_factor_calculations(100, 50, 0.75);
    assert_eq!(info.current_load_factor, 0.5);
    assert!(!info.should_resize);
    assert_eq!(info.suggested_new_size, 100);
    let info = bmi2_load_factor_calculations(100, 80, 0.75);
    assert!(info.current_load_factor > 0.75);
    assert!(info.should_resize);
    assert!(info.suggested_new_size > 100);
    assert!(info.resize_threshold > 0);
    assert!(info.resize_threshold as f64 <= info.suggested_new_size as f64 * 0.75);
}
// End-to-end dispatcher checks: hashing, combining, bucket extraction,
// collision resolution, and the performance report.
#[test]
fn test_bmi2_hash_dispatcher() {
    let dispatcher = Bmi2HashDispatcher::new();
    let tier = dispatcher.tier();
    println!("BMI2 Hash Dispatcher tier: {:?}", tier);
    let test_data = b"hello world test data";
    let hash1 = dispatcher.hash_with_acceleration(test_data);
    let hash2 = dispatcher.hash_with_acceleration(test_data);
    assert_eq!(hash1, hash2);
    let hash3 = dispatcher.hash_with_acceleration(b"different data");
    assert_ne!(hash1, hash3);
    let combined1 = dispatcher.hash_combine_optimal(0x123456789abcdef0u64, 0xfedcba9876543210u64);
    let combined2 = dispatcher.hash_combine_optimal(0x123456789abcdef0u64, 0xfedcba9876543210u64);
    assert_eq!(combined1, combined2);
    let bucket1 = dispatcher.extract_bucket_optimal(0x123456789abcdef0u64, 8);
    let bucket2 = dispatcher.extract_bucket_optimal(0x123456789abcdef0u64, 8);
    assert_eq!(bucket1, bucket2);
    assert!(bucket1 < 256);
    let collision_result = dispatcher.resolve_collision_optimal(
        0x123456789abcdef0u64,
        0x0F0F0F0F0F0F0F0Fu64,
        ProbeType::Linear
    );
    assert!(collision_result.is_some());
    let report = dispatcher.performance_report();
    println!("BMI2 Hash Performance Report: {:?}", report);
    assert!(!report.estimated_speedups.is_empty());
    assert!(!report.available_operations.is_empty());
}
#[test]
fn test_specialized_hash_functions() {
    use specialized::*;

    // Integer hashing: both widths must produce non-zero digests.
    assert_ne!(hash_integer_bmi2(42u32), 0);
    assert_ne!(hash_integer_bmi2(42u64), 0);

    // String hashing: non-zero, and the convenience wrapper must agree
    // with the seeded fast path when the seed is 0.
    let text = "test string";
    let via_wrapper = hash_string_bmi2(text);
    assert_ne!(via_wrapper, 0);
    assert_eq!(via_wrapper, fast_string_hash_bmi2(text, 0));

    // Composite keys built from multiple u64 components hash to non-zero.
    let key_parts = [
        0x123456789abcdef0u64,
        0xfedcba9876543210u64,
        0x0f0f0f0f0f0f0f0fu64,
    ];
    assert_ne!(hash_complex_key_bmi2(&key_parts), 0);

    // Float hashing: non-zero, and distinct values hash differently.
    let pi_hash = hash_float_bmi2(3.14159);
    assert_ne!(pi_hash, 0);
    assert_ne!(pi_hash, hash_float_bmi2(2.71828));

    // Tuple hashing: non-zero, and order of components matters.
    let forward = hash_tuple_bmi2(42u32, 84u64);
    assert_ne!(forward, 0);
    assert_ne!(forward, hash_tuple_bmi2(84u32, 42u64));
}
#[test]
fn test_global_bmi2_functions() {
    // Two lookups of the process-wide dispatcher must agree on the
    // detected acceleration tier.
    let a = get_global_bmi2_dispatcher();
    let b = get_global_bmi2_dispatcher();
    assert_eq!(a.tier(), b.tier());

    // The free-function hash entry point is deterministic.
    let payload = b"global test data";
    let digest = hash_with_bmi2(payload);
    assert_eq!(digest, hash_with_bmi2(payload));

    // Combining with a second value must perturb the hash, and bucket
    // extraction with 8 bits must stay below 2^8.
    let mixed = hash_combine_with_bmi2(digest, 0xdeadbeefcafebabeu64);
    assert_ne!(mixed, digest);
    assert!(extract_bucket_with_bmi2(mixed, 8) < 256);
}
#[test]
fn test_bmi2_performance_characteristics() {
    // Smoke-test the accelerated hash across small, medium, and large
    // inputs; we only check that each size completes without panicking.
    let dispatcher = Bmi2HashDispatcher::new();
    let tiny = b"small";
    let medium = "medium size data string with more content".repeat(10);
    let big = "large data content".repeat(1000);
    let _ = dispatcher.hash_with_acceleration(tiny);
    let _ = dispatcher.hash_with_acceleration(medium.as_bytes());
    let _ = dispatcher.hash_with_acceleration(big.as_bytes());

    // Bulk bucket extraction: 1000 hashes in, 1000 buckets out, every
    // bucket bounded by 2^8.
    let hashes: Vec<u64> = (0..1000)
        .map(|i| (i as u64).wrapping_mul(0x123456789abcdef))
        .collect();
    let buckets = extract_hash_buckets_bulk_bmi2(&hashes, 8);
    assert_eq!(buckets.len(), 1000);
    assert!(buckets.iter().all(|&bucket| bucket < 256));

    // Combining a prefix of the hashes must yield a non-zero digest.
    assert_ne!(advanced_hash_combine_bmi2(&hashes[0..10]), 0);
}
#[test]
fn test_edge_cases() {
    // All-zero operands: the combiners must at least be deterministic.
    assert_eq!(bmi2_hash_combine_u32(0, 0), bmi2_hash_combine_u32(0, 0));
    assert_eq!(bmi2_hash_combine_u64(0, 0), bmi2_hash_combine_u64(0, 0));

    // Saturated operands: combining MAX with itself must actually mix
    // rather than return the input unchanged.
    assert_ne!(bmi2_hash_combine_u32(u32::MAX, u32::MAX), u32::MAX);
    assert_ne!(bmi2_hash_combine_u64(u64::MAX, u64::MAX), u64::MAX);

    // Bucket extraction boundaries: 1-bit buckets mirror the low bit,
    // and a 64-bit extraction returns the full (truncated-to-u32) hash.
    assert_eq!(extract_hash_bucket_bmi2(0, 1), 0);
    assert_eq!(extract_hash_bucket_bmi2(1, 1), 1);
    assert_eq!(extract_hash_bucket_bmi2(u64::MAX, 64), u64::MAX as u32);

    // String hashing boundaries: the empty string with seed 0 hashes to
    // 0, while a single character must not.
    assert_eq!(fast_string_hash_bmi2("", 0), 0);
    assert_ne!(fast_string_hash_bmi2("a", 0), 0);

    // Single-slot empty table: load factor 0.0 and no resize pressure.
    let tiny_table = bmi2_load_factor_calculations(1, 0, 0.75);
    assert_eq!(tiny_table.current_load_factor, 0.0);
    assert!(!tiny_table.should_resize);
}
}