const BUCKET_VALUE_OFFSET: usize = 1;
#[derive(Clone, Debug, Default)]
pub(crate) struct BucketValue{
hash:usize,
value:usize,
}
impl BucketValue {
fn get_value_unchecked(&self) -> usize {
self.value - BUCKET_VALUE_OFFSET
}
#[inline]
pub(crate) fn get(&self,hash:usize) -> Option<usize> {
if self.value == 0 {
return None;
}
if self.hash == hash {
Some(self.value - BUCKET_VALUE_OFFSET)
} else {
None
}
}
#[inline]
pub(crate) fn set(&mut self, hash:usize, value: usize) -> Result<Option<usize>,(usize,usize)> {
if self.value == 0 {
self.hash = hash;
self.value = value + BUCKET_VALUE_OFFSET;
return Ok(None);
}
if self.hash == hash {
let old = self.value;
self.value = value + BUCKET_VALUE_OFFSET;
return Ok(Some(old - BUCKET_VALUE_OFFSET));
}
let old = (self.hash, self.value - BUCKET_VALUE_OFFSET);
self.hash = hash;
self.value = value + BUCKET_VALUE_OFFSET;
Err(old)
}
}
#[derive(Clone, Debug)]
pub(crate) struct BasicHashTable {
shift_amount: usize, mod_mask: usize, buckets: Vec<BucketValue>, }
impl BasicHashTable{
pub(crate) fn new(min_capacity: usize, interpret_hash_as_32_bit:bool) -> Self {
let num_bits = determine_hash_table_size_bits(min_capacity);
let table_size = 1 << num_bits;
let mod_mask = table_size - 1;
let base_shift = if interpret_hash_as_32_bit {4} else {std::mem::size_of::<usize>()};
let shift_amount = (base_shift * 8) - num_bits;
let buckets = vec![BucketValue::default(); table_size];
Self {
mod_mask,
shift_amount,
buckets,
}
}
}
impl BasicHashTable {
#[inline]
pub(crate) fn get(&self,hash:usize)->Option<usize>{
let idx = get_bucket_idx(hash, self.shift_amount, self.mod_mask);
self.buckets[idx].get(hash)
}
#[inline]
pub(crate) fn insert(&mut self, hash: usize, position: usize) -> Result<Option<usize>,(usize,usize)> {
let idx = get_bucket_idx(hash, self.shift_amount, self.mod_mask);
self.buckets[idx].set(hash, position)
}
}
fn determine_hash_table_size_bits(slots: usize) -> usize {
((slots+1).next_power_of_two().trailing_zeros() as usize)
.clamp(3, std::mem::size_of::<usize>() * 8) - 1
}
#[inline(always)]
fn get_bucket_idx(checksum: usize, shift_amt:usize, mod_mask:usize) -> usize {
(checksum >> shift_amt) ^ (checksum & mod_mask) }
#[derive(Clone, Debug)]
pub(crate) struct ChainList {
positions: Vec<BucketValue>, mod_mask: usize, }
impl ChainList {
pub(crate) fn new(capacity: usize) -> Self {
if capacity == 0 {
return Self {
positions: Vec::new(),
mod_mask: 0,
};
}
assert_eq!(capacity & (capacity - 1), 0, "Prev Table Capacity ({}) is not a power of 2", capacity);
Self {
positions: vec![BucketValue::default(); capacity],
mod_mask: capacity - 1,
}
}
#[inline]
fn get_prev_pos(&self, last_pos: usize) -> Option<&BucketValue> {
if self.positions.is_empty() {
return None;
}
let prev_idx_value = &self.positions[last_pos & self.mod_mask];
if prev_idx_value.value == 0 { return None;
}
Some(prev_idx_value)
}
#[inline]
pub(crate) fn insert(&mut self, hash:usize, key_position: usize, position: usize) {
if self.positions.is_empty() {return;}
let _ = self.positions.get_mut(key_position & self.mod_mask).unwrap().set(hash, position);
}
pub(crate) fn iter_prev_starts<'a>(&'a self, last_pos: usize, cur_out_pos: usize, hash_value:usize) -> PrevPositionIterator<'a>{
PrevPositionIterator::new(&self, last_pos, cur_out_pos, hash_value)
}
}
pub(crate) struct PrevPositionIterator<'a> {
list: &'a ChainList,
last_pos: usize, cur_out_pos: usize, hash_value: usize, }
impl<'a> PrevPositionIterator<'a> {
fn new(list: &'a ChainList, last_pos: usize,cur_out_pos:usize, hash_value:usize) -> Self {
if list.positions.is_empty() {
return Self {
list,
last_pos,
cur_out_pos,
hash_value: 0,
};
}
Self {
list,
last_pos,
cur_out_pos,
hash_value,
}
}
}
impl<'a> Iterator for PrevPositionIterator<'a> {
type Item = usize;
fn next(&mut self) -> Option<Self::Item> {
loop{
let populated_bucket = self.list.get_prev_pos(self.last_pos)?;
let prev_pos = populated_bucket.get_value_unchecked(); if prev_pos > self.last_pos {
return None; }
self.last_pos = prev_pos;
let diff_pos = self.cur_out_pos - self.last_pos;
if diff_pos >= self.list.positions.len(){
return None;
}
if populated_bucket.hash == self.hash_value {
return Some(prev_pos);
}
}
}
}