use super::{MARKER_CONFLICT, MARKER_FREE, calculate_bucket_position};
use byteorder::WriteBytesExt;
pub const MAX_POINTERS_FOR_HASH_INDEX: usize = 254;
#[derive(Debug)]
pub struct Builder(Vec<u8>);
impl Builder {
#[must_use]
pub fn with_bucket_count(bucket_count: u32) -> Self {
Self(vec![MARKER_FREE; bucket_count as usize])
}
#[must_use]
pub fn with_hash_ratio(item_count: usize, hash_ratio: f32) -> Self {
Self::with_bucket_count(Self::calculate_bucket_count(item_count, hash_ratio))
}
fn calculate_bucket_count(item_count: usize, hash_ratio: f32) -> u32 {
assert!(
hash_ratio.is_sign_positive(),
"hash_ratio may not be negative",
);
if hash_ratio > 0.0 {
#[expect(
clippy::cast_precision_loss,
reason = "item count tends to be some thousands at most, so f32 should be precise enough"
)]
let item_count = item_count as f32;
#[expect(
clippy::cast_sign_loss,
clippy::cast_possible_truncation,
reason = "buckets tend to be in the range of a couple of hundreds to thousands, so easily fits into u32"
)]
let bucket_count = (item_count * hash_ratio) as u32;
1.max(bucket_count)
} else {
0
}
}
#[must_use]
#[expect(
clippy::cast_possible_truncation,
reason = "we know the hash index has a bucket count <= u8"
)]
pub fn bucket_count(&self) -> u32 {
self.0.len() as u32
}
pub fn set(&mut self, key: &[u8], binary_index_pos: u8) -> bool {
debug_assert!(
binary_index_pos <= 253,
"restart index too high for hash index"
);
assert!(self.bucket_count() > 0, "no buckets to insert into");
let bucket_pos = calculate_bucket_position(key, self.bucket_count());
#[expect(unsafe_code, reason = "see safety")]
let curr_marker = unsafe { *self.0.get_unchecked(bucket_pos) };
match curr_marker {
MARKER_CONFLICT => false,
MARKER_FREE => {
#[expect(unsafe_code, reason = "see safety")]
unsafe {
*self.0.get_unchecked_mut(bucket_pos) = binary_index_pos;
}
true
}
x if x == binary_index_pos => {
true
}
_ => {
#[expect(unsafe_code, reason = "see safety")]
unsafe {
*self.0.get_unchecked_mut(bucket_pos) = MARKER_CONFLICT;
}
false
}
}
}
#[must_use]
#[doc(hidden)]
pub fn into_inner(self) -> Vec<u8> {
self.0
}
pub fn write<W: std::io::Write>(self, writer: &mut W) -> std::io::Result<()> {
for byte in self.0 {
writer.write_u8(byte)?;
}
Ok(())
}
}