use std::collections::HashMap;
use std::hash::{BuildHasherDefault, Hasher};
use std::sync::RwLock;
use crate::{Error, Result};
const SHARDS: usize = 64;
const SHARD_MASK: u64 = (SHARDS as u64) - 1;
#[derive(Default)]
pub(crate) struct IdentityHasher(u64);
impl Hasher for IdentityHasher {
#[inline]
fn finish(&self) -> u64 {
self.0
}
#[inline]
fn write(&mut self, _bytes: &[u8]) {
}
#[inline]
fn write_u64(&mut self, value: u64) {
self.0 = value;
}
}
type IdentityBuildHasher = BuildHasherDefault<IdentityHasher>;
type ShardMap = HashMap<u64, Slot, IdentityBuildHasher>;
#[derive(Debug, Clone)]
enum Slot {
Single(u64),
Multi(Vec<(Vec<u8>, u64)>),
}
pub(crate) type KeyHash = u64;
#[derive(Debug)]
pub(crate) struct Index {
shards: Box<[RwLock<ShardMap>; SHARDS]>,
}
impl Default for Index {
fn default() -> Self {
Self::new()
}
}
impl Index {
#[must_use]
pub(crate) fn new() -> Self {
let shards: [RwLock<ShardMap>; SHARDS] =
std::array::from_fn(|_| RwLock::new(ShardMap::default()));
Self {
shards: Box::new(shards),
}
}
#[inline]
#[must_use]
pub(crate) fn hash_key(key: &[u8]) -> KeyHash {
const SEED: u64 = 0x51_7c_c1_b7_27_22_0a_95;
const ROTATE: u32 = 5;
let mut hash = 0_u64;
let mut bytes = key;
while bytes.len() >= 8 {
let mut block = [0_u8; 8];
block.copy_from_slice(&bytes[..8]);
let word = u64::from_le_bytes(block);
hash = (hash.rotate_left(ROTATE) ^ word).wrapping_mul(SEED);
bytes = &bytes[8..];
}
if bytes.len() >= 4 {
let mut block = [0_u8; 4];
block.copy_from_slice(&bytes[..4]);
let word = u32::from_le_bytes(block) as u64;
hash = (hash.rotate_left(ROTATE) ^ word).wrapping_mul(SEED);
bytes = &bytes[4..];
}
for &b in bytes {
hash = (hash.rotate_left(ROTATE) ^ (b as u64)).wrapping_mul(SEED);
}
hash
}
pub(crate) fn get(&self, hash: KeyHash, key: &[u8]) -> Result<Option<u64>> {
let shard_idx = (hash & SHARD_MASK) as usize;
let shard = self.shards[shard_idx]
.read()
.map_err(|_poisoned| Error::LockPoisoned)?;
match shard.get(&hash) {
None => Ok(None),
Some(Slot::Single(offset)) => Ok(Some(*offset)),
Some(Slot::Multi(entries)) => {
for (k, off) in entries {
if k.as_slice() == key {
return Ok(Some(*off));
}
}
Ok(None)
}
}
}
pub(crate) fn replace<F>(
&self,
hash: KeyHash,
key: &[u8],
offset: u64,
mut resolve_existing: F,
) -> Result<Option<u64>>
where
F: FnMut(u64) -> Result<Option<Vec<u8>>>,
{
let shard_idx = (hash & SHARD_MASK) as usize;
let mut shard = self.shards[shard_idx]
.write()
.map_err(|_poisoned| Error::LockPoisoned)?;
match shard.get_mut(&hash) {
None => {
let _prev = shard.insert(hash, Slot::Single(offset));
Ok(None)
}
Some(slot) => match slot {
Slot::Single(existing_offset) => {
let existing = *existing_offset;
match resolve_existing(existing)? {
Some(existing_key) if existing_key.as_slice() == key => {
*existing_offset = offset;
Ok(Some(existing))
}
Some(existing_key) => {
*slot =
Slot::Multi(vec![(existing_key, existing), (key.to_vec(), offset)]);
Ok(None)
}
None => {
*existing_offset = offset;
Ok(Some(existing))
}
}
}
Slot::Multi(entries) => {
for entry in entries.iter_mut() {
if entry.0.as_slice() == key {
let prev = entry.1;
entry.1 = offset;
return Ok(Some(prev));
}
}
entries.push((key.to_vec(), offset));
Ok(None)
}
},
}
}
pub(crate) fn remove(&self, hash: KeyHash, key: &[u8]) -> Result<Option<u64>> {
let shard_idx = (hash & SHARD_MASK) as usize;
let mut shard = self.shards[shard_idx]
.write()
.map_err(|_poisoned| Error::LockPoisoned)?;
match shard.get_mut(&hash) {
None => Ok(None),
Some(slot) => match slot {
Slot::Single(offset) => {
let prev = *offset;
let _removed = shard.remove(&hash);
Ok(Some(prev))
}
Slot::Multi(entries) => {
let mut matched: Option<u64> = None;
entries.retain(|(k, off)| {
if matched.is_none() && k.as_slice() == key {
matched = Some(*off);
false
} else {
true
}
});
if entries.is_empty() {
let _removed = shard.remove(&hash);
} else if entries.len() == 1 {
let (_k, off) = entries[0].clone();
*slot = Slot::Single(off);
}
Ok(matched)
}
},
}
}
pub(crate) fn len(&self) -> Result<usize> {
let mut total = 0;
for shard in self.shards.iter() {
let guard = shard.read().map_err(|_poisoned| Error::LockPoisoned)?;
for slot in guard.values() {
total += match slot {
Slot::Single(_) => 1,
Slot::Multi(entries) => entries.len(),
};
}
}
Ok(total)
}
pub(crate) fn clear(&self) -> Result<()> {
for shard in self.shards.iter() {
let mut guard = shard.write().map_err(|_poisoned| Error::LockPoisoned)?;
guard.clear();
}
Ok(())
}
pub(crate) fn collect_offsets(&self) -> Result<Vec<u64>> {
let mut out = Vec::new();
for shard in self.shards.iter() {
let guard = shard.read().map_err(|_poisoned| Error::LockPoisoned)?;
for slot in guard.values() {
match slot {
Slot::Single(off) => out.push(*off),
Slot::Multi(entries) => {
for (_, off) in entries {
out.push(*off);
}
}
}
}
}
Ok(out)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn no_resolver(_offset: u64) -> Result<Option<Vec<u8>>> {
Ok(None)
}
#[test]
fn insert_and_get_round_trip() {
let idx = Index::new();
let h = Index::hash_key(b"alpha");
assert!(idx
.replace(h, b"alpha", 100, no_resolver)
.unwrap()
.is_none());
assert_eq!(idx.get(h, b"alpha").unwrap(), Some(100));
}
#[test]
fn replace_returns_previous_offset() {
let idx = Index::new();
let h = Index::hash_key(b"alpha");
let _ = idx.replace(h, b"alpha", 100, no_resolver).unwrap();
let resolver = |_off: u64| Ok(Some(b"alpha".to_vec()));
let prev = idx.replace(h, b"alpha", 200, resolver).unwrap();
assert_eq!(prev, Some(100));
assert_eq!(idx.get(h, b"alpha").unwrap(), Some(200));
}
#[test]
fn remove_drops_entry() {
let idx = Index::new();
let h = Index::hash_key(b"alpha");
let _ = idx.replace(h, b"alpha", 100, no_resolver).unwrap();
let prev = idx.remove(h, b"alpha").unwrap();
assert_eq!(prev, Some(100));
assert_eq!(idx.get(h, b"alpha").unwrap(), None);
}
#[test]
fn hash_collision_disambiguates_by_key() {
let idx = Index::new();
let _ = idx.replace(42, b"first", 100, no_resolver).unwrap();
let resolver = |_off: u64| Ok(Some(b"first".to_vec()));
let _ = idx.replace(42, b"second", 200, resolver).unwrap();
assert_eq!(idx.get(42, b"first").unwrap(), Some(100));
assert_eq!(idx.get(42, b"second").unwrap(), Some(200));
assert_eq!(idx.get(42, b"third").unwrap(), None);
}
#[test]
fn len_reflects_total_entries_across_shards() {
let idx = Index::new();
for i in 0_u32..200 {
let key = format!("k{i:04}");
let h = Index::hash_key(key.as_bytes());
let _ = idx
.replace(h, key.as_bytes(), i as u64, no_resolver)
.unwrap();
}
assert_eq!(idx.len().unwrap(), 200);
}
#[test]
fn clear_empties_every_shard() {
let idx = Index::new();
for i in 0_u32..50 {
let key = format!("k{i}");
let h = Index::hash_key(key.as_bytes());
let _ = idx
.replace(h, key.as_bytes(), i as u64, no_resolver)
.unwrap();
}
idx.clear().unwrap();
assert_eq!(idx.len().unwrap(), 0);
}
#[test]
fn fxhash_is_deterministic() {
let h1 = Index::hash_key(b"deterministic");
let h2 = Index::hash_key(b"deterministic");
assert_eq!(h1, h2);
let h3 = Index::hash_key(b"different");
assert_ne!(h1, h3);
}
}