use alloc::vec;
use alloc::vec::Vec;
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct LdmEntry {
pub(crate) offset: u32,
pub(crate) checksum: u32,
}
const REBASE_GUARD_BAND: u32 = 1u32 << 30;
pub(crate) struct LdmHashTable {
entries: Vec<LdmEntry>,
bucket_offsets: Vec<u8>,
effective_bucket_log: u32,
bucket_mask: u32,
position_base: usize,
}
impl LdmHashTable {
pub(crate) fn new(hash_log: u32, bucket_size_log: u32) -> Self {
assert!(hash_log > 0, "hash_log must be > 0");
assert!(
hash_log <= 30,
"hash_log {hash_log} exceeds donor ZSTD_LDM_HASHLOG_MAX (30)"
);
let effective_bucket_log = bucket_size_log.min(hash_log);
assert!(
effective_bucket_log <= 8,
"effective bucket_size_log {effective_bucket_log} exceeds donor \
ZSTD_LDM_BUCKETSIZELOG_MAX (8); a u8 cursor would silently \
truncate at 256 slots — widen the cursor or clamp the request"
);
let bucket_count = 1u32 << (hash_log - effective_bucket_log);
let total_entries = 1usize << hash_log;
Self {
entries: vec![LdmEntry::default(); total_entries],
bucket_offsets: vec![0u8; bucket_count as usize],
effective_bucket_log,
bucket_mask: bucket_count - 1,
position_base: 0,
}
}
pub(crate) fn clear(&mut self) {
self.entries.fill(LdmEntry::default());
self.bucket_offsets.fill(0);
self.position_base = 0;
}
pub(crate) const fn bucket_count(&self) -> usize {
self.bucket_mask as usize + 1
}
pub(crate) const fn bucket_slots(&self) -> usize {
1usize << self.effective_bucket_log
}
pub(crate) fn bucket(&self, hash_id: u32) -> &[LdmEntry] {
let start = (hash_id as usize) << self.effective_bucket_log;
let len = self.bucket_slots();
&self.entries[start..start + len]
}
pub(crate) fn insert(&mut self, hash_id: u32, entry: LdmEntry) {
assert!(
entry.offset != 0,
"offset 0 is reserved for the empty-slot sentinel; \
use `insert_absolute` (which applies the +1 bias) or \
store a +1-biased relative offset before calling insert"
);
debug_assert!(
hash_id <= self.bucket_mask,
"hash_id {hash_id} out of range (bucket_count = {})",
self.bucket_count()
);
let slot_mask = self.bucket_slots() - 1;
let bucket_start = (hash_id as usize) << self.effective_bucket_log;
let offset = self.bucket_offsets[hash_id as usize] as usize;
self.entries[bucket_start + offset] = entry;
let next = (offset + 1) & slot_mask;
self.bucket_offsets[hash_id as usize] = next as u8;
}
pub(crate) const fn bucket_mask(&self) -> u32 {
self.bucket_mask
}
pub(crate) const fn position_base(&self) -> usize {
self.position_base
}
pub(crate) fn resolve(&self, entry: &LdmEntry) -> Option<usize> {
match entry.offset {
0 => None,
rel => Some(self.position_base + (rel as usize) - 1),
}
}
pub(crate) fn insert_absolute(&mut self, hash_id: u32, abs_pos: usize, checksum: u32) {
let rel = abs_pos.checked_sub(self.position_base).unwrap_or_else(|| {
panic!(
"insert position {abs_pos} is below position_base {}; \
callers must rebase via `ensure_room_for` or clear before \
reaching this state",
self.position_base
)
});
assert!(
rel < u32::MAX as usize,
"insert position {abs_pos} (rel {rel}) exceeds u32 window; \
producer must call `ensure_room_for` before insert"
);
let stored = (rel as u32) + 1;
self.insert(
hash_id,
LdmEntry {
offset: stored,
checksum,
},
);
}
pub(crate) fn ensure_room_for(&mut self, abs_pos: usize) {
if abs_pos < self.position_base {
return;
}
let max_rel = u32::MAX as usize - REBASE_GUARD_BAND as usize;
while abs_pos - self.position_base > max_rel {
self.reduce(REBASE_GUARD_BAND);
}
}
pub(crate) fn reduce(&mut self, reducer: u32) {
for entry in &mut self.entries {
if entry.offset <= reducer {
entry.offset = 0;
} else {
entry.offset -= reducer;
}
}
self.position_base = self.position_base.saturating_add(reducer as usize);
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn new_table_sizes_match_donor_formulae() {
let t = LdmHashTable::new(8, 4);
assert_eq!(t.bucket_count(), 16);
assert_eq!(t.bucket_slots(), 16);
assert_eq!(t.entries.len(), 256);
assert_eq!(t.bucket_offsets.len(), 16);
assert_eq!(t.bucket_mask(), 15);
}
#[test]
fn new_clamps_bucket_size_log_to_hash_log() {
let t = LdmHashTable::new(6, 12); assert_eq!(t.bucket_count(), 1, "clamp must yield a single bucket");
assert_eq!(t.bucket_slots(), 1usize << 6);
assert_eq!(t.entries.len(), 1usize << 6);
}
#[test]
fn insert_round_robin_wraps_through_bucket_slots() {
let mut t = LdmHashTable::new(4, 2); for k in 1..=6u32 {
t.insert(
1,
LdmEntry {
offset: k,
checksum: k * 7,
},
);
}
let b = t.bucket(1);
assert_eq!(b[0].offset, 5);
assert_eq!(b[1].offset, 6);
assert_eq!(b[2].offset, 3);
assert_eq!(b[3].offset, 4);
}
#[test]
#[should_panic(expected = "offset 0 is reserved")]
fn insert_panics_on_sentinel_offset_zero() {
let mut t = LdmHashTable::new(4, 2);
t.insert(
0,
LdmEntry {
offset: 0,
checksum: 0xDEAD,
},
);
}
#[test]
fn insert_does_not_contaminate_adjacent_bucket() {
let mut t = LdmHashTable::new(4, 2);
t.insert(
2,
LdmEntry {
offset: 42,
checksum: 0xCAFE,
},
);
let b0 = t.bucket(0);
let b1 = t.bucket(1);
let b3 = t.bucket(3);
for e in b0.iter().chain(b1.iter()).chain(b3.iter()) {
assert_eq!(
*e,
LdmEntry::default(),
"neighbouring buckets must stay empty"
);
}
assert_eq!(t.bucket(2)[0].offset, 42);
}
#[test]
fn clear_zeros_entries_and_rewinds_cursors() {
let mut t = LdmHashTable::new(4, 2);
for k in 0..4u32 {
t.insert(
k % 4,
LdmEntry {
offset: k + 1,
checksum: k * 11,
},
);
}
t.clear();
for e in t.bucket(0).iter().chain(t.bucket(3).iter()) {
assert_eq!(*e, LdmEntry::default());
}
for c in &t.bucket_offsets {
assert_eq!(*c, 0);
}
t.insert(
2,
LdmEntry {
offset: 99,
checksum: 0,
},
);
assert_eq!(t.bucket(2)[0].offset, 99);
}
#[test]
#[cfg(target_pointer_width = "64")]
fn new_accepts_large_hash_log_smoke() {
let t = LdmHashTable::new(18, 4);
assert_eq!(t.bucket_count(), 1usize << (18 - 4));
assert_eq!(t.bucket_slots(), 1usize << 4);
}
#[test]
#[should_panic(expected = "ZSTD_LDM_BUCKETSIZELOG_MAX")]
fn new_rejects_bucket_size_log_above_donor_cap() {
let _ = LdmHashTable::new(12, 9);
}
#[test]
fn insert_absolute_round_trips_through_resolve() {
let mut t = LdmHashTable::new(4, 2);
t.insert_absolute(1, 42, 0xCAFE);
let entry = t.bucket(1)[0];
assert_eq!(entry.checksum, 0xCAFE);
assert_eq!(
t.resolve(&entry),
Some(42),
"stored relative offset must resolve back to the inserted absolute"
);
}
#[test]
fn resolve_returns_none_for_empty_slot() {
let t = LdmHashTable::new(4, 2);
let empty = LdmEntry::default();
assert_eq!(empty.offset, 0);
assert_eq!(t.resolve(&empty), None);
}
#[test]
fn reduce_preserves_resolved_absolute_positions() {
let mut t = LdmHashTable::new(4, 2);
t.insert_absolute(0, 100, 0xAAAA);
t.insert_absolute(1, 200, 0xBBBB);
t.insert_absolute(2, 300, 0xCCCC);
assert_eq!(t.position_base(), 0);
t.reduce(150);
assert_eq!(t.position_base(), 150);
let entry0 = t.bucket(0)[0];
assert_eq!(
t.resolve(&entry0),
None,
"pos 100 < new_base 150 must be evicted"
);
let entry1 = t.bucket(1)[0];
assert_eq!(t.resolve(&entry1), Some(200));
let entry2 = t.bucket(2)[0];
assert_eq!(t.resolve(&entry2), Some(300));
}
#[test]
fn ensure_room_for_rebases_above_guard_band() {
let mut t = LdmHashTable::new(4, 2);
t.insert_absolute(0, 1024, 0xAAAA);
assert_eq!(t.position_base(), 0);
let trigger_pos = (u32::MAX as usize) - (REBASE_GUARD_BAND as usize) + 1;
t.ensure_room_for(trigger_pos);
assert_eq!(
t.position_base(),
REBASE_GUARD_BAND as usize,
"rebase must advance position_base by REBASE_GUARD_BAND"
);
assert_eq!(t.resolve(&t.bucket(0)[0]), None);
t.insert_absolute(2, trigger_pos, 0xCAFE);
assert_eq!(t.resolve(&t.bucket(2)[0]), Some(trigger_pos));
}
#[test]
#[cfg(target_pointer_width = "64")]
fn ensure_room_for_loops_across_multiple_guard_bands() {
let mut t = LdmHashTable::new(4, 2);
let abs_pos = 5usize * (REBASE_GUARD_BAND as usize);
t.ensure_room_for(abs_pos);
let max_rel = u32::MAX as usize - REBASE_GUARD_BAND as usize;
assert!(
abs_pos - t.position_base() <= max_rel,
"ensure_room_for must rebase until rel ≤ max_rel \
(got rel = {}, max_rel = {})",
abs_pos - t.position_base(),
max_rel
);
t.insert_absolute(0, abs_pos, 0xFEED);
assert_eq!(t.resolve(&t.bucket(0)[0]), Some(abs_pos));
}
#[test]
#[should_panic(expected = "below position_base")]
fn insert_absolute_panics_below_position_base() {
let mut t = LdmHashTable::new(4, 2);
t.reduce(100); t.insert_absolute(0, 50, 0xCAFE); }
#[test]
fn clear_resets_position_base() {
let mut t = LdmHashTable::new(4, 2);
t.insert_absolute(0, 1024, 0xAAAA);
t.reduce(1 << 20); assert!(t.position_base() > 0);
t.clear();
assert_eq!(t.position_base(), 0, "clear must rewind position_base to 0");
t.insert_absolute(0, 0, 0xCAFE);
let entry = t.bucket(0)[0];
assert_eq!(t.resolve(&entry), Some(0));
}
#[test]
fn bucket_mask_matches_count_minus_one() {
let t = LdmHashTable::new(8, 3);
assert_eq!(t.bucket_mask() as usize + 1, t.bucket_count());
}
}