ferrum_models/common/paged_pool.rs
1//! Multi-sequence paged-KV pool — Phase 4 of Metal paged attention.
2//!
3//! Replaces the per-cache_id `KvCache` allocation model with a
4//! shared-pool architecture matching vLLM / mistral.rs:
5//!
6//! - **One pool per layer** holds K and V for *all* concurrent sequences.
7//! Sized to `MAX_TOTAL_BLOCKS × num_kv_heads × block_size × head_dim`.
8//! - **Per-cache_id state** ([`PagedSeqState`]) carries that sequence's
9//! logical → physical block mapping (`block_table`) plus its current
10//! length (`len`). Multiple cache_ids can index into the same pool
11//! without colliding because their block_tables point at disjoint
12//! physical blocks (or shared ones, if prefix-caching is enabled
13//! later).
14//! - **[`BlockAllocator`]** is a free-list owning physical block indices.
15//! `allocate` pops, `free` pushes back. Out-of-memory surfaces as
16//! `Result::Err` so the scheduler can refuse the request rather than
17//! panicking deep in the model forward.
18//!
19//! ## Ref counting (Phase 1 of block-level prefix cache)
20//!
21//! Each physical block has a `u16` ref count. `allocate` / `allocate_n`
22//! create a block with `ref_count = 1`. `free` decrements; when the count
23//! reaches zero the block returns to the free list. `acquire(block)`
24//! increments — used by the prefix cache when another sequence wants to
25//! share an already-resident block.
26//!
27//! Backwards compatible: pre-prefix-cache callers don't touch `acquire`,
28//! so every block stays at ref=1 and `free` behaves identically to the
29//! old single-ref free.
30//!
31//! ## Hash table (Phase 2 of block-level prefix cache)
32//!
33//! Each block can be tagged with a content-addressed `BlockHash` via
34//! [`BlockAllocator::register_block_hash`]. The hash is computed by
35//! chaining `parent_hash` with the block's tokens (see
36//! [`block_hash_chain`]) so identical prefixes across requests produce
37//! identical hash sequences.
38//!
39//! [`BlockAllocator::try_acquire_by_hash`] looks up a hash and bumps the
40//! ref count on hit. **Soft-free blocks** (ref=0 still in `free_list`
41//! with content intact) can be resurrected — this is what makes the cache
42//! actually useful across requests. `allocate` evicts a stale hash when
43//! a soft-free block is recycled with new content.
44//!
45//! ## What's *not* here yet (deferred to Phase 3-4 of prefix cache):
46//! - **Engine integration**: hashing incoming prompt blocks, looking up
47//! matching prefix, splicing the existing blocks into the new seq's
48//! block_table, running prefill only on the unmatched suffix.
49//! - **LRU eviction**: `free_list` is LIFO. Better policy would be
50//! LRU-soft-free so the oldest (least likely to hit) blocks are
51//! reused first. Today the most-recently-freed gets reused first
52//! (pessimistic for cross-request reuse).
53//! - **Preemption**: when blocks run out we still just `Err`; real
54//! scheduler would refuse-and-queue or evict.
55//! - Cross-process or cross-model pooling.
56
57use ferrum_kernels::backend::Backend;
58use ferrum_types::{FerrumError, Result, TokenId};
59use std::collections::HashMap;
60use std::hash::{Hash, Hasher};
61use std::sync::atomic::{AtomicUsize, Ordering};
62
63/// Hash identifying a content-addressable KV block. Computed from a chain
64/// of (parent_hash, block_tokens) — see [`block_hash_chain`].
65pub type BlockHash = u64;
66
67/// Compute the hash for one block, chained from its parent.
68///
69/// `parent` is the hash of the preceding block (0 for the first block).
70/// `tokens` are the `block_size` token ids in this block.
71///
72/// Uses Rust's default `SipHasher` for collision resistance with no extra
73/// deps. SipHash at ~1 GB/s processes a 16-token block (64 bytes) in tens
74/// of ns; per-request hashing of <200 blocks is well under a microsecond
75/// — invisible next to a prefill kernel.
76pub fn block_hash(parent: BlockHash, tokens: &[TokenId]) -> BlockHash {
77 let mut h = std::collections::hash_map::DefaultHasher::new();
78 parent.hash(&mut h);
79 for t in tokens {
80 t.get().hash(&mut h);
81 }
82 h.finish()
83}
84
85/// Compute the chained hash sequence for a token list, broken into
86/// `block_size`-token blocks. Trailing partial block (length <
87/// `block_size`) is dropped — partial blocks can't be looked up
88/// independently since their content depends on what gets appended next.
89pub fn block_hash_chain(tokens: &[TokenId], block_size: usize) -> Vec<BlockHash> {
90 let n = tokens.len() / block_size;
91 let mut out = Vec::with_capacity(n);
92 let mut parent: BlockHash = 0;
93 for chunk in tokens.chunks_exact(block_size) {
94 parent = block_hash(parent, chunk);
95 out.push(parent);
96 }
97 out
98}
99
100/// LIFO free-list block allocator. `O(1)` allocate / free, no fragmentation
101/// (all blocks are uniform size).
102///
103/// `capacity` is the total physical block count baked into the pool at
104/// load time. The allocator is independent per-model (block index space
105/// is not portable across models).
106pub struct BlockAllocator {
107 free_list: Vec<u32>,
108 capacity: u32,
109 /// Watermark: how many blocks have been live at peak, useful for
110 /// pool-sizing diagnostics in the bench harness.
111 peak_in_use: AtomicUsize,
112 /// Per-block reference count. `ref_counts[i] == 0` ⟺ block i is on
113 /// the free list (either freshly-uninitialized or soft-free with a
114 /// hash still registered). `allocate` sets to 1; `acquire` increments;
115 /// `free` decrements and (only when reaching 0) returns the block
116 /// to the free list. Width 16 bits — supports up to 65535 concurrent
117 /// sharers of the same physical KV block, far past any realistic
118 /// continuous batching cap.
119 ref_counts: Vec<u16>,
120 /// Block-hash lookup. Contains entries for every block whose content
121 /// has been hash-registered via [`Self::register_block_hash`] — both
122 /// live (ref ≥ 1) and soft-free (ref = 0 but content not yet
123 /// overwritten). [`Self::try_acquire_by_hash`] hits this map; on hit
124 /// the entry is resurrected with ref = 1 if it was soft-free.
125 /// Survives across requests — this is what makes the prefix cache
126 /// actually useful.
127 hash_table: HashMap<BlockHash, u32>,
128 /// Reverse map block_id → hash, for cleanup. When a block's content
129 /// gets overwritten by `allocate` returning it on a fresh request,
130 /// we use this to erase its stale hash from `hash_table`.
131 block_to_hash: Vec<Option<BlockHash>>,
132}
133
134impl BlockAllocator {
135 /// Create a fresh allocator. All `num_blocks` blocks start free.
136 /// Free-list is built so `allocate()` returns block 0 first, then 1,
137 /// etc. — predictable for tests and ensures the lower physical
138 /// blocks see the most reuse (better cache locality on M1's SLC).
139 pub fn new(num_blocks: u32) -> Self {
140 let mut free_list: Vec<u32> = (0..num_blocks).collect();
141 free_list.reverse(); // pop() yields 0 first
142 Self {
143 free_list,
144 capacity: num_blocks,
145 peak_in_use: AtomicUsize::new(0),
146 ref_counts: vec![0u16; num_blocks as usize],
147 hash_table: HashMap::new(),
148 block_to_hash: vec![None; num_blocks as usize],
149 }
150 }
151
152 /// Allocate a single physical block. Returns `Err` when the pool is
153 /// exhausted — caller is expected to refuse the request and queue
154 /// it (or evict another seq, when that's wired up).
155 ///
156 /// New block starts with `ref_count = 1`. Drop one ref via `free()`
157 /// to return it to the pool, or call `acquire()` for additional
158 /// sharers.
159 pub fn allocate(&mut self) -> Result<u32> {
160 match self.pop_free_block_preferring_unhashed() {
161 Some(b) => {
162 debug_assert!(
163 self.ref_counts[b as usize] == 0,
164 "allocate yielded block {b} with non-zero ref_count {}",
165 self.ref_counts[b as usize]
166 );
167 // Soft-free block with a stale hash → content will be
168 // overwritten, so erase the hash mapping.
169 self.evict_hash_if_any(b);
170 self.ref_counts[b as usize] = 1;
171 let in_use = self.capacity as usize - self.free_list.len();
172 self.peak_in_use.fetch_max(in_use, Ordering::Relaxed);
173 Ok(b)
174 }
175 None => Err(FerrumError::resource_exhausted(format!(
176 "paged KV pool exhausted (capacity={} blocks, all in use)",
177 self.capacity
178 ))),
179 }
180 }
181
182 fn pop_free_block_preferring_unhashed(&mut self) -> Option<u32> {
183 let pos = self
184 .free_list
185 .iter()
186 .rposition(|&block| self.block_to_hash[block as usize].is_none())
187 .unwrap_or_else(|| self.free_list.len().saturating_sub(1));
188 if self.free_list.is_empty() {
189 None
190 } else {
191 Some(self.free_list.swap_remove(pos))
192 }
193 }
194
195 /// Internal: erase any hash mapping for a block whose content is
196 /// about to be overwritten. No-op if the block had no hash registered.
197 fn evict_hash_if_any(&mut self, block: u32) {
198 if let Some(h) = self.block_to_hash[block as usize].take() {
199 // Only erase from hash_table if it still points at THIS block.
200 // (Defensive — a hash collision later registering the same h
201 // to a different block could mean the entry now points
202 // elsewhere; we shouldn't yank the live one.)
203 if let Some(&mapped) = self.hash_table.get(&h) {
204 if mapped == block {
205 self.hash_table.remove(&h);
206 }
207 }
208 }
209 }
210
211 /// Bulk allocate. Atomic: either all `n` succeed or none are taken.
212 /// Each returned block starts at `ref_count = 1`.
213 pub fn allocate_n(&mut self, n: usize) -> Result<Vec<u32>> {
214 if self.free_list.len() < n {
215 return Err(FerrumError::resource_exhausted(format!(
216 "paged KV pool exhausted: need {n} blocks but only {} free",
217 self.free_list.len()
218 )));
219 }
220 let mut out = Vec::with_capacity(n);
221 for _ in 0..n {
222 let b = self
223 .pop_free_block_preferring_unhashed()
224 .expect("free_list length checked above");
225 debug_assert!(
226 self.ref_counts[b as usize] == 0,
227 "allocate_n yielded block {b} with non-zero ref_count"
228 );
229 self.evict_hash_if_any(b);
230 self.ref_counts[b as usize] = 1;
231 out.push(b);
232 }
233 let in_use = self.capacity as usize - self.free_list.len();
234 self.peak_in_use.fetch_max(in_use, Ordering::Relaxed);
235 Ok(out)
236 }
237
238 /// Look up a hash in the prefix-cache table. If the hash maps to a
239 /// physical block (live or soft-free), bump its ref count and return
240 /// the block id. A soft-free block (ref=0 in free_list) is
241 /// resurrected — pulled out of the free list and ref set to 1.
242 /// Returns `None` on cache miss.
243 pub fn try_acquire_by_hash(&mut self, hash: BlockHash) -> Option<u32> {
244 let &block = self.hash_table.get(&hash)?;
245 let bi = block as usize;
246 if self.ref_counts[bi] == 0 {
247 // Soft-free: pull out of free_list (O(n) on free_list length;
248 // typically small — few hundred at peak utilisation).
249 if let Some(pos) = self.free_list.iter().rposition(|&b| b == block) {
250 self.free_list.swap_remove(pos);
251 } else {
252 // Inconsistency: ref=0 but not in free_list. Shouldn't
253 // happen unless someone bypassed the API. Treat as miss
254 // to avoid corruption.
255 debug_assert!(false, "block {block} has ref=0 but not in free_list");
256 return None;
257 }
258 self.ref_counts[bi] = 1;
259 let in_use = self.capacity as usize - self.free_list.len();
260 self.peak_in_use.fetch_max(in_use, Ordering::Relaxed);
261 } else {
262 self.acquire(block);
263 }
264 Some(block)
265 }
266
267 /// Associate a hash with a block (after its content is written).
268 /// If the block already had a hash, the old mapping is replaced.
269 /// Block must currently be live (ref ≥ 1).
270 pub fn register_block_hash(&mut self, block: u32, hash: BlockHash) {
271 let bi = block as usize;
272 debug_assert!(
273 self.ref_counts[bi] > 0,
274 "register_block_hash on block {block} with ref_count=0 (not allocated)"
275 );
276 // Replace prior hash if any.
277 if let Some(old_h) = self.block_to_hash[bi].replace(hash) {
278 if old_h != hash {
279 if let Some(&mapped) = self.hash_table.get(&old_h) {
280 if mapped == block {
281 self.hash_table.remove(&old_h);
282 }
283 }
284 } else {
285 // Same hash being re-registered; nothing to clean up.
286 return;
287 }
288 }
289 // Last-writer-wins on collision (same hash different content —
290 // SipHash collisions on 64-token inputs are astronomically rare).
291 self.hash_table.insert(hash, block);
292 }
293
294 /// Read accessor for tests / debugging.
295 #[inline]
296 pub fn hash_table_size(&self) -> usize {
297 self.hash_table.len()
298 }
299
300 /// Increment the ref count for an already-allocated block. Used by
301 /// prefix-caching paths when a new sequence wants to share a block
302 /// that's already resident from a prior request.
303 ///
304 /// Panics in debug builds if the block is not currently allocated
305 /// (ref_count==0) — a release-build path that calls `acquire` on a
306 /// free block is a memory-safety bug we'd rather catch loudly.
307 pub fn acquire(&mut self, block: u32) {
308 let bi = block as usize;
309 debug_assert!(
310 self.ref_counts[bi] > 0,
311 "acquire on block {block} with ref_count=0 (not currently allocated)"
312 );
313 self.ref_counts[bi] = self.ref_counts[bi]
314 .checked_add(1)
315 .expect("BlockAllocator ref_count u16 overflow (>65535 sharers)");
316 }
317
318 /// Bulk acquire — convenience for prefix-cache hit paths that take a
319 /// list of physical block ids to share.
320 pub fn acquire_many(&mut self, blocks: &[u32]) {
321 for &b in blocks {
322 self.acquire(b);
323 }
324 }
325
326 /// Drop one ref from each block. Blocks whose ref_count hits 0 are
327 /// returned to the free list and become available for the next
328 /// `allocate*` call.
329 ///
330 /// Pre-prefix-cache callers see no behavioural change: every block
331 /// they hold starts at ref=1 (from `allocate`), and `free` drops it
332 /// to 0 — physically frees on the same call, same as before.
333 pub fn free(&mut self, blocks: &[u32]) {
334 for &b in blocks {
335 let bi = b as usize;
336 debug_assert!(
337 self.ref_counts[bi] > 0,
338 "free on block {b} with ref_count=0 (double-free)"
339 );
340 self.ref_counts[bi] -= 1;
341 if self.ref_counts[bi] == 0 {
342 self.free_list.push(b);
343 }
344 }
345 }
346
347 /// Read current ref count for a block. 0 means free, ≥1 means in
348 /// use by that many sequences.
349 #[inline]
350 pub fn ref_count(&self, block: u32) -> u16 {
351 self.ref_counts[block as usize]
352 }
353
354 pub fn free_count(&self) -> usize {
355 self.free_list.len()
356 }
357
358 pub fn capacity(&self) -> u32 {
359 self.capacity
360 }
361
362 pub fn peak_in_use(&self) -> usize {
363 self.peak_in_use.load(Ordering::Relaxed)
364 }
365}
366
367/// Per-sequence paged-KV state.
368///
369/// Holds the logical→physical block mapping for ONE sequence (one
370/// `cache_id`) plus its current token count. The mapping is stored as
371/// both:
372/// - `blocks: Vec<u32>` — the host-side source of truth, used by the
373/// block allocator + grow logic.
374/// - `block_table_buf: B::Buffer` — a device-side u32 buffer that mirrors
375/// `blocks` and is read directly by the paged Metal kernels (PR #68 /
376/// #69). Kept in sync via [`Self::ensure_capacity`].
377///
378/// `context_lens_buf` is a 1-element u32 device buffer holding `len`.
379/// The kernel reads it each forward; we update it via `B::write_u32`.
380pub struct PagedSeqState<B: Backend> {
381 pub blocks: Vec<u32>,
382 pub block_table_buf: B::Buffer,
383 pub context_lens_buf: B::Buffer,
384 pub len: usize,
385 pub block_size: usize,
386 pub max_blocks_per_seq: usize,
387}
388
389impl<B: Backend> PagedSeqState<B> {
390 /// Allocate buffers for a sequence that hasn't yet allocated any
391 /// blocks. The allocator isn't touched here — the first call to
392 /// [`Self::ensure_capacity`] does the real work.
393 pub fn new(block_size: usize, max_blocks_per_seq: usize) -> Self {
394 let block_table_buf =
395 B::alloc_typed(ferrum_kernels::backend::Dtype::U32, max_blocks_per_seq);
396 let context_lens_buf = B::alloc_typed(ferrum_kernels::backend::Dtype::U32, 1);
397 // Initialise context_lens to 0 so a forward dispatched before
398 // any token has been written sees an empty context.
399 let mut ctx = B::new_context();
400 let mut cl = context_lens_buf;
401 B::write_typed::<u32>(&mut ctx, &mut cl, &[0u32]);
402 B::sync(&mut ctx);
403 Self {
404 blocks: Vec::with_capacity(max_blocks_per_seq),
405 block_table_buf,
406 context_lens_buf: cl,
407 len: 0,
408 block_size,
409 max_blocks_per_seq,
410 }
411 }
412
413 /// Ensure the seq has enough blocks to hold `target_len` tokens.
414 /// Allocates additional blocks from the pool if needed and re-syncs
415 /// `block_table_buf` to the device. Idempotent if already big enough.
416 pub fn ensure_capacity(
417 &mut self,
418 ctx: &mut B::Context,
419 alloc: &mut BlockAllocator,
420 target_len: usize,
421 ) -> Result<()> {
422 let needed = target_len.div_ceil(self.block_size);
423 if needed > self.max_blocks_per_seq {
424 return Err(FerrumError::model(format!(
425 "paged KV: target_len={target_len} would need {needed} blocks, exceeds max_blocks_per_seq={}",
426 self.max_blocks_per_seq
427 )));
428 }
429 while self.blocks.len() < needed {
430 let block = alloc.allocate()?;
431 self.blocks.push(block);
432 }
433 // Mirror the host-side blocks list into the device buffer. We
434 // write the FULL `max_blocks_per_seq` entries — unused slots
435 // beyond `needed` are never read by the kernel (it only walks
436 // `[0, ceil(context_len / block_size))`), but writing them
437 // keeps the buffer's content predictable.
438 let mut padded = self.blocks.clone();
439 padded.resize(self.max_blocks_per_seq, 0);
440 B::write_typed::<u32>(ctx, &mut self.block_table_buf, &padded);
441 Ok(())
442 }
443
444 /// Update the on-device `context_lens_buf` to the current `self.len`.
445 /// Call this after [`Self::ensure_capacity`] but before dispatching
446 /// the paged attention kernel for this seq.
447 pub fn sync_context_len(&mut self, ctx: &mut B::Context) {
448 B::write_typed::<u32>(ctx, &mut self.context_lens_buf, &[self.len as u32]);
449 }
450
451 /// Release all blocks back to the allocator. Buffers are kept (cheap
452 /// to reuse for a future cache_id), but blocks become available for
453 /// other sequences. Sets `len` back to 0.
454 pub fn release(&mut self, alloc: &mut BlockAllocator) {
455 alloc.free(&self.blocks);
456 self.blocks.clear();
457 self.len = 0;
458 }
459}
460
461#[cfg(test)]
462mod tests {
463 use super::*;
464
465 #[test]
466 fn allocator_basic() {
467 let mut a = BlockAllocator::new(4);
468 assert_eq!(a.free_count(), 4);
469 assert_eq!(a.allocate().unwrap(), 0);
470 assert_eq!(a.allocate().unwrap(), 1);
471 assert_eq!(a.allocate().unwrap(), 2);
472 assert_eq!(a.allocate().unwrap(), 3);
473 assert!(a.allocate().is_err());
474 assert_eq!(a.free_count(), 0);
475
476 a.free(&[1, 3]);
477 assert_eq!(a.free_count(), 2);
478 // LIFO: most recently freed comes back first.
479 assert_eq!(a.allocate().unwrap(), 3);
480 assert_eq!(a.allocate().unwrap(), 1);
481 }
482
483 #[test]
484 fn allocator_atomic_n_failure() {
485 let mut a = BlockAllocator::new(3);
486 let _ = a.allocate().unwrap(); // 1 left in free_list... wait, 2 left
487 let _ = a.allocate().unwrap();
488 // 1 free, asking for 2 should fail without consuming the 1.
489 assert!(a.allocate_n(2).is_err());
490 assert_eq!(a.free_count(), 1);
491 }
492
493 #[test]
494 fn allocator_peak_tracking() {
495 let mut a = BlockAllocator::new(8);
496 let blocks = a.allocate_n(5).unwrap();
497 assert_eq!(a.peak_in_use(), 5);
498 a.free(&blocks);
499 assert_eq!(a.peak_in_use(), 5); // peak doesn't decrease
500 let _ = a.allocate_n(3).unwrap();
501 assert_eq!(a.peak_in_use(), 5);
502 }
503
504 #[test]
505 fn refcount_allocate_starts_at_one() {
506 let mut a = BlockAllocator::new(4);
507 let b = a.allocate().unwrap();
508 assert_eq!(a.ref_count(b), 1);
509 }
510
511 #[test]
512 fn refcount_acquire_increments() {
513 let mut a = BlockAllocator::new(4);
514 let b = a.allocate().unwrap();
515 a.acquire(b);
516 a.acquire(b);
517 assert_eq!(a.ref_count(b), 3);
518 }
519
520 #[test]
521 fn refcount_free_decrements_no_physical_release() {
522 let mut a = BlockAllocator::new(4);
523 let b = a.allocate().unwrap();
524 a.acquire(b); // ref=2
525 a.free(&[b]); // ref=1 — NOT yet returned to free_list
526 assert_eq!(a.ref_count(b), 1);
527 assert_eq!(a.free_count(), 3); // only 3 of original 4 blocks free
528 }
529
530 #[test]
531 fn refcount_free_physical_release_at_zero() {
532 let mut a = BlockAllocator::new(4);
533 let b = a.allocate().unwrap();
534 a.acquire(b); // ref=2
535 a.free(&[b]); // ref=1
536 a.free(&[b]); // ref=0 → physical release
537 assert_eq!(a.ref_count(b), 0);
538 assert_eq!(a.free_count(), 4);
539 }
540
541 #[test]
542 fn refcount_legacy_single_ref_behaviour_unchanged() {
543 // Pre-prefix-cache code never calls `acquire`. Verify that
544 // allocate+free behaves identically to the old version: block
545 // immediately returns to the pool.
546 let mut a = BlockAllocator::new(2);
547 let b = a.allocate().unwrap();
548 assert_eq!(a.free_count(), 1);
549 a.free(&[b]);
550 assert_eq!(a.free_count(), 2);
551 assert_eq!(a.ref_count(b), 0);
552 // Re-allocation reuses the slot (LIFO).
553 let b2 = a.allocate().unwrap();
554 assert_eq!(b2, b);
555 }
556
557 #[test]
558 fn refcount_bulk_acquire_and_release() {
559 let mut a = BlockAllocator::new(8);
560 let blocks = a.allocate_n(3).unwrap();
561 a.acquire_many(&blocks); // each ref=2
562 for &b in &blocks {
563 assert_eq!(a.ref_count(b), 2);
564 }
565 a.free(&blocks); // each ref=1, none physically released
566 assert_eq!(a.free_count(), 5);
567 a.free(&blocks); // each ref=0, all released
568 assert_eq!(a.free_count(), 8);
569 }
570
571 // ── Phase 2: hash table tests ────────────────────────────────────────
572
573 fn toks(ids: &[u32]) -> Vec<TokenId> {
574 ids.iter().map(|&i| TokenId::new(i)).collect()
575 }
576
577 #[test]
578 fn block_hash_chain_basic() {
579 let tokens = toks(&[1, 2, 3, 4, 5, 6, 7, 8]);
580 let chain = block_hash_chain(&tokens, 4);
581 assert_eq!(chain.len(), 2);
582 // Identical second block must produce a DIFFERENT chained hash
583 // because its parent (first block) is the same here but the
584 // chain order forces hash[1] = h(hash[0], tokens[4..8]).
585 assert_ne!(chain[0], chain[1]);
586 }
587
588 #[test]
589 fn block_hash_chain_identical_prefix_same_hashes() {
590 let a = toks(&[10, 20, 30, 40, 50, 60, 70, 80]);
591 let b = toks(&[10, 20, 30, 40, 99, 99, 99, 99]);
592 let ca = block_hash_chain(&a, 4);
593 let cb = block_hash_chain(&b, 4);
594 assert_eq!(ca[0], cb[0], "first block matches → same hash[0]");
595 assert_ne!(ca[1], cb[1], "second block differs → different hash[1]");
596 }
597
598 #[test]
599 fn block_hash_chain_drops_partial_trailing() {
600 let tokens = toks(&[1, 2, 3, 4, 5, 6, 7]); // 7 tokens, block_size=4 → 1 full
601 let chain = block_hash_chain(&tokens, 4);
602 assert_eq!(chain.len(), 1);
603 }
604
605 #[test]
606 fn hash_table_register_and_acquire_live() {
607 let mut a = BlockAllocator::new(4);
608 let b = a.allocate().unwrap();
609 let h = 0xDEAD_BEEFu64;
610 a.register_block_hash(b, h);
611 assert_eq!(a.hash_table_size(), 1);
612 // Re-acquire by hash: must bump ref (block is live)
613 let got = a.try_acquire_by_hash(h);
614 assert_eq!(got, Some(b));
615 assert_eq!(a.ref_count(b), 2);
616 }
617
618 #[test]
619 fn hash_table_soft_free_resurrection() {
620 let mut a = BlockAllocator::new(4);
621 let b = a.allocate().unwrap();
622 let h = 0xABCD_1234u64;
623 a.register_block_hash(b, h);
624 a.free(&[b]); // ref=0, block in free_list, hash STILL registered
625 assert_eq!(a.free_count(), 4);
626 assert_eq!(a.ref_count(b), 0);
627 // Cache hit on the soft-free block: must resurrect with ref=1
628 let got = a.try_acquire_by_hash(h);
629 assert_eq!(got, Some(b));
630 assert_eq!(a.ref_count(b), 1);
631 // The block is OFF the free_list now.
632 assert_eq!(a.free_count(), 3);
633 }
634
635 #[test]
636 fn hash_table_miss_returns_none() {
637 let mut a = BlockAllocator::new(4);
638 let b = a.allocate().unwrap();
639 a.register_block_hash(b, 1);
640 assert_eq!(a.try_acquire_by_hash(99), None);
641 // No side effects on miss
642 assert_eq!(a.ref_count(b), 1);
643 }
644
645 #[test]
646 fn hash_table_evicts_on_realloc() {
647 // Allocate, register hash, free → soft-free with hash. Then
648 // allocate a new block — the recycled one's hash must be erased
649 // because its content is about to be overwritten.
650 let mut a = BlockAllocator::new(2);
651 let b1 = a.allocate().unwrap();
652 let _held_clean_block = a.allocate().unwrap();
653 let h = 0x1234u64;
654 a.register_block_hash(b1, h);
655 a.free(&[b1]); // soft-free
656 assert_eq!(a.hash_table_size(), 1);
657 // Allocate to consume the only free block — should recycle b1 and
658 // evict its hash.
659 let b2 = a.allocate().unwrap();
660 assert_eq!(b2, b1, "must reuse b1 when no clean free block exists");
661 assert_eq!(a.hash_table_size(), 0, "stale hash erased on realloc");
662 assert_eq!(a.try_acquire_by_hash(h), None);
663 }
664
665 #[test]
666 fn allocator_prefers_unhashed_free_block_over_soft_cached_hash() {
667 let mut a = BlockAllocator::new(3);
668 let cached = a.allocate().unwrap();
669 let h = 0xCAFE_BABEu64;
670 a.register_block_hash(cached, h);
671 a.free(&[cached]);
672 assert_eq!(a.hash_table_size(), 1);
673
674 let fresh = a.allocate().unwrap();
675 assert_ne!(fresh, cached, "clean free block should be used first");
676 assert_eq!(a.hash_table_size(), 1, "cached block hash must survive");
677 assert_eq!(a.try_acquire_by_hash(h), Some(cached));
678 }
679
680 #[test]
681 fn hash_table_replace_block_hash() {
682 let mut a = BlockAllocator::new(4);
683 let b = a.allocate().unwrap();
684 a.register_block_hash(b, 1);
685 a.register_block_hash(b, 2); // overwrite
686 assert_eq!(a.hash_table_size(), 1);
687 assert_eq!(a.try_acquire_by_hash(1), None);
688 assert_eq!(a.try_acquire_by_hash(2), Some(b));
689 }
690}