use alloc::vec::Vec;
use crate::encoding::Sequence;
use crate::encoding::blocks::encode_offset_with_history;
use super::fast_kernel::hash_table::FastHashTable;
use super::fast_kernel::kernel::compress_block_fast;
pub(crate) const FAST_LEVEL_1_HASH_LOG: u32 = 14;
pub(crate) const FAST_LEVEL_1_MLS: u32 = 7;
#[cfg(test)]
pub(crate) const FAST_LEVEL_1_WINDOW_LOG: u8 = 19;
pub(crate) const FAST_INITIAL_REP: [u32; 2] = [1, 4];
pub(crate) const FAST_INITIAL_OFFSET_HIST: [u32; 3] = [1, 4, 8];
pub(crate) const HISTORY_DRAIN_BASE: usize = 0;
const INITIAL_PREFIX_START_INDEX: u32 = 1;
pub(crate) struct FastKernelMatcher {
history: Vec<u8>,
prefix_start_index: u32,
rep: [u32; 2],
pub(crate) offset_hist: [u32; 3],
hash_table: FastHashTable,
pub(crate) max_window_size: usize,
window_log: u8,
use_cmov: bool,
step_size: usize,
pending: Option<Vec<u8>>,
last_block_start: usize,
recycled_space: Option<Vec<u8>>,
}
impl FastKernelMatcher {
#[cfg(test)]
pub(crate) fn new() -> Self {
Self::with_params(
FAST_LEVEL_1_WINDOW_LOG,
FAST_LEVEL_1_HASH_LOG,
FAST_LEVEL_1_MLS,
2,
)
}
#[cfg(test)]
pub(crate) fn step_size(&self) -> usize {
self.step_size
}
#[cfg(test)]
pub(crate) fn hash_log(&self) -> u32 {
self.hash_table.hash_log()
}
#[cfg(test)]
pub(crate) fn mls(&self) -> u32 {
self.hash_table.mls()
}
pub(crate) fn with_params(window_log: u8, hash_log: u32, mls: u32, step_size: usize) -> Self {
assert!(
step_size >= 2,
"FastKernelMatcher requires step_size >= 2 (got {step_size})"
);
assert!(
window_log <= 30,
"FastKernelMatcher requires window_log <= 30 (got {window_log}); \
2 * (1 << 30) is the eviction-band ceiling that keeps history \
length below the kernel's u32::MAX input bound"
);
let history = alloc::vec![0u8; HISTORY_DRAIN_BASE];
Self {
last_block_start: HISTORY_DRAIN_BASE,
recycled_space: None,
history,
prefix_start_index: INITIAL_PREFIX_START_INDEX,
rep: FAST_INITIAL_REP,
offset_hist: FAST_INITIAL_OFFSET_HIST,
hash_table: FastHashTable::new(hash_log, mls),
max_window_size: 1usize << window_log,
window_log,
use_cmov: window_log < 19,
step_size,
pending: None,
}
}
pub(crate) fn reset(&mut self, window_log: u8, hash_log: u32, mls: u32, step_size: usize) {
assert!(
step_size >= 2,
"FastKernelMatcher requires step_size >= 2 (got {step_size})"
);
assert!(
window_log <= 30,
"FastKernelMatcher requires window_log <= 30 (got {window_log})"
);
if self.hash_table.hash_log() != hash_log || self.hash_table.mls() != mls {
self.hash_table = FastHashTable::new(hash_log, mls);
} else {
self.hash_table.clear();
}
self.history.clear();
self.history.resize(HISTORY_DRAIN_BASE, 0);
self.prefix_start_index = INITIAL_PREFIX_START_INDEX;
self.rep = FAST_INITIAL_REP;
self.offset_hist = FAST_INITIAL_OFFSET_HIST;
self.window_log = window_log;
self.use_cmov = window_log < 19;
self.step_size = step_size;
self.max_window_size = 1usize << window_log;
self.pending = None;
self.last_block_start = HISTORY_DRAIN_BASE;
self.recycled_space = None;
}
#[cfg(test)]
pub(crate) fn window_size(&self) -> u64 {
1u64 << self.window_log
}
pub(crate) fn last_committed_space(&self) -> &[u8] {
match self.pending.as_deref() {
Some(slice) => slice,
None => &self.history[self.last_block_start..],
}
}
pub(crate) fn accept_data(&mut self, space: Vec<u8>) {
assert!(
self.pending.is_none(),
"FastKernelMatcher: accept_data called with a still-pending buffer; \
the driver must run start_matching / skip_matching between commits",
);
let real_len = self.history.len().saturating_sub(HISTORY_DRAIN_BASE);
let new_real_total = real_len.saturating_add(space.len());
let cap = self.max_window_size.saturating_mul(2);
assert!(
space.len() <= cap,
"FastKernelMatcher requires block_size <= 2 × max_window_size \
(block={}, cap={})",
space.len(),
cap,
);
if new_real_total > cap {
let retain_real = cap.saturating_sub(space.len()).min(self.max_window_size);
let drop_n = real_len.saturating_sub(retain_real);
if drop_n > 0 {
self.drain_real_prefix(drop_n);
}
}
self.pending = Some(space);
}
fn drain_real_prefix(&mut self, drop_n: usize) {
let drain_end = HISTORY_DRAIN_BASE + drop_n;
self.history.drain(HISTORY_DRAIN_BASE..drain_end);
self.prefix_start_index = INITIAL_PREFIX_START_INDEX;
self.hash_table.clear();
self.last_block_start = self.last_block_start.saturating_sub(drop_n);
self.prime_hash_table_for_range(INITIAL_PREFIX_START_INDEX as usize);
}
fn extend_history_with_pending(&mut self) -> usize {
let mut space = self
.pending
.take()
.expect("extend_history_with_pending without a pending buffer");
let block_start = self.history.len();
self.history.extend_from_slice(&space);
self.last_block_start = block_start;
space.clear();
self.recycled_space = Some(space);
block_start
}
pub(crate) fn take_recycled_space(&mut self) -> Option<Vec<u8>> {
self.recycled_space.take()
}
pub(crate) fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
let block_start = self.extend_history_with_pending();
let advertised_window = 1usize << self.window_log;
let effective_prefix = self
.history
.len()
.saturating_sub(advertised_window)
.max(self.prefix_start_index as usize) as u32;
let prefix_start_index = effective_prefix;
let rep_in = self.rep;
let mls = self.hash_table.mls();
let history: &[u8] = &self.history;
let hash_table = &mut self.hash_table;
let offset_hist = &mut self.offset_hist;
let mut wrap_emit = |seq: Sequence<'_>| {
if let Sequence::Triple {
literals,
offset,
match_len,
} = seq
{
let _ =
encode_offset_with_history(offset as u32, literals.len() as u32, offset_hist);
handle_sequence(Sequence::Triple {
literals,
offset,
match_len,
});
} else {
handle_sequence(seq);
}
};
let result = match (mls, self.use_cmov) {
(4, false) => compress_block_fast::<4, false>(
history,
block_start,
prefix_start_index,
hash_table,
rep_in,
self.step_size,
&mut wrap_emit,
),
(4, true) => compress_block_fast::<4, true>(
history,
block_start,
prefix_start_index,
hash_table,
rep_in,
self.step_size,
&mut wrap_emit,
),
(5, false) => compress_block_fast::<5, false>(
history,
block_start,
prefix_start_index,
hash_table,
rep_in,
self.step_size,
&mut wrap_emit,
),
(5, true) => compress_block_fast::<5, true>(
history,
block_start,
prefix_start_index,
hash_table,
rep_in,
self.step_size,
&mut wrap_emit,
),
(6, false) => compress_block_fast::<6, false>(
history,
block_start,
prefix_start_index,
hash_table,
rep_in,
self.step_size,
&mut wrap_emit,
),
(6, true) => compress_block_fast::<6, true>(
history,
block_start,
prefix_start_index,
hash_table,
rep_in,
self.step_size,
&mut wrap_emit,
),
(7, false) => compress_block_fast::<7, false>(
history,
block_start,
prefix_start_index,
hash_table,
rep_in,
self.step_size,
&mut wrap_emit,
),
(7, true) => compress_block_fast::<7, true>(
history,
block_start,
prefix_start_index,
hash_table,
rep_in,
self.step_size,
&mut wrap_emit,
),
(8, false) => compress_block_fast::<8, false>(
history,
block_start,
prefix_start_index,
hash_table,
rep_in,
self.step_size,
&mut wrap_emit,
),
(8, true) => compress_block_fast::<8, true>(
history,
block_start,
prefix_start_index,
hash_table,
rep_in,
self.step_size,
&mut wrap_emit,
),
_ => unreachable!(
"FastHashTable construction rejects mls outside 4..=8 — \
got mls={mls} which means the table was bypassed",
),
};
self.rep = result.rep;
if result.tail_literals_len > 0 {
let tail_start = self.history.len() - result.tail_literals_len;
handle_sequence(Sequence::Literals {
literals: &self.history[tail_start..],
});
}
}
pub(crate) fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
let block_start = self.extend_history_with_pending();
if incompressible_hint == Some(false) {
self.prime_hash_table_for_range(block_start);
}
}
pub(crate) fn prime_offset_history(&mut self, offset_hist: [u32; 3]) {
self.offset_hist = offset_hist;
self.rep = [offset_hist[0], offset_hist[1]];
}
pub(crate) fn history_len_for_eviction_accounting(&self) -> usize {
self.history.len().saturating_sub(HISTORY_DRAIN_BASE)
}
pub(crate) fn trim_to_window(&mut self) -> usize {
let real_len = self.history.len().saturating_sub(HISTORY_DRAIN_BASE);
if real_len <= self.max_window_size {
return 0;
}
let drop_n = real_len - self.max_window_size;
self.drain_real_prefix(drop_n);
drop_n
}
fn prime_hash_table_for_range(&mut self, range_start: usize) {
let history_len = self.history.len();
const HASH_READ_SIZE: usize = 8;
if history_len < HASH_READ_SIZE {
return;
}
let last_hashable = history_len - HASH_READ_SIZE;
if range_start > last_hashable {
return;
}
match self.hash_table.mls() {
4 => self.prime_hash_table_impl::<4>(range_start, last_hashable),
5 => self.prime_hash_table_impl::<5>(range_start, last_hashable),
6 => self.prime_hash_table_impl::<6>(range_start, last_hashable),
7 => self.prime_hash_table_impl::<7>(range_start, last_hashable),
8 => self.prime_hash_table_impl::<8>(range_start, last_hashable),
_ => unreachable!("FastHashTable construction rejects mls outside 4..=8"),
}
}
fn prime_hash_table_impl<const MLS: u32>(&mut self, range_start: usize, last_hashable: usize) {
let base = self.history.as_ptr();
for pos in range_start..=last_hashable {
let ptr = unsafe { base.add(pos) };
let hash = unsafe { self.hash_table.hash_ptr::<MLS>(ptr) };
unsafe { self.hash_table.put(hash, pos as u32) };
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn new_uses_donor_level_1_defaults() {
let m = FastKernelMatcher::new();
assert_eq!(m.window_log, FAST_LEVEL_1_WINDOW_LOG);
assert_eq!(m.hash_table.hash_log(), FAST_LEVEL_1_HASH_LOG);
assert_eq!(m.hash_table.mls(), FAST_LEVEL_1_MLS);
assert_eq!(m.rep, FAST_INITIAL_REP);
assert_eq!(m.offset_hist, FAST_INITIAL_OFFSET_HIST);
assert_eq!(m.max_window_size, 1usize << FAST_LEVEL_1_WINDOW_LOG);
assert_eq!(m.history.len(), HISTORY_DRAIN_BASE);
assert_eq!(m.prefix_start_index, INITIAL_PREFIX_START_INDEX);
assert!(m.pending.is_none());
}
#[test]
fn with_params_threads_through_each_field() {
let m = FastKernelMatcher::with_params(16, 12, 5, 2);
assert_eq!(m.window_log, 16);
assert_eq!(m.hash_table.hash_log(), 12);
assert_eq!(m.hash_table.mls(), 5);
assert_eq!(m.max_window_size, 1usize << 16);
}
#[test]
fn window_size_reports_one_shifted_window_log() {
let m = FastKernelMatcher::with_params(16, 12, 5, 2);
assert_eq!(m.window_size(), 1u64 << 16);
let m = FastKernelMatcher::with_params(22, 14, 7, 2);
assert_eq!(m.window_size(), 1u64 << 22);
}
#[test]
fn last_committed_space_empty_before_commit() {
let m = FastKernelMatcher::new();
assert!(m.last_committed_space().is_empty());
}
#[test]
fn reset_clears_history_and_state() {
let mut m = FastKernelMatcher::new();
m.history.extend_from_slice(&[1, 2, 3, 4]);
m.prefix_start_index = 7;
m.rep = [42, 99];
m.offset_hist = [10, 20, 30];
m.pending = Some(alloc::vec![5, 6, 7]);
m.reset(
FAST_LEVEL_1_WINDOW_LOG,
FAST_LEVEL_1_HASH_LOG,
FAST_LEVEL_1_MLS,
2,
);
assert_eq!(m.history.len(), HISTORY_DRAIN_BASE);
assert_eq!(m.prefix_start_index, INITIAL_PREFIX_START_INDEX);
assert_eq!(m.rep, FAST_INITIAL_REP);
assert_eq!(m.offset_hist, FAST_INITIAL_OFFSET_HIST);
assert!(m.pending.is_none());
assert_eq!(m.hash_table.hash_log(), FAST_LEVEL_1_HASH_LOG);
assert_eq!(m.hash_table.mls(), FAST_LEVEL_1_MLS);
}
#[test]
fn reset_with_changed_params_rebuilds_hash_table() {
let mut m = FastKernelMatcher::new();
m.reset(16, 10, 4, 2);
assert_eq!(m.hash_table.hash_log(), 10);
assert_eq!(m.hash_table.mls(), 4);
assert_eq!(m.window_log, 16);
assert_eq!(m.max_window_size, 1usize << 16);
}
#[test]
fn accept_then_start_matching_emits_match_for_repeated_run() {
let mut data = alloc::vec::Vec::with_capacity(64);
for i in 0..32u8 {
data.push(i.wrapping_mul(7).wrapping_add(13));
}
data.extend_from_within(0..32);
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
m.accept_data(data.clone());
let mut emitted_match_lens: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
let mut emitted_literal_byte_count: usize = 0;
let mut tail_byte_count: usize = 0;
m.start_matching(|seq| match seq {
Sequence::Triple {
literals,
offset: _,
match_len,
} => {
emitted_literal_byte_count += literals.len();
emitted_match_lens.push(match_len);
}
Sequence::Literals { literals } => {
tail_byte_count += literals.len();
}
});
let total_matched: usize = emitted_match_lens.iter().sum();
assert_eq!(
emitted_literal_byte_count + total_matched + tail_byte_count,
data.len(),
"all input bytes must be accounted for as literals/matches/tail",
);
assert!(
emitted_match_lens.iter().any(|&m| m >= 4),
"kernel must emit at least one Triple with match_len >= MIN_MATCH (got {emitted_match_lens:?})",
);
assert!(m.pending.is_none());
assert_eq!(m.history.len(), data.len() + HISTORY_DRAIN_BASE);
assert_eq!(m.last_committed_space(), data.as_slice());
}
#[test]
fn skip_matching_extends_history_without_emissions() {
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
let pre_rep = m.rep;
let pre_offset_hist = m.offset_hist;
let payload: alloc::vec::Vec<u8> = (0..40u8).collect();
m.accept_data(payload.clone());
assert_eq!(m.last_committed_space().len(), payload.len());
m.skip_matching_with_hint(None);
assert_eq!(
m.history.len(),
payload.len() + HISTORY_DRAIN_BASE,
"skip_matching must append the pending buffer to history",
);
assert_eq!(m.rep, pre_rep, "skip must not touch rep state");
assert_eq!(
m.offset_hist, pre_offset_hist,
"skip must not touch offset_hist",
);
assert!(m.pending.is_none());
}
#[test]
fn cross_block_match_finds_first_block_payload() {
let mut block1 = alloc::vec::Vec::with_capacity(128);
for i in 0..128u8 {
block1.push(i.wrapping_mul(11).wrapping_add(5));
}
let mut block2 = alloc::vec::Vec::with_capacity(64);
block2.extend(0..16u8); block2.extend_from_slice(&block1[0..32]); block2.extend(200..216u8);
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
m.accept_data(block1.clone());
m.start_matching(|_seq| {});
m.accept_data(block2.clone());
let mut max_match: usize = 0;
let mut saw_cross_block = false;
m.start_matching(|seq| {
if let Sequence::Triple {
offset, match_len, ..
} = seq
{
max_match = max_match.max(match_len);
if offset >= block2.len() {
saw_cross_block = true;
}
}
});
assert!(
saw_cross_block,
"block 2's matcher must find at least one cross-block match \
(max_len={max_match})",
);
assert_eq!(
m.history.len(),
block1.len() + block2.len() + HISTORY_DRAIN_BASE,
"history must hold both blocks after two start_matching calls",
);
}
#[test]
fn skip_matching_with_false_hint_populates_hashes_for_dict_priming() {
let mut dict_block = alloc::vec::Vec::with_capacity(32);
for i in 0..32u8 {
dict_block.push(i.wrapping_mul(13).wrapping_add(7));
}
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
m.accept_data(dict_block.clone());
m.skip_matching_with_hint(Some(false));
assert_eq!(m.history.len(), dict_block.len() + HISTORY_DRAIN_BASE);
assert_eq!(m.prefix_start_index, INITIAL_PREFIX_START_INDEX);
let mut block2 = alloc::vec::Vec::with_capacity(48);
block2.extend(100..116u8);
block2.extend_from_slice(&dict_block[0..16]);
block2.extend(120..136u8);
m.accept_data(block2.clone());
let mut saw_cross_block = false;
m.start_matching(|seq| {
if let Sequence::Triple { offset, .. } = seq
&& offset >= block2.len()
{
saw_cross_block = true;
}
});
assert!(
saw_cross_block,
"skip_matching(Some(false)) must populate hashes so block 2 \
can match against the primed bytes",
);
}
#[test]
fn skip_matching_with_none_hint_skips_hash_population() {
let mut dict_block = alloc::vec::Vec::with_capacity(32);
for i in 0..32u8 {
dict_block.push(i.wrapping_mul(13).wrapping_add(7));
}
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
m.accept_data(dict_block.clone());
m.skip_matching_with_hint(None);
let mut block2 = alloc::vec::Vec::with_capacity(48);
block2.extend(100..116u8);
block2.extend_from_slice(&dict_block[0..16]);
block2.extend(120..136u8);
m.accept_data(block2.clone());
let mut saw_cross_block = false;
m.start_matching(|seq| {
if let Sequence::Triple { offset, .. } = seq
&& offset >= block2.len()
{
saw_cross_block = true;
}
});
assert!(
!saw_cross_block,
"skip_matching(None) must NOT populate hashes — the legacy \
skip cost-savings only hold when future blocks are willing \
to miss matches in the skipped region",
);
}
#[test]
fn extend_history_drains_old_prefix_past_two_window_sizes() {
let mut m = FastKernelMatcher::with_params(8, 6, 4, 2);
for round in 0..3 {
let block: alloc::vec::Vec<u8> = (0..200u8)
.map(|i| i.wrapping_add(round as u8 * 17))
.collect();
m.accept_data(block);
m.skip_matching_with_hint(None);
}
assert!(
m.history.len() <= m.max_window_size * 2 + HISTORY_DRAIN_BASE,
"after eviction, REAL history must be bounded by 2× \
max_window_size (got history.len()={}, max_window_size={})",
m.history.len(),
m.max_window_size,
);
assert!(
m.history.len() <= m.max_window_size + 200 + HISTORY_DRAIN_BASE,
"post-append history = retained prefix (≤ max_window_size) \
+ last block (200 bytes); got {}",
m.history.len(),
);
assert_eq!(
m.prefix_start_index, INITIAL_PREFIX_START_INDEX,
"drain must rebase prefix_start_index to the baseline (1)",
);
}
#[test]
fn skip_matching_dict_prime_handles_exactly_hash_read_size_bytes() {
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
let payload: alloc::vec::Vec<u8> = (0..8u8).collect();
m.accept_data(payload);
m.skip_matching_with_hint(Some(false));
assert_eq!(m.history.len(), 8 + HISTORY_DRAIN_BASE);
}
#[test]
fn skip_matching_dict_prime_handles_below_hash_read_size_bytes() {
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
let payload: alloc::vec::Vec<u8> = (0..4u8).collect();
m.accept_data(payload);
m.skip_matching_with_hint(Some(false));
assert_eq!(m.history.len(), 4 + HISTORY_DRAIN_BASE);
}
#[test]
fn rep_and_offset_hist_track_emitted_explicit_offsets_in_lockstep() {
let mut data = alloc::vec::Vec::with_capacity(96);
for i in 0..48u8 {
data.push(i.wrapping_mul(11).wrapping_add(3));
}
data.extend_from_within(0..48);
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
m.accept_data(data.clone());
let mut emitted_offsets: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
m.start_matching(|seq| {
if let Sequence::Triple {
offset, match_len, ..
} = seq
&& match_len >= 4
{
emitted_offsets.push(offset);
}
});
assert!(
!emitted_offsets.is_empty(),
"test setup must produce at least one explicit match \
(otherwise this isn't testing the rep/offset_hist sync)",
);
let last_explicit = emitted_offsets[emitted_offsets.len() - 1];
assert_eq!(
m.rep[0] as usize, last_explicit,
"kernel's rep[0] must reflect the last emitted explicit \
offset (sync with wire encoder)",
);
assert_eq!(
m.offset_hist[0] as usize, last_explicit,
"offset_hist[0] (encode_offset_with_history-tracked) must \
match rep[0] (kernel-tracked) after a clean block",
);
}
#[test]
fn eviction_during_dict_priming_drops_stale_prime_entries() {
let mut m = FastKernelMatcher::with_params(8, 6, 4, 2);
let block1: alloc::vec::Vec<u8> = (0..200u8).collect();
m.accept_data(block1);
m.skip_matching_with_hint(Some(false));
let block2: alloc::vec::Vec<u8> = (0..200u8).map(|i| i.wrapping_add(50)).collect();
m.accept_data(block2);
m.skip_matching_with_hint(Some(false));
let block3: alloc::vec::Vec<u8> = (0..200u8).map(|i| i.wrapping_add(100)).collect();
m.accept_data(block3);
m.skip_matching_with_hint(Some(false));
assert_eq!(
m.prefix_start_index, INITIAL_PREFIX_START_INDEX,
"drain must rebase prefix_start_index to the baseline (1)",
);
assert!(m.history.len() <= m.max_window_size * 2);
}
#[test]
fn accept_data_evicts_eagerly_so_commit_observes_byte_delta() {
let mut m = FastKernelMatcher::with_params(8, 6, 4, 2);
m.accept_data((0..200u8).collect());
m.skip_matching_with_hint(None);
assert_eq!(m.history.len(), 200 + HISTORY_DRAIN_BASE);
assert_eq!(
m.prefix_start_index, INITIAL_PREFIX_START_INDEX,
"no eviction yet"
);
m.accept_data((0..200u8).map(|i| i.wrapping_add(50)).collect());
m.skip_matching_with_hint(None);
assert_eq!(m.history.len(), 400 + HISTORY_DRAIN_BASE);
assert_eq!(
m.prefix_start_index, INITIAL_PREFIX_START_INDEX,
"still no eviction (400 < 512)",
);
let pre = m.history_len_for_eviction_accounting();
m.accept_data((0..200u8).map(|i| i.wrapping_add(100)).collect());
let post = m.history_len_for_eviction_accounting();
assert!(
pre > post,
"accept_data must shrink history at the eviction threshold \
(pre={pre}, post={post}) — driver's commit_space relies on \
this delta for retire_dictionary_budget accounting",
);
assert_eq!(
post, 256,
"post-eviction retained must equal max_window_size",
);
assert_eq!(
m.prefix_start_index, INITIAL_PREFIX_START_INDEX,
"drain rebases prefix_start_index to INITIAL_PREFIX_START_INDEX \
— eviction is proven by the history.len() shrink above",
);
}
#[test]
fn trim_to_window_keeps_last_committed_space_consistent() {
let mut m = FastKernelMatcher::with_params(8, 6, 4, 2);
let payload: alloc::vec::Vec<u8> = (0..200u8).collect();
m.accept_data(payload);
m.skip_matching_with_hint(None);
assert_eq!(m.last_block_start, HISTORY_DRAIN_BASE);
assert_eq!(m.history.len(), 200 + HISTORY_DRAIN_BASE);
let payload2: alloc::vec::Vec<u8> = (50..150u8).collect();
m.accept_data(payload2);
m.skip_matching_with_hint(None);
assert_eq!(m.last_block_start, HISTORY_DRAIN_BASE + 200);
assert_eq!(m.history.len(), 300 + HISTORY_DRAIN_BASE);
m.max_window_size = 64;
let drained = m.trim_to_window();
assert_eq!(
drained,
300 - 64,
"trim must drain REAL history down to max_window_size = 64",
);
assert_eq!(m.history.len(), 64 + HISTORY_DRAIN_BASE);
let last = m.last_committed_space();
assert!(
last.len() <= 64,
"last_committed_space must be in-bounds after trim \
(got len {})",
last.len(),
);
}
#[test]
fn prime_offset_history_keeps_rep_and_offset_hist_in_lockstep() {
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
assert_eq!(m.rep, FAST_INITIAL_REP);
assert_eq!(m.offset_hist, FAST_INITIAL_OFFSET_HIST);
let primed = [9u32, 4, 8];
m.prime_offset_history(primed);
assert_eq!(
m.offset_hist, primed,
"offset_hist must be updated by prime_offset_history",
);
assert_eq!(
m.rep,
[primed[0], primed[1]],
"rep[0..2] must mirror offset_hist[0..2] post-prime \
(kernel's repcode decisions must match the wire \
encoder's seeded history)",
);
}
#[test]
fn accept_data_evicts_more_aggressively_when_block_larger_than_window() {
let mut m = FastKernelMatcher::with_params(8, 6, 4, 2);
let preamble: alloc::vec::Vec<u8> = (0..256u32).map(|i| i as u8).collect();
m.accept_data(preamble);
m.skip_matching_with_hint(None);
assert_eq!(
m.history_len_for_eviction_accounting(),
256,
"history pre-filled to one full window of real bytes",
);
let oversize: alloc::vec::Vec<u8> = (0..400u32)
.map(|i| (i as u8).wrapping_mul(7).wrapping_add(11))
.collect();
m.accept_data(oversize);
let real_len_after = m.history_len_for_eviction_accounting();
m.start_matching(|_| {});
let real_total = m.history_len_for_eviction_accounting();
assert!(
real_total <= m.max_window_size * 2,
"post-append history MUST stay within 2 × max_window_size \
(got real_total={real_total}, cap={})",
m.max_window_size * 2,
);
assert!(
real_len_after < 256,
"pre-append drain must have shed historical bytes \
(got real_len_after_drain={real_len_after}, was 256 \
before accept)",
);
}
#[test]
fn start_matching_enforces_max_window_size_offset_bound() {
let mut m = FastKernelMatcher::with_params(7, 8, 4, 2);
let max_window = m.max_window_size;
assert_eq!(max_window, 128, "test assumes window_log=7");
let block1: alloc::vec::Vec<u8> = (0..200u8).map(|i| 0x30 + (i % 64)).collect();
m.accept_data(block1.clone());
m.start_matching(|_| {});
let block2 = block1.clone();
m.accept_data(block2);
let mut max_emitted_offset = 0usize;
let mut emitted_match_count = 0usize;
m.start_matching(|seq| {
if let Sequence::Triple {
offset, match_len, ..
} = seq
&& match_len > 0
{
emitted_match_count += 1;
if offset > max_emitted_offset {
max_emitted_offset = offset;
}
}
});
assert!(
emitted_match_count > 0,
"fixture must produce at least one Triple match — \
history.len()=~400, max_window=128, block2 is identical to block1",
);
assert!(
max_emitted_offset <= max_window,
"sliding floor MUST cap emitted offsets at max_window_size; \
got max emitted offset {} vs max_window_size {}",
max_emitted_offset,
max_window,
);
}
#[test]
fn start_matching_caps_offsets_at_window_log_not_inflated_max() {
let mut m = FastKernelMatcher::with_params(7, 8, 4, 2);
let advertised_window: usize = 1 << m.window_log;
assert_eq!(advertised_window, 128, "test assumes window_log=7");
let dict: alloc::vec::Vec<u8> = (0..200u8).map(|i| 0x40 + (i % 64)).collect();
m.accept_data(dict.clone());
m.start_matching(|_| {}); m.max_window_size = m.max_window_size.saturating_add(200);
let block: alloc::vec::Vec<u8> = (0..100u8).map(|i| 0x40 + (i % 64)).collect();
m.accept_data(block);
let mut max_emitted_offset = 0usize;
let mut emitted_match_count = 0usize;
m.start_matching(|seq| {
if let Sequence::Triple {
offset, match_len, ..
} = seq
&& match_len > 0
{
emitted_match_count += 1;
if offset > max_emitted_offset {
max_emitted_offset = offset;
}
}
});
assert!(
emitted_match_count > 0,
"fixture must produce at least one match — block content \
repeats dict, history.len() ≈ 300, scan should find at \
least one Triple",
);
assert!(
max_emitted_offset <= advertised_window,
"sliding floor MUST cap emitted offsets at the ADVERTISED \
frame window (1 << window_log = {}), NOT the inflated \
max_window_size; got max emitted offset {}",
advertised_window,
max_emitted_offset,
);
}
}