use alloc::vec::Vec;
use crate::encoding::Sequence;
use crate::encoding::dict_attach::DictAttach;
use super::fast_kernel::hash_table::FastHashTable;
use super::fast_kernel::kernel::compress_block_fast;
pub(crate) const FAST_LEVEL_1_HASH_LOG: u32 = 14;
pub(crate) const FAST_LEVEL_1_MLS: u32 = 7;
#[cfg(test)]
pub(crate) const FAST_LEVEL_1_WINDOW_LOG: u8 = 19;
pub(crate) const FAST_INITIAL_REP: [u32; 2] = [1, 4];
pub(crate) const FAST_INITIAL_OFFSET_HIST: [u32; 3] = [1, 4, 8];
pub(crate) const HISTORY_DRAIN_BASE: usize = 0;
const INITIAL_PREFIX_START_INDEX: u32 = 1;
pub(crate) struct FastKernelMatcher {
history: Vec<u8>,
prefix_start_index: u32,
rep: [u32; 2],
pub(crate) offset_hist: [u32; 3],
hash_table: FastHashTable,
pub(crate) max_window_size: usize,
window_log: u8,
use_cmov: bool,
kernel: crate::encoding::fastpath::FastpathKernel,
step_size: usize,
pending: Option<Vec<u8>>,
last_block_start: usize,
recycled_space: Option<Vec<u8>>,
borrowed: Option<(*const u8, usize)>,
last_borrowed_block: Option<(usize, usize)>,
dict: DictAttach<FastHashTable>,
table_pos_high_water: usize,
}
impl Clone for FastKernelMatcher {
fn clone(&self) -> Self {
Self {
history: self.history.clone(),
prefix_start_index: self.prefix_start_index,
rep: self.rep,
offset_hist: self.offset_hist,
hash_table: self.hash_table.clone(),
max_window_size: self.max_window_size,
window_log: self.window_log,
use_cmov: self.use_cmov,
kernel: self.kernel,
step_size: self.step_size,
pending: self.pending.clone(),
last_block_start: self.last_block_start,
recycled_space: self.recycled_space.clone(),
borrowed: self.borrowed,
last_borrowed_block: self.last_borrowed_block,
dict: self.dict.clone(),
table_pos_high_water: self.table_pos_high_water,
}
}
fn clone_from(&mut self, source: &Self) {
self.history.clone_from(&source.history);
self.prefix_start_index = source.prefix_start_index;
self.rep = source.rep;
self.offset_hist = source.offset_hist;
self.hash_table.clone_from(&source.hash_table);
self.max_window_size = source.max_window_size;
self.window_log = source.window_log;
self.use_cmov = source.use_cmov;
self.kernel = source.kernel;
self.step_size = source.step_size;
self.pending.clone_from(&source.pending);
self.last_block_start = source.last_block_start;
self.recycled_space.clone_from(&source.recycled_space);
self.borrowed = source.borrowed;
self.last_borrowed_block = source.last_borrowed_block;
self.dict.clone_from(&source.dict);
self.table_pos_high_water = source.table_pos_high_water;
}
}
impl FastKernelMatcher {
#[cfg(test)]
pub(crate) fn new() -> Self {
Self::with_params(
FAST_LEVEL_1_WINDOW_LOG,
FAST_LEVEL_1_HASH_LOG,
FAST_LEVEL_1_MLS,
2,
)
}
#[cfg(test)]
pub(crate) fn step_size(&self) -> usize {
self.step_size
}
#[cfg(test)]
pub(crate) fn hash_log(&self) -> u32 {
self.hash_table.hash_log()
}
pub(crate) fn dict_is_attached(&self) -> bool {
self.dict.is_attached()
}
#[cfg(test)]
pub(crate) fn mls(&self) -> u32 {
self.hash_table.mls()
}
pub(crate) fn with_params(window_log: u8, hash_log: u32, mls: u32, step_size: usize) -> Self {
assert!(
step_size >= 2,
"FastKernelMatcher requires step_size >= 2 (got {step_size})"
);
assert!(
window_log <= 30,
"FastKernelMatcher requires window_log <= 30 (got {window_log}); \
2 * (1 << 30) is the eviction-band ceiling that keeps history \
length below the kernel's u32::MAX input bound"
);
let history = alloc::vec![0u8; HISTORY_DRAIN_BASE];
Self {
last_block_start: HISTORY_DRAIN_BASE,
recycled_space: None,
history,
prefix_start_index: INITIAL_PREFIX_START_INDEX,
rep: FAST_INITIAL_REP,
offset_hist: FAST_INITIAL_OFFSET_HIST,
hash_table: FastHashTable::new(hash_log, mls),
max_window_size: 1usize << window_log,
window_log,
use_cmov: window_log < 19,
kernel: crate::encoding::fastpath::select_kernel(),
step_size,
pending: None,
borrowed: None,
last_borrowed_block: None,
dict: DictAttach::new(),
table_pos_high_water: 0,
}
}
pub(crate) fn reset(
&mut self,
window_log: u8,
hash_log: u32,
mls: u32,
step_size: usize,
dict_attach_epoch: bool,
table_overwritten_by_restore: bool,
) {
assert!(
step_size >= 2,
"FastKernelMatcher requires step_size >= 2 (got {step_size})"
);
assert!(
window_log <= 30,
"FastKernelMatcher requires window_log <= 30 (got {window_log})"
);
if table_overwritten_by_restore
&& self.hash_table.hash_log() == hash_log
&& self.hash_table.mls() == mls
{
} else if self.hash_table.hash_log() != hash_log || self.hash_table.mls() != mls {
self.hash_table = FastHashTable::new(hash_log, mls);
self.dict.invalidate();
} else if dict_attach_epoch && self.dict.is_primed() {
let span = u32::try_from(self.table_pos_high_water).unwrap_or(u32::MAX);
self.hash_table.advance_epoch(span);
} else {
self.hash_table.clear();
}
self.table_pos_high_water = 0;
self.history.clear();
self.history.resize(HISTORY_DRAIN_BASE, 0);
self.prefix_start_index = INITIAL_PREFIX_START_INDEX;
self.rep = FAST_INITIAL_REP;
self.offset_hist = FAST_INITIAL_OFFSET_HIST;
self.window_log = window_log;
self.use_cmov = window_log < 19;
self.step_size = step_size;
self.max_window_size = 1usize << window_log;
self.pending = None;
self.last_block_start = HISTORY_DRAIN_BASE;
self.recycled_space = None;
self.borrowed = None;
self.last_borrowed_block = None;
}
#[cfg(test)]
pub(crate) fn window_size(&self) -> u64 {
1u64 << self.window_log
}
pub(crate) fn heap_size(&self) -> usize {
self.history.capacity()
+ self.hash_table.heap_size()
+ self.pending.as_ref().map_or(0, |v| v.capacity())
+ self.recycled_space.as_ref().map_or(0, |v| v.capacity())
+ self.dict.table().map_or(0, |t| t.heap_size())
}
#[inline(always)]
fn history_bytes(&self) -> &[u8] {
match self.borrowed {
Some((ptr, len)) => unsafe { core::slice::from_raw_parts(ptr, len) },
None => &self.history,
}
}
pub(crate) unsafe fn set_borrowed_window(&mut self, buffer: &[u8]) {
assert!(
self.pending.is_none(),
"set_borrowed_window requires no staged owned block; reset before switching to a borrowed window",
);
debug_assert!(
self.borrowed.is_none(),
"set_borrowed_window called without a preceding reset()/clear_borrowed_window",
);
self.borrowed = Some((buffer.as_ptr(), buffer.len()));
self.last_borrowed_block = None;
self.prefix_start_index = INITIAL_PREFIX_START_INDEX;
}
pub(crate) fn clear_borrowed_window(&mut self) {
self.borrowed = None;
self.last_borrowed_block = None;
self.prefix_start_index = INITIAL_PREFIX_START_INDEX;
}
pub(crate) fn last_committed_space(&self) -> &[u8] {
if let Some(slice) = self.pending.as_deref() {
return slice;
}
if let Some((start, end)) = self.last_borrowed_block {
return &self.history_bytes()[start..end];
}
&self.history_bytes()[self.last_block_start..]
}
pub(crate) fn accept_data(&mut self, space: Vec<u8>) {
assert!(
self.pending.is_none(),
"FastKernelMatcher: accept_data called with a still-pending buffer; \
the driver must run start_matching / skip_matching between commits",
);
let real_len = self.history.len().saturating_sub(HISTORY_DRAIN_BASE);
let cap = self.max_window_size * 2;
assert!(
space.len() <= cap,
"FastKernelMatcher requires block_size <= 2 × max_window_size \
(block={}, cap={})",
space.len(),
cap,
);
if real_len > cap - space.len() {
let retain_real = cap.saturating_sub(space.len()).min(self.max_window_size);
let drop_n = real_len.saturating_sub(retain_real);
if drop_n > 0 {
self.drain_real_prefix(drop_n);
}
}
self.pending = Some(space);
}
fn drain_real_prefix(&mut self, drop_n: usize) {
let drain_end = HISTORY_DRAIN_BASE + drop_n;
self.history.drain(HISTORY_DRAIN_BASE..drain_end);
self.prefix_start_index = INITIAL_PREFIX_START_INDEX;
self.dict.invalidate();
self.hash_table.clear();
self.last_block_start = self.last_block_start.saturating_sub(drop_n);
self.prime_hash_table_for_range(INITIAL_PREFIX_START_INDEX as usize);
}
fn extend_history_with_pending(&mut self) -> usize {
let mut space = self
.pending
.take()
.expect("extend_history_with_pending without a pending buffer");
let block_start = self.history.len();
self.history.extend_from_slice(&space);
self.table_pos_high_water = self.table_pos_high_water.max(self.history.len());
self.last_block_start = block_start;
space.clear();
self.recycled_space = Some(space);
block_start
}
pub(crate) fn take_recycled_space(&mut self) -> Option<Vec<u8>> {
self.recycled_space.take()
}
pub(crate) fn start_matching(&mut self, handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
assert!(
self.borrowed.is_none(),
"start_matching is the owned path; clear the borrowed window first (use start_matching_borrowed)",
);
let block_start = self.extend_history_with_pending();
let advertised_window = 1usize << self.window_log;
let window_low = self.history_bytes().len().saturating_sub(advertised_window) as u32;
let prefix_start_index = window_low.max(self.prefix_start_index);
let rep_in = self.rep;
let mls = self.hash_table.mls();
let step_size = self.step_size;
let use_cmov = self.use_cmov;
let use_dict = self.dict.is_attached();
let history: &[u8] = &self.history;
let rep_out = if use_dict {
use super::fast_kernel::kernel::PrefixBounds;
let dict_end = self.dict.region_len() as u32;
let bounds = PrefixBounds {
prefix_start_index,
window_low,
};
let main_table = &mut self.hash_table;
let dict_table = self
.dict
.table()
.expect("use_dict implies dict_table is Some");
run_fast_kernel_block_dict(
history,
block_start,
bounds,
dict_end,
main_table,
dict_table,
rep_in,
step_size,
mls,
use_cmov,
handle_sequence,
)
} else {
run_fast_kernel_block(
history,
block_start,
prefix_start_index,
window_low,
&mut self.hash_table,
rep_in,
step_size,
mls,
use_cmov,
handle_sequence,
)
};
self.rep = rep_out;
}
pub(crate) fn start_matching_borrowed(
&mut self,
block_start: usize,
block_end: usize,
handle_sequence: impl for<'a> FnMut(Sequence<'a>),
) {
let (ptr, total_len) = self
.borrowed
.expect("start_matching_borrowed requires a registered borrowed window");
assert!(
block_start <= block_end && block_end <= total_len,
"borrowed block bounds out of range: start={block_start} end={block_end} total={total_len}",
);
self.table_pos_high_water = self.table_pos_high_water.max(block_end);
let advertised_window = 1usize << self.window_log;
let window_low = block_end.saturating_sub(advertised_window) as u32;
let prefix_start_index = window_low.max(self.prefix_start_index);
let rep_in = self.rep;
let mls = self.hash_table.mls();
let history: &[u8] = unsafe { core::slice::from_raw_parts(ptr, block_end) };
let rep_out = run_fast_kernel_block(
history,
block_start,
prefix_start_index,
window_low,
&mut self.hash_table,
rep_in,
self.step_size,
mls,
self.use_cmov,
handle_sequence,
);
self.rep = rep_out;
self.last_borrowed_block = Some((block_start, block_end));
}
pub(crate) fn start_matching_borrowed_dict(
&mut self,
block_start: usize,
block_end: usize,
mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
) {
use super::fast_kernel::kernel::{PrefixBounds, compress_block_fast_dict_borrowed};
let (ptr, total_len) = self
.borrowed
.expect("start_matching_borrowed_dict requires a registered borrowed window");
assert!(
block_start <= block_end && block_end <= total_len,
"borrowed block bounds out of range: start={block_start} end={block_end} total={total_len}",
);
self.last_borrowed_block = Some((block_start, block_end));
let dict_end = self.dict.region_len();
let virtual_len = dict_end
.checked_add(block_end)
.filter(|&v| v <= u32::MAX as usize)
.expect("dict_end + block_end exceeds the u32 FastKernel position space");
self.table_pos_high_water = self.table_pos_high_water.max(virtual_len);
let advertised_window = 1usize << self.window_log;
let mls = self.hash_table.mls();
let use_cmov = self.use_cmov;
let step_size = self.step_size;
let rep_in = self.rep;
let kernel = self.kernel;
let window_low = virtual_len.saturating_sub(advertised_window);
let prefix_start_index = window_low.max(self.prefix_start_index as usize) as u32;
let bounds = PrefixBounds {
prefix_start_index,
window_low: window_low as u32,
};
let inp: &[u8] = unsafe { core::slice::from_raw_parts(ptr, block_end) };
debug_assert!(
dict_end <= self.history.len(),
"dict region_len ({dict_end}) exceeds history.len() ({}) — \
dictionary bytes must be committed before borrowed-dict scan",
self.history.len(),
);
let dict: &[u8] = unsafe { core::slice::from_raw_parts(self.history.as_ptr(), dict_end) };
let dict_tab_ptr: *const FastHashTable = self
.dict
.table()
.expect("start_matching_borrowed_dict requires an attached dict table");
let dict_tab: &FastHashTable = unsafe { &*dict_tab_ptr };
let main_table = &mut self.hash_table;
macro_rules! run {
($mls:literal, $cmov:literal) => {
compress_block_fast_dict_borrowed::<$mls, $cmov>(
inp,
dict,
block_start,
block_end,
main_table,
dict_tab,
bounds,
rep_in,
step_size,
&mut handle_sequence,
kernel,
)
};
}
let result = match (mls, use_cmov) {
(4, false) => run!(4, false),
(4, true) => run!(4, true),
(5, false) => run!(5, false),
(5, true) => run!(5, true),
(6, false) => run!(6, false),
(6, true) => run!(6, true),
(7, false) => run!(7, false),
(7, true) => run!(7, true),
(8, false) => run!(8, false),
(8, true) => run!(8, true),
_ => {
unreachable!("FastHashTable construction rejects mls outside 4..=8 — got mls={mls}")
}
};
self.rep = result.rep;
if result.tail_literals_len > 0 {
let tail_start = block_end - result.tail_literals_len;
handle_sequence(Sequence::Literals {
literals: &inp[tail_start..block_end],
});
}
}
pub(crate) fn stage_borrowed_block(&mut self, block_start: usize, block_end: usize) {
let (_ptr, total_len) = self
.borrowed
.expect("stage_borrowed_block requires a registered borrowed window");
assert!(
block_start <= block_end && block_end <= total_len,
"staged borrowed block bounds out of range: start={block_start} end={block_end} total={total_len}",
);
self.last_borrowed_block = Some((block_start, block_end));
}
pub(crate) fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
let block_start = self.extend_history_with_pending();
if incompressible_hint == Some(false) {
self.prime_hash_table_for_range(block_start);
}
}
pub(crate) fn skip_matching_borrowed(
&mut self,
block_start: usize,
block_end: usize,
incompressible_hint: Option<bool>,
) {
let (_ptr, total_len) = self
.borrowed
.expect("skip_matching_borrowed requires a registered borrowed window");
assert!(
block_start <= block_end && block_end <= total_len,
"borrowed block bounds out of range: start={block_start} end={block_end} total={total_len}",
);
self.last_borrowed_block = Some((block_start, block_end));
if incompressible_hint == Some(false) {
self.prime_hash_table_for_range_borrowed(block_start, block_end);
}
}
fn prime_hash_table_for_range_borrowed(&mut self, range_start: usize, block_end: usize) {
const HASH_READ_SIZE: usize = 8;
if block_end < HASH_READ_SIZE {
return;
}
let last_hashable = block_end - HASH_READ_SIZE;
let backfill_start = range_start.saturating_sub(HASH_READ_SIZE - 1);
if backfill_start > last_hashable {
return;
}
let (base, _len) = self
.borrowed
.expect("prime_hash_table_for_range_borrowed requires a registered borrowed window");
let base_offset = self.dict.region_len();
debug_assert!(
base_offset
.checked_add(block_end)
.is_some_and(|v| v <= u32::MAX as usize),
"virtual position overflow: dict.region_len()={base_offset} + block_end={block_end} exceeds u32",
);
match self.hash_table.mls() {
4 => self.prime_hash_table_impl::<4>(base, backfill_start, last_hashable, base_offset),
5 => self.prime_hash_table_impl::<5>(base, backfill_start, last_hashable, base_offset),
6 => self.prime_hash_table_impl::<6>(base, backfill_start, last_hashable, base_offset),
7 => self.prime_hash_table_impl::<7>(base, backfill_start, last_hashable, base_offset),
8 => self.prime_hash_table_impl::<8>(base, backfill_start, last_hashable, base_offset),
_ => unreachable!("FastHashTable construction rejects mls outside 4..=8"),
}
}
pub(crate) fn prime_offset_history(&mut self, offset_hist: [u32; 3]) {
self.offset_hist = offset_hist;
self.rep = [offset_hist[0], offset_hist[1]];
}
pub(crate) fn history_len_for_eviction_accounting(&self) -> usize {
self.history.len().saturating_sub(HISTORY_DRAIN_BASE)
}
pub(crate) fn trim_to_window(&mut self) -> usize {
let real_len = self.history.len().saturating_sub(HISTORY_DRAIN_BASE);
if real_len <= self.max_window_size {
return 0;
}
let drop_n = real_len - self.max_window_size;
self.drain_real_prefix(drop_n);
drop_n
}
fn prime_hash_table_for_range(&mut self, range_start: usize) {
let history_len = self.history.len();
const HASH_READ_SIZE: usize = 8;
if history_len < HASH_READ_SIZE {
return;
}
let last_hashable = history_len - HASH_READ_SIZE;
let backfill_start = range_start.saturating_sub(HASH_READ_SIZE - 1);
if backfill_start > last_hashable {
return;
}
let base = self.history.as_ptr();
match self.hash_table.mls() {
4 => self.prime_hash_table_impl::<4>(base, backfill_start, last_hashable, 0),
5 => self.prime_hash_table_impl::<5>(base, backfill_start, last_hashable, 0),
6 => self.prime_hash_table_impl::<6>(base, backfill_start, last_hashable, 0),
7 => self.prime_hash_table_impl::<7>(base, backfill_start, last_hashable, 0),
8 => self.prime_hash_table_impl::<8>(base, backfill_start, last_hashable, 0),
_ => unreachable!("FastHashTable construction rejects mls outside 4..=8"),
}
}
fn prime_hash_table_impl<const MLS: u32>(
&mut self,
base: *const u8,
range_start: usize,
last_hashable: usize,
base_offset: usize,
) {
for pos in range_start..=last_hashable {
let ptr = unsafe { base.add(pos) };
let hash = unsafe { self.hash_table.hash_ptr::<MLS>(ptr) };
unsafe { self.hash_table.put(hash, (base_offset + pos) as u32) };
}
}
pub(crate) fn skip_matching_for_dict_prime(&mut self) {
let block_start = self.extend_history_with_pending();
self.prime_dict_table_for_range(block_start);
}
pub(crate) fn mark_dict_primed(&mut self) {
self.dict.mark_primed();
}
pub(crate) fn invalidate_dict_cache(&mut self) {
self.dict.invalidate();
}
fn prime_dict_table_for_range(&mut self, range_start: usize) {
const HASH_READ_SIZE: usize = 8;
let history_len = self.history.len();
self.dict.set_region_len(history_len);
if self.dict.is_primed() {
return;
}
if history_len < HASH_READ_SIZE {
return;
}
let last_hashable = history_len - HASH_READ_SIZE;
let backfill_start = range_start.saturating_sub(HASH_READ_SIZE - 1);
if backfill_start > last_hashable {
return;
}
let hash_log = self.hash_table.hash_log();
let mls = self.hash_table.mls();
self.dict
.table_mut_or_init(|| FastHashTable::new(hash_log, mls));
let base = self.history.as_ptr();
match self.hash_table.mls() {
4 => self.prime_dict_table_impl::<4>(base, backfill_start, last_hashable),
5 => self.prime_dict_table_impl::<5>(base, backfill_start, last_hashable),
6 => self.prime_dict_table_impl::<6>(base, backfill_start, last_hashable),
7 => self.prime_dict_table_impl::<7>(base, backfill_start, last_hashable),
8 => self.prime_dict_table_impl::<8>(base, backfill_start, last_hashable),
_ => unreachable!("FastHashTable construction rejects mls outside 4..=8"),
}
}
fn prime_dict_table_impl<const MLS: u32>(
&mut self,
base: *const u8,
range_start: usize,
last_hashable: usize,
) {
let dict_table = self
.dict
.table_mut()
.expect("prime_dict_table_for_range creates the table before this call");
for pos in range_start..=last_hashable {
let ptr = unsafe { base.add(pos) };
let hash = unsafe { dict_table.hash_ptr::<MLS>(ptr) };
unsafe { dict_table.put(hash, pos as u32) };
}
}
}
#[allow(clippy::too_many_arguments)]
fn run_fast_kernel_block(
history: &[u8],
block_start: usize,
prefix_start_index: u32,
window_low: u32,
hash_table: &mut FastHashTable,
rep_in: [u32; 2],
step_size: usize,
mls: u32,
use_cmov: bool,
mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
) -> [u32; 2] {
use super::fast_kernel::kernel::PrefixBounds;
let bounds = PrefixBounds {
prefix_start_index,
window_low,
};
let result = match (mls, use_cmov) {
(4, false) => compress_block_fast::<4, false>(
history,
block_start,
bounds,
hash_table,
rep_in,
step_size,
&mut handle_sequence,
),
(4, true) => compress_block_fast::<4, true>(
history,
block_start,
bounds,
hash_table,
rep_in,
step_size,
&mut handle_sequence,
),
(5, false) => compress_block_fast::<5, false>(
history,
block_start,
bounds,
hash_table,
rep_in,
step_size,
&mut handle_sequence,
),
(5, true) => compress_block_fast::<5, true>(
history,
block_start,
bounds,
hash_table,
rep_in,
step_size,
&mut handle_sequence,
),
(6, false) => compress_block_fast::<6, false>(
history,
block_start,
bounds,
hash_table,
rep_in,
step_size,
&mut handle_sequence,
),
(6, true) => compress_block_fast::<6, true>(
history,
block_start,
bounds,
hash_table,
rep_in,
step_size,
&mut handle_sequence,
),
(7, false) => compress_block_fast::<7, false>(
history,
block_start,
bounds,
hash_table,
rep_in,
step_size,
&mut handle_sequence,
),
(7, true) => compress_block_fast::<7, true>(
history,
block_start,
bounds,
hash_table,
rep_in,
step_size,
&mut handle_sequence,
),
(8, false) => compress_block_fast::<8, false>(
history,
block_start,
bounds,
hash_table,
rep_in,
step_size,
&mut handle_sequence,
),
(8, true) => compress_block_fast::<8, true>(
history,
block_start,
bounds,
hash_table,
rep_in,
step_size,
&mut handle_sequence,
),
_ => unreachable!(
"FastHashTable construction rejects mls outside 4..=8 — \
got mls={mls} which means the table was bypassed",
),
};
if result.tail_literals_len > 0 {
let tail_start = history.len() - result.tail_literals_len;
handle_sequence(Sequence::Literals {
literals: &history[tail_start..],
});
}
result.rep
}
#[allow(clippy::too_many_arguments)]
fn run_fast_kernel_block_dict(
history: &[u8],
block_start: usize,
bounds: super::fast_kernel::kernel::PrefixBounds,
dict_end: u32,
main_table: &mut FastHashTable,
dict_table: &FastHashTable,
rep_in: [u32; 2],
step_size: usize,
mls: u32,
use_cmov: bool,
mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
) -> [u32; 2] {
use super::fast_kernel::kernel::compress_block_fast_dict;
macro_rules! run {
($mls:literal, $cmov:literal) => {
compress_block_fast_dict::<$mls, $cmov>(
history,
block_start,
bounds,
main_table,
dict_table,
dict_end,
rep_in,
step_size,
&mut handle_sequence,
)
};
}
let result = match (mls, use_cmov) {
(4, false) => run!(4, false),
(4, true) => run!(4, true),
(5, false) => run!(5, false),
(5, true) => run!(5, true),
(6, false) => run!(6, false),
(6, true) => run!(6, true),
(7, false) => run!(7, false),
(7, true) => run!(7, true),
(8, false) => run!(8, false),
(8, true) => run!(8, true),
_ => unreachable!("FastHashTable construction rejects mls outside 4..=8 — got mls={mls}",),
};
if result.tail_literals_len > 0 {
let tail_start = history.len() - result.tail_literals_len;
handle_sequence(Sequence::Literals {
literals: &history[tail_start..],
});
}
result.rep
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn new_uses_level_1_defaults() {
let m = FastKernelMatcher::new();
assert_eq!(m.window_log, FAST_LEVEL_1_WINDOW_LOG);
assert_eq!(m.hash_table.hash_log(), FAST_LEVEL_1_HASH_LOG);
assert_eq!(m.hash_table.mls(), FAST_LEVEL_1_MLS);
assert_eq!(m.rep, FAST_INITIAL_REP);
assert_eq!(m.offset_hist, FAST_INITIAL_OFFSET_HIST);
assert_eq!(m.max_window_size, 1usize << FAST_LEVEL_1_WINDOW_LOG);
assert_eq!(m.history.len(), HISTORY_DRAIN_BASE);
assert_eq!(m.prefix_start_index, INITIAL_PREFIX_START_INDEX);
assert!(m.pending.is_none());
}
#[test]
fn borrowed_window_reads_match_owned_then_restores() {
let mut m = FastKernelMatcher::new();
m.history = b"owned-history-bytes".to_vec();
assert_eq!(m.history_bytes(), b"owned-history-bytes");
let external = b"a-different-borrowed-window".to_vec();
unsafe { m.set_borrowed_window(&external) };
assert_eq!(m.history_bytes(), &external[..]);
assert_eq!(m.history, b"owned-history-bytes");
m.clear_borrowed_window();
assert_eq!(m.history_bytes(), b"owned-history-bytes");
}
#[test]
fn reset_clears_borrowed_window() {
let mut m = FastKernelMatcher::new();
let external = b"borrowed".to_vec();
unsafe { m.set_borrowed_window(&external) };
m.reset(
FAST_LEVEL_1_WINDOW_LOG,
FAST_LEVEL_1_HASH_LOG,
FAST_LEVEL_1_MLS,
2,
false,
false,
);
assert!(m.borrowed.is_none());
assert_eq!(m.history_bytes().len(), HISTORY_DRAIN_BASE);
}
#[test]
fn borrowed_window_matches_owned_sequence_stream() {
#[derive(PartialEq, Debug)]
enum Seq {
Triple(alloc::vec::Vec<u8>, usize, usize),
Lits(alloc::vec::Vec<u8>),
}
let mut whole = alloc::vec::Vec::with_capacity(320);
for i in 0..320usize {
whole.push((i % 64) as u8);
}
let split = 192usize;
let mut owned = FastKernelMatcher::with_params(15, 12, 5, 2);
let mut owned_seqs: alloc::vec::Vec<Seq> = alloc::vec::Vec::new();
owned.accept_data(whole[..split].to_vec());
owned.start_matching(|seq| match seq {
Sequence::Triple {
literals,
offset,
match_len,
} => owned_seqs.push(Seq::Triple(literals.to_vec(), offset, match_len)),
Sequence::Literals { literals } => owned_seqs.push(Seq::Lits(literals.to_vec())),
});
owned.accept_data(whole[split..].to_vec());
owned.start_matching(|seq| match seq {
Sequence::Triple {
literals,
offset,
match_len,
} => owned_seqs.push(Seq::Triple(literals.to_vec(), offset, match_len)),
Sequence::Literals { literals } => owned_seqs.push(Seq::Lits(literals.to_vec())),
});
let mut borrowed = FastKernelMatcher::with_params(15, 12, 5, 2);
let mut borrowed_seqs: alloc::vec::Vec<Seq> = alloc::vec::Vec::new();
unsafe { borrowed.set_borrowed_window(&whole) };
borrowed.start_matching_borrowed(0, split, |seq| match seq {
Sequence::Triple {
literals,
offset,
match_len,
} => borrowed_seqs.push(Seq::Triple(literals.to_vec(), offset, match_len)),
Sequence::Literals { literals } => borrowed_seqs.push(Seq::Lits(literals.to_vec())),
});
borrowed.start_matching_borrowed(split, whole.len(), |seq| match seq {
Sequence::Triple {
literals,
offset,
match_len,
} => borrowed_seqs.push(Seq::Triple(literals.to_vec(), offset, match_len)),
Sequence::Literals { literals } => borrowed_seqs.push(Seq::Lits(literals.to_vec())),
});
assert_eq!(
owned_seqs, borrowed_seqs,
"borrowed window must emit the identical sequence stream as the owned path",
);
assert_eq!(
owned.rep, borrowed.rep,
"rep state must match after both scans"
);
assert_eq!(
owned.offset_hist, borrowed.offset_hist,
"offset_hist must match after both scans",
);
assert!(
borrowed_seqs.iter().any(|s| matches!(s, Seq::Triple(..))),
"pattern must yield at least one match to make the check meaningful",
);
}
#[test]
fn with_params_threads_through_each_field() {
let m = FastKernelMatcher::with_params(16, 12, 5, 2);
assert_eq!(m.window_log, 16);
assert_eq!(m.hash_table.hash_log(), 12);
assert_eq!(m.hash_table.mls(), 5);
assert_eq!(m.max_window_size, 1usize << 16);
}
#[test]
fn window_size_reports_one_shifted_window_log() {
let m = FastKernelMatcher::with_params(16, 12, 5, 2);
assert_eq!(m.window_size(), 1u64 << 16);
let m = FastKernelMatcher::with_params(22, 14, 7, 2);
assert_eq!(m.window_size(), 1u64 << 22);
}
#[test]
fn last_committed_space_empty_before_commit() {
let m = FastKernelMatcher::new();
assert!(m.last_committed_space().is_empty());
}
#[test]
fn reset_clears_history_and_state() {
let mut m = FastKernelMatcher::new();
m.history.extend_from_slice(&[1, 2, 3, 4]);
m.prefix_start_index = 7;
m.rep = [42, 99];
m.offset_hist = [10, 20, 30];
m.pending = Some(alloc::vec![5, 6, 7]);
m.reset(
FAST_LEVEL_1_WINDOW_LOG,
FAST_LEVEL_1_HASH_LOG,
FAST_LEVEL_1_MLS,
2,
false,
false,
);
assert_eq!(m.history.len(), HISTORY_DRAIN_BASE);
assert_eq!(m.prefix_start_index, INITIAL_PREFIX_START_INDEX);
assert_eq!(m.rep, FAST_INITIAL_REP);
assert_eq!(m.offset_hist, FAST_INITIAL_OFFSET_HIST);
assert!(m.pending.is_none());
assert_eq!(m.hash_table.hash_log(), FAST_LEVEL_1_HASH_LOG);
assert_eq!(m.hash_table.mls(), FAST_LEVEL_1_MLS);
}
#[test]
fn reset_with_changed_params_rebuilds_hash_table() {
let mut m = FastKernelMatcher::new();
m.reset(16, 10, 4, 2, false, false);
assert_eq!(m.hash_table.hash_log(), 10);
assert_eq!(m.hash_table.mls(), 4);
assert_eq!(m.window_log, 16);
assert_eq!(m.max_window_size, 1usize << 16);
}
#[test]
fn reset_keeps_table_when_overwritten_by_restore() {
let mut m = FastKernelMatcher::new();
m.reset(16, 10, 4, 2, false, false);
let probe_hash = 7u32;
unsafe { m.hash_table.put(probe_hash, 0xCAFE) };
m.reset(16, 10, 4, 2, false, true);
assert_eq!(
unsafe { m.hash_table.get(probe_hash) },
0xCAFE,
"restore-pending reset must not clear the table"
);
m.reset(16, 10, 4, 2, false, false);
assert_eq!(
unsafe { m.hash_table.get(probe_hash) },
0,
"plain reset must clear the table"
);
unsafe { m.hash_table.put(probe_hash, 0xCAFE) };
m.reset(16, 11, 4, 2, false, true);
assert_eq!(m.hash_table.hash_log(), 11);
assert_eq!(
unsafe { m.hash_table.get(probe_hash) },
0,
"shape change must rebuild the table regardless of the flag"
);
}
#[test]
fn accept_then_start_matching_emits_match_for_repeated_run() {
let mut data = alloc::vec::Vec::with_capacity(64);
for i in 0..32u8 {
data.push(i.wrapping_mul(7).wrapping_add(13));
}
data.extend_from_within(0..32);
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
m.accept_data(data.clone());
let mut emitted_match_lens: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
let mut emitted_literal_byte_count: usize = 0;
let mut tail_byte_count: usize = 0;
m.start_matching(|seq| match seq {
Sequence::Triple {
literals,
offset: _,
match_len,
} => {
emitted_literal_byte_count += literals.len();
emitted_match_lens.push(match_len);
}
Sequence::Literals { literals } => {
tail_byte_count += literals.len();
}
});
let total_matched: usize = emitted_match_lens.iter().sum();
assert_eq!(
emitted_literal_byte_count + total_matched + tail_byte_count,
data.len(),
"all input bytes must be accounted for as literals/matches/tail",
);
assert!(
emitted_match_lens.iter().any(|&m| m >= 4),
"kernel must emit at least one Triple with match_len >= MIN_MATCH (got {emitted_match_lens:?})",
);
assert!(m.pending.is_none());
assert_eq!(m.history.len(), data.len() + HISTORY_DRAIN_BASE);
assert_eq!(m.last_committed_space(), data.as_slice());
}
#[test]
fn skip_matching_extends_history_without_emissions() {
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
let pre_rep = m.rep;
let pre_offset_hist = m.offset_hist;
let payload: alloc::vec::Vec<u8> = (0..40u8).collect();
m.accept_data(payload.clone());
assert_eq!(m.last_committed_space().len(), payload.len());
m.skip_matching_with_hint(None);
assert_eq!(
m.history.len(),
payload.len() + HISTORY_DRAIN_BASE,
"skip_matching must append the pending buffer to history",
);
assert_eq!(m.rep, pre_rep, "skip must not touch rep state");
assert_eq!(
m.offset_hist, pre_offset_hist,
"skip must not touch offset_hist",
);
assert!(m.pending.is_none());
}
#[test]
fn cross_block_match_finds_first_block_payload() {
let mut block1 = alloc::vec::Vec::with_capacity(128);
for i in 0..128u8 {
block1.push(i.wrapping_mul(11).wrapping_add(5));
}
let mut block2 = alloc::vec::Vec::with_capacity(64);
block2.extend(0..16u8); block2.extend_from_slice(&block1[0..32]); block2.extend(200..216u8);
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
m.accept_data(block1.clone());
m.start_matching(|_seq| {});
m.accept_data(block2.clone());
let mut max_match: usize = 0;
let mut saw_cross_block = false;
m.start_matching(|seq| {
if let Sequence::Triple {
offset, match_len, ..
} = seq
{
max_match = max_match.max(match_len);
if offset >= block2.len() {
saw_cross_block = true;
}
}
});
assert!(
saw_cross_block,
"block 2's matcher must find at least one cross-block match \
(max_len={max_match})",
);
assert_eq!(
m.history.len(),
block1.len() + block2.len() + HISTORY_DRAIN_BASE,
"history must hold both blocks after two start_matching calls",
);
}
#[test]
fn skip_matching_with_false_hint_populates_hashes_for_dict_priming() {
let mut dict_block = alloc::vec::Vec::with_capacity(32);
for i in 0..32u8 {
dict_block.push(i.wrapping_mul(13).wrapping_add(7));
}
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
m.accept_data(dict_block.clone());
m.skip_matching_with_hint(Some(false));
assert_eq!(m.history.len(), dict_block.len() + HISTORY_DRAIN_BASE);
assert_eq!(m.prefix_start_index, INITIAL_PREFIX_START_INDEX);
let mut block2 = alloc::vec::Vec::with_capacity(48);
block2.extend(100..116u8);
block2.extend_from_slice(&dict_block[0..16]);
block2.extend(120..136u8);
m.accept_data(block2.clone());
let mut saw_cross_block = false;
m.start_matching(|seq| {
if let Sequence::Triple { offset, .. } = seq
&& offset >= block2.len()
{
saw_cross_block = true;
}
});
assert!(
saw_cross_block,
"skip_matching(Some(false)) must populate hashes so block 2 \
can match against the primed bytes",
);
}
#[test]
fn skip_matching_with_none_hint_skips_hash_population() {
let mut dict_block = alloc::vec::Vec::with_capacity(32);
for i in 0..32u8 {
dict_block.push(i.wrapping_mul(13).wrapping_add(7));
}
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
m.accept_data(dict_block.clone());
m.skip_matching_with_hint(None);
let mut block2 = alloc::vec::Vec::with_capacity(48);
block2.extend(100..116u8);
block2.extend_from_slice(&dict_block[0..16]);
block2.extend(120..136u8);
m.accept_data(block2.clone());
let mut saw_cross_block = false;
m.start_matching(|seq| {
if let Sequence::Triple { offset, .. } = seq
&& offset >= block2.len()
{
saw_cross_block = true;
}
});
assert!(
!saw_cross_block,
"skip_matching(None) must NOT populate hashes — the legacy \
skip cost-savings only hold when future blocks are willing \
to miss matches in the skipped region",
);
}
#[test]
fn extend_history_drains_old_prefix_past_two_window_sizes() {
let mut m = FastKernelMatcher::with_params(8, 6, 4, 2);
for round in 0..3 {
let block: alloc::vec::Vec<u8> = (0..200u8)
.map(|i| i.wrapping_add(round as u8 * 17))
.collect();
m.accept_data(block);
m.skip_matching_with_hint(None);
}
assert!(
m.history.len() <= m.max_window_size * 2 + HISTORY_DRAIN_BASE,
"after eviction, REAL history must be bounded by 2× \
max_window_size (got history.len()={}, max_window_size={})",
m.history.len(),
m.max_window_size,
);
assert!(
m.history.len() <= m.max_window_size + 200 + HISTORY_DRAIN_BASE,
"post-append history = retained prefix (≤ max_window_size) \
+ last block (200 bytes); got {}",
m.history.len(),
);
assert_eq!(
m.prefix_start_index, INITIAL_PREFIX_START_INDEX,
"drain must rebase prefix_start_index to the baseline (1)",
);
}
#[test]
fn skip_matching_dict_prime_handles_exactly_hash_read_size_bytes() {
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
let payload: alloc::vec::Vec<u8> = (0..8u8).collect();
m.accept_data(payload);
m.skip_matching_with_hint(Some(false));
assert_eq!(m.history.len(), 8 + HISTORY_DRAIN_BASE);
}
#[test]
fn skip_matching_dict_prime_handles_below_hash_read_size_bytes() {
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
let payload: alloc::vec::Vec<u8> = (0..4u8).collect();
m.accept_data(payload);
m.skip_matching_with_hint(Some(false));
assert_eq!(m.history.len(), 4 + HISTORY_DRAIN_BASE);
}
#[test]
fn dict_prime_indexes_positions_across_chunk_seam() {
let mut m = FastKernelMatcher::with_params(20, 20, 4, 2);
let chunk1: alloc::vec::Vec<u8> = (0..16u8)
.map(|i| i.wrapping_mul(37).wrapping_add(13))
.collect();
m.accept_data(chunk1);
m.skip_matching_for_dict_prime();
let seam = m.history.len(); let chunk2: alloc::vec::Vec<u8> = (16..32u8)
.map(|i| i.wrapping_mul(37).wrapping_add(13))
.collect();
m.accept_data(chunk2);
m.skip_matching_for_dict_prime();
let p = seam - 4;
let dt = m.dict.table().expect("dict table primed");
let h = unsafe { dt.hash_ptr::<4>(m.history.as_ptr().add(p)) };
assert_eq!(
unsafe { dt.get(h) },
p as u32,
"seam-spanning position {p} must be indexed in the dict table",
);
}
#[test]
fn main_table_prime_indexes_positions_across_slice_seam() {
let mut m = FastKernelMatcher::with_params(20, 20, 4, 2);
let chunk1: alloc::vec::Vec<u8> = (0..16u8)
.map(|i| i.wrapping_mul(53).wrapping_add(7))
.collect();
m.accept_data(chunk1);
m.skip_matching_with_hint(Some(false));
let seam = m.history.len();
let chunk2: alloc::vec::Vec<u8> = (16..32u8)
.map(|i| i.wrapping_mul(53).wrapping_add(7))
.collect();
m.accept_data(chunk2);
m.skip_matching_with_hint(Some(false));
let p = seam - 4;
let h = unsafe { m.hash_table.hash_ptr::<4>(m.history.as_ptr().add(p)) };
assert_eq!(
unsafe { m.hash_table.get(h) },
p as u32,
"seam-spanning position {p} must be indexed in the main table",
);
}
#[test]
fn rep_tracks_last_explicit_offset_and_offset_hist_is_not_matched() {
let mut data = alloc::vec::Vec::with_capacity(96);
for i in 0..48u8 {
data.push(i.wrapping_mul(11).wrapping_add(3));
}
data.extend_from_within(0..48);
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
m.accept_data(data.clone());
let mut emitted_offsets: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
m.start_matching(|seq| {
if let Sequence::Triple {
offset, match_len, ..
} = seq
&& match_len >= 4
{
emitted_offsets.push(offset);
}
});
assert!(
!emitted_offsets.is_empty(),
"test setup must produce at least one explicit match",
);
let last_explicit = emitted_offsets[emitted_offsets.len() - 1];
assert_eq!(
m.rep[0] as usize, last_explicit,
"kernel's rep[0] must reflect the last emitted explicit offset",
);
assert_eq!(
m.offset_hist, FAST_INITIAL_OFFSET_HIST,
"Fast matching must not mutate offset_hist (it is seeded only \
by reset / prime_offset_history; the wire offset coding runs \
downstream against the encode pipeline's own history)",
);
}
#[test]
fn eviction_during_dict_priming_drops_stale_prime_entries() {
let mut m = FastKernelMatcher::with_params(8, 6, 4, 2);
let block1: alloc::vec::Vec<u8> = (0..200u8).collect();
m.accept_data(block1);
m.skip_matching_with_hint(Some(false));
let block2: alloc::vec::Vec<u8> = (0..200u8).map(|i| i.wrapping_add(50)).collect();
m.accept_data(block2);
m.skip_matching_with_hint(Some(false));
let block3: alloc::vec::Vec<u8> = (0..200u8).map(|i| i.wrapping_add(100)).collect();
m.accept_data(block3);
m.skip_matching_with_hint(Some(false));
assert_eq!(
m.prefix_start_index, INITIAL_PREFIX_START_INDEX,
"drain must rebase prefix_start_index to the baseline (1)",
);
assert!(m.history.len() <= m.max_window_size * 2);
}
#[test]
fn accept_data_evicts_eagerly_so_commit_observes_byte_delta() {
let mut m = FastKernelMatcher::with_params(8, 6, 4, 2);
m.accept_data((0..200u8).collect());
m.skip_matching_with_hint(None);
assert_eq!(m.history.len(), 200 + HISTORY_DRAIN_BASE);
assert_eq!(
m.prefix_start_index, INITIAL_PREFIX_START_INDEX,
"no eviction yet"
);
m.accept_data((0..200u8).map(|i| i.wrapping_add(50)).collect());
m.skip_matching_with_hint(None);
assert_eq!(m.history.len(), 400 + HISTORY_DRAIN_BASE);
assert_eq!(
m.prefix_start_index, INITIAL_PREFIX_START_INDEX,
"still no eviction (400 < 512)",
);
let pre = m.history_len_for_eviction_accounting();
m.accept_data((0..200u8).map(|i| i.wrapping_add(100)).collect());
let post = m.history_len_for_eviction_accounting();
assert!(
pre > post,
"accept_data must shrink history at the eviction threshold \
(pre={pre}, post={post}) — driver's commit_space relies on \
this delta for retire_dictionary_budget accounting",
);
assert_eq!(
post, 256,
"post-eviction retained must equal max_window_size",
);
assert_eq!(
m.prefix_start_index, INITIAL_PREFIX_START_INDEX,
"drain rebases prefix_start_index to INITIAL_PREFIX_START_INDEX \
— eviction is proven by the history.len() shrink above",
);
}
#[test]
fn trim_to_window_keeps_last_committed_space_consistent() {
let mut m = FastKernelMatcher::with_params(8, 6, 4, 2);
let payload: alloc::vec::Vec<u8> = (0..200u8).collect();
m.accept_data(payload);
m.skip_matching_with_hint(None);
assert_eq!(m.last_block_start, HISTORY_DRAIN_BASE);
assert_eq!(m.history.len(), 200 + HISTORY_DRAIN_BASE);
let payload2: alloc::vec::Vec<u8> = (50..150u8).collect();
m.accept_data(payload2);
m.skip_matching_with_hint(None);
assert_eq!(m.last_block_start, HISTORY_DRAIN_BASE + 200);
assert_eq!(m.history.len(), 300 + HISTORY_DRAIN_BASE);
m.max_window_size = 64;
let drained = m.trim_to_window();
assert_eq!(
drained,
300 - 64,
"trim must drain REAL history down to max_window_size = 64",
);
assert_eq!(m.history.len(), 64 + HISTORY_DRAIN_BASE);
let last = m.last_committed_space();
assert!(
last.len() <= 64,
"last_committed_space must be in-bounds after trim \
(got len {})",
last.len(),
);
}
#[test]
fn prime_offset_history_keeps_rep_and_offset_hist_in_lockstep() {
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
assert_eq!(m.rep, FAST_INITIAL_REP);
assert_eq!(m.offset_hist, FAST_INITIAL_OFFSET_HIST);
let primed = [9u32, 4, 8];
m.prime_offset_history(primed);
assert_eq!(
m.offset_hist, primed,
"offset_hist must be updated by prime_offset_history",
);
assert_eq!(
m.rep,
[primed[0], primed[1]],
"rep[0..2] must mirror offset_hist[0..2] post-prime \
(kernel's repcode decisions must match the wire \
encoder's seeded history)",
);
}
#[test]
fn accept_data_evicts_more_aggressively_when_block_larger_than_window() {
let mut m = FastKernelMatcher::with_params(8, 6, 4, 2);
let preamble: alloc::vec::Vec<u8> = (0..256u32).map(|i| i as u8).collect();
m.accept_data(preamble);
m.skip_matching_with_hint(None);
assert_eq!(
m.history_len_for_eviction_accounting(),
256,
"history pre-filled to one full window of real bytes",
);
let oversize: alloc::vec::Vec<u8> = (0..400u32)
.map(|i| (i as u8).wrapping_mul(7).wrapping_add(11))
.collect();
m.accept_data(oversize);
let real_len_after = m.history_len_for_eviction_accounting();
m.start_matching(|_| {});
let real_total = m.history_len_for_eviction_accounting();
assert!(
real_total <= m.max_window_size * 2,
"post-append history MUST stay within 2 × max_window_size \
(got real_total={real_total}, cap={})",
m.max_window_size * 2,
);
assert!(
real_len_after < 256,
"pre-append drain must have shed historical bytes \
(got real_len_after_drain={real_len_after}, was 256 \
before accept)",
);
}
#[test]
fn start_matching_enforces_max_window_size_offset_bound() {
let mut m = FastKernelMatcher::with_params(7, 8, 4, 2);
let max_window = m.max_window_size;
assert_eq!(max_window, 128, "test assumes window_log=7");
let block1: alloc::vec::Vec<u8> = (0..200u8).map(|i| 0x30 + (i % 64)).collect();
m.accept_data(block1.clone());
m.start_matching(|_| {});
let block2 = block1.clone();
m.accept_data(block2);
let mut max_emitted_offset = 0usize;
let mut emitted_match_count = 0usize;
m.start_matching(|seq| {
if let Sequence::Triple {
offset, match_len, ..
} = seq
&& match_len > 0
{
emitted_match_count += 1;
if offset > max_emitted_offset {
max_emitted_offset = offset;
}
}
});
assert!(
emitted_match_count > 0,
"fixture must produce at least one Triple match — \
history.len()=~400, max_window=128, block2 is identical to block1",
);
assert!(
max_emitted_offset <= max_window,
"sliding floor MUST cap emitted offsets at max_window_size; \
got max emitted offset {} vs max_window_size {}",
max_emitted_offset,
max_window,
);
}
#[test]
fn start_matching_caps_offsets_at_window_log_not_inflated_max() {
let mut m = FastKernelMatcher::with_params(7, 8, 4, 2);
let advertised_window: usize = 1 << m.window_log;
assert_eq!(advertised_window, 128, "test assumes window_log=7");
let dict: alloc::vec::Vec<u8> = (0..200u8).map(|i| 0x40 + (i % 4)).collect();
m.accept_data(dict.clone());
m.start_matching(|_| {}); m.max_window_size = m.max_window_size.saturating_add(200);
let block: alloc::vec::Vec<u8> = (0..100u8).map(|i| 0x40 + (i % 4)).collect();
m.accept_data(block);
let mut max_emitted_offset = 0usize;
let mut emitted_match_count = 0usize;
m.start_matching(|seq| {
if let Sequence::Triple {
offset, match_len, ..
} = seq
&& match_len > 0
{
emitted_match_count += 1;
if offset > max_emitted_offset {
max_emitted_offset = offset;
}
}
});
assert!(
emitted_match_count > 0,
"fixture must produce at least one match — block content \
repeats dict, history.len() ≈ 300, scan should find at \
least one Triple",
);
assert!(
max_emitted_offset <= advertised_window,
"sliding floor MUST cap emitted offsets at the ADVERTISED \
frame window (1 << window_log = {}), NOT the inflated \
max_window_size; got max emitted offset {}",
advertised_window,
max_emitted_offset,
);
}
#[test]
fn block_zero_prologue_preserves_default_rep_offset_one() {
let mut data = alloc::vec::Vec::with_capacity(200);
data.push(0x01);
data.resize(200, 0x42);
let mut m = FastKernelMatcher::with_params(12, 8, 4, 2);
m.accept_data(data.clone());
let mut first_literals_len: Option<usize> = None;
let mut first_offset: Option<usize> = None;
m.start_matching(|seq| {
if first_literals_len.is_some() {
return;
}
if let Sequence::Triple {
literals, offset, ..
} = seq
{
first_literals_len = Some(literals.len());
first_offset = Some(offset);
}
});
assert_eq!(
first_offset,
Some(1),
"first emit must reference offset=1 — upstream zstd's default \
rep_offset1=1 fires on rep-at-ip2 at iter 1, and the \
prologue MUST NOT zero it (max_rep computed against \
window_low=0 at block 0, NOT against the sentinel \
prefix=1)",
);
assert_eq!(
first_literals_len,
Some(2),
"first emit must have a 2-byte literal prefix \
([0x01, 0x42]) — the rep-at-ip2 probe lands at ip2=3, \
then the one-byte backward extension drops new_ip to 2, \
so literals = data[0..2]. A different prefix length \
would indicate the explicit-match catch-up fired instead",
);
}
}