use super::count::count_forward;
use super::hash_table::FastHashTable;
use crate::encoding::Sequence;
const SEARCH_STRENGTH: usize = 6;
const K_STEP_INCR: usize = 1 << (SEARCH_STRENGTH - 1);
const HASH_READ_SIZE: usize = 8;
#[inline(always)]
unsafe fn read32(ptr: *const u8) -> u32 {
unsafe { core::ptr::read_unaligned(ptr.cast::<u32>()) }
}
const CMOV_DUMMY: [u8; 4] = [0x12, 0x34, 0x56, 0x78];
#[inline(always)]
unsafe fn match_found<const USE_CMOV: bool>(
ip: *const u8,
base: *const u8,
match_idx: u32,
prefix_start_index: u32,
ip_pos: usize,
data_len: usize,
) -> bool {
let match_pos = match_idx as usize;
if match_pos.checked_add(4).is_none_or(|end| end > data_len) {
return false;
}
if match_pos >= ip_pos {
return false;
}
if USE_CMOV {
let in_range = match_idx >= prefix_start_index;
let mval_addr = if in_range {
unsafe { base.add(match_pos) }
} else {
CMOV_DUMMY.as_ptr()
};
let bytes_match = unsafe { read32(ip) == read32(mval_addr) };
#[allow(clippy::needless_bitwise_bool)]
let r = bytes_match & in_range;
r
} else {
if match_idx < prefix_start_index {
return false;
}
unsafe { read32(ip) == read32(base.add(match_pos)) }
}
}
pub(crate) struct FastBlockResult {
pub(crate) rep: [u32; 2],
pub(crate) tail_literals_len: usize,
}
#[inline(always)]
pub(crate) fn compress_block_fast<const MLS: u32, const USE_CMOV: bool>(
data: &[u8],
block_start: usize,
prefix_start_index: u32,
hash_table: &mut FastHashTable,
rep: [u32; 2],
step_size: usize,
mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
) -> FastBlockResult {
assert!(
step_size >= 2,
"Fast kernel requires step_size >= 2 (got {step_size}); \
the donor formula clamps to a min of 2",
);
assert!(
(4..=8).contains(&MLS),
"Fast kernel only supports MLS in 4..=8 (got {MLS})",
);
assert_eq!(
MLS,
hash_table.mls(),
"compress_block_fast<{MLS}> called with hash_table whose mls = {}; \
the table's hash formula must match the kernel's monomorphised mls",
hash_table.mls(),
);
assert!(
block_start <= data.len(),
"block_start ({block_start}) must not exceed data.len() ({})",
data.len(),
);
assert!(
data.len() <= u32::MAX as usize,
"FastKernel does not support data.len() ({}) > u32::MAX ({}); \
the kernel stores absolute positions in a u32 hash table and \
u32 offset codes, so larger inputs would silently truncate",
data.len(),
u32::MAX,
);
if data.len() < block_start + HASH_READ_SIZE {
return FastBlockResult {
rep,
tail_literals_len: data.len() - block_start,
};
}
let base = data.as_ptr();
let iend_addr = data.len();
let ilimit = iend_addr - HASH_READ_SIZE;
let mut anchor: usize = block_start;
let mut ip0: usize = block_start;
if ip0 == 0 {
ip0 = 1;
}
let mut rep_offset1: u32 = rep[0];
let mut rep_offset2: u32 = rep[1];
let mut offset_saved1: u32 = 0;
let mut offset_saved2: u32 = 0;
{
let max_rep = (ip0 as u32).saturating_sub(prefix_start_index);
if rep_offset2 > max_rep {
offset_saved2 = rep_offset2;
rep_offset2 = 0;
}
if rep_offset1 > max_rep {
offset_saved1 = rep_offset1;
rep_offset1 = 0;
}
}
let mut step: usize = step_size;
let mut next_step: usize = ip0.saturating_add(K_STEP_INCR);
'restart: while ip0 < ilimit {
let mut ip1 = ip0 + 1;
let Some(mut ip2) = ip0.checked_add(step) else {
break;
};
let Some(mut ip3) = ip2.checked_add(1) else {
break;
};
if ip3 > ilimit {
break;
}
let mut hash0 = unsafe { hash_table.hash_ptr::<MLS>(base.add(ip0)) };
let mut hash1 = unsafe { hash_table.hash_ptr::<MLS>(base.add(ip1)) };
let mut match_idx = unsafe { hash_table.get(hash0) };
enum MatchFound {
Rep {
new_ip: usize,
match0: usize,
m_len: usize,
current0: usize,
},
Explicit {
new_ip: usize,
match_idx: u32,
current0: usize,
},
}
let found: Option<MatchFound> = loop {
let rval = if rep_offset1 > 0 {
unsafe { read32(base.add(ip2 - rep_offset1 as usize)) }
} else {
0
};
unsafe { hash_table.put(hash0, ip0 as u32) };
if rep_offset1 > 0 && unsafe { read32(base.add(ip2)) } == rval {
let mut new_ip = ip2;
let mut match0 = new_ip - rep_offset1 as usize;
let mut m_len: usize = 4;
if new_ip > anchor
&& match0 > prefix_start_index as usize
&& data[new_ip - 1] == data[match0 - 1]
{
new_ip -= 1;
match0 -= 1;
m_len += 1;
}
unsafe { hash_table.put(hash1, ip1 as u32) };
break Some(MatchFound::Rep {
new_ip,
match0,
m_len,
current0: ip0,
});
}
if unsafe {
match_found::<USE_CMOV>(
base.add(ip0),
base,
match_idx,
prefix_start_index,
ip0,
iend_addr,
)
} {
unsafe { hash_table.put(hash1, ip1 as u32) };
break Some(MatchFound::Explicit {
new_ip: ip0,
match_idx,
current0: ip0,
});
}
match_idx = unsafe { hash_table.get(hash1) };
hash0 = hash1;
hash1 = unsafe { hash_table.hash_ptr::<MLS>(base.add(ip2)) };
ip0 = ip1;
ip1 = ip2;
ip2 = ip3;
unsafe { hash_table.put(hash0, ip0 as u32) };
if unsafe {
match_found::<USE_CMOV>(
base.add(ip0),
base,
match_idx,
prefix_start_index,
ip0,
iend_addr,
)
} {
if step <= 4 {
unsafe { hash_table.put(hash1, ip1 as u32) };
}
break Some(MatchFound::Explicit {
new_ip: ip0,
match_idx,
current0: ip0,
});
}
match_idx = unsafe { hash_table.get(hash1) };
hash0 = hash1;
hash1 = unsafe { hash_table.hash_ptr::<MLS>(base.add(ip2)) };
ip0 = ip1;
ip1 = ip2;
let Some(new_ip2) = ip0.checked_add(step) else {
break None;
};
let Some(new_ip3) = ip1.checked_add(step) else {
break None;
};
ip2 = new_ip2;
ip3 = new_ip3;
if ip2 >= next_step {
step += 1;
next_step = next_step.saturating_add(K_STEP_INCR);
}
if ip3 > ilimit {
break None;
}
};
let Some(found) = found else {
break 'restart;
};
let current0 = match found {
MatchFound::Rep { current0, .. } => current0,
MatchFound::Explicit { current0, .. } => current0,
};
let (mut match_ip, mut match_pos, mut m_len, offset, is_rep) = match found {
MatchFound::Rep {
new_ip,
match0,
m_len,
current0: _,
} => (new_ip, match0, m_len, rep_offset1 as usize, true),
MatchFound::Explicit {
new_ip,
match_idx,
current0: _,
} => {
let match_pos = match_idx as usize;
let offset = new_ip - match_pos;
rep_offset2 = rep_offset1;
rep_offset1 = offset as u32;
(new_ip, match_pos, 4usize, offset, false)
}
};
if !is_rep {
while match_ip > anchor
&& match_pos > prefix_start_index as usize
&& data[match_ip - 1] == data[match_pos - 1]
{
match_ip -= 1;
match_pos -= 1;
m_len += 1;
}
}
let forward = unsafe {
count_forward(
base.add(match_ip + m_len),
base.add(match_pos + m_len),
base.add(iend_addr),
)
};
m_len += forward;
let literals = &data[anchor..match_ip];
handle_sequence(Sequence::Triple {
literals,
offset,
match_len: m_len,
});
ip0 = match_ip + m_len;
anchor = ip0;
if ip0 <= ilimit {
let in_range_fwd = current0
.checked_add(2)
.and_then(|c| c.checked_add(HASH_READ_SIZE))
.is_some_and(|end| end <= iend_addr);
if in_range_fwd {
let current0_plus_2 = current0 + 2;
let h_fwd = unsafe { hash_table.hash_ptr::<MLS>(base.add(current0_plus_2)) };
unsafe { hash_table.put(h_fwd, current0_plus_2 as u32) };
}
if ip0 >= 2 {
let h_back = unsafe { hash_table.hash_ptr::<MLS>(base.add(ip0 - 2)) };
unsafe { hash_table.put(h_back, (ip0 - 2) as u32) };
}
while rep_offset2 > 0
&& ip0 <= ilimit
&& ip0 >= rep_offset2 as usize
&& unsafe { read32(base.add(ip0)) == read32(base.add(ip0 - rep_offset2 as usize)) }
{
let r_off = rep_offset2 as usize;
let r_extra = unsafe {
count_forward(
base.add(ip0 + 4),
base.add(ip0 + 4 - r_off),
base.add(iend_addr),
)
};
let r_len = 4 + r_extra;
core::mem::swap(&mut rep_offset1, &mut rep_offset2);
let h_at = unsafe { hash_table.hash_ptr::<MLS>(base.add(ip0)) };
unsafe { hash_table.put(h_at, ip0 as u32) };
handle_sequence(Sequence::Triple {
literals: &data[anchor..ip0],
offset: r_off,
match_len: r_len,
});
ip0 += r_len;
anchor = ip0;
}
}
step = step_size;
next_step = ip0.saturating_add(K_STEP_INCR);
continue 'restart;
}
if offset_saved1 != 0 && rep_offset1 != 0 {
offset_saved2 = offset_saved1;
}
let final_rep = [
if rep_offset1 != 0 {
rep_offset1
} else {
offset_saved1
},
if rep_offset2 != 0 {
rep_offset2
} else {
offset_saved2
},
];
FastBlockResult {
rep: final_rep,
tail_literals_len: data.len() - anchor,
}
}
#[cfg(test)]
mod tests {
use super::*;
use alloc::vec;
use alloc::vec::Vec;
fn run_block(
data: &[u8],
hash_log: u32,
mls: u32,
) -> (Vec<(Vec<u8>, usize, usize)>, FastBlockResult) {
let mut table = FastHashTable::new(hash_log, mls);
let mut tuples: Vec<(Vec<u8>, usize, usize)> = Vec::new();
let mut handle = |seq: Sequence<'_>| match seq {
Sequence::Triple {
literals,
offset,
match_len,
} => {
tuples.push((literals.to_vec(), offset, match_len));
}
Sequence::Literals { literals } => {
tuples.push((literals.to_vec(), 0, 0));
}
};
let result = match mls {
4 => compress_block_fast::<4, false>(data, 0, 0, &mut table, [0, 0], 2, &mut handle),
5 => compress_block_fast::<5, false>(data, 0, 0, &mut table, [0, 0], 2, &mut handle),
_ => panic!("test helper only supports mls=4 and mls=5"),
};
let acct: usize = tuples
.iter()
.map(|(lits, _off, mlen)| lits.len() + mlen)
.sum::<usize>()
+ result.tail_literals_len;
assert_eq!(acct, data.len(), "kernel must account for every input byte",);
(tuples, result)
}
#[test]
fn short_input_reports_tail_without_emission() {
let data = [1u8, 2, 3, 4, 5];
let (tuples, result) = run_block(&data, 8, 4);
assert!(
tuples.is_empty(),
"kernel must NOT emit sequences for short inputs (got {tuples:?})",
);
assert_eq!(result.tail_literals_len, data.len());
}
#[test]
fn finds_long_repeat_in_simple_pattern() {
let mut data = Vec::new();
data.extend_from_slice(b"ABCDEFGHIJKLMNOP");
data.extend_from_slice(b"ABCDEFGHIJKLMNOP");
data.extend_from_slice(b"________");
let (tuples, _result) = run_block(&data, 12, 4);
let triple = tuples
.iter()
.find(|(_, _, m)| *m > 0)
.expect("kernel must emit at least one Triple for the repeated half");
assert!(
triple.2 >= 4,
"match_len must be ≥ MIN_MATCH=4 (got {})",
triple.2,
);
assert!(
triple.1 > 0,
"explicit-offset match must have offset > 0 (got {})",
triple.1,
);
}
fn run_block_with_rep(
data: &[u8],
hash_log: u32,
rep: [u32; 2],
) -> (Vec<(Vec<u8>, usize, usize)>, FastBlockResult) {
let mut table = FastHashTable::new(hash_log, 4);
let mut tuples: Vec<(Vec<u8>, usize, usize)> = Vec::new();
let mut handle = |seq: Sequence<'_>| match seq {
Sequence::Triple {
literals,
offset,
match_len,
} => tuples.push((literals.to_vec(), offset, match_len)),
Sequence::Literals { literals } => tuples.push((literals.to_vec(), 0, 0)),
};
let result = compress_block_fast::<4, false>(data, 0, 0, &mut table, rep, 2, &mut handle);
let acct: usize = tuples
.iter()
.map(|(lits, _off, mlen)| lits.len() + mlen)
.sum::<usize>()
+ result.tail_literals_len;
assert_eq!(acct, data.len(), "kernel must account for every input byte");
(tuples, result)
}
#[test]
fn repcode_match_emits_with_rep_offset_one() {
let data = vec![0x42u8; 64];
let (tuples, _) = run_block_with_rep(&data, 8, [1, 4]);
let rep_triple = tuples
.iter()
.find(|(_, off, m)| *off == 1 && *m > 0)
.unwrap_or_else(|| panic!("repcode Triple at offset=1 expected, got {tuples:?}"));
assert!(
rep_triple.2 >= 4,
"match_len must be ≥ MIN_MATCH=4 (got {})",
rep_triple.2,
);
assert!(
rep_triple.2 >= 32,
"uniform-byte rep extension must consume most of the buffer, got {}",
rep_triple.2,
);
}
#[test]
fn explicit_match_backward_extension_extends_by_marker_byte() {
let mut data: Vec<u8> = (0..15u8).collect();
data.push(b'Z');
data.extend_from_slice(b"AAAAAAAA");
for i in 0..8u8 {
data.push(0x80 + i);
}
data.push(b'Z');
data.extend_from_slice(b"AAAAAAAA");
for i in 0..16u8 {
data.push(0x40 + (i % 16));
}
let (tuples, _) = run_block_with_rep(&data, 12, [0, 0]);
let triple = tuples
.iter()
.find(|(_, _, m)| *m > 0)
.unwrap_or_else(|| panic!("expected an explicit-match Triple, got {tuples:?}"));
assert!(
triple.2 >= 5,
"expected match_len ≥ 5 from backward extension (got {})",
triple.2,
);
assert!(
!triple.0.ends_with(b"Z"),
"backward extension must consume the 'Z' marker (literals = {:?})",
triple.0,
);
}
#[test]
fn prefix_start_index_filter_rejects_below_window() {
let data = vec![0xAAu8; 64];
let mut table = FastHashTable::new(8, 4);
let h = unsafe { table.hash_ptr::<4>(data.as_ptr().add(1)) };
unsafe { table.put(h, 0) };
let mut tuples: Vec<(Vec<u8>, usize, usize)> = Vec::new();
let mut handle = |seq: Sequence<'_>| match seq {
Sequence::Triple {
literals,
offset,
match_len,
} => tuples.push((literals.to_vec(), offset, match_len)),
Sequence::Literals { literals } => tuples.push((literals.to_vec(), 0, 0)),
};
let _ = compress_block_fast::<4, false>(&data, 0, 5, &mut table, [0, 0], 2, &mut handle);
let mut anchor: usize = 0;
for (lits, off, m) in &tuples {
if *m > 0 {
let match_start = anchor + lits.len();
let match_src = match_start
.checked_sub(*off)
.expect("offset must not exceed match_start (would wrap)");
assert!(
match_src >= 5,
"match source {match_src} below prefix_start_index=5 \
(match_start={match_start}, offset={off})",
);
anchor = match_start + m;
} else {
anchor += lits.len();
}
}
}
#[test]
fn match_found_rejects_stale_forward_entry() {
let data = vec![0u8; 200];
let mut table = FastHashTable::new(8, 4);
let h = unsafe { table.hash_ptr::<4>(data.as_ptr().add(1)) };
unsafe { table.put(h, 150) };
let mut tuples: Vec<(Vec<u8>, usize, usize)> = Vec::new();
let mut handle = |seq: Sequence<'_>| match seq {
Sequence::Triple {
literals,
offset,
match_len,
} => tuples.push((literals.to_vec(), offset, match_len)),
Sequence::Literals { literals } => tuples.push((literals.to_vec(), 0, 0)),
};
let _ = compress_block_fast::<4, false>(&data, 0, 0, &mut table, [0, 0], 2, &mut handle);
for (_, off, m) in &tuples {
if *m > 0 {
assert!(
*off > 0 && *off <= data.len(),
"every emitted offset must reference an in-buffer backward position (got {off})",
);
}
}
}
#[test]
fn block_exactly_hash_read_size_emits_no_sequences() {
let data = [1u8, 2, 3, 4, 5, 6, 7, 8];
let (tuples, result) = run_block_with_rep(&data, 8, [0, 0]);
assert!(
tuples.is_empty(),
"exactly HASH_READ_SIZE bytes must produce no main-loop iterations",
);
assert_eq!(result.tail_literals_len, data.len());
}
#[test]
fn block_just_below_hash_read_size_emits_no_sequences() {
let data = [1u8, 2, 3, 4, 5, 6, 7];
let (tuples, result) = run_block_with_rep(&data, 8, [0, 0]);
assert!(tuples.is_empty());
assert_eq!(result.tail_literals_len, data.len());
}
#[test]
fn rep_offset_save_restore_when_out_of_range() {
let mut data = vec![0u8; 64];
let mut state = 0x1234_5678u32;
for byte in &mut data {
state ^= state << 13;
state ^= state >> 17;
state ^= state << 5;
*byte = state as u8;
}
let huge = 9999;
let mut table = FastHashTable::new(10, 4);
let mut tuples: Vec<(Vec<u8>, usize, usize)> = Vec::new();
let mut handle = |seq: Sequence<'_>| match seq {
Sequence::Triple {
literals,
offset,
match_len,
} => tuples.push((literals.to_vec(), offset, match_len)),
Sequence::Literals { literals } => tuples.push((literals.to_vec(), 0, 0)),
};
let result =
compress_block_fast::<4, false>(&data, 0, 0, &mut table, [huge, 7], 2, &mut handle);
assert_eq!(
result.rep[0], huge,
"out-of-range rep_offset1 must be restored verbatim across the block",
);
assert_eq!(result.rep[1], 7, "rep_offset2 (also stashed) must restore");
}
#[test]
fn cmov_variant_matches_branch_variant_output() {
let mut data = alloc::vec::Vec::new();
for i in 0..512u32 {
data.push((i & 0xFF) as u8);
}
let tail = data[0..64].to_vec();
data.extend_from_slice(&tail);
let collect = |use_cmov: bool| -> alloc::vec::Vec<(alloc::vec::Vec<u8>, usize, usize)> {
let mut table = FastHashTable::new(12, 4);
let mut tuples = alloc::vec::Vec::new();
let mut handle = |seq: Sequence<'_>| match seq {
Sequence::Triple {
literals,
offset,
match_len,
} => {
tuples.push((literals.to_vec(), offset, match_len));
}
Sequence::Literals { literals } => {
tuples.push((literals.to_vec(), 0, 0));
}
};
if use_cmov {
let _ =
compress_block_fast::<4, true>(&data, 0, 0, &mut table, [0, 0], 2, &mut handle);
} else {
let _ = compress_block_fast::<4, false>(
&data,
0,
0,
&mut table,
[0, 0],
2,
&mut handle,
);
}
tuples
};
let out_branch = collect(false);
let out_cmov = collect(true);
assert_eq!(
out_branch, out_cmov,
"cmov and branch variants must emit identical sequences"
);
}
#[test]
fn cmov_variant_rejects_out_of_window_when_ip_equals_dummy() {
let mut data: alloc::vec::Vec<u8> = alloc::vec![0xAA; 32];
data[16] = 0x12;
data[17] = 0x34;
data[18] = 0x56;
data[19] = 0x78;
let base = data.as_ptr();
let ip_pos = 16usize;
let ip = unsafe { base.add(ip_pos) };
let branch_result = unsafe { match_found::<false>(ip, base, 4, 10, ip_pos, data.len()) };
assert!(
!branch_result,
"branch variant must reject out-of-window match_idx"
);
let cmov_result = unsafe { match_found::<true>(ip, base, 4, 10, ip_pos, data.len()) };
assert!(
!cmov_result,
"cmov variant must reject out-of-window match_idx even when \
ip bytes coincide with CMOV_DUMMY",
);
}
}