use alloc::collections::VecDeque;
use alloc::vec::Vec;
use super::super::Sequence;
use super::super::blocks::encode_offset_with_history;
use super::super::cost_model::{HC_OPT_NUM, HcOptimalCostProfile};
use super::super::dict_attach::DictAttach;
use super::super::hc::HC_MIN_MATCH_LEN;
use super::super::opt::types::{HcOptimalSequence, MatchCandidate};
use super::helpers::INCOMPRESSIBLE_SKIP_STEP;
pub(crate) const STREAM_ABS_HEADROOM: usize = HC_OPT_NUM + 16;
pub(crate) const REBASE_RESET_FLOOR_CEILING: usize = usize::MAX >> 1;
#[inline]
pub(crate) fn check_stream_abs_headroom(
history_abs_start: usize,
window_size: usize,
data_len: usize,
) {
let _future_abs_end = history_abs_start
.checked_add(window_size)
.and_then(|p| p.checked_add(data_len))
.and_then(|p| p.checked_add(STREAM_ABS_HEADROOM))
.expect(
"structured-zstd: cumulative input would leave less than \
STREAM_ABS_HEADROOM (HC_OPT_NUM + 16) slack below \
`usize::MAX` for the encoder's absolute-position \
lookahead; on 32-bit targets, split the input into \
smaller frames or use a 64-bit build",
);
}
#[cfg(test)]
mod bt_pair_index_wrap_tests {
use super::MatchTable;
#[test]
fn bt_pair_index_matches_modulo_ring_after_wraparound() {
let mut table = MatchTable::new(1 << 20);
table.chain_log = 4;
table.index_shift = usize::MAX - 5;
let bt_mask = table.bt_mask();
assert_eq!(bt_mask, 0b0111);
for abs_pos in [0usize, 1, 4, 5, 6, 7, 8, 12, 17] {
let got = table.bt_pair_index_for_abs(abs_pos);
let expected = 2 * (abs_pos.wrapping_add(table.index_shift) & bt_mask);
assert_eq!(
got, expected,
"abs_pos={abs_pos}: wrapping_add ring slot must match the masked sum"
);
}
assert_eq!(table.bt_pair_index_for_abs(7), 2);
assert_eq!(table.bt_pair_index_for_abs(14), 0);
}
#[test]
fn bt_pair_index_matches_modulo_ring_without_overflow() {
let mut table = MatchTable::new(1 << 20);
table.chain_log = 8; table.index_shift = 17;
let bt_mask = table.bt_mask();
for abs_pos in [0usize, 1, 16, 32, 64, 127, 128, 255, 1 << 20] {
let got = table.bt_pair_index_for_abs(abs_pos);
let expected = 2 * ((abs_pos + table.index_shift) & bt_mask);
assert_eq!(got, expected, "abs_pos={abs_pos}");
}
}
}
#[cfg(test)]
mod stream_abs_headroom_tests {
use super::{STREAM_ABS_HEADROOM, check_stream_abs_headroom};
#[test]
fn accepts_exactly_at_the_boundary() {
let history_abs_start = usize::MAX - STREAM_ABS_HEADROOM - 2;
check_stream_abs_headroom(history_abs_start, 1, 1);
}
#[test]
fn accepts_well_below_the_boundary() {
check_stream_abs_headroom(0, 1 << 20, 1 << 20);
}
#[test]
#[should_panic(expected = "STREAM_ABS_HEADROOM")]
fn rejects_one_byte_past_the_boundary() {
let history_abs_start = usize::MAX - STREAM_ABS_HEADROOM - 1;
check_stream_abs_headroom(history_abs_start, 1, 1);
}
#[test]
#[should_panic(expected = "STREAM_ABS_HEADROOM")]
fn rejects_history_abs_start_already_too_high() {
check_stream_abs_headroom(usize::MAX - 10, 0, 0);
}
}
pub(crate) const HC_PRIME3BYTES: u32 = 506_832_829;
pub(crate) const HC_PRIME4BYTES: u32 = 2_654_435_761;
pub(crate) const HC_PRIME5BYTES: u64 = 889_523_592_379;
pub(crate) const HC_PRIME6BYTES: u64 = 227_718_039_650_203;
pub(crate) const HC_EMPTY: u32 = 0;
pub(crate) const HC_HASH_LOG: usize = 20;
pub(crate) const HC_CHAIN_LOG: usize = 19;
pub(crate) const HC3_HASH_LOG: usize = 17;
#[derive(Clone, Copy, PartialEq, Eq, Default, Debug)]
pub(crate) enum DmsDictLayout {
#[default]
None,
Hc,
Bt,
}
#[derive(Clone, Default)]
pub(crate) struct DmsDictTables {
pub(crate) layout: DmsDictLayout,
pub(crate) hash_table: Vec<u32>,
pub(crate) chain_table: Vec<u32>,
pub(crate) hash_log: usize,
pub(crate) mls: usize,
}
pub(crate) struct MatchTable {
pub(crate) max_window_size: usize,
pub(crate) chunk_lens: VecDeque<usize>,
pub(crate) window_size: usize,
pub(crate) history: Vec<u8>,
pub(crate) history_start: usize,
pub(crate) history_abs_start: usize,
pub(crate) position_base: usize,
pub(crate) index_shift: usize,
pub(crate) offset_hist: [u32; 3],
pub(crate) hash_table: Vec<u32>,
pub(crate) hash3_table: Vec<u32>,
pub(crate) chain_table: Vec<u32>,
pub(crate) hash_log: usize,
pub(crate) chain_log: usize,
pub(crate) hash3_log: usize,
pub(crate) next_to_update3: usize,
pub(crate) skip_insert_until_abs: usize,
pub(crate) dictionary_limit_abs: Option<usize>,
pub(crate) dictionary_primed_for_frame: bool,
pub(crate) allow_zero_relative_position: bool,
pub(crate) search_depth: usize,
pub(crate) is_btultra2: bool,
pub(crate) uses_bt: bool,
pub(crate) search_mls: usize,
pub(crate) dms: DictAttach<DmsDictTables>,
pub(crate) borrowed_input: Option<(*const u8, usize)>,
pub(crate) borrowed_block: Option<(usize, usize)>,
}
impl Clone for MatchTable {
fn clone(&self) -> Self {
Self {
max_window_size: self.max_window_size,
chunk_lens: self.chunk_lens.clone(),
window_size: self.window_size,
history: self.history.clone(),
history_start: self.history_start,
history_abs_start: self.history_abs_start,
position_base: self.position_base,
index_shift: self.index_shift,
offset_hist: self.offset_hist,
hash_table: self.hash_table.clone(),
hash3_table: self.hash3_table.clone(),
chain_table: self.chain_table.clone(),
hash_log: self.hash_log,
chain_log: self.chain_log,
hash3_log: self.hash3_log,
next_to_update3: self.next_to_update3,
skip_insert_until_abs: self.skip_insert_until_abs,
dictionary_limit_abs: self.dictionary_limit_abs,
dictionary_primed_for_frame: self.dictionary_primed_for_frame,
allow_zero_relative_position: self.allow_zero_relative_position,
search_depth: self.search_depth,
is_btultra2: self.is_btultra2,
uses_bt: self.uses_bt,
search_mls: self.search_mls,
dms: self.dms.clone(),
borrowed_input: self.borrowed_input,
borrowed_block: self.borrowed_block,
}
}
fn clone_from(&mut self, source: &Self) {
self.chunk_lens.clone_from(&source.chunk_lens);
self.history.clone_from(&source.history);
self.hash_table.clone_from(&source.hash_table);
self.hash3_table.clone_from(&source.hash3_table);
self.chain_table.clone_from(&source.chain_table);
self.dms.clone_from(&source.dms);
self.max_window_size = source.max_window_size;
self.window_size = source.window_size;
self.history_start = source.history_start;
self.history_abs_start = source.history_abs_start;
self.position_base = source.position_base;
self.index_shift = source.index_shift;
self.offset_hist = source.offset_hist;
self.hash_log = source.hash_log;
self.chain_log = source.chain_log;
self.hash3_log = source.hash3_log;
self.next_to_update3 = source.next_to_update3;
self.skip_insert_until_abs = source.skip_insert_until_abs;
self.dictionary_limit_abs = source.dictionary_limit_abs;
self.dictionary_primed_for_frame = source.dictionary_primed_for_frame;
self.allow_zero_relative_position = source.allow_zero_relative_position;
self.search_depth = source.search_depth;
self.is_btultra2 = source.is_btultra2;
self.uses_bt = source.uses_bt;
self.search_mls = source.search_mls;
self.borrowed_input = source.borrowed_input;
self.borrowed_block = source.borrowed_block;
}
}
impl MatchTable {
pub(crate) fn new(max_window_size: usize) -> Self {
Self {
max_window_size,
chunk_lens: VecDeque::new(),
window_size: 0,
history: Vec::new(),
history_start: 0,
history_abs_start: 0,
position_base: 0,
index_shift: 0,
offset_hist: [1, 4, 8],
hash_table: Vec::new(),
hash3_table: Vec::new(),
chain_table: Vec::new(),
hash_log: HC_HASH_LOG,
chain_log: HC_CHAIN_LOG,
hash3_log: HC3_HASH_LOG,
next_to_update3: 0,
skip_insert_until_abs: 0,
dictionary_limit_abs: None,
dictionary_primed_for_frame: false,
allow_zero_relative_position: false,
search_depth: 0,
is_btultra2: false,
uses_bt: false,
search_mls: 4,
dms: DictAttach::new(),
borrowed_input: None,
borrowed_block: None,
}
}
#[inline(always)]
pub(crate) fn can_skip_rebase_check_at(
&self,
abs_pos: usize,
max_abs_pos: usize,
is_btultra2: bool,
) -> bool {
let max_rel_no_rebase = (u32::MAX as usize).saturating_sub(2);
self.position_base == 0
&& self.index_shift == 0
&& max_abs_pos <= max_rel_no_rebase
&& (self.allow_zero_relative_position
|| abs_pos > self.history_abs_start
|| (is_btultra2 && abs_pos == self.history_abs_start))
}
#[inline]
pub(crate) fn needs_rebase(&self, abs_pos: usize, is_btultra2: bool) -> bool {
if is_btultra2
&& !self.allow_zero_relative_position
&& self.position_base == 0
&& abs_pos == 0
{
return false;
}
self.relative_position(abs_pos)
.is_none_or(|relative| relative >= u32::MAX - 1)
}
pub(crate) fn insert_hash3_only_no_rebase(&mut self, abs_pos: usize) {
if self.hash3_log == 0 {
return;
}
let idx = abs_pos - self.history_abs_start;
let concat = &self.history[self.history_start..];
if idx + 4 > concat.len() {
return;
}
let Some(relative_pos) = self.relative_position(abs_pos) else {
return;
};
let hash3 = Self::hash_position_at(concat, idx, self.hash3_log, 3);
self.hash3_table[hash3] = relative_pos + 1;
}
#[inline]
pub(crate) fn insert_position_no_rebase(&mut self, abs_pos: usize) {
let idx = abs_pos.wrapping_sub(self.history_abs_start);
let hash = {
let concat = self.live_history();
if idx + 4 > concat.len() {
return;
}
Self::hash_position_at(concat, idx, self.hash_log, 4)
};
let Some(relative_pos) = self.relative_position(abs_pos) else {
return;
};
let stored = relative_pos + 1;
let chain_mask = (1usize << self.chain_log) - 1;
let chain_idx = relative_pos as usize & chain_mask;
debug_assert!(hash < self.hash_table.len());
debug_assert!(chain_idx < self.chain_table.len());
unsafe {
let prev = *self.hash_table.get_unchecked(hash);
*self.chain_table.get_unchecked_mut(chain_idx) = prev;
*self.hash_table.get_unchecked_mut(hash) = stored;
}
}
pub(crate) fn ensure_tables(&mut self) {
let hash_size = 1 << self.hash_log;
if self.hash_table.len() != hash_size {
self.hash_table = alloc::vec![HC_EMPTY; hash_size];
}
let chain_size = 1 << self.chain_log;
if self.chain_table.len() != chain_size {
self.chain_table = alloc::vec![HC_EMPTY; chain_size];
}
let hash3_size = if self.hash3_log == 0 {
0
} else {
1 << self.hash3_log
};
if self.hash3_table.len() != hash3_size {
self.hash3_table = alloc::vec![HC_EMPTY; hash3_size];
}
}
#[inline(always)]
pub(crate) fn read_le_u32(data: &[u8]) -> u32 {
debug_assert!(data.len() >= 4);
unsafe { Self::read_le_u32_ptr(data.as_ptr()) }
}
#[inline(always)]
pub(crate) unsafe fn read_le_u32_ptr(ptr: *const u8) -> u32 {
unsafe { u32::from_le(core::ptr::read_unaligned(ptr as *const u32)) }
}
#[inline(always)]
pub(crate) unsafe fn read_le_u64_ptr(ptr: *const u8) -> u64 {
unsafe { u64::from_le(core::ptr::read_unaligned(ptr as *const u64)) }
}
#[inline(always)]
pub(crate) fn hash_value_with_mls(value: u32, hash_log: usize, mls: usize) -> usize {
match mls {
3 => (((value << 8).wrapping_mul(HC_PRIME3BYTES)) >> (32 - hash_log)) as usize,
_ => ((value.wrapping_mul(HC_PRIME4BYTES)) >> (32 - hash_log)) as usize,
}
}
#[inline(always)]
pub(crate) fn hash_value8_with_mls(value: u64, hash_log: usize, mls: usize) -> usize {
match mls {
6 => (((value << 16).wrapping_mul(HC_PRIME6BYTES)) >> (64 - hash_log)) as usize,
_ => (((value << 24).wrapping_mul(HC_PRIME5BYTES)) >> (64 - hash_log)) as usize,
}
}
#[inline(always)]
pub(crate) fn hash_position_with_mls(data: &[u8], hash_log: usize, mls: usize) -> usize {
if mls >= 5 {
debug_assert!(data.len() >= 8);
let value = unsafe { Self::read_le_u64_ptr(data.as_ptr()) };
return Self::hash_value8_with_mls(value, hash_log, mls);
}
let value = Self::read_le_u32(data);
Self::hash_value_with_mls(value, hash_log, mls)
}
#[inline(always)]
pub(crate) fn hash_position_at(data: &[u8], idx: usize, hash_log: usize, mls: usize) -> usize {
if mls >= 5 {
debug_assert!(idx + 8 <= data.len());
let value = unsafe { Self::read_le_u64_ptr(data.as_ptr().add(idx)) };
return Self::hash_value8_with_mls(value, hash_log, mls);
}
debug_assert!(idx + 4 <= data.len());
let value = unsafe { Self::read_le_u32_ptr(data.as_ptr().add(idx)) };
Self::hash_value_with_mls(value, hash_log, mls)
}
#[inline(always)]
pub(crate) fn hash_position(&self, data: &[u8]) -> usize {
Self::hash_position_with_mls(data, self.hash_log, 4)
}
#[cfg(test)]
pub(crate) fn hash3_position(data: &[u8], hash_log: usize) -> usize {
let value = Self::read_le_u32(data);
(((value << 8).wrapping_mul(HC_PRIME3BYTES)) >> (32 - hash_log)) as usize
}
pub(crate) fn mark_dictionary_primed(&mut self) {
self.dictionary_primed_for_frame = true;
}
pub(crate) fn set_dictionary_limit_from_primed_bytes(&mut self, primed_len: usize) {
self.dictionary_limit_abs = if primed_len == 0 {
None
} else {
Some(self.history_abs_start.saturating_add(primed_len))
};
}
pub(crate) fn prime_dms_bt(&mut self, region_len: usize) {
let mls = self.search_mls;
let read = if mls >= 5 { 8 } else { 4 };
let region = region_len.min(self.live_history().len());
if region < read {
self.dms.invalidate();
return;
}
let dms_hash_log =
(usize::BITS - (region - 1).leading_zeros()).clamp(10, self.hash_log as u32) as usize;
if self.dms.is_primed()
&& self.dms.region_len() == region
&& self.dms.table().is_some_and(|t| {
t.layout == DmsDictLayout::Bt && t.mls == mls && t.hash_log == dms_hash_log
})
{
return;
}
const DMS_BUILD_DEPTH: usize = 1 << 9;
let (mut hash_table, mut chain_table) = match self.dms.table_mut() {
Some(t) => (
core::mem::take(&mut t.hash_table),
core::mem::take(&mut t.chain_table),
),
None => (Vec::new(), Vec::new()),
};
hash_table.clear();
hash_table.resize(1usize << dms_hash_log, 0u32);
chain_table.clear();
chain_table.resize(2 * region, 0u32);
let concat = self.live_history();
let mut current = 0usize;
while current + read <= region {
let h = Self::hash_position_at(concat, current, dms_hash_log, mls);
let mut match_packed = hash_table[h];
hash_table[h] = (current + 1) as u32;
let mut smaller_slot = 2 * current;
let mut larger_slot = 2 * current + 1;
let mut common_smaller = 0usize;
let mut common_larger = 0usize;
let mut compares = DMS_BUILD_DEPTH;
while compares > 0 && match_packed != 0 {
let cand = (match_packed - 1) as usize;
if cand >= current {
break;
}
compares -= 1;
let next_pair = 2 * cand;
let mut ml = common_smaller.min(common_larger);
let limit = region - current;
while ml < limit && concat[cand + ml] == concat[current + ml] {
ml += 1;
}
if current + ml >= region {
break;
}
if concat[cand + ml] < concat[current + ml] {
chain_table[smaller_slot] = match_packed;
common_smaller = ml;
smaller_slot = next_pair + 1;
match_packed = chain_table[next_pair + 1];
} else {
chain_table[larger_slot] = match_packed;
common_larger = ml;
larger_slot = next_pair;
match_packed = chain_table[next_pair];
}
}
chain_table[smaller_slot] = 0;
chain_table[larger_slot] = 0;
current += 1;
}
let tables = self.dms.table_mut_or_init(DmsDictTables::default);
tables.layout = DmsDictLayout::Bt;
tables.hash_table = hash_table;
tables.chain_table = chain_table;
tables.hash_log = dms_hash_log;
tables.mls = mls;
self.dms.set_region_len(region);
self.dms.mark_primed();
}
pub(crate) fn prime_dms_hc(&mut self, region_len: usize) {
let mls = HC_MIN_MATCH_LEN;
let region = region_len.min(self.live_history().len());
if region < mls {
self.dms.invalidate();
return;
}
let dms_hash_log =
(usize::BITS - (region - 1).leading_zeros()).clamp(10, self.hash_log as u32) as usize;
if self.dms.is_primed()
&& self.dms.region_len() == region
&& self.dms.table().is_some_and(|t| {
t.layout == DmsDictLayout::Hc && t.mls == mls && t.hash_log == dms_hash_log
})
{
return;
}
let (mut hash_table, mut chain_table) = match self.dms.table_mut() {
Some(t) => (
core::mem::take(&mut t.hash_table),
core::mem::take(&mut t.chain_table),
),
None => (Vec::new(), Vec::new()),
};
hash_table.clear();
hash_table.resize(1usize << dms_hash_log, 0u32);
chain_table.clear();
chain_table.resize(region, 0u32);
let concat = self.live_history();
let mut current = 0usize;
while current + mls <= region {
let h = Self::hash_position_at(concat, current, dms_hash_log, mls);
chain_table[current] = hash_table[h];
hash_table[h] = (current + 1) as u32;
current += 1;
}
let tables = self.dms.table_mut_or_init(DmsDictTables::default);
tables.layout = DmsDictLayout::Hc;
tables.hash_table = hash_table;
tables.chain_table = chain_table;
tables.hash_log = dms_hash_log;
tables.mls = mls;
self.dms.set_region_len(region);
self.dms.mark_primed();
}
pub(crate) fn heap_size(&self) -> usize {
let u32_sz = core::mem::size_of::<u32>();
let usize_sz = core::mem::size_of::<usize>();
self.chunk_lens.capacity() * usize_sz
+ self.history.capacity()
+ (self.hash_table.capacity()
+ self.hash3_table.capacity()
+ self.chain_table.capacity())
* u32_sz
+ self.dms.table().map_or(0, |t| {
(t.hash_table.capacity() + t.chain_table.capacity()) * u32_sz
})
}
pub(crate) fn reserve_history(&mut self, expected_bytes: usize) {
let cap = self.max_window_size
+ (self.max_window_size >> 2)
+ crate::common::MAX_BLOCK_SIZE as usize;
let want = expected_bytes.min(cap);
if want > self.history.capacity() {
self.history.reserve_exact(want - self.history.len());
}
}
pub(crate) fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
assert!(data.len() <= self.max_window_size);
check_stream_abs_headroom(self.history_abs_start, self.window_size, data.len());
if self.window_size + data.len() > self.max_window_size {
let target = self.max_window_size
+ (self.max_window_size >> 2)
+ crate::common::MAX_BLOCK_SIZE as usize;
if target > self.history.len() && self.history.capacity() < target {
self.history.reserve_exact(target - self.history.len());
}
}
while self.window_size + data.len() > self.max_window_size {
let removed_len = self.chunk_lens.pop_front().unwrap();
self.window_size -= removed_len;
self.history_start += removed_len;
self.history_abs_start += removed_len;
}
self.compact_history();
let added = data.len();
self.history.extend_from_slice(&data);
self.next_to_update3 = self.next_to_update3.max(self.history_abs_start);
self.window_size += added;
self.chunk_lens.push_back(added);
reuse_space(data);
}
pub(crate) fn trim_to_window(&mut self) {
while self.window_size > self.max_window_size {
let removed_len = self.chunk_lens.pop_front().unwrap();
self.window_size -= removed_len;
self.history_start += removed_len;
self.history_abs_start += removed_len;
}
}
pub(crate) fn compact_history(&mut self) {
if self.history_start == 0 {
return;
}
if self.history_start >= (self.max_window_size >> 2)
|| self.history_start * 2 >= self.history.len()
{
self.history.drain(..self.history_start);
self.history_start = 0;
}
}
pub(crate) fn live_history(&self) -> &[u8] {
if let Some((_start, end)) = self.borrowed_block {
let (ptr, total) = self
.borrowed_input
.expect("borrowed_block set without a registered borrowed window");
debug_assert!(
end <= total,
"borrowed block end {end} exceeds window {total}"
);
return unsafe { core::slice::from_raw_parts(ptr, end) };
}
&self.history[self.history_start..]
}
pub(crate) fn history_abs_end(&self) -> usize {
self.history_abs_start + self.live_history().len()
}
pub(crate) fn get_last_space(&self) -> &[u8] {
if let (Some((ptr, _total)), Some((block_start, block_end))) =
(self.borrowed_input, self.borrowed_block)
{
return unsafe {
core::slice::from_raw_parts(ptr.add(block_start), block_end - block_start)
};
}
let last = *self.chunk_lens.back().unwrap();
&self.history[self.history.len() - last..]
}
pub(crate) unsafe fn set_borrowed_window(&mut self, buffer: &[u8]) {
self.borrowed_input = Some((buffer.as_ptr(), buffer.len()));
self.borrowed_block = None;
self.history_abs_start = 0;
}
pub(crate) fn clear_borrowed_window(&mut self) {
self.borrowed_input = None;
self.borrowed_block = None;
}
pub(crate) fn stage_borrowed_block(&mut self, block_start: usize, block_end: usize) {
let (_ptr, total) = self
.borrowed_input
.expect("stage_borrowed_block requires a registered borrowed window");
assert!(
block_start <= block_end && block_end <= total,
"borrowed block bounds out of range: start={block_start} end={block_end} total={total}",
);
self.borrowed_block = Some((block_start, block_end));
}
pub(crate) fn current_block_range(&self) -> (usize, usize) {
if let Some((start, end)) = self.borrowed_block {
(start, end - start)
} else {
let current_len = *self.chunk_lens.back().unwrap();
(
self.history_abs_start + self.window_size - current_len,
current_len,
)
}
}
#[cfg(test)]
pub(crate) fn push_test_chunk(&mut self, data: Vec<u8>) {
self.history.extend_from_slice(&data);
self.window_size += data.len();
self.chunk_lens.push_back(data.len());
}
pub(crate) fn relative_position(&self, abs_pos: usize) -> Option<u32> {
let shifted_abs = abs_pos.checked_add(self.index_shift)?;
let rel = shifted_abs.checked_sub(self.position_base)?;
let rel_u32 = u32::try_from(rel).ok()?;
if !self.allow_zero_relative_position && self.position_base == 0 && rel_u32 == 0 {
return None;
}
(rel_u32 < u32::MAX).then_some(rel_u32)
}
pub(crate) fn window_low_abs_for_target(&self, target_abs: usize) -> usize {
let history_low = self.history_abs_start;
let window_low = target_abs.saturating_sub(self.max_window_size);
history_low.max(window_low)
}
#[inline(always)]
pub(crate) fn bt_log(&self) -> usize {
self.chain_log.saturating_sub(1)
}
#[inline(always)]
pub(crate) fn bt_mask(&self) -> usize {
(1usize << self.bt_log()) - 1
}
#[inline(always)]
pub(crate) fn bt_pair_index_for_abs(&self, abs_pos: usize) -> usize {
let bt_pos = abs_pos.wrapping_add(self.index_shift);
2 * (bt_pos & self.bt_mask())
}
#[inline(always)]
pub(crate) fn stored_abs_position_fast(
stored: u32,
position_base: usize,
index_shift: usize,
) -> Option<usize> {
if stored == HC_EMPTY {
return None;
}
let shifted = position_base + (stored as usize - 1);
if shifted < index_shift {
return None;
}
Some(shifted - index_shift)
}
pub(crate) fn reset(&mut self, _reuse_space: impl FnMut(Vec<u8>)) {
let next_floor = self.history_abs_end();
self.window_size = 0;
self.chunk_lens.clear();
self.history.clear();
self.history_start = 0;
self.offset_hist = [1, 4, 8];
self.skip_insert_until_abs = 0;
self.borrowed_input = None;
self.borrowed_block = None;
self.dictionary_limit_abs = None;
self.dictionary_primed_for_frame = false;
self.allow_zero_relative_position = false;
if next_floor <= REBASE_RESET_FLOOR_CEILING {
self.history_abs_start = next_floor;
self.next_to_update3 = next_floor;
} else {
self.history_abs_start = 0;
self.position_base = 0;
self.index_shift = 0;
self.next_to_update3 = 0;
self.hash_table.fill(HC_EMPTY);
self.hash3_table.fill(HC_EMPTY);
self.chain_table.fill(HC_EMPTY);
}
}
pub(crate) fn opt_start_cursor_and_litlen(&self, current_abs_start: usize) -> (usize, usize) {
let start_cursor = usize::from(current_abs_start == self.history_abs_start);
(start_cursor, start_cursor)
}
#[inline(always)]
pub(crate) fn bt_insert_step_no_rebase(
&mut self,
abs_pos: usize,
current_abs_end: usize,
target_abs: usize,
) -> usize {
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
unsafe {
self.bt_insert_step_no_rebase_neon(abs_pos, current_abs_end, target_abs)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
use crate::encoding::fastpath::{FastpathKernel, select_kernel};
match select_kernel() {
FastpathKernel::Avx2Bmi2 => unsafe {
self.bt_insert_step_no_rebase_avx2_bmi2(abs_pos, current_abs_end, target_abs)
},
FastpathKernel::Sse42 => unsafe {
self.bt_insert_step_no_rebase_sse42(abs_pos, current_abs_end, target_abs)
},
FastpathKernel::Scalar => {
self.bt_insert_step_no_rebase_scalar(abs_pos, current_abs_end, target_abs)
}
}
}
#[cfg(not(any(
all(target_arch = "aarch64", target_endian = "little"),
target_arch = "x86",
target_arch = "x86_64"
)))]
{
self.bt_insert_step_no_rebase_scalar(abs_pos, current_abs_end, target_abs)
}
}
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bt_insert_step_no_rebase_neon(
&mut self,
abs_pos: usize,
current_abs_end: usize,
target_abs: usize,
) -> usize {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_step_no_rebase_body!(
self,
search_depth,
abs_pos,
current_abs_end,
target_abs,
crate::encoding::fastpath::neon::count_match_from_indices
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2")]
pub(crate) unsafe fn bt_insert_step_no_rebase_sse42(
&mut self,
abs_pos: usize,
current_abs_end: usize,
target_abs: usize,
) -> usize {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_step_no_rebase_body!(
self,
search_depth,
abs_pos,
current_abs_end,
target_abs,
crate::encoding::fastpath::sse42::count_match_from_indices
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,bmi2")]
pub(crate) unsafe fn bt_insert_step_no_rebase_avx2_bmi2(
&mut self,
abs_pos: usize,
current_abs_end: usize,
target_abs: usize,
) -> usize {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_step_no_rebase_body!(
self,
search_depth,
abs_pos,
current_abs_end,
target_abs,
crate::encoding::fastpath::avx2_bmi2::count_match_from_indices
)
}
#[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
pub(crate) fn bt_insert_step_no_rebase_scalar(
&mut self,
abs_pos: usize,
current_abs_end: usize,
target_abs: usize,
) -> usize {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_step_no_rebase_body!(
self,
search_depth,
abs_pos,
current_abs_end,
target_abs,
crate::encoding::fastpath::scalar::count_match_from_indices
)
}
#[allow(dead_code)]
#[allow(clippy::too_many_arguments)]
#[inline(always)]
pub(crate) fn bt_insert_and_collect_matches(
&mut self,
abs_pos: usize,
current_abs_end: usize,
profile: HcOptimalCostProfile,
min_match_len: usize,
best_len_for_skip: &mut usize,
out: &mut Vec<MatchCandidate>,
) {
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
unsafe {
self.bt_insert_and_collect_matches_neon(
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
use crate::encoding::fastpath::{FastpathKernel, select_kernel};
match select_kernel() {
FastpathKernel::Avx2Bmi2 => unsafe {
self.bt_insert_and_collect_matches_avx2_bmi2(
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
)
},
FastpathKernel::Sse42 => unsafe {
self.bt_insert_and_collect_matches_sse42(
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
)
},
FastpathKernel::Scalar => self.bt_insert_and_collect_matches_scalar(
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
),
}
}
#[cfg(not(any(
all(target_arch = "aarch64", target_endian = "little"),
target_arch = "x86",
target_arch = "x86_64"
)))]
{
self.bt_insert_and_collect_matches_scalar(
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
)
}
}
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[target_feature(enable = "neon")]
#[allow(clippy::too_many_arguments)]
pub(crate) unsafe fn bt_insert_and_collect_matches_neon(
&mut self,
abs_pos: usize,
current_abs_end: usize,
profile: HcOptimalCostProfile,
min_match_len: usize,
best_len_for_skip: &mut usize,
out: &mut Vec<MatchCandidate>,
) {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_and_collect_matches_body!(
self,
search_depth,
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
crate::encoding::fastpath::neon::count_match_from_indices,
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2")]
#[allow(clippy::too_many_arguments)]
pub(crate) unsafe fn bt_insert_and_collect_matches_sse42(
&mut self,
abs_pos: usize,
current_abs_end: usize,
profile: HcOptimalCostProfile,
min_match_len: usize,
best_len_for_skip: &mut usize,
out: &mut Vec<MatchCandidate>,
) {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_and_collect_matches_body!(
self,
search_depth,
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
crate::encoding::fastpath::sse42::count_match_from_indices,
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,bmi2")]
#[allow(clippy::too_many_arguments)]
pub(crate) unsafe fn bt_insert_and_collect_matches_avx2_bmi2(
&mut self,
abs_pos: usize,
current_abs_end: usize,
profile: HcOptimalCostProfile,
min_match_len: usize,
best_len_for_skip: &mut usize,
out: &mut Vec<MatchCandidate>,
) {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_and_collect_matches_body!(
self,
search_depth,
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
crate::encoding::fastpath::avx2_bmi2::count_match_from_indices,
)
}
#[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
#[allow(clippy::too_many_arguments)]
pub(crate) fn bt_insert_and_collect_matches_scalar(
&mut self,
abs_pos: usize,
current_abs_end: usize,
profile: HcOptimalCostProfile,
min_match_len: usize,
best_len_for_skip: &mut usize,
out: &mut Vec<MatchCandidate>,
) {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_and_collect_matches_body!(
self,
search_depth,
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
crate::encoding::fastpath::scalar::count_match_from_indices,
)
}
pub(crate) fn replay_history_for_rebase_bt(&mut self, history_start: usize, abs_pos: usize) {
let rebuild_end = self.history_abs_end();
let mut pos = history_start;
while pos < abs_pos {
let forward = self.bt_insert_step_no_rebase(pos, rebuild_end, abs_pos);
let step = forward.max(1).min(abs_pos - pos);
pos += step;
}
}
#[inline(always)]
pub(crate) fn bt_update_tree_until(&mut self, abs_pos: usize, current_abs_end: usize) {
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
unsafe {
self.bt_update_tree_until_neon(abs_pos, current_abs_end)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
use crate::encoding::fastpath::{FastpathKernel, select_kernel};
match select_kernel() {
FastpathKernel::Avx2Bmi2 => unsafe {
self.bt_update_tree_until_avx2_bmi2(abs_pos, current_abs_end)
},
FastpathKernel::Sse42 => unsafe {
self.bt_update_tree_until_sse42(abs_pos, current_abs_end)
},
FastpathKernel::Scalar => {
self.bt_update_tree_until_scalar(abs_pos, current_abs_end)
}
}
}
#[cfg(not(any(
all(target_arch = "aarch64", target_endian = "little"),
target_arch = "x86",
target_arch = "x86_64"
)))]
{
self.bt_update_tree_until_scalar(abs_pos, current_abs_end)
}
}
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bt_update_tree_until_neon(
&mut self,
abs_pos: usize,
current_abs_end: usize,
) {
if self.skip_insert_until_abs < self.history_abs_start {
self.skip_insert_until_abs = self.history_abs_start;
}
let mut update_abs = self.skip_insert_until_abs;
let is_btultra2 = self.is_btultra2;
while update_abs < abs_pos {
if !self.can_skip_rebase_check_at(update_abs, abs_pos, is_btultra2) {
self.maybe_rebase_positions(update_abs);
}
let forward =
unsafe { self.bt_insert_step_no_rebase_neon(update_abs, current_abs_end, abs_pos) };
update_abs += forward.max(1).min(abs_pos - update_abs);
}
self.skip_insert_until_abs = abs_pos;
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2")]
pub(crate) unsafe fn bt_update_tree_until_sse42(
&mut self,
abs_pos: usize,
current_abs_end: usize,
) {
if self.skip_insert_until_abs < self.history_abs_start {
self.skip_insert_until_abs = self.history_abs_start;
}
let mut update_abs = self.skip_insert_until_abs;
let is_btultra2 = self.is_btultra2;
while update_abs < abs_pos {
if !self.can_skip_rebase_check_at(update_abs, abs_pos, is_btultra2) {
self.maybe_rebase_positions(update_abs);
}
let forward = unsafe {
self.bt_insert_step_no_rebase_sse42(update_abs, current_abs_end, abs_pos)
};
update_abs += forward.max(1).min(abs_pos - update_abs);
}
self.skip_insert_until_abs = abs_pos;
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,bmi2")]
pub(crate) unsafe fn bt_update_tree_until_avx2_bmi2(
&mut self,
abs_pos: usize,
current_abs_end: usize,
) {
if self.skip_insert_until_abs < self.history_abs_start {
self.skip_insert_until_abs = self.history_abs_start;
}
let mut update_abs = self.skip_insert_until_abs;
let is_btultra2 = self.is_btultra2;
while update_abs < abs_pos {
if !self.can_skip_rebase_check_at(update_abs, abs_pos, is_btultra2) {
self.maybe_rebase_positions(update_abs);
}
let forward = unsafe {
self.bt_insert_step_no_rebase_avx2_bmi2(update_abs, current_abs_end, abs_pos)
};
update_abs += forward.max(1).min(abs_pos - update_abs);
}
self.skip_insert_until_abs = abs_pos;
}
#[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
pub(crate) fn bt_update_tree_until_scalar(&mut self, abs_pos: usize, current_abs_end: usize) {
if self.skip_insert_until_abs < self.history_abs_start {
self.skip_insert_until_abs = self.history_abs_start;
}
let mut update_abs = self.skip_insert_until_abs;
let is_btultra2 = self.is_btultra2;
while update_abs < abs_pos {
if !self.can_skip_rebase_check_at(update_abs, abs_pos, is_btultra2) {
self.maybe_rebase_positions(update_abs);
}
let forward =
self.bt_insert_step_no_rebase_scalar(update_abs, current_abs_end, abs_pos);
update_abs += forward.max(1).min(abs_pos - update_abs);
}
self.skip_insert_until_abs = abs_pos;
}
pub(crate) fn update_hash3_until(&mut self, abs_pos: usize) {
let is_btultra2 = self.is_btultra2;
if self.next_to_update3 < self.history_abs_start {
self.next_to_update3 = self.history_abs_start;
}
if self.next_to_update3 >= abs_pos {
return;
}
while self.next_to_update3 < abs_pos {
if !self.can_skip_rebase_check_at(self.next_to_update3, abs_pos, is_btultra2) {
self.maybe_rebase_positions(self.next_to_update3);
}
self.insert_hash3_only_no_rebase(self.next_to_update3);
self.next_to_update3 += 1;
}
}
#[inline]
pub(crate) fn maybe_rebase_positions(&mut self, abs_pos: usize) {
let is_btultra2 = self.is_btultra2;
if self.needs_rebase(abs_pos, is_btultra2) {
self.rebase_positions_cold(abs_pos);
}
}
#[cold]
#[inline(never)]
pub(crate) fn rebase_positions_cold(&mut self, abs_pos: usize) {
self.begin_rebase();
let history_start = self.history_abs_start;
if self.uses_bt {
self.replay_history_for_rebase_bt(history_start, abs_pos);
} else {
self.replay_history_for_rebase_hc(history_start, abs_pos);
}
self.next_to_update3 = history_start;
if self.hash3_log != 0 {
self.update_hash3_until(abs_pos);
}
}
#[inline]
pub(crate) fn insert_position(&mut self, abs_pos: usize) {
self.maybe_rebase_positions(abs_pos);
self.insert_position_no_rebase(abs_pos);
}
pub(crate) fn insert_positions(&mut self, start: usize, end: usize) {
if start < end {
let is_btultra2 = self.is_btultra2;
if self.needs_rebase(start, is_btultra2) || self.needs_rebase(end - 1, is_btultra2) {
for pos in start..end {
self.insert_position(pos);
}
} else {
self.fill_hash_chain_positions(start, end);
}
}
self.next_to_update3 = self.next_to_update3.max(end);
}
pub(crate) fn insert_match_span(&mut self, start: usize, end: usize) {
const SKIP_THRESHOLD: usize = 384;
const MAX_START: usize = 96;
const MAX_END: usize = 32;
if end.saturating_sub(start) > SKIP_THRESHOLD {
self.insert_positions(start, start + MAX_START);
self.insert_positions(end - MAX_END, end);
} else {
self.insert_positions(start, end);
}
}
#[inline]
fn fill_hash_chain_positions(&mut self, start: usize, end: usize) {
let history_abs_start = self.history_abs_start;
let position_base = self.position_base;
let index_shift = self.index_shift;
let hash_log = self.hash_log;
let chain_mask = (1usize << self.chain_log) - 1;
let (concat_ptr, concat_len) = {
let live = self.live_history();
(live.as_ptr(), live.len())
};
let hash_ptr = self.hash_table.as_mut_ptr();
let chain_ptr = self.chain_table.as_mut_ptr();
debug_assert!(self.hash_table.len() == 1usize << hash_log);
debug_assert!(self.chain_table.len() == 1usize << self.chain_log);
let hashable_end = end.min(history_abs_start + concat_len.saturating_sub(3));
if start >= hashable_end {
return;
}
let mut src = unsafe { concat_ptr.add(start - history_abs_start) };
let mut rel = (start + index_shift - position_base) as u32;
for _ in start..hashable_end {
unsafe {
let value = Self::read_le_u32_ptr(src);
let hash = Self::hash_value_with_mls(value, hash_log, 4);
let chain_idx = (rel as usize) & chain_mask;
let prev = *hash_ptr.add(hash);
*chain_ptr.add(chain_idx) = prev;
*hash_ptr.add(hash) = rel + 1;
src = src.add(1);
}
rel = rel.wrapping_add(1);
}
}
pub(crate) fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
if step == 0 {
return;
}
let mut pos = start;
while pos < end {
self.insert_position(pos);
let next = pos.saturating_add(step);
if next <= pos {
break;
}
pos = next;
}
}
pub(crate) fn backfill_boundary_positions(
&mut self,
current_abs_start: usize,
current_abs_end: usize,
) {
let backfill_start = current_abs_start
.saturating_sub(3)
.max(self.history_abs_start);
if backfill_start < current_abs_start {
if self.uses_bt {
self.bt_update_tree_until(current_abs_start, current_abs_end);
} else {
self.insert_positions(backfill_start, current_abs_start);
}
}
}
pub(crate) fn apply_limited_update_after_long_match(&mut self, current_abs_start: usize) {
if !self.uses_bt {
return;
}
let gap = current_abs_start.saturating_sub(self.skip_insert_until_abs);
if gap > 384 {
self.skip_insert_until_abs = current_abs_start - (gap - 384).min(192);
}
}
pub(crate) fn bt_insert_sparse_incompressible_block(
&mut self,
current_abs_start: usize,
current_abs_end: usize,
) {
let mut pos = current_abs_start;
while pos < current_abs_end {
self.maybe_rebase_positions(pos);
let _ = self.bt_insert_step_no_rebase(pos, current_abs_end, current_abs_end);
self.insert_hash3_only_no_rebase(pos);
let next = pos.saturating_add(INCOMPRESSIBLE_SKIP_STEP);
if next <= pos {
break;
}
pos = next;
}
let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
let tail_start = current_abs_end
.saturating_sub(dense_tail)
.max(self.history_abs_start)
.max(current_abs_start);
for pos in tail_start..current_abs_end {
if (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
continue;
}
self.maybe_rebase_positions(pos);
let _ = self.bt_insert_step_no_rebase(pos, current_abs_end, current_abs_end);
self.insert_hash3_only_no_rebase(pos);
}
self.skip_insert_until_abs = self.skip_insert_until_abs.max(current_abs_end);
self.next_to_update3 = self.next_to_update3.max(current_abs_end);
}
pub(crate) fn skip_matching_dict_bt(&mut self) {
self.ensure_tables();
let committed_end = self.history_abs_start + self.window_size;
self.skip_insert_until_abs = self.skip_insert_until_abs.max(committed_end);
self.next_to_update3 = self.next_to_update3.max(committed_end);
}
pub(crate) fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
self.ensure_tables();
let (current_abs_start, current_len) = self.current_block_range();
let current_abs_end = current_abs_start + current_len;
self.backfill_boundary_positions(current_abs_start, current_abs_end);
if self.uses_bt {
if incompressible_hint == Some(true) {
self.bt_insert_sparse_incompressible_block(current_abs_start, current_abs_end);
return;
}
self.bt_update_tree_until(current_abs_end, current_abs_end);
return;
}
if incompressible_hint == Some(true) {
self.insert_positions_with_step(
current_abs_start,
current_abs_end,
INCOMPRESSIBLE_SKIP_STEP,
);
let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
let tail_start = current_abs_end
.saturating_sub(dense_tail)
.max(self.history_abs_start);
let tail_start = tail_start.max(current_abs_start);
for pos in tail_start..current_abs_end {
if !(pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
self.insert_position(pos);
}
}
} else {
self.insert_positions(current_abs_start, current_abs_end);
}
}
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn hash3_candidate(
&self,
abs_pos: usize,
current_abs_end: usize,
min_match_len: usize,
) -> Option<MatchCandidate> {
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
unsafe {
self.hash3_candidate_neon(abs_pos, current_abs_end, min_match_len)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
use crate::encoding::fastpath::{FastpathKernel, select_kernel};
match select_kernel() {
FastpathKernel::Avx2Bmi2 => unsafe {
self.hash3_candidate_avx2_bmi2(abs_pos, current_abs_end, min_match_len)
},
FastpathKernel::Sse42 => unsafe {
self.hash3_candidate_sse42(abs_pos, current_abs_end, min_match_len)
},
FastpathKernel::Scalar => {
self.hash3_candidate_scalar(abs_pos, current_abs_end, min_match_len)
}
}
}
#[cfg(not(any(
all(target_arch = "aarch64", target_endian = "little"),
target_arch = "x86",
target_arch = "x86_64"
)))]
{
self.hash3_candidate_scalar(abs_pos, current_abs_end, min_match_len)
}
}
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn hash3_candidate_neon(
&self,
abs_pos: usize,
current_abs_end: usize,
min_match_len: usize,
) -> Option<MatchCandidate> {
super::super::match_generator::hash3_candidate_body!(
self,
abs_pos,
current_abs_end,
min_match_len,
crate::encoding::fastpath::neon::common_prefix_len_ptr,
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2")]
pub(crate) unsafe fn hash3_candidate_sse42(
&self,
abs_pos: usize,
current_abs_end: usize,
min_match_len: usize,
) -> Option<MatchCandidate> {
super::super::match_generator::hash3_candidate_body!(
self,
abs_pos,
current_abs_end,
min_match_len,
crate::encoding::fastpath::sse42::common_prefix_len_ptr,
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,bmi2")]
pub(crate) unsafe fn hash3_candidate_avx2_bmi2(
&self,
abs_pos: usize,
current_abs_end: usize,
min_match_len: usize,
) -> Option<MatchCandidate> {
super::super::match_generator::hash3_candidate_body!(
self,
abs_pos,
current_abs_end,
min_match_len,
crate::encoding::fastpath::avx2_bmi2::common_prefix_len_ptr,
)
}
#[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
pub(crate) fn hash3_candidate_scalar(
&self,
abs_pos: usize,
current_abs_end: usize,
min_match_len: usize,
) -> Option<MatchCandidate> {
super::super::match_generator::hash3_candidate_body!(
self,
abs_pos,
current_abs_end,
min_match_len,
crate::encoding::fastpath::scalar::common_prefix_len_ptr,
)
}
pub(crate) fn begin_rebase(&mut self) {
self.position_base = self.history_abs_start;
self.index_shift = 0;
self.allow_zero_relative_position = true;
self.hash_table.fill(HC_EMPTY);
self.hash3_table.fill(HC_EMPTY);
self.chain_table.fill(HC_EMPTY);
}
pub(crate) fn replay_history_for_rebase_hc(&mut self, history_start: usize, abs_pos: usize) {
for pos in history_start..abs_pos {
self.insert_position_no_rebase(pos);
}
}
pub(crate) fn emit_optimal_plan(
&mut self,
current_len: usize,
plan: &[HcOptimalSequence],
handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
) {
let current: &[u8] = unsafe {
let ls = self.get_last_space();
debug_assert!(
current_len <= ls.len(),
"current_len ({current_len}) exceeds block size ({})",
ls.len()
);
core::slice::from_raw_parts(ls.as_ptr(), current_len)
};
if plan.is_empty() {
handle_sequence(Sequence::Literals { literals: current });
return;
}
let mut literals_start = 0usize;
for item in plan {
let lit_len = item.lit_len as usize;
let match_len = item.match_len as usize;
let Some(start) = literals_start.checked_add(lit_len) else {
continue;
};
let Some(end) = start.checked_add(match_len) else {
continue;
};
if end > current_len {
continue;
}
let literals = ¤t[literals_start..start];
handle_sequence(Sequence::Triple {
literals,
offset: item.offset as usize,
match_len,
});
encode_offset_with_history(item.offset, literals.len() as u32, &mut self.offset_hist);
literals_start = end;
}
if literals_start < current_len {
handle_sequence(Sequence::Literals {
literals: ¤t[literals_start..],
});
}
}
}
#[cfg(test)]
mod storage_tests {
use alloc::vec;
use super::*;
fn new_table(window: usize) -> MatchTable {
let mut t = MatchTable::new(window);
t.hash_log = 8;
t.chain_log = 8;
t.hash3_log = 0;
t
}
#[test]
fn set_dictionary_limit_from_primed_bytes_zero_clears_limit() {
let mut t = new_table(64);
t.history_abs_start = 100;
t.dictionary_limit_abs = Some(123);
t.set_dictionary_limit_from_primed_bytes(0);
assert_eq!(t.dictionary_limit_abs, None);
}
#[test]
fn set_dictionary_limit_from_primed_bytes_offsets_from_history_start() {
let mut t = new_table(64);
t.history_abs_start = 100;
t.set_dictionary_limit_from_primed_bytes(40);
assert_eq!(t.dictionary_limit_abs, Some(140));
}
#[test]
fn dms_cache_rebuilds_across_hc_bt_layout_switch() {
let mut t = new_table(64);
t.hash_log = 12;
t.search_mls = 4;
t.push_test_chunk(vec![7u8; 48]);
t.ensure_tables();
let region = 48;
t.prime_dms_hc(region);
assert_eq!(t.dms.table().unwrap().layout, DmsDictLayout::Hc);
assert_eq!(t.dms.table().unwrap().chain_table.len(), region);
t.prime_dms_bt(region);
assert_eq!(t.dms.table().unwrap().layout, DmsDictLayout::Bt);
assert_eq!(t.dms.table().unwrap().chain_table.len(), 2 * region);
t.prime_dms_hc(region);
assert_eq!(t.dms.table().unwrap().layout, DmsDictLayout::Hc);
assert_eq!(t.dms.table().unwrap().chain_table.len(), region);
}
#[test]
fn skip_matching_bt_incompressible_routes_through_sparse_block() {
let mut t = new_table(32);
t.push_test_chunk(vec![0u8; 32]);
t.ensure_tables();
t.uses_bt = true;
t.is_btultra2 = false;
t.search_depth = 4;
let before_skip_until = t.skip_insert_until_abs;
t.skip_matching(Some(true));
assert!(t.skip_insert_until_abs >= t.window_size);
assert!(t.skip_insert_until_abs > before_skip_until);
}
#[test]
fn skip_matching_bt_dense_routes_through_bt_update_tree() {
let mut t = new_table(32);
t.push_test_chunk(vec![1u8; 32]);
t.ensure_tables();
t.uses_bt = true;
t.is_btultra2 = false;
t.search_depth = 4;
t.skip_matching(None);
assert_eq!(t.skip_insert_until_abs, t.history_abs_start + t.window_size);
}
#[test]
fn replay_history_for_rebase_bt_walks_inserted_prefix() {
let mut t = new_table(64);
t.history = vec![0u8; 64];
for (i, slot) in t.history.iter_mut().enumerate() {
*slot = (i % 17) as u8;
}
t.history_start = 0;
t.history_abs_start = 0;
t.window_size = 64;
t.position_base = 0;
t.search_depth = 4;
t.uses_bt = true;
t.ensure_tables();
assert!(t.hash_table.iter().all(|&v| v == HC_EMPTY));
t.replay_history_for_rebase_bt(0, 32);
assert!(
t.hash_table.iter().any(|&v| v != HC_EMPTY),
"BT replay must populate hash table"
);
}
#[test]
fn begin_rebase_clears_index_tables_and_resets_base() {
let mut t = new_table(32);
t.hash_table = vec![7; 16];
t.chain_table = vec![9; 16];
t.hash3_table = vec![5; 16];
t.history_abs_start = 50;
t.position_base = 0;
t.index_shift = 4;
t.allow_zero_relative_position = false;
t.begin_rebase();
assert_eq!(t.position_base, 50);
assert_eq!(t.index_shift, 0);
assert!(t.allow_zero_relative_position);
assert!(t.hash_table.iter().all(|&v| v == HC_EMPTY));
assert!(t.chain_table.iter().all(|&v| v == HC_EMPTY));
assert!(t.hash3_table.iter().all(|&v| v == HC_EMPTY));
}
#[test]
fn rebase_positions_cold_rebuilds_hash3_for_btultra2() {
let mut t = new_table(64);
t.history = b"abcdef_abcdef_abcdef_abcdef_abcdef_abcdef".to_vec();
t.history_start = 0;
t.history_abs_start = 0;
t.window_size = t.history.len();
t.chunk_lens.push_back(t.history.len());
t.hash_log = 8;
t.chain_log = 8;
t.hash3_log = 6;
t.is_btultra2 = true;
t.search_depth = 4;
t.ensure_tables();
t.update_hash3_until(20);
assert!(
t.hash3_table.iter().any(|&v| v != HC_EMPTY),
"fixture precondition: hash3 must be non-empty before rebase"
);
t.rebase_positions_cold(20);
assert!(
t.hash3_table.iter().any(|&v| v != HC_EMPTY),
"rebase must repopulate the HC3 side table — \
btultra2 short-match selection depends on it"
);
}
#[test]
fn insert_positions_with_step_zero_step_is_noop() {
let mut t = new_table(32);
t.history = vec![0u8; 32];
t.push_test_chunk(vec![0u8; 32]);
t.ensure_tables();
let next_to_update3_before = t.next_to_update3;
t.insert_positions_with_step(0, 16, 0);
assert!(t.hash_table.iter().all(|&v| v == HC_EMPTY));
assert_eq!(t.next_to_update3, next_to_update3_before);
}
#[test]
fn insert_positions_with_step_saturating_step_breaks_loop() {
let mut t = new_table(32);
t.history = vec![1u8; 32];
t.push_test_chunk(vec![1u8; 32]);
t.ensure_tables();
t.insert_positions_with_step(0, 16, usize::MAX);
let non_empty = t.hash_table.iter().filter(|&&v| v != HC_EMPTY).count();
assert!(
non_empty <= 1,
"step=usize::MAX must break after the first insert"
);
}
#[test]
fn apply_limited_update_after_long_match_hc_mode_is_noop() {
let mut t = new_table(32);
t.uses_bt = false;
t.skip_insert_until_abs = 100;
t.apply_limited_update_after_long_match(1000);
assert_eq!(
t.skip_insert_until_abs, 100,
"HC mode must not adjust skip cursor"
);
}
#[test]
fn apply_limited_update_after_long_match_bt_mode_caps_gap_at_384() {
let mut t = new_table(32);
t.uses_bt = true;
t.skip_insert_until_abs = 0;
t.apply_limited_update_after_long_match(1000);
assert_eq!(t.skip_insert_until_abs, 808);
}
#[test]
fn apply_limited_update_after_long_match_small_gap_is_noop() {
let mut t = new_table(32);
t.uses_bt = true;
t.skip_insert_until_abs = 800;
t.apply_limited_update_after_long_match(1000);
assert_eq!(t.skip_insert_until_abs, 800);
}
#[test]
fn emit_optimal_plan_empty_plan_emits_full_literals() {
let mut t = new_table(8);
t.push_test_chunk(b"abcdefgh".to_vec());
let mut emitted: Vec<u8> = Vec::new();
t.emit_optimal_plan(8, &[], &mut |seq| {
if let Sequence::Literals { literals } = seq {
emitted.extend_from_slice(literals);
}
});
assert_eq!(emitted, b"abcdefgh");
}
#[test]
fn emit_optimal_plan_skips_oversized_plan_item_and_emits_trailing_literals() {
let mut t = new_table(8);
t.push_test_chunk(b"abcdefgh".to_vec());
let plan = [HcOptimalSequence {
offset: 1,
lit_len: 4,
match_len: 99, }];
let mut triples = 0usize;
let mut trailing: Vec<u8> = Vec::new();
t.emit_optimal_plan(8, &plan, &mut |seq| match seq {
Sequence::Triple { .. } => triples += 1,
Sequence::Literals { literals } => trailing.extend_from_slice(literals),
});
assert_eq!(triples, 0, "oversized plan item must be skipped");
assert_eq!(
trailing, b"abcdefgh",
"trailing-literals path must emit the full window when plan skipped everything"
);
}
}