use alloc::collections::VecDeque;
use alloc::vec::Vec;
use super::super::Sequence;
use super::super::blocks::encode_offset_with_history;
use super::super::cost_model::{HC_OPT_NUM, HcOptimalCostProfile};
use super::super::hc::HC_MIN_MATCH_LEN;
use super::super::opt::types::{HcOptimalSequence, MatchCandidate};
use super::helpers::INCOMPRESSIBLE_SKIP_STEP;
pub(crate) const STREAM_ABS_HEADROOM: usize = HC_OPT_NUM + 16;
#[inline]
pub(crate) fn check_stream_abs_headroom(
history_abs_start: usize,
window_size: usize,
data_len: usize,
) {
let _future_abs_end = history_abs_start
.checked_add(window_size)
.and_then(|p| p.checked_add(data_len))
.and_then(|p| p.checked_add(STREAM_ABS_HEADROOM))
.expect(
"structured-zstd: cumulative input would leave less than \
STREAM_ABS_HEADROOM (HC_OPT_NUM + 16) slack below \
`usize::MAX` for the encoder's absolute-position \
lookahead; on 32-bit targets, split the input into \
smaller frames or use a 64-bit build",
);
}
#[cfg(test)]
mod bt_pair_index_wrap_tests {
use super::MatchTable;
#[test]
fn bt_pair_index_matches_modulo_ring_after_wraparound() {
let mut table = MatchTable::new(1 << 20);
table.chain_log = 4;
table.index_shift = usize::MAX - 5;
let bt_mask = table.bt_mask();
assert_eq!(bt_mask, 0b0111);
for abs_pos in [0usize, 1, 4, 5, 6, 7, 8, 12, 17] {
let got = table.bt_pair_index_for_abs(abs_pos);
let expected = 2 * (abs_pos.wrapping_add(table.index_shift) & bt_mask);
assert_eq!(
got, expected,
"abs_pos={abs_pos}: wrapping_add ring slot must match the masked sum"
);
}
assert_eq!(table.bt_pair_index_for_abs(7), 2);
assert_eq!(table.bt_pair_index_for_abs(14), 0);
}
#[test]
fn bt_pair_index_matches_modulo_ring_without_overflow() {
let mut table = MatchTable::new(1 << 20);
table.chain_log = 8; table.index_shift = 17;
let bt_mask = table.bt_mask();
for abs_pos in [0usize, 1, 16, 32, 64, 127, 128, 255, 1 << 20] {
let got = table.bt_pair_index_for_abs(abs_pos);
let expected = 2 * ((abs_pos + table.index_shift) & bt_mask);
assert_eq!(got, expected, "abs_pos={abs_pos}");
}
}
}
#[cfg(test)]
mod stream_abs_headroom_tests {
use super::{STREAM_ABS_HEADROOM, check_stream_abs_headroom};
#[test]
fn accepts_exactly_at_the_boundary() {
let history_abs_start = usize::MAX - STREAM_ABS_HEADROOM - 2;
check_stream_abs_headroom(history_abs_start, 1, 1);
}
#[test]
fn accepts_well_below_the_boundary() {
check_stream_abs_headroom(0, 1 << 20, 1 << 20);
}
#[test]
#[should_panic(expected = "STREAM_ABS_HEADROOM")]
fn rejects_one_byte_past_the_boundary() {
let history_abs_start = usize::MAX - STREAM_ABS_HEADROOM - 1;
check_stream_abs_headroom(history_abs_start, 1, 1);
}
#[test]
#[should_panic(expected = "STREAM_ABS_HEADROOM")]
fn rejects_history_abs_start_already_too_high() {
check_stream_abs_headroom(usize::MAX - 10, 0, 0);
}
}
pub(crate) const HC_PRIME3BYTES: u32 = 506_832_829;
pub(crate) const HC_PRIME4BYTES: u32 = 2_654_435_761;
pub(crate) const HC_EMPTY: u32 = 0;
pub(crate) const HC_HASH_LOG: usize = 20;
pub(crate) const HC_CHAIN_LOG: usize = 19;
pub(crate) const HC3_HASH_LOG: usize = 17;
pub(crate) struct MatchTable {
pub(crate) max_window_size: usize,
pub(crate) window: VecDeque<Vec<u8>>,
pub(crate) window_size: usize,
pub(crate) history: Vec<u8>,
pub(crate) history_start: usize,
pub(crate) history_abs_start: usize,
pub(crate) position_base: usize,
pub(crate) index_shift: usize,
pub(crate) offset_hist: [u32; 3],
pub(crate) hash_table: Vec<u32>,
pub(crate) hash3_table: Vec<u32>,
pub(crate) chain_table: Vec<u32>,
pub(crate) hash_log: usize,
pub(crate) chain_log: usize,
pub(crate) hash3_log: usize,
pub(crate) next_to_update3: usize,
pub(crate) skip_insert_until_abs: usize,
pub(crate) dictionary_limit_abs: Option<usize>,
pub(crate) dictionary_primed_for_frame: bool,
pub(crate) allow_zero_relative_position: bool,
pub(crate) search_depth: usize,
pub(crate) is_btultra2: bool,
pub(crate) uses_bt: bool,
}
impl MatchTable {
pub(crate) fn new(max_window_size: usize) -> Self {
Self {
max_window_size,
window: VecDeque::new(),
window_size: 0,
history: Vec::new(),
history_start: 0,
history_abs_start: 0,
position_base: 0,
index_shift: 0,
offset_hist: [1, 4, 8],
hash_table: Vec::new(),
hash3_table: Vec::new(),
chain_table: Vec::new(),
hash_log: HC_HASH_LOG,
chain_log: HC_CHAIN_LOG,
hash3_log: HC3_HASH_LOG,
next_to_update3: 0,
skip_insert_until_abs: 0,
dictionary_limit_abs: None,
dictionary_primed_for_frame: false,
allow_zero_relative_position: false,
search_depth: 0,
is_btultra2: false,
uses_bt: false,
}
}
#[inline(always)]
pub(crate) fn can_skip_rebase_check_at(
&self,
abs_pos: usize,
max_abs_pos: usize,
is_btultra2: bool,
) -> bool {
let max_rel_no_rebase = (u32::MAX as usize).saturating_sub(2);
self.position_base == 0
&& self.index_shift == 0
&& max_abs_pos <= max_rel_no_rebase
&& (self.allow_zero_relative_position
|| abs_pos > self.history_abs_start
|| (is_btultra2 && abs_pos == self.history_abs_start))
}
#[inline]
pub(crate) fn needs_rebase(&self, abs_pos: usize, is_btultra2: bool) -> bool {
if is_btultra2
&& !self.allow_zero_relative_position
&& self.position_base == 0
&& abs_pos == 0
{
return false;
}
self.relative_position(abs_pos)
.is_none_or(|relative| relative >= u32::MAX - 1)
}
pub(crate) fn insert_hash3_only_no_rebase(&mut self, abs_pos: usize) {
if self.hash3_log == 0 {
return;
}
let idx = abs_pos - self.history_abs_start;
let concat = &self.history[self.history_start..];
if idx + 4 > concat.len() {
return;
}
let Some(relative_pos) = self.relative_position(abs_pos) else {
return;
};
let hash3 = Self::hash_position_at(concat, idx, self.hash3_log, 3);
self.hash3_table[hash3] = relative_pos + 1;
}
#[inline]
pub(crate) fn insert_position_no_rebase(&mut self, abs_pos: usize) {
let idx = abs_pos.wrapping_sub(self.history_abs_start);
let concat = &self.history[self.history_start..];
if idx + 4 > concat.len() {
return;
}
let hash = Self::hash_position_at(concat, idx, self.hash_log, 4);
let Some(relative_pos) = self.relative_position(abs_pos) else {
return;
};
let stored = relative_pos + 1;
let chain_mask = (1usize << self.chain_log) - 1;
let chain_idx = relative_pos as usize & chain_mask;
debug_assert!(hash < self.hash_table.len());
debug_assert!(chain_idx < self.chain_table.len());
unsafe {
let prev = *self.hash_table.get_unchecked(hash);
*self.chain_table.get_unchecked_mut(chain_idx) = prev;
*self.hash_table.get_unchecked_mut(hash) = stored;
}
}
pub(crate) fn ensure_tables(&mut self) {
let hash_size = 1 << self.hash_log;
if self.hash_table.len() != hash_size {
self.hash_table = alloc::vec![HC_EMPTY; hash_size];
}
let chain_size = 1 << self.chain_log;
if self.chain_table.len() != chain_size {
self.chain_table = alloc::vec![HC_EMPTY; chain_size];
}
let hash3_size = if self.hash3_log == 0 {
0
} else {
1 << self.hash3_log
};
if self.hash3_table.len() != hash3_size {
self.hash3_table = alloc::vec![HC_EMPTY; hash3_size];
}
}
#[inline(always)]
pub(crate) fn read_le_u32(data: &[u8]) -> u32 {
debug_assert!(data.len() >= 4);
unsafe { Self::read_le_u32_ptr(data.as_ptr()) }
}
#[inline(always)]
pub(crate) unsafe fn read_le_u32_ptr(ptr: *const u8) -> u32 {
unsafe { u32::from_le(core::ptr::read_unaligned(ptr as *const u32)) }
}
#[inline(always)]
pub(crate) fn hash_value_with_mls(value: u32, hash_log: usize, mls: usize) -> usize {
match mls {
3 => (((value << 8).wrapping_mul(HC_PRIME3BYTES)) >> (32 - hash_log)) as usize,
_ => ((value.wrapping_mul(HC_PRIME4BYTES)) >> (32 - hash_log)) as usize,
}
}
#[inline(always)]
pub(crate) fn hash_position_with_mls(data: &[u8], hash_log: usize, mls: usize) -> usize {
let value = Self::read_le_u32(data);
Self::hash_value_with_mls(value, hash_log, mls)
}
#[inline(always)]
pub(crate) fn hash_position_at(data: &[u8], idx: usize, hash_log: usize, mls: usize) -> usize {
debug_assert!(idx + 4 <= data.len());
let value = unsafe { Self::read_le_u32_ptr(data.as_ptr().add(idx)) };
Self::hash_value_with_mls(value, hash_log, mls)
}
#[inline(always)]
pub(crate) fn hash_position(&self, data: &[u8]) -> usize {
Self::hash_position_with_mls(data, self.hash_log, 4)
}
#[cfg(test)]
pub(crate) fn hash3_position(data: &[u8], hash_log: usize) -> usize {
let value = Self::read_le_u32(data);
(((value << 8).wrapping_mul(HC_PRIME3BYTES)) >> (32 - hash_log)) as usize
}
pub(crate) fn mark_dictionary_primed(&mut self) {
self.dictionary_primed_for_frame = true;
}
pub(crate) fn set_dictionary_limit_from_primed_bytes(&mut self, primed_len: usize) {
self.dictionary_limit_abs = if primed_len == 0 {
None
} else {
Some(self.history_abs_start.saturating_add(primed_len))
};
}
pub(crate) fn add_data(&mut self, data: Vec<u8>, mut reuse_space: impl FnMut(Vec<u8>)) {
assert!(data.len() <= self.max_window_size);
check_stream_abs_headroom(self.history_abs_start, self.window_size, data.len());
while self.window_size + data.len() > self.max_window_size {
let removed = self.window.pop_front().unwrap();
self.window_size -= removed.len();
self.history_start += removed.len();
self.history_abs_start += removed.len();
reuse_space(removed);
}
self.compact_history();
self.history.extend_from_slice(&data);
self.next_to_update3 = self.next_to_update3.max(self.history_abs_start);
self.window_size += data.len();
self.window.push_back(data);
}
pub(crate) fn trim_to_window(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
while self.window_size > self.max_window_size {
let removed = self.window.pop_front().unwrap();
self.window_size -= removed.len();
self.history_start += removed.len();
self.history_abs_start += removed.len();
reuse_space(removed);
}
}
pub(crate) fn compact_history(&mut self) {
if self.history_start == 0 {
return;
}
if self.history_start >= self.max_window_size
|| self.history_start * 2 >= self.history.len()
{
self.history.drain(..self.history_start);
self.history_start = 0;
}
}
pub(crate) fn live_history(&self) -> &[u8] {
&self.history[self.history_start..]
}
pub(crate) fn history_abs_end(&self) -> usize {
self.history_abs_start + self.live_history().len()
}
pub(crate) fn get_last_space(&self) -> &[u8] {
self.window.back().unwrap().as_slice()
}
pub(crate) fn relative_position(&self, abs_pos: usize) -> Option<u32> {
let shifted_abs = abs_pos.checked_add(self.index_shift)?;
let rel = shifted_abs.checked_sub(self.position_base)?;
let rel_u32 = u32::try_from(rel).ok()?;
if !self.allow_zero_relative_position && self.position_base == 0 && rel_u32 == 0 {
return None;
}
(rel_u32 < u32::MAX).then_some(rel_u32)
}
pub(crate) fn window_low_abs_for_target(&self, target_abs: usize) -> usize {
let history_low = self.history_abs_start;
let window_low = target_abs.saturating_sub(self.max_window_size);
history_low.max(window_low)
}
#[inline(always)]
pub(crate) fn bt_log(&self) -> usize {
self.chain_log.saturating_sub(1)
}
#[inline(always)]
pub(crate) fn bt_mask(&self) -> usize {
(1usize << self.bt_log()) - 1
}
#[inline(always)]
pub(crate) fn bt_pair_index_for_abs(&self, abs_pos: usize) -> usize {
let bt_pos = abs_pos.wrapping_add(self.index_shift);
2 * (bt_pos & self.bt_mask())
}
#[inline(always)]
pub(crate) fn stored_abs_position_fast(
stored: u32,
position_base: usize,
index_shift: usize,
) -> Option<usize> {
if stored == HC_EMPTY {
return None;
}
let shifted = position_base + (stored as usize - 1);
if shifted < index_shift {
return None;
}
Some(shifted - index_shift)
}
pub(crate) fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>)) {
self.window_size = 0;
self.history.clear();
self.history_start = 0;
self.history_abs_start = 0;
self.position_base = 0;
self.index_shift = 0;
self.offset_hist = [1, 4, 8];
self.next_to_update3 = 0;
self.skip_insert_until_abs = 0;
self.dictionary_limit_abs = None;
self.dictionary_primed_for_frame = false;
self.allow_zero_relative_position = false;
self.hash_table.fill(HC_EMPTY);
self.hash3_table.fill(HC_EMPTY);
self.chain_table.fill(HC_EMPTY);
for mut data in self.window.drain(..) {
data.resize(data.capacity(), 0);
reuse_space(data);
}
}
pub(crate) fn donor_opt_start_cursor_and_litlen(
&self,
current_abs_start: usize,
) -> (usize, usize) {
let start_cursor = usize::from(current_abs_start == self.history_abs_start);
(start_cursor, start_cursor)
}
#[inline(always)]
pub(crate) fn bt_insert_step_no_rebase(
&mut self,
abs_pos: usize,
current_abs_end: usize,
target_abs: usize,
) -> usize {
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
unsafe {
self.bt_insert_step_no_rebase_neon(abs_pos, current_abs_end, target_abs)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
use crate::encoding::fastpath::{FastpathKernel, select_kernel};
match select_kernel() {
FastpathKernel::Avx2Bmi2 => unsafe {
self.bt_insert_step_no_rebase_avx2_bmi2(abs_pos, current_abs_end, target_abs)
},
FastpathKernel::Sse42 => unsafe {
self.bt_insert_step_no_rebase_sse42(abs_pos, current_abs_end, target_abs)
},
FastpathKernel::Scalar => {
self.bt_insert_step_no_rebase_scalar(abs_pos, current_abs_end, target_abs)
}
}
}
#[cfg(not(any(
all(target_arch = "aarch64", target_endian = "little"),
target_arch = "x86",
target_arch = "x86_64"
)))]
{
self.bt_insert_step_no_rebase_scalar(abs_pos, current_abs_end, target_abs)
}
}
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bt_insert_step_no_rebase_neon(
&mut self,
abs_pos: usize,
current_abs_end: usize,
target_abs: usize,
) -> usize {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_step_no_rebase_body!(
self,
search_depth,
abs_pos,
current_abs_end,
target_abs,
crate::encoding::fastpath::neon::count_match_from_indices
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2")]
pub(crate) unsafe fn bt_insert_step_no_rebase_sse42(
&mut self,
abs_pos: usize,
current_abs_end: usize,
target_abs: usize,
) -> usize {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_step_no_rebase_body!(
self,
search_depth,
abs_pos,
current_abs_end,
target_abs,
crate::encoding::fastpath::sse42::count_match_from_indices
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,bmi2")]
pub(crate) unsafe fn bt_insert_step_no_rebase_avx2_bmi2(
&mut self,
abs_pos: usize,
current_abs_end: usize,
target_abs: usize,
) -> usize {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_step_no_rebase_body!(
self,
search_depth,
abs_pos,
current_abs_end,
target_abs,
crate::encoding::fastpath::avx2_bmi2::count_match_from_indices
)
}
#[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
pub(crate) fn bt_insert_step_no_rebase_scalar(
&mut self,
abs_pos: usize,
current_abs_end: usize,
target_abs: usize,
) -> usize {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_step_no_rebase_body!(
self,
search_depth,
abs_pos,
current_abs_end,
target_abs,
crate::encoding::fastpath::scalar::count_match_from_indices
)
}
#[allow(dead_code)]
#[allow(clippy::too_many_arguments)]
#[inline(always)]
pub(crate) fn bt_insert_and_collect_matches(
&mut self,
abs_pos: usize,
current_abs_end: usize,
profile: HcOptimalCostProfile,
min_match_len: usize,
best_len_for_skip: &mut usize,
out: &mut Vec<MatchCandidate>,
) {
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
unsafe {
self.bt_insert_and_collect_matches_neon(
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
use crate::encoding::fastpath::{FastpathKernel, select_kernel};
match select_kernel() {
FastpathKernel::Avx2Bmi2 => unsafe {
self.bt_insert_and_collect_matches_avx2_bmi2(
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
)
},
FastpathKernel::Sse42 => unsafe {
self.bt_insert_and_collect_matches_sse42(
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
)
},
FastpathKernel::Scalar => self.bt_insert_and_collect_matches_scalar(
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
),
}
}
#[cfg(not(any(
all(target_arch = "aarch64", target_endian = "little"),
target_arch = "x86",
target_arch = "x86_64"
)))]
{
self.bt_insert_and_collect_matches_scalar(
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
)
}
}
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[target_feature(enable = "neon")]
#[allow(clippy::too_many_arguments)]
pub(crate) unsafe fn bt_insert_and_collect_matches_neon(
&mut self,
abs_pos: usize,
current_abs_end: usize,
profile: HcOptimalCostProfile,
min_match_len: usize,
best_len_for_skip: &mut usize,
out: &mut Vec<MatchCandidate>,
) {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_and_collect_matches_body!(
self,
search_depth,
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
crate::encoding::fastpath::neon::count_match_from_indices,
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2")]
#[allow(clippy::too_many_arguments)]
pub(crate) unsafe fn bt_insert_and_collect_matches_sse42(
&mut self,
abs_pos: usize,
current_abs_end: usize,
profile: HcOptimalCostProfile,
min_match_len: usize,
best_len_for_skip: &mut usize,
out: &mut Vec<MatchCandidate>,
) {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_and_collect_matches_body!(
self,
search_depth,
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
crate::encoding::fastpath::sse42::count_match_from_indices,
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,bmi2")]
#[allow(clippy::too_many_arguments)]
pub(crate) unsafe fn bt_insert_and_collect_matches_avx2_bmi2(
&mut self,
abs_pos: usize,
current_abs_end: usize,
profile: HcOptimalCostProfile,
min_match_len: usize,
best_len_for_skip: &mut usize,
out: &mut Vec<MatchCandidate>,
) {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_and_collect_matches_body!(
self,
search_depth,
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
crate::encoding::fastpath::avx2_bmi2::count_match_from_indices,
)
}
#[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
#[allow(clippy::too_many_arguments)]
pub(crate) fn bt_insert_and_collect_matches_scalar(
&mut self,
abs_pos: usize,
current_abs_end: usize,
profile: HcOptimalCostProfile,
min_match_len: usize,
best_len_for_skip: &mut usize,
out: &mut Vec<MatchCandidate>,
) {
let search_depth = self.search_depth;
super::super::match_generator::bt_insert_and_collect_matches_body!(
self,
search_depth,
abs_pos,
current_abs_end,
profile,
min_match_len,
best_len_for_skip,
out,
crate::encoding::fastpath::scalar::count_match_from_indices,
)
}
pub(crate) fn replay_history_for_rebase_bt(&mut self, history_start: usize, abs_pos: usize) {
let rebuild_end = self.history_abs_end();
let mut pos = history_start;
while pos < abs_pos {
let forward = self.bt_insert_step_no_rebase(pos, rebuild_end, abs_pos);
let step = forward.max(1).min(abs_pos - pos);
pos += step;
}
}
#[inline(always)]
pub(crate) fn bt_update_tree_until(&mut self, abs_pos: usize, current_abs_end: usize) {
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
unsafe {
self.bt_update_tree_until_neon(abs_pos, current_abs_end)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
use crate::encoding::fastpath::{FastpathKernel, select_kernel};
match select_kernel() {
FastpathKernel::Avx2Bmi2 => unsafe {
self.bt_update_tree_until_avx2_bmi2(abs_pos, current_abs_end)
},
FastpathKernel::Sse42 => unsafe {
self.bt_update_tree_until_sse42(abs_pos, current_abs_end)
},
FastpathKernel::Scalar => {
self.bt_update_tree_until_scalar(abs_pos, current_abs_end)
}
}
}
#[cfg(not(any(
all(target_arch = "aarch64", target_endian = "little"),
target_arch = "x86",
target_arch = "x86_64"
)))]
{
self.bt_update_tree_until_scalar(abs_pos, current_abs_end)
}
}
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn bt_update_tree_until_neon(
&mut self,
abs_pos: usize,
current_abs_end: usize,
) {
if self.skip_insert_until_abs < self.history_abs_start {
self.skip_insert_until_abs = self.history_abs_start;
}
let mut update_abs = self.skip_insert_until_abs;
let is_btultra2 = self.is_btultra2;
while update_abs < abs_pos {
if !self.can_skip_rebase_check_at(update_abs, abs_pos, is_btultra2) {
self.maybe_rebase_positions(update_abs);
}
let forward =
unsafe { self.bt_insert_step_no_rebase_neon(update_abs, current_abs_end, abs_pos) };
update_abs += forward.max(1).min(abs_pos - update_abs);
}
self.skip_insert_until_abs = abs_pos;
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2")]
pub(crate) unsafe fn bt_update_tree_until_sse42(
&mut self,
abs_pos: usize,
current_abs_end: usize,
) {
if self.skip_insert_until_abs < self.history_abs_start {
self.skip_insert_until_abs = self.history_abs_start;
}
let mut update_abs = self.skip_insert_until_abs;
let is_btultra2 = self.is_btultra2;
while update_abs < abs_pos {
if !self.can_skip_rebase_check_at(update_abs, abs_pos, is_btultra2) {
self.maybe_rebase_positions(update_abs);
}
let forward = unsafe {
self.bt_insert_step_no_rebase_sse42(update_abs, current_abs_end, abs_pos)
};
update_abs += forward.max(1).min(abs_pos - update_abs);
}
self.skip_insert_until_abs = abs_pos;
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,bmi2")]
pub(crate) unsafe fn bt_update_tree_until_avx2_bmi2(
&mut self,
abs_pos: usize,
current_abs_end: usize,
) {
if self.skip_insert_until_abs < self.history_abs_start {
self.skip_insert_until_abs = self.history_abs_start;
}
let mut update_abs = self.skip_insert_until_abs;
let is_btultra2 = self.is_btultra2;
while update_abs < abs_pos {
if !self.can_skip_rebase_check_at(update_abs, abs_pos, is_btultra2) {
self.maybe_rebase_positions(update_abs);
}
let forward = unsafe {
self.bt_insert_step_no_rebase_avx2_bmi2(update_abs, current_abs_end, abs_pos)
};
update_abs += forward.max(1).min(abs_pos - update_abs);
}
self.skip_insert_until_abs = abs_pos;
}
#[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
pub(crate) fn bt_update_tree_until_scalar(&mut self, abs_pos: usize, current_abs_end: usize) {
if self.skip_insert_until_abs < self.history_abs_start {
self.skip_insert_until_abs = self.history_abs_start;
}
let mut update_abs = self.skip_insert_until_abs;
let is_btultra2 = self.is_btultra2;
while update_abs < abs_pos {
if !self.can_skip_rebase_check_at(update_abs, abs_pos, is_btultra2) {
self.maybe_rebase_positions(update_abs);
}
let forward =
self.bt_insert_step_no_rebase_scalar(update_abs, current_abs_end, abs_pos);
update_abs += forward.max(1).min(abs_pos - update_abs);
}
self.skip_insert_until_abs = abs_pos;
}
pub(crate) fn update_hash3_until(&mut self, abs_pos: usize) {
let is_btultra2 = self.is_btultra2;
if self.next_to_update3 < self.history_abs_start {
self.next_to_update3 = self.history_abs_start;
}
if self.next_to_update3 >= abs_pos {
return;
}
while self.next_to_update3 < abs_pos {
if !self.can_skip_rebase_check_at(self.next_to_update3, abs_pos, is_btultra2) {
self.maybe_rebase_positions(self.next_to_update3);
}
self.insert_hash3_only_no_rebase(self.next_to_update3);
self.next_to_update3 += 1;
}
}
#[inline]
pub(crate) fn maybe_rebase_positions(&mut self, abs_pos: usize) {
let is_btultra2 = self.is_btultra2;
if self.needs_rebase(abs_pos, is_btultra2) {
self.rebase_positions_cold(abs_pos);
}
}
#[cold]
#[inline(never)]
pub(crate) fn rebase_positions_cold(&mut self, abs_pos: usize) {
self.begin_rebase();
let history_start = self.history_abs_start;
if self.uses_bt {
self.replay_history_for_rebase_bt(history_start, abs_pos);
} else {
self.replay_history_for_rebase_hc(history_start, abs_pos);
}
self.next_to_update3 = history_start;
if self.hash3_log != 0 {
self.update_hash3_until(abs_pos);
}
}
#[inline]
pub(crate) fn insert_position(&mut self, abs_pos: usize) {
self.maybe_rebase_positions(abs_pos);
self.insert_position_no_rebase(abs_pos);
}
pub(crate) fn insert_positions(&mut self, start: usize, end: usize) {
for pos in start..end {
self.insert_position(pos);
}
self.next_to_update3 = self.next_to_update3.max(end);
}
pub(crate) fn insert_positions_with_step(&mut self, start: usize, end: usize, step: usize) {
if step == 0 {
return;
}
let mut pos = start;
while pos < end {
self.insert_position(pos);
let next = pos.saturating_add(step);
if next <= pos {
break;
}
pos = next;
}
}
pub(crate) fn backfill_boundary_positions(
&mut self,
current_abs_start: usize,
current_abs_end: usize,
) {
let backfill_start = current_abs_start
.saturating_sub(3)
.max(self.history_abs_start);
if backfill_start < current_abs_start {
if self.uses_bt {
self.bt_update_tree_until(current_abs_start, current_abs_end);
} else {
self.insert_positions(backfill_start, current_abs_start);
}
}
}
pub(crate) fn apply_limited_update_after_long_match(&mut self, current_abs_start: usize) {
if !self.uses_bt {
return;
}
let gap = current_abs_start.saturating_sub(self.skip_insert_until_abs);
if gap > 384 {
self.skip_insert_until_abs = current_abs_start - (gap - 384).min(192);
}
}
pub(crate) fn bt_insert_sparse_incompressible_block(
&mut self,
current_abs_start: usize,
current_abs_end: usize,
) {
let mut pos = current_abs_start;
while pos < current_abs_end {
self.maybe_rebase_positions(pos);
let _ = self.bt_insert_step_no_rebase(pos, current_abs_end, current_abs_end);
self.insert_hash3_only_no_rebase(pos);
let next = pos.saturating_add(INCOMPRESSIBLE_SKIP_STEP);
if next <= pos {
break;
}
pos = next;
}
let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
let tail_start = current_abs_end
.saturating_sub(dense_tail)
.max(self.history_abs_start)
.max(current_abs_start);
for pos in tail_start..current_abs_end {
if (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
continue;
}
self.maybe_rebase_positions(pos);
let _ = self.bt_insert_step_no_rebase(pos, current_abs_end, current_abs_end);
self.insert_hash3_only_no_rebase(pos);
}
self.skip_insert_until_abs = self.skip_insert_until_abs.max(current_abs_end);
self.next_to_update3 = self.next_to_update3.max(current_abs_end);
}
pub(crate) fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
self.ensure_tables();
let current_len = self.window.back().unwrap().len();
let current_abs_start = self.history_abs_start + self.window_size - current_len;
let current_abs_end = current_abs_start + current_len;
self.backfill_boundary_positions(current_abs_start, current_abs_end);
if self.uses_bt {
if incompressible_hint == Some(true) {
self.bt_insert_sparse_incompressible_block(current_abs_start, current_abs_end);
return;
}
self.bt_update_tree_until(current_abs_end, current_abs_end);
return;
}
if incompressible_hint == Some(true) {
self.insert_positions_with_step(
current_abs_start,
current_abs_end,
INCOMPRESSIBLE_SKIP_STEP,
);
let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
let tail_start = current_abs_end
.saturating_sub(dense_tail)
.max(self.history_abs_start);
let tail_start = tail_start.max(current_abs_start);
for pos in tail_start..current_abs_end {
if !(pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP) {
self.insert_position(pos);
}
}
} else {
self.insert_positions(current_abs_start, current_abs_end);
}
}
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn hash3_candidate(
&self,
abs_pos: usize,
current_abs_end: usize,
min_match_len: usize,
) -> Option<MatchCandidate> {
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
unsafe {
self.hash3_candidate_neon(abs_pos, current_abs_end, min_match_len)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
use crate::encoding::fastpath::{FastpathKernel, select_kernel};
match select_kernel() {
FastpathKernel::Avx2Bmi2 => unsafe {
self.hash3_candidate_avx2_bmi2(abs_pos, current_abs_end, min_match_len)
},
FastpathKernel::Sse42 => unsafe {
self.hash3_candidate_sse42(abs_pos, current_abs_end, min_match_len)
},
FastpathKernel::Scalar => {
self.hash3_candidate_scalar(abs_pos, current_abs_end, min_match_len)
}
}
}
#[cfg(not(any(
all(target_arch = "aarch64", target_endian = "little"),
target_arch = "x86",
target_arch = "x86_64"
)))]
{
self.hash3_candidate_scalar(abs_pos, current_abs_end, min_match_len)
}
}
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn hash3_candidate_neon(
&self,
abs_pos: usize,
current_abs_end: usize,
min_match_len: usize,
) -> Option<MatchCandidate> {
super::super::match_generator::hash3_candidate_body!(
self,
abs_pos,
current_abs_end,
min_match_len,
crate::encoding::fastpath::neon::common_prefix_len_ptr,
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse4.2")]
pub(crate) unsafe fn hash3_candidate_sse42(
&self,
abs_pos: usize,
current_abs_end: usize,
min_match_len: usize,
) -> Option<MatchCandidate> {
super::super::match_generator::hash3_candidate_body!(
self,
abs_pos,
current_abs_end,
min_match_len,
crate::encoding::fastpath::sse42::common_prefix_len_ptr,
)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,bmi2")]
pub(crate) unsafe fn hash3_candidate_avx2_bmi2(
&self,
abs_pos: usize,
current_abs_end: usize,
min_match_len: usize,
) -> Option<MatchCandidate> {
super::super::match_generator::hash3_candidate_body!(
self,
abs_pos,
current_abs_end,
min_match_len,
crate::encoding::fastpath::avx2_bmi2::common_prefix_len_ptr,
)
}
#[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
pub(crate) fn hash3_candidate_scalar(
&self,
abs_pos: usize,
current_abs_end: usize,
min_match_len: usize,
) -> Option<MatchCandidate> {
super::super::match_generator::hash3_candidate_body!(
self,
abs_pos,
current_abs_end,
min_match_len,
crate::encoding::fastpath::scalar::common_prefix_len_ptr,
)
}
pub(crate) fn begin_rebase(&mut self) {
self.position_base = self.history_abs_start;
self.index_shift = 0;
self.allow_zero_relative_position = true;
self.hash_table.fill(HC_EMPTY);
self.hash3_table.fill(HC_EMPTY);
self.chain_table.fill(HC_EMPTY);
}
pub(crate) fn replay_history_for_rebase_hc(&mut self, history_start: usize, abs_pos: usize) {
for pos in history_start..abs_pos {
self.insert_position_no_rebase(pos);
}
}
pub(crate) fn emit_optimal_plan(
&mut self,
current_len: usize,
plan: &[HcOptimalSequence],
handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
) {
let current = self.window.back().unwrap().as_slice();
if plan.is_empty() {
handle_sequence(Sequence::Literals { literals: current });
return;
}
let mut literals_start = 0usize;
for item in plan {
let lit_len = item.lit_len as usize;
let match_len = item.match_len as usize;
let Some(start) = literals_start.checked_add(lit_len) else {
continue;
};
let Some(end) = start.checked_add(match_len) else {
continue;
};
if end > current_len {
continue;
}
let literals = ¤t[literals_start..start];
handle_sequence(Sequence::Triple {
literals,
offset: item.offset as usize,
match_len,
});
encode_offset_with_history(item.offset, literals.len() as u32, &mut self.offset_hist);
literals_start = end;
}
if literals_start < current_len {
handle_sequence(Sequence::Literals {
literals: ¤t[literals_start..],
});
}
}
}
#[cfg(test)]
mod storage_tests {
use alloc::vec;
use super::*;
fn new_table(window: usize) -> MatchTable {
let mut t = MatchTable::new(window);
t.window_size = window;
t.hash_log = 8;
t.chain_log = 8;
t.hash3_log = 0;
t
}
#[test]
fn set_dictionary_limit_from_primed_bytes_zero_clears_limit() {
let mut t = new_table(64);
t.history_abs_start = 100;
t.dictionary_limit_abs = Some(123);
t.set_dictionary_limit_from_primed_bytes(0);
assert_eq!(t.dictionary_limit_abs, None);
}
#[test]
fn set_dictionary_limit_from_primed_bytes_offsets_from_history_start() {
let mut t = new_table(64);
t.history_abs_start = 100;
t.set_dictionary_limit_from_primed_bytes(40);
assert_eq!(t.dictionary_limit_abs, Some(140));
}
#[test]
fn skip_matching_bt_incompressible_routes_through_sparse_block() {
let mut t = new_table(32);
t.window.push_back(vec![0u8; 32]);
t.ensure_tables();
t.uses_bt = true;
t.is_btultra2 = false;
t.search_depth = 4;
let before_skip_until = t.skip_insert_until_abs;
t.skip_matching(Some(true));
assert!(t.skip_insert_until_abs >= t.window_size);
assert!(t.skip_insert_until_abs > before_skip_until);
}
#[test]
fn skip_matching_bt_dense_routes_through_bt_update_tree() {
let mut t = new_table(32);
t.window.push_back(vec![1u8; 32]);
t.ensure_tables();
t.uses_bt = true;
t.is_btultra2 = false;
t.search_depth = 4;
t.skip_matching(None);
assert_eq!(t.skip_insert_until_abs, t.history_abs_start + t.window_size);
}
#[test]
fn replay_history_for_rebase_bt_walks_inserted_prefix() {
let mut t = new_table(64);
t.history = vec![0u8; 64];
for (i, slot) in t.history.iter_mut().enumerate() {
*slot = (i % 17) as u8;
}
t.history_start = 0;
t.history_abs_start = 0;
t.window_size = 64;
t.position_base = 0;
t.search_depth = 4;
t.uses_bt = true;
t.ensure_tables();
assert!(t.hash_table.iter().all(|&v| v == HC_EMPTY));
t.replay_history_for_rebase_bt(0, 32);
assert!(
t.hash_table.iter().any(|&v| v != HC_EMPTY),
"BT replay must populate hash table"
);
}
#[test]
fn begin_rebase_clears_index_tables_and_resets_base() {
let mut t = new_table(32);
t.hash_table = vec![7; 16];
t.chain_table = vec![9; 16];
t.hash3_table = vec![5; 16];
t.history_abs_start = 50;
t.position_base = 0;
t.index_shift = 4;
t.allow_zero_relative_position = false;
t.begin_rebase();
assert_eq!(t.position_base, 50);
assert_eq!(t.index_shift, 0);
assert!(t.allow_zero_relative_position);
assert!(t.hash_table.iter().all(|&v| v == HC_EMPTY));
assert!(t.chain_table.iter().all(|&v| v == HC_EMPTY));
assert!(t.hash3_table.iter().all(|&v| v == HC_EMPTY));
}
#[test]
fn rebase_positions_cold_rebuilds_hash3_for_btultra2() {
let mut t = new_table(64);
t.history = b"abcdef_abcdef_abcdef_abcdef_abcdef_abcdef".to_vec();
t.history_start = 0;
t.history_abs_start = 0;
t.window_size = t.history.len();
t.window.push_back(t.history.clone());
t.hash_log = 8;
t.chain_log = 8;
t.hash3_log = 6;
t.is_btultra2 = true;
t.search_depth = 4;
t.ensure_tables();
t.update_hash3_until(20);
assert!(
t.hash3_table.iter().any(|&v| v != HC_EMPTY),
"fixture precondition: hash3 must be non-empty before rebase"
);
t.rebase_positions_cold(20);
assert!(
t.hash3_table.iter().any(|&v| v != HC_EMPTY),
"rebase must repopulate the HC3 side table — \
btultra2 short-match selection depends on it"
);
}
#[test]
fn insert_positions_with_step_zero_step_is_noop() {
let mut t = new_table(32);
t.history = vec![0u8; 32];
t.window.push_back(vec![0u8; 32]);
t.ensure_tables();
let next_to_update3_before = t.next_to_update3;
t.insert_positions_with_step(0, 16, 0);
assert!(t.hash_table.iter().all(|&v| v == HC_EMPTY));
assert_eq!(t.next_to_update3, next_to_update3_before);
}
#[test]
fn insert_positions_with_step_saturating_step_breaks_loop() {
let mut t = new_table(32);
t.history = vec![1u8; 32];
t.window.push_back(vec![1u8; 32]);
t.ensure_tables();
t.insert_positions_with_step(0, 16, usize::MAX);
let non_empty = t.hash_table.iter().filter(|&&v| v != HC_EMPTY).count();
assert!(
non_empty <= 1,
"step=usize::MAX must break after the first insert"
);
}
#[test]
fn apply_limited_update_after_long_match_hc_mode_is_noop() {
let mut t = new_table(32);
t.uses_bt = false;
t.skip_insert_until_abs = 100;
t.apply_limited_update_after_long_match(1000);
assert_eq!(
t.skip_insert_until_abs, 100,
"HC mode must not adjust skip cursor"
);
}
#[test]
fn apply_limited_update_after_long_match_bt_mode_caps_gap_at_384() {
let mut t = new_table(32);
t.uses_bt = true;
t.skip_insert_until_abs = 0;
t.apply_limited_update_after_long_match(1000);
assert_eq!(t.skip_insert_until_abs, 808);
}
#[test]
fn apply_limited_update_after_long_match_small_gap_is_noop() {
let mut t = new_table(32);
t.uses_bt = true;
t.skip_insert_until_abs = 800;
t.apply_limited_update_after_long_match(1000);
assert_eq!(t.skip_insert_until_abs, 800);
}
#[test]
fn emit_optimal_plan_empty_plan_emits_full_literals() {
let mut t = new_table(8);
t.window.push_back(b"abcdefgh".to_vec());
let mut emitted: Vec<u8> = Vec::new();
t.emit_optimal_plan(8, &[], &mut |seq| {
if let Sequence::Literals { literals } = seq {
emitted.extend_from_slice(literals);
}
});
assert_eq!(emitted, b"abcdefgh");
}
#[test]
fn emit_optimal_plan_skips_oversized_plan_item_and_emits_trailing_literals() {
let mut t = new_table(8);
t.window.push_back(b"abcdefgh".to_vec());
let plan = [HcOptimalSequence {
offset: 1,
lit_len: 4,
match_len: 99, }];
let mut triples = 0usize;
let mut trailing: Vec<u8> = Vec::new();
t.emit_optimal_plan(8, &plan, &mut |seq| match seq {
Sequence::Triple { .. } => triples += 1,
Sequence::Literals { literals } => trailing.extend_from_slice(literals),
});
assert_eq!(triples, 0, "oversized plan item must be skipped");
assert_eq!(
trailing, b"abcdefgh",
"trailing-literals path must emit the full window when plan skipped everything"
);
}
}