1use alloc::vec::Vec;
9use super::BETTER_WINDOW_LOG;
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17use super::bt::BtMatcher;
18#[cfg(test)]
19use super::cost_model::HC_MAX_LIT;
20use super::cost_model::{
21 HC_BITCOST_MULTIPLIER, HC_FORMAT_MINMATCH, HC_OPT_NUM, HC_PREDEF_THRESHOLD, HcOptState,
22 HcOptimalCostProfile,
23};
24#[cfg(test)]
25use super::cost_model::{HC_BLOCKSIZE_MAX, HC_MAX_LL, HC_MAX_ML, HC_MAX_OFF, HcOptPriceType};
26use super::dfast::DfastMatchGenerator;
27#[cfg(test)]
33use super::match_table::helpers::INCOMPRESSIBLE_SKIP_STEP;
34use super::match_table::helpers::MIN_MATCH_LEN;
35#[cfg(test)]
36use super::match_table::helpers::common_prefix_len;
37#[cfg(test)]
38use super::opt::ldm::HcRawSeq;
39use super::opt::ldm::{HcOptLdmState, HcRawSeqStore};
40use super::opt::types::{
41 HcCandidateQuery, HcOptimalNode, HcOptimalPlanBuffers, HcOptimalPlanState, HcOptimalSequence,
42 MatchCandidate,
43};
44use super::row::RowMatchGenerator;
45use super::simple::fast_matcher::{FAST_LEVEL_1_HASH_LOG, FAST_LEVEL_1_MLS, FastKernelMatcher};
46#[cfg(all(
47 test,
48 feature = "std",
49 target_arch = "aarch64",
50 target_endian = "little"
51))]
52use std::arch::is_aarch64_feature_detected;
53#[cfg(all(test, feature = "std", target_arch = "x86_64"))]
54use std::arch::is_x86_feature_detected;
55
56pub(crate) const DFAST_MIN_MATCH_LEN: usize = 5;
57pub(crate) const DFAST_SHORT_HASH_LOOKAHEAD: usize = 4;
58pub(crate) const ROW_MIN_MATCH_LEN: usize = 6;
59pub(crate) const DFAST_TARGET_LEN: usize = 48;
60pub(crate) const DFAST_HASH_BITS: usize = 17;
83pub(crate) const DFAST_SHORT_HASH_BITS_DELTA: usize = 1;
89pub(crate) const DFAST_EMPTY_SLOT: u32 = 0;
97
98pub(crate) const DFAST_REBASE_GUARD_BAND: u32 = 1u32 << 30;
105pub(crate) const DFAST_SKIP_SEARCH_STRENGTH: usize = 6;
106pub(crate) const DFAST_SKIP_STEP_GROWTH_INTERVAL: usize = 1 << DFAST_SKIP_SEARCH_STRENGTH;
107pub(crate) const DFAST_LOCAL_SKIP_TRIGGER: usize = 256;
108pub(crate) const DFAST_MAX_SKIP_STEP: usize = 8;
109pub(crate) const DFAST_INCOMPRESSIBLE_SKIP_STEP: usize = 16;
110pub(crate) const ROW_HASH_BITS: usize = 20;
111pub(crate) const ROW_LOG: usize = 5;
112pub(crate) const ROW_SEARCH_DEPTH: usize = 16;
113pub(crate) const ROW_TARGET_LEN: usize = 48;
114pub(crate) const ROW_TAG_BITS: usize = 8;
115pub(crate) const ROW_EMPTY_SLOT: usize = usize::MAX;
116pub(crate) const ROW_HASH_KEY_LEN: usize = 4;
117#[cfg(test)]
124use super::match_table::storage::{HC_PRIME3BYTES, HC_PRIME4BYTES};
125
126#[cfg(test)]
131use super::match_table::storage::HC_EMPTY;
132use super::match_table::storage::{HC_CHAIN_LOG, HC_HASH_LOG, HC3_HASH_LOG};
133const HC_SEARCH_DEPTH: usize = 16;
138use super::hc::HC_MIN_MATCH_LEN;
141const HC_OPT_MIN_MATCH_LEN: usize = HC_FORMAT_MINMATCH;
142const HC_TARGET_LEN: usize = 48;
143
144use super::hc::MAX_HC_SEARCH_DEPTH;
146
147#[derive(Copy, Clone)]
155struct HcConfig {
156 hash_log: usize,
157 chain_log: usize,
158 search_depth: usize,
159 target_len: usize,
160}
161
162#[derive(Copy, Clone)]
163pub(crate) struct RowConfig {
164 pub(crate) hash_bits: usize,
165 pub(crate) row_log: usize,
166 pub(crate) search_depth: usize,
167 pub(crate) target_len: usize,
168}
169
170const HC_CONFIG: HcConfig = HcConfig {
171 hash_log: HC_HASH_LOG,
172 chain_log: HC_CHAIN_LOG,
173 search_depth: HC_SEARCH_DEPTH,
174 target_len: HC_TARGET_LEN,
175};
176
177const BEST_HC_CONFIG: HcConfig = HcConfig {
179 hash_log: 21,
180 chain_log: 20,
181 search_depth: 32,
182 target_len: 128,
183};
184
185const BTOPT_HC_CONFIG: HcConfig = HcConfig {
186 hash_log: 23,
187 chain_log: 22,
188 search_depth: 32,
189 target_len: 256,
190};
191
192const BTULTRA_HC_CONFIG: HcConfig = HcConfig {
193 hash_log: 23,
194 chain_log: 23,
195 search_depth: 32,
196 target_len: 256,
197};
198
199const BTULTRA2_HC_CONFIG: HcConfig = HcConfig {
200 hash_log: 24,
201 chain_log: 24,
202 search_depth: 512,
203 target_len: 256,
204};
205
206const BTULTRA2_HC_CONFIG_L22: HcConfig = HcConfig {
207 hash_log: 25,
208 chain_log: 27,
209 search_depth: 512,
210 target_len: 999,
211};
212
213const BTULTRA2_HC_CONFIG_L22_256K: HcConfig = HcConfig {
214 hash_log: 19,
215 chain_log: 19,
216 search_depth: 1 << 13,
217 target_len: 999,
218};
219
220const BTULTRA2_HC_CONFIG_L22_128K: HcConfig = HcConfig {
221 hash_log: 17,
222 chain_log: 18,
223 search_depth: 1 << 11,
224 target_len: 999,
225};
226
227const BTULTRA2_HC_CONFIG_L22_16K: HcConfig = HcConfig {
228 hash_log: 15,
229 chain_log: 15,
230 search_depth: 1 << 10,
231 target_len: 999,
232};
233
234const ROW_CONFIG: RowConfig = RowConfig {
235 hash_bits: ROW_HASH_BITS,
236 row_log: ROW_LOG,
237 search_depth: ROW_SEARCH_DEPTH,
238 target_len: ROW_TARGET_LEN,
239};
240
241#[derive(Copy, Clone)]
247struct LevelParams {
248 strategy_tag: super::strategy::StrategyTag,
249 window_log: u8,
250 fast_hash_log: u32,
253 fast_mls: u32,
256 fast_step_size: usize,
263 lazy_depth: u8,
264 hc: HcConfig,
265 row: RowConfig,
266}
267
268impl LevelParams {
269 fn backend(&self) -> super::strategy::BackendTag {
273 self.strategy_tag.backend()
274 }
275}
276
277fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
278 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
279 window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
280}
281
282fn row_hash_bits_for_window(max_window_size: usize) -> usize {
283 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
284 window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
285}
286
287#[rustfmt::skip]
295const LEVEL_TABLE: [LevelParams; 22] = [
296 LevelParams { strategy_tag: super::strategy::StrategyTag::Fast, window_log: 19, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
299 LevelParams { strategy_tag: super::strategy::StrategyTag::Dfast, window_log: 19, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
300 LevelParams { strategy_tag: super::strategy::StrategyTag::Dfast, window_log: 22, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
301 LevelParams { strategy_tag: super::strategy::StrategyTag::Greedy, window_log: 22, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
302 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 22, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4, target_len: 32 }, row: ROW_CONFIG },
303 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8, target_len: 48 }, row: ROW_CONFIG },
304 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48 }, row: ROW_CONFIG },
305 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
306 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
307 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 24, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96 }, row: ROW_CONFIG },
308 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 24, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG },
309 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 25, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 }, row: ROW_CONFIG },
310 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 25, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 }, row: ROW_CONFIG },
311 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 25, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
312 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
313 LevelParams { strategy_tag: super::strategy::StrategyTag::BtOpt, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTOPT_HC_CONFIG, row: ROW_CONFIG },
314 LevelParams { strategy_tag: super::strategy::StrategyTag::BtOpt, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTOPT_HC_CONFIG, row: ROW_CONFIG },
315 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTULTRA_HC_CONFIG, row: ROW_CONFIG },
316 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTULTRA_HC_CONFIG, row: ROW_CONFIG },
317 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG, row: ROW_CONFIG },
318 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG, row: ROW_CONFIG },
319 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, window_log: 27, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG_L22, row: ROW_CONFIG },
320];
321
322pub(crate) const MIN_WINDOW_LOG: u8 = 10;
324const MIN_HINTED_WINDOW_LOG: u8 = 14;
330
331fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
341 let src_log = if src_size == 0 {
346 MIN_WINDOW_LOG
347 } else {
348 (64 - (src_size - 1).leading_zeros()) as u8 };
350 let src_log = src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
351 if src_log < params.window_log {
352 params.window_log = src_log;
353 }
354 let backend = params.backend();
358 if backend == super::strategy::BackendTag::HashChain {
359 if (src_log + 2) < params.hc.hash_log as u8 {
360 params.hc.hash_log = (src_log + 2) as usize;
361 }
362 if (src_log + 1) < params.hc.chain_log as u8 {
363 params.hc.chain_log = (src_log + 1) as usize;
364 }
365 } else if backend == super::strategy::BackendTag::Row {
366 let max_window_size = 1usize << params.window_log;
367 params.row.hash_bits = row_hash_bits_for_window(max_window_size);
368 }
369 params
370}
371
372fn level22_btultra2_params_for_source_size(source_size: Option<u64>) -> LevelParams {
373 let mut hc = match source_size {
374 Some(size) if size <= 16 * 1024 => BTULTRA2_HC_CONFIG_L22_16K,
375 Some(size) if size <= 128 * 1024 => BTULTRA2_HC_CONFIG_L22_128K,
376 Some(size) if size <= 256 * 1024 => BTULTRA2_HC_CONFIG_L22_256K,
377 _ => BTULTRA2_HC_CONFIG_L22,
378 };
379 let mut window_log = match source_size {
380 Some(size) if size <= 16 * 1024 => 14,
381 Some(size) if size <= 128 * 1024 => 17,
382 Some(size) if size <= 256 * 1024 => 18,
383 _ => 27,
384 };
385 if let Some(size) = source_size
386 && size > 256 * 1024
387 {
388 let src_log = if size == 0 {
389 MIN_WINDOW_LOG
390 } else {
391 (64 - (size - 1).leading_zeros()) as u8
392 };
393 window_log = window_log.min(src_log.max(MIN_WINDOW_LOG));
394 let adjusted_table_log = window_log as usize + 1;
395 hc.hash_log = hc.hash_log.min(adjusted_table_log);
396 hc.chain_log = hc.chain_log.min(adjusted_table_log);
397 }
398 LevelParams {
399 strategy_tag: super::strategy::StrategyTag::BtUltra2,
400 window_log,
401 fast_hash_log: 14,
402 fast_mls: 7,
403 fast_step_size: 2,
404 lazy_depth: 2,
405 hc,
406 row: ROW_CONFIG,
407 }
408}
409
410fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
413 if matches!(level, CompressionLevel::Level(22)) {
414 return level22_btultra2_params_for_source_size(source_size);
415 }
416 let params = match level {
417 CompressionLevel::Uncompressed => LevelParams {
418 strategy_tag: super::strategy::StrategyTag::Fast,
419 window_log: 17,
423 fast_hash_log: 14,
426 fast_mls: 6,
427 fast_step_size: 2,
430 lazy_depth: 0,
431 hc: HC_CONFIG,
432 row: ROW_CONFIG,
433 },
434 CompressionLevel::Fastest => {
435 let mut p = LEVEL_TABLE[0];
442 p.fast_hash_log = 14;
443 p.fast_mls = 6;
444 p.fast_step_size = 2;
445 p
446 }
447 CompressionLevel::Default => LEVEL_TABLE[2],
448 CompressionLevel::Better => LEVEL_TABLE[6],
449 CompressionLevel::Best => LEVEL_TABLE[10],
450 CompressionLevel::Level(n) => {
451 if n > 0 {
452 let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
453 LEVEL_TABLE[idx]
454 } else if n == 0 {
455 LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
457 } else {
458 let clamped = n.max(CompressionLevel::MIN_LEVEL);
468 let target_length = (-clamped) as usize;
469 let step_size = target_length + 1;
470 LevelParams {
471 strategy_tag: super::strategy::StrategyTag::Fast,
472 window_log: 19,
473 fast_hash_log: 14,
474 fast_mls: 6,
475 fast_step_size: step_size,
476 lazy_depth: 0,
477 hc: HC_CONFIG,
478 row: ROW_CONFIG,
479 }
480 }
481 }
482 };
483 if let Some(size) = source_size {
484 adjust_params_for_source_size(params, size)
485 } else {
486 params
487 }
488}
489
490enum MatcherStorage {
508 Simple(FastKernelMatcher),
515 Dfast(DfastMatchGenerator),
520 Row(RowMatchGenerator),
524 HashChain(HcMatchGenerator),
536}
537
538impl MatcherStorage {
539 fn backend(&self) -> super::strategy::BackendTag {
541 use super::strategy::BackendTag;
542 match self {
543 Self::Simple(_) => BackendTag::Simple,
544 Self::Dfast(_) => BackendTag::Dfast,
545 Self::Row(_) => BackendTag::Row,
546 Self::HashChain(_) => BackendTag::HashChain,
547 }
548 }
549}
550
551pub struct MatchGeneratorDriver {
553 vec_pool: Vec<Vec<u8>>,
554 storage: MatcherStorage,
561 strategy_tag: super::strategy::StrategyTag,
567 slice_size: usize,
568 base_slice_size: usize,
569 reported_window_size: usize,
572 dictionary_retained_budget: usize,
575 source_size_hint: Option<u64>,
577}
578
579impl MatchGeneratorDriver {
580 pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
585 assert!(
602 slice_size > 0,
603 "MatchGeneratorDriver::new requires slice_size > 0 (got 0)",
604 );
605 assert!(
606 max_slices_in_window > 0,
607 "MatchGeneratorDriver::new requires max_slices_in_window > 0 (got 0)",
608 );
609 let max_window_size = max_slices_in_window
610 .checked_mul(slice_size)
611 .expect("MatchGeneratorDriver::new: slice_size * max_slices_in_window overflows usize");
612 let next_pow2 = max_window_size.checked_next_power_of_two().expect(
627 "MatchGeneratorDriver::new: max_window_size too large for \
628 next_power_of_two without overflow",
629 );
630 let window_log_init = next_pow2.trailing_zeros() as u8;
631 Self {
632 vec_pool: Vec::new(),
633 storage: MatcherStorage::Simple(FastKernelMatcher::with_params(
634 window_log_init,
635 FAST_LEVEL_1_HASH_LOG,
636 FAST_LEVEL_1_MLS,
637 2, )),
639 strategy_tag: super::strategy::StrategyTag::Fast,
640 slice_size,
641 base_slice_size: slice_size,
642 reported_window_size: next_pow2,
651 dictionary_retained_budget: 0,
652 source_size_hint: None,
653 }
654 }
655
656 fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
657 resolve_level_params(level, source_size)
658 }
659
660 fn active_backend(&self) -> super::strategy::BackendTag {
663 self.storage.backend()
664 }
665
666 fn simple_mut(&mut self) -> &mut FastKernelMatcher {
667 match &mut self.storage {
668 MatcherStorage::Simple(m) => m,
669 _ => panic!("simple backend must be initialized by reset() before use"),
670 }
671 }
672
673 fn recycle_simple_space(&mut self) {
687 if let Some(space) = self.simple_mut().take_recycled_space() {
688 self.vec_pool.push(space);
700 }
701 }
702
703 #[cfg(test)]
704 fn dfast_matcher(&self) -> &DfastMatchGenerator {
705 match &self.storage {
706 MatcherStorage::Dfast(m) => m,
707 _ => panic!("dfast backend must be initialized by reset() before use"),
708 }
709 }
710
711 fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
712 match &mut self.storage {
713 MatcherStorage::Dfast(m) => m,
714 _ => panic!("dfast backend must be initialized by reset() before use"),
715 }
716 }
717
718 #[cfg(test)]
719 fn row_matcher(&self) -> &RowMatchGenerator {
720 match &self.storage {
721 MatcherStorage::Row(m) => m,
722 _ => panic!("row backend must be initialized by reset() before use"),
723 }
724 }
725
726 fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
727 match &mut self.storage {
728 MatcherStorage::Row(m) => m,
729 _ => panic!("row backend must be initialized by reset() before use"),
730 }
731 }
732
733 #[cfg(test)]
734 fn hc_matcher(&self) -> &HcMatchGenerator {
735 match &self.storage {
736 MatcherStorage::HashChain(m) => m,
737 _ => panic!("hash chain backend must be initialized by reset() before use"),
738 }
739 }
740
741 fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
742 match &mut self.storage {
743 MatcherStorage::HashChain(m) => m,
744 _ => panic!("hash chain backend must be initialized by reset() before use"),
745 }
746 }
747
748 #[must_use]
757 fn retire_dictionary_budget(&mut self, evicted_bytes: usize) -> bool {
758 let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
759 if reclaimed == 0 {
760 return false;
761 }
762 self.dictionary_retained_budget -= reclaimed;
763 match self.active_backend() {
764 super::strategy::BackendTag::Simple => {
765 let matcher = self.simple_mut();
766 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
767 }
768 super::strategy::BackendTag::Dfast => {
769 let matcher = self.dfast_matcher_mut();
770 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
771 }
772 super::strategy::BackendTag::Row => {
773 let matcher = self.row_matcher_mut();
774 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
775 }
776 super::strategy::BackendTag::HashChain => {
777 let matcher = self.hc_matcher_mut();
778 matcher.table.max_window_size =
779 matcher.table.max_window_size.saturating_sub(reclaimed);
780 }
781 }
782 true
783 }
784
785 fn trim_after_budget_retire(&mut self) {
786 loop {
787 let mut evicted_bytes = 0usize;
788 match self.active_backend() {
789 super::strategy::BackendTag::Simple => {
790 let MatcherStorage::Simple(m) = &mut self.storage else {
799 unreachable!("active_backend() == Simple proven above");
800 };
801 evicted_bytes += m.trim_to_window();
802 }
803 super::strategy::BackendTag::Dfast => {
804 let dfast = self.dfast_matcher_mut();
813 let pre = dfast.window_size;
814 dfast.trim_to_window();
815 evicted_bytes += pre - dfast.window_size;
816 }
817 super::strategy::BackendTag::Row => {
818 let mut retired = Vec::new();
819 self.row_matcher_mut().trim_to_window(|data| {
820 evicted_bytes += data.len();
821 retired.push(data);
822 });
823 for mut data in retired {
824 data.resize(data.capacity(), 0);
825 self.vec_pool.push(data);
826 }
827 }
828 super::strategy::BackendTag::HashChain => {
829 let mut retired = Vec::new();
830 self.hc_matcher_mut().table.trim_to_window(|data| {
831 evicted_bytes += data.len();
832 retired.push(data);
833 });
834 for mut data in retired {
835 data.resize(data.capacity(), 0);
836 self.vec_pool.push(data);
837 }
838 }
839 }
840 if evicted_bytes == 0 {
841 break;
842 }
843 let _ = self.retire_dictionary_budget(evicted_bytes);
857 }
858 }
859
860 fn skip_matching_for_dictionary_priming(&mut self) {
861 match self.active_backend() {
862 super::strategy::BackendTag::Simple => {
863 self.simple_mut().skip_matching_with_hint(Some(false));
864 self.recycle_simple_space();
865 }
866 super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().skip_matching_dense(),
867 super::strategy::BackendTag::Row => {
868 self.row_matcher_mut().skip_matching_with_hint(Some(false))
869 }
870 super::strategy::BackendTag::HashChain => {
871 self.hc_matcher_mut().skip_matching(Some(false))
872 }
873 }
874 }
875}
876
877impl Matcher for MatchGeneratorDriver {
878 fn supports_dictionary_priming(&self) -> bool {
879 true
880 }
881
882 fn set_source_size_hint(&mut self, size: u64) {
883 self.source_size_hint = Some(size);
884 }
885
886 fn reset(&mut self, level: CompressionLevel) {
887 let hint = self.source_size_hint.take();
888 let hinted = hint.is_some();
889 let params = Self::level_params(level, hint);
890 let next_backend = params.backend();
891 let max_window_size = 1usize << params.window_log;
892 self.dictionary_retained_budget = 0;
893 if self.active_backend() != next_backend {
894 match &mut self.storage {
900 MatcherStorage::Simple(_m) => {
901 }
908 MatcherStorage::Dfast(m) => {
909 m.short_hash = Vec::new();
922 m.long_hash = Vec::new();
923 m.reset();
924 }
925 MatcherStorage::Row(m) => {
926 m.row_heads = Vec::new();
927 m.row_positions = Vec::new();
928 m.row_tags = Vec::new();
929 let vec_pool = &mut self.vec_pool;
930 m.reset(|mut data| {
931 data.resize(data.capacity(), 0);
932 vec_pool.push(data);
933 });
934 }
935 MatcherStorage::HashChain(m) => {
936 m.table.hash_table = Vec::new();
944 m.table.chain_table = Vec::new();
945 m.table.hash3_table = Vec::new();
946 let vec_pool = &mut self.vec_pool;
947 m.reset(|mut data| {
948 data.resize(data.capacity(), 0);
949 vec_pool.push(data);
950 });
951 }
952 }
953 self.storage = match next_backend {
956 super::strategy::BackendTag::Simple => {
957 MatcherStorage::Simple(FastKernelMatcher::with_params(
964 params.window_log,
965 params.fast_hash_log,
966 params.fast_mls,
967 params.fast_step_size,
968 ))
969 }
970 super::strategy::BackendTag::Dfast => {
971 MatcherStorage::Dfast(DfastMatchGenerator::new(max_window_size))
972 }
973 super::strategy::BackendTag::Row => {
974 MatcherStorage::Row(RowMatchGenerator::new(max_window_size))
975 }
976 super::strategy::BackendTag::HashChain => {
977 MatcherStorage::HashChain(HcMatchGenerator::new(max_window_size))
978 }
979 };
980 }
981
982 self.strategy_tag = params.strategy_tag;
988 self.slice_size = self.base_slice_size.min(max_window_size);
989 self.reported_window_size = max_window_size;
990 let strategy_tag = self.strategy_tag;
991 match &mut self.storage {
992 MatcherStorage::Simple(m) => {
993 m.reset(
997 params.window_log,
998 params.fast_hash_log,
999 params.fast_mls,
1000 params.fast_step_size,
1001 );
1002 }
1003 MatcherStorage::Dfast(dfast) => {
1004 dfast.max_window_size = max_window_size;
1005 dfast.lazy_depth = params.lazy_depth;
1006 dfast.use_fast_loop = matches!(
1007 level,
1008 CompressionLevel::Default
1009 | CompressionLevel::Level(0)
1010 | CompressionLevel::Level(3)
1011 );
1012 dfast.set_hash_bits(if hinted {
1013 dfast_hash_bits_for_window(max_window_size)
1014 } else {
1015 DFAST_HASH_BITS
1016 });
1017 dfast.reset();
1021 }
1022 MatcherStorage::Row(row) => {
1023 row.max_window_size = max_window_size;
1024 row.lazy_depth = params.lazy_depth;
1025 row.configure(params.row);
1026 if hinted {
1027 row.set_hash_bits(row_hash_bits_for_window(max_window_size));
1028 }
1029 let vec_pool = &mut self.vec_pool;
1030 row.reset(|mut data| {
1031 data.resize(data.capacity(), 0);
1032 vec_pool.push(data);
1033 });
1034 }
1035 MatcherStorage::HashChain(hc) => {
1036 hc.table.max_window_size = max_window_size;
1037 hc.hc.lazy_depth = params.lazy_depth;
1038 hc.configure(params.hc, strategy_tag, params.window_log);
1039 let vec_pool = &mut self.vec_pool;
1040 hc.reset(|mut data| {
1041 data.resize(data.capacity(), 0);
1042 vec_pool.push(data);
1043 });
1044 }
1045 }
1046 }
1047
1048 fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
1049 match self.active_backend() {
1050 super::strategy::BackendTag::Simple => {
1051 self.simple_mut().prime_offset_history(offset_hist);
1060 }
1061 super::strategy::BackendTag::Dfast => {
1062 self.dfast_matcher_mut().offset_hist = offset_hist
1063 }
1064 super::strategy::BackendTag::Row => self.row_matcher_mut().offset_hist = offset_hist,
1065 super::strategy::BackendTag::HashChain => {
1066 let matcher = self.hc_matcher_mut();
1067 matcher.table.offset_hist = offset_hist;
1068 matcher.table.mark_dictionary_primed();
1069 }
1070 }
1071
1072 if dict_content.is_empty() {
1073 return;
1074 }
1075
1076 const MAX_PRIMED_WINDOW_SIZE: usize =
1091 (u32::MAX as usize - crate::common::MAX_BLOCK_SIZE as usize) / 2;
1092
1093 let requested_dict_budget = dict_content.len();
1107 let base_max_window_size = match self.active_backend() {
1108 super::strategy::BackendTag::Simple => self.simple_mut().max_window_size,
1109 super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().max_window_size,
1110 super::strategy::BackendTag::Row => self.row_matcher_mut().max_window_size,
1111 super::strategy::BackendTag::HashChain => self.hc_matcher_mut().table.max_window_size,
1112 };
1113 match self.active_backend() {
1114 super::strategy::BackendTag::Simple => {
1115 let matcher = self.simple_mut();
1116 matcher.max_window_size = matcher
1117 .max_window_size
1118 .saturating_add(requested_dict_budget)
1119 .min(MAX_PRIMED_WINDOW_SIZE);
1120 }
1121 super::strategy::BackendTag::Dfast => {
1122 let matcher = self.dfast_matcher_mut();
1123 matcher.max_window_size = matcher
1124 .max_window_size
1125 .saturating_add(requested_dict_budget)
1126 .min(MAX_PRIMED_WINDOW_SIZE);
1127 }
1128 super::strategy::BackendTag::Row => {
1129 let matcher = self.row_matcher_mut();
1130 matcher.max_window_size = matcher
1131 .max_window_size
1132 .saturating_add(requested_dict_budget)
1133 .min(MAX_PRIMED_WINDOW_SIZE);
1134 }
1135 super::strategy::BackendTag::HashChain => {
1136 let matcher = self.hc_matcher_mut();
1137 matcher.table.max_window_size = matcher
1138 .table
1139 .max_window_size
1140 .saturating_add(requested_dict_budget)
1141 .min(MAX_PRIMED_WINDOW_SIZE);
1142 }
1143 }
1144
1145 let mut start = 0usize;
1146 let mut committed_dict_budget = 0usize;
1147 let min_primed_tail = match self.active_backend() {
1151 super::strategy::BackendTag::Simple => MIN_MATCH_LEN,
1152 super::strategy::BackendTag::Dfast
1153 | super::strategy::BackendTag::Row
1154 | super::strategy::BackendTag::HashChain => 4,
1155 };
1156 while start < dict_content.len() {
1157 let end = (start + self.slice_size).min(dict_content.len());
1158 if end - start < min_primed_tail {
1159 break;
1160 }
1161 let mut space = self.get_next_space();
1162 space.clear();
1163 space.extend_from_slice(&dict_content[start..end]);
1164 self.commit_space(space);
1165 self.skip_matching_for_dictionary_priming();
1166 committed_dict_budget += end - start;
1167 start = end;
1168 }
1169
1170 let capped_retained_budget = MAX_PRIMED_WINDOW_SIZE.saturating_sub(base_max_window_size);
1180 let granted_retained_budget = committed_dict_budget.min(capped_retained_budget);
1181 let final_max_window_size = base_max_window_size.saturating_add(granted_retained_budget);
1182 match self.active_backend() {
1183 super::strategy::BackendTag::Simple => {
1184 self.simple_mut().max_window_size = final_max_window_size;
1185 }
1186 super::strategy::BackendTag::Dfast => {
1187 self.dfast_matcher_mut().max_window_size = final_max_window_size;
1188 }
1189 super::strategy::BackendTag::Row => {
1190 self.row_matcher_mut().max_window_size = final_max_window_size;
1191 }
1192 super::strategy::BackendTag::HashChain => {
1193 self.hc_matcher_mut().table.max_window_size = final_max_window_size;
1194 }
1195 }
1196 if granted_retained_budget > 0 {
1197 self.dictionary_retained_budget = self
1198 .dictionary_retained_budget
1199 .saturating_add(granted_retained_budget);
1200 }
1201 if self.active_backend() == super::strategy::BackendTag::HashChain {
1202 self.hc_matcher_mut()
1203 .table
1204 .set_dictionary_limit_from_primed_bytes(committed_dict_budget);
1205 }
1206 }
1207
1208 fn seed_dictionary_entropy(
1209 &mut self,
1210 huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
1211 ll: Option<&crate::fse::fse_encoder::FSETable>,
1212 ml: Option<&crate::fse::fse_encoder::FSETable>,
1213 of: Option<&crate::fse::fse_encoder::FSETable>,
1214 ) {
1215 if self.active_backend() == super::strategy::BackendTag::HashChain {
1216 self.hc_matcher_mut()
1217 .seed_dictionary_entropy(huff, ll, ml, of);
1218 }
1219 }
1220
1221 fn window_size(&self) -> u64 {
1222 self.reported_window_size as u64
1223 }
1224
1225 fn get_next_space(&mut self) -> Vec<u8> {
1226 if let Some(mut space) = self.vec_pool.pop() {
1227 if space.len() > self.slice_size {
1228 space.truncate(self.slice_size);
1229 }
1230 if space.len() < self.slice_size {
1231 space.resize(self.slice_size, 0);
1232 }
1233 return space;
1234 }
1235 alloc::vec![0; self.slice_size]
1236 }
1237
1238 fn get_last_space(&mut self) -> &[u8] {
1239 match &self.storage {
1240 MatcherStorage::Simple(m) => m.last_committed_space(),
1241 MatcherStorage::Dfast(m) => m.get_last_space(),
1242 MatcherStorage::Row(m) => m.get_last_space(),
1243 MatcherStorage::HashChain(m) => m.table.get_last_space(),
1244 }
1245 }
1246
1247 fn commit_space(&mut self, space: Vec<u8>) {
1248 let mut evicted_bytes = 0usize;
1249 let vec_pool = &mut self.vec_pool;
1255 match &mut self.storage {
1256 MatcherStorage::Simple(m) => {
1257 let pre = m.history_len_for_eviction_accounting();
1267 m.accept_data(space);
1268 let post = m.history_len_for_eviction_accounting();
1269 evicted_bytes += pre.saturating_sub(post);
1280 }
1281 MatcherStorage::Dfast(m) => {
1282 let pre = m.window_size;
1304 let space_len = space.len();
1305 m.add_data(space, |mut data| {
1306 data.resize(data.capacity(), 0);
1307 vec_pool.push(data);
1308 });
1309 evicted_bytes += pre.saturating_add(space_len).saturating_sub(m.window_size);
1310 }
1311 MatcherStorage::Row(m) => {
1312 m.add_data(space, |mut data| {
1313 evicted_bytes += data.len();
1314 data.resize(data.capacity(), 0);
1315 vec_pool.push(data);
1316 });
1317 }
1318 MatcherStorage::HashChain(m) => {
1319 m.table.add_data(space, |mut data| {
1320 evicted_bytes += data.len();
1321 data.resize(data.capacity(), 0);
1322 vec_pool.push(data);
1323 });
1324 }
1325 }
1326 if self.retire_dictionary_budget(evicted_bytes) {
1336 self.trim_after_budget_retire();
1337 }
1338 }
1339
1340 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1341 use super::strategy::{self, StrategyTag};
1342 match self.strategy_tag {
1351 StrategyTag::Fast => self.compress_block::<strategy::Fast>(&mut handle_sequence),
1352 StrategyTag::Dfast => self.compress_block::<strategy::Dfast>(&mut handle_sequence),
1353 StrategyTag::Greedy => self.compress_block::<strategy::Greedy>(&mut handle_sequence),
1354 StrategyTag::Lazy => self.compress_block::<strategy::Lazy>(&mut handle_sequence),
1355 StrategyTag::BtOpt => self.compress_block::<strategy::BtOpt>(&mut handle_sequence),
1356 StrategyTag::BtUltra => self.compress_block::<strategy::BtUltra>(&mut handle_sequence),
1357 StrategyTag::BtUltra2 => {
1358 self.compress_block::<strategy::BtUltra2>(&mut handle_sequence)
1359 }
1360 }
1361 }
1362
1363 fn skip_matching(&mut self) {
1364 self.skip_matching_with_hint(None);
1365 }
1366
1367 fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
1368 match self.active_backend() {
1369 super::strategy::BackendTag::Simple => {
1370 self.simple_mut()
1371 .skip_matching_with_hint(incompressible_hint);
1372 self.recycle_simple_space();
1373 }
1374 super::strategy::BackendTag::Dfast => {
1375 self.dfast_matcher_mut().skip_matching(incompressible_hint)
1376 }
1377 super::strategy::BackendTag::Row => self
1378 .row_matcher_mut()
1379 .skip_matching_with_hint(incompressible_hint),
1380 super::strategy::BackendTag::HashChain => {
1381 self.hc_matcher_mut().skip_matching(incompressible_hint)
1382 }
1383 }
1384 }
1385}
1386
1387impl MatchGeneratorDriver {
1388 fn compress_block<S: super::strategy::Strategy>(
1397 &mut self,
1398 handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
1399 ) {
1400 use super::strategy::BackendTag;
1401 match S::BACKEND {
1402 BackendTag::Simple => {
1403 self.simple_mut().start_matching(&mut *handle_sequence);
1414 self.recycle_simple_space();
1415 }
1416 BackendTag::Dfast => self.dfast_matcher_mut().start_matching(handle_sequence),
1417 BackendTag::Row => {
1418 let matcher = self.row_matcher_mut();
1436 debug_assert_eq!(
1437 matcher.lazy_depth, 0,
1438 "Row backend currently expects lazy_depth == 0 (donor-greedy); \
1439 wire a depth-aware dispatch before routing lazy levels here",
1440 );
1441 matcher.start_matching_greedy(handle_sequence);
1442 }
1443 BackendTag::HashChain => self
1444 .hc_matcher_mut()
1445 .start_matching_strategy::<S>(handle_sequence),
1446 }
1447 }
1448}
1449
1450pub(crate) enum HcBackend {
1464 Hc,
1466 Bt(alloc::boxed::Box<super::bt::BtMatcher>),
1470}
1471
1472impl HcBackend {
1473 #[inline(always)]
1480 pub(crate) fn bt_mut(&mut self) -> &mut super::bt::BtMatcher {
1481 match self {
1482 Self::Bt(bt) => bt,
1483 Self::Hc => unreachable!("BT-only accessor called in HC mode"),
1484 }
1485 }
1486}
1487
1488struct HcMatchGenerator {
1489 table: super::match_table::storage::MatchTable,
1494 hc: super::hc::HcMatcher,
1498 backend: HcBackend,
1503 strategy_tag: super::strategy::StrategyTag,
1515}
1516
1517macro_rules! bt_insert_step_no_rebase_body {
1533 ($table:expr, $search_depth:expr, $abs_pos:ident, $current_abs_end:ident, $target_abs:ident, $cmf:path) => {{
1534 let idx = $abs_pos - $table.history_abs_start;
1535 let concat = &$table.history[$table.history_start..];
1536 if idx + 8 > concat.len() {
1537 return 1;
1538 }
1539 debug_assert!(
1540 $abs_pos <= $current_abs_end,
1541 "BT walker called past current block end"
1542 );
1543 let tail_limit = $current_abs_end - $abs_pos;
1544 let hash = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
1545 concat,
1546 idx,
1547 $table.hash_log,
1548 $crate::encoding::bt::BtMatcher::HASH_MLS,
1549 );
1550 let Some(relative_pos) = $table.relative_position($abs_pos) else {
1551 return 1;
1552 };
1553 let stored = relative_pos + 1;
1554 let bt_mask = $table.bt_mask();
1555 let bt_low = $abs_pos.saturating_sub(bt_mask);
1561 let window_low = $table.window_low_abs_for_target($target_abs);
1562 let mut match_end_abs = $abs_pos + 9;
1571 let mut best_len = 8usize;
1572 let mut compares_left = $search_depth;
1573 let mut common_length_smaller = 0usize;
1574 let mut common_length_larger = 0usize;
1575 let pair_idx = $table.bt_pair_index_for_abs($abs_pos);
1576 let mut smaller_slot = pair_idx;
1577 let mut larger_slot = pair_idx + 1;
1578 let mut match_stored = $table.hash_table[hash];
1579 $table.hash_table[hash] = stored;
1580
1581 while compares_left > 0 {
1582 let Some(candidate_abs) =
1583 $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
1584 match_stored,
1585 $table.position_base,
1586 $table.index_shift,
1587 )
1588 else {
1589 break;
1590 };
1591 if candidate_abs < window_low || candidate_abs >= $abs_pos {
1592 break;
1593 }
1594 compares_left -= 1;
1595
1596 let next_pair_idx = $table.bt_pair_index_for_abs(candidate_abs);
1597 let next_smaller = $table.chain_table[next_pair_idx];
1598 let next_larger = $table.chain_table[next_pair_idx + 1];
1599 let seed_len = common_length_smaller.min(common_length_larger);
1600 let candidate_idx = candidate_abs - $table.history_abs_start;
1601 let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };
1606
1607 if match_len > best_len {
1608 best_len = match_len;
1609 let candidate_end = candidate_abs + match_len;
1613 if candidate_end > match_end_abs {
1614 match_end_abs = candidate_end;
1615 }
1616 }
1617
1618 if match_len >= tail_limit {
1619 break;
1620 }
1621
1622 let candidate_next = candidate_idx + match_len;
1623 let current_next = idx + match_len;
1624 if concat[candidate_next] < concat[current_next] {
1625 $table.chain_table[smaller_slot] = match_stored;
1626 common_length_smaller = match_len;
1627 if candidate_abs <= bt_low {
1628 smaller_slot = usize::MAX;
1629 break;
1630 }
1631 smaller_slot = next_pair_idx + 1;
1632 match_stored = next_larger;
1633 } else {
1634 $table.chain_table[larger_slot] = match_stored;
1635 common_length_larger = match_len;
1636 if candidate_abs <= bt_low {
1637 larger_slot = usize::MAX;
1638 break;
1639 }
1640 larger_slot = next_pair_idx;
1641 match_stored = next_smaller;
1642 }
1643 }
1644
1645 if smaller_slot != usize::MAX {
1646 $table.chain_table[smaller_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
1647 }
1648 if larger_slot != usize::MAX {
1649 $table.chain_table[larger_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
1650 }
1651
1652 let speed_positions = if best_len > 384 {
1653 (best_len - 384).min(192)
1654 } else {
1655 0
1656 };
1657 speed_positions.max(match_end_abs - ($abs_pos + 8))
1667 }};
1668}
1669pub(crate) use bt_insert_step_no_rebase_body;
1670
1671macro_rules! build_optimal_plan_impl_body {
1686 (
1687 $self:expr,
1688 $strategy_ty:ty,
1689 $current:ident,
1690 $current_abs_start:ident,
1691 $current_len:ident,
1692 $initial_state:ident,
1693 $stats:ident,
1694 $out:ident,
1695 $collect:ident $(,)?
1696 ) => {{
1697 let current_abs_end = $current_abs_start + $current_len;
1698 let min_match_len = HC_OPT_MIN_MATCH_LEN;
1699 let frontier_limit = $current_len.min(HC_OPT_NUM - 1);
1701 let initial_reps = $initial_state.reps;
1702 let initial_litlen = $initial_state.litlen;
1703 let mut profile = $initial_state.profile;
1704 profile.sufficient_match_len = $self.hc.sufficient_match_len_for_pass(profile);
1705 debug_assert!(
1717 <$strategy_ty as super::strategy::Strategy>::USE_BT,
1718 "build_optimal_plan_impl_body called on non-BT strategy"
1719 );
1720 let abort_on_worse_match: bool =
1721 <$strategy_ty as super::strategy::Strategy>::OPT_LEVEL == 0;
1722 let opt_level: bool = <$strategy_ty as super::strategy::Strategy>::OPT_LEVEL >= 2;
1723 let mut nodes = core::mem::take(&mut $self.backend.bt_mut().opt_nodes_scratch);
1724 let frontier_buffer_size = frontier_limit + 2;
1726 if nodes.len() < frontier_buffer_size {
1727 nodes.resize(frontier_buffer_size, HcOptimalNode::default());
1728 }
1729 let mut candidates = core::mem::take(&mut $self.backend.bt_mut().opt_candidates_scratch);
1730 candidates.clear();
1731 if candidates.capacity() < MAX_HC_SEARCH_DEPTH {
1732 candidates.reserve_exact(MAX_HC_SEARCH_DEPTH - candidates.capacity());
1733 }
1734 let mut store = core::mem::take(&mut $self.backend.bt_mut().opt_store_scratch);
1735 store.clear();
1736 let mut ll_prices = core::mem::take(&mut $self.backend.bt_mut().opt_ll_price_scratch);
1737 let mut ll_price_generations =
1738 core::mem::take(&mut $self.backend.bt_mut().opt_ll_price_generation);
1739 if ll_prices.len() <= frontier_limit {
1740 ll_prices.resize(frontier_limit + 1, 0);
1741 ll_price_generations.resize(frontier_limit + 1, 0);
1742 }
1743 $self.backend.bt_mut().opt_ll_price_stamp = $self
1744 .backend
1745 .bt_mut()
1746 .opt_ll_price_stamp
1747 .wrapping_add(1)
1748 .max(1);
1749 let ll_price_stamp = $self.backend.bt_mut().opt_ll_price_stamp;
1750 $self.backend.bt_mut().opt_lit_price_stamp = $self
1751 .backend
1752 .bt_mut()
1753 .opt_lit_price_stamp
1754 .wrapping_add(1)
1755 .max(1);
1756 let lit_price_stamp = $self.backend.bt_mut().opt_lit_price_stamp;
1757 let mut ml_prices = core::mem::take(&mut $self.backend.bt_mut().opt_ml_price_scratch);
1758 let mut ml_price_generations =
1759 core::mem::take(&mut $self.backend.bt_mut().opt_ml_price_generation);
1760 if ml_prices.len() <= frontier_limit {
1761 ml_prices.resize(frontier_limit + 1, 0);
1762 ml_price_generations.resize(frontier_limit + 1, 0);
1763 }
1764 $self.backend.bt_mut().opt_ml_price_stamp = $self
1765 .backend
1766 .bt_mut()
1767 .opt_ml_price_stamp
1768 .wrapping_add(1)
1769 .max(1);
1770 let ml_price_stamp = $self.backend.bt_mut().opt_ml_price_stamp;
1771 nodes[0] = HcOptimalNode {
1772 price: BtMatcher::cached_lit_length_price(
1773 profile,
1774 $stats,
1775 initial_litlen,
1776 &mut ll_prices,
1777 &mut ll_price_generations,
1778 ll_price_stamp,
1779 ),
1780 litlen: initial_litlen as u32,
1781 reps: initial_reps,
1782 ..HcOptimalNode::default()
1783 };
1784 let sufficient_len = profile.sufficient_match_len;
1785 let ll0_price = BtMatcher::cached_lit_length_price(
1786 profile,
1787 $stats,
1788 0,
1789 &mut ll_prices,
1790 &mut ll_price_generations,
1791 ll_price_stamp,
1792 );
1793 let ll1_price = BtMatcher::cached_lit_length_price(
1794 profile,
1795 $stats,
1796 1,
1797 &mut ll_prices,
1798 &mut ll_price_generations,
1799 ll_price_stamp,
1800 );
1801 let mut pos = 1usize;
1802 let mut last_pos = 0usize;
1803 let mut forced_end: Option<usize> = None;
1804 let mut forced_end_state: Option<HcOptimalNode> = None;
1805 let mut seed_forced_shortest_path = false;
1806 let mut opt_ldm = HcOptLdmState {
1807 seq_store: HcRawSeqStore {
1808 pos: 0,
1809 pos_in_sequence: 0,
1810 size: $self.backend.bt_mut().ldm_sequences.len(),
1811 },
1812 ..HcOptLdmState::default()
1813 };
1814 let has_ldm = !$self.backend.bt_mut().ldm_sequences.is_empty();
1815 if has_ldm {
1816 $self
1817 .backend
1818 .bt_mut()
1819 .ldm_get_next_match_and_update_seq_store(&mut opt_ldm, 0, $current_len);
1820 }
1821
1822 if $current_len >= min_match_len {
1825 let seed_ldm = if has_ldm {
1826 $self.backend.bt_mut().ldm_process_match_candidate(
1827 &mut opt_ldm,
1828 0,
1829 $current_len,
1830 min_match_len,
1831 )
1832 } else {
1833 None
1834 };
1835 candidates.clear();
1836 unsafe {
1840 $self.$collect::<$strategy_ty, true>(
1841 $current_abs_start,
1842 current_abs_end,
1843 profile,
1844 HcCandidateQuery {
1845 reps: initial_reps,
1846 lit_len: initial_litlen,
1847 ldm_candidate: seed_ldm,
1848 },
1849 &mut candidates,
1850 )
1851 };
1852 if !candidates.is_empty() {
1853 last_pos = (min_match_len - 1).min(frontier_limit);
1855 for p in 1..min_match_len.min(nodes.len()) {
1856 BtMatcher::reset_opt_node(&mut nodes[p]);
1857 let seed_litlen = initial_litlen
1867 .checked_add(p)
1868 .and_then(|s| u32::try_from(s).ok())
1869 .expect("optimal parser seed litlen out of u32 range");
1870 nodes[p].litlen = seed_litlen;
1871 }
1872 }
1873
1874 if let Some(candidate) = candidates.last() {
1875 let longest_len = candidate.match_len.min($current_len);
1876 if longest_len > sufficient_len {
1877 let off_base = BtMatcher::encode_offset_base_with_reps(
1878 candidate.offset as u32,
1879 initial_litlen,
1880 initial_reps,
1881 );
1882 let off_price = profile
1883 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
1884 let ml_price = BtMatcher::cached_match_length_price(
1885 profile,
1886 $stats,
1887 longest_len,
1888 &mut ml_prices,
1889 &mut ml_price_generations,
1890 ml_price_stamp,
1891 );
1892 let seq_cost = BtMatcher::add_prices(
1893 ll0_price,
1894 profile.match_price_from_parts(off_price, ml_price, $stats),
1895 );
1896 let forced_price = BtMatcher::add_prices(nodes[0].price, seq_cost);
1897 let forced_state = HcOptimalNode {
1898 price: forced_price,
1899 off: candidate.offset as u32,
1900 mlen: longest_len as u32,
1901 litlen: 0,
1902 reps: initial_reps,
1903 };
1904 if longest_len < nodes.len() && forced_price < nodes[longest_len].price {
1905 nodes[longest_len] = forced_state;
1906 }
1907 forced_end = Some(longest_len);
1908 forced_end_state = Some(forced_state);
1909 seed_forced_shortest_path = true;
1910 }
1911 }
1912 if !seed_forced_shortest_path {
1913 let mut prev_max_len = min_match_len - 1;
1914 for candidate in candidates.iter() {
1915 let max_match_len = candidate.match_len.min(frontier_limit);
1916 if max_match_len < min_match_len {
1917 continue;
1918 }
1919 let start_len = (prev_max_len + 1).max(min_match_len);
1920 if start_len > max_match_len {
1921 prev_max_len = prev_max_len.max(max_match_len);
1922 continue;
1923 }
1924 if max_match_len > last_pos {
1925 BtMatcher::reset_opt_nodes(&mut nodes, last_pos + 1, max_match_len);
1926 }
1927 let off_base = BtMatcher::encode_offset_base_with_reps(
1928 candidate.offset as u32,
1929 initial_litlen,
1930 initial_reps,
1931 );
1932 let off_price = profile
1933 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
1934 debug_assert!(max_match_len < nodes.len());
1935 let nodes0_price = nodes[0].price;
1936 for match_len in (start_len..=max_match_len).rev() {
1937 let ml_price = BtMatcher::cached_match_length_price(
1938 profile,
1939 $stats,
1940 match_len,
1941 &mut ml_prices,
1942 &mut ml_price_generations,
1943 ml_price_stamp,
1944 );
1945 let seq_cost = BtMatcher::add_prices(
1946 ll0_price,
1947 profile.match_price_from_parts(off_price, ml_price, $stats),
1948 );
1949 let next_cost = BtMatcher::add_prices(nodes0_price, seq_cost);
1950 let node_price = unsafe { nodes.get_unchecked(match_len).price };
1951 if match_len > last_pos || next_cost < node_price {
1952 let slot = unsafe { nodes.get_unchecked_mut(match_len) };
1953 *slot = HcOptimalNode {
1954 price: next_cost,
1955 off: candidate.offset as u32,
1956 mlen: match_len as u32,
1957 litlen: 0,
1958 reps: initial_reps,
1959 };
1960 if match_len > last_pos {
1961 last_pos = match_len;
1962 }
1963 } else if abort_on_worse_match {
1964 break;
1965 }
1966 }
1967 prev_max_len = prev_max_len.max(max_match_len);
1968 }
1969 if last_pos + 1 < nodes.len() {
1970 nodes[last_pos + 1].price = u32::MAX;
1971 }
1972 }
1973 }
1974 while !seed_forced_shortest_path && pos <= last_pos && pos <= frontier_limit {
1975 debug_assert!(pos + 1 < nodes.len());
1976 let prev_node = unsafe { *nodes.get_unchecked(pos - 1) };
1977 if prev_node.price != u32::MAX {
1978 let lit_len = prev_node.litlen as usize + 1;
1979 let lit_price = {
1980 let bt = $self.backend.bt_mut();
1981 BtMatcher::cached_literal_price(
1982 profile,
1983 $stats,
1984 $current[pos - 1],
1985 &mut bt.opt_lit_price_scratch,
1986 &mut bt.opt_lit_price_generation,
1987 lit_price_stamp,
1988 )
1989 };
1990 let ll_delta = BtMatcher::cached_lit_length_delta_price(
1991 profile,
1992 $stats,
1993 lit_len,
1994 &mut ll_prices,
1995 &mut ll_price_generations,
1996 ll_price_stamp,
1997 );
1998 let lit_cost = BtMatcher::add_price_delta(prev_node.price, lit_price, ll_delta);
1999 let node_pos_price = unsafe { nodes.get_unchecked(pos).price };
2000 if lit_cost <= node_pos_price {
2001 let prev_match = unsafe { *nodes.get_unchecked(pos) };
2002 let slot = unsafe { nodes.get_unchecked_mut(pos) };
2003 *slot = prev_node;
2004 slot.litlen = lit_len as u32;
2005 slot.price = lit_cost;
2006 #[allow(clippy::collapsible_if)]
2007 if opt_level
2008 && prev_match.mlen > 0
2009 && prev_match.litlen == 0
2010 && pos < $current_len
2011 {
2012 if ll1_price < ll0_price {
2013 let next_lit_price = {
2014 let bt = $self.backend.bt_mut();
2015 BtMatcher::cached_literal_price(
2016 profile,
2017 $stats,
2018 $current[pos],
2019 &mut bt.opt_lit_price_scratch,
2020 &mut bt.opt_lit_price_generation,
2021 lit_price_stamp,
2022 )
2023 };
2024 let with1literal = BtMatcher::add_price_delta(
2025 prev_match.price,
2026 next_lit_price,
2027 ll1_price as i32 - ll0_price as i32,
2028 );
2029 let ll_delta_next = BtMatcher::cached_lit_length_delta_price(
2030 profile,
2031 $stats,
2032 lit_len + 1,
2033 &mut ll_prices,
2034 &mut ll_price_generations,
2035 ll_price_stamp,
2036 );
2037 let with_more_literals =
2038 BtMatcher::add_price_delta(lit_cost, next_lit_price, ll_delta_next);
2039 let next = pos + 1;
2040 let next_price = unsafe { nodes.get_unchecked(next).price };
2041 if with1literal < with_more_literals && with1literal < next_price {
2042 debug_assert!(pos >= prev_match.mlen as usize);
2044 let prev_pos = pos - prev_match.mlen as usize;
2045 {
2046 let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
2047 let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
2048 prev_match.off,
2049 prev_state.litlen as usize,
2050 prev_state.reps,
2051 );
2052 let slot = unsafe { nodes.get_unchecked_mut(next) };
2053 *slot = prev_match;
2054 slot.reps = reps_after_match;
2055 slot.litlen = 1;
2056 slot.price = with1literal;
2057 if next > last_pos {
2058 last_pos = next;
2059 }
2060 }
2061 }
2062 }
2063 }
2064 }
2065 }
2066
2067 let mut base_node = unsafe { *nodes.get_unchecked(pos) };
2068 if base_node.price == u32::MAX {
2069 pos += 1;
2070 continue;
2071 }
2072 if base_node.mlen > 0 && base_node.litlen == 0 {
2073 debug_assert!(pos >= base_node.mlen as usize);
2075 let prev_pos = pos - base_node.mlen as usize;
2076 {
2077 let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
2078 let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
2079 base_node.off,
2080 prev_state.litlen as usize,
2081 prev_state.reps,
2082 );
2083 base_node.reps = reps_after_match;
2084 unsafe { nodes.get_unchecked_mut(pos).reps = reps_after_match };
2085 }
2086 }
2087 let base_cost = base_node.price;
2088
2089 if pos + 8 > $current_len {
2090 pos += 1;
2091 continue;
2092 }
2093
2094 if pos == last_pos {
2095 break;
2096 }
2097
2098 let next_price = unsafe { nodes.get_unchecked(pos + 1).price };
2099 if abort_on_worse_match
2100 && next_price <= base_cost.saturating_add(HC_BITCOST_MULTIPLIER / 2)
2101 {
2102 pos += 1;
2103 continue;
2104 }
2105
2106 let abs_pos = $current_abs_start + pos;
2107 let ldm_candidate = if has_ldm {
2108 $self.backend.bt_mut().ldm_process_match_candidate(
2109 &mut opt_ldm,
2110 pos,
2111 $current_len - pos,
2112 min_match_len,
2113 )
2114 } else {
2115 None
2116 };
2117 candidates.clear();
2118 unsafe {
2120 $self.$collect::<$strategy_ty, true>(
2121 abs_pos,
2122 current_abs_end,
2123 profile,
2124 HcCandidateQuery {
2125 reps: base_node.reps,
2126 lit_len: base_node.litlen as usize,
2127 ldm_candidate,
2128 },
2129 &mut candidates,
2130 )
2131 };
2132 if let Some(candidate) = candidates.last() {
2133 let longest_len = candidate.match_len.min($current_len - pos);
2134 if longest_len > sufficient_len
2135 || pos + longest_len >= HC_OPT_NUM
2136 || pos + longest_len >= $current_len
2137 {
2138 let lit_len = base_node.litlen as usize;
2139 let off_base = BtMatcher::encode_offset_base_with_reps(
2140 candidate.offset as u32,
2141 lit_len,
2142 base_node.reps,
2143 );
2144 let off_price = profile
2145 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
2146 let ml_price = BtMatcher::cached_match_length_price(
2147 profile,
2148 $stats,
2149 longest_len,
2150 &mut ml_prices,
2151 &mut ml_price_generations,
2152 ml_price_stamp,
2153 );
2154 let seq_cost = BtMatcher::add_prices(
2155 ll0_price,
2156 profile.match_price_from_parts(off_price, ml_price, $stats),
2157 );
2158 let forced_price = BtMatcher::add_prices(base_cost, seq_cost);
2159 let end_pos = (pos + longest_len).min($current_len);
2160 forced_end = Some(end_pos);
2161 forced_end_state = Some(HcOptimalNode {
2162 price: forced_price,
2163 off: candidate.offset as u32,
2164 mlen: longest_len as u32,
2165 litlen: 0,
2166 reps: base_node.reps,
2167 });
2168 break;
2169 }
2170 }
2171 let mut prev_max_len = min_match_len - 1;
2172 for candidate in candidates.iter() {
2173 debug_assert!(pos <= frontier_limit);
2177 let max_match_len = candidate
2178 .match_len
2179 .min($current_len - pos)
2180 .min(frontier_limit - pos);
2181 let min_len = min_match_len;
2182 if max_match_len < min_len {
2183 continue;
2184 }
2185 let start_len = (prev_max_len + 1).max(min_len);
2186 if start_len > max_match_len {
2187 prev_max_len = prev_max_len.max(max_match_len);
2188 continue;
2189 }
2190 let max_next = pos + max_match_len;
2191 if max_next > last_pos {
2192 BtMatcher::reset_opt_nodes(&mut nodes, last_pos + 1, max_next);
2193 }
2194 let lit_len = base_node.litlen as usize;
2195 let off_base = BtMatcher::encode_offset_base_with_reps(
2196 candidate.offset as u32,
2197 lit_len,
2198 base_node.reps,
2199 );
2200 let off_price = profile
2201 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
2202 debug_assert!(pos + max_match_len < nodes.len());
2203 for match_len in (start_len..=max_match_len).rev() {
2204 let next = pos + match_len;
2205 let ml_price = BtMatcher::cached_match_length_price(
2206 profile,
2207 $stats,
2208 match_len,
2209 &mut ml_prices,
2210 &mut ml_price_generations,
2211 ml_price_stamp,
2212 );
2213 let seq_cost = BtMatcher::add_prices(
2214 ll0_price,
2215 profile.match_price_from_parts(off_price, ml_price, $stats),
2216 );
2217 let next_cost = BtMatcher::add_prices(base_cost, seq_cost);
2218 let node_next_price = unsafe { nodes.get_unchecked(next).price };
2219 let improved = next > last_pos || next_cost < node_next_price;
2220 if improved {
2221 let slot = unsafe { nodes.get_unchecked_mut(next) };
2222 *slot = HcOptimalNode {
2223 price: next_cost,
2224 off: candidate.offset as u32,
2225 mlen: match_len as u32,
2226 litlen: 0,
2227 reps: base_node.reps,
2228 };
2229 if next > last_pos {
2230 last_pos = next;
2231 }
2232 } else if abort_on_worse_match {
2233 break;
2234 }
2235 }
2236 prev_max_len = prev_max_len.max(max_match_len);
2237 }
2238
2239 if last_pos + 1 < nodes.len() {
2240 unsafe {
2241 nodes.get_unchecked_mut(last_pos + 1).price = u32::MAX;
2242 }
2243 }
2244 pos += 1;
2245 }
2246
2247 if last_pos == 0 {
2248 if $current_len == 0 {
2249 let price = nodes[0].price;
2250 return $self.backend.bt_mut().finish_optimal_plan(
2251 HcOptimalPlanBuffers {
2252 nodes,
2253 candidates,
2254 store,
2255 ll_prices,
2256 ll_price_generations,
2257 ml_prices,
2258 ml_price_generations,
2259 },
2260 (price, initial_reps, initial_litlen, 0),
2261 );
2262 }
2263 let lit_price = {
2264 let bt = $self.backend.bt_mut();
2265 BtMatcher::cached_literal_price(
2266 profile,
2267 $stats,
2268 $current[0],
2269 &mut bt.opt_lit_price_scratch,
2270 &mut bt.opt_lit_price_generation,
2271 lit_price_stamp,
2272 )
2273 };
2274 let next_litlen = initial_litlen
2281 .checked_add(1)
2282 .expect("optimal parser next litlen out of usize range");
2283 let ll_delta = BtMatcher::cached_lit_length_delta_price(
2284 profile,
2285 $stats,
2286 next_litlen,
2287 &mut ll_prices,
2288 &mut ll_price_generations,
2289 ll_price_stamp,
2290 );
2291 let price = BtMatcher::add_price_delta(nodes[0].price, lit_price, ll_delta);
2292 return $self.backend.bt_mut().finish_optimal_plan(
2293 HcOptimalPlanBuffers {
2294 nodes,
2295 candidates,
2296 store,
2297 ll_prices,
2298 ll_price_generations,
2299 ml_prices,
2300 ml_price_generations,
2301 },
2302 (price, initial_reps, next_litlen, 1),
2303 );
2304 }
2305
2306 let target_pos = forced_end.unwrap_or(last_pos.min(frontier_limit));
2307 let last_stretch = if let Some(forced_state) = forced_end_state {
2308 forced_state
2309 } else {
2310 nodes[target_pos]
2311 };
2312 if last_stretch.price == u32::MAX {
2313 return $self.backend.bt_mut().finish_optimal_plan(
2314 HcOptimalPlanBuffers {
2315 nodes,
2316 candidates,
2317 store,
2318 ll_prices,
2319 ll_price_generations,
2320 ml_prices,
2321 ml_price_generations,
2322 },
2323 (u32::MAX, initial_reps, initial_litlen, $current_len),
2324 );
2325 }
2326
2327 if last_stretch.mlen == 0 {
2328 return $self.backend.bt_mut().finish_optimal_plan(
2329 HcOptimalPlanBuffers {
2330 nodes,
2331 candidates,
2332 store,
2333 ll_prices,
2334 ll_price_generations,
2335 ml_prices,
2336 ml_price_generations,
2337 },
2338 (
2339 last_stretch.price,
2340 last_stretch.reps,
2341 last_stretch.litlen as usize,
2342 target_pos.min($current_len),
2343 ),
2344 );
2345 }
2346
2347 let mut cur = target_pos.saturating_sub(last_stretch.mlen as usize);
2348 let end_reps = if last_stretch.litlen == 0 {
2349 let prev_state = nodes[cur];
2350 let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
2351 last_stretch.off,
2352 prev_state.litlen as usize,
2353 prev_state.reps,
2354 );
2355 reps_after_match
2356 } else {
2357 let tail_literals = last_stretch.litlen as usize;
2358 if cur < tail_literals {
2359 return $self.backend.bt_mut().finish_optimal_plan(
2360 HcOptimalPlanBuffers {
2361 nodes,
2362 candidates,
2363 store,
2364 ll_prices,
2365 ll_price_generations,
2366 ml_prices,
2367 ml_price_generations,
2368 },
2369 (
2370 last_stretch.price,
2371 last_stretch.reps,
2372 tail_literals,
2373 target_pos.min($current_len),
2374 ),
2375 );
2376 }
2377 cur -= tail_literals;
2378 last_stretch.reps
2379 };
2380 let store_end = cur + 2;
2381 if store.len() <= store_end {
2382 store.resize(store_end + 1, HcOptimalNode::default());
2383 }
2384 let mut store_start;
2385 let mut stretch_pos = cur;
2386
2387 if last_stretch.litlen > 0 {
2388 store[store_end] = HcOptimalNode {
2389 litlen: last_stretch.litlen,
2390 mlen: 0,
2391 ..HcOptimalNode::default()
2392 };
2393 store_start = store_end.saturating_sub(1);
2394 store[store_start] = last_stretch;
2395 }
2396 store[store_end] = last_stretch;
2397 store_start = store_end;
2398
2399 loop {
2400 let next_stretch = nodes[stretch_pos];
2401 store[store_start].litlen = next_stretch.litlen;
2402 if next_stretch.mlen == 0 {
2403 break;
2404 }
2405 if store_start == 0 {
2406 break;
2407 }
2408 store_start -= 1;
2409 store[store_start] = next_stretch;
2410 let litlen = next_stretch.litlen as usize;
2417 let mlen = next_stretch.mlen as usize;
2418 debug_assert!(litlen + mlen <= $current_len);
2419 let step = litlen + mlen;
2420 if step == 0 || stretch_pos < step {
2421 break;
2422 }
2423 stretch_pos -= step;
2424 }
2425
2426 let mut tail_literals = initial_litlen;
2427 let mut store_pos = store_start;
2428 while store_pos <= store_end {
2429 let stretch = store[store_pos];
2430 let llen = stretch.litlen as usize;
2431 let mlen = stretch.mlen as usize;
2432 if mlen == 0 {
2433 tail_literals = llen;
2434 store_pos += 1;
2435 continue;
2436 }
2437 $out.push(HcOptimalSequence {
2438 offset: stretch.off,
2439 match_len: mlen as u32,
2440 lit_len: llen as u32,
2441 });
2442 tail_literals = 0;
2443 store_pos += 1;
2444 }
2445 let result = (
2446 last_stretch.price,
2447 end_reps,
2448 if last_stretch.litlen > 0 {
2449 last_stretch.litlen as usize
2450 } else {
2451 tail_literals
2452 },
2453 target_pos.min($current_len),
2454 );
2455 $self.backend.bt_mut().finish_optimal_plan(
2456 HcOptimalPlanBuffers {
2457 nodes,
2458 candidates,
2459 store,
2460 ll_prices,
2461 ll_price_generations,
2462 ml_prices,
2463 ml_price_generations,
2464 },
2465 result,
2466 )
2467 }};
2468}
2469
2470macro_rules! collect_optimal_candidates_initialized_body {
2479 (
2480 $self:expr,
2481 $strategy_ty:ty,
2482 $abs_pos:ident,
2483 $current_abs_end:ident,
2484 $profile:ident,
2485 $query:ident,
2486 $out:ident,
2487 $bt_matchfinder:ident,
2488 $bt_update:ident,
2489 $bt_insert:ident,
2490 $for_each_rep:ident,
2491 $hash3:ident,
2492 $cpl:path $(,)?
2493 ) => {{
2494 let use_hash3: bool = <$strategy_ty as super::strategy::Strategy>::USE_HASH3;
2503 debug_assert!(!$self.table.hash_table.is_empty());
2504 debug_assert!($self.table.hash3_log == 0 || !$self.table.hash3_table.is_empty());
2505 debug_assert!(
2506 !use_hash3 || $self.table.hash3_log != 0,
2507 "Strategy::USE_HASH3 = true but runtime hash3_log is 0 — call configure() first",
2508 );
2509 debug_assert!(!$self.table.chain_table.is_empty());
2510 let min_match_len = HC_OPT_MIN_MATCH_LEN;
2511 let reps = $query.reps;
2512 let lit_len = $query.lit_len;
2513 let ldm_candidate = $query.ldm_candidate;
2514 $out.clear();
2515 if $abs_pos < $self.table.skip_insert_until_abs {
2516 if let Some(ldm) = ldm_candidate {
2517 let mut best_len_for_skip = 0usize;
2518 let _ = super::bt::BtMatcher::push_candidate_ladder(
2519 $out,
2520 &mut best_len_for_skip,
2521 ldm,
2522 min_match_len,
2523 );
2524 }
2525 return;
2526 }
2527 if $bt_matchfinder {
2528 unsafe { $self.table.$bt_update($abs_pos, $current_abs_end) };
2531 }
2532 let current_idx = $abs_pos - $self.table.history_abs_start;
2533 if current_idx + 4 > $self.table.live_history().len() {
2534 if let Some(ldm) = ldm_candidate {
2535 let mut best_len_for_skip = 0usize;
2536 let _ = super::bt::BtMatcher::push_candidate_ladder(
2537 $out,
2538 &mut best_len_for_skip,
2539 ldm,
2540 min_match_len,
2541 );
2542 }
2543 return;
2544 }
2545 let mut best_len_for_skip = 0usize;
2546 let mut skip_further_match_search = false;
2547 let mut rep_len_candidate_found = false;
2548 unsafe {
2550 $self.hc.$for_each_rep(
2551 &$self.table,
2552 $abs_pos,
2553 lit_len,
2554 reps,
2555 $current_abs_end,
2556 min_match_len,
2557 |rep| {
2558 if rep.match_len >= min_match_len {
2559 rep_len_candidate_found = true;
2560 }
2561 let _ = super::bt::BtMatcher::push_candidate_ladder(
2562 $out,
2563 &mut best_len_for_skip,
2564 rep,
2565 min_match_len,
2566 );
2567 if rep.match_len > $profile.sufficient_match_len {
2568 skip_further_match_search = true;
2569 }
2570 if $abs_pos + rep.match_len >= $current_abs_end {
2577 skip_further_match_search = true;
2578 }
2579 },
2580 )
2581 };
2582 if use_hash3 && !skip_further_match_search && best_len_for_skip < min_match_len {
2586 $self.table.update_hash3_until($abs_pos);
2587 if let Some(h3) = unsafe {
2589 $self
2590 .table
2591 .$hash3($abs_pos, $current_abs_end, min_match_len)
2592 } {
2593 let _ = super::bt::BtMatcher::push_candidate_ladder(
2594 $out,
2595 &mut best_len_for_skip,
2596 h3,
2597 min_match_len,
2598 );
2599 if !rep_len_candidate_found
2600 && (h3.match_len > $profile.sufficient_match_len
2601 || $abs_pos + h3.match_len >= $current_abs_end)
2602 {
2603 $self.table.skip_insert_until_abs = $abs_pos + 1;
2604 skip_further_match_search = true;
2605 }
2606 }
2607 }
2608 if !skip_further_match_search && $bt_matchfinder {
2609 unsafe {
2611 $self.table.$bt_insert(
2612 $abs_pos,
2613 $current_abs_end,
2614 $profile,
2615 min_match_len,
2616 &mut best_len_for_skip,
2617 $out,
2618 )
2619 };
2620 } else if !skip_further_match_search {
2621 $self.table.insert_position($abs_pos);
2622 let max_chain_depth = $profile.max_chain_depth.min($self.hc.search_depth);
2623 let concat = &$self.table.history[$self.table.history_start..];
2624 let mut match_end_abs = $abs_pos + 9;
2628 if max_chain_depth > 0 {
2629 for (visited, candidate_abs) in $self
2630 .hc
2631 .chain_candidates(&$self.table, $abs_pos)
2632 .into_iter()
2633 .enumerate()
2634 {
2635 if visited >= max_chain_depth {
2636 break;
2637 }
2638 if candidate_abs == usize::MAX {
2639 break;
2640 }
2641 if candidate_abs < $self.table.history_abs_start || candidate_abs >= $abs_pos {
2642 continue;
2643 }
2644 let candidate_idx = candidate_abs - $self.table.history_abs_start;
2645 debug_assert!(
2646 $abs_pos <= $current_abs_end,
2647 "HC chain walker called past current block end"
2648 );
2649 let tail_limit = $current_abs_end - $abs_pos;
2650 let base = concat.as_ptr();
2651 let match_len =
2656 unsafe { $cpl(base.add(candidate_idx), base.add(current_idx), tail_limit) };
2657 if match_len < min_match_len {
2658 continue;
2659 }
2660 let offset = $abs_pos - candidate_abs;
2661 if super::bt::BtMatcher::push_candidate_ladder(
2662 $out,
2663 &mut best_len_for_skip,
2664 MatchCandidate {
2665 start: $abs_pos,
2666 offset,
2667 match_len,
2668 },
2669 min_match_len,
2670 ) {
2671 let candidate_end = candidate_abs + match_len;
2672 if candidate_end > match_end_abs {
2673 match_end_abs = candidate_end;
2674 }
2675 }
2676 if match_len > HC_OPT_NUM || $abs_pos + match_len >= $current_abs_end {
2677 break;
2678 }
2679 }
2680 }
2681 $self.table.skip_insert_until_abs =
2684 $self.table.skip_insert_until_abs.max(match_end_abs - 8);
2685 }
2686 if let Some(ldm) = ldm_candidate {
2687 let _ = super::bt::BtMatcher::push_candidate_ladder(
2688 $out,
2689 &mut best_len_for_skip,
2690 ldm,
2691 min_match_len,
2692 );
2693 }
2694 }};
2695}
2696
2697macro_rules! hash3_candidate_body {
2702 (
2703 $table:expr,
2704 $abs_pos:ident,
2705 $current_abs_end:ident,
2706 $min_match_len:ident,
2707 $cpl:path $(,)?
2708 ) => {{
2709 if $table.hash3_log == 0 {
2710 return None;
2711 }
2712 let idx = $abs_pos.checked_sub($table.history_abs_start)?;
2713 let concat = $table.live_history();
2714 if idx + 4 > concat.len() {
2715 return None;
2716 }
2717 let hash3 = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
2718 concat,
2719 idx,
2720 $table.hash3_log,
2721 3,
2722 );
2723 let entry = $table
2724 .hash3_table
2725 .get(hash3)
2726 .copied()
2727 .unwrap_or($crate::encoding::match_table::storage::HC_EMPTY);
2728 let candidate_abs =
2729 $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
2730 entry,
2731 $table.position_base,
2732 $table.index_shift,
2733 )?;
2734 if candidate_abs < $table.history_abs_start || candidate_abs >= $abs_pos {
2735 return None;
2736 }
2737 let offset = $abs_pos - candidate_abs;
2738 if offset >= $crate::encoding::bt::HC3_MAX_OFFSET {
2739 return None;
2740 }
2741 let candidate_idx = candidate_abs - $table.history_abs_start;
2742 let tail_limit = $current_abs_end.saturating_sub($abs_pos);
2743 let base = concat.as_ptr();
2744 let match_len = unsafe { $cpl(base.add(candidate_idx), base.add(idx), tail_limit) };
2747 (match_len >= $min_match_len).then_some($crate::encoding::opt::types::MatchCandidate {
2748 start: $abs_pos,
2749 offset,
2750 match_len,
2751 })
2752 }};
2753}
2754pub(crate) use hash3_candidate_body;
2755
2756macro_rules! for_each_repcode_candidate_body {
2766 (
2767 $table:expr,
2768 $abs_pos:ident,
2769 $lit_len:ident,
2770 $reps:ident,
2771 $current_abs_end:ident,
2772 $min_match_len:ident,
2773 $f:ident,
2774 $cpl:path $(,)?
2775 ) => {{
2776 let rep_offsets: [Option<usize>; 3] = if $lit_len == 0 {
2777 [
2778 Some($reps[1] as usize),
2779 Some($reps[2] as usize),
2780 ($reps[0] > 1).then_some(($reps[0] - 1) as usize),
2781 ]
2782 } else {
2783 [
2784 Some($reps[0] as usize),
2785 Some($reps[1] as usize),
2786 Some($reps[2] as usize),
2787 ]
2788 };
2789 let concat = $table.live_history();
2790 let current_idx = $abs_pos - $table.history_abs_start;
2791 if current_idx + 4 > concat.len() {
2792 return;
2793 }
2794 let tail_limit = $current_abs_end.saturating_sub($abs_pos);
2795 let base = concat.as_ptr();
2796 let concat_len = concat.len();
2797 for rep in rep_offsets.into_iter().flatten() {
2798 if rep == 0 || rep > $abs_pos {
2799 continue;
2800 }
2801 let candidate_pos = $abs_pos - rep;
2802 if candidate_pos < $table.history_abs_start {
2803 continue;
2804 }
2805 let candidate_idx = candidate_pos - $table.history_abs_start;
2806 let max = (concat_len - candidate_idx)
2811 .min(concat_len - current_idx)
2812 .min(tail_limit);
2813 let match_len = unsafe { $cpl(base.add(candidate_idx), base.add(current_idx), max) };
2814 if match_len < $min_match_len {
2815 continue;
2816 }
2817 $f(MatchCandidate {
2818 start: $abs_pos,
2819 offset: rep,
2820 match_len,
2821 });
2822 }
2823 }};
2824}
2825pub(crate) use for_each_repcode_candidate_body;
2826
2827macro_rules! bt_insert_and_collect_matches_body {
2834 (
2835 $table:expr,
2836 $search_depth:expr,
2837 $abs_pos:ident,
2838 $current_abs_end:ident,
2839 $profile:ident,
2840 $min_match_len:ident,
2841 $best_len_for_skip:ident,
2842 $out:ident,
2843 $cmf:path $(,)?
2844 ) => {{
2845 let idx = $abs_pos - $table.history_abs_start;
2846 let concat = &$table.history[$table.history_start..];
2847 if idx + 8 > concat.len() {
2848 return;
2849 }
2850 debug_assert!(
2851 $abs_pos <= $current_abs_end,
2852 "BT collect called past current block end"
2853 );
2854 let tail_limit = $current_abs_end - $abs_pos;
2855 let hash = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
2856 concat,
2857 idx,
2858 $table.hash_log,
2859 $crate::encoding::bt::BtMatcher::HASH_MLS,
2860 );
2861 let Some(relative_pos) = $table.relative_position($abs_pos) else {
2862 return;
2863 };
2864 let stored = relative_pos + 1;
2865 let bt_mask = $table.bt_mask();
2866 let bt_low = $abs_pos.saturating_sub(bt_mask);
2869 let window_low = $table.window_low_abs_for_target($abs_pos);
2870 let mut match_end_abs = $abs_pos + 9;
2874 let mut compares_left = $profile.max_chain_depth.min($search_depth);
2875 let mut common_length_smaller = 0usize;
2876 let mut common_length_larger = 0usize;
2877 let pair_idx = $table.bt_pair_index_for_abs($abs_pos);
2878 let mut smaller_slot = pair_idx;
2879 let mut larger_slot = pair_idx + 1;
2880 let mut match_stored = $table.hash_table[hash];
2881 $table.hash_table[hash] = stored;
2882 debug_assert!(
2887 $min_match_len >= $crate::encoding::cost_model::HC_FORMAT_MINMATCH,
2888 "min_match_len must be at least HC_FORMAT_MINMATCH"
2889 );
2890 let mut best_len = (*$best_len_for_skip).max($min_match_len - 1);
2891
2892 while compares_left > 0 {
2893 let Some(candidate_abs) =
2894 $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
2895 match_stored,
2896 $table.position_base,
2897 $table.index_shift,
2898 )
2899 else {
2900 break;
2901 };
2902 if candidate_abs < window_low || candidate_abs >= $abs_pos {
2903 break;
2904 }
2905 compares_left -= 1;
2906
2907 let next_pair_idx = $table.bt_pair_index_for_abs(candidate_abs);
2908 let next_smaller = $table.chain_table[next_pair_idx];
2909 let next_larger = $table.chain_table[next_pair_idx + 1];
2910 let seed_len = common_length_smaller.min(common_length_larger);
2911 let candidate_idx = candidate_abs - $table.history_abs_start;
2912 let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };
2915
2916 if match_len > best_len {
2917 let offset = $abs_pos - candidate_abs;
2918 let accepted = $crate::encoding::bt::BtMatcher::push_candidate_ladder(
2919 $out,
2920 $best_len_for_skip,
2921 $crate::encoding::opt::types::MatchCandidate {
2922 start: $abs_pos,
2923 offset,
2924 match_len,
2925 },
2926 $min_match_len,
2927 );
2928 if accepted {
2929 best_len = match_len;
2930 let candidate_end = candidate_abs + match_len;
2938 if candidate_end > match_end_abs {
2939 match_end_abs = candidate_end;
2940 }
2941 if match_len >= tail_limit
2942 || match_len > $crate::encoding::cost_model::HC_OPT_NUM
2943 {
2944 break;
2945 }
2946 }
2947 }
2948
2949 if match_len >= tail_limit {
2950 break;
2951 }
2952
2953 let candidate_next = candidate_idx + match_len;
2954 let current_next = idx + match_len;
2955 if concat[candidate_next] < concat[current_next] {
2956 $table.chain_table[smaller_slot] = match_stored;
2957 common_length_smaller = match_len;
2958 if candidate_abs <= bt_low {
2959 smaller_slot = usize::MAX;
2960 break;
2961 }
2962 smaller_slot = next_pair_idx + 1;
2963 match_stored = next_larger;
2964 } else {
2965 $table.chain_table[larger_slot] = match_stored;
2966 common_length_larger = match_len;
2967 if candidate_abs <= bt_low {
2968 larger_slot = usize::MAX;
2969 break;
2970 }
2971 larger_slot = next_pair_idx;
2972 match_stored = next_smaller;
2973 }
2974 }
2975
2976 if smaller_slot != usize::MAX {
2977 $table.chain_table[smaller_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
2978 }
2979 if larger_slot != usize::MAX {
2980 $table.chain_table[larger_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
2981 }
2982 $table.skip_insert_until_abs = match_end_abs - 8;
2985 }};
2986}
2987pub(crate) use bt_insert_and_collect_matches_body;
2988
2989impl HcMatchGenerator {
2990 fn should_run_btultra2_seed_pass<S: super::strategy::Strategy>(
2991 &self,
2992 current_len: usize,
2993 ) -> bool {
2994 if !(S::OPT_LEVEL == 2 && S::USE_HASH3) {
3001 return false;
3002 }
3003 let HcBackend::Bt(bt) = &self.backend else {
3004 return false;
3005 };
3006 bt.opt_state.lit_length_sum == 0
3007 && bt.opt_state.dictionary_seed.is_none()
3008 && !self.table.dictionary_primed_for_frame
3009 && bt.ldm_sequences.is_empty()
3010 && self.table.window_size == current_len
3011 && self.table.history_abs_start == 0
3012 && self.table.window.len() == 1
3013 && current_len > HC_PREDEF_THRESHOLD
3014 }
3015
3016 fn new(max_window_size: usize) -> Self {
3017 Self {
3018 table: super::match_table::storage::MatchTable::new(max_window_size),
3019 hc: super::hc::HcMatcher::new(2, HC_SEARCH_DEPTH, HC_TARGET_LEN),
3020 backend: HcBackend::Hc,
3023 strategy_tag: super::strategy::StrategyTag::Lazy,
3030 }
3031 }
3032
3033 fn configure(&mut self, config: HcConfig, tag: super::strategy::StrategyTag, window_log: u8) {
3034 use super::strategy::StrategyTag;
3035 self.strategy_tag = tag;
3039 let is_btultra2 = tag == StrategyTag::BtUltra2;
3040 let uses_bt = matches!(
3041 tag,
3042 StrategyTag::BtOpt | StrategyTag::BtUltra | StrategyTag::BtUltra2
3043 );
3044 let next_hash3_log = if is_btultra2 {
3045 HC3_HASH_LOG.min(window_log as usize)
3046 } else {
3047 0
3048 };
3049 let resize = self.table.hash_log != config.hash_log
3050 || self.table.chain_log != config.chain_log
3051 || self.table.hash3_log != next_hash3_log;
3052 self.table.hash_log = config.hash_log;
3053 self.table.chain_log = config.chain_log;
3054 self.table.hash3_log = next_hash3_log;
3055 self.hc.search_depth = if uses_bt {
3056 config.search_depth
3057 } else {
3058 config.search_depth.min(MAX_HC_SEARCH_DEPTH)
3059 };
3060 self.hc.target_len = config.target_len;
3061 self.table.search_depth = self.hc.search_depth;
3065 self.table.is_btultra2 = is_btultra2;
3066 self.table.uses_bt = uses_bt;
3067 match (&self.backend, self.table.uses_bt) {
3071 (HcBackend::Hc, true) => {
3072 self.backend = HcBackend::Bt(alloc::boxed::Box::new(super::bt::BtMatcher::new()));
3073 }
3074 (HcBackend::Bt(_), false) => {
3075 self.backend = HcBackend::Hc;
3076 }
3077 _ => {}
3078 }
3079 if resize && !self.table.hash_table.is_empty() {
3080 self.table.hash_table.clear();
3082 self.table.hash3_table.clear();
3083 self.table.chain_table.clear();
3084 }
3085 }
3086
3087 fn seed_dictionary_entropy(
3088 &mut self,
3089 huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
3090 ll: Option<&crate::fse::fse_encoder::FSETable>,
3091 ml: Option<&crate::fse::fse_encoder::FSETable>,
3092 of: Option<&crate::fse::fse_encoder::FSETable>,
3093 ) {
3094 if let HcBackend::Bt(bt) = &mut self.backend {
3095 bt.opt_state.seed_dictionary_entropy(huff, ll, ml, of);
3096 }
3097 }
3098
3099 fn reset(&mut self, reuse_space: impl FnMut(Vec<u8>)) {
3100 self.table.reset(reuse_space);
3101 if let HcBackend::Bt(bt) = &mut self.backend {
3102 bt.reset();
3103 }
3104 }
3105
3106 fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
3109 self.table.skip_matching(incompressible_hint);
3110 }
3111
3112 #[cfg(test)]
3118 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
3119 use super::strategy::{self, StrategyTag};
3120 match self.strategy_tag {
3126 StrategyTag::Fast | StrategyTag::Dfast | StrategyTag::Greedy | StrategyTag::Lazy => {
3127 self.start_matching_lazy(&mut handle_sequence)
3128 }
3129 StrategyTag::BtOpt => {
3130 self.start_matching_optimal::<strategy::BtOpt>(&mut handle_sequence)
3131 }
3132 StrategyTag::BtUltra => {
3133 self.start_matching_optimal::<strategy::BtUltra>(&mut handle_sequence)
3134 }
3135 StrategyTag::BtUltra2 => {
3136 self.start_matching_optimal::<strategy::BtUltra2>(&mut handle_sequence)
3137 }
3138 }
3139 }
3140
3141 pub(crate) fn start_matching_strategy<S: super::strategy::Strategy>(
3152 &mut self,
3153 handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
3154 ) {
3155 debug_assert_eq!(
3156 self.table.uses_bt,
3157 S::USE_BT,
3158 "Strategy::USE_BT disagrees with runtime table.uses_bt at HC dispatch"
3159 );
3160 if S::USE_BT {
3161 self.start_matching_optimal::<S>(handle_sequence)
3162 } else {
3163 self.start_matching_lazy(handle_sequence)
3164 }
3165 }
3166
3167 fn start_matching_lazy(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
3168 self.table.ensure_tables();
3169
3170 let current_len = self.table.window.back().unwrap().len();
3171 if current_len == 0 {
3172 return;
3173 }
3174
3175 let current_abs_start = self.table.history_abs_start + self.table.window_size - current_len;
3176 let current_abs_end = current_abs_start + current_len;
3177 self.table
3178 .backfill_boundary_positions(current_abs_start, current_abs_end);
3179
3180 let mut pos = 0usize;
3181 let mut literals_start = 0usize;
3182 while pos + HC_MIN_MATCH_LEN <= current_len {
3183 let abs_pos = current_abs_start + pos;
3184 let lit_len = pos - literals_start;
3185
3186 let best = self.hc.find_best_match(&self.table, abs_pos, lit_len);
3187 if let Some(candidate) = self.hc.pick_lazy_match(&self.table, abs_pos, lit_len, best) {
3188 self.table
3189 .insert_positions(abs_pos, candidate.start + candidate.match_len);
3190 let current = self.table.window.back().unwrap().as_slice();
3191 let start = candidate.start - current_abs_start;
3192 let literals = ¤t[literals_start..start];
3193 handle_sequence(Sequence::Triple {
3194 literals,
3195 offset: candidate.offset,
3196 match_len: candidate.match_len,
3197 });
3198 let _ = encode_offset_with_history(
3199 candidate.offset as u32,
3200 literals.len() as u32,
3201 &mut self.table.offset_hist,
3202 );
3203 pos = start + candidate.match_len;
3204 literals_start = pos;
3205 } else {
3206 self.table.insert_position(abs_pos);
3207 pos += 1;
3208 }
3209 }
3210
3211 while pos + 4 <= current_len {
3214 self.table.insert_position(current_abs_start + pos);
3215 pos += 1;
3216 }
3217
3218 if literals_start < current_len {
3219 let current = self.table.window.back().unwrap().as_slice();
3220 handle_sequence(Sequence::Literals {
3221 literals: ¤t[literals_start..],
3222 });
3223 }
3224 }
3225
3226 fn start_matching_optimal<S: super::strategy::Strategy>(
3227 &mut self,
3228 mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
3229 ) {
3230 self.table.ensure_tables();
3231 let current_len = self.table.window.back().unwrap().len();
3232 if current_len == 0 {
3233 return;
3234 }
3235 let current_ptr = self.table.window.back().unwrap().as_ptr();
3236 let current = unsafe { core::slice::from_raw_parts(current_ptr, current_len) };
3240
3241 let current_abs_start = self.table.history_abs_start + self.table.window_size - current_len;
3242 let current_abs_end = current_abs_start + current_len;
3243 self.table
3244 .apply_limited_update_after_long_match(current_abs_start);
3245 let hash3_start_cursor = self
3246 .table
3247 .skip_insert_until_abs
3248 .max(self.table.history_abs_start);
3249 self.table
3250 .backfill_boundary_positions(current_abs_start, current_abs_end);
3251 self.table.next_to_update3 = hash3_start_cursor;
3252 let live_history = self.table.live_history();
3267 let history_abs_start = self.table.history_abs_start;
3268 self.backend.bt_mut().prepare_ldm_candidates(
3269 live_history,
3270 history_abs_start,
3271 current_abs_start,
3272 current_len,
3273 );
3274
3275 if self.should_run_btultra2_seed_pass::<S>(current_len) {
3276 self.run_btultra2_seed_pass(current, current_abs_start, current_len);
3277 }
3278
3279 let profile = HcOptimalCostProfile::const_for_strategy::<S>();
3285 let mut opt_state =
3286 core::mem::replace(&mut self.backend.bt_mut().opt_state, HcOptState::new());
3287 opt_state.rescale_freqs(current, profile);
3288 let mut best_plan = core::mem::take(&mut self.backend.bt_mut().opt_segment_plan_scratch);
3289 best_plan.clear();
3290 let mut plan_reps = self.table.offset_hist;
3291 let (mut cursor, mut plan_litlen) = self
3292 .table
3293 .donor_opt_start_cursor_and_litlen(current_abs_start);
3294 let mut plan_literals_cursor = 0usize;
3295 let match_loop_limit = current_len.saturating_sub(8);
3296 while cursor < match_loop_limit {
3297 let remaining_len = current_len - cursor;
3298 let segment_abs_start = current_abs_start + cursor;
3299 let segment_start = best_plan.len();
3300 let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan::<S>(
3301 ¤t[cursor..],
3302 segment_abs_start,
3303 remaining_len,
3304 HcOptimalPlanState {
3305 reps: plan_reps,
3306 litlen: plan_litlen,
3307 profile,
3308 },
3309 &opt_state,
3310 &mut best_plan,
3311 );
3312 BtMatcher::update_plan_stats_segment(
3313 current,
3314 current_len,
3315 &best_plan[segment_start..],
3316 &mut plan_literals_cursor,
3317 &mut plan_reps,
3318 &mut opt_state,
3319 profile.accurate,
3320 );
3321 plan_reps = end_reps;
3322 plan_litlen = end_litlen;
3323 cursor += consumed_len;
3324 }
3325
3326 self.table
3327 .emit_optimal_plan(current_len, &best_plan, &mut handle_sequence);
3328 best_plan.clear();
3329 self.backend.bt_mut().opt_segment_plan_scratch = best_plan;
3330 self.backend.bt_mut().opt_state = opt_state;
3331 }
3332
3333 fn run_btultra2_seed_pass(
3334 &mut self,
3335 current: &[u8],
3336 current_abs_start: usize,
3337 current_len: usize,
3338 ) {
3339 type S = super::strategy::BtUltra2;
3344 let seed_profile = HcOptimalCostProfile::const_for_strategy::<S>();
3345 let mut opt_state =
3346 core::mem::replace(&mut self.backend.bt_mut().opt_state, HcOptState::new());
3347 opt_state.rescale_freqs(current, seed_profile);
3348 let mut seed_reps = self.table.offset_hist;
3349 let (mut cursor, mut seed_litlen) = self
3350 .table
3351 .donor_opt_start_cursor_and_litlen(current_abs_start);
3352 let mut seed_literals_cursor = 0usize;
3353 let mut seed_plan = core::mem::take(&mut self.backend.bt_mut().opt_seed_plan_scratch);
3354 seed_plan.clear();
3355 let match_loop_limit = current_len.saturating_sub(8);
3356 while cursor < match_loop_limit {
3357 let remaining_len = current_len - cursor;
3358 let segment_abs_start = current_abs_start + cursor;
3359 let segment_start = seed_plan.len();
3360 let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan::<S>(
3361 ¤t[cursor..],
3362 segment_abs_start,
3363 remaining_len,
3364 HcOptimalPlanState {
3365 reps: seed_reps,
3366 litlen: seed_litlen,
3367 profile: seed_profile,
3368 },
3369 &opt_state,
3370 &mut seed_plan,
3371 );
3372 BtMatcher::update_plan_stats_segment(
3373 current,
3374 current_len,
3375 &seed_plan[segment_start..],
3376 &mut seed_literals_cursor,
3377 &mut seed_reps,
3378 &mut opt_state,
3379 seed_profile.accurate,
3380 );
3381 seed_plan.truncate(segment_start);
3382 seed_reps = end_reps;
3383 seed_litlen = end_litlen;
3384 cursor += consumed_len;
3385 }
3386 seed_plan.clear();
3387 self.backend.bt_mut().opt_seed_plan_scratch = seed_plan;
3388 self.backend.bt_mut().opt_state = opt_state;
3389
3390 self.table.position_base = self.table.history_abs_start;
3393 self.table.index_shift = current_len;
3394 self.table.next_to_update3 = current_abs_start;
3395 self.table.skip_insert_until_abs = current_abs_start;
3396 self.table.allow_zero_relative_position = true;
3402 }
3403
3404 fn build_optimal_plan<S: super::strategy::Strategy>(
3405 &mut self,
3406 current: &[u8],
3407 current_abs_start: usize,
3408 current_len: usize,
3409 initial_state: HcOptimalPlanState,
3410 stats: &HcOptState,
3411 out: &mut Vec<HcOptimalSequence>,
3412 ) -> (u32, [u32; 3], usize, usize) {
3413 debug_assert!(S::USE_BT, "build_optimal_plan called on non-BT strategy");
3414 debug_assert_eq!(initial_state.profile.accurate, S::ACCURATE_PRICE);
3415 debug_assert_eq!(
3416 initial_state.profile.favor_small_offsets,
3417 S::FAVOR_SMALL_OFFSETS
3418 );
3419 let profile = initial_state.profile;
3426 match (profile.accurate, profile.favor_small_offsets) {
3427 (true, false) => self.build_optimal_plan_impl::<S, true, false>(
3428 current,
3429 current_abs_start,
3430 current_len,
3431 initial_state,
3432 stats,
3433 out,
3434 ),
3435 (true, true) => self.build_optimal_plan_impl::<S, true, true>(
3436 current,
3437 current_abs_start,
3438 current_len,
3439 initial_state,
3440 stats,
3441 out,
3442 ),
3443 (false, false) => self.build_optimal_plan_impl::<S, false, false>(
3444 current,
3445 current_abs_start,
3446 current_len,
3447 initial_state,
3448 stats,
3449 out,
3450 ),
3451 (false, true) => self.build_optimal_plan_impl::<S, false, true>(
3452 current,
3453 current_abs_start,
3454 current_len,
3455 initial_state,
3456 stats,
3457 out,
3458 ),
3459 }
3460 }
3461
3462 #[inline(always)]
3471 fn build_optimal_plan_impl<
3472 S: super::strategy::Strategy,
3473 const ACCURATE_PRICE: bool,
3474 const FAVOR_SMALL_OFFSETS: bool,
3475 >(
3476 &mut self,
3477 current: &[u8],
3478 current_abs_start: usize,
3479 current_len: usize,
3480 initial_state: HcOptimalPlanState,
3481 stats: &HcOptState,
3482 out: &mut Vec<HcOptimalSequence>,
3483 ) -> (u32, [u32; 3], usize, usize) {
3484 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3485 unsafe {
3486 self.build_optimal_plan_impl_neon::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3487 current,
3488 current_abs_start,
3489 current_len,
3490 initial_state,
3491 stats,
3492 out,
3493 )
3494 }
3495 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3496 {
3497 use crate::encoding::fastpath::{FastpathKernel, select_kernel};
3498 match select_kernel() {
3499 FastpathKernel::Avx2Bmi2 => unsafe {
3500 self.build_optimal_plan_impl_avx2_bmi2::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3501 current,
3502 current_abs_start,
3503 current_len,
3504 initial_state,
3505 stats,
3506 out,
3507 )
3508 },
3509 FastpathKernel::Sse42 => unsafe {
3510 self.build_optimal_plan_impl_sse42::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3511 current,
3512 current_abs_start,
3513 current_len,
3514 initial_state,
3515 stats,
3516 out,
3517 )
3518 },
3519 FastpathKernel::Scalar => self
3520 .build_optimal_plan_impl_scalar::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3521 current,
3522 current_abs_start,
3523 current_len,
3524 initial_state,
3525 stats,
3526 out,
3527 ),
3528 }
3529 }
3530 #[cfg(not(any(
3531 all(target_arch = "aarch64", target_endian = "little"),
3532 target_arch = "x86",
3533 target_arch = "x86_64"
3534 )))]
3535 {
3536 self.build_optimal_plan_impl_scalar::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3537 current,
3538 current_abs_start,
3539 current_len,
3540 initial_state,
3541 stats,
3542 out,
3543 )
3544 }
3545 }
3546
3547 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3551 #[target_feature(enable = "neon")]
3552 unsafe fn build_optimal_plan_impl_neon<
3553 S: super::strategy::Strategy,
3554 const ACCURATE_PRICE: bool,
3555 const FAVOR_SMALL_OFFSETS: bool,
3556 >(
3557 &mut self,
3558 current: &[u8],
3559 current_abs_start: usize,
3560 current_len: usize,
3561 initial_state: HcOptimalPlanState,
3562 stats: &HcOptState,
3563 out: &mut Vec<HcOptimalSequence>,
3564 ) -> (u32, [u32; 3], usize, usize) {
3565 build_optimal_plan_impl_body!(
3566 self,
3567 S,
3568 current,
3569 current_abs_start,
3570 current_len,
3571 initial_state,
3572 stats,
3573 out,
3574 collect_optimal_candidates_initialized_neon,
3575 )
3576 }
3577
3578 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3579 #[target_feature(enable = "sse4.2")]
3580 unsafe fn build_optimal_plan_impl_sse42<
3581 S: super::strategy::Strategy,
3582 const ACCURATE_PRICE: bool,
3583 const FAVOR_SMALL_OFFSETS: bool,
3584 >(
3585 &mut self,
3586 current: &[u8],
3587 current_abs_start: usize,
3588 current_len: usize,
3589 initial_state: HcOptimalPlanState,
3590 stats: &HcOptState,
3591 out: &mut Vec<HcOptimalSequence>,
3592 ) -> (u32, [u32; 3], usize, usize) {
3593 build_optimal_plan_impl_body!(
3594 self,
3595 S,
3596 current,
3597 current_abs_start,
3598 current_len,
3599 initial_state,
3600 stats,
3601 out,
3602 collect_optimal_candidates_initialized_sse42,
3603 )
3604 }
3605
3606 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3607 #[target_feature(enable = "avx2,bmi2")]
3608 unsafe fn build_optimal_plan_impl_avx2_bmi2<
3609 S: super::strategy::Strategy,
3610 const ACCURATE_PRICE: bool,
3611 const FAVOR_SMALL_OFFSETS: bool,
3612 >(
3613 &mut self,
3614 current: &[u8],
3615 current_abs_start: usize,
3616 current_len: usize,
3617 initial_state: HcOptimalPlanState,
3618 stats: &HcOptState,
3619 out: &mut Vec<HcOptimalSequence>,
3620 ) -> (u32, [u32; 3], usize, usize) {
3621 build_optimal_plan_impl_body!(
3622 self,
3623 S,
3624 current,
3625 current_abs_start,
3626 current_len,
3627 initial_state,
3628 stats,
3629 out,
3630 collect_optimal_candidates_initialized_avx2_bmi2,
3631 )
3632 }
3633
3634 #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
3635 #[allow(unused_unsafe)]
3639 fn build_optimal_plan_impl_scalar<
3640 S: super::strategy::Strategy,
3641 const ACCURATE_PRICE: bool,
3642 const FAVOR_SMALL_OFFSETS: bool,
3643 >(
3644 &mut self,
3645 current: &[u8],
3646 current_abs_start: usize,
3647 current_len: usize,
3648 initial_state: HcOptimalPlanState,
3649 stats: &HcOptState,
3650 out: &mut Vec<HcOptimalSequence>,
3651 ) -> (u32, [u32; 3], usize, usize) {
3652 build_optimal_plan_impl_body!(
3653 self,
3654 S,
3655 current,
3656 current_abs_start,
3657 current_len,
3658 initial_state,
3659 stats,
3660 out,
3661 collect_optimal_candidates_initialized_scalar,
3662 )
3663 }
3664
3665 #[cfg(test)]
3666 fn collect_optimal_candidates(
3667 &mut self,
3668 abs_pos: usize,
3669 current_abs_end: usize,
3670 profile: HcOptimalCostProfile,
3671 query: HcCandidateQuery,
3672 out: &mut Vec<MatchCandidate>,
3673 ) {
3674 use super::strategy::{self, StrategyTag};
3675 self.table.ensure_tables();
3676 match self.strategy_tag {
3682 StrategyTag::BtUltra2 => self
3683 .collect_optimal_candidates_initialized::<strategy::BtUltra2, true>(
3684 abs_pos,
3685 current_abs_end,
3686 profile,
3687 query,
3688 out,
3689 ),
3690 StrategyTag::BtUltra => self
3691 .collect_optimal_candidates_initialized::<strategy::BtUltra, true>(
3692 abs_pos,
3693 current_abs_end,
3694 profile,
3695 query,
3696 out,
3697 ),
3698 StrategyTag::BtOpt => self
3699 .collect_optimal_candidates_initialized::<strategy::BtOpt, true>(
3700 abs_pos,
3701 current_abs_end,
3702 profile,
3703 query,
3704 out,
3705 ),
3706 StrategyTag::Fast | StrategyTag::Dfast | StrategyTag::Greedy | StrategyTag::Lazy => {
3707 self.collect_optimal_candidates_initialized::<strategy::Lazy, false>(
3708 abs_pos,
3709 current_abs_end,
3710 profile,
3711 query,
3712 out,
3713 )
3714 }
3715 }
3716 }
3717
3718 #[allow(dead_code)]
3728 #[inline(always)]
3729 fn collect_optimal_candidates_initialized<
3730 S: super::strategy::Strategy,
3731 const USE_BT_MATCHFINDER: bool,
3732 >(
3733 &mut self,
3734 abs_pos: usize,
3735 current_abs_end: usize,
3736 profile: HcOptimalCostProfile,
3737 query: HcCandidateQuery,
3738 out: &mut Vec<MatchCandidate>,
3739 ) {
3740 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3741 unsafe {
3742 self.collect_optimal_candidates_initialized_neon::<S, USE_BT_MATCHFINDER>(
3743 abs_pos,
3744 current_abs_end,
3745 profile,
3746 query,
3747 out,
3748 )
3749 }
3750 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3751 {
3752 use crate::encoding::fastpath::{FastpathKernel, select_kernel};
3753 match select_kernel() {
3754 FastpathKernel::Avx2Bmi2 => unsafe {
3755 self.collect_optimal_candidates_initialized_avx2_bmi2::<S, USE_BT_MATCHFINDER>(
3756 abs_pos,
3757 current_abs_end,
3758 profile,
3759 query,
3760 out,
3761 )
3762 },
3763 FastpathKernel::Sse42 => unsafe {
3764 self.collect_optimal_candidates_initialized_sse42::<S, USE_BT_MATCHFINDER>(
3765 abs_pos,
3766 current_abs_end,
3767 profile,
3768 query,
3769 out,
3770 )
3771 },
3772 FastpathKernel::Scalar => self
3773 .collect_optimal_candidates_initialized_scalar::<S, USE_BT_MATCHFINDER>(
3774 abs_pos,
3775 current_abs_end,
3776 profile,
3777 query,
3778 out,
3779 ),
3780 }
3781 }
3782 #[cfg(not(any(
3783 all(target_arch = "aarch64", target_endian = "little"),
3784 target_arch = "x86",
3785 target_arch = "x86_64"
3786 )))]
3787 {
3788 self.collect_optimal_candidates_initialized_scalar::<S, USE_BT_MATCHFINDER>(
3789 abs_pos,
3790 current_abs_end,
3791 profile,
3792 query,
3793 out,
3794 )
3795 }
3796 }
3797
3798 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3804 #[target_feature(enable = "neon")]
3805 unsafe fn collect_optimal_candidates_initialized_neon<
3806 S: super::strategy::Strategy,
3807 const USE_BT_MATCHFINDER: bool,
3808 >(
3809 &mut self,
3810 abs_pos: usize,
3811 current_abs_end: usize,
3812 profile: HcOptimalCostProfile,
3813 query: HcCandidateQuery,
3814 out: &mut Vec<MatchCandidate>,
3815 ) {
3816 collect_optimal_candidates_initialized_body!(
3817 self,
3818 S,
3819 abs_pos,
3820 current_abs_end,
3821 profile,
3822 query,
3823 out,
3824 USE_BT_MATCHFINDER,
3825 bt_update_tree_until_neon,
3826 bt_insert_and_collect_matches_neon,
3827 for_each_repcode_candidate_with_reps_neon,
3828 hash3_candidate_neon,
3829 crate::encoding::fastpath::neon::common_prefix_len_ptr,
3830 )
3831 }
3832
3833 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3834 #[target_feature(enable = "sse4.2")]
3835 unsafe fn collect_optimal_candidates_initialized_sse42<
3836 S: super::strategy::Strategy,
3837 const USE_BT_MATCHFINDER: bool,
3838 >(
3839 &mut self,
3840 abs_pos: usize,
3841 current_abs_end: usize,
3842 profile: HcOptimalCostProfile,
3843 query: HcCandidateQuery,
3844 out: &mut Vec<MatchCandidate>,
3845 ) {
3846 collect_optimal_candidates_initialized_body!(
3847 self,
3848 S,
3849 abs_pos,
3850 current_abs_end,
3851 profile,
3852 query,
3853 out,
3854 USE_BT_MATCHFINDER,
3855 bt_update_tree_until_sse42,
3856 bt_insert_and_collect_matches_sse42,
3857 for_each_repcode_candidate_with_reps_sse42,
3858 hash3_candidate_sse42,
3859 crate::encoding::fastpath::sse42::common_prefix_len_ptr,
3860 )
3861 }
3862
3863 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3864 #[target_feature(enable = "avx2,bmi2")]
3865 unsafe fn collect_optimal_candidates_initialized_avx2_bmi2<
3866 S: super::strategy::Strategy,
3867 const USE_BT_MATCHFINDER: bool,
3868 >(
3869 &mut self,
3870 abs_pos: usize,
3871 current_abs_end: usize,
3872 profile: HcOptimalCostProfile,
3873 query: HcCandidateQuery,
3874 out: &mut Vec<MatchCandidate>,
3875 ) {
3876 collect_optimal_candidates_initialized_body!(
3877 self,
3878 S,
3879 abs_pos,
3880 current_abs_end,
3881 profile,
3882 query,
3883 out,
3884 USE_BT_MATCHFINDER,
3885 bt_update_tree_until_avx2_bmi2,
3886 bt_insert_and_collect_matches_avx2_bmi2,
3887 for_each_repcode_candidate_with_reps_avx2_bmi2,
3888 hash3_candidate_avx2_bmi2,
3889 crate::encoding::fastpath::avx2_bmi2::common_prefix_len_ptr,
3890 )
3891 }
3892
3893 #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
3894 #[allow(unused_unsafe)]
3897 fn collect_optimal_candidates_initialized_scalar<
3898 S: super::strategy::Strategy,
3899 const USE_BT_MATCHFINDER: bool,
3900 >(
3901 &mut self,
3902 abs_pos: usize,
3903 current_abs_end: usize,
3904 profile: HcOptimalCostProfile,
3905 query: HcCandidateQuery,
3906 out: &mut Vec<MatchCandidate>,
3907 ) {
3908 collect_optimal_candidates_initialized_body!(
3909 self,
3910 S,
3911 abs_pos,
3912 current_abs_end,
3913 profile,
3914 query,
3915 out,
3916 USE_BT_MATCHFINDER,
3917 bt_update_tree_until_scalar,
3918 bt_insert_and_collect_matches_scalar,
3919 for_each_repcode_candidate_with_reps_scalar,
3920 hash3_candidate_scalar,
3921 crate::encoding::fastpath::scalar::common_prefix_len_ptr,
3922 )
3923 }
3924}
3925
3926#[cfg(any())] #[test]
3928fn matches() {
3929 let mut matcher = MatchGenerator::new(1000);
3930 let mut original_data = Vec::new();
3931 let mut reconstructed = Vec::new();
3932
3933 let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
3934 Sequence::Literals { literals } => {
3935 assert!(!literals.is_empty());
3936 reconstructed.extend_from_slice(literals);
3937 }
3938 Sequence::Triple {
3939 literals,
3940 offset,
3941 match_len,
3942 } => {
3943 assert!(offset > 0);
3944 assert!(match_len >= MIN_MATCH_LEN);
3945 reconstructed.extend_from_slice(literals);
3946 assert!(offset <= reconstructed.len());
3947 let start = reconstructed.len() - offset;
3948 for i in 0..match_len {
3949 let byte = reconstructed[start + i];
3950 reconstructed.push(byte);
3951 }
3952 }
3953 };
3954
3955 matcher.add_data(
3956 alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
3957 SuffixStore::with_capacity(100),
3958 |_, _| {},
3959 );
3960 original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
3961
3962 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3963
3964 assert!(!matcher.next_sequence(|_| {}));
3965
3966 matcher.add_data(
3967 alloc::vec![
3968 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
3969 ],
3970 SuffixStore::with_capacity(100),
3971 |_, _| {},
3972 );
3973 original_data.extend_from_slice(&[
3974 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
3975 ]);
3976
3977 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3978 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3979 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3980 assert!(!matcher.next_sequence(|_| {}));
3981
3982 matcher.add_data(
3983 alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
3984 SuffixStore::with_capacity(100),
3985 |_, _| {},
3986 );
3987 original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);
3988
3989 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3990 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3991 assert!(!matcher.next_sequence(|_| {}));
3992
3993 matcher.add_data(
3994 alloc::vec![0, 0, 0, 0, 0],
3995 SuffixStore::with_capacity(100),
3996 |_, _| {},
3997 );
3998 original_data.extend_from_slice(&[0, 0, 0, 0, 0]);
3999
4000 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
4001 assert!(!matcher.next_sequence(|_| {}));
4002
4003 matcher.add_data(
4004 alloc::vec![7, 8, 9, 10, 11],
4005 SuffixStore::with_capacity(100),
4006 |_, _| {},
4007 );
4008 original_data.extend_from_slice(&[7, 8, 9, 10, 11]);
4009
4010 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
4011 assert!(!matcher.next_sequence(|_| {}));
4012
4013 matcher.add_data(
4014 alloc::vec![1, 3, 5, 7, 9],
4015 SuffixStore::with_capacity(100),
4016 |_, _| {},
4017 );
4018 matcher.skip_matching();
4019 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
4020 reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
4021 assert!(!matcher.next_sequence(|_| {}));
4022
4023 matcher.add_data(
4024 alloc::vec![1, 3, 5, 7, 9],
4025 SuffixStore::with_capacity(100),
4026 |_, _| {},
4027 );
4028 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
4029
4030 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
4031 assert!(!matcher.next_sequence(|_| {}));
4032
4033 matcher.add_data(
4034 alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
4035 SuffixStore::with_capacity(100),
4036 |_, _| {},
4037 );
4038 original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);
4039
4040 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
4041 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
4042 assert!(!matcher.next_sequence(|_| {}));
4043
4044 assert_eq!(reconstructed, original_data);
4045}
4046
4047#[test]
4048fn dfast_matches_roundtrip_multi_block_pattern() {
4049 let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
4050 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
4051 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
4052
4053 let mut matcher = DfastMatchGenerator::new(1 << 22);
4054 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
4055 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
4056 Sequence::Triple {
4057 literals,
4058 offset,
4059 match_len,
4060 } => {
4061 decoded.extend_from_slice(literals);
4062 let start = decoded.len() - offset;
4063 for i in 0..match_len {
4064 let byte = decoded[start + i];
4065 decoded.push(byte);
4066 }
4067 }
4068 };
4069
4070 matcher.add_data(first_block.clone(), |_| {});
4071 let mut history = Vec::new();
4072 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
4073 assert_eq!(history, first_block);
4074
4075 matcher.add_data(second_block.clone(), |_| {});
4076 let prefix_len = history.len();
4077 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
4078
4079 assert_eq!(&history[prefix_len..], second_block.as_slice());
4080}
4081
4082#[test]
4099fn dfast_accepts_exact_five_byte_match() {
4100 let mut data = Vec::new();
4109 data.extend_from_slice(b"ABCDE"); data.extend_from_slice(b"!!!!!!!!!!!!!!!!!!!!!!!"); data.extend_from_slice(b"ABCDE"); data.push(b'F'); assert_eq!(data.len(), 34);
4114
4115 let mut matcher = DfastMatchGenerator::new(1 << 22);
4116 matcher.add_data(data.clone(), |_| {});
4117
4118 let mut saw_five_byte_match = false;
4119 let mut saw_longer_match = false;
4120 matcher.start_matching(|seq| {
4121 if let Sequence::Triple {
4122 offset, match_len, ..
4123 } = seq
4124 {
4125 if offset == 28 && match_len == 5 {
4126 saw_five_byte_match = true;
4127 } else if offset == 28 && match_len > 5 {
4128 saw_longer_match = true;
4129 }
4130 }
4131 });
4132
4133 assert!(
4134 saw_five_byte_match,
4135 "dfast must accept the exact-5-byte match — a 6-byte floor would skip it"
4136 );
4137 assert!(
4138 !saw_longer_match,
4139 "fixture pinned to length 5 — byte 33 ('F') must terminate the extension"
4140 );
4141}
4142
4143#[test]
4144fn driver_switches_backends_and_initializes_dfast_via_reset() {
4145 let mut driver = MatchGeneratorDriver::new(32, 2);
4146
4147 driver.reset(CompressionLevel::Default);
4148 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Dfast);
4149 assert_eq!(driver.window_size(), (1u64 << 22));
4150
4151 let mut first = driver.get_next_space();
4152 first[..12].copy_from_slice(b"abcabcabcabc");
4153 first.truncate(12);
4154 driver.commit_space(first);
4155 assert_eq!(driver.get_last_space(), b"abcabcabcabc");
4156 driver.skip_matching_with_hint(None);
4157
4158 let mut second = driver.get_next_space();
4159 second[..12].copy_from_slice(b"abcabcabcabc");
4160 second.truncate(12);
4161 driver.commit_space(second);
4162
4163 let mut reconstructed = b"abcabcabcabc".to_vec();
4164 driver.start_matching(|seq| match seq {
4165 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4166 Sequence::Triple {
4167 literals,
4168 offset,
4169 match_len,
4170 } => {
4171 reconstructed.extend_from_slice(literals);
4172 let start = reconstructed.len() - offset;
4173 for i in 0..match_len {
4174 let byte = reconstructed[start + i];
4175 reconstructed.push(byte);
4176 }
4177 }
4178 });
4179 assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");
4180
4181 driver.reset(CompressionLevel::Fastest);
4182 assert_eq!(driver.window_size(), (1u64 << 19));
4183}
4184
4185#[test]
4186fn driver_level4_selects_row_backend() {
4187 let mut driver = MatchGeneratorDriver::new(32, 2);
4188 driver.reset(CompressionLevel::Level(4));
4189 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);
4190 assert_eq!(
4201 driver.row_matcher().lazy_depth,
4202 0,
4203 "L4 must route to start_matching_greedy (lazy_depth == 0)",
4204 );
4205}
4206
4207#[test]
4218fn driver_level4_greedy_round_trip_single_slice() {
4219 let mut driver = MatchGeneratorDriver::new(64, 2);
4220 driver.reset(CompressionLevel::Level(4));
4221 let input = b"abcdefgh_abcdefgh_abcdefgh_abcdefgh";
4222 let mut space = driver.get_next_space();
4223 space[..input.len()].copy_from_slice(input);
4224 space.truncate(input.len());
4225 driver.commit_space(space);
4226
4227 let mut reconstructed: Vec<u8> = Vec::new();
4228 let mut saw_triple = false;
4229 driver.start_matching(|seq| match seq {
4230 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4231 Sequence::Triple {
4232 literals,
4233 offset,
4234 match_len,
4235 } => {
4236 saw_triple = true;
4237 reconstructed.extend_from_slice(literals);
4238 let start = reconstructed.len() - offset;
4239 for i in 0..match_len {
4240 let byte = reconstructed[start + i];
4241 reconstructed.push(byte);
4242 }
4243 }
4244 });
4245 assert_eq!(
4246 reconstructed,
4247 input.to_vec(),
4248 "L4 greedy parse failed to reconstruct repeating-pattern input",
4249 );
4250 assert!(
4251 saw_triple,
4252 "L4 greedy parse on a repeating pattern must emit at least one match (Triple)",
4253 );
4254}
4255
4256#[test]
4257fn driver_level4_greedy_round_trip_cross_slice() {
4258 let mut driver = MatchGeneratorDriver::new(32, 4);
4263 driver.reset(CompressionLevel::Level(4));
4264 let chunk = b"the quick brown fox jumps over!!";
4265 assert_eq!(chunk.len(), 32);
4266
4267 let mut first = driver.get_next_space();
4268 first[..chunk.len()].copy_from_slice(chunk);
4269 first.truncate(chunk.len());
4270 driver.commit_space(first);
4271
4272 let mut first_recon: Vec<u8> = Vec::new();
4273 driver.start_matching(|seq| match seq {
4274 Sequence::Literals { literals } => first_recon.extend_from_slice(literals),
4275 Sequence::Triple {
4276 literals,
4277 offset,
4278 match_len,
4279 } => {
4280 first_recon.extend_from_slice(literals);
4281 let start = first_recon.len() - offset;
4282 for i in 0..match_len {
4283 let byte = first_recon[start + i];
4284 first_recon.push(byte);
4285 }
4286 }
4287 });
4288 assert_eq!(
4289 first_recon,
4290 chunk.to_vec(),
4291 "first slice failed to round-trip"
4292 );
4293
4294 let mut second = driver.get_next_space();
4295 second[..chunk.len()].copy_from_slice(chunk);
4296 second.truncate(chunk.len());
4297 driver.commit_space(second);
4298
4299 let mut full = first_recon.clone();
4300 let mut saw_cross_slice_match = false;
4301 driver.start_matching(|seq| match seq {
4302 Sequence::Literals { literals } => full.extend_from_slice(literals),
4303 Sequence::Triple {
4304 literals,
4305 offset,
4306 match_len,
4307 } => {
4308 if offset >= chunk.len() {
4312 saw_cross_slice_match = true;
4313 }
4314 full.extend_from_slice(literals);
4315 let start = full.len() - offset;
4316 for i in 0..match_len {
4317 let byte = full[start + i];
4318 full.push(byte);
4319 }
4320 }
4321 });
4322 let mut expected = chunk.to_vec();
4323 expected.extend_from_slice(chunk);
4324 assert_eq!(
4325 full, expected,
4326 "cross-slice L4 greedy parse failed to reconstruct"
4327 );
4328 assert!(
4329 saw_cross_slice_match,
4330 "L4 greedy parse must match across slice boundaries (history is shared)",
4331 );
4332}
4333
4334#[cfg(test)]
4338fn l4_greedy_round_trip(slice_size: usize, max_slices: usize, data: &[u8]) -> (usize, usize) {
4339 let mut driver = MatchGeneratorDriver::new(slice_size, max_slices);
4340 driver.reset(CompressionLevel::Level(4));
4341
4342 let mut reconstructed: Vec<u8> = Vec::with_capacity(data.len());
4343 let mut triple_count = 0usize;
4344 let mut max_offset = 0usize;
4345
4346 let mut offset_in_data = 0usize;
4351 while offset_in_data < data.len() {
4352 let mut space = driver.get_next_space();
4353 let space_cap = space.len();
4354 let take = (data.len() - offset_in_data).min(space_cap);
4355 space[..take].copy_from_slice(&data[offset_in_data..offset_in_data + take]);
4356 space.truncate(take);
4357 driver.commit_space(space);
4358 offset_in_data += take;
4359
4360 driver.start_matching(|seq| match seq {
4361 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4362 Sequence::Triple {
4363 literals,
4364 offset,
4365 match_len,
4366 } => {
4367 triple_count += 1;
4368 if offset > max_offset {
4369 max_offset = offset;
4370 }
4371 reconstructed.extend_from_slice(literals);
4372 let start = reconstructed.len() - offset;
4373 for i in 0..match_len {
4374 let byte = reconstructed[start + i];
4375 reconstructed.push(byte);
4376 }
4377 }
4378 });
4379 }
4380
4381 if data.is_empty() {
4385 let mut space = driver.get_next_space();
4386 space.truncate(0);
4387 driver.commit_space(space);
4388 driver.start_matching(|seq| match seq {
4389 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4390 Sequence::Triple { .. } => panic!("empty input must not emit any matches"),
4391 });
4392 }
4393
4394 assert_eq!(reconstructed, data, "L4 greedy round-trip diverged");
4395 (triple_count, max_offset)
4396}
4397
4398#[test]
4409fn driver_level4_greedy_tail_rep_only_reachable() {
4410 let first: &[u8] = b"ABCDABCDABCDABCD"; let second: &[u8] = b"ABCDA"; let mut driver = MatchGeneratorDriver::new(16, 2);
4425 driver.reset(CompressionLevel::Level(4));
4426
4427 let mut first_space = driver.get_next_space();
4428 first_space[..first.len()].copy_from_slice(first);
4429 first_space.truncate(first.len());
4430 driver.commit_space(first_space);
4431 driver.start_matching(|_| {});
4432
4433 let mut second_space = driver.get_next_space();
4434 second_space[..second.len()].copy_from_slice(second);
4435 second_space.truncate(second.len());
4436 driver.commit_space(second_space);
4437
4438 let mut second_slice_triples = 0usize;
4439 driver.start_matching(|seq| {
4440 if matches!(seq, Sequence::Triple { .. }) {
4441 second_slice_triples += 1;
4442 }
4443 });
4444
4445 assert!(
4446 second_slice_triples >= 1,
4447 "tail rep-only position must produce a match in the second slice \
4448 (got {second_slice_triples} triples)",
4449 );
4450}
4451
4452#[test]
4453fn driver_level4_greedy_empty_input_emits_nothing() {
4454 let mut driver = MatchGeneratorDriver::new(64, 2);
4458 driver.reset(CompressionLevel::Level(4));
4459 let mut space = driver.get_next_space();
4464 space.truncate(0);
4465 driver.commit_space(space);
4466 let mut emitted_anything = false;
4467 driver.start_matching(|_| emitted_anything = true);
4468 assert!(!emitted_anything, "empty slice must not emit any sequences",);
4469}
4470
4471#[test]
4472fn driver_level4_greedy_sub_min_lookahead_input() {
4473 let data: &[u8] = b"abcd"; let (triples, _) = l4_greedy_round_trip(64, 2, data);
4478 assert_eq!(
4479 triples, 0,
4480 "sub-min-lookahead input must not emit any matches (got {triples})",
4481 );
4482}
4483
4484#[test]
4485fn driver_level4_greedy_incompressible_input() {
4486 let mut data = alloc::vec::Vec::with_capacity(256);
4491 let mut x: u32 = 0xDEAD_BEEF;
4492 for _ in 0..256 {
4493 x = x.wrapping_mul(1_103_515_245).wrapping_add(12345);
4494 data.push((x >> 16) as u8);
4495 }
4496 let (_triples, _) = l4_greedy_round_trip(64, 8, &data);
4497 }
4500
4501#[test]
4502fn driver_level4_greedy_long_literal_run_skip_step_growth() {
4503 let mut data = alloc::vec::Vec::with_capacity(2048);
4518 let mut x: u32 = 0xC0FF_EE00;
4519 for _ in 0..2048 {
4520 x = x.wrapping_mul(0x9E37_79B9).wrapping_add(0xCAFEBABE);
4521 data.push((x >> 24) as u8);
4522 }
4523 let (_triples, _) = l4_greedy_round_trip(512, 8, &data);
4524}
4525
4526#[test]
4527fn driver_level4_greedy_all_zeros_heavy_rep1() {
4528 let data: Vec<u8> = alloc::vec![0u8; 128];
4533 let (triples, max_offset) = l4_greedy_round_trip(64, 8, &data);
4534 assert!(
4535 triples >= 1,
4536 "all-zeros input must produce at least one rep1 match",
4537 );
4538 assert_eq!(
4542 max_offset, 1,
4543 "all-zeros L4 greedy parse should commit at offset 1 (got {max_offset})",
4544 );
4545}
4546
4547#[test]
4553fn driver_level4_greedy_periodic_pattern_rep_cascade() {
4554 let unit: &[u8] = b"alpha_beta_gamma";
4555 assert_eq!(unit.len(), 16);
4556 let mut data: Vec<u8> = Vec::with_capacity(unit.len() * 32);
4557 for _ in 0..32 {
4558 data.extend_from_slice(unit);
4559 }
4560 let (triples, max_offset) = l4_greedy_round_trip(64, 16, &data);
4561 assert!(
4562 triples >= 1,
4563 "periodic 16-byte payload must emit matches (got {triples})",
4564 );
4565 assert!(
4566 max_offset >= 16,
4567 "periodic 16-byte payload must produce at least one offset >= 16 \
4568 (got max_offset = {max_offset})",
4569 );
4570}
4571
4572#[test]
4573fn driver_reset_keeps_strategy_tag_in_sync_with_active_backend() {
4574 use super::strategy::StrategyTag;
4575
4576 fn check(level: CompressionLevel, expected: StrategyTag) {
4577 let mut driver = MatchGeneratorDriver::new(32, 2);
4578 driver.reset(level);
4579 assert_eq!(
4580 driver.strategy_tag, expected,
4581 "strategy_tag wrong for {level:?}"
4582 );
4583 assert_eq!(
4584 driver.strategy_tag.backend(),
4585 driver.active_backend(),
4586 "strategy_tag backend disagrees with active_backend for {level:?}"
4587 );
4588 }
4589
4590 check(CompressionLevel::Level(1), StrategyTag::Fast);
4591 check(CompressionLevel::Level(2), StrategyTag::Dfast);
4592 check(CompressionLevel::Level(3), StrategyTag::Dfast);
4593 check(CompressionLevel::Level(4), StrategyTag::Greedy);
4594 check(CompressionLevel::Level(7), StrategyTag::Lazy);
4595 check(CompressionLevel::Level(15), StrategyTag::Lazy);
4596 check(CompressionLevel::Level(16), StrategyTag::BtOpt);
4597 check(CompressionLevel::Level(18), StrategyTag::BtUltra);
4598 check(CompressionLevel::Level(22), StrategyTag::BtUltra2);
4599 check(CompressionLevel::Fastest, StrategyTag::Fast);
4600 check(CompressionLevel::Default, StrategyTag::Dfast);
4601 check(CompressionLevel::Better, StrategyTag::Lazy);
4602 check(CompressionLevel::Best, StrategyTag::Lazy);
4603}
4604
4605#[test]
4606fn level_16_17_map_to_btopt_strategy() {
4607 use super::strategy::{BackendTag, StrategyTag};
4608 let p16 = resolve_level_params(CompressionLevel::Level(16), None);
4609 let p17 = resolve_level_params(CompressionLevel::Level(17), None);
4610 assert_eq!(p16.backend(), BackendTag::HashChain);
4611 assert_eq!(p17.backend(), BackendTag::HashChain);
4612 assert_eq!(StrategyTag::for_level(16), StrategyTag::BtOpt);
4613 assert_eq!(StrategyTag::for_level(17), StrategyTag::BtOpt);
4614}
4615
4616#[test]
4617fn level_18_19_map_to_btultra_strategy() {
4618 use super::strategy::{BackendTag, StrategyTag};
4619 let p18 = resolve_level_params(CompressionLevel::Level(18), None);
4620 let p19 = resolve_level_params(CompressionLevel::Level(19), None);
4621 assert_eq!(p18.backend(), BackendTag::HashChain);
4622 assert_eq!(p19.backend(), BackendTag::HashChain);
4623 assert_eq!(StrategyTag::for_level(18), StrategyTag::BtUltra);
4624 assert_eq!(StrategyTag::for_level(19), StrategyTag::BtUltra);
4625}
4626
4627#[test]
4628fn level_20_22_map_to_btultra2_strategy() {
4629 use super::strategy::{BackendTag, StrategyTag};
4630 for level in 20..=22 {
4631 let params = resolve_level_params(CompressionLevel::Level(level), None);
4632 assert_eq!(params.backend(), BackendTag::HashChain);
4633 assert_eq!(StrategyTag::for_level(level as u8), StrategyTag::BtUltra2);
4634 }
4635}
4636
4637#[test]
4638fn level22_uses_donor_target_length_and_large_input_tables() {
4639 let params = resolve_level_params(CompressionLevel::Level(22), None);
4640 assert_eq!(params.window_log, 27);
4641 assert_eq!(params.hc.hash_log, 25);
4642 assert_eq!(params.hc.chain_log, 27);
4643 assert_eq!(params.hc.search_depth, 1 << 9);
4644 assert_eq!(params.hc.target_len, 999);
4645}
4646
4647#[test]
4648fn level22_source_size_hint_uses_donor_btultra2_tiers() {
4649 let p16k = resolve_level_params(CompressionLevel::Level(22), Some(16 * 1024));
4650 assert_eq!(p16k.window_log, 14);
4651 assert_eq!(p16k.hc.hash_log, 15);
4652 assert_eq!(p16k.hc.chain_log, 15);
4653 assert_eq!(p16k.hc.search_depth, 1 << 10);
4654 assert_eq!(p16k.hc.target_len, 999);
4655
4656 let p128k = resolve_level_params(CompressionLevel::Level(22), Some(128 * 1024));
4657 assert_eq!(p128k.window_log, 17);
4658 assert_eq!(p128k.hc.hash_log, 17);
4659 assert_eq!(p128k.hc.chain_log, 18);
4660 assert_eq!(p128k.hc.search_depth, 1 << 11);
4661 assert_eq!(p128k.hc.target_len, 999);
4662
4663 let p256k = resolve_level_params(CompressionLevel::Level(22), Some(256 * 1024));
4664 assert_eq!(p256k.window_log, 18);
4665 assert_eq!(p256k.hc.hash_log, 19);
4666 assert_eq!(p256k.hc.chain_log, 19);
4667 assert_eq!(p256k.hc.search_depth, 1 << 13);
4668 assert_eq!(p256k.hc.target_len, 999);
4669}
4670
4671#[test]
4672fn level22_small_source_size_hint_matches_donor_cparams() {
4673 use zstd::zstd_safe::zstd_sys;
4674
4675 let source_size = 15_027u64;
4676 let donor = unsafe { zstd_sys::ZSTD_getCParams(22, source_size, 0) };
4677 let params = resolve_level_params(CompressionLevel::Level(22), Some(source_size));
4678
4679 assert_eq!(params.window_log as u32, donor.windowLog);
4680 assert_eq!(params.hc.chain_log as u32, donor.chainLog);
4681 assert_eq!(params.hc.hash_log as u32, donor.hashLog);
4682 assert_eq!(params.hc.search_depth as u32, 1u32 << donor.searchLog);
4683 assert_eq!(HC_OPT_MIN_MATCH_LEN as u32, donor.minMatch);
4684 assert_eq!(params.hc.target_len as u32, donor.targetLength);
4685}
4686
4687#[test]
4688fn level22_small_source_uses_window_bounded_hash3_log() {
4689 let mut hc = HcMatchGenerator::new(1 << 14);
4690 hc.configure(
4691 BTULTRA2_HC_CONFIG_L22_16K,
4692 super::strategy::StrategyTag::BtUltra2,
4693 14,
4694 );
4695 assert_eq!(hc.table.hash3_log, 14);
4696
4697 hc.configure(
4698 BTULTRA2_HC_CONFIG_L22,
4699 super::strategy::StrategyTag::BtUltra2,
4700 27,
4701 );
4702 assert_eq!(hc.table.hash3_log, HC3_HASH_LOG);
4703}
4704
4705#[test]
4706fn btultra2_seed_pass_initializes_opt_state() {
4707 let mut hc = HcMatchGenerator::new(1 << 20);
4708 hc.configure(
4709 BTULTRA2_HC_CONFIG,
4710 super::strategy::StrategyTag::BtUltra2,
4711 26,
4712 );
4713 let data: Vec<u8> = (0..32 * 1024).map(|i| (i % 251) as u8).collect();
4714 hc.table.add_data(data, |_| {});
4715 hc.start_matching(|_| {});
4716 assert!(
4717 hc.backend.bt_mut().opt_state.lit_length_sum > 0,
4718 "btultra2 first block should seed non-zero sequence statistics"
4719 );
4720 assert!(
4721 hc.backend.bt_mut().opt_state.off_code_sum > 0,
4722 "btultra2 first block should seed offset-code statistics"
4723 );
4724}
4725
4726#[test]
4727fn btultra2_profile_disables_small_offset_handicap() {
4728 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4734 assert!(
4735 !profile.favor_small_offsets,
4736 "btultra2 should match donor opt2 offset pricing"
4737 );
4738 assert!(
4739 profile.accurate,
4740 "btultra2 should use donor opt2 accurate pricing"
4741 );
4742}
4743
4744#[test]
4745fn btultra_profile_keeps_donor_search_depth_budget() {
4746 let p = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra>();
4747 assert_eq!(
4748 p.max_chain_depth, 32,
4749 "btultra should not cap chain depth below donor opt2 search budget"
4750 );
4751}
4752
4753#[test]
4754fn btopt_profile_keeps_donor_search_depth_budget() {
4755 let p = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtOpt>();
4756 assert_eq!(
4757 p.max_chain_depth, 32,
4758 "btopt should not cap chain depth below donor btopt search budget"
4759 );
4760}
4761
4762#[test]
4763fn sufficient_match_len_is_clamped_by_target_len() {
4764 let mut hc = HcMatchGenerator::new(1 << 20);
4765 hc.configure(
4766 BTULTRA2_HC_CONFIG,
4767 super::strategy::StrategyTag::BtUltra2,
4768 26,
4769 );
4770 hc.hc.target_len = 13;
4771 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4772 assert_eq!(hc.hc.sufficient_match_len_for_pass(profile), 13);
4773}
4774
4775#[test]
4776fn opt_modes_use_target_len_as_sufficient_len() {
4777 use super::strategy;
4778 let mut hc = HcMatchGenerator::new(1 << 20);
4779 hc.hc.target_len = 57;
4780 let profiles = [
4781 HcOptimalCostProfile::const_for_strategy::<strategy::BtOpt>(),
4782 HcOptimalCostProfile::const_for_strategy::<strategy::BtUltra>(),
4783 HcOptimalCostProfile::const_for_strategy::<strategy::BtUltra2>(),
4784 ];
4785 for profile in profiles {
4786 assert_eq!(hc.hc.sufficient_match_len_for_pass(profile), 57);
4787 }
4788}
4789
4790#[test]
4791fn sufficient_match_len_is_capped_by_opt_num() {
4792 let mut hc = HcMatchGenerator::new(1 << 20);
4793 hc.hc.target_len = usize::MAX / 2;
4794 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4795 assert_eq!(hc.hc.sufficient_match_len_for_pass(profile), HC_OPT_NUM - 1);
4796}
4797
4798#[test]
4799#[allow(clippy::borrow_deref_ref)]
4800fn dictionary_entropy_seed_initializes_opt_state_from_tables() {
4801 let mut hc = HcMatchGenerator::new(1 << 20);
4802 hc.configure(
4803 BTULTRA2_HC_CONFIG,
4804 super::strategy::StrategyTag::BtUltra2,
4805 26,
4806 );
4807
4808 let huff = crate::huff0::huff0_encoder::HuffmanTable::build_from_data(
4809 b"aaabbbbccccddddeeeeefffffgggg",
4810 );
4811 let ll = crate::fse::fse_encoder::default_ll_table();
4812 let ml = crate::fse::fse_encoder::default_ml_table();
4813 let of = crate::fse::fse_encoder::default_of_table();
4814 hc.seed_dictionary_entropy(Some(&huff), Some(&*ll), Some(&*ml), Some(&*of));
4815
4816 hc.backend.bt_mut().opt_state.rescale_freqs(
4817 b"abcd",
4818 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4819 );
4820
4821 let base_ll_freqs: [u32; HC_MAX_LL + 1] = [
4822 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4823 1, 1, 1, 1, 1, 1,
4824 ];
4825
4826 assert_ne!(
4827 hc.backend.bt_mut().opt_state.lit_length_freq,
4828 base_ll_freqs,
4829 "dictionary entropy should override fallback LL bootstrap frequencies"
4830 );
4831 assert!(
4832 hc.backend
4833 .bt_mut()
4834 .opt_state
4835 .match_length_freq
4836 .iter()
4837 .any(|&v| v != 1),
4838 "dictionary entropy should seed non-uniform ML frequencies"
4839 );
4840 assert_ne!(
4841 hc.backend.bt_mut().opt_state.off_code_freq[0],
4842 6,
4843 "dictionary entropy should override fallback OF bootstrap frequencies"
4844 );
4845}
4846
4847#[test]
4848#[allow(clippy::borrow_deref_ref)]
4849fn dictionary_fse_seed_applies_without_huffman_seed() {
4850 let mut hc = HcMatchGenerator::new(1 << 20);
4851 hc.configure(
4852 BTULTRA2_HC_CONFIG,
4853 super::strategy::StrategyTag::BtUltra2,
4854 26,
4855 );
4856
4857 let ll = crate::fse::fse_encoder::default_ll_table();
4858 let ml = crate::fse::fse_encoder::default_ml_table();
4859 let of = crate::fse::fse_encoder::default_of_table();
4860 hc.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
4861 hc.backend.bt_mut().opt_state.rescale_freqs(
4862 b"abcd",
4863 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4864 );
4865
4866 let base_ll_freqs: [u32; HC_MAX_LL + 1] = [
4867 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4868 1, 1, 1, 1, 1, 1,
4869 ];
4870 assert_ne!(
4871 hc.backend.bt_mut().opt_state.lit_length_freq,
4872 base_ll_freqs,
4873 "FSE seed should still override LL bootstrap frequencies without huffman seed"
4874 );
4875 assert!(
4876 hc.backend
4877 .bt_mut()
4878 .opt_state
4879 .match_length_freq
4880 .iter()
4881 .any(|&v| v != 1),
4882 "FSE seed should still seed non-uniform ML frequencies"
4883 );
4884 assert_ne!(
4885 hc.backend.bt_mut().opt_state.off_code_freq[0],
4886 6,
4887 "FSE seed should still override OF bootstrap frequencies without huffman seed"
4888 );
4889}
4890
4891#[test]
4892#[allow(clippy::borrow_deref_ref)]
4893fn dictionary_seed_overrides_predef_price_mode_on_tiny_input() {
4894 let mut hc = HcMatchGenerator::new(1 << 20);
4895 hc.configure(
4896 BTULTRA2_HC_CONFIG,
4897 super::strategy::StrategyTag::BtUltra2,
4898 26,
4899 );
4900
4901 let ll = crate::fse::fse_encoder::default_ll_table();
4902 let ml = crate::fse::fse_encoder::default_ml_table();
4903 let of = crate::fse::fse_encoder::default_of_table();
4904 hc.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
4905 hc.backend.bt_mut().opt_state.rescale_freqs(
4906 b"abc",
4907 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4908 );
4909 assert!(
4910 matches!(
4911 hc.backend.bt_mut().opt_state.price_type,
4912 HcOptPriceType::Dynamic
4913 ),
4914 "dictionary-seeded first block should stay in dynamic mode even for tiny src"
4915 );
4916}
4917
4918#[test]
4919fn lit_length_price_blocksize_max_costs_one_extra_bit() {
4920 let profile_predef = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4921 let mut stats_predef = HcOptState::new();
4922 stats_predef.price_type = HcOptPriceType::Predefined;
4923 let predef_max = profile_predef.lit_length_price(&stats_predef, HC_BLOCKSIZE_MAX);
4924 let predef_prev =
4925 profile_predef.lit_length_price(&stats_predef, HC_BLOCKSIZE_MAX.saturating_sub(1));
4926 assert_eq!(
4927 predef_max,
4928 predef_prev + HC_BITCOST_MULTIPLIER,
4929 "predefined litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
4930 );
4931
4932 let profile_dyn = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4933 let mut stats_dyn = HcOptState::new();
4934 stats_dyn.price_type = HcOptPriceType::Dynamic;
4935 stats_dyn.lit_length_freq.fill(1);
4936 stats_dyn.lit_length_sum = (HC_MAX_LL + 1) as u32;
4937 stats_dyn.match_length_freq.fill(1);
4938 stats_dyn.match_length_sum = (HC_MAX_ML + 1) as u32;
4939 stats_dyn.off_code_freq.fill(1);
4940 stats_dyn.off_code_sum = (HC_MAX_OFF + 1) as u32;
4941 stats_dyn.lit_freq.fill(1);
4942 stats_dyn.lit_sum = (HC_MAX_LIT + 1) as u32;
4943 stats_dyn.set_base_prices(true);
4944 let dyn_max = profile_dyn.lit_length_price(&stats_dyn, HC_BLOCKSIZE_MAX);
4945 let dyn_prev = profile_dyn.lit_length_price(&stats_dyn, HC_BLOCKSIZE_MAX.saturating_sub(1));
4946 assert_eq!(
4947 dyn_max,
4948 dyn_prev + HC_BITCOST_MULTIPLIER,
4949 "dynamic litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
4950 );
4951}
4952
4953#[test]
4954#[allow(clippy::borrow_deref_ref)]
4955fn btultra2_seed_pass_disabled_when_dictionary_entropy_seed_present() {
4956 let mut hc = HcMatchGenerator::new(1 << 20);
4957 hc.configure(
4958 BTULTRA2_HC_CONFIG,
4959 super::strategy::StrategyTag::BtUltra2,
4960 26,
4961 );
4962 let ll = crate::fse::fse_encoder::default_ll_table();
4963 let ml = crate::fse::fse_encoder::default_ml_table();
4964 let of = crate::fse::fse_encoder::default_of_table();
4965 hc.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
4966 assert!(
4967 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1),
4968 "dictionary-seeded first block should skip btultra2 warmup pass"
4969 );
4970}
4971
4972#[test]
4973fn btultra2_seed_pass_disabled_when_prefix_history_exists() {
4974 let mut hc = HcMatchGenerator::new(1 << 20);
4975 hc.configure(
4976 BTULTRA2_HC_CONFIG,
4977 super::strategy::StrategyTag::BtUltra2,
4978 26,
4979 );
4980 hc.table.history_abs_start = 17;
4981 hc.table.window.push_back(b"abcdefghijklmnop".to_vec());
4982 assert!(
4983 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 9),
4984 "btultra2 warmup must be first-block only (no prefix history)"
4985 );
4986}
4987
4988#[test]
4989fn btultra2_seed_pass_disabled_for_tiny_block() {
4990 let mut hc = HcMatchGenerator::new(1 << 20);
4991 hc.configure(
4992 BTULTRA2_HC_CONFIG,
4993 super::strategy::StrategyTag::BtUltra2,
4994 26,
4995 );
4996 assert!(
4997 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD),
4998 "btultra2 warmup should not run at or below predefined threshold"
4999 );
5000}
5001
5002#[test]
5003fn btultra2_seed_pass_disabled_after_stats_initialized() {
5004 let mut hc = HcMatchGenerator::new(1 << 20);
5005 hc.configure(
5006 BTULTRA2_HC_CONFIG,
5007 super::strategy::StrategyTag::BtUltra2,
5008 26,
5009 );
5010 hc.backend.bt_mut().opt_state.lit_length_sum = 1;
5011 assert!(
5012 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 32),
5013 "btultra2 warmup should run only for first block before stats are initialized"
5014 );
5015}
5016
5017#[test]
5018fn btultra2_seed_pass_disabled_when_not_at_frame_start() {
5019 let mut hc = HcMatchGenerator::new(1 << 20);
5020 hc.configure(
5021 BTULTRA2_HC_CONFIG,
5022 super::strategy::StrategyTag::BtUltra2,
5023 26,
5024 );
5025 hc.table.window_size = HC_PREDEF_THRESHOLD + 64;
5028 hc.table
5029 .window
5030 .push_back(alloc::vec![b'A'; HC_PREDEF_THRESHOLD + 32]);
5031 assert!(
5032 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 32),
5033 "btultra2 warmup must not run after frame start"
5034 );
5035}
5036
5037#[test]
5038fn btultra2_seed_pass_disabled_when_ldm_sequences_exist() {
5039 let mut hc = HcMatchGenerator::new(1 << 20);
5040 hc.configure(
5041 BTULTRA2_HC_CONFIG,
5042 super::strategy::StrategyTag::BtUltra2,
5043 26,
5044 );
5045 hc.table.window_size = HC_PREDEF_THRESHOLD + 64;
5046 hc.table
5047 .window
5048 .push_back(alloc::vec![b'A'; HC_PREDEF_THRESHOLD + 64]);
5049 hc.backend.bt_mut().ldm_sequences.push(HcRawSeq {
5050 lit_length: 8,
5051 offset: 16,
5052 match_length: 32,
5053 });
5054 assert!(
5055 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 32),
5056 "btultra2 warmup must not run when LDM already produced sequences"
5057 );
5058}
5059
5060#[test]
5061fn literal_price_uses_eight_bits_when_literals_uncompressed() {
5062 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
5063 let mut stats = HcOptState::new();
5064 stats.set_literals_compressed_for_tests(false);
5065 stats.price_type = HcOptPriceType::Predefined;
5066 assert_eq!(
5067 profile.literal_price(&stats, b'a'),
5068 8 * HC_BITCOST_MULTIPLIER,
5069 "uncompressed literals should cost 8 bits regardless of price mode"
5070 );
5071}
5072
5073#[test]
5074fn update_stats_skips_literal_frequencies_when_uncompressed() {
5075 let mut stats = HcOptState::new();
5076 stats.set_literals_compressed_for_tests(false);
5077 stats.update_stats(3, b"abc", 4, 8);
5078 assert_eq!(
5079 stats.lit_sum, 0,
5080 "literal sum must remain unchanged when literal compression is disabled"
5081 );
5082 assert_eq!(
5083 stats.lit_freq.iter().copied().sum::<u32>(),
5084 0,
5085 "literal frequencies must not be updated when literal compression is disabled"
5086 );
5087 assert_eq!(
5088 stats.lit_length_sum, 1,
5089 "literal-length stats still update for sequence modeling"
5090 );
5091 assert_eq!(
5092 stats.match_length_sum, 1,
5093 "match-length stats still update for sequence modeling"
5094 );
5095 assert_eq!(
5096 stats.off_code_sum, 1,
5097 "offset-code stats still update for sequence modeling"
5098 );
5099}
5100
5101#[test]
5102#[allow(clippy::borrow_deref_ref)]
5103fn dictionary_huffman_seed_ignored_when_literals_uncompressed() {
5104 let mut stats = HcOptState::new();
5105 stats.set_literals_compressed_for_tests(false);
5106 let huff = crate::huff0::huff0_encoder::HuffmanTable::build_from_data(
5107 b"aaaaabbbbcccddeeff00112233445566778899",
5108 );
5109 let ll = crate::fse::fse_encoder::default_ll_table();
5110 let ml = crate::fse::fse_encoder::default_ml_table();
5111 let of = crate::fse::fse_encoder::default_of_table();
5112 stats.seed_dictionary_entropy(Some(&huff), Some(&*ll), Some(&*ml), Some(&*of));
5113 stats.rescale_freqs(
5114 b"abcd",
5115 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
5116 );
5117 assert_eq!(
5118 stats.lit_sum, 0,
5119 "literal sum must stay zero when literals are uncompressed"
5120 );
5121 assert_eq!(
5122 stats.lit_freq.iter().copied().sum::<u32>(),
5123 0,
5124 "literal frequencies must ignore dictionary huffman seed when uncompressed"
5125 );
5126}
5127
5128#[test]
5129fn hc_repcode_candidates_respect_litlen_dependent_rep_order() {
5130 let mut hc = HcMatchGenerator::new(64);
5131 hc.table.history = b"xxxxxxABCDEFABCDEF".to_vec();
5132 hc.table.history_start = 0;
5133 hc.table.history_abs_start = 0;
5134
5135 let abs_pos = 12usize; let current_abs_end = hc.table.history.len();
5137 let reps = [6u32, 3u32, 9u32];
5138
5139 let mut lit_pos_candidates = Vec::new();
5140 hc.hc.for_each_repcode_candidate_with_reps(
5141 &hc.table,
5142 abs_pos,
5143 1,
5144 reps,
5145 current_abs_end,
5146 HC_OPT_MIN_MATCH_LEN,
5147 |c| {
5148 lit_pos_candidates.push(c.offset);
5149 },
5150 );
5151 assert!(
5152 lit_pos_candidates.contains(&6),
5153 "when lit_len>0, rep0 should be considered and match"
5154 );
5155
5156 let mut ll0_candidates = Vec::new();
5157 hc.hc.for_each_repcode_candidate_with_reps(
5158 &hc.table,
5159 abs_pos,
5160 0,
5161 reps,
5162 current_abs_end,
5163 HC_OPT_MIN_MATCH_LEN,
5164 |c| {
5165 ll0_candidates.push(c.offset);
5166 },
5167 );
5168 assert!(
5169 !ll0_candidates.contains(&6),
5170 "when lit_len==0, rep0 is not directly eligible (ll0 semantics)"
5171 );
5172}
5173
5174#[test]
5175fn hc_collect_optimal_candidates_keeps_reps_when_chain_depth_zero() {
5176 let mut hc = HcMatchGenerator::new(64);
5177 hc.hc.search_depth = 0;
5178 hc.table.history = b"xyzxyzxyzxyz".to_vec();
5179 hc.table.history_start = 0;
5180 hc.table.history_abs_start = 0;
5181
5182 let abs_pos = 6usize;
5183 let current_abs_end = hc.table.history.len();
5184 let profile = HcOptimalCostProfile {
5185 max_chain_depth: 0,
5186 sufficient_match_len: usize::MAX / 2,
5187 accurate: false,
5188 favor_small_offsets: false,
5189 };
5190 let mut out = Vec::new();
5191 hc.collect_optimal_candidates(
5192 abs_pos,
5193 current_abs_end,
5194 profile,
5195 HcCandidateQuery {
5196 reps: [3, 6, 9],
5197 lit_len: 1,
5198 ldm_candidate: None,
5199 },
5200 &mut out,
5201 );
5202 assert!(
5203 !out.is_empty(),
5204 "rep candidates should remain available even when chain depth is zero"
5205 );
5206 assert!(
5207 out.iter().any(|c| c.offset == 3),
5208 "rep0 candidate should be retained"
5209 );
5210}
5211
5212#[test]
5213fn hc_collect_optimal_candidates_rep_tail_match_skips_chain_probe() {
5214 let mut hc = HcMatchGenerator::new(64);
5215 hc.table.history = b"aaaaaaaaaa".to_vec();
5216 hc.table.history_start = 0;
5217 hc.table.history_abs_start = 0;
5218 hc.table.position_base = 0;
5219 hc.hc.search_depth = 32;
5220 let abs_pos = 6usize;
5221 hc.table.ensure_tables();
5222 hc.table.insert_positions(0, abs_pos);
5223
5224 let profile = HcOptimalCostProfile {
5225 max_chain_depth: 32,
5226 sufficient_match_len: usize::MAX / 2,
5227 accurate: true,
5228 favor_small_offsets: false,
5229 };
5230 let mut out = Vec::new();
5231 hc.collect_optimal_candidates(
5232 abs_pos,
5233 hc.table.history.len(),
5234 profile,
5235 HcCandidateQuery {
5236 reps: [1, 4, 8],
5237 lit_len: 1,
5238 ldm_candidate: None,
5239 },
5240 &mut out,
5241 );
5242
5243 assert!(
5244 out.iter()
5245 .all(|candidate| matches!(candidate.offset, 1 | 4)),
5246 "terminal rep match should return before chain probing adds non-rep offsets"
5247 );
5248}
5249
5250#[test]
5251fn hc_collect_optimal_candidates_long_chain_match_advances_skip_window() {
5252 let mut hc = HcMatchGenerator::new(128);
5253 hc.table.history = b"abcabcabcabcabcabcabcabc".to_vec();
5254 hc.table.history_start = 0;
5255 hc.table.history_abs_start = 0;
5256 hc.table.position_base = 0;
5257 hc.hc.search_depth = 32;
5258 let abs_pos = 9usize;
5259 hc.table.ensure_tables();
5260 hc.table.insert_positions(0, abs_pos);
5261 hc.table.skip_insert_until_abs = 0;
5262
5263 let profile = HcOptimalCostProfile {
5264 max_chain_depth: 32,
5265 sufficient_match_len: usize::MAX / 2,
5266 accurate: true,
5267 favor_small_offsets: false,
5268 };
5269 let mut out = Vec::new();
5270 hc.collect_optimal_candidates(
5271 abs_pos,
5272 hc.table.history.len(),
5273 profile,
5274 HcCandidateQuery {
5275 reps: [1, 4, 8],
5276 lit_len: 1,
5277 ldm_candidate: None,
5278 },
5279 &mut out,
5280 );
5281
5282 assert!(
5283 hc.table.skip_insert_until_abs > abs_pos,
5284 "long chain match should advance skip window to avoid redundant immediate insertions"
5285 );
5286}
5287
5288#[test]
5289fn hc_collect_optimal_candidates_chain_fast_skip_uses_match_end_minus_8() {
5290 let mut hc = HcMatchGenerator::new(128);
5291 hc.table.history = b"abcabcabcabcabcabcabcabc".to_vec();
5292 hc.table.history_start = 0;
5293 hc.table.history_abs_start = 0;
5294 hc.table.position_base = 0;
5295 hc.hc.search_depth = 32;
5296 let abs_pos = 9usize;
5297 hc.table.ensure_tables();
5298 hc.table.insert_positions(0, abs_pos);
5299 hc.table.skip_insert_until_abs = 0;
5300
5301 let profile = HcOptimalCostProfile {
5302 max_chain_depth: 32,
5303 sufficient_match_len: 10,
5304 accurate: true,
5305 favor_small_offsets: false,
5306 };
5307 let mut out = Vec::new();
5308 hc.collect_optimal_candidates(
5309 abs_pos,
5310 hc.table.history.len(),
5311 profile,
5312 HcCandidateQuery {
5313 reps: [1, 4, 8],
5314 lit_len: 1,
5315 ldm_candidate: None,
5316 },
5317 &mut out,
5318 );
5319
5320 let best_match_end = out
5321 .iter()
5322 .map(|candidate| candidate.start.saturating_add(candidate.match_len))
5323 .max()
5324 .expect("expected at least one candidate");
5325 assert!(
5326 hc.table.skip_insert_until_abs > abs_pos,
5327 "chain fast-skip must advance past current position"
5328 );
5329 assert!(
5330 hc.table.skip_insert_until_abs <= best_match_end.saturating_sub(8),
5331 "chain fast-skip must not exceed donor-style matchEndIdx - 8 bound"
5332 );
5333}
5334
5335#[test]
5336fn hc_collect_optimal_candidates_advances_skip_window_on_plain_bt_path() {
5337 let mut hc = HcMatchGenerator::new(256);
5338 hc.table.history = b"abcdefghijklmnop".to_vec();
5339 hc.table.history_start = 0;
5340 hc.table.history_abs_start = 0;
5341 hc.table.position_base = 0;
5342 hc.hc.search_depth = 0;
5343 hc.table.ensure_tables();
5344
5345 let abs_pos = 8usize;
5346 hc.table.skip_insert_until_abs = 0;
5347
5348 let profile = HcOptimalCostProfile {
5349 max_chain_depth: 0,
5350 sufficient_match_len: usize::MAX / 2,
5351 accurate: true,
5352 favor_small_offsets: false,
5353 };
5354 let mut out = Vec::new();
5355 hc.collect_optimal_candidates(
5356 abs_pos,
5357 hc.table.history.len(),
5358 profile,
5359 HcCandidateQuery {
5360 reps: [1, 4, 8],
5361 lit_len: 1,
5362 ldm_candidate: None,
5363 },
5364 &mut out,
5365 );
5366
5367 assert_eq!(
5368 hc.table.skip_insert_until_abs,
5369 abs_pos.saturating_add(1),
5370 "plain BT path should advance skip window by 1 via donor matchEndIdx baseline"
5371 );
5372}
5373
5374#[test]
5387fn hc_ldm_candidates_are_merged_into_optimal_candidates() {
5388 let mut hc = HcMatchGenerator::new(512);
5389 hc.table.history = (0..256).map(|i| (i % 251) as u8).collect();
5390 hc.table.history_start = 0;
5391 hc.table.history_abs_start = 0;
5392
5393 let abs_pos = 128usize;
5394 let current_abs_end = 256usize;
5395 let ldm = MatchCandidate {
5396 start: abs_pos,
5397 offset: 96,
5398 match_len: 40,
5399 };
5400
5401 let profile = HcOptimalCostProfile {
5402 max_chain_depth: 0,
5403 sufficient_match_len: usize::MAX / 2,
5404 accurate: true,
5405 favor_small_offsets: false,
5406 };
5407 let mut out = Vec::new();
5408 hc.collect_optimal_candidates(
5409 abs_pos,
5410 current_abs_end,
5411 profile,
5412 HcCandidateQuery {
5413 reps: [1, 4, 8],
5414 lit_len: 1,
5415 ldm_candidate: Some(ldm),
5416 },
5417 &mut out,
5418 );
5419 assert!(
5420 out.iter().any(
5421 |candidate| candidate.offset == ldm.offset && candidate.match_len == ldm.match_len
5422 ),
5423 "LDM candidate should be present in optimal candidate set"
5424 );
5425}
5426
5427#[test]
5428fn btultra_and_btultra2_both_keep_dictionary_candidates() {
5429 use super::strategy::StrategyTag;
5437
5438 let test_config = HcConfig {
5439 hash_log: 23,
5440 chain_log: 22,
5441 search_depth: 32,
5442 target_len: 256,
5443 };
5444 let window_log = 20u8;
5445
5446 let prepare_history = |hc: &mut HcMatchGenerator, abs_pos: usize| {
5447 hc.table.history = alloc::vec![0u8; 160];
5448 for i in 0..64 {
5449 hc.table.history[i] = b'a' + (i % 7) as u8;
5450 }
5451 for i in 64..160 {
5452 hc.table.history[i] = b'k' + (i % 5) as u8;
5453 }
5454 for i in 0..24 {
5455 hc.table.history[abs_pos + i] = hc.table.history[16 + i];
5456 }
5457 hc.table.history_start = 0;
5458 hc.table.history_abs_start = 0;
5459 hc.table.position_base = 0;
5460 hc.table.ensure_tables();
5461 hc.table.insert_positions(0, abs_pos);
5462 hc.table.dictionary_limit_abs = Some(64);
5463 hc.table.skip_insert_until_abs = 0;
5464 };
5465
5466 let profile = HcOptimalCostProfile {
5467 max_chain_depth: 32,
5468 sufficient_match_len: usize::MAX / 2,
5469 accurate: true,
5470 favor_small_offsets: false,
5471 };
5472 let abs_pos = 96usize;
5473 let mut out = Vec::new();
5474
5475 let mut hc = HcMatchGenerator::new(256);
5476 hc.configure(test_config, StrategyTag::BtUltra2, window_log);
5477 prepare_history(&mut hc, abs_pos);
5478 hc.collect_optimal_candidates(
5479 abs_pos,
5480 160,
5481 profile,
5482 HcCandidateQuery {
5483 reps: [1, 4, 8],
5484 lit_len: 1,
5485 ldm_candidate: None,
5486 },
5487 &mut out,
5488 );
5489 assert!(
5490 out.iter().any(|candidate| candidate.offset >= 32),
5491 "btultra2 should retain dictionary candidates on donor-parity path"
5492 );
5493
5494 let mut hc = HcMatchGenerator::new(256);
5495 hc.configure(test_config, StrategyTag::BtUltra, window_log);
5496 prepare_history(&mut hc, abs_pos);
5497 hc.collect_optimal_candidates(
5498 abs_pos,
5499 160,
5500 profile,
5501 HcCandidateQuery {
5502 reps: [1, 4, 8],
5503 lit_len: 1,
5504 ldm_candidate: None,
5505 },
5506 &mut out,
5507 );
5508 assert!(
5509 out.iter().any(|candidate| candidate.offset >= 32),
5510 "btultra should retain dictionary candidates"
5511 );
5512}
5513
5514#[test]
5515fn driver_small_source_hint_shrinks_dfast_hash_tables() {
5516 let mut driver = MatchGeneratorDriver::new(32, 2);
5517
5518 driver.reset(CompressionLevel::Level(2));
5519 let mut space = driver.get_next_space();
5520 space[..12].copy_from_slice(b"abcabcabcabc");
5521 space.truncate(12);
5522 driver.commit_space(space);
5523 driver.skip_matching_with_hint(None);
5524 let full_long = driver.dfast_matcher().long_hash.len();
5527 let full_short = driver.dfast_matcher().short_hash.len();
5528 assert_eq!(full_long, 1 << DFAST_HASH_BITS);
5529 assert_eq!(
5530 full_short,
5531 1 << (DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA)
5532 );
5533
5534 driver.set_source_size_hint(1024);
5535 driver.reset(CompressionLevel::Level(2));
5536 let mut space = driver.get_next_space();
5537 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
5538 space.truncate(12);
5539 driver.commit_space(space);
5540 driver.skip_matching_with_hint(None);
5541 let hinted_long = driver.dfast_matcher().long_hash.len();
5542 let hinted_short = driver.dfast_matcher().short_hash.len();
5543
5544 assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
5545 assert_eq!(hinted_long, 1 << MIN_HINTED_WINDOW_LOG);
5552 let expected_hinted_short_bits = (MIN_HINTED_WINDOW_LOG as usize)
5553 .saturating_sub(DFAST_SHORT_HASH_BITS_DELTA)
5554 .max(MIN_WINDOW_LOG as usize);
5555 assert_eq!(
5556 hinted_short,
5557 1 << expected_hinted_short_bits,
5558 "short table must sit one DFAST_SHORT_HASH_BITS_DELTA below the long table \
5559 (clamped at MIN_WINDOW_LOG) — a regression that pulls it up to the long-table \
5560 floor would still satisfy the `< full_short` bound below and slip through"
5561 );
5562 assert!(
5563 hinted_long < full_long && hinted_short < full_short,
5564 "tiny source hint should reduce both dfast tables"
5565 );
5566}
5567
5568#[test]
5569fn driver_small_source_hint_shrinks_row_hash_tables() {
5570 let mut driver = MatchGeneratorDriver::new(32, 2);
5571
5572 driver.reset(CompressionLevel::Level(4));
5573 let mut space = driver.get_next_space();
5574 space[..12].copy_from_slice(b"abcabcabcabc");
5575 space.truncate(12);
5576 driver.commit_space(space);
5577 driver.skip_matching_with_hint(None);
5578 let full_rows = driver.row_matcher().row_heads.len();
5579 assert_eq!(full_rows, 1 << (ROW_HASH_BITS - ROW_LOG));
5580
5581 driver.set_source_size_hint(1024);
5582 driver.reset(CompressionLevel::Level(4));
5583 let mut space = driver.get_next_space();
5584 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
5585 space.truncate(12);
5586 driver.commit_space(space);
5587 driver.skip_matching_with_hint(None);
5588 let hinted_rows = driver.row_matcher().row_heads.len();
5589
5590 assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
5591 assert_eq!(
5592 hinted_rows,
5593 1 << ((MIN_HINTED_WINDOW_LOG as usize) - ROW_LOG)
5594 );
5595 assert!(
5596 hinted_rows < full_rows,
5597 "tiny source hint should reduce row hash table footprint"
5598 );
5599}
5600
5601#[test]
5602fn row_matches_roundtrip_multi_block_pattern() {
5603 let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
5604 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
5605 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
5606
5607 let mut matcher = RowMatchGenerator::new(1 << 22);
5608 matcher.configure(ROW_CONFIG);
5609 matcher.ensure_tables();
5610 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
5611 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
5612 Sequence::Triple {
5613 literals,
5614 offset,
5615 match_len,
5616 } => {
5617 decoded.extend_from_slice(literals);
5618 let start = decoded.len() - offset;
5619 for i in 0..match_len {
5620 let byte = decoded[start + i];
5621 decoded.push(byte);
5622 }
5623 }
5624 };
5625
5626 matcher.add_data(first_block.clone(), |_| {});
5627 let mut history = Vec::new();
5628 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
5629 assert_eq!(history, first_block);
5630
5631 matcher.add_data(second_block.clone(), |_| {});
5632 let prefix_len = history.len();
5633 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
5634
5635 assert_eq!(&history[prefix_len..], second_block.as_slice());
5636
5637 let third_block: Vec<u8> = (0u8..=255).collect();
5639 matcher.add_data(third_block.clone(), |_| {});
5640 let third_prefix = history.len();
5641 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
5642 assert_eq!(&history[third_prefix..], third_block.as_slice());
5643}
5644
5645#[test]
5646fn row_short_block_emits_literals_only() {
5647 let mut matcher = RowMatchGenerator::new(1 << 22);
5648 matcher.configure(ROW_CONFIG);
5649
5650 matcher.add_data(b"abcde".to_vec(), |_| {});
5651
5652 let mut saw_triple = false;
5653 let mut reconstructed = Vec::new();
5654 matcher.start_matching(|seq| match seq {
5655 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
5656 Sequence::Triple { .. } => saw_triple = true,
5657 });
5658
5659 assert!(
5660 !saw_triple,
5661 "row backend must not emit triples for short blocks"
5662 );
5663 assert_eq!(reconstructed, b"abcde");
5664
5665 saw_triple = false;
5667 matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
5668 matcher.start_matching(|seq| {
5669 if let Sequence::Triple { .. } = seq {
5670 saw_triple = true;
5671 }
5672 });
5673 assert!(
5674 saw_triple,
5675 "row backend should emit triples on repeated data"
5676 );
5677}
5678
5679#[test]
5680fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
5681 let mut matcher = RowMatchGenerator::new(1 << 22);
5682 matcher.configure(ROW_CONFIG);
5683 matcher.add_data(b"abcabc".to_vec(), |_| {});
5684
5685 let best = MatchCandidate {
5686 start: 0,
5687 offset: 1,
5688 match_len: ROW_MIN_MATCH_LEN,
5689 };
5690 let picked = matcher
5691 .pick_lazy_match(0, 0, Some(best))
5692 .expect("best candidate must survive");
5693
5694 assert_eq!(picked.start, best.start);
5695 assert_eq!(picked.offset, best.offset);
5696 assert_eq!(picked.match_len, best.match_len);
5697}
5698
5699#[test]
5700fn row_backfills_previous_block_tail_for_cross_boundary_match() {
5701 let mut matcher = RowMatchGenerator::new(1 << 22);
5702 matcher.configure(ROW_CONFIG);
5703
5704 let mut first_block = alloc::vec![0xA5; 64];
5705 first_block.extend_from_slice(b"XYZ");
5706 let second_block = b"XYZXYZtail".to_vec();
5707
5708 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
5709 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
5710 Sequence::Triple {
5711 literals,
5712 offset,
5713 match_len,
5714 } => {
5715 decoded.extend_from_slice(literals);
5716 let start = decoded.len() - offset;
5717 for i in 0..match_len {
5718 let byte = decoded[start + i];
5719 decoded.push(byte);
5720 }
5721 }
5722 };
5723
5724 matcher.add_data(first_block.clone(), |_| {});
5725 let mut reconstructed = Vec::new();
5726 matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
5727 assert_eq!(reconstructed, first_block);
5728
5729 matcher.add_data(second_block.clone(), |_| {});
5730 let mut saw_cross_boundary = false;
5731 let prefix_len = reconstructed.len();
5732 matcher.start_matching(|seq| {
5733 if let Sequence::Triple {
5734 literals,
5735 offset,
5736 match_len,
5737 } = seq
5738 && literals.is_empty()
5739 && offset == 3
5740 && match_len >= ROW_MIN_MATCH_LEN
5741 {
5742 saw_cross_boundary = true;
5743 }
5744 replay_sequence(&mut reconstructed, seq);
5745 });
5746
5747 assert!(
5748 saw_cross_boundary,
5749 "row matcher should reuse the 3-byte previous-block tail"
5750 );
5751 assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
5752}
5753
5754#[test]
5755fn row_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
5756 let data = deterministic_high_entropy_bytes(0xA713_9C5D_44E2_10B1, 4096);
5757
5758 let mut dense = RowMatchGenerator::new(1 << 22);
5759 dense.configure(ROW_CONFIG);
5760 dense.add_data(data.clone(), |_| {});
5761 dense.skip_matching_with_hint(Some(false));
5762 let dense_slots = dense
5763 .row_positions
5764 .iter()
5765 .filter(|&&pos| pos != ROW_EMPTY_SLOT)
5766 .count();
5767
5768 let mut sparse = RowMatchGenerator::new(1 << 22);
5769 sparse.configure(ROW_CONFIG);
5770 sparse.add_data(data, |_| {});
5771 sparse.skip_matching_with_hint(Some(true));
5772 let sparse_slots = sparse
5773 .row_positions
5774 .iter()
5775 .filter(|&&pos| pos != ROW_EMPTY_SLOT)
5776 .count();
5777
5778 assert!(
5779 sparse_slots < dense_slots,
5780 "incompressible hint should seed fewer row slots (sparse={sparse_slots}, dense={dense_slots})"
5781 );
5782}
5783
5784#[test]
5798fn row_skip_matching_with_none_hint_leaves_interior_empty() {
5799 let data = deterministic_high_entropy_bytes(0x9B47_F2A1_8C5E_3306, 4096);
5800
5801 let mut none_hint = RowMatchGenerator::new(1 << 22);
5802 none_hint.configure(ROW_CONFIG);
5803 none_hint.add_data(data.clone(), |_| {});
5804 none_hint.skip_matching_with_hint(None);
5805 let none_slots = none_hint
5806 .row_positions
5807 .iter()
5808 .filter(|&&pos| pos != ROW_EMPTY_SLOT)
5809 .count();
5810
5811 let mut dense = RowMatchGenerator::new(1 << 22);
5814 dense.configure(ROW_CONFIG);
5815 dense.add_data(data, |_| {});
5816 dense.skip_matching_with_hint(Some(false));
5817 let dense_slots = dense
5818 .row_positions
5819 .iter()
5820 .filter(|&&pos| pos != ROW_EMPTY_SLOT)
5821 .count();
5822
5823 assert_eq!(
5828 none_slots, 0,
5829 "None hint at block_start=0 must leave row table fully empty \
5830 (donor parity — interior NOT inserted, no pre-block backfill possible)",
5831 );
5832 assert!(
5833 dense_slots > 0,
5834 "Some(false) dict-priming path must still insert densely \
5835 (sanity check: control case for the `none_slots == 0` assertion)",
5836 );
5837}
5838
5839#[test]
5840fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
5841 let mut driver = MatchGeneratorDriver::new(32, 2);
5842
5843 driver.reset(CompressionLevel::Level(2));
5844 let mut space = driver.get_next_space();
5845 space[..12].copy_from_slice(b"abcabcabcabc");
5846 space.truncate(12);
5847 driver.commit_space(space);
5848 driver.skip_matching_with_hint(None);
5849
5850 let long_len = driver.dfast_matcher().long_hash.len();
5854 let short_len = driver.dfast_matcher().short_hash.len();
5855 assert_eq!(
5856 long_len,
5857 1 << DFAST_HASH_BITS,
5858 "unhinted Level(2) should keep default long-hash table size"
5859 );
5860 assert_eq!(
5861 short_len,
5862 1 << (DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA),
5863 "unhinted Level(2) short-hash should be one bit smaller than long-hash"
5864 );
5865}
5866
5867#[cfg(any())] #[test]
5869fn simple_backend_rejects_undersized_pooled_suffix_store() {
5870 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
5871 driver.reset(CompressionLevel::Fastest);
5872
5873 driver.suffix_pool.push(SuffixStore::with_capacity(1024));
5874
5875 let mut space = driver.get_next_space();
5876 space.clear();
5877 space.resize(4096, 0xAB);
5878 driver.commit_space(space);
5879
5880 let last_suffix_slots = driver
5881 .simple()
5882 .window
5883 .last()
5884 .expect("window entry must exist after commit")
5885 .suffixes
5886 .slots
5887 .len();
5888 assert!(
5889 last_suffix_slots >= 4096,
5890 "undersized pooled suffix store must not be reused for larger blocks"
5891 );
5892}
5893
5894#[test]
5895fn source_hint_clamps_driver_slice_size_to_window() {
5896 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
5897 driver.set_source_size_hint(1024);
5898 driver.reset(CompressionLevel::Default);
5899
5900 let window = driver.window_size() as usize;
5901 assert_eq!(window, 1 << MIN_HINTED_WINDOW_LOG);
5902 assert_eq!(driver.slice_size, window);
5903
5904 let space = driver.get_next_space();
5905 assert_eq!(space.len(), window);
5906 driver.commit_space(space);
5907}
5908
5909#[test]
5910fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
5911 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
5912 driver.reset(CompressionLevel::Default);
5913
5914 let large = driver.get_next_space();
5915 let large_capacity = large.capacity();
5916 assert!(large_capacity >= 128 * 1024);
5917 driver.commit_space(large);
5918
5919 driver.set_source_size_hint(1024);
5920 driver.reset(CompressionLevel::Default);
5921
5922 let small = driver.get_next_space();
5923 assert_eq!(small.len(), 1 << MIN_HINTED_WINDOW_LOG);
5924 assert!(
5925 small.capacity() >= large_capacity,
5926 "pooled buffer capacity should be preserved to avoid shrink/grow churn"
5927 );
5928}
5929
5930#[test]
5931fn driver_best_to_fastest_releases_oversized_hc_tables() {
5932 let mut driver = MatchGeneratorDriver::new(32, 2);
5933
5934 driver.reset(CompressionLevel::Best);
5936 assert_eq!(driver.window_size(), (1u64 << 24));
5937
5938 let mut space = driver.get_next_space();
5940 space[..12].copy_from_slice(b"abcabcabcabc");
5941 space.truncate(12);
5942 driver.commit_space(space);
5943 driver.skip_matching_with_hint(None);
5944
5945 driver.reset(CompressionLevel::Fastest);
5960 assert_eq!(driver.window_size(), (1u64 << 19));
5961 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
5962}
5963
5964#[test]
5965fn driver_better_to_best_resizes_hc_tables() {
5966 let mut driver = MatchGeneratorDriver::new(32, 2);
5967
5968 driver.reset(CompressionLevel::Better);
5970 assert_eq!(driver.window_size(), (1u64 << 23));
5971
5972 let mut space = driver.get_next_space();
5973 space[..12].copy_from_slice(b"abcabcabcabc");
5974 space.truncate(12);
5975 driver.commit_space(space);
5976 driver.skip_matching_with_hint(None);
5977
5978 let hc = driver.hc_matcher();
5979 let better_hash_len = hc.table.hash_table.len();
5980 let better_chain_len = hc.table.chain_table.len();
5981
5982 driver.reset(CompressionLevel::Best);
5984 assert_eq!(driver.window_size(), (1u64 << 24));
5985
5986 let mut space = driver.get_next_space();
5988 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
5989 space.truncate(12);
5990 driver.commit_space(space);
5991 driver.skip_matching_with_hint(None);
5992
5993 let hc = driver.hc_matcher();
5994 assert!(
5995 hc.table.hash_table.len() > better_hash_len,
5996 "Best hash_table ({}) should be larger than Better ({})",
5997 hc.table.hash_table.len(),
5998 better_hash_len
5999 );
6000 assert!(
6001 hc.table.chain_table.len() > better_chain_len,
6002 "Best chain_table ({}) should be larger than Better ({})",
6003 hc.table.chain_table.len(),
6004 better_chain_len
6005 );
6006}
6007
6008#[cfg(any())]
6009#[test]
6011fn prime_with_dictionary_preserves_history_for_first_full_block() {
6012 let mut driver = MatchGeneratorDriver::new(8, 1);
6013 driver.reset(CompressionLevel::Fastest);
6014
6015 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
6016
6017 let mut space = driver.get_next_space();
6018 space.clear();
6019 space.extend_from_slice(b"abcdefgh");
6020 driver.commit_space(space);
6021
6022 let mut saw_match = false;
6023 driver.start_matching(|seq| {
6024 if let Sequence::Triple {
6025 literals,
6026 offset,
6027 match_len,
6028 } = seq
6029 && literals.is_empty()
6030 && offset == 8
6031 && match_len >= MIN_MATCH_LEN
6032 {
6033 saw_match = true;
6034 }
6035 });
6036
6037 assert!(
6038 saw_match,
6039 "first full block should still match dictionary-primed history"
6040 );
6041}
6042
6043#[cfg(any())]
6044#[test]
6046fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
6047 let mut driver = MatchGeneratorDriver::new(8, 1);
6048 driver.reset(CompressionLevel::Fastest);
6049
6050 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6051
6052 let mut space = driver.get_next_space();
6053 space.clear();
6054 space.extend_from_slice(b"abcdefgh");
6055 driver.commit_space(space);
6056
6057 let mut saw_match = false;
6058 driver.start_matching(|seq| {
6059 if let Sequence::Triple {
6060 literals,
6061 offset,
6062 match_len,
6063 } = seq
6064 && literals.is_empty()
6065 && offset == 24
6066 && match_len >= MIN_MATCH_LEN
6067 {
6068 saw_match = true;
6069 }
6070 });
6071
6072 assert!(
6073 saw_match,
6074 "dictionary bytes should remain addressable until frame output exceeds the live window"
6075 );
6076}
6077
6078#[test]
6079fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
6080 let mut driver = MatchGeneratorDriver::new(8, 1);
6081 driver.reset(CompressionLevel::Fastest);
6082
6083 driver.prime_with_dictionary(&[], [11, 7, 3]);
6084
6085 assert_eq!(driver.simple_mut().offset_hist, [11, 7, 3]);
6086}
6087
6088#[test]
6089fn hc_prime_with_empty_dictionary_disables_btultra2_seed_pass() {
6090 let mut driver = MatchGeneratorDriver::new(8, 1);
6091 driver.reset(CompressionLevel::Better);
6092
6093 driver.prime_with_dictionary(&[], [11, 7, 3]);
6094
6095 assert_eq!(driver.hc_matcher().table.offset_hist, [11, 7, 3]);
6096 assert!(
6097 !driver
6098 .hc_matcher()
6099 .should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1),
6100 "btultra2 warmup must stay disabled after dictionary priming, even when dict content is empty"
6101 );
6102}
6103
6104#[test]
6105fn hc_prime_with_dictionary_disables_btultra2_seed_pass() {
6106 let mut driver = MatchGeneratorDriver::new(8, 1);
6107 driver.reset(CompressionLevel::Better);
6108
6109 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
6110
6111 assert!(
6112 !driver
6113 .hc_matcher()
6114 .should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1),
6115 "btultra2 warmup must stay disabled after dictionary priming with content"
6116 );
6117}
6118
6119#[test]
6120fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
6121 let mut driver = MatchGeneratorDriver::new(8, 1);
6122 driver.reset(CompressionLevel::Level(2));
6123
6124 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
6125
6126 let mut space = driver.get_next_space();
6127 space.clear();
6128 space.extend_from_slice(b"abcdefgh");
6129 driver.commit_space(space);
6130
6131 let mut saw_match = false;
6132 driver.start_matching(|seq| {
6133 if let Sequence::Triple {
6134 literals,
6135 offset,
6136 match_len,
6137 } = seq
6138 && literals.is_empty()
6139 && offset == 8
6140 && match_len >= DFAST_MIN_MATCH_LEN
6141 {
6142 saw_match = true;
6143 }
6144 });
6145
6146 assert!(
6147 saw_match,
6148 "dfast backend should match dictionary-primed history in first full block"
6149 );
6150}
6151
6152#[test]
6153fn prime_with_dictionary_does_not_inflate_reported_window_size() {
6154 let mut driver = MatchGeneratorDriver::new(8, 1);
6155 driver.reset(CompressionLevel::Fastest);
6156
6157 let before = driver.window_size();
6158 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6159 let after = driver.window_size();
6160
6161 assert_eq!(
6162 after, before,
6163 "dictionary retention budget must not change reported frame window size"
6164 );
6165}
6166
6167#[cfg(any())] #[test]
6169fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
6170 let mut driver = MatchGeneratorDriver::new(8, 2);
6171 driver.reset(CompressionLevel::Fastest);
6172
6173 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
6176
6177 assert!(
6178 driver
6179 .simple()
6180 .window
6181 .iter()
6182 .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
6183 "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
6184 );
6185}
6186
6187#[test]
6188fn prime_with_dictionary_counts_only_committed_tail_budget() {
6189 let mut driver = MatchGeneratorDriver::new(8, 1);
6190 driver.reset(CompressionLevel::Fastest);
6191
6192 let before = driver.simple_mut().max_window_size;
6193 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
6195
6196 assert_eq!(
6197 driver.simple_mut().max_window_size,
6198 before + 8,
6199 "retention budget must account only for dictionary bytes actually committed to history"
6200 );
6201}
6202
6203#[test]
6204fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
6205 let mut driver = MatchGeneratorDriver::new(8, 1);
6206 driver.reset(CompressionLevel::Level(2));
6207
6208 let before = driver.dfast_matcher().max_window_size;
6209 driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);
6212
6213 assert_eq!(
6214 driver.dfast_matcher().max_window_size,
6215 before + 12,
6216 "dfast retention budget should include 4-byte dictionary tails"
6217 );
6218}
6219
6220#[test]
6221fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
6222 let mut driver = MatchGeneratorDriver::new(8, 1);
6223 driver.reset(CompressionLevel::Level(4));
6224
6225 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
6226
6227 let mut space = driver.get_next_space();
6228 space.clear();
6229 space.extend_from_slice(b"abcdefgh");
6230 driver.commit_space(space);
6231
6232 let mut saw_match = false;
6233 driver.start_matching(|seq| {
6234 if let Sequence::Triple {
6235 literals,
6236 offset,
6237 match_len,
6238 } = seq
6239 && literals.is_empty()
6240 && offset == 8
6241 && match_len >= ROW_MIN_MATCH_LEN
6242 {
6243 saw_match = true;
6244 }
6245 });
6246
6247 assert!(
6248 saw_match,
6249 "row backend should match dictionary-primed history in first full block"
6250 );
6251}
6252
6253#[test]
6254fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
6255 let mut driver = MatchGeneratorDriver::new(8, 1);
6256 driver.reset(CompressionLevel::Level(4));
6257
6258 let base_window = driver.row_matcher().max_window_size;
6259 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
6262
6263 assert_eq!(
6264 driver.row_matcher().max_window_size,
6265 base_window + 8,
6266 "row retained window must exclude uncommitted 1-byte tail"
6267 );
6268}
6269
6270#[test]
6271fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
6272 let mut driver = MatchGeneratorDriver::new(8, 1);
6273 driver.reset(CompressionLevel::Level(4));
6274 driver.row_matcher_mut().max_window_size = 8;
6276 driver.reported_window_size = 8;
6277
6278 let base_window = driver.row_matcher().max_window_size;
6279 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6280 assert_eq!(driver.row_matcher().max_window_size, base_window + 24);
6281
6282 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
6283 let mut space = driver.get_next_space();
6284 space.clear();
6285 space.extend_from_slice(block);
6286 driver.commit_space(space);
6287 driver.skip_matching_with_hint(None);
6288 }
6289
6290 assert_eq!(
6291 driver.dictionary_retained_budget, 0,
6292 "dictionary budget should be fully retired once primed dict slices are evicted"
6293 );
6294 assert_eq!(
6295 driver.row_matcher().max_window_size,
6296 base_window,
6297 "retired dictionary budget must not remain reusable for live history"
6298 );
6299}
6300
6301#[test]
6311fn row_get_last_space_then_reset_to_fastest_drops_row_variant() {
6312 let mut driver = MatchGeneratorDriver::new(8, 1);
6313 driver.reset(CompressionLevel::Level(4));
6314 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);
6315
6316 let mut space = driver.get_next_space();
6317 space.clear();
6318 space.extend_from_slice(b"row-data");
6319 driver.commit_space(space);
6320
6321 assert_eq!(driver.get_last_space(), b"row-data");
6322
6323 driver.reset(CompressionLevel::Fastest);
6324 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
6325}
6326
6327#[test]
6338fn driver_reset_from_row_backend_recycles_row_buffers_into_pool() {
6339 let mut driver = MatchGeneratorDriver::new(8, 1);
6340 driver.reset(CompressionLevel::Level(4));
6341 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);
6342
6343 let mut space = driver.get_next_space();
6344 space.extend_from_slice(b"row-data-to-recycle");
6345 driver.commit_space(space);
6346
6347 let before_pool = driver.vec_pool.len();
6348 driver.reset(CompressionLevel::Fastest);
6349
6350 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
6351 assert!(
6357 driver.vec_pool.len() > before_pool,
6358 "row reset must recycle the committed row history buffer into vec_pool \
6359 (before_pool = {before_pool}, after = {})",
6360 driver.vec_pool.len()
6361 );
6362}
6363
6364#[test]
6365fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
6366 let mut params = resolve_level_params(CompressionLevel::Level(4), None);
6367 params.window_log = 22;
6368 let adjusted = adjust_params_for_source_size(params, 0);
6369 assert_eq!(adjusted.window_log, MIN_HINTED_WINDOW_LOG);
6370}
6371
6372#[test]
6373fn common_prefix_len_matches_scalar_reference_across_offsets() {
6374 fn scalar_reference(a: &[u8], b: &[u8]) -> usize {
6375 a.iter()
6376 .zip(b.iter())
6377 .take_while(|(lhs, rhs)| lhs == rhs)
6378 .count()
6379 }
6380
6381 for total_len in [
6382 0usize, 1, 5, 15, 16, 17, 31, 32, 33, 64, 65, 127, 191, 257, 320,
6383 ] {
6384 let base: Vec<u8> = (0..total_len)
6385 .map(|i| ((i * 13 + 7) & 0xFF) as u8)
6386 .collect();
6387
6388 for start in [0usize, 1, 3] {
6389 if start > total_len {
6390 continue;
6391 }
6392 let a = &base[start..];
6393 let b = a.to_vec();
6394 assert_eq!(
6395 common_prefix_len(a, &b),
6396 scalar_reference(a, &b),
6397 "equal slices total_len={total_len} start={start}"
6398 );
6399
6400 let len = a.len();
6401 for mismatch in [0usize, 1, 7, 15, 16, 31, 32, 47, 63, 95, 127, 128, 129, 191] {
6402 if mismatch >= len {
6403 continue;
6404 }
6405 let mut altered = b.clone();
6406 altered[mismatch] ^= 0x5A;
6407 assert_eq!(
6408 common_prefix_len(a, &altered),
6409 scalar_reference(a, &altered),
6410 "total_len={total_len} start={start} mismatch={mismatch}"
6411 );
6412 }
6413
6414 if len > 0 {
6415 let mismatch = len - 1;
6416 let mut altered = b.clone();
6417 altered[mismatch] ^= 0xA5;
6418 assert_eq!(
6419 common_prefix_len(a, &altered),
6420 scalar_reference(a, &altered),
6421 "tail mismatch total_len={total_len} start={start} mismatch={mismatch}"
6422 );
6423 }
6424 }
6425 }
6426
6427 let long = alloc::vec![0xAB; 320];
6428 let shorter = alloc::vec![0xAB; 137];
6429 assert_eq!(
6430 common_prefix_len(&long, &shorter),
6431 scalar_reference(&long, &shorter)
6432 );
6433}
6434
6435#[test]
6436fn row_pick_lazy_returns_none_when_next_is_better() {
6437 let mut matcher = RowMatchGenerator::new(1 << 22);
6438 matcher.configure(ROW_CONFIG);
6439 matcher.add_data(alloc::vec![b'a'; 64], |_| {});
6440 matcher.ensure_tables();
6441
6442 let abs_pos = matcher.history_abs_start + 16;
6443 let best = MatchCandidate {
6444 start: abs_pos,
6445 offset: 8,
6446 match_len: ROW_MIN_MATCH_LEN,
6447 };
6448 assert!(
6449 matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
6450 "lazy picker should defer when next position is clearly better"
6451 );
6452}
6453
6454#[test]
6455fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
6456 let mut matcher = RowMatchGenerator::new(1 << 22);
6457 matcher.configure(ROW_CONFIG);
6458 matcher.lazy_depth = 2;
6459 matcher.search_depth = 0;
6460 matcher.offset_hist = [6, 9, 1];
6461
6462 let mut data = alloc::vec![b'x'; 40];
6463 data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
6464 matcher.add_data(data, |_| {});
6465 matcher.ensure_tables();
6466
6467 let abs_pos = matcher.history_abs_start + 20;
6468 let best = matcher
6469 .best_match(abs_pos, 0)
6470 .expect("expected baseline repcode match");
6471 assert_eq!(best.offset, 9);
6472 assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);
6473
6474 if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
6475 assert!(next.match_len <= best.match_len);
6476 }
6477
6478 let next2 = matcher
6479 .best_match(abs_pos + 2, 2)
6480 .expect("expected +2 candidate");
6481 assert!(
6482 next2.match_len > best.match_len + 1,
6483 "+2 candidate must be significantly better for depth-2 lazy skip"
6484 );
6485 assert!(
6486 matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
6487 "lazy picker should defer when +2 candidate is significantly better"
6488 );
6489}
6490
6491#[test]
6492fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
6493 let mut matcher = RowMatchGenerator::new(1 << 22);
6494 matcher.configure(ROW_CONFIG);
6495 matcher.lazy_depth = 2;
6496 matcher.search_depth = 0;
6497 matcher.offset_hist = [6, 9, 1];
6498
6499 let mut data = alloc::vec![b'x'; 40];
6500 data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
6501 matcher.add_data(data, |_| {});
6502 matcher.ensure_tables();
6503
6504 let abs_pos = matcher.history_abs_start + 20;
6505 let best = matcher
6506 .best_match(abs_pos, 0)
6507 .expect("expected baseline repcode match");
6508 assert_eq!(best.offset, 9);
6509 assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);
6510
6511 let next2 = matcher
6512 .best_match(abs_pos + 2, 2)
6513 .expect("expected +2 candidate");
6514 assert_eq!(next2.match_len, best.match_len + 1);
6515 let chosen = matcher
6516 .pick_lazy_match(abs_pos, 0, Some(best))
6517 .expect("lazy picker should keep current best");
6518 assert_eq!(chosen.start, best.start);
6519 assert_eq!(chosen.offset, best.offset);
6520 assert_eq!(chosen.match_len, best.match_len);
6521}
6522
6523#[test]
6525fn row_hash_and_row_extracts_high_bits() {
6526 let mut matcher = RowMatchGenerator::new(1 << 22);
6527 matcher.configure(ROW_CONFIG);
6528 matcher.add_data(
6529 alloc::vec![
6530 0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
6531 0x70, 0x80,
6532 ],
6533 |_| {},
6534 );
6535 matcher.ensure_tables();
6536
6537 let pos = matcher.history_abs_start + 8;
6538 let (row, tag) = matcher
6539 .hash_and_row(pos)
6540 .expect("row hash should be available");
6541
6542 let idx = pos - matcher.history_abs_start;
6543 let concat = matcher.live_history();
6544 let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
6545 let hash = crate::encoding::fastpath::hash_mix_u64_with_kernel(matcher.hash_kernel, value);
6546 let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
6547 let combined = hash >> (u64::BITS as usize - total_bits);
6548 let expected_row =
6549 ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
6550 let expected_tag = combined as u8;
6551
6552 assert_eq!(row, expected_row);
6553 assert_eq!(tag, expected_tag);
6554}
6555
6556#[test]
6557fn row_repcode_skips_candidate_before_history_start() {
6558 let mut matcher = RowMatchGenerator::new(1 << 22);
6559 matcher.configure(ROW_CONFIG);
6560 matcher.history = alloc::vec![b'a'; 20];
6561 matcher.history_start = 0;
6562 matcher.history_abs_start = 10;
6563 matcher.offset_hist = [3, 0, 0];
6564
6565 assert!(matcher.repcode_candidate(12, 1).is_none());
6566}
6567
6568#[test]
6569fn row_repcode_returns_none_when_position_too_close_to_history_end() {
6570 let mut matcher = RowMatchGenerator::new(1 << 22);
6571 matcher.configure(ROW_CONFIG);
6572 matcher.history = b"abcde".to_vec();
6573 matcher.history_start = 0;
6574 matcher.history_abs_start = 0;
6575 matcher.offset_hist = [1, 0, 0];
6576
6577 assert!(matcher.repcode_candidate(4, 1).is_none());
6578}
6579
6580#[cfg(all(feature = "std", target_arch = "x86_64"))]
6581#[test]
6582fn hash_mix_sse42_path_is_available_and_matches_accelerated_impl_when_supported() {
6583 use crate::encoding::fastpath::{self, FastpathKernel};
6584 if !is_x86_feature_detected!("sse4.2") {
6585 return;
6586 }
6587 let v = 0x0123_4567_89AB_CDEFu64;
6588 let accelerated = unsafe { fastpath::sse42::hash_mix_u64(v) };
6590 let dispatched = fastpath::dispatch_hash_mix_u64(v);
6592 let kernel = fastpath::select_kernel();
6593 if kernel == FastpathKernel::Sse42 {
6594 assert_eq!(dispatched, accelerated);
6595 } else {
6596 assert_eq!(dispatched, accelerated, "AVX2/SSE4.2 share CRC32 mix");
6598 }
6599}
6600
6601#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
6602#[test]
6603fn hash_mix_crc_path_is_available_and_matches_accelerated_impl_when_supported() {
6604 use crate::encoding::fastpath;
6605 if !is_aarch64_feature_detected!("crc") {
6606 return;
6607 }
6608 let v = 0x0123_4567_89AB_CDEFu64;
6609 let accelerated = unsafe { fastpath::neon::hash_mix_u64(v) };
6611 let dispatched = fastpath::dispatch_hash_mix_u64(v);
6612 assert_eq!(dispatched, accelerated);
6613}
6614
6615#[test]
6616fn hc_hash3_position_matches_donor_formula() {
6617 let bytes = [b'a', b'b', b'c', b'd'];
6618 let read32 = u32::from_le_bytes(bytes);
6619 let expected = (((read32 << 8).wrapping_mul(HC_PRIME3BYTES)) >> (32 - HC3_HASH_LOG)) as usize;
6620 assert_eq!(
6621 super::match_table::storage::MatchTable::hash3_position(&bytes, HC3_HASH_LOG),
6622 expected
6623 );
6624}
6625
6626#[test]
6627fn hc_hash_position_matches_donor_hash4_formula() {
6628 let mut hc = HcMatchGenerator::new(1 << 20);
6629 hc.configure(HC_CONFIG, super::strategy::StrategyTag::Lazy, 22);
6630 let bytes = [b'a', b'b', b'c', b'd'];
6631 let read32 = u32::from_le_bytes(bytes);
6632 let expected = ((read32.wrapping_mul(HC_PRIME4BYTES)) >> (32 - hc.table.hash_log)) as usize;
6633 assert_eq!(hc.table.hash_position(&bytes), expected);
6634}
6635
6636#[test]
6637fn btultra2_main_hash_uses_donor_hash4_formula() {
6638 let mut hc = HcMatchGenerator::new(1 << 20);
6639 hc.configure(
6640 BTULTRA2_HC_CONFIG_L22,
6641 super::strategy::StrategyTag::BtUltra2,
6642 27,
6643 );
6644 let bytes = [b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h'];
6645 let read32 = u32::from_le_bytes(bytes[..4].try_into().unwrap());
6646 let expected = ((read32.wrapping_mul(HC_PRIME4BYTES)) >> (32 - hc.table.hash_log)) as usize;
6647 let actual = super::match_table::storage::MatchTable::hash_position_with_mls(
6648 &bytes,
6649 hc.table.hash_log,
6650 super::bt::BtMatcher::HASH_MLS,
6651 );
6652 assert_eq!(actual, expected);
6653}
6654
6655#[test]
6656fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
6657 let mut matcher = RowMatchGenerator::new(1 << 22);
6658 matcher.configure(ROW_CONFIG);
6659 matcher.history = b"abcde".to_vec();
6660 matcher.history_start = 0;
6661 matcher.history_abs_start = 0;
6662
6663 assert!(matcher.row_candidate(0, 0).is_none());
6664}
6665
6666#[test]
6667fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
6668 let mut hc = HcMatchGenerator::new(32);
6669 hc.table.history = b"abc".to_vec();
6670 hc.table.history_start = 0;
6671 hc.table.history_abs_start = 0;
6672 hc.table.ensure_tables();
6673
6674 let candidates = hc.hc.chain_candidates(&hc.table, 0);
6675 assert!(candidates.iter().all(|&pos| pos == usize::MAX));
6676}
6677
6678#[test]
6679fn hc_reset_refills_existing_tables_with_empty_sentinel() {
6680 let mut hc = HcMatchGenerator::new(32);
6681 hc.table.add_data(b"abcdeabcde".to_vec(), |_| {});
6682 hc.table.ensure_tables();
6683 assert!(!hc.table.hash_table.is_empty());
6684 assert!(!hc.table.chain_table.is_empty());
6685 hc.table.hash_table.fill(123);
6686 hc.table.chain_table.fill(456);
6687
6688 hc.reset(|_| {});
6689
6690 assert!(hc.table.hash_table.iter().all(|&v| v == HC_EMPTY));
6691 assert!(hc.table.chain_table.iter().all(|&v| v == HC_EMPTY));
6692}
6693
6694#[test]
6695fn hc_start_matching_returns_early_for_empty_current_block() {
6696 let mut hc = HcMatchGenerator::new(32);
6697 hc.table.add_data(Vec::new(), |_| {});
6698 let mut called = false;
6699 hc.start_matching(|_| called = true);
6700 assert!(!called, "empty current block should not emit sequences");
6701}
6702
6703#[cfg(test)]
6704fn deterministic_high_entropy_bytes(seed: u64, len: usize) -> Vec<u8> {
6705 let mut out = Vec::with_capacity(len);
6706 let mut state = seed;
6707 for _ in 0..len {
6708 state ^= state << 13;
6709 state ^= state >> 7;
6710 state ^= state << 17;
6711 out.push((state >> 40) as u8);
6712 }
6713 out
6714}
6715
6716#[cfg(test)]
6717fn level22_donor_block_ranges(data: &[u8]) -> Vec<(usize, usize)> {
6718 let mut ranges = Vec::new();
6719 let mut cursor = 0usize;
6720 let mut savings = 0i64;
6721 while cursor < data.len() {
6722 let remaining = data.len() - cursor;
6723 let candidate_len = remaining.min(HC_BLOCKSIZE_MAX);
6724 let block_len = crate::encoding::frame_compressor::donor_optimal_block_size(
6725 CompressionLevel::Level(22),
6726 &data[cursor..cursor + candidate_len],
6727 remaining,
6728 HC_BLOCKSIZE_MAX,
6729 savings,
6730 )
6731 .min(candidate_len)
6732 .max(1);
6733 ranges.push((cursor, block_len));
6734 cursor += block_len;
6735 if cursor >= HC_BLOCKSIZE_MAX {
6739 savings = 3;
6740 }
6741 }
6742 ranges
6743}
6744
6745#[cfg(test)]
6746fn merge_block_delimiters_like_donor(
6747 sequences: Vec<(usize, usize, usize)>,
6748) -> Vec<(usize, usize, usize)> {
6749 let mut out = Vec::with_capacity(sequences.len());
6750 let mut pending_lits = 0usize;
6751 for (lit_len, offset, match_len) in sequences {
6752 if offset == 0 && match_len == 0 {
6753 pending_lits = pending_lits.saturating_add(lit_len);
6754 continue;
6755 }
6756 out.push((lit_len.saturating_add(pending_lits), offset, match_len));
6757 pending_lits = 0;
6758 }
6759 if pending_lits > 0 {
6760 out.push((pending_lits, 0, 0));
6761 }
6762 out
6763}
6764
6765#[cfg(test)]
6766fn collect_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
6767 merge_block_delimiters_like_donor(collect_level22_sequences_with_delimiters(data))
6768 .into_iter()
6769 .filter(|(_, offset, match_len)| *offset != 0 || *match_len != 0)
6770 .collect()
6771}
6772
6773#[cfg(test)]
6774fn collect_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
6775 let mut driver = MatchGeneratorDriver::new(HC_BLOCKSIZE_MAX, 1);
6776 driver.set_source_size_hint(data.len() as u64);
6777 driver.reset(CompressionLevel::Level(22));
6778
6779 let mut sequences = Vec::new();
6780 for (chunk_start, chunk_len) in level22_donor_block_ranges(data) {
6781 let chunk = &data[chunk_start..chunk_start + chunk_len];
6782 let mut space = driver.get_next_space();
6783 space[..chunk.len()].copy_from_slice(chunk);
6784 space.truncate(chunk.len());
6785 driver.commit_space(space);
6786 driver.start_matching(|seq| {
6787 let entry = match seq {
6788 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
6789 Sequence::Triple {
6790 literals,
6791 offset,
6792 match_len,
6793 } => (literals.len(), offset, match_len),
6794 };
6795 sequences.push(entry);
6796 });
6797 }
6798 sequences
6799}
6800
6801#[cfg(test)]
6802fn donor_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
6803 merge_block_delimiters_like_donor(donor_level22_sequences_with_delimiters(data))
6804 .into_iter()
6805 .filter(|(_, offset, match_len)| *offset != 0 || *match_len != 0)
6806 .collect()
6807}
6808
6809#[cfg(test)]
6810fn donor_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
6811 use zstd::zstd_safe;
6812 use zstd::zstd_safe::zstd_sys;
6813
6814 fn assert_zstd_ok(code: usize, context: &str) {
6815 assert_eq!(
6816 unsafe { zstd_sys::ZSTD_isError(code) },
6817 0,
6818 "{context} failed: {}",
6819 zstd_safe::get_error_name(code)
6820 );
6821 }
6822
6823 unsafe {
6824 let cctx = zstd_sys::ZSTD_createCCtx();
6825 assert!(!cctx.is_null(), "ZSTD_createCCtx returned null");
6826
6827 assert_zstd_ok(
6828 zstd_sys::ZSTD_CCtx_setParameter(
6829 cctx,
6830 zstd_sys::ZSTD_cParameter::ZSTD_c_compressionLevel,
6831 22,
6832 ),
6833 "ZSTD_c_compressionLevel",
6834 );
6835
6836 let seq_capacity = zstd_safe::sequence_bound(data.len());
6837 let mut seqs = alloc::vec![
6838 zstd_sys::ZSTD_Sequence {
6839 offset: 0,
6840 litLength: 0,
6841 matchLength: 0,
6842 rep: 0,
6843 };
6844 seq_capacity
6845 ];
6846
6847 let seq_count = zstd_sys::ZSTD_generateSequences(
6848 cctx,
6849 seqs.as_mut_ptr(),
6850 seqs.len(),
6851 data.as_ptr().cast(),
6852 data.len(),
6853 );
6854 assert_zstd_ok(seq_count, "ZSTD_generateSequences");
6855 let rc = zstd_sys::ZSTD_freeCCtx(cctx);
6856 assert_eq!(rc, 0, "ZSTD_freeCCtx failed");
6857
6858 seqs.truncate(seq_count);
6859 seqs.into_iter()
6860 .map(|seq| {
6861 (
6862 seq.litLength as usize,
6863 seq.offset as usize,
6864 seq.matchLength as usize,
6865 )
6866 })
6867 .collect()
6868 }
6869}
6870
6871#[test]
6872fn level22_sequences_match_donor_on_corpus_proxy() {
6873 let data = include_bytes!("../../decodecorpus_files/z000033");
6874 assert_level22_sequences_match_donor(data);
6875}
6876
6877#[test]
6878fn level22_sequences_match_donor_on_small_corpus_proxy() {
6879 let data = include_bytes!("../../decodecorpus_files/z000030");
6880 assert_level22_sequences_match_donor(data);
6881}
6882
6883#[cfg(test)]
6884fn assert_level22_sequences_match_donor(data: &[u8]) {
6885 let rust = collect_level22_sequences(data);
6886 let donor = donor_level22_sequences(data);
6887
6888 if rust != donor {
6889 let first_diff = rust
6890 .iter()
6891 .zip(donor.iter())
6892 .position(|(lhs, rhs)| lhs != rhs)
6893 .unwrap_or_else(|| rust.len().min(donor.len()));
6894 let rust_pos = rust
6895 .iter()
6896 .take(first_diff)
6897 .fold(0usize, |acc, seq| acc + seq.0 + seq.2);
6898 let donor_pos = donor
6899 .iter()
6900 .take(first_diff)
6901 .fold(0usize, |acc, seq| acc + seq.0 + seq.2);
6902 let start = first_diff.saturating_sub(4);
6903 let rust_window = &rust[start..rust.len().min(first_diff + 4)];
6904 let donor_window = &donor[start..donor.len().min(first_diff + 4)];
6905 let mut reps = [1u32, 4, 8];
6906 for (lit_len, offset, _) in rust.iter().take(first_diff) {
6907 let _ = encode_offset_with_history(*offset as u32, *lit_len as u32, &mut reps);
6908 }
6909 panic!(
6910 "level22 sequence path diverged at idx {}: rust={:?} donor={:?} (rust_len={} donor_len={} rust_pos={} donor_pos={} reps_before={:?} rust_window={:?} donor_window={:?} block_ranges={:?})",
6911 first_diff,
6912 rust.get(first_diff),
6913 donor.get(first_diff),
6914 rust.len(),
6915 donor.len(),
6916 rust_pos,
6917 donor_pos,
6918 reps,
6919 rust_window,
6920 donor_window,
6921 level22_donor_block_ranges(data)
6922 .into_iter()
6923 .filter(|(start, len)| *start <= rust_pos && rust_pos < start + len)
6924 .collect::<Vec<_>>(),
6925 );
6926 }
6927}
6928
6929#[test]
6930fn hc_sparse_skip_matching_preserves_tail_cross_block_match() {
6931 let mut matcher = HcMatchGenerator::new(1 << 22);
6932 let tail = b"Qz9kLm2Rp";
6933 let mut first = deterministic_high_entropy_bytes(0xD1B5_4A32_9C77_0E19, 4096);
6934 let tail_start = first.len() - tail.len();
6935 first[tail_start..].copy_from_slice(tail);
6936 matcher.table.add_data(first.clone(), |_| {});
6937 matcher.skip_matching(Some(true));
6938
6939 let mut second = tail.to_vec();
6940 second.extend_from_slice(b"after-tail-literals");
6941 matcher.table.add_data(second, |_| {});
6942
6943 let mut first_sequence = None;
6944 matcher.start_matching(|seq| {
6945 if first_sequence.is_some() {
6946 return;
6947 }
6948 first_sequence = Some(match seq {
6949 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
6950 Sequence::Triple {
6951 literals,
6952 offset,
6953 match_len,
6954 } => (literals.len(), offset, match_len),
6955 });
6956 });
6957
6958 let (literals_len, offset, match_len) =
6959 first_sequence.expect("expected at least one sequence after sparse skip");
6960 assert_eq!(
6961 literals_len, 0,
6962 "first sequence should start at block boundary"
6963 );
6964 assert_eq!(
6965 offset,
6966 tail.len(),
6967 "first match should reference previous tail"
6968 );
6969 assert!(
6970 match_len >= tail.len(),
6971 "tail-aligned cross-block match must be preserved"
6972 );
6973}
6974
6975#[test]
6976fn btultra2_sparse_skip_matching_preserves_tail_cross_block_match() {
6977 let mut matcher = HcMatchGenerator::new(1 << 20);
6978 matcher.configure(
6979 BTULTRA2_HC_CONFIG_L22,
6980 super::strategy::StrategyTag::BtUltra2,
6981 20,
6982 );
6983 let tail = b"Bt9kLm2Rp";
6984 let mut first = deterministic_high_entropy_bytes(0xA9C3_7F21_D4E8_510B, 4096);
6985 let tail_start = first.len() - tail.len();
6986 first[tail_start..].copy_from_slice(tail);
6987 matcher.table.add_data(first, |_| {});
6988 matcher.skip_matching(Some(true));
6989
6990 let mut second = tail.to_vec();
6991 second.extend_from_slice(b"after-tail-literals");
6992 matcher.table.add_data(second, |_| {});
6993
6994 let mut first_sequence = None;
6995 matcher.start_matching(|seq| {
6996 if first_sequence.is_some() {
6997 return;
6998 }
6999 first_sequence = Some(match seq {
7000 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
7001 Sequence::Triple {
7002 literals,
7003 offset,
7004 match_len,
7005 } => (literals.len(), offset, match_len),
7006 });
7007 });
7008
7009 let (literals_len, offset, match_len) =
7010 first_sequence.expect("expected at least one sequence after sparse BT skip");
7011 assert_eq!(
7012 literals_len, 0,
7013 "BT sparse skip should preserve an immediate boundary match"
7014 );
7015 assert_eq!(
7016 offset,
7017 tail.len(),
7018 "first BT match should reference previous tail"
7019 );
7020 assert!(
7021 match_len >= tail.len(),
7022 "BT sparse skip must seed the dense tail for cross-block matching"
7023 );
7024}
7025
7026#[test]
7027fn hc_sparse_skip_matching_does_not_reinsert_sparse_tail_positions() {
7028 let mut matcher = HcMatchGenerator::new(1 << 22);
7029 let first = deterministic_high_entropy_bytes(0xC2B2_AE3D_27D4_EB4F, 4096);
7030 matcher.table.add_data(first.clone(), |_| {});
7031 matcher.skip_matching(Some(true));
7032
7033 let current_len = first.len();
7034 let current_abs_start =
7035 matcher.table.history_abs_start + matcher.table.window_size - current_len;
7036 let current_abs_end = current_abs_start + current_len;
7037 let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
7038 let tail_start = current_abs_end
7039 .saturating_sub(dense_tail)
7040 .max(matcher.table.history_abs_start)
7041 .max(current_abs_start);
7042
7043 let overlap_pos = (tail_start..current_abs_end)
7044 .find(|&pos| (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP))
7045 .expect("fixture should contain at least one sparse-grid overlap in dense tail");
7046
7047 let rel = matcher
7048 .table
7049 .relative_position(overlap_pos)
7050 .expect("overlap position should be representable as relative position");
7051 let chain_idx = rel as usize & ((1 << matcher.table.chain_log) - 1);
7052 assert_ne!(
7053 matcher.table.chain_table[chain_idx],
7054 rel + 1,
7055 "sparse-grid tail positions must not be reinserted (self-loop chain entry)"
7056 );
7057}
7058
7059#[test]
7060fn hc_compact_history_drains_when_threshold_crossed() {
7061 let mut hc = HcMatchGenerator::new(8);
7062 hc.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
7063 hc.table.history_start = 16;
7064 hc.table.compact_history();
7065 assert_eq!(hc.table.history_start, 0);
7066 assert_eq!(hc.table.history, b"qrstuvwxyz");
7067}
7068
7069#[test]
7070fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
7071 let mut hc = HcMatchGenerator::new(32);
7072 hc.table.history = b"abcdefghijklmnop".to_vec();
7073 hc.table.history_abs_start = 0;
7074 hc.table.position_base = 1;
7075 hc.table.ensure_tables();
7076 let before_hash = hc.table.hash_table.clone();
7077 let before_chain = hc.table.chain_table.clone();
7078
7079 hc.table.insert_position_no_rebase(0);
7080
7081 assert_eq!(hc.table.hash_table, before_hash);
7082 assert_eq!(hc.table.chain_table, before_chain);
7083}
7084
7085#[test]
7086fn hc_insert_positions_advances_next_to_update3_for_contiguous_range() {
7087 let mut hc = HcMatchGenerator::new(64);
7088 hc.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
7089 hc.table.history_start = 0;
7090 hc.table.history_abs_start = 0;
7091 hc.table.position_base = 0;
7092 hc.table.ensure_tables();
7093 hc.table.next_to_update3 = 0;
7094
7095 hc.table.insert_positions(0, 9);
7096
7097 assert_eq!(
7098 hc.table.next_to_update3, 9,
7099 "contiguous insert_positions should advance hash3 update cursor"
7100 );
7101}
7102
7103#[test]
7104fn hc_insert_positions_with_step_keeps_next_to_update3_cursor_for_sparse_ranges() {
7105 let mut hc = HcMatchGenerator::new(64);
7106 hc.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
7107 hc.table.history_start = 0;
7108 hc.table.history_abs_start = 0;
7109 hc.table.position_base = 0;
7110 hc.table.ensure_tables();
7111 hc.table.next_to_update3 = 0;
7112
7113 hc.table.insert_positions_with_step(0, 16, 4);
7114
7115 assert_eq!(
7116 hc.table.next_to_update3, 0,
7117 "sparse insert_positions_with_step must not mark skipped positions as hash3-updated"
7118 );
7119}
7120
7121#[cfg(any())]
7122#[test]
7124fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
7125 let mut driver = MatchGeneratorDriver::new(8, 1);
7126 driver.reset(CompressionLevel::Fastest);
7127 driver.simple_mut().max_window_size = 8;
7130 driver.reported_window_size = 8;
7131
7132 let base_window = driver.simple_mut().max_window_size;
7133 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
7134 assert_eq!(driver.simple_mut().max_window_size, base_window + 24);
7135
7136 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
7137 let mut space = driver.get_next_space();
7138 space.clear();
7139 space.extend_from_slice(block);
7140 driver.commit_space(space);
7141 driver.skip_matching_with_hint(None);
7142 }
7143
7144 assert_eq!(
7145 driver.dictionary_retained_budget, 0,
7146 "dictionary budget should be fully retired once primed dict slices are evicted"
7147 );
7148 assert_eq!(
7149 driver.simple_mut().max_window_size,
7150 base_window,
7151 "retired dictionary budget must not remain reusable for live history"
7152 );
7153}
7154
7155#[test]
7156fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
7157 let mut driver = MatchGeneratorDriver::new(8, 1);
7158 driver.reset(CompressionLevel::Level(2));
7159 driver.dfast_matcher_mut().max_window_size = 8;
7162 driver.reported_window_size = 8;
7163
7164 let base_window = driver.dfast_matcher().max_window_size;
7165 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
7166 assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);
7167
7168 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
7169 let mut space = driver.get_next_space();
7170 space.clear();
7171 space.extend_from_slice(block);
7172 driver.commit_space(space);
7173 driver.skip_matching_with_hint(None);
7174 }
7175
7176 assert_eq!(
7177 driver.dictionary_retained_budget, 0,
7178 "dictionary budget should be fully retired once primed dict slices are evicted"
7179 );
7180 assert_eq!(
7181 driver.dfast_matcher().max_window_size,
7182 base_window,
7183 "retired dictionary budget must not remain reusable for live history"
7184 );
7185}
7186
7187#[test]
7188fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
7189 let mut driver = MatchGeneratorDriver::new(8, 1);
7190 driver.reset(CompressionLevel::Better);
7191
7192 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
7193
7194 let mut space = driver.get_next_space();
7195 space.clear();
7196 space.extend_from_slice(b"abcdefgh");
7199 driver.commit_space(space);
7200
7201 let mut saw_match = false;
7202 driver.start_matching(|seq| {
7203 if let Sequence::Triple {
7204 literals,
7205 offset,
7206 match_len,
7207 } = seq
7208 && literals.is_empty()
7209 && offset == 8
7210 && match_len >= HC_MIN_MATCH_LEN
7211 {
7212 saw_match = true;
7213 }
7214 });
7215
7216 assert!(
7217 saw_match,
7218 "hash-chain backend should match dictionary-primed history in first full block"
7219 );
7220}
7221
7222#[test]
7223fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
7224 let mut driver = MatchGeneratorDriver::new(8, 1);
7225 driver.reset(CompressionLevel::Better);
7226 driver.hc_matcher_mut().table.max_window_size = 8;
7228 driver.reported_window_size = 8;
7229
7230 let base_window = driver.hc_matcher().table.max_window_size;
7231 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
7232 assert_eq!(driver.hc_matcher().table.max_window_size, base_window + 24);
7233
7234 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
7235 let mut space = driver.get_next_space();
7236 space.clear();
7237 space.extend_from_slice(block);
7238 driver.commit_space(space);
7239 driver.skip_matching_with_hint(None);
7240 }
7241
7242 assert_eq!(
7243 driver.dictionary_retained_budget, 0,
7244 "dictionary budget should be fully retired once primed dict slices are evicted"
7245 );
7246 assert_eq!(
7247 driver.hc_matcher().table.max_window_size,
7248 base_window,
7249 "retired dictionary budget must not remain reusable for live history"
7250 );
7251}
7252
7253#[test]
7254fn hc_rebases_positions_after_u32_boundary() {
7255 let mut matcher = HcMatchGenerator::new(64);
7256 matcher.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
7257 matcher.table.ensure_tables();
7258 matcher.table.position_base = 0;
7259 let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
7260 Ok(value) => value,
7261 Err(_) => return,
7262 };
7263 matcher.table.history_abs_start = history_abs_start;
7266 matcher.skip_matching(None);
7267 assert_eq!(
7268 matcher.table.position_base, matcher.table.history_abs_start,
7269 "rebase should anchor to the oldest live absolute position"
7270 );
7271
7272 assert!(
7273 matcher
7274 .table
7275 .hash_table
7276 .iter()
7277 .any(|entry| *entry != HC_EMPTY),
7278 "HC hash table should still be populated after crossing u32 boundary"
7279 );
7280
7281 let abs_pos = matcher.table.history_abs_start + 10;
7283 let candidates = matcher.hc.chain_candidates(&matcher.table, abs_pos);
7284 assert!(
7285 candidates.iter().any(|candidate| *candidate != usize::MAX),
7286 "chain_candidates should return valid matches after rebase"
7287 );
7288}
7289
7290#[test]
7291fn hc_rebase_rebuilds_only_inserted_prefix() {
7292 let mut matcher = HcMatchGenerator::new(64);
7293 matcher.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
7294 matcher.table.ensure_tables();
7295 matcher.table.position_base = 0;
7296 let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
7297 Ok(value) => value,
7298 Err(_) => return,
7299 };
7300 matcher.table.history_abs_start = history_abs_start;
7301 let abs_pos = matcher.table.history_abs_start + 6;
7302
7303 let mut expected = HcMatchGenerator::new(64);
7304 expected.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
7305 expected.table.ensure_tables();
7306 expected.table.history_abs_start = history_abs_start;
7307 expected.table.position_base = expected.table.history_abs_start;
7308 expected.table.hash_table.fill(HC_EMPTY);
7309 expected.table.chain_table.fill(HC_EMPTY);
7310 for pos in expected.table.history_abs_start..abs_pos {
7311 expected.table.insert_position_no_rebase(pos);
7312 }
7313
7314 matcher.table.maybe_rebase_positions(abs_pos);
7315
7316 assert_eq!(
7317 matcher.table.position_base, matcher.table.history_abs_start,
7318 "rebase should still anchor to the oldest live absolute position"
7319 );
7320 assert_eq!(
7321 matcher.table.hash_table, expected.table.hash_table,
7322 "rebase must rebuild only positions already inserted before abs_pos"
7323 );
7324 assert_eq!(
7325 matcher.table.chain_table, expected.table.chain_table,
7326 "future positions must not be pre-seeded into HC chains during rebase"
7327 );
7328}
7329
7330#[cfg(any())] #[test]
7332fn suffix_store_with_single_slot_does_not_panic_on_keying() {
7333 let mut suffixes = SuffixStore::with_capacity(1);
7334 suffixes.insert(b"abcde", 0);
7335 assert!(suffixes.contains_key(b"abcde"));
7336 assert_eq!(suffixes.get(b"abcde"), Some(0));
7337}
7338
7339#[cfg(any())]
7340#[test]
7342fn fastest_reset_uses_interleaved_hash_fill_step() {
7343 let mut driver = MatchGeneratorDriver::new(32, 2);
7344
7345 driver.reset(CompressionLevel::Uncompressed);
7346 assert_eq!(driver.simple().hash_fill_step, 1);
7347
7348 driver.reset(CompressionLevel::Fastest);
7349 assert_eq!(driver.simple().hash_fill_step, FAST_HASH_FILL_STEP);
7350
7351 driver.reset(CompressionLevel::Better);
7354 assert_eq!(
7355 driver.active_backend(),
7356 super::strategy::BackendTag::HashChain
7357 );
7358 assert_eq!(driver.window_size(), (1u64 << 23));
7359 assert_eq!(driver.hc_matcher().hc.lazy_depth, 2);
7360}
7361
7362#[cfg(any())] #[test]
7364fn simple_matcher_updates_offset_history_after_emitting_match() {
7365 let mut matcher = MatchGenerator::new(64);
7366 matcher.add_data(
7367 b"abcdeabcdeabcde".to_vec(),
7368 SuffixStore::with_capacity(64),
7369 |_, _| {},
7370 );
7371
7372 assert!(matcher.next_sequence(|seq| {
7373 assert_eq!(
7374 seq,
7375 Sequence::Triple {
7376 literals: b"abcde",
7377 offset: 5,
7378 match_len: 10,
7379 }
7380 );
7381 }));
7382 assert_eq!(matcher.offset_hist, [5, 1, 4]);
7383}
7384
7385#[cfg(any())] #[test]
7387fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
7388 let mut matcher = MatchGenerator::new(64);
7389 matcher.add_data(
7390 b"abcdefghijabcdefghij".to_vec(),
7391 SuffixStore::with_capacity(64),
7392 |_, _| {},
7393 );
7394
7395 matcher.suffix_idx = 10;
7396 matcher.last_idx_in_sequence = 10;
7397 matcher.offset_hist = [99, 10, 4];
7398
7399 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
7400 assert_eq!(candidate, Some((10, 10)));
7401}
7402
7403#[cfg(any())] #[test]
7405fn simple_matcher_repcode_can_target_previous_window_entry() {
7406 let mut matcher = MatchGenerator::new(64);
7407 matcher.add_data(
7408 b"abcdefghij".to_vec(),
7409 SuffixStore::with_capacity(64),
7410 |_, _| {},
7411 );
7412 matcher.skip_matching();
7413 matcher.add_data(
7414 b"abcdefghij".to_vec(),
7415 SuffixStore::with_capacity(64),
7416 |_, _| {},
7417 );
7418
7419 matcher.offset_hist = [99, 10, 4];
7420
7421 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
7422 assert_eq!(candidate, Some((10, 10)));
7423}
7424
7425#[cfg(any())] #[test]
7427fn simple_matcher_zero_literal_repcode_checks_rep2() {
7428 let mut matcher = MatchGenerator::new(64);
7429 matcher.add_data(
7430 b"abcdefghijabcdefghij".to_vec(),
7431 SuffixStore::with_capacity(64),
7432 |_, _| {},
7433 );
7434 matcher.suffix_idx = 10;
7435 matcher.last_idx_in_sequence = 10;
7436 matcher.offset_hist = [99, 4, 10];
7438
7439 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
7440 assert_eq!(candidate, Some((10, 10)));
7441}
7442
7443#[cfg(any())] #[test]
7445fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
7446 let mut matcher = MatchGenerator::new(64);
7447 matcher.add_data(
7448 b"abcdefghijabcdefghij".to_vec(),
7449 SuffixStore::with_capacity(64),
7450 |_, _| {},
7451 );
7452 matcher.suffix_idx = 10;
7453 matcher.last_idx_in_sequence = 10;
7454 matcher.offset_hist = [11, 4, 99];
7456
7457 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
7458 assert_eq!(candidate, Some((10, 10)));
7459}
7460
7461#[cfg(any())] #[test]
7463fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
7464 let mut matcher = MatchGenerator::new(64);
7465 matcher.add_data(
7466 b"abcdefghij".to_vec(),
7467 SuffixStore::with_capacity(64),
7468 |_, _| {},
7469 );
7470 matcher.skip_matching();
7471 matcher.add_data(
7472 b"klmnopqrst".to_vec(),
7473 SuffixStore::with_capacity(64),
7474 |_, _| {},
7475 );
7476 matcher.suffix_idx = 3;
7477
7478 let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
7479 assert_eq!(candidate, None);
7480}
7481
7482#[cfg(any())] #[test]
7484fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
7485 let mut matcher = MatchGenerator::new(64);
7486 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
7487 matcher.add_data(
7488 b"abcdefghijklmnop".to_vec(),
7489 SuffixStore::with_capacity(64),
7490 |_, _| {},
7491 );
7492 matcher.skip_matching();
7493 matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});
7494
7495 assert!(matcher.next_sequence(|seq| {
7496 assert_eq!(
7497 seq,
7498 Sequence::Triple {
7499 literals: b"",
7500 offset: 15,
7501 match_len: 5,
7502 }
7503 );
7504 }));
7505 assert!(!matcher.next_sequence(|_| {}));
7506}
7507
7508#[cfg(any())] #[test]
7510fn simple_matcher_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
7511 let mut matcher = MatchGenerator::new(128);
7512 let first = b"abcdefghijklmnopqrstuvwxyz012345".to_vec();
7513 let sparse_probe = first[3..3 + MIN_MATCH_LEN].to_vec();
7514 let tail_start = first.len() - MIN_MATCH_LEN;
7515 let tail_probe = first[tail_start..tail_start + MIN_MATCH_LEN].to_vec();
7516 matcher.add_data(first, SuffixStore::with_capacity(256), |_, _| {});
7517
7518 matcher.skip_matching_with_hint(Some(true));
7519
7520 matcher.add_data(sparse_probe, SuffixStore::with_capacity(256), |_, _| {});
7522 let mut sparse_first_is_literals = None;
7523 assert!(matcher.next_sequence(|seq| {
7524 if sparse_first_is_literals.is_none() {
7525 sparse_first_is_literals = Some(matches!(seq, Sequence::Literals { .. }));
7526 }
7527 }));
7528 assert!(
7529 sparse_first_is_literals.unwrap_or(false),
7530 "sparse-start probe should not produce an immediate match"
7531 );
7532
7533 let mut matcher = MatchGenerator::new(128);
7535 matcher.add_data(
7536 b"abcdefghijklmnopqrstuvwxyz012345".to_vec(),
7537 SuffixStore::with_capacity(256),
7538 |_, _| {},
7539 );
7540 matcher.skip_matching_with_hint(Some(true));
7541 matcher.add_data(tail_probe, SuffixStore::with_capacity(256), |_, _| {});
7542 let mut tail_first_is_immediate_match = None;
7543 assert!(matcher.next_sequence(|seq| {
7544 if tail_first_is_immediate_match.is_none() {
7545 tail_first_is_immediate_match =
7546 Some(matches!(seq, Sequence::Triple { literals, .. } if literals.is_empty()));
7547 }
7548 }));
7549 assert!(
7550 tail_first_is_immediate_match.unwrap_or(false),
7551 "dense tail probe should match immediately at block start"
7552 );
7553}
7554
7555#[cfg(any())] #[test]
7557fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
7558 let mut matcher = MatchGenerator::new(64);
7559 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
7560 matcher.add_data(
7561 b"01234abcde".to_vec(),
7562 SuffixStore::with_capacity(64),
7563 |_, _| {},
7564 );
7565 matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);
7566
7567 let last = matcher.window.last().unwrap();
7568 let tail = &last.data[5..10];
7569 assert_eq!(last.suffixes.get(tail), Some(5));
7570}
7571
7572#[cfg(any())] #[test]
7574fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
7575 let mut matcher = MatchGenerator::new(128);
7576 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
7577 matcher.add_data(
7578 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
7579 SuffixStore::with_capacity(1 << 16),
7580 |_, _| {},
7581 );
7582
7583 matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);
7584
7585 let last = matcher.window.last().unwrap();
7586 let first_key = &last.data[..MIN_MATCH_LEN];
7587 assert_eq!(last.suffixes.get(first_key), None);
7588}
7589
7590#[cfg(any())] #[test]
7592fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
7593 let mut matcher = MatchGenerator::new(128);
7594 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
7595 matcher.add_data(
7596 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
7597 SuffixStore::with_capacity(1 << 16),
7598 |_, _| {},
7599 );
7600
7601 matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);
7602
7603 let last = matcher.window.last().unwrap();
7604 for pos in [0usize, 3, 6, 9, 12] {
7605 let key = &last.data[pos..pos + MIN_MATCH_LEN];
7606 assert_eq!(
7607 last.suffixes.get(key),
7608 Some(pos),
7609 "expected interleaved suffix registration at pos {pos}"
7610 );
7611 }
7612}
7613
7614#[test]
7615fn dfast_skip_matching_handles_window_eviction() {
7616 let mut matcher = DfastMatchGenerator::new(16);
7617
7618 matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
7619 matcher.skip_matching(None);
7620 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
7621 matcher.skip_matching(None);
7622 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
7623
7624 let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
7625 matcher.start_matching(|seq| match seq {
7626 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
7627 Sequence::Triple {
7628 literals,
7629 offset,
7630 match_len,
7631 } => {
7632 reconstructed.extend_from_slice(literals);
7633 let start = reconstructed.len() - offset;
7634 for i in 0..match_len {
7635 let byte = reconstructed[start + i];
7636 reconstructed.push(byte);
7637 }
7638 }
7639 });
7640
7641 assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
7642}
7643
7644#[test]
7645fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
7646 let mut matcher = DfastMatchGenerator::new(8);
7647
7648 let mut first = Vec::with_capacity(64);
7649 first.extend_from_slice(b"abcdefgh");
7650 matcher.add_data(first, |_| {});
7651
7652 let mut second = Vec::with_capacity(64);
7653 second.extend_from_slice(b"ijklmnop");
7654
7655 let mut observed_evicted_len = None;
7656 matcher.add_data(second, |data| {
7657 observed_evicted_len = Some(data.len());
7658 });
7659
7660 assert_eq!(
7661 observed_evicted_len,
7662 Some(8),
7663 "eviction callback must report evicted byte length, not backing capacity"
7664 );
7665}
7666
7667#[test]
7731fn dfast_commit_space_eviction_uses_window_size_delta() {
7732 use crate::encoding::CompressionLevel;
7733
7734 let mut driver = MatchGeneratorDriver::new(10, 1);
7735 driver.reset(CompressionLevel::Level(2));
7736 assert!(matches!(driver.storage, MatcherStorage::Dfast(_)));
7737
7738 driver.dfast_matcher_mut().max_window_size = 10;
7743 driver.dictionary_retained_budget = 100;
7744
7745 let mut space1 = Vec::with_capacity(64);
7746 space1.extend_from_slice(b"abcd");
7747 driver.commit_space(space1);
7748 assert_eq!(
7749 driver.dictionary_retained_budget, 100,
7750 "1st commit fills window 0 → 4, no eviction, no retire"
7751 );
7752
7753 let mut space2 = Vec::with_capacity(64);
7754 space2.extend_from_slice(b"efgh");
7755 driver.commit_space(space2);
7756 assert_eq!(
7757 driver.dictionary_retained_budget, 100,
7758 "2nd commit fills window 4 → 8, no eviction, no retire"
7759 );
7760
7761 let mut space3 = Vec::with_capacity(64);
7762 space3.extend_from_slice(b"ijklm");
7763 driver.commit_space(space3);
7764 assert_eq!(
7765 driver.dictionary_retained_budget, 87,
7766 "3rd commit + trim_after_budget_retire cascade. With the fix \
7767 (evicted=4 from window_size delta) the cascade reclaims 100 \
7768 → 96 → 92 → 87. With the bug (evicted=5 from data.len()) the \
7769 3rd commit would panic on `data.len() <= max_window_size` \
7770 after the 2nd commit's cascade had already shrunk \
7771 max_window_size to 0."
7772 );
7773 assert_eq!(
7774 driver.dfast_matcher_mut().max_window_size,
7775 0,
7776 "cascade drains max_window_size to 0 once budget reclaim \
7777 exceeds the initial window size"
7778 );
7779}
7780
7781#[test]
7782fn dfast_trim_to_window_evicts_oldest_block_by_length() {
7783 let mut matcher = DfastMatchGenerator::new(16);
7792
7793 let mut first = Vec::with_capacity(64);
7794 first.extend_from_slice(b"abcdefgh");
7795 matcher.add_data(first, |_| {});
7796
7797 let mut second = Vec::with_capacity(64);
7798 second.extend_from_slice(b"ijklmnop");
7799 matcher.add_data(second, |_| {});
7800
7801 assert_eq!(matcher.window_size, 16);
7802 assert_eq!(matcher.window_blocks.len(), 2);
7803
7804 matcher.max_window_size = 8;
7805
7806 matcher.trim_to_window();
7807
7808 assert_eq!(
7816 matcher.window_size, 8,
7817 "exactly one 8-byte block must remain"
7818 );
7819 assert_eq!(matcher.window_blocks.len(), 1);
7820 assert_eq!(matcher.history_abs_start, 8);
7821}
7822
7823#[test]
7824fn dfast_inserts_tail_positions_for_next_block_matching() {
7825 let mut matcher = DfastMatchGenerator::new(1 << 22);
7826
7827 matcher.add_data(b"012345bcdea".to_vec(), |_| {});
7828 let mut history = Vec::new();
7829 matcher.start_matching(|seq| match seq {
7830 Sequence::Literals { literals } => history.extend_from_slice(literals),
7831 Sequence::Triple { .. } => unreachable!("first block should not match history"),
7832 });
7833 assert_eq!(history, b"012345bcdea");
7834
7835 matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
7836 let mut saw_first_sequence = false;
7837 matcher.start_matching(|seq| {
7838 assert!(!saw_first_sequence, "expected a single cross-block match");
7839 saw_first_sequence = true;
7840 match seq {
7841 Sequence::Literals { .. } => {
7842 panic!("expected tail-anchored cross-block match before any literals")
7843 }
7844 Sequence::Triple {
7845 literals,
7846 offset,
7847 match_len,
7848 } => {
7849 assert_eq!(literals, b"");
7850 assert_eq!(offset, 5);
7851 assert_eq!(match_len, 11);
7852 let start = history.len() - offset;
7853 for i in 0..match_len {
7854 let byte = history[start + i];
7855 history.push(byte);
7856 }
7857 }
7858 }
7859 });
7860
7861 assert!(
7862 saw_first_sequence,
7863 "expected tail-anchored cross-block match"
7864 );
7865 assert_eq!(history, b"012345bcdeabcdeabcdeab");
7866}
7867
7868#[test]
7895fn hashchain_inserts_tail_positions_for_next_block_matching() {
7896 let mut matcher = HcMatchGenerator::new(1 << 22);
7897 matcher.configure(HC_CONFIG, super::strategy::StrategyTag::Lazy, 22);
7898
7899 matcher.table.add_data(b"PQRSTBCD".to_vec(), |_| {});
7900 let mut history = alloc::vec::Vec::new();
7901 matcher.start_matching(|seq| match seq {
7902 Sequence::Literals { literals } => history.extend_from_slice(literals),
7903 Sequence::Triple { .. } => unreachable!("first block has no internal repeats"),
7904 });
7905 assert_eq!(history, b"PQRSTBCD");
7906
7907 matcher.table.add_data(b"BCDBCDBCDB".to_vec(), |_| {});
7908 let mut first_sequence_offset: Option<usize> = None;
7909 let mut first_sequence_match_len: Option<usize> = None;
7910 matcher.start_matching(|seq| {
7911 if first_sequence_offset.is_some() {
7912 return;
7913 }
7914 match seq {
7915 Sequence::Literals { .. } => {
7916 panic!(
7917 "expected tail-anchored cross-block match before any literals — \
7918 backfill_boundary_positions did not seed positions 5/6/7"
7919 )
7920 }
7921 Sequence::Triple {
7922 literals,
7923 offset,
7924 match_len,
7925 } => {
7926 assert_eq!(literals, b"", "no leading literals on the boundary match");
7927 first_sequence_offset = Some(offset);
7928 first_sequence_match_len = Some(match_len);
7929 }
7930 }
7931 });
7932
7933 let offset = first_sequence_offset.expect(
7934 "expected tail-anchored cross-block match emitted from backfill_boundary_positions",
7935 );
7936 assert!(
7937 (1..=3).contains(&offset),
7938 "boundary match offset {offset} must point into the unhashable tail \
7939 (positions 5/6/7 of an 8-byte block 1) so the test specifically \
7940 locks down backfill_boundary_positions",
7941 );
7942 assert_eq!(
7943 offset, 3,
7944 "candidate position must land at 5 (= block_1_len - 3) so the 4-byte \
7945 window `data[5..9] = b\"BCDB\"` matches block 2's first hash lookup",
7946 );
7947 let match_len = first_sequence_match_len.unwrap();
7948 assert!(
7949 match_len >= HC_MIN_MATCH_LEN,
7950 "match_len {match_len} must clear the HC min-match floor",
7951 );
7952}
7953
7954#[test]
7955fn dfast_dense_skip_matching_backfills_previous_tail_for_next_block() {
7956 let mut matcher = DfastMatchGenerator::new(1 << 22);
7957 let tail = b"Qz9kLm2Rp";
7958 let mut first = b"0123456789abcdef".to_vec();
7959 first.extend_from_slice(tail);
7960 matcher.add_data(first.clone(), |_| {});
7961 matcher.skip_matching(Some(false));
7962
7963 let mut second = tail.to_vec();
7964 second.extend_from_slice(b"after-tail-literals");
7965 matcher.add_data(second, |_| {});
7966
7967 let mut first_sequence = None;
7968 matcher.start_matching(|seq| {
7969 if first_sequence.is_some() {
7970 return;
7971 }
7972 first_sequence = Some(match seq {
7973 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
7974 Sequence::Triple {
7975 literals,
7976 offset,
7977 match_len,
7978 } => (literals.len(), offset, match_len),
7979 });
7980 });
7981
7982 let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
7983 assert_eq!(
7984 lit_len, 0,
7985 "expected immediate cross-block match at block start"
7986 );
7987 assert_eq!(
7988 offset,
7989 tail.len(),
7990 "expected dense skip to preserve cross-boundary tail match"
7991 );
7992 assert!(
7993 match_len >= DFAST_MIN_MATCH_LEN,
7994 "match length should satisfy dfast minimum match length"
7995 );
7996}
7997
7998#[test]
7999fn dfast_sparse_skip_matching_preserves_tail_cross_block_match() {
8000 let mut matcher = DfastMatchGenerator::new(1 << 22);
8001 let tail = b"Qz9kLm2Rp";
8002 let mut first = deterministic_high_entropy_bytes(0x9E37_79B9_7F4A_7C15, 4096);
8003 let tail_start = first.len() - tail.len();
8004 first[tail_start..].copy_from_slice(tail);
8005 matcher.add_data(first.clone(), |_| {});
8006
8007 matcher.skip_matching(Some(true));
8008
8009 let mut second = tail.to_vec();
8010 second.extend_from_slice(b"after-tail-literals");
8011 matcher.add_data(second, |_| {});
8012
8013 let mut first_sequence = None;
8014 matcher.start_matching(|seq| {
8015 if first_sequence.is_some() {
8016 return;
8017 }
8018 first_sequence = Some(match seq {
8019 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
8020 Sequence::Triple {
8021 literals,
8022 offset,
8023 match_len,
8024 } => (literals.len(), offset, match_len),
8025 });
8026 });
8027
8028 let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
8029 assert_eq!(
8030 lit_len, 0,
8031 "expected immediate cross-block match at block start"
8032 );
8033 assert_eq!(
8034 offset,
8035 tail.len(),
8036 "expected match against densely seeded tail"
8037 );
8038 assert!(
8039 match_len >= DFAST_MIN_MATCH_LEN,
8040 "match length should satisfy dfast minimum match length"
8041 );
8042}
8043
8044#[test]
8045fn dfast_skip_matching_dense_backfills_newly_hashable_long_tail_positions() {
8046 let mut matcher = DfastMatchGenerator::new(1 << 22);
8047 let first = deterministic_high_entropy_bytes(0x7A64_0315_D4E1_91C3, 4096);
8048 let first_len = first.len();
8049 matcher.add_data(first, |_| {});
8050 matcher.skip_matching_dense();
8051
8052 matcher.add_data(alloc::vec![0xAB], |_| {});
8055 matcher.skip_matching_dense();
8056
8057 let target_abs_pos = first_len - 7;
8058 let target_rel = target_abs_pos - matcher.history_abs_start;
8059 let live = matcher.live_history();
8060 assert!(
8061 target_rel + 8 <= live.len(),
8062 "fixture must make the boundary start long-hashable"
8063 );
8064 let long_hash = matcher.long_hash_index(&live[target_rel..]);
8065 let target_slot = matcher.pack_slot(target_abs_pos);
8066 assert_ne!(
8069 target_slot, DFAST_EMPTY_SLOT,
8070 "pack_slot must never return the empty-slot sentinel for a real position"
8071 );
8072 assert_eq!(
8073 matcher.long_hash[long_hash], target_slot,
8074 "dense skip must seed long-hash entry for newly hashable boundary start"
8075 );
8076}
8077
8078#[test]
8079fn dfast_seed_remaining_hashable_starts_seeds_last_short_hash_positions() {
8080 let mut matcher = DfastMatchGenerator::new(1 << 20);
8081 let block = deterministic_high_entropy_bytes(0x13F0_9A6D_55CE_7B21, 64);
8082 matcher.add_data(block, |_| {});
8083 matcher.ensure_hash_tables();
8084
8085 let current_len = matcher.window_blocks.back().copied().unwrap_or(0);
8086 let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
8087 let seed_start = current_len - DFAST_MIN_MATCH_LEN;
8088 matcher.seed_remaining_hashable_starts(current_abs_start, current_len, seed_start);
8089
8090 let target_abs_pos = current_abs_start + current_len - 4;
8091 let target_rel = target_abs_pos - matcher.history_abs_start;
8092 let live = matcher.live_history();
8093 assert!(
8094 target_rel + 4 <= live.len(),
8095 "fixture must leave the last short-hash start valid"
8096 );
8097 let short_hash = matcher.short_hash_index(&live[target_rel..]);
8098 let target_slot = matcher.pack_slot(target_abs_pos);
8099 assert_ne!(
8100 target_slot, DFAST_EMPTY_SLOT,
8101 "pack_slot must never return the empty-slot sentinel for a real position"
8102 );
8103 assert_eq!(
8104 matcher.short_hash[short_hash], target_slot,
8105 "tail seeding must include the last 4-byte-hashable start"
8106 );
8107}
8108
8109#[test]
8110fn dfast_seed_remaining_hashable_starts_handles_pos_at_block_end() {
8111 let mut matcher = DfastMatchGenerator::new(1 << 20);
8112 let block = deterministic_high_entropy_bytes(0x7BB2_DA91_441E_C0EF, 64);
8113 matcher.add_data(block, |_| {});
8114 matcher.ensure_hash_tables();
8115
8116 let current_len = matcher.window_blocks.back().copied().unwrap_or(0);
8117 let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
8118 matcher.seed_remaining_hashable_starts(current_abs_start, current_len, current_len);
8119
8120 let target_abs_pos = current_abs_start + current_len - 4;
8121 let target_rel = target_abs_pos - matcher.history_abs_start;
8122 let live = matcher.live_history();
8123 assert!(
8124 target_rel + 4 <= live.len(),
8125 "fixture must leave the last short-hash start valid"
8126 );
8127 let short_hash = matcher.short_hash_index(&live[target_rel..]);
8128 let target_slot = matcher.pack_slot(target_abs_pos);
8129 assert_ne!(
8130 target_slot, DFAST_EMPTY_SLOT,
8131 "pack_slot must never return the empty-slot sentinel for a real position"
8132 );
8133 assert_eq!(
8134 matcher.short_hash[short_hash], target_slot,
8135 "tail seeding must still include the last 4-byte-hashable start when pos is at block end"
8136 );
8137}
8138
8139#[test]
8155fn dfast_ensure_room_for_rebases_above_guard_band() {
8156 let mut dfast = DfastMatchGenerator::new(1 << 22);
8157 dfast.set_hash_bits(10);
8158 dfast.ensure_hash_tables();
8159
8160 let early_abs = 1024usize;
8168 let early_packed = dfast.pack_slot(early_abs);
8169 assert_ne!(early_packed, DFAST_EMPTY_SLOT);
8170 dfast.short_hash[0] = early_packed;
8171 dfast.long_hash[0] = early_packed;
8172
8173 let trigger_abs = (u32::MAX as usize) - (DFAST_REBASE_GUARD_BAND as usize) + 1;
8179 assert_eq!(dfast.position_base, 0);
8180 dfast.ensure_room_for(trigger_abs);
8181 assert_eq!(
8182 dfast.position_base, DFAST_REBASE_GUARD_BAND as usize,
8183 "rebase must advance position_base by DFAST_REBASE_GUARD_BAND"
8184 );
8185
8186 assert_eq!(
8192 dfast.short_hash[0], DFAST_EMPTY_SLOT,
8193 "pre-rebase short-hash entries below the reducer must become empty"
8194 );
8195 assert_eq!(
8196 dfast.long_hash[0], DFAST_EMPTY_SLOT,
8197 "pre-rebase long-hash entries below the reducer must become empty"
8198 );
8199
8200 let post_packed = dfast.pack_slot(trigger_abs);
8204 assert_ne!(post_packed, DFAST_EMPTY_SLOT);
8205 let unpacked = dfast.position_base + (post_packed as usize) - 1;
8206 assert_eq!(
8207 unpacked, trigger_abs,
8208 "post-rebase pack/unpack must round-trip the absolute position"
8209 );
8210}
8211
8212#[test]
8213fn dfast_sparse_skip_matching_backfills_previous_tail_for_consecutive_sparse_blocks() {
8214 let mut matcher = DfastMatchGenerator::new(1 << 22);
8215 let boundary_prefix = [0xFA, 0xFB, 0xFC];
8216 let boundary_suffix = [0xFD, 0xEE, 0xAD, 0xBE, 0xEF, 0x11, 0x22, 0x33];
8217
8218 let mut first = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
8219 let first_tail_start = first.len() - boundary_prefix.len();
8220 first[first_tail_start..].copy_from_slice(&boundary_prefix);
8221 matcher.add_data(first, |_| {});
8222 matcher.skip_matching(Some(true));
8223
8224 let mut second = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
8225 second[..boundary_suffix.len()].copy_from_slice(&boundary_suffix);
8226 matcher.add_data(second.clone(), |_| {});
8227 matcher.skip_matching(Some(true));
8228
8229 let mut third = boundary_prefix.to_vec();
8230 third.extend_from_slice(&boundary_suffix);
8231 third.extend_from_slice(b"-trailing-literals");
8232 matcher.add_data(third, |_| {});
8233
8234 let mut first_sequence = None;
8235 matcher.start_matching(|seq| {
8236 if first_sequence.is_some() {
8237 return;
8238 }
8239 first_sequence = Some(match seq {
8240 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
8241 Sequence::Triple {
8242 literals,
8243 offset,
8244 match_len,
8245 } => (literals.len(), offset, match_len),
8246 });
8247 });
8248
8249 let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
8250 assert_eq!(
8251 lit_len, 0,
8252 "expected immediate match from the prior sparse-skip boundary"
8253 );
8254 assert_eq!(
8255 offset,
8256 second.len() + boundary_prefix.len(),
8257 "expected match against backfilled first→second boundary start"
8258 );
8259 assert!(
8260 match_len >= DFAST_MIN_MATCH_LEN,
8261 "match length should satisfy dfast minimum match length"
8262 );
8263}
8264
8265#[test]
8266fn fastest_hint_iteration_23_sequences_reconstruct_source() {
8267 fn generate_data(seed: u64, len: usize) -> Vec<u8> {
8268 let mut state = seed;
8269 let mut data = Vec::with_capacity(len);
8270 for _ in 0..len {
8271 state = state
8272 .wrapping_mul(6364136223846793005)
8273 .wrapping_add(1442695040888963407);
8274 data.push((state >> 33) as u8);
8275 }
8276 data
8277 }
8278
8279 let i = 23u64;
8280 let len = (i * 89 % 16384) as usize;
8281 let mut data = generate_data(i, len);
8282 let repeat = data[128..256].to_vec();
8285 data.extend_from_slice(&repeat);
8286 data.extend_from_slice(&repeat);
8287
8288 let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
8289 driver.set_source_size_hint(data.len() as u64);
8290 driver.reset(CompressionLevel::Fastest);
8291 let mut space = driver.get_next_space();
8292 space[..data.len()].copy_from_slice(&data);
8293 space.truncate(data.len());
8294 driver.commit_space(space);
8295
8296 let mut rebuilt = Vec::with_capacity(data.len());
8297 let mut saw_triple = false;
8298 driver.start_matching(|seq| match seq {
8299 Sequence::Literals { literals } => rebuilt.extend_from_slice(literals),
8300 Sequence::Triple {
8301 literals,
8302 offset,
8303 match_len,
8304 } => {
8305 saw_triple = true;
8306 rebuilt.extend_from_slice(literals);
8307 assert!(offset > 0, "offset must be non-zero");
8308 assert!(
8309 offset <= rebuilt.len(),
8310 "offset must reference already-produced bytes: offset={} produced={}",
8311 offset,
8312 rebuilt.len()
8313 );
8314 let start = rebuilt.len() - offset;
8315 for idx in 0..match_len {
8316 let b = rebuilt[start + idx];
8317 rebuilt.push(b);
8318 }
8319 }
8320 });
8321
8322 let _ = saw_triple;
8332 assert_eq!(rebuilt, data);
8333}
8334
8335#[test]
8336fn fast_levels_dispatch_per_level_hash_log_and_mls() {
8337 let p1 = resolve_level_params(CompressionLevel::Level(1), None);
8340 assert_eq!(p1.fast_hash_log, 14);
8341 assert_eq!(p1.fast_mls, 7);
8342 assert_eq!(p1.fast_step_size, 2);
8343
8344 for n in -7..=-1 {
8351 let p = resolve_level_params(CompressionLevel::Level(n), None);
8352 assert_eq!(p.fast_hash_log, 14, "Level({n}) fast_hash_log");
8353 assert_eq!(p.fast_mls, 6, "Level({n}) fast_mls");
8354 let expected_step = ((-n) as usize) + 1;
8355 assert_eq!(p.fast_step_size, expected_step, "Level({n}) fast_step_size");
8356 }
8357
8358 let pf = resolve_level_params(CompressionLevel::Fastest, None);
8361 assert_eq!(
8362 (
8363 pf.window_log,
8364 pf.fast_hash_log,
8365 pf.fast_mls,
8366 pf.fast_step_size
8367 ),
8368 (19, 14, 6, 2),
8369 );
8370 let pu = resolve_level_params(CompressionLevel::Uncompressed, None);
8373 assert_eq!(
8374 (
8375 pu.window_log,
8376 pu.fast_hash_log,
8377 pu.fast_mls,
8378 pu.fast_step_size
8379 ),
8380 (17, 14, 6, 2),
8381 );
8382}
8383
8384#[test]
8392fn fast_levels_driver_wiring_threads_cparams_into_inner_matcher() {
8393 let mut driver = MatchGeneratorDriver::new(64 * 1024, 1);
8394
8395 let fast_levels = [
8396 CompressionLevel::Level(1),
8397 CompressionLevel::Fastest,
8398 CompressionLevel::Uncompressed,
8399 CompressionLevel::Level(-1),
8400 CompressionLevel::Level(-2),
8401 CompressionLevel::Level(-3),
8402 CompressionLevel::Level(-4),
8403 CompressionLevel::Level(-5),
8404 CompressionLevel::Level(-6),
8405 CompressionLevel::Level(-7),
8406 ];
8407
8408 for &level in &fast_levels {
8409 let p = resolve_level_params(level, None);
8410 assert_eq!(
8414 p.strategy_tag,
8415 super::strategy::StrategyTag::Fast,
8416 "{level:?} must resolve to Fast strategy",
8417 );
8418
8419 crate::encoding::Matcher::reset(&mut driver, CompressionLevel::Default);
8429
8430 crate::encoding::Matcher::reset(&mut driver, level);
8433
8434 let m = driver.simple_mut();
8435 assert_eq!(
8436 m.hash_log(),
8437 p.fast_hash_log,
8438 "{level:?}: inner matcher hash_log mismatch — argument swap?",
8439 );
8440 assert_eq!(
8441 m.mls(),
8442 p.fast_mls,
8443 "{level:?}: inner matcher mls mismatch — argument swap?",
8444 );
8445 assert_eq!(
8446 m.step_size(),
8447 p.fast_step_size,
8448 "{level:?}: inner matcher step_size mismatch — stale value carried from prior reset?",
8449 );
8450 }
8451}