1use alloc::vec::Vec;
9use super::BETTER_WINDOW_LOG;
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17use super::bt::BtMatcher;
18#[cfg(test)]
19use super::cost_model::HC_MAX_LIT;
20use super::cost_model::{
21 HC_BITCOST_MULTIPLIER, HC_FORMAT_MINMATCH, HC_OPT_NUM, HC_PREDEF_THRESHOLD, HcOptState,
22 HcOptimalCostProfile,
23};
24#[cfg(test)]
25use super::cost_model::{HC_BLOCKSIZE_MAX, HC_MAX_LL, HC_MAX_ML, HC_MAX_OFF, HcOptPriceType};
26use super::dfast::DfastMatchGenerator;
27#[cfg(test)]
33use super::match_table::helpers::INCOMPRESSIBLE_SKIP_STEP;
34use super::match_table::helpers::MIN_MATCH_LEN;
35#[cfg(test)]
36use super::match_table::helpers::common_prefix_len;
37#[cfg(test)]
38use super::opt::ldm::HcRawSeq;
39use super::opt::ldm::{HcOptLdmState, HcRawSeqStore};
40use super::opt::types::{
41 HcCandidateQuery, HcOptimalNode, HcOptimalPlanBuffers, HcOptimalPlanState, HcOptimalSequence,
42 MatchCandidate,
43};
44use super::row::RowMatchGenerator;
45use super::simple::fast_matcher::{FAST_LEVEL_1_HASH_LOG, FAST_LEVEL_1_MLS, FastKernelMatcher};
46#[cfg(all(
47 test,
48 feature = "std",
49 target_arch = "aarch64",
50 target_endian = "little"
51))]
52use std::arch::is_aarch64_feature_detected;
53#[cfg(all(test, feature = "std", target_arch = "x86_64"))]
54use std::arch::is_x86_feature_detected;
55
56pub(crate) const DFAST_MIN_MATCH_LEN: usize = 5;
57pub(crate) const DFAST_SHORT_HASH_LOOKAHEAD: usize = 4;
58pub(crate) const ROW_MIN_MATCH_LEN: usize = 6;
59pub(crate) const DFAST_TARGET_LEN: usize = 48;
60pub(crate) const DFAST_HASH_BITS: usize = 17;
83pub(crate) const DFAST_SHORT_HASH_BITS_DELTA: usize = 1;
89pub(crate) const DFAST_EMPTY_SLOT: u32 = 0;
97
98pub(crate) const DFAST_REBASE_GUARD_BAND: u32 = 1u32 << 30;
105pub(crate) const DFAST_SKIP_SEARCH_STRENGTH: usize = 6;
106pub(crate) const DFAST_SKIP_STEP_GROWTH_INTERVAL: usize = 1 << DFAST_SKIP_SEARCH_STRENGTH;
107pub(crate) const DFAST_LOCAL_SKIP_TRIGGER: usize = 256;
108pub(crate) const DFAST_MAX_SKIP_STEP: usize = 8;
109pub(crate) const DFAST_INCOMPRESSIBLE_SKIP_STEP: usize = 16;
110pub(crate) const ROW_HASH_BITS: usize = 20;
111pub(crate) const ROW_LOG: usize = 5;
112pub(crate) const ROW_SEARCH_DEPTH: usize = 16;
113pub(crate) const ROW_TARGET_LEN: usize = 48;
114pub(crate) const ROW_TAG_BITS: usize = 8;
115pub(crate) const ROW_EMPTY_SLOT: usize = usize::MAX;
116pub(crate) const ROW_HASH_KEY_LEN: usize = 4;
117#[cfg(test)]
124use super::match_table::storage::{HC_PRIME3BYTES, HC_PRIME4BYTES};
125
126#[cfg(test)]
131use super::match_table::storage::HC_EMPTY;
132use super::match_table::storage::{HC_CHAIN_LOG, HC_HASH_LOG, HC3_HASH_LOG};
133const HC_SEARCH_DEPTH: usize = 16;
138use super::hc::HC_MIN_MATCH_LEN;
141const HC_OPT_MIN_MATCH_LEN: usize = HC_FORMAT_MINMATCH;
142const HC_TARGET_LEN: usize = 48;
143
144use super::hc::MAX_HC_SEARCH_DEPTH;
146
147#[derive(Copy, Clone)]
155struct HcConfig {
156 hash_log: usize,
157 chain_log: usize,
158 search_depth: usize,
159 target_len: usize,
160}
161
162#[derive(Copy, Clone)]
163pub(crate) struct RowConfig {
164 pub(crate) hash_bits: usize,
165 pub(crate) row_log: usize,
166 pub(crate) search_depth: usize,
167 pub(crate) target_len: usize,
168}
169
170const HC_CONFIG: HcConfig = HcConfig {
171 hash_log: HC_HASH_LOG,
172 chain_log: HC_CHAIN_LOG,
173 search_depth: HC_SEARCH_DEPTH,
174 target_len: HC_TARGET_LEN,
175};
176
177const BTOPT_HC_CONFIG: HcConfig = HcConfig {
178 hash_log: 23,
179 chain_log: 22,
180 search_depth: 32,
181 target_len: 256,
182};
183
184const BTULTRA_HC_CONFIG: HcConfig = HcConfig {
185 hash_log: 23,
186 chain_log: 23,
187 search_depth: 32,
188 target_len: 256,
189};
190
191const BTULTRA2_HC_CONFIG: HcConfig = HcConfig {
192 hash_log: 24,
193 chain_log: 24,
194 search_depth: 512,
195 target_len: 256,
196};
197
198const BTULTRA2_HC_CONFIG_L22: HcConfig = HcConfig {
199 hash_log: 25,
200 chain_log: 27,
201 search_depth: 512,
202 target_len: 999,
203};
204
205const BTULTRA2_HC_CONFIG_L22_256K: HcConfig = HcConfig {
206 hash_log: 19,
207 chain_log: 19,
208 search_depth: 1 << 13,
209 target_len: 999,
210};
211
212const BTULTRA2_HC_CONFIG_L22_128K: HcConfig = HcConfig {
213 hash_log: 17,
214 chain_log: 18,
215 search_depth: 1 << 11,
216 target_len: 999,
217};
218
219const BTULTRA2_HC_CONFIG_L22_16K: HcConfig = HcConfig {
220 hash_log: 15,
221 chain_log: 15,
222 search_depth: 1 << 10,
223 target_len: 999,
224};
225
226const ROW_CONFIG: RowConfig = RowConfig {
227 hash_bits: ROW_HASH_BITS,
228 row_log: ROW_LOG,
229 search_depth: ROW_SEARCH_DEPTH,
230 target_len: ROW_TARGET_LEN,
231};
232
233#[derive(Copy, Clone)]
239struct LevelParams {
240 strategy_tag: super::strategy::StrategyTag,
241 window_log: u8,
242 fast_hash_log: u32,
245 fast_mls: u32,
248 fast_step_size: usize,
255 lazy_depth: u8,
256 hc: HcConfig,
257 row: RowConfig,
258}
259
260impl LevelParams {
261 fn backend(&self) -> super::strategy::BackendTag {
265 self.strategy_tag.backend()
266 }
267}
268
269fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
270 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
271 window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
272}
273
274fn row_hash_bits_for_window(max_window_size: usize) -> usize {
275 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
276 window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
277}
278
279#[rustfmt::skip]
287const LEVEL_TABLE: [LevelParams; 22] = [
288 LevelParams { strategy_tag: super::strategy::StrategyTag::Fast, window_log: 19, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
291 LevelParams { strategy_tag: super::strategy::StrategyTag::Fast, window_log: 20, fast_hash_log: 16, fast_mls: 6, fast_step_size: 2, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
292 LevelParams { strategy_tag: super::strategy::StrategyTag::Dfast, window_log: 22, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
293 LevelParams { strategy_tag: super::strategy::StrategyTag::Dfast, window_log: 22, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
294 LevelParams { strategy_tag: super::strategy::StrategyTag::Greedy, window_log: 22, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 0, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4, target_len: 2 }, row: ROW_CONFIG },
302 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8, target_len: 4 }, row: ROW_CONFIG },
303 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 1, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 8 }, row: ROW_CONFIG },
304 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 16 }, row: ROW_CONFIG },
305 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 16 }, row: ROW_CONFIG },
306 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 24, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 16 }, row: ROW_CONFIG },
307 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 24, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 32, target_len: 16 }, row: ROW_CONFIG },
308 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 25, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 32 }, row: ROW_CONFIG },
309 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 25, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 32 }, row: ROW_CONFIG },
310 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 25, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 32 }, row: ROW_CONFIG },
311 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 32 }, row: ROW_CONFIG },
312 LevelParams { strategy_tag: super::strategy::StrategyTag::BtOpt, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTOPT_HC_CONFIG, row: ROW_CONFIG },
313 LevelParams { strategy_tag: super::strategy::StrategyTag::BtOpt, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTOPT_HC_CONFIG, row: ROW_CONFIG },
314 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTULTRA_HC_CONFIG, row: ROW_CONFIG },
315 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTULTRA_HC_CONFIG, row: ROW_CONFIG },
316 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG, row: ROW_CONFIG },
317 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, window_log: 26, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG, row: ROW_CONFIG },
318 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, window_log: 27, fast_hash_log: 14, fast_mls: 7, fast_step_size: 2, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG_L22, row: ROW_CONFIG },
319];
320
321pub(crate) const MIN_WINDOW_LOG: u8 = 10;
323const MIN_HINTED_WINDOW_LOG: u8 = 14;
329
330fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
340 let src_log = if src_size == 0 {
345 MIN_WINDOW_LOG
346 } else {
347 (64 - (src_size - 1).leading_zeros()) as u8 };
349 let src_log = src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
350 if src_log < params.window_log {
351 params.window_log = src_log;
352 }
353 let backend = params.backend();
357 if backend == super::strategy::BackendTag::HashChain {
358 if (src_log + 2) < params.hc.hash_log as u8 {
359 params.hc.hash_log = (src_log + 2) as usize;
360 }
361 if (src_log + 1) < params.hc.chain_log as u8 {
362 params.hc.chain_log = (src_log + 1) as usize;
363 }
364 } else if backend == super::strategy::BackendTag::Row {
365 let max_window_size = 1usize << params.window_log;
366 params.row.hash_bits = row_hash_bits_for_window(max_window_size);
367 }
368 params
369}
370
371fn level22_btultra2_params_for_source_size(source_size: Option<u64>) -> LevelParams {
372 let mut hc = match source_size {
373 Some(size) if size <= 16 * 1024 => BTULTRA2_HC_CONFIG_L22_16K,
374 Some(size) if size <= 128 * 1024 => BTULTRA2_HC_CONFIG_L22_128K,
375 Some(size) if size <= 256 * 1024 => BTULTRA2_HC_CONFIG_L22_256K,
376 _ => BTULTRA2_HC_CONFIG_L22,
377 };
378 let mut window_log = match source_size {
379 Some(size) if size <= 16 * 1024 => 14,
380 Some(size) if size <= 128 * 1024 => 17,
381 Some(size) if size <= 256 * 1024 => 18,
382 _ => 27,
383 };
384 if let Some(size) = source_size
385 && size > 256 * 1024
386 {
387 let src_log = if size == 0 {
388 MIN_WINDOW_LOG
389 } else {
390 (64 - (size - 1).leading_zeros()) as u8
391 };
392 window_log = window_log.min(src_log.max(MIN_WINDOW_LOG));
393 let adjusted_table_log = window_log as usize + 1;
394 hc.hash_log = hc.hash_log.min(adjusted_table_log);
395 hc.chain_log = hc.chain_log.min(adjusted_table_log);
396 }
397 LevelParams {
398 strategy_tag: super::strategy::StrategyTag::BtUltra2,
399 window_log,
400 fast_hash_log: 14,
401 fast_mls: 7,
402 fast_step_size: 2,
403 lazy_depth: 2,
404 hc,
405 row: ROW_CONFIG,
406 }
407}
408
409fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
412 if matches!(level, CompressionLevel::Level(22)) {
413 return level22_btultra2_params_for_source_size(source_size);
414 }
415 let params = match level {
416 CompressionLevel::Uncompressed => LevelParams {
417 strategy_tag: super::strategy::StrategyTag::Fast,
418 window_log: 17,
422 fast_hash_log: 14,
425 fast_mls: 6,
426 fast_step_size: 2,
429 lazy_depth: 0,
430 hc: HC_CONFIG,
431 row: ROW_CONFIG,
432 },
433 CompressionLevel::Fastest => {
434 let mut p = LEVEL_TABLE[0];
441 p.fast_hash_log = 14;
442 p.fast_mls = 6;
443 p.fast_step_size = 2;
444 p
445 }
446 CompressionLevel::Default => LEVEL_TABLE[2],
447 CompressionLevel::Better => LEVEL_TABLE[6],
448 CompressionLevel::Best => LEVEL_TABLE[10],
449 CompressionLevel::Level(n) => {
450 if n > 0 {
451 let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
452 LEVEL_TABLE[idx]
453 } else if n == 0 {
454 LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
456 } else {
457 let clamped = n.max(CompressionLevel::MIN_LEVEL);
467 let target_length = (-clamped) as usize;
468 let step_size = target_length + 1;
469 LevelParams {
470 strategy_tag: super::strategy::StrategyTag::Fast,
471 window_log: 19,
472 fast_hash_log: 14,
473 fast_mls: 6,
474 fast_step_size: step_size,
475 lazy_depth: 0,
476 hc: HC_CONFIG,
477 row: ROW_CONFIG,
478 }
479 }
480 }
481 };
482 if let Some(size) = source_size {
483 adjust_params_for_source_size(params, size)
484 } else {
485 params
486 }
487}
488
489enum MatcherStorage {
507 Simple(FastKernelMatcher),
514 Dfast(DfastMatchGenerator),
519 Row(RowMatchGenerator),
523 HashChain(HcMatchGenerator),
535}
536
537impl MatcherStorage {
538 fn backend(&self) -> super::strategy::BackendTag {
540 use super::strategy::BackendTag;
541 match self {
542 Self::Simple(_) => BackendTag::Simple,
543 Self::Dfast(_) => BackendTag::Dfast,
544 Self::Row(_) => BackendTag::Row,
545 Self::HashChain(_) => BackendTag::HashChain,
546 }
547 }
548}
549
550pub struct MatchGeneratorDriver {
552 vec_pool: Vec<Vec<u8>>,
553 storage: MatcherStorage,
560 strategy_tag: super::strategy::StrategyTag,
566 slice_size: usize,
567 base_slice_size: usize,
568 reported_window_size: usize,
571 dictionary_retained_budget: usize,
574 source_size_hint: Option<u64>,
576}
577
578impl MatchGeneratorDriver {
579 pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
584 assert!(
601 slice_size > 0,
602 "MatchGeneratorDriver::new requires slice_size > 0 (got 0)",
603 );
604 assert!(
605 max_slices_in_window > 0,
606 "MatchGeneratorDriver::new requires max_slices_in_window > 0 (got 0)",
607 );
608 let max_window_size = max_slices_in_window
609 .checked_mul(slice_size)
610 .expect("MatchGeneratorDriver::new: slice_size * max_slices_in_window overflows usize");
611 let next_pow2 = max_window_size.checked_next_power_of_two().expect(
626 "MatchGeneratorDriver::new: max_window_size too large for \
627 next_power_of_two without overflow",
628 );
629 let window_log_init = next_pow2.trailing_zeros() as u8;
630 Self {
631 vec_pool: Vec::new(),
632 storage: MatcherStorage::Simple(FastKernelMatcher::with_params(
633 window_log_init,
634 FAST_LEVEL_1_HASH_LOG,
635 FAST_LEVEL_1_MLS,
636 2, )),
638 strategy_tag: super::strategy::StrategyTag::Fast,
639 slice_size,
640 base_slice_size: slice_size,
641 reported_window_size: next_pow2,
650 dictionary_retained_budget: 0,
651 source_size_hint: None,
652 }
653 }
654
655 fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
656 resolve_level_params(level, source_size)
657 }
658
659 fn active_backend(&self) -> super::strategy::BackendTag {
662 self.storage.backend()
663 }
664
665 fn simple_mut(&mut self) -> &mut FastKernelMatcher {
666 match &mut self.storage {
667 MatcherStorage::Simple(m) => m,
668 _ => panic!("simple backend must be initialized by reset() before use"),
669 }
670 }
671
672 fn recycle_simple_space(&mut self) {
686 if let Some(space) = self.simple_mut().take_recycled_space() {
687 self.vec_pool.push(space);
699 }
700 }
701
702 #[cfg(test)]
703 fn dfast_matcher(&self) -> &DfastMatchGenerator {
704 match &self.storage {
705 MatcherStorage::Dfast(m) => m,
706 _ => panic!("dfast backend must be initialized by reset() before use"),
707 }
708 }
709
710 fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
711 match &mut self.storage {
712 MatcherStorage::Dfast(m) => m,
713 _ => panic!("dfast backend must be initialized by reset() before use"),
714 }
715 }
716
717 #[cfg(test)]
718 fn row_matcher(&self) -> &RowMatchGenerator {
719 match &self.storage {
720 MatcherStorage::Row(m) => m,
721 _ => panic!("row backend must be initialized by reset() before use"),
722 }
723 }
724
725 fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
726 match &mut self.storage {
727 MatcherStorage::Row(m) => m,
728 _ => panic!("row backend must be initialized by reset() before use"),
729 }
730 }
731
732 #[cfg(test)]
733 fn hc_matcher(&self) -> &HcMatchGenerator {
734 match &self.storage {
735 MatcherStorage::HashChain(m) => m,
736 _ => panic!("hash chain backend must be initialized by reset() before use"),
737 }
738 }
739
740 fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
741 match &mut self.storage {
742 MatcherStorage::HashChain(m) => m,
743 _ => panic!("hash chain backend must be initialized by reset() before use"),
744 }
745 }
746
747 #[must_use]
756 fn retire_dictionary_budget(&mut self, evicted_bytes: usize) -> bool {
757 let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
758 if reclaimed == 0 {
759 return false;
760 }
761 self.dictionary_retained_budget -= reclaimed;
762 match self.active_backend() {
763 super::strategy::BackendTag::Simple => {
764 let matcher = self.simple_mut();
765 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
766 }
767 super::strategy::BackendTag::Dfast => {
768 let matcher = self.dfast_matcher_mut();
769 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
770 }
771 super::strategy::BackendTag::Row => {
772 let matcher = self.row_matcher_mut();
773 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
774 }
775 super::strategy::BackendTag::HashChain => {
776 let matcher = self.hc_matcher_mut();
777 matcher.table.max_window_size =
778 matcher.table.max_window_size.saturating_sub(reclaimed);
779 }
780 }
781 true
782 }
783
784 fn trim_after_budget_retire(&mut self) {
785 loop {
786 let mut evicted_bytes = 0usize;
787 match self.active_backend() {
788 super::strategy::BackendTag::Simple => {
789 let MatcherStorage::Simple(m) = &mut self.storage else {
798 unreachable!("active_backend() == Simple proven above");
799 };
800 evicted_bytes += m.trim_to_window();
801 }
802 super::strategy::BackendTag::Dfast => {
803 let dfast = self.dfast_matcher_mut();
812 let pre = dfast.window_size;
813 dfast.trim_to_window();
814 evicted_bytes += pre - dfast.window_size;
815 }
816 super::strategy::BackendTag::Row => {
817 let mut retired = Vec::new();
818 self.row_matcher_mut().trim_to_window(|data| {
819 evicted_bytes += data.len();
820 retired.push(data);
821 });
822 for mut data in retired {
823 data.resize(data.capacity(), 0);
824 self.vec_pool.push(data);
825 }
826 }
827 super::strategy::BackendTag::HashChain => {
828 let mut retired = Vec::new();
829 self.hc_matcher_mut().table.trim_to_window(|data| {
830 evicted_bytes += data.len();
831 retired.push(data);
832 });
833 for mut data in retired {
834 data.resize(data.capacity(), 0);
835 self.vec_pool.push(data);
836 }
837 }
838 }
839 if evicted_bytes == 0 {
840 break;
841 }
842 let _ = self.retire_dictionary_budget(evicted_bytes);
856 }
857 }
858
859 fn skip_matching_for_dictionary_priming(&mut self) {
860 match self.active_backend() {
861 super::strategy::BackendTag::Simple => {
862 self.simple_mut().skip_matching_with_hint(Some(false));
863 self.recycle_simple_space();
864 }
865 super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().skip_matching_dense(),
866 super::strategy::BackendTag::Row => {
867 self.row_matcher_mut().skip_matching_with_hint(Some(false))
868 }
869 super::strategy::BackendTag::HashChain => {
870 self.hc_matcher_mut().skip_matching(Some(false))
871 }
872 }
873 }
874}
875
876impl Matcher for MatchGeneratorDriver {
877 fn supports_dictionary_priming(&self) -> bool {
878 true
879 }
880
881 fn set_source_size_hint(&mut self, size: u64) {
882 self.source_size_hint = Some(size);
883 }
884
885 fn reset(&mut self, level: CompressionLevel) {
886 let hint = self.source_size_hint.take();
887 let hinted = hint.is_some();
888 let params = Self::level_params(level, hint);
889 let next_backend = params.backend();
890 let max_window_size = 1usize << params.window_log;
891 self.dictionary_retained_budget = 0;
892 if self.active_backend() != next_backend {
893 match &mut self.storage {
899 MatcherStorage::Simple(_m) => {
900 }
907 MatcherStorage::Dfast(m) => {
908 m.short_hash = Vec::new();
921 m.long_hash = Vec::new();
922 m.reset();
923 }
924 MatcherStorage::Row(m) => {
925 m.row_heads = Vec::new();
926 m.row_positions = Vec::new();
927 m.row_tags = Vec::new();
928 let vec_pool = &mut self.vec_pool;
929 m.reset(|mut data| {
930 data.resize(data.capacity(), 0);
931 vec_pool.push(data);
932 });
933 }
934 MatcherStorage::HashChain(m) => {
935 m.table.hash_table = Vec::new();
943 m.table.chain_table = Vec::new();
944 m.table.hash3_table = Vec::new();
945 let vec_pool = &mut self.vec_pool;
946 m.reset(|mut data| {
947 data.resize(data.capacity(), 0);
948 vec_pool.push(data);
949 });
950 }
951 }
952 self.storage = match next_backend {
955 super::strategy::BackendTag::Simple => {
956 MatcherStorage::Simple(FastKernelMatcher::with_params(
963 params.window_log,
964 params.fast_hash_log,
965 params.fast_mls,
966 params.fast_step_size,
967 ))
968 }
969 super::strategy::BackendTag::Dfast => {
970 MatcherStorage::Dfast(DfastMatchGenerator::new(max_window_size))
971 }
972 super::strategy::BackendTag::Row => {
973 MatcherStorage::Row(RowMatchGenerator::new(max_window_size))
974 }
975 super::strategy::BackendTag::HashChain => {
976 MatcherStorage::HashChain(HcMatchGenerator::new(max_window_size))
977 }
978 };
979 }
980
981 self.strategy_tag = params.strategy_tag;
987 self.slice_size = self.base_slice_size.min(max_window_size);
988 self.reported_window_size = max_window_size;
989 let strategy_tag = self.strategy_tag;
990 match &mut self.storage {
991 MatcherStorage::Simple(m) => {
992 m.reset(
996 params.window_log,
997 params.fast_hash_log,
998 params.fast_mls,
999 params.fast_step_size,
1000 );
1001 }
1002 MatcherStorage::Dfast(dfast) => {
1003 dfast.max_window_size = max_window_size;
1004 dfast.lazy_depth = params.lazy_depth;
1005 dfast.use_fast_loop = matches!(
1006 level,
1007 CompressionLevel::Default
1008 | CompressionLevel::Level(0)
1009 | CompressionLevel::Level(3)
1010 );
1011 dfast.set_hash_bits(if hinted {
1012 dfast_hash_bits_for_window(max_window_size)
1013 } else {
1014 DFAST_HASH_BITS
1015 });
1016 dfast.reset();
1020 }
1021 MatcherStorage::Row(row) => {
1022 row.max_window_size = max_window_size;
1023 row.lazy_depth = params.lazy_depth;
1024 row.configure(params.row);
1025 if hinted {
1026 row.set_hash_bits(row_hash_bits_for_window(max_window_size));
1027 }
1028 let vec_pool = &mut self.vec_pool;
1029 row.reset(|mut data| {
1030 data.resize(data.capacity(), 0);
1031 vec_pool.push(data);
1032 });
1033 }
1034 MatcherStorage::HashChain(hc) => {
1035 hc.table.max_window_size = max_window_size;
1036 hc.hc.lazy_depth = params.lazy_depth;
1037 hc.configure(params.hc, strategy_tag, params.window_log);
1038 let vec_pool = &mut self.vec_pool;
1039 hc.reset(|mut data| {
1040 data.resize(data.capacity(), 0);
1041 vec_pool.push(data);
1042 });
1043 }
1044 }
1045 }
1046
1047 fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
1048 match self.active_backend() {
1049 super::strategy::BackendTag::Simple => {
1050 self.simple_mut().prime_offset_history(offset_hist);
1059 }
1060 super::strategy::BackendTag::Dfast => {
1061 self.dfast_matcher_mut().offset_hist = offset_hist
1062 }
1063 super::strategy::BackendTag::Row => self.row_matcher_mut().offset_hist = offset_hist,
1064 super::strategy::BackendTag::HashChain => {
1065 let matcher = self.hc_matcher_mut();
1066 matcher.table.offset_hist = offset_hist;
1067 matcher.table.mark_dictionary_primed();
1068 }
1069 }
1070
1071 if dict_content.is_empty() {
1072 return;
1073 }
1074
1075 const MAX_PRIMED_WINDOW_SIZE: usize =
1090 (u32::MAX as usize - crate::common::MAX_BLOCK_SIZE as usize) / 2;
1091
1092 let requested_dict_budget = dict_content.len();
1106 let base_max_window_size = match self.active_backend() {
1107 super::strategy::BackendTag::Simple => self.simple_mut().max_window_size,
1108 super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().max_window_size,
1109 super::strategy::BackendTag::Row => self.row_matcher_mut().max_window_size,
1110 super::strategy::BackendTag::HashChain => self.hc_matcher_mut().table.max_window_size,
1111 };
1112 match self.active_backend() {
1113 super::strategy::BackendTag::Simple => {
1114 let matcher = self.simple_mut();
1115 matcher.max_window_size = matcher
1116 .max_window_size
1117 .saturating_add(requested_dict_budget)
1118 .min(MAX_PRIMED_WINDOW_SIZE);
1119 }
1120 super::strategy::BackendTag::Dfast => {
1121 let matcher = self.dfast_matcher_mut();
1122 matcher.max_window_size = matcher
1123 .max_window_size
1124 .saturating_add(requested_dict_budget)
1125 .min(MAX_PRIMED_WINDOW_SIZE);
1126 }
1127 super::strategy::BackendTag::Row => {
1128 let matcher = self.row_matcher_mut();
1129 matcher.max_window_size = matcher
1130 .max_window_size
1131 .saturating_add(requested_dict_budget)
1132 .min(MAX_PRIMED_WINDOW_SIZE);
1133 }
1134 super::strategy::BackendTag::HashChain => {
1135 let matcher = self.hc_matcher_mut();
1136 matcher.table.max_window_size = matcher
1137 .table
1138 .max_window_size
1139 .saturating_add(requested_dict_budget)
1140 .min(MAX_PRIMED_WINDOW_SIZE);
1141 }
1142 }
1143
1144 let mut start = 0usize;
1145 let mut committed_dict_budget = 0usize;
1146 let min_primed_tail = match self.active_backend() {
1150 super::strategy::BackendTag::Simple => MIN_MATCH_LEN,
1151 super::strategy::BackendTag::Dfast
1152 | super::strategy::BackendTag::Row
1153 | super::strategy::BackendTag::HashChain => 4,
1154 };
1155 while start < dict_content.len() {
1156 let end = (start + self.slice_size).min(dict_content.len());
1157 if end - start < min_primed_tail {
1158 break;
1159 }
1160 let mut space = self.get_next_space();
1161 space.clear();
1162 space.extend_from_slice(&dict_content[start..end]);
1163 self.commit_space(space);
1164 self.skip_matching_for_dictionary_priming();
1165 committed_dict_budget += end - start;
1166 start = end;
1167 }
1168
1169 let capped_retained_budget = MAX_PRIMED_WINDOW_SIZE.saturating_sub(base_max_window_size);
1179 let granted_retained_budget = committed_dict_budget.min(capped_retained_budget);
1180 let final_max_window_size = base_max_window_size.saturating_add(granted_retained_budget);
1181 match self.active_backend() {
1182 super::strategy::BackendTag::Simple => {
1183 self.simple_mut().max_window_size = final_max_window_size;
1184 }
1185 super::strategy::BackendTag::Dfast => {
1186 self.dfast_matcher_mut().max_window_size = final_max_window_size;
1187 }
1188 super::strategy::BackendTag::Row => {
1189 self.row_matcher_mut().max_window_size = final_max_window_size;
1190 }
1191 super::strategy::BackendTag::HashChain => {
1192 self.hc_matcher_mut().table.max_window_size = final_max_window_size;
1193 }
1194 }
1195 if granted_retained_budget > 0 {
1196 self.dictionary_retained_budget = self
1197 .dictionary_retained_budget
1198 .saturating_add(granted_retained_budget);
1199 }
1200 if self.active_backend() == super::strategy::BackendTag::HashChain {
1201 self.hc_matcher_mut()
1202 .table
1203 .set_dictionary_limit_from_primed_bytes(committed_dict_budget);
1204 }
1205 }
1206
1207 fn seed_dictionary_entropy(
1208 &mut self,
1209 huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
1210 ll: Option<&crate::fse::fse_encoder::FSETable>,
1211 ml: Option<&crate::fse::fse_encoder::FSETable>,
1212 of: Option<&crate::fse::fse_encoder::FSETable>,
1213 ) {
1214 if self.active_backend() == super::strategy::BackendTag::HashChain {
1215 self.hc_matcher_mut()
1216 .seed_dictionary_entropy(huff, ll, ml, of);
1217 }
1218 }
1219
1220 fn window_size(&self) -> u64 {
1221 self.reported_window_size as u64
1222 }
1223
1224 fn get_next_space(&mut self) -> Vec<u8> {
1225 if let Some(mut space) = self.vec_pool.pop() {
1226 if space.len() > self.slice_size {
1227 space.truncate(self.slice_size);
1228 }
1229 if space.len() < self.slice_size {
1230 space.resize(self.slice_size, 0);
1231 }
1232 return space;
1233 }
1234 alloc::vec![0; self.slice_size]
1235 }
1236
1237 fn get_last_space(&mut self) -> &[u8] {
1238 match &self.storage {
1239 MatcherStorage::Simple(m) => m.last_committed_space(),
1240 MatcherStorage::Dfast(m) => m.get_last_space(),
1241 MatcherStorage::Row(m) => m.get_last_space(),
1242 MatcherStorage::HashChain(m) => m.table.get_last_space(),
1243 }
1244 }
1245
1246 fn commit_space(&mut self, space: Vec<u8>) {
1247 let mut evicted_bytes = 0usize;
1248 let vec_pool = &mut self.vec_pool;
1254 match &mut self.storage {
1255 MatcherStorage::Simple(m) => {
1256 let pre = m.history_len_for_eviction_accounting();
1266 m.accept_data(space);
1267 let post = m.history_len_for_eviction_accounting();
1268 evicted_bytes += pre.saturating_sub(post);
1279 }
1280 MatcherStorage::Dfast(m) => {
1281 let pre = m.window_size;
1303 let space_len = space.len();
1304 m.add_data(space, |mut data| {
1305 data.resize(data.capacity(), 0);
1306 vec_pool.push(data);
1307 });
1308 evicted_bytes += pre.saturating_add(space_len).saturating_sub(m.window_size);
1309 }
1310 MatcherStorage::Row(m) => {
1311 m.add_data(space, |mut data| {
1312 evicted_bytes += data.len();
1313 data.resize(data.capacity(), 0);
1314 vec_pool.push(data);
1315 });
1316 }
1317 MatcherStorage::HashChain(m) => {
1318 m.table.add_data(space, |mut data| {
1319 evicted_bytes += data.len();
1320 data.resize(data.capacity(), 0);
1321 vec_pool.push(data);
1322 });
1323 }
1324 }
1325 if self.retire_dictionary_budget(evicted_bytes) {
1335 self.trim_after_budget_retire();
1336 }
1337 }
1338
1339 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1340 use super::strategy::{self, StrategyTag};
1341 match self.strategy_tag {
1350 StrategyTag::Fast => self.compress_block::<strategy::Fast>(&mut handle_sequence),
1351 StrategyTag::Dfast => self.compress_block::<strategy::Dfast>(&mut handle_sequence),
1352 StrategyTag::Greedy => self.compress_block::<strategy::Greedy>(&mut handle_sequence),
1353 StrategyTag::Lazy => self.compress_block::<strategy::Lazy>(&mut handle_sequence),
1354 StrategyTag::BtOpt => self.compress_block::<strategy::BtOpt>(&mut handle_sequence),
1355 StrategyTag::BtUltra => self.compress_block::<strategy::BtUltra>(&mut handle_sequence),
1356 StrategyTag::BtUltra2 => {
1357 self.compress_block::<strategy::BtUltra2>(&mut handle_sequence)
1358 }
1359 }
1360 }
1361
1362 fn skip_matching(&mut self) {
1363 self.skip_matching_with_hint(None);
1364 }
1365
1366 fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
1367 match self.active_backend() {
1368 super::strategy::BackendTag::Simple => {
1369 self.simple_mut()
1370 .skip_matching_with_hint(incompressible_hint);
1371 self.recycle_simple_space();
1372 }
1373 super::strategy::BackendTag::Dfast => {
1374 self.dfast_matcher_mut().skip_matching(incompressible_hint)
1375 }
1376 super::strategy::BackendTag::Row => self
1377 .row_matcher_mut()
1378 .skip_matching_with_hint(incompressible_hint),
1379 super::strategy::BackendTag::HashChain => {
1380 self.hc_matcher_mut().skip_matching(incompressible_hint)
1381 }
1382 }
1383 }
1384}
1385
1386impl MatchGeneratorDriver {
1387 fn compress_block<S: super::strategy::Strategy>(
1396 &mut self,
1397 handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
1398 ) {
1399 use super::strategy::BackendTag;
1400 match S::BACKEND {
1401 BackendTag::Simple => {
1402 self.simple_mut().start_matching(&mut *handle_sequence);
1413 self.recycle_simple_space();
1414 }
1415 BackendTag::Dfast => self.dfast_matcher_mut().start_matching(handle_sequence),
1416 BackendTag::Row => {
1417 let matcher = self.row_matcher_mut();
1435 debug_assert_eq!(
1436 matcher.lazy_depth, 0,
1437 "Row backend currently expects lazy_depth == 0 (donor-greedy); \
1438 wire a depth-aware dispatch before routing lazy levels here",
1439 );
1440 matcher.start_matching_greedy(handle_sequence);
1441 }
1442 BackendTag::HashChain => self
1443 .hc_matcher_mut()
1444 .start_matching_strategy::<S>(handle_sequence),
1445 }
1446 }
1447}
1448
1449pub(crate) enum HcBackend {
1463 Hc,
1465 Bt(alloc::boxed::Box<super::bt::BtMatcher>),
1469}
1470
1471impl HcBackend {
1472 #[inline(always)]
1479 pub(crate) fn bt_mut(&mut self) -> &mut super::bt::BtMatcher {
1480 match self {
1481 Self::Bt(bt) => bt,
1482 Self::Hc => unreachable!("BT-only accessor called in HC mode"),
1483 }
1484 }
1485}
1486
1487struct HcMatchGenerator {
1488 table: super::match_table::storage::MatchTable,
1493 hc: super::hc::HcMatcher,
1497 backend: HcBackend,
1502 strategy_tag: super::strategy::StrategyTag,
1514}
1515
1516macro_rules! bt_insert_step_no_rebase_body {
1532 ($table:expr, $search_depth:expr, $abs_pos:ident, $current_abs_end:ident, $target_abs:ident, $cmf:path) => {{
1533 let idx = $abs_pos - $table.history_abs_start;
1534 let concat = &$table.history[$table.history_start..];
1535 if idx + 8 > concat.len() {
1536 return 1;
1537 }
1538 debug_assert!(
1539 $abs_pos <= $current_abs_end,
1540 "BT walker called past current block end"
1541 );
1542 let tail_limit = $current_abs_end - $abs_pos;
1543 let hash = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
1544 concat,
1545 idx,
1546 $table.hash_log,
1547 $crate::encoding::bt::BtMatcher::HASH_MLS,
1548 );
1549 let Some(relative_pos) = $table.relative_position($abs_pos) else {
1550 return 1;
1551 };
1552 let stored = relative_pos + 1;
1553 let bt_mask = $table.bt_mask();
1554 let bt_low = $abs_pos.saturating_sub(bt_mask);
1560 let window_low = $table.window_low_abs_for_target($target_abs);
1561 let mut match_end_abs = $abs_pos + 9;
1570 let mut best_len = 8usize;
1571 let mut compares_left = $search_depth;
1572 let mut common_length_smaller = 0usize;
1573 let mut common_length_larger = 0usize;
1574 let pair_idx = $table.bt_pair_index_for_abs($abs_pos);
1575 let mut smaller_slot = pair_idx;
1576 let mut larger_slot = pair_idx + 1;
1577 let mut match_stored = $table.hash_table[hash];
1578 $table.hash_table[hash] = stored;
1579
1580 while compares_left > 0 {
1581 let Some(candidate_abs) =
1582 $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
1583 match_stored,
1584 $table.position_base,
1585 $table.index_shift,
1586 )
1587 else {
1588 break;
1589 };
1590 if candidate_abs < window_low || candidate_abs >= $abs_pos {
1591 break;
1592 }
1593 compares_left -= 1;
1594
1595 let next_pair_idx = $table.bt_pair_index_for_abs(candidate_abs);
1596 let next_smaller = $table.chain_table[next_pair_idx];
1597 let next_larger = $table.chain_table[next_pair_idx + 1];
1598 let seed_len = common_length_smaller.min(common_length_larger);
1599 let candidate_idx = candidate_abs - $table.history_abs_start;
1600 let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };
1605
1606 if match_len > best_len {
1607 best_len = match_len;
1608 let candidate_end = candidate_abs + match_len;
1612 if candidate_end > match_end_abs {
1613 match_end_abs = candidate_end;
1614 }
1615 }
1616
1617 if match_len >= tail_limit {
1618 break;
1619 }
1620
1621 let candidate_next = candidate_idx + match_len;
1622 let current_next = idx + match_len;
1623 if concat[candidate_next] < concat[current_next] {
1624 $table.chain_table[smaller_slot] = match_stored;
1625 common_length_smaller = match_len;
1626 if candidate_abs <= bt_low {
1627 smaller_slot = usize::MAX;
1628 break;
1629 }
1630 smaller_slot = next_pair_idx + 1;
1631 match_stored = next_larger;
1632 } else {
1633 $table.chain_table[larger_slot] = match_stored;
1634 common_length_larger = match_len;
1635 if candidate_abs <= bt_low {
1636 larger_slot = usize::MAX;
1637 break;
1638 }
1639 larger_slot = next_pair_idx;
1640 match_stored = next_smaller;
1641 }
1642 }
1643
1644 if smaller_slot != usize::MAX {
1645 $table.chain_table[smaller_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
1646 }
1647 if larger_slot != usize::MAX {
1648 $table.chain_table[larger_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
1649 }
1650
1651 let speed_positions = if best_len > 384 {
1652 (best_len - 384).min(192)
1653 } else {
1654 0
1655 };
1656 speed_positions.max(match_end_abs - ($abs_pos + 8))
1666 }};
1667}
1668pub(crate) use bt_insert_step_no_rebase_body;
1669
1670macro_rules! build_optimal_plan_impl_body {
1685 (
1686 $self:expr,
1687 $strategy_ty:ty,
1688 $current:ident,
1689 $current_abs_start:ident,
1690 $current_len:ident,
1691 $initial_state:ident,
1692 $stats:ident,
1693 $out:ident,
1694 $collect:ident $(,)?
1695 ) => {{
1696 let current_abs_end = $current_abs_start + $current_len;
1697 let min_match_len = HC_OPT_MIN_MATCH_LEN;
1698 let frontier_limit = $current_len.min(HC_OPT_NUM - 1);
1700 let initial_reps = $initial_state.reps;
1701 let initial_litlen = $initial_state.litlen;
1702 let mut profile = $initial_state.profile;
1703 profile.sufficient_match_len = $self.hc.sufficient_match_len_for_pass(profile);
1704 debug_assert!(
1716 <$strategy_ty as super::strategy::Strategy>::USE_BT,
1717 "build_optimal_plan_impl_body called on non-BT strategy"
1718 );
1719 let abort_on_worse_match: bool =
1720 <$strategy_ty as super::strategy::Strategy>::OPT_LEVEL == 0;
1721 let opt_level: bool = <$strategy_ty as super::strategy::Strategy>::OPT_LEVEL >= 2;
1722 let mut nodes = core::mem::take(&mut $self.backend.bt_mut().opt_nodes_scratch);
1723 let frontier_buffer_size = frontier_limit + 2;
1725 if nodes.len() < frontier_buffer_size {
1726 nodes.resize(frontier_buffer_size, HcOptimalNode::default());
1727 }
1728 let mut candidates = core::mem::take(&mut $self.backend.bt_mut().opt_candidates_scratch);
1729 candidates.clear();
1730 if candidates.capacity() < MAX_HC_SEARCH_DEPTH {
1731 candidates.reserve_exact(MAX_HC_SEARCH_DEPTH - candidates.capacity());
1732 }
1733 let mut store = core::mem::take(&mut $self.backend.bt_mut().opt_store_scratch);
1734 store.clear();
1735 let mut ll_prices = core::mem::take(&mut $self.backend.bt_mut().opt_ll_price_scratch);
1736 let mut ll_price_generations =
1737 core::mem::take(&mut $self.backend.bt_mut().opt_ll_price_generation);
1738 if ll_prices.len() <= frontier_limit {
1739 ll_prices.resize(frontier_limit + 1, 0);
1740 ll_price_generations.resize(frontier_limit + 1, 0);
1741 }
1742 $self.backend.bt_mut().opt_ll_price_stamp = $self
1743 .backend
1744 .bt_mut()
1745 .opt_ll_price_stamp
1746 .wrapping_add(1)
1747 .max(1);
1748 let ll_price_stamp = $self.backend.bt_mut().opt_ll_price_stamp;
1749 $self.backend.bt_mut().opt_lit_price_stamp = $self
1750 .backend
1751 .bt_mut()
1752 .opt_lit_price_stamp
1753 .wrapping_add(1)
1754 .max(1);
1755 let lit_price_stamp = $self.backend.bt_mut().opt_lit_price_stamp;
1756 let mut ml_prices = core::mem::take(&mut $self.backend.bt_mut().opt_ml_price_scratch);
1757 let mut ml_price_generations =
1758 core::mem::take(&mut $self.backend.bt_mut().opt_ml_price_generation);
1759 if ml_prices.len() <= frontier_limit {
1760 ml_prices.resize(frontier_limit + 1, 0);
1761 ml_price_generations.resize(frontier_limit + 1, 0);
1762 }
1763 $self.backend.bt_mut().opt_ml_price_stamp = $self
1764 .backend
1765 .bt_mut()
1766 .opt_ml_price_stamp
1767 .wrapping_add(1)
1768 .max(1);
1769 let ml_price_stamp = $self.backend.bt_mut().opt_ml_price_stamp;
1770 nodes[0] = HcOptimalNode {
1771 price: BtMatcher::cached_lit_length_price(
1772 profile,
1773 $stats,
1774 initial_litlen,
1775 &mut ll_prices,
1776 &mut ll_price_generations,
1777 ll_price_stamp,
1778 ),
1779 litlen: initial_litlen as u32,
1780 reps: initial_reps,
1781 ..HcOptimalNode::default()
1782 };
1783 let sufficient_len = profile.sufficient_match_len;
1784 let ll0_price = BtMatcher::cached_lit_length_price(
1785 profile,
1786 $stats,
1787 0,
1788 &mut ll_prices,
1789 &mut ll_price_generations,
1790 ll_price_stamp,
1791 );
1792 let ll1_price = BtMatcher::cached_lit_length_price(
1793 profile,
1794 $stats,
1795 1,
1796 &mut ll_prices,
1797 &mut ll_price_generations,
1798 ll_price_stamp,
1799 );
1800 let mut pos = 1usize;
1801 let mut last_pos = 0usize;
1802 let mut forced_end: Option<usize> = None;
1803 let mut forced_end_state: Option<HcOptimalNode> = None;
1804 let mut seed_forced_shortest_path = false;
1805 let mut opt_ldm = HcOptLdmState {
1806 seq_store: HcRawSeqStore {
1807 pos: 0,
1808 pos_in_sequence: 0,
1809 size: $self.backend.bt_mut().ldm_sequences.len(),
1810 },
1811 ..HcOptLdmState::default()
1812 };
1813 let has_ldm = !$self.backend.bt_mut().ldm_sequences.is_empty();
1814 if has_ldm {
1815 $self
1816 .backend
1817 .bt_mut()
1818 .ldm_get_next_match_and_update_seq_store(&mut opt_ldm, 0, $current_len);
1819 }
1820
1821 if $current_len >= min_match_len {
1824 let seed_ldm = if has_ldm {
1825 $self.backend.bt_mut().ldm_process_match_candidate(
1826 &mut opt_ldm,
1827 0,
1828 $current_len,
1829 min_match_len,
1830 )
1831 } else {
1832 None
1833 };
1834 candidates.clear();
1835 unsafe {
1839 $self.$collect::<$strategy_ty, true>(
1840 $current_abs_start,
1841 current_abs_end,
1842 profile,
1843 HcCandidateQuery {
1844 reps: initial_reps,
1845 lit_len: initial_litlen,
1846 ldm_candidate: seed_ldm,
1847 },
1848 &mut candidates,
1849 )
1850 };
1851 if !candidates.is_empty() {
1852 last_pos = (min_match_len - 1).min(frontier_limit);
1854 for p in 1..min_match_len.min(nodes.len()) {
1855 BtMatcher::reset_opt_node(&mut nodes[p]);
1856 let seed_litlen = initial_litlen
1866 .checked_add(p)
1867 .and_then(|s| u32::try_from(s).ok())
1868 .expect("optimal parser seed litlen out of u32 range");
1869 nodes[p].litlen = seed_litlen;
1870 }
1871 }
1872
1873 if let Some(candidate) = candidates.last() {
1874 let longest_len = candidate.match_len.min($current_len);
1875 if longest_len > sufficient_len {
1876 let off_base = BtMatcher::encode_offset_base_with_reps(
1877 candidate.offset as u32,
1878 initial_litlen,
1879 initial_reps,
1880 );
1881 let off_price = profile
1882 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
1883 let ml_price = BtMatcher::cached_match_length_price(
1884 profile,
1885 $stats,
1886 longest_len,
1887 &mut ml_prices,
1888 &mut ml_price_generations,
1889 ml_price_stamp,
1890 );
1891 let seq_cost = BtMatcher::add_prices(
1892 ll0_price,
1893 profile.match_price_from_parts(off_price, ml_price, $stats),
1894 );
1895 let forced_price = BtMatcher::add_prices(nodes[0].price, seq_cost);
1896 let forced_state = HcOptimalNode {
1897 price: forced_price,
1898 off: candidate.offset as u32,
1899 mlen: longest_len as u32,
1900 litlen: 0,
1901 reps: initial_reps,
1902 };
1903 if longest_len < nodes.len() && forced_price < nodes[longest_len].price {
1904 nodes[longest_len] = forced_state;
1905 }
1906 forced_end = Some(longest_len);
1907 forced_end_state = Some(forced_state);
1908 seed_forced_shortest_path = true;
1909 }
1910 }
1911 if !seed_forced_shortest_path {
1912 let mut prev_max_len = min_match_len - 1;
1913 for candidate in candidates.iter() {
1914 let max_match_len = candidate.match_len.min(frontier_limit);
1915 if max_match_len < min_match_len {
1916 continue;
1917 }
1918 let start_len = (prev_max_len + 1).max(min_match_len);
1919 if start_len > max_match_len {
1920 prev_max_len = prev_max_len.max(max_match_len);
1921 continue;
1922 }
1923 if max_match_len > last_pos {
1924 BtMatcher::reset_opt_nodes(&mut nodes, last_pos + 1, max_match_len);
1925 }
1926 let off_base = BtMatcher::encode_offset_base_with_reps(
1927 candidate.offset as u32,
1928 initial_litlen,
1929 initial_reps,
1930 );
1931 let off_price = profile
1932 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
1933 debug_assert!(max_match_len < nodes.len());
1934 let nodes0_price = nodes[0].price;
1935 for match_len in (start_len..=max_match_len).rev() {
1936 let ml_price = BtMatcher::cached_match_length_price(
1937 profile,
1938 $stats,
1939 match_len,
1940 &mut ml_prices,
1941 &mut ml_price_generations,
1942 ml_price_stamp,
1943 );
1944 let seq_cost = BtMatcher::add_prices(
1945 ll0_price,
1946 profile.match_price_from_parts(off_price, ml_price, $stats),
1947 );
1948 let next_cost = BtMatcher::add_prices(nodes0_price, seq_cost);
1949 let node_price = unsafe { nodes.get_unchecked(match_len).price };
1950 if match_len > last_pos || next_cost < node_price {
1951 let slot = unsafe { nodes.get_unchecked_mut(match_len) };
1952 *slot = HcOptimalNode {
1953 price: next_cost,
1954 off: candidate.offset as u32,
1955 mlen: match_len as u32,
1956 litlen: 0,
1957 reps: initial_reps,
1958 };
1959 if match_len > last_pos {
1960 last_pos = match_len;
1961 }
1962 } else if abort_on_worse_match {
1963 break;
1964 }
1965 }
1966 prev_max_len = prev_max_len.max(max_match_len);
1967 }
1968 if last_pos + 1 < nodes.len() {
1969 nodes[last_pos + 1].price = u32::MAX;
1970 }
1971 }
1972 }
1973 while !seed_forced_shortest_path && pos <= last_pos && pos <= frontier_limit {
1974 debug_assert!(pos + 1 < nodes.len());
1975 let prev_node = unsafe { *nodes.get_unchecked(pos - 1) };
1976 if prev_node.price != u32::MAX {
1977 let lit_len = prev_node.litlen as usize + 1;
1978 let lit_price = {
1979 let bt = $self.backend.bt_mut();
1980 BtMatcher::cached_literal_price(
1981 profile,
1982 $stats,
1983 $current[pos - 1],
1984 &mut bt.opt_lit_price_scratch,
1985 &mut bt.opt_lit_price_generation,
1986 lit_price_stamp,
1987 )
1988 };
1989 let ll_delta = BtMatcher::cached_lit_length_delta_price(
1990 profile,
1991 $stats,
1992 lit_len,
1993 &mut ll_prices,
1994 &mut ll_price_generations,
1995 ll_price_stamp,
1996 );
1997 let lit_cost = BtMatcher::add_price_delta(prev_node.price, lit_price, ll_delta);
1998 let node_pos_price = unsafe { nodes.get_unchecked(pos).price };
1999 if lit_cost <= node_pos_price {
2000 let prev_match = unsafe { *nodes.get_unchecked(pos) };
2001 let slot = unsafe { nodes.get_unchecked_mut(pos) };
2002 *slot = prev_node;
2003 slot.litlen = lit_len as u32;
2004 slot.price = lit_cost;
2005 #[allow(clippy::collapsible_if)]
2006 if opt_level
2007 && prev_match.mlen > 0
2008 && prev_match.litlen == 0
2009 && pos < $current_len
2010 {
2011 if ll1_price < ll0_price {
2012 let next_lit_price = {
2013 let bt = $self.backend.bt_mut();
2014 BtMatcher::cached_literal_price(
2015 profile,
2016 $stats,
2017 $current[pos],
2018 &mut bt.opt_lit_price_scratch,
2019 &mut bt.opt_lit_price_generation,
2020 lit_price_stamp,
2021 )
2022 };
2023 let with1literal = BtMatcher::add_price_delta(
2024 prev_match.price,
2025 next_lit_price,
2026 ll1_price as i32 - ll0_price as i32,
2027 );
2028 let ll_delta_next = BtMatcher::cached_lit_length_delta_price(
2029 profile,
2030 $stats,
2031 lit_len + 1,
2032 &mut ll_prices,
2033 &mut ll_price_generations,
2034 ll_price_stamp,
2035 );
2036 let with_more_literals =
2037 BtMatcher::add_price_delta(lit_cost, next_lit_price, ll_delta_next);
2038 let next = pos + 1;
2039 let next_price = unsafe { nodes.get_unchecked(next).price };
2040 if with1literal < with_more_literals && with1literal < next_price {
2041 debug_assert!(pos >= prev_match.mlen as usize);
2043 let prev_pos = pos - prev_match.mlen as usize;
2044 {
2045 let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
2046 let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
2047 prev_match.off,
2048 prev_state.litlen as usize,
2049 prev_state.reps,
2050 );
2051 let slot = unsafe { nodes.get_unchecked_mut(next) };
2052 *slot = prev_match;
2053 slot.reps = reps_after_match;
2054 slot.litlen = 1;
2055 slot.price = with1literal;
2056 if next > last_pos {
2057 last_pos = next;
2058 }
2059 }
2060 }
2061 }
2062 }
2063 }
2064 }
2065
2066 let mut base_node = unsafe { *nodes.get_unchecked(pos) };
2067 if base_node.price == u32::MAX {
2068 pos += 1;
2069 continue;
2070 }
2071 if base_node.mlen > 0 && base_node.litlen == 0 {
2072 debug_assert!(pos >= base_node.mlen as usize);
2074 let prev_pos = pos - base_node.mlen as usize;
2075 {
2076 let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
2077 let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
2078 base_node.off,
2079 prev_state.litlen as usize,
2080 prev_state.reps,
2081 );
2082 base_node.reps = reps_after_match;
2083 unsafe { nodes.get_unchecked_mut(pos).reps = reps_after_match };
2084 }
2085 }
2086 let base_cost = base_node.price;
2087
2088 if pos + 8 > $current_len {
2089 pos += 1;
2090 continue;
2091 }
2092
2093 if pos == last_pos {
2094 break;
2095 }
2096
2097 let next_price = unsafe { nodes.get_unchecked(pos + 1).price };
2098 if abort_on_worse_match
2099 && next_price <= base_cost.saturating_add(HC_BITCOST_MULTIPLIER / 2)
2100 {
2101 pos += 1;
2102 continue;
2103 }
2104
2105 let abs_pos = $current_abs_start + pos;
2106 let ldm_candidate = if has_ldm {
2107 $self.backend.bt_mut().ldm_process_match_candidate(
2108 &mut opt_ldm,
2109 pos,
2110 $current_len - pos,
2111 min_match_len,
2112 )
2113 } else {
2114 None
2115 };
2116 candidates.clear();
2117 unsafe {
2119 $self.$collect::<$strategy_ty, true>(
2120 abs_pos,
2121 current_abs_end,
2122 profile,
2123 HcCandidateQuery {
2124 reps: base_node.reps,
2125 lit_len: base_node.litlen as usize,
2126 ldm_candidate,
2127 },
2128 &mut candidates,
2129 )
2130 };
2131 if let Some(candidate) = candidates.last() {
2132 let longest_len = candidate.match_len.min($current_len - pos);
2133 if longest_len > sufficient_len
2134 || pos + longest_len >= HC_OPT_NUM
2135 || pos + longest_len >= $current_len
2136 {
2137 let lit_len = base_node.litlen as usize;
2138 let off_base = BtMatcher::encode_offset_base_with_reps(
2139 candidate.offset as u32,
2140 lit_len,
2141 base_node.reps,
2142 );
2143 let off_price = profile
2144 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
2145 let ml_price = BtMatcher::cached_match_length_price(
2146 profile,
2147 $stats,
2148 longest_len,
2149 &mut ml_prices,
2150 &mut ml_price_generations,
2151 ml_price_stamp,
2152 );
2153 let seq_cost = BtMatcher::add_prices(
2154 ll0_price,
2155 profile.match_price_from_parts(off_price, ml_price, $stats),
2156 );
2157 let forced_price = BtMatcher::add_prices(base_cost, seq_cost);
2158 let end_pos = (pos + longest_len).min($current_len);
2159 forced_end = Some(end_pos);
2160 forced_end_state = Some(HcOptimalNode {
2161 price: forced_price,
2162 off: candidate.offset as u32,
2163 mlen: longest_len as u32,
2164 litlen: 0,
2165 reps: base_node.reps,
2166 });
2167 break;
2168 }
2169 }
2170 let mut prev_max_len = min_match_len - 1;
2171 for candidate in candidates.iter() {
2172 debug_assert!(pos <= frontier_limit);
2176 let max_match_len = candidate
2177 .match_len
2178 .min($current_len - pos)
2179 .min(frontier_limit - pos);
2180 let min_len = min_match_len;
2181 if max_match_len < min_len {
2182 continue;
2183 }
2184 let start_len = (prev_max_len + 1).max(min_len);
2185 if start_len > max_match_len {
2186 prev_max_len = prev_max_len.max(max_match_len);
2187 continue;
2188 }
2189 let max_next = pos + max_match_len;
2190 if max_next > last_pos {
2191 BtMatcher::reset_opt_nodes(&mut nodes, last_pos + 1, max_next);
2192 }
2193 let lit_len = base_node.litlen as usize;
2194 let off_base = BtMatcher::encode_offset_base_with_reps(
2195 candidate.offset as u32,
2196 lit_len,
2197 base_node.reps,
2198 );
2199 let off_price = profile
2200 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
2201 debug_assert!(pos + max_match_len < nodes.len());
2202 for match_len in (start_len..=max_match_len).rev() {
2203 let next = pos + match_len;
2204 let ml_price = BtMatcher::cached_match_length_price(
2205 profile,
2206 $stats,
2207 match_len,
2208 &mut ml_prices,
2209 &mut ml_price_generations,
2210 ml_price_stamp,
2211 );
2212 let seq_cost = BtMatcher::add_prices(
2213 ll0_price,
2214 profile.match_price_from_parts(off_price, ml_price, $stats),
2215 );
2216 let next_cost = BtMatcher::add_prices(base_cost, seq_cost);
2217 let node_next_price = unsafe { nodes.get_unchecked(next).price };
2218 let improved = next > last_pos || next_cost < node_next_price;
2219 if improved {
2220 let slot = unsafe { nodes.get_unchecked_mut(next) };
2221 *slot = HcOptimalNode {
2222 price: next_cost,
2223 off: candidate.offset as u32,
2224 mlen: match_len as u32,
2225 litlen: 0,
2226 reps: base_node.reps,
2227 };
2228 if next > last_pos {
2229 last_pos = next;
2230 }
2231 } else if abort_on_worse_match {
2232 break;
2233 }
2234 }
2235 prev_max_len = prev_max_len.max(max_match_len);
2236 }
2237
2238 if last_pos + 1 < nodes.len() {
2239 unsafe {
2240 nodes.get_unchecked_mut(last_pos + 1).price = u32::MAX;
2241 }
2242 }
2243 pos += 1;
2244 }
2245
2246 if last_pos == 0 {
2247 if $current_len == 0 {
2248 let price = nodes[0].price;
2249 return $self.backend.bt_mut().finish_optimal_plan(
2250 HcOptimalPlanBuffers {
2251 nodes,
2252 candidates,
2253 store,
2254 ll_prices,
2255 ll_price_generations,
2256 ml_prices,
2257 ml_price_generations,
2258 },
2259 (price, initial_reps, initial_litlen, 0),
2260 );
2261 }
2262 let lit_price = {
2263 let bt = $self.backend.bt_mut();
2264 BtMatcher::cached_literal_price(
2265 profile,
2266 $stats,
2267 $current[0],
2268 &mut bt.opt_lit_price_scratch,
2269 &mut bt.opt_lit_price_generation,
2270 lit_price_stamp,
2271 )
2272 };
2273 let next_litlen = initial_litlen
2280 .checked_add(1)
2281 .expect("optimal parser next litlen out of usize range");
2282 let ll_delta = BtMatcher::cached_lit_length_delta_price(
2283 profile,
2284 $stats,
2285 next_litlen,
2286 &mut ll_prices,
2287 &mut ll_price_generations,
2288 ll_price_stamp,
2289 );
2290 let price = BtMatcher::add_price_delta(nodes[0].price, lit_price, ll_delta);
2291 return $self.backend.bt_mut().finish_optimal_plan(
2292 HcOptimalPlanBuffers {
2293 nodes,
2294 candidates,
2295 store,
2296 ll_prices,
2297 ll_price_generations,
2298 ml_prices,
2299 ml_price_generations,
2300 },
2301 (price, initial_reps, next_litlen, 1),
2302 );
2303 }
2304
2305 let target_pos = forced_end.unwrap_or(last_pos.min(frontier_limit));
2306 let last_stretch = if let Some(forced_state) = forced_end_state {
2307 forced_state
2308 } else {
2309 nodes[target_pos]
2310 };
2311 if last_stretch.price == u32::MAX {
2312 return $self.backend.bt_mut().finish_optimal_plan(
2313 HcOptimalPlanBuffers {
2314 nodes,
2315 candidates,
2316 store,
2317 ll_prices,
2318 ll_price_generations,
2319 ml_prices,
2320 ml_price_generations,
2321 },
2322 (u32::MAX, initial_reps, initial_litlen, $current_len),
2323 );
2324 }
2325
2326 if last_stretch.mlen == 0 {
2327 return $self.backend.bt_mut().finish_optimal_plan(
2328 HcOptimalPlanBuffers {
2329 nodes,
2330 candidates,
2331 store,
2332 ll_prices,
2333 ll_price_generations,
2334 ml_prices,
2335 ml_price_generations,
2336 },
2337 (
2338 last_stretch.price,
2339 last_stretch.reps,
2340 last_stretch.litlen as usize,
2341 target_pos.min($current_len),
2342 ),
2343 );
2344 }
2345
2346 let mut cur = target_pos.saturating_sub(last_stretch.mlen as usize);
2347 let end_reps = if last_stretch.litlen == 0 {
2348 let prev_state = nodes[cur];
2349 let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
2350 last_stretch.off,
2351 prev_state.litlen as usize,
2352 prev_state.reps,
2353 );
2354 reps_after_match
2355 } else {
2356 let tail_literals = last_stretch.litlen as usize;
2357 if cur < tail_literals {
2358 return $self.backend.bt_mut().finish_optimal_plan(
2359 HcOptimalPlanBuffers {
2360 nodes,
2361 candidates,
2362 store,
2363 ll_prices,
2364 ll_price_generations,
2365 ml_prices,
2366 ml_price_generations,
2367 },
2368 (
2369 last_stretch.price,
2370 last_stretch.reps,
2371 tail_literals,
2372 target_pos.min($current_len),
2373 ),
2374 );
2375 }
2376 cur -= tail_literals;
2377 last_stretch.reps
2378 };
2379 let store_end = cur + 2;
2380 if store.len() <= store_end {
2381 store.resize(store_end + 1, HcOptimalNode::default());
2382 }
2383 let mut store_start;
2384 let mut stretch_pos = cur;
2385
2386 if last_stretch.litlen > 0 {
2387 store[store_end] = HcOptimalNode {
2388 litlen: last_stretch.litlen,
2389 mlen: 0,
2390 ..HcOptimalNode::default()
2391 };
2392 store_start = store_end.saturating_sub(1);
2393 store[store_start] = last_stretch;
2394 }
2395 store[store_end] = last_stretch;
2396 store_start = store_end;
2397
2398 loop {
2399 let next_stretch = nodes[stretch_pos];
2400 store[store_start].litlen = next_stretch.litlen;
2401 if next_stretch.mlen == 0 {
2402 break;
2403 }
2404 if store_start == 0 {
2405 break;
2406 }
2407 store_start -= 1;
2408 store[store_start] = next_stretch;
2409 let litlen = next_stretch.litlen as usize;
2416 let mlen = next_stretch.mlen as usize;
2417 debug_assert!(litlen + mlen <= $current_len);
2418 let step = litlen + mlen;
2419 if step == 0 || stretch_pos < step {
2420 break;
2421 }
2422 stretch_pos -= step;
2423 }
2424
2425 let mut tail_literals = initial_litlen;
2426 let mut store_pos = store_start;
2427 while store_pos <= store_end {
2428 let stretch = store[store_pos];
2429 let llen = stretch.litlen as usize;
2430 let mlen = stretch.mlen as usize;
2431 if mlen == 0 {
2432 tail_literals = llen;
2433 store_pos += 1;
2434 continue;
2435 }
2436 $out.push(HcOptimalSequence {
2437 offset: stretch.off,
2438 match_len: mlen as u32,
2439 lit_len: llen as u32,
2440 });
2441 tail_literals = 0;
2442 store_pos += 1;
2443 }
2444 let result = (
2445 last_stretch.price,
2446 end_reps,
2447 if last_stretch.litlen > 0 {
2448 last_stretch.litlen as usize
2449 } else {
2450 tail_literals
2451 },
2452 target_pos.min($current_len),
2453 );
2454 $self.backend.bt_mut().finish_optimal_plan(
2455 HcOptimalPlanBuffers {
2456 nodes,
2457 candidates,
2458 store,
2459 ll_prices,
2460 ll_price_generations,
2461 ml_prices,
2462 ml_price_generations,
2463 },
2464 result,
2465 )
2466 }};
2467}
2468
2469macro_rules! collect_optimal_candidates_initialized_body {
2478 (
2479 $self:expr,
2480 $strategy_ty:ty,
2481 $abs_pos:ident,
2482 $current_abs_end:ident,
2483 $profile:ident,
2484 $query:ident,
2485 $out:ident,
2486 $bt_matchfinder:ident,
2487 $bt_update:ident,
2488 $bt_insert:ident,
2489 $for_each_rep:ident,
2490 $hash3:ident,
2491 $cpl:path $(,)?
2492 ) => {{
2493 let use_hash3: bool = <$strategy_ty as super::strategy::Strategy>::USE_HASH3;
2502 debug_assert!(!$self.table.hash_table.is_empty());
2503 debug_assert!($self.table.hash3_log == 0 || !$self.table.hash3_table.is_empty());
2504 debug_assert!(
2505 !use_hash3 || $self.table.hash3_log != 0,
2506 "Strategy::USE_HASH3 = true but runtime hash3_log is 0 — call configure() first",
2507 );
2508 debug_assert!(!$self.table.chain_table.is_empty());
2509 let min_match_len = HC_OPT_MIN_MATCH_LEN;
2510 let reps = $query.reps;
2511 let lit_len = $query.lit_len;
2512 let ldm_candidate = $query.ldm_candidate;
2513 $out.clear();
2514 if $abs_pos < $self.table.skip_insert_until_abs {
2515 if let Some(ldm) = ldm_candidate {
2516 let mut best_len_for_skip = 0usize;
2517 let _ = super::bt::BtMatcher::push_candidate_ladder(
2518 $out,
2519 &mut best_len_for_skip,
2520 ldm,
2521 min_match_len,
2522 );
2523 }
2524 return;
2525 }
2526 if $bt_matchfinder {
2527 unsafe { $self.table.$bt_update($abs_pos, $current_abs_end) };
2530 }
2531 let current_idx = $abs_pos - $self.table.history_abs_start;
2532 if current_idx + 4 > $self.table.live_history().len() {
2533 if let Some(ldm) = ldm_candidate {
2534 let mut best_len_for_skip = 0usize;
2535 let _ = super::bt::BtMatcher::push_candidate_ladder(
2536 $out,
2537 &mut best_len_for_skip,
2538 ldm,
2539 min_match_len,
2540 );
2541 }
2542 return;
2543 }
2544 let mut best_len_for_skip = 0usize;
2545 let mut skip_further_match_search = false;
2546 let mut rep_len_candidate_found = false;
2547 unsafe {
2549 $self.hc.$for_each_rep(
2550 &$self.table,
2551 $abs_pos,
2552 lit_len,
2553 reps,
2554 $current_abs_end,
2555 min_match_len,
2556 |rep| {
2557 if rep.match_len >= min_match_len {
2558 rep_len_candidate_found = true;
2559 }
2560 let _ = super::bt::BtMatcher::push_candidate_ladder(
2561 $out,
2562 &mut best_len_for_skip,
2563 rep,
2564 min_match_len,
2565 );
2566 if rep.match_len > $profile.sufficient_match_len {
2567 skip_further_match_search = true;
2568 }
2569 if $abs_pos + rep.match_len >= $current_abs_end {
2576 skip_further_match_search = true;
2577 }
2578 },
2579 )
2580 };
2581 if use_hash3 && !skip_further_match_search && best_len_for_skip < min_match_len {
2585 $self.table.update_hash3_until($abs_pos);
2586 if let Some(h3) = unsafe {
2588 $self
2589 .table
2590 .$hash3($abs_pos, $current_abs_end, min_match_len)
2591 } {
2592 let _ = super::bt::BtMatcher::push_candidate_ladder(
2593 $out,
2594 &mut best_len_for_skip,
2595 h3,
2596 min_match_len,
2597 );
2598 if !rep_len_candidate_found
2599 && (h3.match_len > $profile.sufficient_match_len
2600 || $abs_pos + h3.match_len >= $current_abs_end)
2601 {
2602 $self.table.skip_insert_until_abs = $abs_pos + 1;
2603 skip_further_match_search = true;
2604 }
2605 }
2606 }
2607 if !skip_further_match_search && $bt_matchfinder {
2608 unsafe {
2610 $self.table.$bt_insert(
2611 $abs_pos,
2612 $current_abs_end,
2613 $profile,
2614 min_match_len,
2615 &mut best_len_for_skip,
2616 $out,
2617 )
2618 };
2619 } else if !skip_further_match_search {
2620 $self.table.insert_position($abs_pos);
2621 let max_chain_depth = $profile.max_chain_depth.min($self.hc.search_depth);
2622 let concat = &$self.table.history[$self.table.history_start..];
2623 let mut match_end_abs = $abs_pos + 9;
2627 if max_chain_depth > 0 {
2628 for (visited, candidate_abs) in $self
2629 .hc
2630 .chain_candidates(&$self.table, $abs_pos)
2631 .into_iter()
2632 .enumerate()
2633 {
2634 if visited >= max_chain_depth {
2635 break;
2636 }
2637 if candidate_abs == usize::MAX {
2638 break;
2639 }
2640 if candidate_abs < $self.table.history_abs_start || candidate_abs >= $abs_pos {
2641 continue;
2642 }
2643 let candidate_idx = candidate_abs - $self.table.history_abs_start;
2644 debug_assert!(
2645 $abs_pos <= $current_abs_end,
2646 "HC chain walker called past current block end"
2647 );
2648 let tail_limit = $current_abs_end - $abs_pos;
2649 let base = concat.as_ptr();
2650 let match_len =
2655 unsafe { $cpl(base.add(candidate_idx), base.add(current_idx), tail_limit) };
2656 if match_len < min_match_len {
2657 continue;
2658 }
2659 let offset = $abs_pos - candidate_abs;
2660 if super::bt::BtMatcher::push_candidate_ladder(
2661 $out,
2662 &mut best_len_for_skip,
2663 MatchCandidate {
2664 start: $abs_pos,
2665 offset,
2666 match_len,
2667 },
2668 min_match_len,
2669 ) {
2670 let candidate_end = candidate_abs + match_len;
2671 if candidate_end > match_end_abs {
2672 match_end_abs = candidate_end;
2673 }
2674 }
2675 if match_len > HC_OPT_NUM || $abs_pos + match_len >= $current_abs_end {
2676 break;
2677 }
2678 }
2679 }
2680 $self.table.skip_insert_until_abs =
2683 $self.table.skip_insert_until_abs.max(match_end_abs - 8);
2684 }
2685 if let Some(ldm) = ldm_candidate {
2686 let _ = super::bt::BtMatcher::push_candidate_ladder(
2687 $out,
2688 &mut best_len_for_skip,
2689 ldm,
2690 min_match_len,
2691 );
2692 }
2693 }};
2694}
2695
2696macro_rules! hash3_candidate_body {
2701 (
2702 $table:expr,
2703 $abs_pos:ident,
2704 $current_abs_end:ident,
2705 $min_match_len:ident,
2706 $cpl:path $(,)?
2707 ) => {{
2708 if $table.hash3_log == 0 {
2709 return None;
2710 }
2711 let idx = $abs_pos.checked_sub($table.history_abs_start)?;
2712 let concat = $table.live_history();
2713 if idx + 4 > concat.len() {
2714 return None;
2715 }
2716 let hash3 = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
2717 concat,
2718 idx,
2719 $table.hash3_log,
2720 3,
2721 );
2722 let entry = $table
2723 .hash3_table
2724 .get(hash3)
2725 .copied()
2726 .unwrap_or($crate::encoding::match_table::storage::HC_EMPTY);
2727 let candidate_abs =
2728 $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
2729 entry,
2730 $table.position_base,
2731 $table.index_shift,
2732 )?;
2733 if candidate_abs < $table.history_abs_start || candidate_abs >= $abs_pos {
2734 return None;
2735 }
2736 let offset = $abs_pos - candidate_abs;
2737 if offset >= $crate::encoding::bt::HC3_MAX_OFFSET {
2738 return None;
2739 }
2740 let candidate_idx = candidate_abs - $table.history_abs_start;
2741 let tail_limit = $current_abs_end.saturating_sub($abs_pos);
2742 let base = concat.as_ptr();
2743 let match_len = unsafe { $cpl(base.add(candidate_idx), base.add(idx), tail_limit) };
2746 (match_len >= $min_match_len).then_some($crate::encoding::opt::types::MatchCandidate {
2747 start: $abs_pos,
2748 offset,
2749 match_len,
2750 })
2751 }};
2752}
2753pub(crate) use hash3_candidate_body;
2754
2755macro_rules! for_each_repcode_candidate_body {
2765 (
2766 $table:expr,
2767 $abs_pos:ident,
2768 $lit_len:ident,
2769 $reps:ident,
2770 $current_abs_end:ident,
2771 $min_match_len:ident,
2772 $f:ident,
2773 $cpl:path $(,)?
2774 ) => {{
2775 let rep_offsets: [Option<usize>; 3] = if $lit_len == 0 {
2776 [
2777 Some($reps[1] as usize),
2778 Some($reps[2] as usize),
2779 ($reps[0] > 1).then_some(($reps[0] - 1) as usize),
2780 ]
2781 } else {
2782 [
2783 Some($reps[0] as usize),
2784 Some($reps[1] as usize),
2785 Some($reps[2] as usize),
2786 ]
2787 };
2788 let concat = $table.live_history();
2789 let current_idx = $abs_pos - $table.history_abs_start;
2790 if current_idx + 4 > concat.len() {
2791 return;
2792 }
2793 let tail_limit = $current_abs_end.saturating_sub($abs_pos);
2794 let base = concat.as_ptr();
2795 let concat_len = concat.len();
2796 for rep in rep_offsets.into_iter().flatten() {
2797 if rep == 0 || rep > $abs_pos {
2798 continue;
2799 }
2800 let candidate_pos = $abs_pos - rep;
2801 if candidate_pos < $table.history_abs_start {
2802 continue;
2803 }
2804 let candidate_idx = candidate_pos - $table.history_abs_start;
2805 let max = (concat_len - candidate_idx)
2810 .min(concat_len - current_idx)
2811 .min(tail_limit);
2812 let match_len = unsafe { $cpl(base.add(candidate_idx), base.add(current_idx), max) };
2813 if match_len < $min_match_len {
2814 continue;
2815 }
2816 $f(MatchCandidate {
2817 start: $abs_pos,
2818 offset: rep,
2819 match_len,
2820 });
2821 }
2822 }};
2823}
2824pub(crate) use for_each_repcode_candidate_body;
2825
2826macro_rules! bt_insert_and_collect_matches_body {
2833 (
2834 $table:expr,
2835 $search_depth:expr,
2836 $abs_pos:ident,
2837 $current_abs_end:ident,
2838 $profile:ident,
2839 $min_match_len:ident,
2840 $best_len_for_skip:ident,
2841 $out:ident,
2842 $cmf:path $(,)?
2843 ) => {{
2844 let idx = $abs_pos - $table.history_abs_start;
2845 let concat = &$table.history[$table.history_start..];
2846 if idx + 8 > concat.len() {
2847 return;
2848 }
2849 debug_assert!(
2850 $abs_pos <= $current_abs_end,
2851 "BT collect called past current block end"
2852 );
2853 let tail_limit = $current_abs_end - $abs_pos;
2854 let hash = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
2855 concat,
2856 idx,
2857 $table.hash_log,
2858 $crate::encoding::bt::BtMatcher::HASH_MLS,
2859 );
2860 let Some(relative_pos) = $table.relative_position($abs_pos) else {
2861 return;
2862 };
2863 let stored = relative_pos + 1;
2864 let bt_mask = $table.bt_mask();
2865 let bt_low = $abs_pos.saturating_sub(bt_mask);
2868 let window_low = $table.window_low_abs_for_target($abs_pos);
2869 let mut match_end_abs = $abs_pos + 9;
2873 let mut compares_left = $profile.max_chain_depth.min($search_depth);
2874 let mut common_length_smaller = 0usize;
2875 let mut common_length_larger = 0usize;
2876 let pair_idx = $table.bt_pair_index_for_abs($abs_pos);
2877 let mut smaller_slot = pair_idx;
2878 let mut larger_slot = pair_idx + 1;
2879 let mut match_stored = $table.hash_table[hash];
2880 $table.hash_table[hash] = stored;
2881 debug_assert!(
2886 $min_match_len >= $crate::encoding::cost_model::HC_FORMAT_MINMATCH,
2887 "min_match_len must be at least HC_FORMAT_MINMATCH"
2888 );
2889 let mut best_len = (*$best_len_for_skip).max($min_match_len - 1);
2890
2891 while compares_left > 0 {
2892 let Some(candidate_abs) =
2893 $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
2894 match_stored,
2895 $table.position_base,
2896 $table.index_shift,
2897 )
2898 else {
2899 break;
2900 };
2901 if candidate_abs < window_low || candidate_abs >= $abs_pos {
2902 break;
2903 }
2904 compares_left -= 1;
2905
2906 let next_pair_idx = $table.bt_pair_index_for_abs(candidate_abs);
2907 let next_smaller = $table.chain_table[next_pair_idx];
2908 let next_larger = $table.chain_table[next_pair_idx + 1];
2909 let seed_len = common_length_smaller.min(common_length_larger);
2910 let candidate_idx = candidate_abs - $table.history_abs_start;
2911 let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };
2914
2915 if match_len > best_len {
2916 let offset = $abs_pos - candidate_abs;
2917 let accepted = $crate::encoding::bt::BtMatcher::push_candidate_ladder(
2918 $out,
2919 $best_len_for_skip,
2920 $crate::encoding::opt::types::MatchCandidate {
2921 start: $abs_pos,
2922 offset,
2923 match_len,
2924 },
2925 $min_match_len,
2926 );
2927 if accepted {
2928 best_len = match_len;
2929 let candidate_end = candidate_abs + match_len;
2937 if candidate_end > match_end_abs {
2938 match_end_abs = candidate_end;
2939 }
2940 if match_len >= tail_limit
2941 || match_len > $crate::encoding::cost_model::HC_OPT_NUM
2942 {
2943 break;
2944 }
2945 }
2946 }
2947
2948 if match_len >= tail_limit {
2949 break;
2950 }
2951
2952 let candidate_next = candidate_idx + match_len;
2953 let current_next = idx + match_len;
2954 if concat[candidate_next] < concat[current_next] {
2955 $table.chain_table[smaller_slot] = match_stored;
2956 common_length_smaller = match_len;
2957 if candidate_abs <= bt_low {
2958 smaller_slot = usize::MAX;
2959 break;
2960 }
2961 smaller_slot = next_pair_idx + 1;
2962 match_stored = next_larger;
2963 } else {
2964 $table.chain_table[larger_slot] = match_stored;
2965 common_length_larger = match_len;
2966 if candidate_abs <= bt_low {
2967 larger_slot = usize::MAX;
2968 break;
2969 }
2970 larger_slot = next_pair_idx;
2971 match_stored = next_smaller;
2972 }
2973 }
2974
2975 if smaller_slot != usize::MAX {
2976 $table.chain_table[smaller_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
2977 }
2978 if larger_slot != usize::MAX {
2979 $table.chain_table[larger_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
2980 }
2981 $table.skip_insert_until_abs = match_end_abs - 8;
2984 }};
2985}
2986pub(crate) use bt_insert_and_collect_matches_body;
2987
2988impl HcMatchGenerator {
2989 fn should_run_btultra2_seed_pass<S: super::strategy::Strategy>(
2990 &self,
2991 current_len: usize,
2992 ) -> bool {
2993 if !(S::OPT_LEVEL == 2 && S::USE_HASH3) {
3000 return false;
3001 }
3002 let HcBackend::Bt(bt) = &self.backend else {
3003 return false;
3004 };
3005 bt.opt_state.lit_length_sum == 0
3006 && bt.opt_state.dictionary_seed.is_none()
3007 && !self.table.dictionary_primed_for_frame
3008 && bt.ldm_sequences.is_empty()
3009 && self.table.window_size == current_len
3010 && self.table.history_abs_start == 0
3011 && self.table.window.len() == 1
3012 && current_len > HC_PREDEF_THRESHOLD
3013 }
3014
3015 fn new(max_window_size: usize) -> Self {
3016 Self {
3017 table: super::match_table::storage::MatchTable::new(max_window_size),
3018 hc: super::hc::HcMatcher::new(2, HC_SEARCH_DEPTH, HC_TARGET_LEN),
3019 backend: HcBackend::Hc,
3022 strategy_tag: super::strategy::StrategyTag::Lazy,
3029 }
3030 }
3031
3032 fn configure(&mut self, config: HcConfig, tag: super::strategy::StrategyTag, window_log: u8) {
3033 use super::strategy::StrategyTag;
3034 self.strategy_tag = tag;
3038 let is_btultra2 = tag == StrategyTag::BtUltra2;
3039 let uses_bt = matches!(
3040 tag,
3041 StrategyTag::BtOpt | StrategyTag::BtUltra | StrategyTag::BtUltra2
3042 );
3043 let next_hash3_log = if is_btultra2 {
3044 HC3_HASH_LOG.min(window_log as usize)
3045 } else {
3046 0
3047 };
3048 let resize = self.table.hash_log != config.hash_log
3049 || self.table.chain_log != config.chain_log
3050 || self.table.hash3_log != next_hash3_log;
3051 self.table.hash_log = config.hash_log;
3052 self.table.chain_log = config.chain_log;
3053 self.table.hash3_log = next_hash3_log;
3054 self.hc.search_depth = if uses_bt {
3055 config.search_depth
3056 } else {
3057 config.search_depth.min(MAX_HC_SEARCH_DEPTH)
3058 };
3059 self.hc.target_len = config.target_len;
3060 self.table.search_depth = self.hc.search_depth;
3064 self.table.is_btultra2 = is_btultra2;
3065 self.table.uses_bt = uses_bt;
3066 match (&self.backend, self.table.uses_bt) {
3070 (HcBackend::Hc, true) => {
3071 self.backend = HcBackend::Bt(alloc::boxed::Box::new(super::bt::BtMatcher::new()));
3072 }
3073 (HcBackend::Bt(_), false) => {
3074 self.backend = HcBackend::Hc;
3075 }
3076 _ => {}
3077 }
3078 if resize && !self.table.hash_table.is_empty() {
3079 self.table.hash_table.clear();
3081 self.table.hash3_table.clear();
3082 self.table.chain_table.clear();
3083 }
3084 }
3085
3086 fn seed_dictionary_entropy(
3087 &mut self,
3088 huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
3089 ll: Option<&crate::fse::fse_encoder::FSETable>,
3090 ml: Option<&crate::fse::fse_encoder::FSETable>,
3091 of: Option<&crate::fse::fse_encoder::FSETable>,
3092 ) {
3093 if let HcBackend::Bt(bt) = &mut self.backend {
3094 bt.opt_state.seed_dictionary_entropy(huff, ll, ml, of);
3095 }
3096 }
3097
3098 fn reset(&mut self, reuse_space: impl FnMut(Vec<u8>)) {
3099 self.table.reset(reuse_space);
3100 if let HcBackend::Bt(bt) = &mut self.backend {
3101 bt.reset();
3102 }
3103 }
3104
3105 fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
3108 self.table.skip_matching(incompressible_hint);
3109 }
3110
3111 #[cfg(test)]
3117 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
3118 use super::strategy::{self, StrategyTag};
3119 match self.strategy_tag {
3125 StrategyTag::Fast | StrategyTag::Dfast | StrategyTag::Greedy | StrategyTag::Lazy => {
3126 self.start_matching_lazy(&mut handle_sequence)
3127 }
3128 StrategyTag::BtOpt => {
3129 self.start_matching_optimal::<strategy::BtOpt>(&mut handle_sequence)
3130 }
3131 StrategyTag::BtUltra => {
3132 self.start_matching_optimal::<strategy::BtUltra>(&mut handle_sequence)
3133 }
3134 StrategyTag::BtUltra2 => {
3135 self.start_matching_optimal::<strategy::BtUltra2>(&mut handle_sequence)
3136 }
3137 }
3138 }
3139
3140 pub(crate) fn start_matching_strategy<S: super::strategy::Strategy>(
3151 &mut self,
3152 handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
3153 ) {
3154 debug_assert_eq!(
3155 self.table.uses_bt,
3156 S::USE_BT,
3157 "Strategy::USE_BT disagrees with runtime table.uses_bt at HC dispatch"
3158 );
3159 if S::USE_BT {
3160 self.start_matching_optimal::<S>(handle_sequence)
3161 } else {
3162 self.start_matching_lazy(handle_sequence)
3163 }
3164 }
3165
3166 fn start_matching_lazy(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
3167 self.table.ensure_tables();
3168
3169 let current_len = self.table.window.back().unwrap().len();
3170 if current_len == 0 {
3171 return;
3172 }
3173
3174 let current_abs_start = self.table.history_abs_start + self.table.window_size - current_len;
3175 let current_abs_end = current_abs_start + current_len;
3176 self.table
3177 .backfill_boundary_positions(current_abs_start, current_abs_end);
3178
3179 let mut pos = 0usize;
3180 let mut literals_start = 0usize;
3181 while pos + HC_MIN_MATCH_LEN <= current_len {
3182 let abs_pos = current_abs_start + pos;
3183 let lit_len = pos - literals_start;
3184
3185 let best = self.hc.find_best_match(&self.table, abs_pos, lit_len);
3186 if let Some(candidate) = self.hc.pick_lazy_match(&self.table, abs_pos, lit_len, best) {
3187 self.table
3188 .insert_positions(abs_pos, candidate.start + candidate.match_len);
3189 let current = self.table.window.back().unwrap().as_slice();
3190 let start = candidate.start - current_abs_start;
3191 let literals = ¤t[literals_start..start];
3192 handle_sequence(Sequence::Triple {
3193 literals,
3194 offset: candidate.offset,
3195 match_len: candidate.match_len,
3196 });
3197 let _ = encode_offset_with_history(
3198 candidate.offset as u32,
3199 literals.len() as u32,
3200 &mut self.table.offset_hist,
3201 );
3202 pos = start + candidate.match_len;
3203 literals_start = pos;
3204 } else {
3205 self.table.insert_position(abs_pos);
3206 pos += 1;
3207 }
3208 }
3209
3210 while pos + 4 <= current_len {
3213 self.table.insert_position(current_abs_start + pos);
3214 pos += 1;
3215 }
3216
3217 if literals_start < current_len {
3218 let current = self.table.window.back().unwrap().as_slice();
3219 handle_sequence(Sequence::Literals {
3220 literals: ¤t[literals_start..],
3221 });
3222 }
3223 }
3224
3225 fn start_matching_optimal<S: super::strategy::Strategy>(
3226 &mut self,
3227 mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
3228 ) {
3229 self.table.ensure_tables();
3230 let current_len = self.table.window.back().unwrap().len();
3231 if current_len == 0 {
3232 return;
3233 }
3234 let current_ptr = self.table.window.back().unwrap().as_ptr();
3235 let current = unsafe { core::slice::from_raw_parts(current_ptr, current_len) };
3239
3240 let current_abs_start = self.table.history_abs_start + self.table.window_size - current_len;
3241 let current_abs_end = current_abs_start + current_len;
3242 self.table
3243 .apply_limited_update_after_long_match(current_abs_start);
3244 let hash3_start_cursor = self
3245 .table
3246 .skip_insert_until_abs
3247 .max(self.table.history_abs_start);
3248 self.table
3249 .backfill_boundary_positions(current_abs_start, current_abs_end);
3250 self.table.next_to_update3 = hash3_start_cursor;
3251 let live_history = self.table.live_history();
3266 let history_abs_start = self.table.history_abs_start;
3267 self.backend.bt_mut().prepare_ldm_candidates(
3268 live_history,
3269 history_abs_start,
3270 current_abs_start,
3271 current_len,
3272 );
3273
3274 if self.should_run_btultra2_seed_pass::<S>(current_len) {
3275 self.run_btultra2_seed_pass(current, current_abs_start, current_len);
3276 }
3277
3278 let profile = HcOptimalCostProfile::const_for_strategy::<S>();
3284 let mut opt_state =
3285 core::mem::replace(&mut self.backend.bt_mut().opt_state, HcOptState::new());
3286 opt_state.rescale_freqs(current, profile);
3287 let mut best_plan = core::mem::take(&mut self.backend.bt_mut().opt_segment_plan_scratch);
3288 best_plan.clear();
3289 let mut plan_reps = self.table.offset_hist;
3290 let (mut cursor, mut plan_litlen) = self
3291 .table
3292 .donor_opt_start_cursor_and_litlen(current_abs_start);
3293 let mut plan_literals_cursor = 0usize;
3294 let match_loop_limit = current_len.saturating_sub(8);
3295 while cursor < match_loop_limit {
3296 let remaining_len = current_len - cursor;
3297 let segment_abs_start = current_abs_start + cursor;
3298 let segment_start = best_plan.len();
3299 let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan::<S>(
3300 ¤t[cursor..],
3301 segment_abs_start,
3302 remaining_len,
3303 HcOptimalPlanState {
3304 reps: plan_reps,
3305 litlen: plan_litlen,
3306 profile,
3307 },
3308 &opt_state,
3309 &mut best_plan,
3310 );
3311 BtMatcher::update_plan_stats_segment(
3312 current,
3313 current_len,
3314 &best_plan[segment_start..],
3315 &mut plan_literals_cursor,
3316 &mut plan_reps,
3317 &mut opt_state,
3318 profile.accurate,
3319 );
3320 plan_reps = end_reps;
3321 plan_litlen = end_litlen;
3322 cursor += consumed_len;
3323 }
3324
3325 self.table
3326 .emit_optimal_plan(current_len, &best_plan, &mut handle_sequence);
3327 best_plan.clear();
3328 self.backend.bt_mut().opt_segment_plan_scratch = best_plan;
3329 self.backend.bt_mut().opt_state = opt_state;
3330 }
3331
3332 fn run_btultra2_seed_pass(
3333 &mut self,
3334 current: &[u8],
3335 current_abs_start: usize,
3336 current_len: usize,
3337 ) {
3338 type S = super::strategy::BtUltra2;
3343 let seed_profile = HcOptimalCostProfile::const_for_strategy::<S>();
3344 let mut opt_state =
3345 core::mem::replace(&mut self.backend.bt_mut().opt_state, HcOptState::new());
3346 opt_state.rescale_freqs(current, seed_profile);
3347 let mut seed_reps = self.table.offset_hist;
3348 let (mut cursor, mut seed_litlen) = self
3349 .table
3350 .donor_opt_start_cursor_and_litlen(current_abs_start);
3351 let mut seed_literals_cursor = 0usize;
3352 let mut seed_plan = core::mem::take(&mut self.backend.bt_mut().opt_seed_plan_scratch);
3353 seed_plan.clear();
3354 let match_loop_limit = current_len.saturating_sub(8);
3355 while cursor < match_loop_limit {
3356 let remaining_len = current_len - cursor;
3357 let segment_abs_start = current_abs_start + cursor;
3358 let segment_start = seed_plan.len();
3359 let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan::<S>(
3360 ¤t[cursor..],
3361 segment_abs_start,
3362 remaining_len,
3363 HcOptimalPlanState {
3364 reps: seed_reps,
3365 litlen: seed_litlen,
3366 profile: seed_profile,
3367 },
3368 &opt_state,
3369 &mut seed_plan,
3370 );
3371 BtMatcher::update_plan_stats_segment(
3372 current,
3373 current_len,
3374 &seed_plan[segment_start..],
3375 &mut seed_literals_cursor,
3376 &mut seed_reps,
3377 &mut opt_state,
3378 seed_profile.accurate,
3379 );
3380 seed_plan.truncate(segment_start);
3381 seed_reps = end_reps;
3382 seed_litlen = end_litlen;
3383 cursor += consumed_len;
3384 }
3385 seed_plan.clear();
3386 self.backend.bt_mut().opt_seed_plan_scratch = seed_plan;
3387 self.backend.bt_mut().opt_state = opt_state;
3388
3389 self.table.position_base = self.table.history_abs_start;
3392 self.table.index_shift = current_len;
3393 self.table.next_to_update3 = current_abs_start;
3394 self.table.skip_insert_until_abs = current_abs_start;
3395 self.table.allow_zero_relative_position = true;
3401 }
3402
3403 fn build_optimal_plan<S: super::strategy::Strategy>(
3404 &mut self,
3405 current: &[u8],
3406 current_abs_start: usize,
3407 current_len: usize,
3408 initial_state: HcOptimalPlanState,
3409 stats: &HcOptState,
3410 out: &mut Vec<HcOptimalSequence>,
3411 ) -> (u32, [u32; 3], usize, usize) {
3412 debug_assert!(S::USE_BT, "build_optimal_plan called on non-BT strategy");
3413 debug_assert_eq!(initial_state.profile.accurate, S::ACCURATE_PRICE);
3414 debug_assert_eq!(
3415 initial_state.profile.favor_small_offsets,
3416 S::FAVOR_SMALL_OFFSETS
3417 );
3418 let profile = initial_state.profile;
3425 match (profile.accurate, profile.favor_small_offsets) {
3426 (true, false) => self.build_optimal_plan_impl::<S, true, false>(
3427 current,
3428 current_abs_start,
3429 current_len,
3430 initial_state,
3431 stats,
3432 out,
3433 ),
3434 (true, true) => self.build_optimal_plan_impl::<S, true, true>(
3435 current,
3436 current_abs_start,
3437 current_len,
3438 initial_state,
3439 stats,
3440 out,
3441 ),
3442 (false, false) => self.build_optimal_plan_impl::<S, false, false>(
3443 current,
3444 current_abs_start,
3445 current_len,
3446 initial_state,
3447 stats,
3448 out,
3449 ),
3450 (false, true) => self.build_optimal_plan_impl::<S, false, true>(
3451 current,
3452 current_abs_start,
3453 current_len,
3454 initial_state,
3455 stats,
3456 out,
3457 ),
3458 }
3459 }
3460
3461 #[inline(always)]
3470 fn build_optimal_plan_impl<
3471 S: super::strategy::Strategy,
3472 const ACCURATE_PRICE: bool,
3473 const FAVOR_SMALL_OFFSETS: bool,
3474 >(
3475 &mut self,
3476 current: &[u8],
3477 current_abs_start: usize,
3478 current_len: usize,
3479 initial_state: HcOptimalPlanState,
3480 stats: &HcOptState,
3481 out: &mut Vec<HcOptimalSequence>,
3482 ) -> (u32, [u32; 3], usize, usize) {
3483 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3484 unsafe {
3485 self.build_optimal_plan_impl_neon::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3486 current,
3487 current_abs_start,
3488 current_len,
3489 initial_state,
3490 stats,
3491 out,
3492 )
3493 }
3494 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3495 {
3496 use crate::encoding::fastpath::{FastpathKernel, select_kernel};
3497 match select_kernel() {
3498 FastpathKernel::Avx2Bmi2 => unsafe {
3499 self.build_optimal_plan_impl_avx2_bmi2::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3500 current,
3501 current_abs_start,
3502 current_len,
3503 initial_state,
3504 stats,
3505 out,
3506 )
3507 },
3508 FastpathKernel::Sse42 => unsafe {
3509 self.build_optimal_plan_impl_sse42::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3510 current,
3511 current_abs_start,
3512 current_len,
3513 initial_state,
3514 stats,
3515 out,
3516 )
3517 },
3518 FastpathKernel::Scalar => self
3519 .build_optimal_plan_impl_scalar::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3520 current,
3521 current_abs_start,
3522 current_len,
3523 initial_state,
3524 stats,
3525 out,
3526 ),
3527 }
3528 }
3529 #[cfg(not(any(
3530 all(target_arch = "aarch64", target_endian = "little"),
3531 target_arch = "x86",
3532 target_arch = "x86_64"
3533 )))]
3534 {
3535 self.build_optimal_plan_impl_scalar::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3536 current,
3537 current_abs_start,
3538 current_len,
3539 initial_state,
3540 stats,
3541 out,
3542 )
3543 }
3544 }
3545
3546 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3550 #[target_feature(enable = "neon")]
3551 unsafe fn build_optimal_plan_impl_neon<
3552 S: super::strategy::Strategy,
3553 const ACCURATE_PRICE: bool,
3554 const FAVOR_SMALL_OFFSETS: bool,
3555 >(
3556 &mut self,
3557 current: &[u8],
3558 current_abs_start: usize,
3559 current_len: usize,
3560 initial_state: HcOptimalPlanState,
3561 stats: &HcOptState,
3562 out: &mut Vec<HcOptimalSequence>,
3563 ) -> (u32, [u32; 3], usize, usize) {
3564 build_optimal_plan_impl_body!(
3565 self,
3566 S,
3567 current,
3568 current_abs_start,
3569 current_len,
3570 initial_state,
3571 stats,
3572 out,
3573 collect_optimal_candidates_initialized_neon,
3574 )
3575 }
3576
3577 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3578 #[target_feature(enable = "sse4.2")]
3579 unsafe fn build_optimal_plan_impl_sse42<
3580 S: super::strategy::Strategy,
3581 const ACCURATE_PRICE: bool,
3582 const FAVOR_SMALL_OFFSETS: bool,
3583 >(
3584 &mut self,
3585 current: &[u8],
3586 current_abs_start: usize,
3587 current_len: usize,
3588 initial_state: HcOptimalPlanState,
3589 stats: &HcOptState,
3590 out: &mut Vec<HcOptimalSequence>,
3591 ) -> (u32, [u32; 3], usize, usize) {
3592 build_optimal_plan_impl_body!(
3593 self,
3594 S,
3595 current,
3596 current_abs_start,
3597 current_len,
3598 initial_state,
3599 stats,
3600 out,
3601 collect_optimal_candidates_initialized_sse42,
3602 )
3603 }
3604
3605 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3606 #[target_feature(enable = "avx2,bmi2")]
3607 unsafe fn build_optimal_plan_impl_avx2_bmi2<
3608 S: super::strategy::Strategy,
3609 const ACCURATE_PRICE: bool,
3610 const FAVOR_SMALL_OFFSETS: bool,
3611 >(
3612 &mut self,
3613 current: &[u8],
3614 current_abs_start: usize,
3615 current_len: usize,
3616 initial_state: HcOptimalPlanState,
3617 stats: &HcOptState,
3618 out: &mut Vec<HcOptimalSequence>,
3619 ) -> (u32, [u32; 3], usize, usize) {
3620 build_optimal_plan_impl_body!(
3621 self,
3622 S,
3623 current,
3624 current_abs_start,
3625 current_len,
3626 initial_state,
3627 stats,
3628 out,
3629 collect_optimal_candidates_initialized_avx2_bmi2,
3630 )
3631 }
3632
3633 #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
3634 #[allow(unused_unsafe)]
3638 fn build_optimal_plan_impl_scalar<
3639 S: super::strategy::Strategy,
3640 const ACCURATE_PRICE: bool,
3641 const FAVOR_SMALL_OFFSETS: bool,
3642 >(
3643 &mut self,
3644 current: &[u8],
3645 current_abs_start: usize,
3646 current_len: usize,
3647 initial_state: HcOptimalPlanState,
3648 stats: &HcOptState,
3649 out: &mut Vec<HcOptimalSequence>,
3650 ) -> (u32, [u32; 3], usize, usize) {
3651 build_optimal_plan_impl_body!(
3652 self,
3653 S,
3654 current,
3655 current_abs_start,
3656 current_len,
3657 initial_state,
3658 stats,
3659 out,
3660 collect_optimal_candidates_initialized_scalar,
3661 )
3662 }
3663
3664 #[cfg(test)]
3665 fn collect_optimal_candidates(
3666 &mut self,
3667 abs_pos: usize,
3668 current_abs_end: usize,
3669 profile: HcOptimalCostProfile,
3670 query: HcCandidateQuery,
3671 out: &mut Vec<MatchCandidate>,
3672 ) {
3673 use super::strategy::{self, StrategyTag};
3674 self.table.ensure_tables();
3675 match self.strategy_tag {
3681 StrategyTag::BtUltra2 => self
3682 .collect_optimal_candidates_initialized::<strategy::BtUltra2, true>(
3683 abs_pos,
3684 current_abs_end,
3685 profile,
3686 query,
3687 out,
3688 ),
3689 StrategyTag::BtUltra => self
3690 .collect_optimal_candidates_initialized::<strategy::BtUltra, true>(
3691 abs_pos,
3692 current_abs_end,
3693 profile,
3694 query,
3695 out,
3696 ),
3697 StrategyTag::BtOpt => self
3698 .collect_optimal_candidates_initialized::<strategy::BtOpt, true>(
3699 abs_pos,
3700 current_abs_end,
3701 profile,
3702 query,
3703 out,
3704 ),
3705 StrategyTag::Fast | StrategyTag::Dfast | StrategyTag::Greedy | StrategyTag::Lazy => {
3706 self.collect_optimal_candidates_initialized::<strategy::Lazy, false>(
3707 abs_pos,
3708 current_abs_end,
3709 profile,
3710 query,
3711 out,
3712 )
3713 }
3714 }
3715 }
3716
3717 #[allow(dead_code)]
3727 #[inline(always)]
3728 fn collect_optimal_candidates_initialized<
3729 S: super::strategy::Strategy,
3730 const USE_BT_MATCHFINDER: bool,
3731 >(
3732 &mut self,
3733 abs_pos: usize,
3734 current_abs_end: usize,
3735 profile: HcOptimalCostProfile,
3736 query: HcCandidateQuery,
3737 out: &mut Vec<MatchCandidate>,
3738 ) {
3739 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3740 unsafe {
3741 self.collect_optimal_candidates_initialized_neon::<S, USE_BT_MATCHFINDER>(
3742 abs_pos,
3743 current_abs_end,
3744 profile,
3745 query,
3746 out,
3747 )
3748 }
3749 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3750 {
3751 use crate::encoding::fastpath::{FastpathKernel, select_kernel};
3752 match select_kernel() {
3753 FastpathKernel::Avx2Bmi2 => unsafe {
3754 self.collect_optimal_candidates_initialized_avx2_bmi2::<S, USE_BT_MATCHFINDER>(
3755 abs_pos,
3756 current_abs_end,
3757 profile,
3758 query,
3759 out,
3760 )
3761 },
3762 FastpathKernel::Sse42 => unsafe {
3763 self.collect_optimal_candidates_initialized_sse42::<S, USE_BT_MATCHFINDER>(
3764 abs_pos,
3765 current_abs_end,
3766 profile,
3767 query,
3768 out,
3769 )
3770 },
3771 FastpathKernel::Scalar => self
3772 .collect_optimal_candidates_initialized_scalar::<S, USE_BT_MATCHFINDER>(
3773 abs_pos,
3774 current_abs_end,
3775 profile,
3776 query,
3777 out,
3778 ),
3779 }
3780 }
3781 #[cfg(not(any(
3782 all(target_arch = "aarch64", target_endian = "little"),
3783 target_arch = "x86",
3784 target_arch = "x86_64"
3785 )))]
3786 {
3787 self.collect_optimal_candidates_initialized_scalar::<S, USE_BT_MATCHFINDER>(
3788 abs_pos,
3789 current_abs_end,
3790 profile,
3791 query,
3792 out,
3793 )
3794 }
3795 }
3796
3797 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3803 #[target_feature(enable = "neon")]
3804 unsafe fn collect_optimal_candidates_initialized_neon<
3805 S: super::strategy::Strategy,
3806 const USE_BT_MATCHFINDER: bool,
3807 >(
3808 &mut self,
3809 abs_pos: usize,
3810 current_abs_end: usize,
3811 profile: HcOptimalCostProfile,
3812 query: HcCandidateQuery,
3813 out: &mut Vec<MatchCandidate>,
3814 ) {
3815 collect_optimal_candidates_initialized_body!(
3816 self,
3817 S,
3818 abs_pos,
3819 current_abs_end,
3820 profile,
3821 query,
3822 out,
3823 USE_BT_MATCHFINDER,
3824 bt_update_tree_until_neon,
3825 bt_insert_and_collect_matches_neon,
3826 for_each_repcode_candidate_with_reps_neon,
3827 hash3_candidate_neon,
3828 crate::encoding::fastpath::neon::common_prefix_len_ptr,
3829 )
3830 }
3831
3832 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3833 #[target_feature(enable = "sse4.2")]
3834 unsafe fn collect_optimal_candidates_initialized_sse42<
3835 S: super::strategy::Strategy,
3836 const USE_BT_MATCHFINDER: bool,
3837 >(
3838 &mut self,
3839 abs_pos: usize,
3840 current_abs_end: usize,
3841 profile: HcOptimalCostProfile,
3842 query: HcCandidateQuery,
3843 out: &mut Vec<MatchCandidate>,
3844 ) {
3845 collect_optimal_candidates_initialized_body!(
3846 self,
3847 S,
3848 abs_pos,
3849 current_abs_end,
3850 profile,
3851 query,
3852 out,
3853 USE_BT_MATCHFINDER,
3854 bt_update_tree_until_sse42,
3855 bt_insert_and_collect_matches_sse42,
3856 for_each_repcode_candidate_with_reps_sse42,
3857 hash3_candidate_sse42,
3858 crate::encoding::fastpath::sse42::common_prefix_len_ptr,
3859 )
3860 }
3861
3862 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3863 #[target_feature(enable = "avx2,bmi2")]
3864 unsafe fn collect_optimal_candidates_initialized_avx2_bmi2<
3865 S: super::strategy::Strategy,
3866 const USE_BT_MATCHFINDER: bool,
3867 >(
3868 &mut self,
3869 abs_pos: usize,
3870 current_abs_end: usize,
3871 profile: HcOptimalCostProfile,
3872 query: HcCandidateQuery,
3873 out: &mut Vec<MatchCandidate>,
3874 ) {
3875 collect_optimal_candidates_initialized_body!(
3876 self,
3877 S,
3878 abs_pos,
3879 current_abs_end,
3880 profile,
3881 query,
3882 out,
3883 USE_BT_MATCHFINDER,
3884 bt_update_tree_until_avx2_bmi2,
3885 bt_insert_and_collect_matches_avx2_bmi2,
3886 for_each_repcode_candidate_with_reps_avx2_bmi2,
3887 hash3_candidate_avx2_bmi2,
3888 crate::encoding::fastpath::avx2_bmi2::common_prefix_len_ptr,
3889 )
3890 }
3891
3892 #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
3893 #[allow(unused_unsafe)]
3896 fn collect_optimal_candidates_initialized_scalar<
3897 S: super::strategy::Strategy,
3898 const USE_BT_MATCHFINDER: bool,
3899 >(
3900 &mut self,
3901 abs_pos: usize,
3902 current_abs_end: usize,
3903 profile: HcOptimalCostProfile,
3904 query: HcCandidateQuery,
3905 out: &mut Vec<MatchCandidate>,
3906 ) {
3907 collect_optimal_candidates_initialized_body!(
3908 self,
3909 S,
3910 abs_pos,
3911 current_abs_end,
3912 profile,
3913 query,
3914 out,
3915 USE_BT_MATCHFINDER,
3916 bt_update_tree_until_scalar,
3917 bt_insert_and_collect_matches_scalar,
3918 for_each_repcode_candidate_with_reps_scalar,
3919 hash3_candidate_scalar,
3920 crate::encoding::fastpath::scalar::common_prefix_len_ptr,
3921 )
3922 }
3923}
3924
3925#[cfg(any())] #[test]
3927fn matches() {
3928 let mut matcher = MatchGenerator::new(1000);
3929 let mut original_data = Vec::new();
3930 let mut reconstructed = Vec::new();
3931
3932 let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
3933 Sequence::Literals { literals } => {
3934 assert!(!literals.is_empty());
3935 reconstructed.extend_from_slice(literals);
3936 }
3937 Sequence::Triple {
3938 literals,
3939 offset,
3940 match_len,
3941 } => {
3942 assert!(offset > 0);
3943 assert!(match_len >= MIN_MATCH_LEN);
3944 reconstructed.extend_from_slice(literals);
3945 assert!(offset <= reconstructed.len());
3946 let start = reconstructed.len() - offset;
3947 for i in 0..match_len {
3948 let byte = reconstructed[start + i];
3949 reconstructed.push(byte);
3950 }
3951 }
3952 };
3953
3954 matcher.add_data(
3955 alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
3956 SuffixStore::with_capacity(100),
3957 |_, _| {},
3958 );
3959 original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
3960
3961 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3962
3963 assert!(!matcher.next_sequence(|_| {}));
3964
3965 matcher.add_data(
3966 alloc::vec![
3967 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
3968 ],
3969 SuffixStore::with_capacity(100),
3970 |_, _| {},
3971 );
3972 original_data.extend_from_slice(&[
3973 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
3974 ]);
3975
3976 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3977 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3978 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3979 assert!(!matcher.next_sequence(|_| {}));
3980
3981 matcher.add_data(
3982 alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
3983 SuffixStore::with_capacity(100),
3984 |_, _| {},
3985 );
3986 original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);
3987
3988 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3989 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3990 assert!(!matcher.next_sequence(|_| {}));
3991
3992 matcher.add_data(
3993 alloc::vec![0, 0, 0, 0, 0],
3994 SuffixStore::with_capacity(100),
3995 |_, _| {},
3996 );
3997 original_data.extend_from_slice(&[0, 0, 0, 0, 0]);
3998
3999 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
4000 assert!(!matcher.next_sequence(|_| {}));
4001
4002 matcher.add_data(
4003 alloc::vec![7, 8, 9, 10, 11],
4004 SuffixStore::with_capacity(100),
4005 |_, _| {},
4006 );
4007 original_data.extend_from_slice(&[7, 8, 9, 10, 11]);
4008
4009 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
4010 assert!(!matcher.next_sequence(|_| {}));
4011
4012 matcher.add_data(
4013 alloc::vec![1, 3, 5, 7, 9],
4014 SuffixStore::with_capacity(100),
4015 |_, _| {},
4016 );
4017 matcher.skip_matching();
4018 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
4019 reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
4020 assert!(!matcher.next_sequence(|_| {}));
4021
4022 matcher.add_data(
4023 alloc::vec![1, 3, 5, 7, 9],
4024 SuffixStore::with_capacity(100),
4025 |_, _| {},
4026 );
4027 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
4028
4029 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
4030 assert!(!matcher.next_sequence(|_| {}));
4031
4032 matcher.add_data(
4033 alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
4034 SuffixStore::with_capacity(100),
4035 |_, _| {},
4036 );
4037 original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);
4038
4039 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
4040 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
4041 assert!(!matcher.next_sequence(|_| {}));
4042
4043 assert_eq!(reconstructed, original_data);
4044}
4045
4046#[test]
4047fn dfast_matches_roundtrip_multi_block_pattern() {
4048 let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
4049 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
4050 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
4051
4052 let mut matcher = DfastMatchGenerator::new(1 << 22);
4053 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
4054 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
4055 Sequence::Triple {
4056 literals,
4057 offset,
4058 match_len,
4059 } => {
4060 decoded.extend_from_slice(literals);
4061 let start = decoded.len() - offset;
4062 for i in 0..match_len {
4063 let byte = decoded[start + i];
4064 decoded.push(byte);
4065 }
4066 }
4067 };
4068
4069 matcher.add_data(first_block.clone(), |_| {});
4070 let mut history = Vec::new();
4071 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
4072 assert_eq!(history, first_block);
4073
4074 matcher.add_data(second_block.clone(), |_| {});
4075 let prefix_len = history.len();
4076 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
4077
4078 assert_eq!(&history[prefix_len..], second_block.as_slice());
4079}
4080
4081#[test]
4098fn dfast_accepts_exact_five_byte_match() {
4099 let mut data = Vec::new();
4108 data.extend_from_slice(b"ABCDE"); data.extend_from_slice(b"!!!!!!!!!!!!!!!!!!!!!!!"); data.extend_from_slice(b"ABCDE"); data.push(b'F'); assert_eq!(data.len(), 34);
4113
4114 let mut matcher = DfastMatchGenerator::new(1 << 22);
4115 matcher.add_data(data.clone(), |_| {});
4116
4117 let mut saw_five_byte_match = false;
4118 let mut saw_longer_match = false;
4119 matcher.start_matching(|seq| {
4120 if let Sequence::Triple {
4121 offset, match_len, ..
4122 } = seq
4123 {
4124 if offset == 28 && match_len == 5 {
4125 saw_five_byte_match = true;
4126 } else if offset == 28 && match_len > 5 {
4127 saw_longer_match = true;
4128 }
4129 }
4130 });
4131
4132 assert!(
4133 saw_five_byte_match,
4134 "dfast must accept the exact-5-byte match — a 6-byte floor would skip it"
4135 );
4136 assert!(
4137 !saw_longer_match,
4138 "fixture pinned to length 5 — byte 33 ('F') must terminate the extension"
4139 );
4140}
4141
4142#[test]
4143fn driver_switches_backends_and_initializes_dfast_via_reset() {
4144 let mut driver = MatchGeneratorDriver::new(32, 2);
4145
4146 driver.reset(CompressionLevel::Default);
4147 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Dfast);
4148 assert_eq!(driver.window_size(), (1u64 << 22));
4149
4150 let mut first = driver.get_next_space();
4151 first[..12].copy_from_slice(b"abcabcabcabc");
4152 first.truncate(12);
4153 driver.commit_space(first);
4154 assert_eq!(driver.get_last_space(), b"abcabcabcabc");
4155 driver.skip_matching_with_hint(None);
4156
4157 let mut second = driver.get_next_space();
4158 second[..12].copy_from_slice(b"abcabcabcabc");
4159 second.truncate(12);
4160 driver.commit_space(second);
4161
4162 let mut reconstructed = b"abcabcabcabc".to_vec();
4163 driver.start_matching(|seq| match seq {
4164 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4165 Sequence::Triple {
4166 literals,
4167 offset,
4168 match_len,
4169 } => {
4170 reconstructed.extend_from_slice(literals);
4171 let start = reconstructed.len() - offset;
4172 for i in 0..match_len {
4173 let byte = reconstructed[start + i];
4174 reconstructed.push(byte);
4175 }
4176 }
4177 });
4178 assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");
4179
4180 driver.reset(CompressionLevel::Fastest);
4181 assert_eq!(driver.window_size(), (1u64 << 19));
4182}
4183
4184#[test]
4185fn driver_level5_selects_row_backend() {
4186 let mut driver = MatchGeneratorDriver::new(32, 2);
4187 driver.reset(CompressionLevel::Level(5));
4188 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);
4189 assert_eq!(
4200 driver.row_matcher().lazy_depth,
4201 0,
4202 "L4 must route to start_matching_greedy (lazy_depth == 0)",
4203 );
4204}
4205
4206#[test]
4217fn driver_level4_greedy_round_trip_single_slice() {
4218 let mut driver = MatchGeneratorDriver::new(64, 2);
4219 driver.reset(CompressionLevel::Level(4));
4220 let input = b"abcdefgh_abcdefgh_abcdefgh_abcdefgh";
4221 let mut space = driver.get_next_space();
4222 space[..input.len()].copy_from_slice(input);
4223 space.truncate(input.len());
4224 driver.commit_space(space);
4225
4226 let mut reconstructed: Vec<u8> = Vec::new();
4227 let mut saw_triple = false;
4228 driver.start_matching(|seq| match seq {
4229 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4230 Sequence::Triple {
4231 literals,
4232 offset,
4233 match_len,
4234 } => {
4235 saw_triple = true;
4236 reconstructed.extend_from_slice(literals);
4237 let start = reconstructed.len() - offset;
4238 for i in 0..match_len {
4239 let byte = reconstructed[start + i];
4240 reconstructed.push(byte);
4241 }
4242 }
4243 });
4244 assert_eq!(
4245 reconstructed,
4246 input.to_vec(),
4247 "L4 greedy parse failed to reconstruct repeating-pattern input",
4248 );
4249 assert!(
4250 saw_triple,
4251 "L4 greedy parse on a repeating pattern must emit at least one match (Triple)",
4252 );
4253}
4254
4255#[test]
4256fn driver_level4_greedy_round_trip_cross_slice() {
4257 let mut driver = MatchGeneratorDriver::new(32, 4);
4262 driver.reset(CompressionLevel::Level(4));
4263 let chunk = b"the quick brown fox jumps over!!";
4264 assert_eq!(chunk.len(), 32);
4265
4266 let mut first = driver.get_next_space();
4267 first[..chunk.len()].copy_from_slice(chunk);
4268 first.truncate(chunk.len());
4269 driver.commit_space(first);
4270
4271 let mut first_recon: Vec<u8> = Vec::new();
4272 driver.start_matching(|seq| match seq {
4273 Sequence::Literals { literals } => first_recon.extend_from_slice(literals),
4274 Sequence::Triple {
4275 literals,
4276 offset,
4277 match_len,
4278 } => {
4279 first_recon.extend_from_slice(literals);
4280 let start = first_recon.len() - offset;
4281 for i in 0..match_len {
4282 let byte = first_recon[start + i];
4283 first_recon.push(byte);
4284 }
4285 }
4286 });
4287 assert_eq!(
4288 first_recon,
4289 chunk.to_vec(),
4290 "first slice failed to round-trip"
4291 );
4292
4293 let mut second = driver.get_next_space();
4294 second[..chunk.len()].copy_from_slice(chunk);
4295 second.truncate(chunk.len());
4296 driver.commit_space(second);
4297
4298 let mut full = first_recon.clone();
4299 let mut saw_cross_slice_match = false;
4300 driver.start_matching(|seq| match seq {
4301 Sequence::Literals { literals } => full.extend_from_slice(literals),
4302 Sequence::Triple {
4303 literals,
4304 offset,
4305 match_len,
4306 } => {
4307 if offset >= chunk.len() {
4311 saw_cross_slice_match = true;
4312 }
4313 full.extend_from_slice(literals);
4314 let start = full.len() - offset;
4315 for i in 0..match_len {
4316 let byte = full[start + i];
4317 full.push(byte);
4318 }
4319 }
4320 });
4321 let mut expected = chunk.to_vec();
4322 expected.extend_from_slice(chunk);
4323 assert_eq!(
4324 full, expected,
4325 "cross-slice L4 greedy parse failed to reconstruct"
4326 );
4327 assert!(
4328 saw_cross_slice_match,
4329 "L4 greedy parse must match across slice boundaries (history is shared)",
4330 );
4331}
4332
4333#[cfg(test)]
4337fn l4_greedy_round_trip(slice_size: usize, max_slices: usize, data: &[u8]) -> (usize, usize) {
4338 let mut driver = MatchGeneratorDriver::new(slice_size, max_slices);
4339 driver.reset(CompressionLevel::Level(4));
4340
4341 let mut reconstructed: Vec<u8> = Vec::with_capacity(data.len());
4342 let mut triple_count = 0usize;
4343 let mut max_offset = 0usize;
4344
4345 let mut offset_in_data = 0usize;
4350 while offset_in_data < data.len() {
4351 let mut space = driver.get_next_space();
4352 let space_cap = space.len();
4353 let take = (data.len() - offset_in_data).min(space_cap);
4354 space[..take].copy_from_slice(&data[offset_in_data..offset_in_data + take]);
4355 space.truncate(take);
4356 driver.commit_space(space);
4357 offset_in_data += take;
4358
4359 driver.start_matching(|seq| match seq {
4360 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4361 Sequence::Triple {
4362 literals,
4363 offset,
4364 match_len,
4365 } => {
4366 triple_count += 1;
4367 if offset > max_offset {
4368 max_offset = offset;
4369 }
4370 reconstructed.extend_from_slice(literals);
4371 let start = reconstructed.len() - offset;
4372 for i in 0..match_len {
4373 let byte = reconstructed[start + i];
4374 reconstructed.push(byte);
4375 }
4376 }
4377 });
4378 }
4379
4380 if data.is_empty() {
4384 let mut space = driver.get_next_space();
4385 space.truncate(0);
4386 driver.commit_space(space);
4387 driver.start_matching(|seq| match seq {
4388 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4389 Sequence::Triple { .. } => panic!("empty input must not emit any matches"),
4390 });
4391 }
4392
4393 assert_eq!(reconstructed, data, "L4 greedy round-trip diverged");
4394 (triple_count, max_offset)
4395}
4396
4397#[test]
4408fn driver_level5_greedy_tail_rep_only_reachable() {
4409 let first: &[u8] = b"ABCDABCDABCDABCD"; let second: &[u8] = b"ABCDA"; let mut driver = MatchGeneratorDriver::new(16, 2);
4424 driver.reset(CompressionLevel::Level(5));
4425
4426 let mut first_space = driver.get_next_space();
4427 first_space[..first.len()].copy_from_slice(first);
4428 first_space.truncate(first.len());
4429 driver.commit_space(first_space);
4430 driver.start_matching(|_| {});
4431
4432 let mut second_space = driver.get_next_space();
4433 second_space[..second.len()].copy_from_slice(second);
4434 second_space.truncate(second.len());
4435 driver.commit_space(second_space);
4436
4437 let mut second_slice_triples = 0usize;
4438 driver.start_matching(|seq| {
4439 if matches!(seq, Sequence::Triple { .. }) {
4440 second_slice_triples += 1;
4441 }
4442 });
4443
4444 assert!(
4445 second_slice_triples >= 1,
4446 "tail rep-only position must produce a match in the second slice \
4447 (got {second_slice_triples} triples)",
4448 );
4449}
4450
4451#[test]
4452fn driver_level4_greedy_empty_input_emits_nothing() {
4453 let mut driver = MatchGeneratorDriver::new(64, 2);
4457 driver.reset(CompressionLevel::Level(4));
4458 let mut space = driver.get_next_space();
4463 space.truncate(0);
4464 driver.commit_space(space);
4465 let mut emitted_anything = false;
4466 driver.start_matching(|_| emitted_anything = true);
4467 assert!(!emitted_anything, "empty slice must not emit any sequences",);
4468}
4469
4470#[test]
4471fn driver_level4_greedy_sub_min_lookahead_input() {
4472 let data: &[u8] = b"abcd"; let (triples, _) = l4_greedy_round_trip(64, 2, data);
4477 assert_eq!(
4478 triples, 0,
4479 "sub-min-lookahead input must not emit any matches (got {triples})",
4480 );
4481}
4482
4483#[test]
4484fn driver_level4_greedy_incompressible_input() {
4485 let mut data = alloc::vec::Vec::with_capacity(256);
4490 let mut x: u32 = 0xDEAD_BEEF;
4491 for _ in 0..256 {
4492 x = x.wrapping_mul(1_103_515_245).wrapping_add(12345);
4493 data.push((x >> 16) as u8);
4494 }
4495 let (_triples, _) = l4_greedy_round_trip(64, 8, &data);
4496 }
4499
4500#[test]
4501fn driver_level4_greedy_long_literal_run_skip_step_growth() {
4502 let mut data = alloc::vec::Vec::with_capacity(2048);
4517 let mut x: u32 = 0xC0FF_EE00;
4518 for _ in 0..2048 {
4519 x = x.wrapping_mul(0x9E37_79B9).wrapping_add(0xCAFEBABE);
4520 data.push((x >> 24) as u8);
4521 }
4522 let (_triples, _) = l4_greedy_round_trip(512, 8, &data);
4523}
4524
4525#[test]
4526fn driver_level4_greedy_all_zeros_heavy_rep1() {
4527 let data: Vec<u8> = alloc::vec![0u8; 128];
4532 let (triples, max_offset) = l4_greedy_round_trip(64, 8, &data);
4533 assert!(
4534 triples >= 1,
4535 "all-zeros input must produce at least one rep1 match",
4536 );
4537 assert_eq!(
4541 max_offset, 1,
4542 "all-zeros L4 greedy parse should commit at offset 1 (got {max_offset})",
4543 );
4544}
4545
4546#[test]
4552fn driver_level4_greedy_periodic_pattern_rep_cascade() {
4553 let unit: &[u8] = b"alpha_beta_gamma";
4554 assert_eq!(unit.len(), 16);
4555 let mut data: Vec<u8> = Vec::with_capacity(unit.len() * 32);
4556 for _ in 0..32 {
4557 data.extend_from_slice(unit);
4558 }
4559 let (triples, max_offset) = l4_greedy_round_trip(64, 16, &data);
4560 assert!(
4561 triples >= 1,
4562 "periodic 16-byte payload must emit matches (got {triples})",
4563 );
4564 assert!(
4565 max_offset >= 16,
4566 "periodic 16-byte payload must produce at least one offset >= 16 \
4567 (got max_offset = {max_offset})",
4568 );
4569}
4570
4571#[test]
4572fn driver_reset_keeps_strategy_tag_in_sync_with_active_backend() {
4573 use super::strategy::StrategyTag;
4574
4575 fn check(level: CompressionLevel, expected: StrategyTag) {
4576 let mut driver = MatchGeneratorDriver::new(32, 2);
4577 driver.reset(level);
4578 assert_eq!(
4579 driver.strategy_tag, expected,
4580 "strategy_tag wrong for {level:?}"
4581 );
4582 assert_eq!(
4583 driver.strategy_tag.backend(),
4584 driver.active_backend(),
4585 "strategy_tag backend disagrees with active_backend for {level:?}"
4586 );
4587 }
4588
4589 check(CompressionLevel::Level(1), StrategyTag::Fast);
4590 check(CompressionLevel::Level(2), StrategyTag::Fast);
4591 check(CompressionLevel::Level(3), StrategyTag::Dfast);
4592 check(CompressionLevel::Level(4), StrategyTag::Dfast);
4593 check(CompressionLevel::Level(5), StrategyTag::Greedy);
4594 check(CompressionLevel::Level(7), StrategyTag::Lazy);
4595 check(CompressionLevel::Level(15), StrategyTag::Lazy);
4596 check(CompressionLevel::Level(16), StrategyTag::BtOpt);
4597 check(CompressionLevel::Level(18), StrategyTag::BtUltra);
4598 check(CompressionLevel::Level(22), StrategyTag::BtUltra2);
4599 check(CompressionLevel::Fastest, StrategyTag::Fast);
4600 check(CompressionLevel::Default, StrategyTag::Dfast);
4601 check(CompressionLevel::Better, StrategyTag::Lazy);
4602 check(CompressionLevel::Best, StrategyTag::Lazy);
4603}
4604
4605#[test]
4606fn level_16_17_map_to_btopt_strategy() {
4607 use super::strategy::{BackendTag, StrategyTag};
4608 let p16 = resolve_level_params(CompressionLevel::Level(16), None);
4609 let p17 = resolve_level_params(CompressionLevel::Level(17), None);
4610 assert_eq!(p16.backend(), BackendTag::HashChain);
4611 assert_eq!(p17.backend(), BackendTag::HashChain);
4612 assert_eq!(StrategyTag::for_level(16), StrategyTag::BtOpt);
4613 assert_eq!(StrategyTag::for_level(17), StrategyTag::BtOpt);
4614}
4615
4616#[test]
4617fn level_18_19_map_to_btultra_strategy() {
4618 use super::strategy::{BackendTag, StrategyTag};
4619 let p18 = resolve_level_params(CompressionLevel::Level(18), None);
4620 let p19 = resolve_level_params(CompressionLevel::Level(19), None);
4621 assert_eq!(p18.backend(), BackendTag::HashChain);
4622 assert_eq!(p19.backend(), BackendTag::HashChain);
4623 assert_eq!(StrategyTag::for_level(18), StrategyTag::BtUltra);
4624 assert_eq!(StrategyTag::for_level(19), StrategyTag::BtUltra);
4625}
4626
4627#[test]
4628fn level_20_22_map_to_btultra2_strategy() {
4629 use super::strategy::{BackendTag, StrategyTag};
4630 for level in 20..=22 {
4631 let params = resolve_level_params(CompressionLevel::Level(level), None);
4632 assert_eq!(params.backend(), BackendTag::HashChain);
4633 assert_eq!(StrategyTag::for_level(level as u8), StrategyTag::BtUltra2);
4634 }
4635}
4636
4637#[test]
4638fn level22_uses_donor_target_length_and_large_input_tables() {
4639 let params = resolve_level_params(CompressionLevel::Level(22), None);
4640 assert_eq!(params.window_log, 27);
4641 assert_eq!(params.hc.hash_log, 25);
4642 assert_eq!(params.hc.chain_log, 27);
4643 assert_eq!(params.hc.search_depth, 1 << 9);
4644 assert_eq!(params.hc.target_len, 999);
4645}
4646
4647#[test]
4648fn level22_source_size_hint_uses_donor_btultra2_tiers() {
4649 let p16k = resolve_level_params(CompressionLevel::Level(22), Some(16 * 1024));
4650 assert_eq!(p16k.window_log, 14);
4651 assert_eq!(p16k.hc.hash_log, 15);
4652 assert_eq!(p16k.hc.chain_log, 15);
4653 assert_eq!(p16k.hc.search_depth, 1 << 10);
4654 assert_eq!(p16k.hc.target_len, 999);
4655
4656 let p128k = resolve_level_params(CompressionLevel::Level(22), Some(128 * 1024));
4657 assert_eq!(p128k.window_log, 17);
4658 assert_eq!(p128k.hc.hash_log, 17);
4659 assert_eq!(p128k.hc.chain_log, 18);
4660 assert_eq!(p128k.hc.search_depth, 1 << 11);
4661 assert_eq!(p128k.hc.target_len, 999);
4662
4663 let p256k = resolve_level_params(CompressionLevel::Level(22), Some(256 * 1024));
4664 assert_eq!(p256k.window_log, 18);
4665 assert_eq!(p256k.hc.hash_log, 19);
4666 assert_eq!(p256k.hc.chain_log, 19);
4667 assert_eq!(p256k.hc.search_depth, 1 << 13);
4668 assert_eq!(p256k.hc.target_len, 999);
4669}
4670
4671#[test]
4672fn level22_small_source_size_hint_matches_donor_cparams() {
4673 use zstd::zstd_safe::zstd_sys;
4674
4675 let source_size = 15_027u64;
4676 let donor = unsafe { zstd_sys::ZSTD_getCParams(22, source_size, 0) };
4677 let params = resolve_level_params(CompressionLevel::Level(22), Some(source_size));
4678
4679 assert_eq!(params.window_log as u32, donor.windowLog);
4680 assert_eq!(params.hc.chain_log as u32, donor.chainLog);
4681 assert_eq!(params.hc.hash_log as u32, donor.hashLog);
4682 assert_eq!(params.hc.search_depth as u32, 1u32 << donor.searchLog);
4683 assert_eq!(HC_OPT_MIN_MATCH_LEN as u32, donor.minMatch);
4684 assert_eq!(params.hc.target_len as u32, donor.targetLength);
4685}
4686
4687#[test]
4688fn level22_small_source_uses_window_bounded_hash3_log() {
4689 let mut hc = HcMatchGenerator::new(1 << 14);
4690 hc.configure(
4691 BTULTRA2_HC_CONFIG_L22_16K,
4692 super::strategy::StrategyTag::BtUltra2,
4693 14,
4694 );
4695 assert_eq!(hc.table.hash3_log, 14);
4696
4697 hc.configure(
4698 BTULTRA2_HC_CONFIG_L22,
4699 super::strategy::StrategyTag::BtUltra2,
4700 27,
4701 );
4702 assert_eq!(hc.table.hash3_log, HC3_HASH_LOG);
4703}
4704
4705#[test]
4706fn btultra2_seed_pass_initializes_opt_state() {
4707 let mut hc = HcMatchGenerator::new(1 << 20);
4708 hc.configure(
4709 BTULTRA2_HC_CONFIG,
4710 super::strategy::StrategyTag::BtUltra2,
4711 26,
4712 );
4713 let data: Vec<u8> = (0..32 * 1024).map(|i| (i % 251) as u8).collect();
4714 hc.table.add_data(data, |_| {});
4715 hc.start_matching(|_| {});
4716 assert!(
4717 hc.backend.bt_mut().opt_state.lit_length_sum > 0,
4718 "btultra2 first block should seed non-zero sequence statistics"
4719 );
4720 assert!(
4721 hc.backend.bt_mut().opt_state.off_code_sum > 0,
4722 "btultra2 first block should seed offset-code statistics"
4723 );
4724}
4725
4726#[test]
4727fn btultra2_profile_disables_small_offset_handicap() {
4728 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4734 assert!(
4735 !profile.favor_small_offsets,
4736 "btultra2 should match donor opt2 offset pricing"
4737 );
4738 assert!(
4739 profile.accurate,
4740 "btultra2 should use donor opt2 accurate pricing"
4741 );
4742}
4743
4744#[test]
4745fn btultra_profile_keeps_donor_search_depth_budget() {
4746 let p = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra>();
4747 assert_eq!(
4748 p.max_chain_depth, 32,
4749 "btultra should not cap chain depth below donor opt2 search budget"
4750 );
4751}
4752
4753#[test]
4754fn btopt_profile_keeps_donor_search_depth_budget() {
4755 let p = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtOpt>();
4756 assert_eq!(
4757 p.max_chain_depth, 32,
4758 "btopt should not cap chain depth below donor btopt search budget"
4759 );
4760}
4761
4762#[test]
4763fn sufficient_match_len_is_clamped_by_target_len() {
4764 let mut hc = HcMatchGenerator::new(1 << 20);
4765 hc.configure(
4766 BTULTRA2_HC_CONFIG,
4767 super::strategy::StrategyTag::BtUltra2,
4768 26,
4769 );
4770 hc.hc.target_len = 13;
4771 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4772 assert_eq!(hc.hc.sufficient_match_len_for_pass(profile), 13);
4773}
4774
4775#[test]
4776fn opt_modes_use_target_len_as_sufficient_len() {
4777 use super::strategy;
4778 let mut hc = HcMatchGenerator::new(1 << 20);
4779 hc.hc.target_len = 57;
4780 let profiles = [
4781 HcOptimalCostProfile::const_for_strategy::<strategy::BtOpt>(),
4782 HcOptimalCostProfile::const_for_strategy::<strategy::BtUltra>(),
4783 HcOptimalCostProfile::const_for_strategy::<strategy::BtUltra2>(),
4784 ];
4785 for profile in profiles {
4786 assert_eq!(hc.hc.sufficient_match_len_for_pass(profile), 57);
4787 }
4788}
4789
4790#[test]
4791fn sufficient_match_len_is_capped_by_opt_num() {
4792 let mut hc = HcMatchGenerator::new(1 << 20);
4793 hc.hc.target_len = usize::MAX / 2;
4794 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4795 assert_eq!(hc.hc.sufficient_match_len_for_pass(profile), HC_OPT_NUM - 1);
4796}
4797
4798#[test]
4799#[allow(clippy::borrow_deref_ref)]
4800fn dictionary_entropy_seed_initializes_opt_state_from_tables() {
4801 let mut hc = HcMatchGenerator::new(1 << 20);
4802 hc.configure(
4803 BTULTRA2_HC_CONFIG,
4804 super::strategy::StrategyTag::BtUltra2,
4805 26,
4806 );
4807
4808 let huff = crate::huff0::huff0_encoder::HuffmanTable::build_from_data(
4809 b"aaabbbbccccddddeeeeefffffgggg",
4810 );
4811 let ll = crate::fse::fse_encoder::default_ll_table();
4812 let ml = crate::fse::fse_encoder::default_ml_table();
4813 let of = crate::fse::fse_encoder::default_of_table();
4814 hc.seed_dictionary_entropy(Some(&huff), Some(&*ll), Some(&*ml), Some(&*of));
4815
4816 hc.backend.bt_mut().opt_state.rescale_freqs(
4817 b"abcd",
4818 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4819 );
4820
4821 let base_ll_freqs: [u32; HC_MAX_LL + 1] = [
4822 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4823 1, 1, 1, 1, 1, 1,
4824 ];
4825
4826 assert_ne!(
4827 hc.backend.bt_mut().opt_state.lit_length_freq,
4828 base_ll_freqs,
4829 "dictionary entropy should override fallback LL bootstrap frequencies"
4830 );
4831 assert!(
4832 hc.backend
4833 .bt_mut()
4834 .opt_state
4835 .match_length_freq
4836 .iter()
4837 .any(|&v| v != 1),
4838 "dictionary entropy should seed non-uniform ML frequencies"
4839 );
4840 assert_ne!(
4841 hc.backend.bt_mut().opt_state.off_code_freq[0],
4842 6,
4843 "dictionary entropy should override fallback OF bootstrap frequencies"
4844 );
4845}
4846
4847#[test]
4848#[allow(clippy::borrow_deref_ref)]
4849fn dictionary_fse_seed_applies_without_huffman_seed() {
4850 let mut hc = HcMatchGenerator::new(1 << 20);
4851 hc.configure(
4852 BTULTRA2_HC_CONFIG,
4853 super::strategy::StrategyTag::BtUltra2,
4854 26,
4855 );
4856
4857 let ll = crate::fse::fse_encoder::default_ll_table();
4858 let ml = crate::fse::fse_encoder::default_ml_table();
4859 let of = crate::fse::fse_encoder::default_of_table();
4860 hc.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
4861 hc.backend.bt_mut().opt_state.rescale_freqs(
4862 b"abcd",
4863 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4864 );
4865
4866 let base_ll_freqs: [u32; HC_MAX_LL + 1] = [
4867 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4868 1, 1, 1, 1, 1, 1,
4869 ];
4870 assert_ne!(
4871 hc.backend.bt_mut().opt_state.lit_length_freq,
4872 base_ll_freqs,
4873 "FSE seed should still override LL bootstrap frequencies without huffman seed"
4874 );
4875 assert!(
4876 hc.backend
4877 .bt_mut()
4878 .opt_state
4879 .match_length_freq
4880 .iter()
4881 .any(|&v| v != 1),
4882 "FSE seed should still seed non-uniform ML frequencies"
4883 );
4884 assert_ne!(
4885 hc.backend.bt_mut().opt_state.off_code_freq[0],
4886 6,
4887 "FSE seed should still override OF bootstrap frequencies without huffman seed"
4888 );
4889}
4890
4891#[test]
4892#[allow(clippy::borrow_deref_ref)]
4893fn dictionary_seed_overrides_predef_price_mode_on_tiny_input() {
4894 let mut hc = HcMatchGenerator::new(1 << 20);
4895 hc.configure(
4896 BTULTRA2_HC_CONFIG,
4897 super::strategy::StrategyTag::BtUltra2,
4898 26,
4899 );
4900
4901 let ll = crate::fse::fse_encoder::default_ll_table();
4902 let ml = crate::fse::fse_encoder::default_ml_table();
4903 let of = crate::fse::fse_encoder::default_of_table();
4904 hc.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
4905 hc.backend.bt_mut().opt_state.rescale_freqs(
4906 b"abc",
4907 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4908 );
4909 assert!(
4910 matches!(
4911 hc.backend.bt_mut().opt_state.price_type,
4912 HcOptPriceType::Dynamic
4913 ),
4914 "dictionary-seeded first block should stay in dynamic mode even for tiny src"
4915 );
4916}
4917
4918#[test]
4919fn lit_length_price_blocksize_max_costs_one_extra_bit() {
4920 let profile_predef = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4921 let mut stats_predef = HcOptState::new();
4922 stats_predef.price_type = HcOptPriceType::Predefined;
4923 let predef_max = profile_predef.lit_length_price(&stats_predef, HC_BLOCKSIZE_MAX);
4924 let predef_prev =
4925 profile_predef.lit_length_price(&stats_predef, HC_BLOCKSIZE_MAX.saturating_sub(1));
4926 assert_eq!(
4927 predef_max,
4928 predef_prev + HC_BITCOST_MULTIPLIER,
4929 "predefined litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
4930 );
4931
4932 let profile_dyn = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4933 let mut stats_dyn = HcOptState::new();
4934 stats_dyn.price_type = HcOptPriceType::Dynamic;
4935 stats_dyn.lit_length_freq.fill(1);
4936 stats_dyn.lit_length_sum = (HC_MAX_LL + 1) as u32;
4937 stats_dyn.match_length_freq.fill(1);
4938 stats_dyn.match_length_sum = (HC_MAX_ML + 1) as u32;
4939 stats_dyn.off_code_freq.fill(1);
4940 stats_dyn.off_code_sum = (HC_MAX_OFF + 1) as u32;
4941 stats_dyn.lit_freq.fill(1);
4942 stats_dyn.lit_sum = (HC_MAX_LIT + 1) as u32;
4943 stats_dyn.set_base_prices(true);
4944 let dyn_max = profile_dyn.lit_length_price(&stats_dyn, HC_BLOCKSIZE_MAX);
4945 let dyn_prev = profile_dyn.lit_length_price(&stats_dyn, HC_BLOCKSIZE_MAX.saturating_sub(1));
4946 assert_eq!(
4947 dyn_max,
4948 dyn_prev + HC_BITCOST_MULTIPLIER,
4949 "dynamic litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
4950 );
4951}
4952
4953#[test]
4954#[allow(clippy::borrow_deref_ref)]
4955fn btultra2_seed_pass_disabled_when_dictionary_entropy_seed_present() {
4956 let mut hc = HcMatchGenerator::new(1 << 20);
4957 hc.configure(
4958 BTULTRA2_HC_CONFIG,
4959 super::strategy::StrategyTag::BtUltra2,
4960 26,
4961 );
4962 let ll = crate::fse::fse_encoder::default_ll_table();
4963 let ml = crate::fse::fse_encoder::default_ml_table();
4964 let of = crate::fse::fse_encoder::default_of_table();
4965 hc.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
4966 assert!(
4967 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1),
4968 "dictionary-seeded first block should skip btultra2 warmup pass"
4969 );
4970}
4971
4972#[test]
4973fn btultra2_seed_pass_disabled_when_prefix_history_exists() {
4974 let mut hc = HcMatchGenerator::new(1 << 20);
4975 hc.configure(
4976 BTULTRA2_HC_CONFIG,
4977 super::strategy::StrategyTag::BtUltra2,
4978 26,
4979 );
4980 hc.table.history_abs_start = 17;
4981 hc.table.window.push_back(b"abcdefghijklmnop".to_vec());
4982 assert!(
4983 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 9),
4984 "btultra2 warmup must be first-block only (no prefix history)"
4985 );
4986}
4987
4988#[test]
4989fn btultra2_seed_pass_disabled_for_tiny_block() {
4990 let mut hc = HcMatchGenerator::new(1 << 20);
4991 hc.configure(
4992 BTULTRA2_HC_CONFIG,
4993 super::strategy::StrategyTag::BtUltra2,
4994 26,
4995 );
4996 assert!(
4997 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD),
4998 "btultra2 warmup should not run at or below predefined threshold"
4999 );
5000}
5001
5002#[test]
5003fn btultra2_seed_pass_disabled_after_stats_initialized() {
5004 let mut hc = HcMatchGenerator::new(1 << 20);
5005 hc.configure(
5006 BTULTRA2_HC_CONFIG,
5007 super::strategy::StrategyTag::BtUltra2,
5008 26,
5009 );
5010 hc.backend.bt_mut().opt_state.lit_length_sum = 1;
5011 assert!(
5012 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 32),
5013 "btultra2 warmup should run only for first block before stats are initialized"
5014 );
5015}
5016
5017#[test]
5018fn btultra2_seed_pass_disabled_when_not_at_frame_start() {
5019 let mut hc = HcMatchGenerator::new(1 << 20);
5020 hc.configure(
5021 BTULTRA2_HC_CONFIG,
5022 super::strategy::StrategyTag::BtUltra2,
5023 26,
5024 );
5025 hc.table.window_size = HC_PREDEF_THRESHOLD + 64;
5028 hc.table
5029 .window
5030 .push_back(alloc::vec![b'A'; HC_PREDEF_THRESHOLD + 32]);
5031 assert!(
5032 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 32),
5033 "btultra2 warmup must not run after frame start"
5034 );
5035}
5036
5037#[test]
5038fn btultra2_seed_pass_disabled_when_ldm_sequences_exist() {
5039 let mut hc = HcMatchGenerator::new(1 << 20);
5040 hc.configure(
5041 BTULTRA2_HC_CONFIG,
5042 super::strategy::StrategyTag::BtUltra2,
5043 26,
5044 );
5045 hc.table.window_size = HC_PREDEF_THRESHOLD + 64;
5046 hc.table
5047 .window
5048 .push_back(alloc::vec![b'A'; HC_PREDEF_THRESHOLD + 64]);
5049 hc.backend.bt_mut().ldm_sequences.push(HcRawSeq {
5050 lit_length: 8,
5051 offset: 16,
5052 match_length: 32,
5053 });
5054 assert!(
5055 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 32),
5056 "btultra2 warmup must not run when LDM already produced sequences"
5057 );
5058}
5059
5060#[test]
5061fn literal_price_uses_eight_bits_when_literals_uncompressed() {
5062 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
5063 let mut stats = HcOptState::new();
5064 stats.set_literals_compressed_for_tests(false);
5065 stats.price_type = HcOptPriceType::Predefined;
5066 assert_eq!(
5067 profile.literal_price(&stats, b'a'),
5068 8 * HC_BITCOST_MULTIPLIER,
5069 "uncompressed literals should cost 8 bits regardless of price mode"
5070 );
5071}
5072
5073#[test]
5074fn update_stats_skips_literal_frequencies_when_uncompressed() {
5075 let mut stats = HcOptState::new();
5076 stats.set_literals_compressed_for_tests(false);
5077 stats.update_stats(3, b"abc", 4, 8);
5078 assert_eq!(
5079 stats.lit_sum, 0,
5080 "literal sum must remain unchanged when literal compression is disabled"
5081 );
5082 assert_eq!(
5083 stats.lit_freq.iter().copied().sum::<u32>(),
5084 0,
5085 "literal frequencies must not be updated when literal compression is disabled"
5086 );
5087 assert_eq!(
5088 stats.lit_length_sum, 1,
5089 "literal-length stats still update for sequence modeling"
5090 );
5091 assert_eq!(
5092 stats.match_length_sum, 1,
5093 "match-length stats still update for sequence modeling"
5094 );
5095 assert_eq!(
5096 stats.off_code_sum, 1,
5097 "offset-code stats still update for sequence modeling"
5098 );
5099}
5100
5101#[test]
5102#[allow(clippy::borrow_deref_ref)]
5103fn dictionary_huffman_seed_ignored_when_literals_uncompressed() {
5104 let mut stats = HcOptState::new();
5105 stats.set_literals_compressed_for_tests(false);
5106 let huff = crate::huff0::huff0_encoder::HuffmanTable::build_from_data(
5107 b"aaaaabbbbcccddeeff00112233445566778899",
5108 );
5109 let ll = crate::fse::fse_encoder::default_ll_table();
5110 let ml = crate::fse::fse_encoder::default_ml_table();
5111 let of = crate::fse::fse_encoder::default_of_table();
5112 stats.seed_dictionary_entropy(Some(&huff), Some(&*ll), Some(&*ml), Some(&*of));
5113 stats.rescale_freqs(
5114 b"abcd",
5115 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
5116 );
5117 assert_eq!(
5118 stats.lit_sum, 0,
5119 "literal sum must stay zero when literals are uncompressed"
5120 );
5121 assert_eq!(
5122 stats.lit_freq.iter().copied().sum::<u32>(),
5123 0,
5124 "literal frequencies must ignore dictionary huffman seed when uncompressed"
5125 );
5126}
5127
5128#[test]
5129fn hc_repcode_candidates_respect_litlen_dependent_rep_order() {
5130 let mut hc = HcMatchGenerator::new(64);
5131 hc.table.history = b"xxxxxxABCDEFABCDEF".to_vec();
5132 hc.table.history_start = 0;
5133 hc.table.history_abs_start = 0;
5134
5135 let abs_pos = 12usize; let current_abs_end = hc.table.history.len();
5137 let reps = [6u32, 3u32, 9u32];
5138
5139 let mut lit_pos_candidates = Vec::new();
5140 hc.hc.for_each_repcode_candidate_with_reps(
5141 &hc.table,
5142 abs_pos,
5143 1,
5144 reps,
5145 current_abs_end,
5146 HC_OPT_MIN_MATCH_LEN,
5147 |c| {
5148 lit_pos_candidates.push(c.offset);
5149 },
5150 );
5151 assert!(
5152 lit_pos_candidates.contains(&6),
5153 "when lit_len>0, rep0 should be considered and match"
5154 );
5155
5156 let mut ll0_candidates = Vec::new();
5157 hc.hc.for_each_repcode_candidate_with_reps(
5158 &hc.table,
5159 abs_pos,
5160 0,
5161 reps,
5162 current_abs_end,
5163 HC_OPT_MIN_MATCH_LEN,
5164 |c| {
5165 ll0_candidates.push(c.offset);
5166 },
5167 );
5168 assert!(
5169 !ll0_candidates.contains(&6),
5170 "when lit_len==0, rep0 is not directly eligible (ll0 semantics)"
5171 );
5172}
5173
5174#[test]
5175fn hc_collect_optimal_candidates_keeps_reps_when_chain_depth_zero() {
5176 let mut hc = HcMatchGenerator::new(64);
5177 hc.hc.search_depth = 0;
5178 hc.table.history = b"xyzxyzxyzxyz".to_vec();
5179 hc.table.history_start = 0;
5180 hc.table.history_abs_start = 0;
5181
5182 let abs_pos = 6usize;
5183 let current_abs_end = hc.table.history.len();
5184 let profile = HcOptimalCostProfile {
5185 max_chain_depth: 0,
5186 sufficient_match_len: usize::MAX / 2,
5187 accurate: false,
5188 favor_small_offsets: false,
5189 };
5190 let mut out = Vec::new();
5191 hc.collect_optimal_candidates(
5192 abs_pos,
5193 current_abs_end,
5194 profile,
5195 HcCandidateQuery {
5196 reps: [3, 6, 9],
5197 lit_len: 1,
5198 ldm_candidate: None,
5199 },
5200 &mut out,
5201 );
5202 assert!(
5203 !out.is_empty(),
5204 "rep candidates should remain available even when chain depth is zero"
5205 );
5206 assert!(
5207 out.iter().any(|c| c.offset == 3),
5208 "rep0 candidate should be retained"
5209 );
5210}
5211
5212#[test]
5213fn hc_collect_optimal_candidates_rep_tail_match_skips_chain_probe() {
5214 let mut hc = HcMatchGenerator::new(64);
5215 hc.table.history = b"aaaaaaaaaa".to_vec();
5216 hc.table.history_start = 0;
5217 hc.table.history_abs_start = 0;
5218 hc.table.position_base = 0;
5219 hc.hc.search_depth = 32;
5220 let abs_pos = 6usize;
5221 hc.table.ensure_tables();
5222 hc.table.insert_positions(0, abs_pos);
5223
5224 let profile = HcOptimalCostProfile {
5225 max_chain_depth: 32,
5226 sufficient_match_len: usize::MAX / 2,
5227 accurate: true,
5228 favor_small_offsets: false,
5229 };
5230 let mut out = Vec::new();
5231 hc.collect_optimal_candidates(
5232 abs_pos,
5233 hc.table.history.len(),
5234 profile,
5235 HcCandidateQuery {
5236 reps: [1, 4, 8],
5237 lit_len: 1,
5238 ldm_candidate: None,
5239 },
5240 &mut out,
5241 );
5242
5243 assert!(
5244 out.iter()
5245 .all(|candidate| matches!(candidate.offset, 1 | 4)),
5246 "terminal rep match should return before chain probing adds non-rep offsets"
5247 );
5248}
5249
5250#[test]
5251fn hc_collect_optimal_candidates_long_chain_match_advances_skip_window() {
5252 let mut hc = HcMatchGenerator::new(128);
5253 hc.table.history = b"abcabcabcabcabcabcabcabc".to_vec();
5254 hc.table.history_start = 0;
5255 hc.table.history_abs_start = 0;
5256 hc.table.position_base = 0;
5257 hc.hc.search_depth = 32;
5258 let abs_pos = 9usize;
5259 hc.table.ensure_tables();
5260 hc.table.insert_positions(0, abs_pos);
5261 hc.table.skip_insert_until_abs = 0;
5262
5263 let profile = HcOptimalCostProfile {
5264 max_chain_depth: 32,
5265 sufficient_match_len: usize::MAX / 2,
5266 accurate: true,
5267 favor_small_offsets: false,
5268 };
5269 let mut out = Vec::new();
5270 hc.collect_optimal_candidates(
5271 abs_pos,
5272 hc.table.history.len(),
5273 profile,
5274 HcCandidateQuery {
5275 reps: [1, 4, 8],
5276 lit_len: 1,
5277 ldm_candidate: None,
5278 },
5279 &mut out,
5280 );
5281
5282 assert!(
5283 hc.table.skip_insert_until_abs > abs_pos,
5284 "long chain match should advance skip window to avoid redundant immediate insertions"
5285 );
5286}
5287
5288#[test]
5289fn hc_collect_optimal_candidates_chain_fast_skip_uses_match_end_minus_8() {
5290 let mut hc = HcMatchGenerator::new(128);
5291 hc.table.history = b"abcabcabcabcabcabcabcabc".to_vec();
5292 hc.table.history_start = 0;
5293 hc.table.history_abs_start = 0;
5294 hc.table.position_base = 0;
5295 hc.hc.search_depth = 32;
5296 let abs_pos = 9usize;
5297 hc.table.ensure_tables();
5298 hc.table.insert_positions(0, abs_pos);
5299 hc.table.skip_insert_until_abs = 0;
5300
5301 let profile = HcOptimalCostProfile {
5302 max_chain_depth: 32,
5303 sufficient_match_len: 10,
5304 accurate: true,
5305 favor_small_offsets: false,
5306 };
5307 let mut out = Vec::new();
5308 hc.collect_optimal_candidates(
5309 abs_pos,
5310 hc.table.history.len(),
5311 profile,
5312 HcCandidateQuery {
5313 reps: [1, 4, 8],
5314 lit_len: 1,
5315 ldm_candidate: None,
5316 },
5317 &mut out,
5318 );
5319
5320 let best_match_end = out
5321 .iter()
5322 .map(|candidate| candidate.start.saturating_add(candidate.match_len))
5323 .max()
5324 .expect("expected at least one candidate");
5325 assert!(
5326 hc.table.skip_insert_until_abs > abs_pos,
5327 "chain fast-skip must advance past current position"
5328 );
5329 assert!(
5330 hc.table.skip_insert_until_abs <= best_match_end.saturating_sub(8),
5331 "chain fast-skip must not exceed donor-style matchEndIdx - 8 bound"
5332 );
5333}
5334
5335#[test]
5336fn hc_collect_optimal_candidates_advances_skip_window_on_plain_bt_path() {
5337 let mut hc = HcMatchGenerator::new(256);
5338 hc.table.history = b"abcdefghijklmnop".to_vec();
5339 hc.table.history_start = 0;
5340 hc.table.history_abs_start = 0;
5341 hc.table.position_base = 0;
5342 hc.hc.search_depth = 0;
5343 hc.table.ensure_tables();
5344
5345 let abs_pos = 8usize;
5346 hc.table.skip_insert_until_abs = 0;
5347
5348 let profile = HcOptimalCostProfile {
5349 max_chain_depth: 0,
5350 sufficient_match_len: usize::MAX / 2,
5351 accurate: true,
5352 favor_small_offsets: false,
5353 };
5354 let mut out = Vec::new();
5355 hc.collect_optimal_candidates(
5356 abs_pos,
5357 hc.table.history.len(),
5358 profile,
5359 HcCandidateQuery {
5360 reps: [1, 4, 8],
5361 lit_len: 1,
5362 ldm_candidate: None,
5363 },
5364 &mut out,
5365 );
5366
5367 assert_eq!(
5368 hc.table.skip_insert_until_abs,
5369 abs_pos.saturating_add(1),
5370 "plain BT path should advance skip window by 1 via donor matchEndIdx baseline"
5371 );
5372}
5373
5374#[test]
5387fn hc_ldm_candidates_are_merged_into_optimal_candidates() {
5388 let mut hc = HcMatchGenerator::new(512);
5389 hc.table.history = (0..256).map(|i| (i % 251) as u8).collect();
5390 hc.table.history_start = 0;
5391 hc.table.history_abs_start = 0;
5392
5393 let abs_pos = 128usize;
5394 let current_abs_end = 256usize;
5395 let ldm = MatchCandidate {
5396 start: abs_pos,
5397 offset: 96,
5398 match_len: 40,
5399 };
5400
5401 let profile = HcOptimalCostProfile {
5402 max_chain_depth: 0,
5403 sufficient_match_len: usize::MAX / 2,
5404 accurate: true,
5405 favor_small_offsets: false,
5406 };
5407 let mut out = Vec::new();
5408 hc.collect_optimal_candidates(
5409 abs_pos,
5410 current_abs_end,
5411 profile,
5412 HcCandidateQuery {
5413 reps: [1, 4, 8],
5414 lit_len: 1,
5415 ldm_candidate: Some(ldm),
5416 },
5417 &mut out,
5418 );
5419 assert!(
5420 out.iter().any(
5421 |candidate| candidate.offset == ldm.offset && candidate.match_len == ldm.match_len
5422 ),
5423 "LDM candidate should be present in optimal candidate set"
5424 );
5425}
5426
5427#[test]
5428fn btultra_and_btultra2_both_keep_dictionary_candidates() {
5429 use super::strategy::StrategyTag;
5437
5438 let test_config = HcConfig {
5439 hash_log: 23,
5440 chain_log: 22,
5441 search_depth: 32,
5442 target_len: 256,
5443 };
5444 let window_log = 20u8;
5445
5446 let prepare_history = |hc: &mut HcMatchGenerator, abs_pos: usize| {
5447 hc.table.history = alloc::vec![0u8; 160];
5448 for i in 0..64 {
5449 hc.table.history[i] = b'a' + (i % 7) as u8;
5450 }
5451 for i in 64..160 {
5452 hc.table.history[i] = b'k' + (i % 5) as u8;
5453 }
5454 for i in 0..24 {
5455 hc.table.history[abs_pos + i] = hc.table.history[16 + i];
5456 }
5457 hc.table.history_start = 0;
5458 hc.table.history_abs_start = 0;
5459 hc.table.position_base = 0;
5460 hc.table.ensure_tables();
5461 hc.table.insert_positions(0, abs_pos);
5462 hc.table.dictionary_limit_abs = Some(64);
5463 hc.table.skip_insert_until_abs = 0;
5464 };
5465
5466 let profile = HcOptimalCostProfile {
5467 max_chain_depth: 32,
5468 sufficient_match_len: usize::MAX / 2,
5469 accurate: true,
5470 favor_small_offsets: false,
5471 };
5472 let abs_pos = 96usize;
5473 let mut out = Vec::new();
5474
5475 let mut hc = HcMatchGenerator::new(256);
5476 hc.configure(test_config, StrategyTag::BtUltra2, window_log);
5477 prepare_history(&mut hc, abs_pos);
5478 hc.collect_optimal_candidates(
5479 abs_pos,
5480 160,
5481 profile,
5482 HcCandidateQuery {
5483 reps: [1, 4, 8],
5484 lit_len: 1,
5485 ldm_candidate: None,
5486 },
5487 &mut out,
5488 );
5489 assert!(
5490 out.iter().any(|candidate| candidate.offset >= 32),
5491 "btultra2 should retain dictionary candidates on donor-parity path"
5492 );
5493
5494 let mut hc = HcMatchGenerator::new(256);
5495 hc.configure(test_config, StrategyTag::BtUltra, window_log);
5496 prepare_history(&mut hc, abs_pos);
5497 hc.collect_optimal_candidates(
5498 abs_pos,
5499 160,
5500 profile,
5501 HcCandidateQuery {
5502 reps: [1, 4, 8],
5503 lit_len: 1,
5504 ldm_candidate: None,
5505 },
5506 &mut out,
5507 );
5508 assert!(
5509 out.iter().any(|candidate| candidate.offset >= 32),
5510 "btultra should retain dictionary candidates"
5511 );
5512}
5513
5514#[test]
5515fn driver_small_source_hint_shrinks_dfast_hash_tables() {
5516 let mut driver = MatchGeneratorDriver::new(32, 2);
5517
5518 driver.reset(CompressionLevel::Level(3));
5519 let mut space = driver.get_next_space();
5520 space[..12].copy_from_slice(b"abcabcabcabc");
5521 space.truncate(12);
5522 driver.commit_space(space);
5523 driver.skip_matching_with_hint(None);
5524 let full_long = driver.dfast_matcher().long_hash.len();
5527 let full_short = driver.dfast_matcher().short_hash.len();
5528 assert_eq!(full_long, 1 << DFAST_HASH_BITS);
5529 assert_eq!(
5530 full_short,
5531 1 << (DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA)
5532 );
5533
5534 driver.set_source_size_hint(1024);
5535 driver.reset(CompressionLevel::Level(3));
5536 let mut space = driver.get_next_space();
5537 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
5538 space.truncate(12);
5539 driver.commit_space(space);
5540 driver.skip_matching_with_hint(None);
5541 let hinted_long = driver.dfast_matcher().long_hash.len();
5542 let hinted_short = driver.dfast_matcher().short_hash.len();
5543
5544 assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
5545 assert_eq!(hinted_long, 1 << MIN_HINTED_WINDOW_LOG);
5552 let expected_hinted_short_bits = (MIN_HINTED_WINDOW_LOG as usize)
5553 .saturating_sub(DFAST_SHORT_HASH_BITS_DELTA)
5554 .max(MIN_WINDOW_LOG as usize);
5555 assert_eq!(
5556 hinted_short,
5557 1 << expected_hinted_short_bits,
5558 "short table must sit one DFAST_SHORT_HASH_BITS_DELTA below the long table \
5559 (clamped at MIN_WINDOW_LOG) — a regression that pulls it up to the long-table \
5560 floor would still satisfy the `< full_short` bound below and slip through"
5561 );
5562 assert!(
5563 hinted_long < full_long && hinted_short < full_short,
5564 "tiny source hint should reduce both dfast tables"
5565 );
5566}
5567
5568#[test]
5569fn driver_small_source_hint_shrinks_row_hash_tables() {
5570 let mut driver = MatchGeneratorDriver::new(32, 2);
5571
5572 driver.reset(CompressionLevel::Level(5));
5573 let mut space = driver.get_next_space();
5574 space[..12].copy_from_slice(b"abcabcabcabc");
5575 space.truncate(12);
5576 driver.commit_space(space);
5577 driver.skip_matching_with_hint(None);
5578 let full_rows = driver.row_matcher().row_heads.len();
5579 assert_eq!(full_rows, 1 << (ROW_HASH_BITS - ROW_LOG));
5580
5581 driver.set_source_size_hint(1024);
5582 driver.reset(CompressionLevel::Level(5));
5583 let mut space = driver.get_next_space();
5584 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
5585 space.truncate(12);
5586 driver.commit_space(space);
5587 driver.skip_matching_with_hint(None);
5588 let hinted_rows = driver.row_matcher().row_heads.len();
5589
5590 assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
5591 assert_eq!(
5592 hinted_rows,
5593 1 << ((MIN_HINTED_WINDOW_LOG as usize) - ROW_LOG)
5594 );
5595 assert!(
5596 hinted_rows < full_rows,
5597 "tiny source hint should reduce row hash table footprint"
5598 );
5599}
5600
5601#[test]
5602fn row_matches_roundtrip_multi_block_pattern() {
5603 let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
5604 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
5605 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
5606
5607 let mut matcher = RowMatchGenerator::new(1 << 22);
5608 matcher.configure(ROW_CONFIG);
5609 matcher.ensure_tables();
5610 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
5611 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
5612 Sequence::Triple {
5613 literals,
5614 offset,
5615 match_len,
5616 } => {
5617 decoded.extend_from_slice(literals);
5618 let start = decoded.len() - offset;
5619 for i in 0..match_len {
5620 let byte = decoded[start + i];
5621 decoded.push(byte);
5622 }
5623 }
5624 };
5625
5626 matcher.add_data(first_block.clone(), |_| {});
5627 let mut history = Vec::new();
5628 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
5629 assert_eq!(history, first_block);
5630
5631 matcher.add_data(second_block.clone(), |_| {});
5632 let prefix_len = history.len();
5633 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
5634
5635 assert_eq!(&history[prefix_len..], second_block.as_slice());
5636
5637 let third_block: Vec<u8> = (0u8..=255).collect();
5639 matcher.add_data(third_block.clone(), |_| {});
5640 let third_prefix = history.len();
5641 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
5642 assert_eq!(&history[third_prefix..], third_block.as_slice());
5643}
5644
5645#[test]
5646fn row_short_block_emits_literals_only() {
5647 let mut matcher = RowMatchGenerator::new(1 << 22);
5648 matcher.configure(ROW_CONFIG);
5649
5650 matcher.add_data(b"abcde".to_vec(), |_| {});
5651
5652 let mut saw_triple = false;
5653 let mut reconstructed = Vec::new();
5654 matcher.start_matching(|seq| match seq {
5655 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
5656 Sequence::Triple { .. } => saw_triple = true,
5657 });
5658
5659 assert!(
5660 !saw_triple,
5661 "row backend must not emit triples for short blocks"
5662 );
5663 assert_eq!(reconstructed, b"abcde");
5664
5665 saw_triple = false;
5667 matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
5668 matcher.start_matching(|seq| {
5669 if let Sequence::Triple { .. } = seq {
5670 saw_triple = true;
5671 }
5672 });
5673 assert!(
5674 saw_triple,
5675 "row backend should emit triples on repeated data"
5676 );
5677}
5678
5679#[test]
5680fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
5681 let mut matcher = RowMatchGenerator::new(1 << 22);
5682 matcher.configure(ROW_CONFIG);
5683 matcher.add_data(b"abcabc".to_vec(), |_| {});
5684
5685 let best = MatchCandidate {
5686 start: 0,
5687 offset: 1,
5688 match_len: ROW_MIN_MATCH_LEN,
5689 };
5690 let picked = matcher
5691 .pick_lazy_match(0, 0, Some(best))
5692 .expect("best candidate must survive");
5693
5694 assert_eq!(picked.start, best.start);
5695 assert_eq!(picked.offset, best.offset);
5696 assert_eq!(picked.match_len, best.match_len);
5697}
5698
5699#[test]
5700fn row_backfills_previous_block_tail_for_cross_boundary_match() {
5701 let mut matcher = RowMatchGenerator::new(1 << 22);
5702 matcher.configure(ROW_CONFIG);
5703
5704 let mut first_block = alloc::vec![0xA5; 64];
5705 first_block.extend_from_slice(b"XYZ");
5706 let second_block = b"XYZXYZtail".to_vec();
5707
5708 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
5709 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
5710 Sequence::Triple {
5711 literals,
5712 offset,
5713 match_len,
5714 } => {
5715 decoded.extend_from_slice(literals);
5716 let start = decoded.len() - offset;
5717 for i in 0..match_len {
5718 let byte = decoded[start + i];
5719 decoded.push(byte);
5720 }
5721 }
5722 };
5723
5724 matcher.add_data(first_block.clone(), |_| {});
5725 let mut reconstructed = Vec::new();
5726 matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
5727 assert_eq!(reconstructed, first_block);
5728
5729 matcher.add_data(second_block.clone(), |_| {});
5730 let mut saw_cross_boundary = false;
5731 let prefix_len = reconstructed.len();
5732 matcher.start_matching(|seq| {
5733 if let Sequence::Triple {
5734 literals,
5735 offset,
5736 match_len,
5737 } = seq
5738 && literals.is_empty()
5739 && offset == 3
5740 && match_len >= ROW_MIN_MATCH_LEN
5741 {
5742 saw_cross_boundary = true;
5743 }
5744 replay_sequence(&mut reconstructed, seq);
5745 });
5746
5747 assert!(
5748 saw_cross_boundary,
5749 "row matcher should reuse the 3-byte previous-block tail"
5750 );
5751 assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
5752}
5753
5754#[test]
5755fn row_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
5756 let data = deterministic_high_entropy_bytes(0xA713_9C5D_44E2_10B1, 4096);
5757
5758 let mut dense = RowMatchGenerator::new(1 << 22);
5759 dense.configure(ROW_CONFIG);
5760 dense.add_data(data.clone(), |_| {});
5761 dense.skip_matching_with_hint(Some(false));
5762 let dense_slots = dense
5763 .row_positions
5764 .iter()
5765 .filter(|&&pos| pos != ROW_EMPTY_SLOT)
5766 .count();
5767
5768 let mut sparse = RowMatchGenerator::new(1 << 22);
5769 sparse.configure(ROW_CONFIG);
5770 sparse.add_data(data, |_| {});
5771 sparse.skip_matching_with_hint(Some(true));
5772 let sparse_slots = sparse
5773 .row_positions
5774 .iter()
5775 .filter(|&&pos| pos != ROW_EMPTY_SLOT)
5776 .count();
5777
5778 assert!(
5779 sparse_slots < dense_slots,
5780 "incompressible hint should seed fewer row slots (sparse={sparse_slots}, dense={dense_slots})"
5781 );
5782}
5783
5784#[test]
5798fn row_skip_matching_with_none_hint_leaves_interior_empty() {
5799 let data = deterministic_high_entropy_bytes(0x9B47_F2A1_8C5E_3306, 4096);
5800
5801 let mut none_hint = RowMatchGenerator::new(1 << 22);
5802 none_hint.configure(ROW_CONFIG);
5803 none_hint.add_data(data.clone(), |_| {});
5804 none_hint.skip_matching_with_hint(None);
5805 let none_slots = none_hint
5806 .row_positions
5807 .iter()
5808 .filter(|&&pos| pos != ROW_EMPTY_SLOT)
5809 .count();
5810
5811 let mut dense = RowMatchGenerator::new(1 << 22);
5814 dense.configure(ROW_CONFIG);
5815 dense.add_data(data, |_| {});
5816 dense.skip_matching_with_hint(Some(false));
5817 let dense_slots = dense
5818 .row_positions
5819 .iter()
5820 .filter(|&&pos| pos != ROW_EMPTY_SLOT)
5821 .count();
5822
5823 assert_eq!(
5828 none_slots, 0,
5829 "None hint at block_start=0 must leave row table fully empty \
5830 (donor parity — interior NOT inserted, no pre-block backfill possible)",
5831 );
5832 assert!(
5833 dense_slots > 0,
5834 "Some(false) dict-priming path must still insert densely \
5835 (sanity check: control case for the `none_slots == 0` assertion)",
5836 );
5837}
5838
5839#[test]
5840fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
5841 let mut driver = MatchGeneratorDriver::new(32, 2);
5842
5843 driver.reset(CompressionLevel::Level(3));
5844 let mut space = driver.get_next_space();
5845 space[..12].copy_from_slice(b"abcabcabcabc");
5846 space.truncate(12);
5847 driver.commit_space(space);
5848 driver.skip_matching_with_hint(None);
5849
5850 let long_len = driver.dfast_matcher().long_hash.len();
5854 let short_len = driver.dfast_matcher().short_hash.len();
5855 assert_eq!(
5856 long_len,
5857 1 << DFAST_HASH_BITS,
5858 "unhinted Level(2) should keep default long-hash table size"
5859 );
5860 assert_eq!(
5861 short_len,
5862 1 << (DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA),
5863 "unhinted Level(2) short-hash should be one bit smaller than long-hash"
5864 );
5865}
5866
5867#[cfg(any())] #[test]
5869fn simple_backend_rejects_undersized_pooled_suffix_store() {
5870 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
5871 driver.reset(CompressionLevel::Fastest);
5872
5873 driver.suffix_pool.push(SuffixStore::with_capacity(1024));
5874
5875 let mut space = driver.get_next_space();
5876 space.clear();
5877 space.resize(4096, 0xAB);
5878 driver.commit_space(space);
5879
5880 let last_suffix_slots = driver
5881 .simple()
5882 .window
5883 .last()
5884 .expect("window entry must exist after commit")
5885 .suffixes
5886 .slots
5887 .len();
5888 assert!(
5889 last_suffix_slots >= 4096,
5890 "undersized pooled suffix store must not be reused for larger blocks"
5891 );
5892}
5893
5894#[test]
5895fn source_hint_clamps_driver_slice_size_to_window() {
5896 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
5897 driver.set_source_size_hint(1024);
5898 driver.reset(CompressionLevel::Default);
5899
5900 let window = driver.window_size() as usize;
5901 assert_eq!(window, 1 << MIN_HINTED_WINDOW_LOG);
5902 assert_eq!(driver.slice_size, window);
5903
5904 let space = driver.get_next_space();
5905 assert_eq!(space.len(), window);
5906 driver.commit_space(space);
5907}
5908
5909#[test]
5910fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
5911 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
5912 driver.reset(CompressionLevel::Default);
5913
5914 let large = driver.get_next_space();
5915 let large_capacity = large.capacity();
5916 assert!(large_capacity >= 128 * 1024);
5917 driver.commit_space(large);
5918
5919 driver.set_source_size_hint(1024);
5920 driver.reset(CompressionLevel::Default);
5921
5922 let small = driver.get_next_space();
5923 assert_eq!(small.len(), 1 << MIN_HINTED_WINDOW_LOG);
5924 assert!(
5925 small.capacity() >= large_capacity,
5926 "pooled buffer capacity should be preserved to avoid shrink/grow churn"
5927 );
5928}
5929
5930#[test]
5931fn driver_best_to_fastest_releases_oversized_hc_tables() {
5932 let mut driver = MatchGeneratorDriver::new(32, 2);
5933
5934 driver.reset(CompressionLevel::Best);
5936 assert_eq!(driver.window_size(), (1u64 << 24));
5937
5938 let mut space = driver.get_next_space();
5940 space[..12].copy_from_slice(b"abcabcabcabc");
5941 space.truncate(12);
5942 driver.commit_space(space);
5943 driver.skip_matching_with_hint(None);
5944
5945 driver.reset(CompressionLevel::Fastest);
5960 assert_eq!(driver.window_size(), (1u64 << 19));
5961 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
5962}
5963
5964#[test]
5965fn driver_better_to_best_resizes_hc_tables() {
5966 let mut driver = MatchGeneratorDriver::new(32, 2);
5967
5968 driver.reset(CompressionLevel::Better);
5970 assert_eq!(driver.window_size(), (1u64 << 23));
5971
5972 let mut space = driver.get_next_space();
5973 space[..12].copy_from_slice(b"abcabcabcabc");
5974 space.truncate(12);
5975 driver.commit_space(space);
5976 driver.skip_matching_with_hint(None);
5977
5978 let hc = driver.hc_matcher();
5979 let better_hash_len = hc.table.hash_table.len();
5980 let better_chain_len = hc.table.chain_table.len();
5981
5982 driver.reset(CompressionLevel::Best);
5984 assert_eq!(driver.window_size(), (1u64 << 24));
5985
5986 let mut space = driver.get_next_space();
5988 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
5989 space.truncate(12);
5990 driver.commit_space(space);
5991 driver.skip_matching_with_hint(None);
5992
5993 let hc = driver.hc_matcher();
5994 assert!(
5995 hc.table.hash_table.len() > better_hash_len,
5996 "Best hash_table ({}) should be larger than Better ({})",
5997 hc.table.hash_table.len(),
5998 better_hash_len
5999 );
6000 assert!(
6001 hc.table.chain_table.len() > better_chain_len,
6002 "Best chain_table ({}) should be larger than Better ({})",
6003 hc.table.chain_table.len(),
6004 better_chain_len
6005 );
6006}
6007
6008#[cfg(any())]
6009#[test]
6011fn prime_with_dictionary_preserves_history_for_first_full_block() {
6012 let mut driver = MatchGeneratorDriver::new(8, 1);
6013 driver.reset(CompressionLevel::Fastest);
6014
6015 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
6016
6017 let mut space = driver.get_next_space();
6018 space.clear();
6019 space.extend_from_slice(b"abcdefgh");
6020 driver.commit_space(space);
6021
6022 let mut saw_match = false;
6023 driver.start_matching(|seq| {
6024 if let Sequence::Triple {
6025 literals,
6026 offset,
6027 match_len,
6028 } = seq
6029 && literals.is_empty()
6030 && offset == 8
6031 && match_len >= MIN_MATCH_LEN
6032 {
6033 saw_match = true;
6034 }
6035 });
6036
6037 assert!(
6038 saw_match,
6039 "first full block should still match dictionary-primed history"
6040 );
6041}
6042
6043#[cfg(any())]
6044#[test]
6046fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
6047 let mut driver = MatchGeneratorDriver::new(8, 1);
6048 driver.reset(CompressionLevel::Fastest);
6049
6050 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6051
6052 let mut space = driver.get_next_space();
6053 space.clear();
6054 space.extend_from_slice(b"abcdefgh");
6055 driver.commit_space(space);
6056
6057 let mut saw_match = false;
6058 driver.start_matching(|seq| {
6059 if let Sequence::Triple {
6060 literals,
6061 offset,
6062 match_len,
6063 } = seq
6064 && literals.is_empty()
6065 && offset == 24
6066 && match_len >= MIN_MATCH_LEN
6067 {
6068 saw_match = true;
6069 }
6070 });
6071
6072 assert!(
6073 saw_match,
6074 "dictionary bytes should remain addressable until frame output exceeds the live window"
6075 );
6076}
6077
6078#[test]
6079fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
6080 let mut driver = MatchGeneratorDriver::new(8, 1);
6081 driver.reset(CompressionLevel::Fastest);
6082
6083 driver.prime_with_dictionary(&[], [11, 7, 3]);
6084
6085 assert_eq!(driver.simple_mut().offset_hist, [11, 7, 3]);
6086}
6087
6088#[test]
6089fn hc_prime_with_empty_dictionary_disables_btultra2_seed_pass() {
6090 let mut driver = MatchGeneratorDriver::new(8, 1);
6091 driver.reset(CompressionLevel::Better);
6092
6093 driver.prime_with_dictionary(&[], [11, 7, 3]);
6094
6095 assert_eq!(driver.hc_matcher().table.offset_hist, [11, 7, 3]);
6096 assert!(
6097 !driver
6098 .hc_matcher()
6099 .should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1),
6100 "btultra2 warmup must stay disabled after dictionary priming, even when dict content is empty"
6101 );
6102}
6103
6104#[test]
6105fn hc_prime_with_dictionary_disables_btultra2_seed_pass() {
6106 let mut driver = MatchGeneratorDriver::new(8, 1);
6107 driver.reset(CompressionLevel::Better);
6108
6109 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
6110
6111 assert!(
6112 !driver
6113 .hc_matcher()
6114 .should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1),
6115 "btultra2 warmup must stay disabled after dictionary priming with content"
6116 );
6117}
6118
6119#[test]
6120fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
6121 let mut driver = MatchGeneratorDriver::new(8, 1);
6122 driver.reset(CompressionLevel::Level(4));
6127
6128 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
6129
6130 let mut space = driver.get_next_space();
6131 space.clear();
6132 space.extend_from_slice(b"abcdefgh");
6133 driver.commit_space(space);
6134
6135 let mut saw_match = false;
6136 driver.start_matching(|seq| {
6137 if let Sequence::Triple {
6138 literals,
6139 offset,
6140 match_len,
6141 } = seq
6142 && literals.is_empty()
6143 && offset == 8
6144 && match_len >= DFAST_MIN_MATCH_LEN
6145 {
6146 saw_match = true;
6147 }
6148 });
6149
6150 assert!(
6151 saw_match,
6152 "dfast backend should match dictionary-primed history in first full block"
6153 );
6154}
6155
6156#[test]
6157fn prime_with_dictionary_does_not_inflate_reported_window_size() {
6158 let mut driver = MatchGeneratorDriver::new(8, 1);
6159 driver.reset(CompressionLevel::Fastest);
6160
6161 let before = driver.window_size();
6162 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6163 let after = driver.window_size();
6164
6165 assert_eq!(
6166 after, before,
6167 "dictionary retention budget must not change reported frame window size"
6168 );
6169}
6170
6171#[cfg(any())] #[test]
6173fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
6174 let mut driver = MatchGeneratorDriver::new(8, 2);
6175 driver.reset(CompressionLevel::Fastest);
6176
6177 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
6180
6181 assert!(
6182 driver
6183 .simple()
6184 .window
6185 .iter()
6186 .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
6187 "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
6188 );
6189}
6190
6191#[test]
6192fn prime_with_dictionary_counts_only_committed_tail_budget() {
6193 let mut driver = MatchGeneratorDriver::new(8, 1);
6194 driver.reset(CompressionLevel::Fastest);
6195
6196 let before = driver.simple_mut().max_window_size;
6197 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
6199
6200 assert_eq!(
6201 driver.simple_mut().max_window_size,
6202 before + 8,
6203 "retention budget must account only for dictionary bytes actually committed to history"
6204 );
6205}
6206
6207#[test]
6208fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
6209 let mut driver = MatchGeneratorDriver::new(8, 1);
6210 driver.reset(CompressionLevel::Level(3));
6211
6212 let before = driver.dfast_matcher().max_window_size;
6213 driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);
6216
6217 assert_eq!(
6218 driver.dfast_matcher().max_window_size,
6219 before + 12,
6220 "dfast retention budget should include 4-byte dictionary tails"
6221 );
6222}
6223
6224#[test]
6225fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
6226 let mut driver = MatchGeneratorDriver::new(8, 1);
6227 driver.reset(CompressionLevel::Level(4));
6228
6229 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
6230
6231 let mut space = driver.get_next_space();
6232 space.clear();
6233 space.extend_from_slice(b"abcdefgh");
6234 driver.commit_space(space);
6235
6236 let mut saw_match = false;
6237 driver.start_matching(|seq| {
6238 if let Sequence::Triple {
6239 literals,
6240 offset,
6241 match_len,
6242 } = seq
6243 && literals.is_empty()
6244 && offset == 8
6245 && match_len >= ROW_MIN_MATCH_LEN
6246 {
6247 saw_match = true;
6248 }
6249 });
6250
6251 assert!(
6252 saw_match,
6253 "row backend should match dictionary-primed history in first full block"
6254 );
6255}
6256
6257#[test]
6258fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
6259 let mut driver = MatchGeneratorDriver::new(8, 1);
6260 driver.reset(CompressionLevel::Level(5));
6261
6262 let base_window = driver.row_matcher().max_window_size;
6263 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
6266
6267 assert_eq!(
6268 driver.row_matcher().max_window_size,
6269 base_window + 8,
6270 "row retained window must exclude uncommitted 1-byte tail"
6271 );
6272}
6273
6274#[test]
6275fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
6276 let mut driver = MatchGeneratorDriver::new(8, 1);
6277 driver.reset(CompressionLevel::Level(5));
6278 driver.row_matcher_mut().max_window_size = 8;
6280 driver.reported_window_size = 8;
6281
6282 let base_window = driver.row_matcher().max_window_size;
6283 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6284 assert_eq!(driver.row_matcher().max_window_size, base_window + 24);
6285
6286 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
6287 let mut space = driver.get_next_space();
6288 space.clear();
6289 space.extend_from_slice(block);
6290 driver.commit_space(space);
6291 driver.skip_matching_with_hint(None);
6292 }
6293
6294 assert_eq!(
6295 driver.dictionary_retained_budget, 0,
6296 "dictionary budget should be fully retired once primed dict slices are evicted"
6297 );
6298 assert_eq!(
6299 driver.row_matcher().max_window_size,
6300 base_window,
6301 "retired dictionary budget must not remain reusable for live history"
6302 );
6303}
6304
6305#[test]
6315fn row_get_last_space_then_reset_to_fastest_drops_row_variant() {
6316 let mut driver = MatchGeneratorDriver::new(8, 1);
6317 driver.reset(CompressionLevel::Level(5));
6318 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);
6319
6320 let mut space = driver.get_next_space();
6321 space.clear();
6322 space.extend_from_slice(b"row-data");
6323 driver.commit_space(space);
6324
6325 assert_eq!(driver.get_last_space(), b"row-data");
6326
6327 driver.reset(CompressionLevel::Fastest);
6328 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
6329}
6330
6331#[test]
6342fn driver_reset_from_row_backend_recycles_row_buffers_into_pool() {
6343 let mut driver = MatchGeneratorDriver::new(8, 1);
6344 driver.reset(CompressionLevel::Level(5));
6345 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);
6346
6347 let mut space = driver.get_next_space();
6348 space.extend_from_slice(b"row-data-to-recycle");
6349 driver.commit_space(space);
6350
6351 let before_pool = driver.vec_pool.len();
6352 driver.reset(CompressionLevel::Fastest);
6353
6354 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
6355 assert!(
6361 driver.vec_pool.len() > before_pool,
6362 "row reset must recycle the committed row history buffer into vec_pool \
6363 (before_pool = {before_pool}, after = {})",
6364 driver.vec_pool.len()
6365 );
6366}
6367
6368#[test]
6369fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
6370 let mut params = resolve_level_params(CompressionLevel::Level(4), None);
6371 params.window_log = 22;
6372 let adjusted = adjust_params_for_source_size(params, 0);
6373 assert_eq!(adjusted.window_log, MIN_HINTED_WINDOW_LOG);
6374}
6375
6376#[test]
6377fn common_prefix_len_matches_scalar_reference_across_offsets() {
6378 fn scalar_reference(a: &[u8], b: &[u8]) -> usize {
6379 a.iter()
6380 .zip(b.iter())
6381 .take_while(|(lhs, rhs)| lhs == rhs)
6382 .count()
6383 }
6384
6385 for total_len in [
6386 0usize, 1, 5, 15, 16, 17, 31, 32, 33, 64, 65, 127, 191, 257, 320,
6387 ] {
6388 let base: Vec<u8> = (0..total_len)
6389 .map(|i| ((i * 13 + 7) & 0xFF) as u8)
6390 .collect();
6391
6392 for start in [0usize, 1, 3] {
6393 if start > total_len {
6394 continue;
6395 }
6396 let a = &base[start..];
6397 let b = a.to_vec();
6398 assert_eq!(
6399 common_prefix_len(a, &b),
6400 scalar_reference(a, &b),
6401 "equal slices total_len={total_len} start={start}"
6402 );
6403
6404 let len = a.len();
6405 for mismatch in [0usize, 1, 7, 15, 16, 31, 32, 47, 63, 95, 127, 128, 129, 191] {
6406 if mismatch >= len {
6407 continue;
6408 }
6409 let mut altered = b.clone();
6410 altered[mismatch] ^= 0x5A;
6411 assert_eq!(
6412 common_prefix_len(a, &altered),
6413 scalar_reference(a, &altered),
6414 "total_len={total_len} start={start} mismatch={mismatch}"
6415 );
6416 }
6417
6418 if len > 0 {
6419 let mismatch = len - 1;
6420 let mut altered = b.clone();
6421 altered[mismatch] ^= 0xA5;
6422 assert_eq!(
6423 common_prefix_len(a, &altered),
6424 scalar_reference(a, &altered),
6425 "tail mismatch total_len={total_len} start={start} mismatch={mismatch}"
6426 );
6427 }
6428 }
6429 }
6430
6431 let long = alloc::vec![0xAB; 320];
6432 let shorter = alloc::vec![0xAB; 137];
6433 assert_eq!(
6434 common_prefix_len(&long, &shorter),
6435 scalar_reference(&long, &shorter)
6436 );
6437}
6438
6439#[test]
6440fn row_pick_lazy_returns_none_when_next_is_better() {
6441 let mut matcher = RowMatchGenerator::new(1 << 22);
6442 matcher.configure(ROW_CONFIG);
6443 matcher.add_data(alloc::vec![b'a'; 64], |_| {});
6444 matcher.ensure_tables();
6445
6446 let abs_pos = matcher.history_abs_start + 16;
6447 let best = MatchCandidate {
6448 start: abs_pos,
6449 offset: 8,
6450 match_len: ROW_MIN_MATCH_LEN,
6451 };
6452 assert!(
6453 matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
6454 "lazy picker should defer when next position is clearly better"
6455 );
6456}
6457
6458#[test]
6459fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
6460 let mut matcher = RowMatchGenerator::new(1 << 22);
6461 matcher.configure(ROW_CONFIG);
6462 matcher.lazy_depth = 2;
6463 matcher.search_depth = 0;
6464 matcher.offset_hist = [6, 9, 1];
6465
6466 let mut data = alloc::vec![b'x'; 40];
6467 data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
6468 matcher.add_data(data, |_| {});
6469 matcher.ensure_tables();
6470
6471 let abs_pos = matcher.history_abs_start + 20;
6472 let best = matcher
6473 .best_match(abs_pos, 0)
6474 .expect("expected baseline repcode match");
6475 assert_eq!(best.offset, 9);
6476 assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);
6477
6478 if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
6479 assert!(next.match_len <= best.match_len);
6480 }
6481
6482 let next2 = matcher
6483 .best_match(abs_pos + 2, 2)
6484 .expect("expected +2 candidate");
6485 assert!(
6486 next2.match_len > best.match_len + 1,
6487 "+2 candidate must be significantly better for depth-2 lazy skip"
6488 );
6489 assert!(
6490 matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
6491 "lazy picker should defer when +2 candidate is significantly better"
6492 );
6493}
6494
6495#[test]
6496fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
6497 let mut matcher = RowMatchGenerator::new(1 << 22);
6498 matcher.configure(ROW_CONFIG);
6499 matcher.lazy_depth = 2;
6500 matcher.search_depth = 0;
6501 matcher.offset_hist = [6, 9, 1];
6502
6503 let mut data = alloc::vec![b'x'; 40];
6504 data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
6505 matcher.add_data(data, |_| {});
6506 matcher.ensure_tables();
6507
6508 let abs_pos = matcher.history_abs_start + 20;
6509 let best = matcher
6510 .best_match(abs_pos, 0)
6511 .expect("expected baseline repcode match");
6512 assert_eq!(best.offset, 9);
6513 assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);
6514
6515 let next2 = matcher
6516 .best_match(abs_pos + 2, 2)
6517 .expect("expected +2 candidate");
6518 assert_eq!(next2.match_len, best.match_len + 1);
6519 let chosen = matcher
6520 .pick_lazy_match(abs_pos, 0, Some(best))
6521 .expect("lazy picker should keep current best");
6522 assert_eq!(chosen.start, best.start);
6523 assert_eq!(chosen.offset, best.offset);
6524 assert_eq!(chosen.match_len, best.match_len);
6525}
6526
6527#[test]
6529fn row_hash_and_row_extracts_high_bits() {
6530 let mut matcher = RowMatchGenerator::new(1 << 22);
6531 matcher.configure(ROW_CONFIG);
6532 matcher.add_data(
6533 alloc::vec![
6534 0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
6535 0x70, 0x80,
6536 ],
6537 |_| {},
6538 );
6539 matcher.ensure_tables();
6540
6541 let pos = matcher.history_abs_start + 8;
6542 let (row, tag) = matcher
6543 .hash_and_row(pos)
6544 .expect("row hash should be available");
6545
6546 let idx = pos - matcher.history_abs_start;
6547 let concat = matcher.live_history();
6548 let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
6549 let hash = crate::encoding::fastpath::hash_mix_u64_with_kernel(matcher.hash_kernel, value);
6550 let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
6551 let combined = hash >> (u64::BITS as usize - total_bits);
6552 let expected_row =
6553 ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
6554 let expected_tag = combined as u8;
6555
6556 assert_eq!(row, expected_row);
6557 assert_eq!(tag, expected_tag);
6558}
6559
6560#[test]
6561fn row_repcode_skips_candidate_before_history_start() {
6562 let mut matcher = RowMatchGenerator::new(1 << 22);
6563 matcher.configure(ROW_CONFIG);
6564 matcher.history = alloc::vec![b'a'; 20];
6565 matcher.history_start = 0;
6566 matcher.history_abs_start = 10;
6567 matcher.offset_hist = [3, 0, 0];
6568
6569 assert!(matcher.repcode_candidate(12, 1).is_none());
6570}
6571
6572#[test]
6573fn row_repcode_returns_none_when_position_too_close_to_history_end() {
6574 let mut matcher = RowMatchGenerator::new(1 << 22);
6575 matcher.configure(ROW_CONFIG);
6576 matcher.history = b"abcde".to_vec();
6577 matcher.history_start = 0;
6578 matcher.history_abs_start = 0;
6579 matcher.offset_hist = [1, 0, 0];
6580
6581 assert!(matcher.repcode_candidate(4, 1).is_none());
6582}
6583
6584#[cfg(all(feature = "std", target_arch = "x86_64"))]
6585#[test]
6586fn hash_mix_sse42_path_is_available_and_matches_accelerated_impl_when_supported() {
6587 use crate::encoding::fastpath::{self, FastpathKernel};
6588 if !is_x86_feature_detected!("sse4.2") {
6589 return;
6590 }
6591 let v = 0x0123_4567_89AB_CDEFu64;
6592 let accelerated = unsafe { fastpath::sse42::hash_mix_u64(v) };
6594 let dispatched = fastpath::dispatch_hash_mix_u64(v);
6596 let kernel = fastpath::select_kernel();
6597 if kernel == FastpathKernel::Sse42 {
6598 assert_eq!(dispatched, accelerated);
6599 } else {
6600 assert_eq!(dispatched, accelerated, "AVX2/SSE4.2 share CRC32 mix");
6602 }
6603}
6604
6605#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
6606#[test]
6607fn hash_mix_crc_path_is_available_and_matches_accelerated_impl_when_supported() {
6608 use crate::encoding::fastpath;
6609 if !is_aarch64_feature_detected!("crc") {
6610 return;
6611 }
6612 let v = 0x0123_4567_89AB_CDEFu64;
6613 let accelerated = unsafe { fastpath::neon::hash_mix_u64(v) };
6615 let dispatched = fastpath::dispatch_hash_mix_u64(v);
6616 assert_eq!(dispatched, accelerated);
6617}
6618
6619#[test]
6620fn hc_hash3_position_matches_donor_formula() {
6621 let bytes = [b'a', b'b', b'c', b'd'];
6622 let read32 = u32::from_le_bytes(bytes);
6623 let expected = (((read32 << 8).wrapping_mul(HC_PRIME3BYTES)) >> (32 - HC3_HASH_LOG)) as usize;
6624 assert_eq!(
6625 super::match_table::storage::MatchTable::hash3_position(&bytes, HC3_HASH_LOG),
6626 expected
6627 );
6628}
6629
6630#[test]
6631fn hc_hash_position_matches_donor_hash4_formula() {
6632 let mut hc = HcMatchGenerator::new(1 << 20);
6633 hc.configure(HC_CONFIG, super::strategy::StrategyTag::Lazy, 22);
6634 let bytes = [b'a', b'b', b'c', b'd'];
6635 let read32 = u32::from_le_bytes(bytes);
6636 let expected = ((read32.wrapping_mul(HC_PRIME4BYTES)) >> (32 - hc.table.hash_log)) as usize;
6637 assert_eq!(hc.table.hash_position(&bytes), expected);
6638}
6639
6640#[test]
6641fn btultra2_main_hash_uses_donor_hash4_formula() {
6642 let mut hc = HcMatchGenerator::new(1 << 20);
6643 hc.configure(
6644 BTULTRA2_HC_CONFIG_L22,
6645 super::strategy::StrategyTag::BtUltra2,
6646 27,
6647 );
6648 let bytes = [b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h'];
6649 let read32 = u32::from_le_bytes(bytes[..4].try_into().unwrap());
6650 let expected = ((read32.wrapping_mul(HC_PRIME4BYTES)) >> (32 - hc.table.hash_log)) as usize;
6651 let actual = super::match_table::storage::MatchTable::hash_position_with_mls(
6652 &bytes,
6653 hc.table.hash_log,
6654 super::bt::BtMatcher::HASH_MLS,
6655 );
6656 assert_eq!(actual, expected);
6657}
6658
6659#[test]
6660fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
6661 let mut matcher = RowMatchGenerator::new(1 << 22);
6662 matcher.configure(ROW_CONFIG);
6663 matcher.history = b"abcde".to_vec();
6664 matcher.history_start = 0;
6665 matcher.history_abs_start = 0;
6666
6667 assert!(matcher.row_candidate(0, 0).is_none());
6668}
6669
6670#[test]
6671fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
6672 let mut hc = HcMatchGenerator::new(32);
6673 hc.table.history = b"abc".to_vec();
6674 hc.table.history_start = 0;
6675 hc.table.history_abs_start = 0;
6676 hc.table.ensure_tables();
6677
6678 let candidates = hc.hc.chain_candidates(&hc.table, 0);
6679 assert!(candidates.iter().all(|&pos| pos == usize::MAX));
6680}
6681
6682#[test]
6683fn hc_reset_refills_existing_tables_with_empty_sentinel() {
6684 let mut hc = HcMatchGenerator::new(32);
6685 hc.table.add_data(b"abcdeabcde".to_vec(), |_| {});
6686 hc.table.ensure_tables();
6687 assert!(!hc.table.hash_table.is_empty());
6688 assert!(!hc.table.chain_table.is_empty());
6689 hc.table.hash_table.fill(123);
6690 hc.table.chain_table.fill(456);
6691
6692 hc.reset(|_| {});
6693
6694 assert!(hc.table.hash_table.iter().all(|&v| v == HC_EMPTY));
6695 assert!(hc.table.chain_table.iter().all(|&v| v == HC_EMPTY));
6696}
6697
6698#[test]
6699fn hc_start_matching_returns_early_for_empty_current_block() {
6700 let mut hc = HcMatchGenerator::new(32);
6701 hc.table.add_data(Vec::new(), |_| {});
6702 let mut called = false;
6703 hc.start_matching(|_| called = true);
6704 assert!(!called, "empty current block should not emit sequences");
6705}
6706
6707#[cfg(test)]
6708fn deterministic_high_entropy_bytes(seed: u64, len: usize) -> Vec<u8> {
6709 let mut out = Vec::with_capacity(len);
6710 let mut state = seed;
6711 for _ in 0..len {
6712 state ^= state << 13;
6713 state ^= state >> 7;
6714 state ^= state << 17;
6715 out.push((state >> 40) as u8);
6716 }
6717 out
6718}
6719
6720#[cfg(test)]
6721fn level22_donor_block_ranges(data: &[u8]) -> Vec<(usize, usize)> {
6722 let mut ranges = Vec::new();
6723 let mut cursor = 0usize;
6724 let mut savings = 0i64;
6725 while cursor < data.len() {
6726 let remaining = data.len() - cursor;
6727 let candidate_len = remaining.min(HC_BLOCKSIZE_MAX);
6728 let block_len = crate::encoding::frame_compressor::donor_optimal_block_size(
6729 CompressionLevel::Level(22),
6730 &data[cursor..cursor + candidate_len],
6731 remaining,
6732 HC_BLOCKSIZE_MAX,
6733 savings,
6734 )
6735 .min(candidate_len)
6736 .max(1);
6737 ranges.push((cursor, block_len));
6738 cursor += block_len;
6739 if cursor >= HC_BLOCKSIZE_MAX {
6743 savings = 3;
6744 }
6745 }
6746 ranges
6747}
6748
6749#[cfg(test)]
6750fn merge_block_delimiters_like_donor(
6751 sequences: Vec<(usize, usize, usize)>,
6752) -> Vec<(usize, usize, usize)> {
6753 let mut out = Vec::with_capacity(sequences.len());
6754 let mut pending_lits = 0usize;
6755 for (lit_len, offset, match_len) in sequences {
6756 if offset == 0 && match_len == 0 {
6757 pending_lits = pending_lits.saturating_add(lit_len);
6758 continue;
6759 }
6760 out.push((lit_len.saturating_add(pending_lits), offset, match_len));
6761 pending_lits = 0;
6762 }
6763 if pending_lits > 0 {
6764 out.push((pending_lits, 0, 0));
6765 }
6766 out
6767}
6768
6769#[cfg(test)]
6770fn collect_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
6771 merge_block_delimiters_like_donor(collect_level22_sequences_with_delimiters(data))
6772 .into_iter()
6773 .filter(|(_, offset, match_len)| *offset != 0 || *match_len != 0)
6774 .collect()
6775}
6776
6777#[cfg(test)]
6778fn collect_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
6779 let mut driver = MatchGeneratorDriver::new(HC_BLOCKSIZE_MAX, 1);
6780 driver.set_source_size_hint(data.len() as u64);
6781 driver.reset(CompressionLevel::Level(22));
6782
6783 let mut sequences = Vec::new();
6784 for (chunk_start, chunk_len) in level22_donor_block_ranges(data) {
6785 let chunk = &data[chunk_start..chunk_start + chunk_len];
6786 let mut space = driver.get_next_space();
6787 space[..chunk.len()].copy_from_slice(chunk);
6788 space.truncate(chunk.len());
6789 driver.commit_space(space);
6790 driver.start_matching(|seq| {
6791 let entry = match seq {
6792 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
6793 Sequence::Triple {
6794 literals,
6795 offset,
6796 match_len,
6797 } => (literals.len(), offset, match_len),
6798 };
6799 sequences.push(entry);
6800 });
6801 }
6802 sequences
6803}
6804
6805#[cfg(test)]
6806fn donor_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
6807 merge_block_delimiters_like_donor(donor_level22_sequences_with_delimiters(data))
6808 .into_iter()
6809 .filter(|(_, offset, match_len)| *offset != 0 || *match_len != 0)
6810 .collect()
6811}
6812
6813#[cfg(test)]
6814fn donor_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
6815 use zstd::zstd_safe;
6816 use zstd::zstd_safe::zstd_sys;
6817
6818 fn assert_zstd_ok(code: usize, context: &str) {
6819 assert_eq!(
6820 unsafe { zstd_sys::ZSTD_isError(code) },
6821 0,
6822 "{context} failed: {}",
6823 zstd_safe::get_error_name(code)
6824 );
6825 }
6826
6827 unsafe {
6828 let cctx = zstd_sys::ZSTD_createCCtx();
6829 assert!(!cctx.is_null(), "ZSTD_createCCtx returned null");
6830
6831 assert_zstd_ok(
6832 zstd_sys::ZSTD_CCtx_setParameter(
6833 cctx,
6834 zstd_sys::ZSTD_cParameter::ZSTD_c_compressionLevel,
6835 22,
6836 ),
6837 "ZSTD_c_compressionLevel",
6838 );
6839
6840 let seq_capacity = zstd_safe::sequence_bound(data.len());
6841 let mut seqs = alloc::vec![
6842 zstd_sys::ZSTD_Sequence {
6843 offset: 0,
6844 litLength: 0,
6845 matchLength: 0,
6846 rep: 0,
6847 };
6848 seq_capacity
6849 ];
6850
6851 let seq_count = zstd_sys::ZSTD_generateSequences(
6852 cctx,
6853 seqs.as_mut_ptr(),
6854 seqs.len(),
6855 data.as_ptr().cast(),
6856 data.len(),
6857 );
6858 assert_zstd_ok(seq_count, "ZSTD_generateSequences");
6859 let rc = zstd_sys::ZSTD_freeCCtx(cctx);
6860 assert_eq!(rc, 0, "ZSTD_freeCCtx failed");
6861
6862 seqs.truncate(seq_count);
6863 seqs.into_iter()
6864 .map(|seq| {
6865 (
6866 seq.litLength as usize,
6867 seq.offset as usize,
6868 seq.matchLength as usize,
6869 )
6870 })
6871 .collect()
6872 }
6873}
6874
6875#[test]
6876fn level22_sequences_match_donor_on_corpus_proxy() {
6877 let data = include_bytes!("../../decodecorpus_files/z000033");
6878 assert_level22_sequences_match_donor(data);
6879}
6880
6881#[test]
6882fn level22_sequences_match_donor_on_small_corpus_proxy() {
6883 let data = include_bytes!("../../decodecorpus_files/z000030");
6884 assert_level22_sequences_match_donor(data);
6885}
6886
6887#[cfg(test)]
6888fn assert_level22_sequences_match_donor(data: &[u8]) {
6889 let rust = collect_level22_sequences(data);
6890 let donor = donor_level22_sequences(data);
6891
6892 if rust != donor {
6893 let first_diff = rust
6894 .iter()
6895 .zip(donor.iter())
6896 .position(|(lhs, rhs)| lhs != rhs)
6897 .unwrap_or_else(|| rust.len().min(donor.len()));
6898 let rust_pos = rust
6899 .iter()
6900 .take(first_diff)
6901 .fold(0usize, |acc, seq| acc + seq.0 + seq.2);
6902 let donor_pos = donor
6903 .iter()
6904 .take(first_diff)
6905 .fold(0usize, |acc, seq| acc + seq.0 + seq.2);
6906 let start = first_diff.saturating_sub(4);
6907 let rust_window = &rust[start..rust.len().min(first_diff + 4)];
6908 let donor_window = &donor[start..donor.len().min(first_diff + 4)];
6909 let mut reps = [1u32, 4, 8];
6910 for (lit_len, offset, _) in rust.iter().take(first_diff) {
6911 let _ = encode_offset_with_history(*offset as u32, *lit_len as u32, &mut reps);
6912 }
6913 panic!(
6914 "level22 sequence path diverged at idx {}: rust={:?} donor={:?} (rust_len={} donor_len={} rust_pos={} donor_pos={} reps_before={:?} rust_window={:?} donor_window={:?} block_ranges={:?})",
6915 first_diff,
6916 rust.get(first_diff),
6917 donor.get(first_diff),
6918 rust.len(),
6919 donor.len(),
6920 rust_pos,
6921 donor_pos,
6922 reps,
6923 rust_window,
6924 donor_window,
6925 level22_donor_block_ranges(data)
6926 .into_iter()
6927 .filter(|(start, len)| *start <= rust_pos && rust_pos < start + len)
6928 .collect::<Vec<_>>(),
6929 );
6930 }
6931}
6932
6933#[test]
6934fn hc_sparse_skip_matching_preserves_tail_cross_block_match() {
6935 let mut matcher = HcMatchGenerator::new(1 << 22);
6936 let tail = b"Qz9kLm2Rp";
6937 let mut first = deterministic_high_entropy_bytes(0xD1B5_4A32_9C77_0E19, 4096);
6938 let tail_start = first.len() - tail.len();
6939 first[tail_start..].copy_from_slice(tail);
6940 matcher.table.add_data(first.clone(), |_| {});
6941 matcher.skip_matching(Some(true));
6942
6943 let mut second = tail.to_vec();
6944 second.extend_from_slice(b"after-tail-literals");
6945 matcher.table.add_data(second, |_| {});
6946
6947 let mut first_sequence = None;
6948 matcher.start_matching(|seq| {
6949 if first_sequence.is_some() {
6950 return;
6951 }
6952 first_sequence = Some(match seq {
6953 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
6954 Sequence::Triple {
6955 literals,
6956 offset,
6957 match_len,
6958 } => (literals.len(), offset, match_len),
6959 });
6960 });
6961
6962 let (literals_len, offset, match_len) =
6963 first_sequence.expect("expected at least one sequence after sparse skip");
6964 assert_eq!(
6965 literals_len, 0,
6966 "first sequence should start at block boundary"
6967 );
6968 assert_eq!(
6969 offset,
6970 tail.len(),
6971 "first match should reference previous tail"
6972 );
6973 assert!(
6974 match_len >= tail.len(),
6975 "tail-aligned cross-block match must be preserved"
6976 );
6977}
6978
6979#[test]
6980fn btultra2_sparse_skip_matching_preserves_tail_cross_block_match() {
6981 let mut matcher = HcMatchGenerator::new(1 << 20);
6982 matcher.configure(
6983 BTULTRA2_HC_CONFIG_L22,
6984 super::strategy::StrategyTag::BtUltra2,
6985 20,
6986 );
6987 let tail = b"Bt9kLm2Rp";
6988 let mut first = deterministic_high_entropy_bytes(0xA9C3_7F21_D4E8_510B, 4096);
6989 let tail_start = first.len() - tail.len();
6990 first[tail_start..].copy_from_slice(tail);
6991 matcher.table.add_data(first, |_| {});
6992 matcher.skip_matching(Some(true));
6993
6994 let mut second = tail.to_vec();
6995 second.extend_from_slice(b"after-tail-literals");
6996 matcher.table.add_data(second, |_| {});
6997
6998 let mut first_sequence = None;
6999 matcher.start_matching(|seq| {
7000 if first_sequence.is_some() {
7001 return;
7002 }
7003 first_sequence = Some(match seq {
7004 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
7005 Sequence::Triple {
7006 literals,
7007 offset,
7008 match_len,
7009 } => (literals.len(), offset, match_len),
7010 });
7011 });
7012
7013 let (literals_len, offset, match_len) =
7014 first_sequence.expect("expected at least one sequence after sparse BT skip");
7015 assert_eq!(
7016 literals_len, 0,
7017 "BT sparse skip should preserve an immediate boundary match"
7018 );
7019 assert_eq!(
7020 offset,
7021 tail.len(),
7022 "first BT match should reference previous tail"
7023 );
7024 assert!(
7025 match_len >= tail.len(),
7026 "BT sparse skip must seed the dense tail for cross-block matching"
7027 );
7028}
7029
7030#[test]
7031fn hc_sparse_skip_matching_does_not_reinsert_sparse_tail_positions() {
7032 let mut matcher = HcMatchGenerator::new(1 << 22);
7033 let first = deterministic_high_entropy_bytes(0xC2B2_AE3D_27D4_EB4F, 4096);
7034 matcher.table.add_data(first.clone(), |_| {});
7035 matcher.skip_matching(Some(true));
7036
7037 let current_len = first.len();
7038 let current_abs_start =
7039 matcher.table.history_abs_start + matcher.table.window_size - current_len;
7040 let current_abs_end = current_abs_start + current_len;
7041 let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
7042 let tail_start = current_abs_end
7043 .saturating_sub(dense_tail)
7044 .max(matcher.table.history_abs_start)
7045 .max(current_abs_start);
7046
7047 let overlap_pos = (tail_start..current_abs_end)
7048 .find(|&pos| (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP))
7049 .expect("fixture should contain at least one sparse-grid overlap in dense tail");
7050
7051 let rel = matcher
7052 .table
7053 .relative_position(overlap_pos)
7054 .expect("overlap position should be representable as relative position");
7055 let chain_idx = rel as usize & ((1 << matcher.table.chain_log) - 1);
7056 assert_ne!(
7057 matcher.table.chain_table[chain_idx],
7058 rel + 1,
7059 "sparse-grid tail positions must not be reinserted (self-loop chain entry)"
7060 );
7061}
7062
7063#[test]
7064fn hc_compact_history_drains_when_threshold_crossed() {
7065 let mut hc = HcMatchGenerator::new(8);
7066 hc.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
7067 hc.table.history_start = 16;
7068 hc.table.compact_history();
7069 assert_eq!(hc.table.history_start, 0);
7070 assert_eq!(hc.table.history, b"qrstuvwxyz");
7071}
7072
7073#[test]
7074fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
7075 let mut hc = HcMatchGenerator::new(32);
7076 hc.table.history = b"abcdefghijklmnop".to_vec();
7077 hc.table.history_abs_start = 0;
7078 hc.table.position_base = 1;
7079 hc.table.ensure_tables();
7080 let before_hash = hc.table.hash_table.clone();
7081 let before_chain = hc.table.chain_table.clone();
7082
7083 hc.table.insert_position_no_rebase(0);
7084
7085 assert_eq!(hc.table.hash_table, before_hash);
7086 assert_eq!(hc.table.chain_table, before_chain);
7087}
7088
7089#[test]
7090fn hc_insert_positions_advances_next_to_update3_for_contiguous_range() {
7091 let mut hc = HcMatchGenerator::new(64);
7092 hc.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
7093 hc.table.history_start = 0;
7094 hc.table.history_abs_start = 0;
7095 hc.table.position_base = 0;
7096 hc.table.ensure_tables();
7097 hc.table.next_to_update3 = 0;
7098
7099 hc.table.insert_positions(0, 9);
7100
7101 assert_eq!(
7102 hc.table.next_to_update3, 9,
7103 "contiguous insert_positions should advance hash3 update cursor"
7104 );
7105}
7106
7107#[test]
7108fn hc_insert_positions_with_step_keeps_next_to_update3_cursor_for_sparse_ranges() {
7109 let mut hc = HcMatchGenerator::new(64);
7110 hc.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
7111 hc.table.history_start = 0;
7112 hc.table.history_abs_start = 0;
7113 hc.table.position_base = 0;
7114 hc.table.ensure_tables();
7115 hc.table.next_to_update3 = 0;
7116
7117 hc.table.insert_positions_with_step(0, 16, 4);
7118
7119 assert_eq!(
7120 hc.table.next_to_update3, 0,
7121 "sparse insert_positions_with_step must not mark skipped positions as hash3-updated"
7122 );
7123}
7124
7125#[cfg(any())]
7126#[test]
7128fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
7129 let mut driver = MatchGeneratorDriver::new(8, 1);
7130 driver.reset(CompressionLevel::Fastest);
7131 driver.simple_mut().max_window_size = 8;
7134 driver.reported_window_size = 8;
7135
7136 let base_window = driver.simple_mut().max_window_size;
7137 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
7138 assert_eq!(driver.simple_mut().max_window_size, base_window + 24);
7139
7140 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
7141 let mut space = driver.get_next_space();
7142 space.clear();
7143 space.extend_from_slice(block);
7144 driver.commit_space(space);
7145 driver.skip_matching_with_hint(None);
7146 }
7147
7148 assert_eq!(
7149 driver.dictionary_retained_budget, 0,
7150 "dictionary budget should be fully retired once primed dict slices are evicted"
7151 );
7152 assert_eq!(
7153 driver.simple_mut().max_window_size,
7154 base_window,
7155 "retired dictionary budget must not remain reusable for live history"
7156 );
7157}
7158
7159#[test]
7160fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
7161 let mut driver = MatchGeneratorDriver::new(8, 1);
7162 driver.reset(CompressionLevel::Level(3));
7163 driver.dfast_matcher_mut().max_window_size = 8;
7166 driver.reported_window_size = 8;
7167
7168 let base_window = driver.dfast_matcher().max_window_size;
7169 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
7170 assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);
7171
7172 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
7173 let mut space = driver.get_next_space();
7174 space.clear();
7175 space.extend_from_slice(block);
7176 driver.commit_space(space);
7177 driver.skip_matching_with_hint(None);
7178 }
7179
7180 assert_eq!(
7181 driver.dictionary_retained_budget, 0,
7182 "dictionary budget should be fully retired once primed dict slices are evicted"
7183 );
7184 assert_eq!(
7185 driver.dfast_matcher().max_window_size,
7186 base_window,
7187 "retired dictionary budget must not remain reusable for live history"
7188 );
7189}
7190
7191#[test]
7192fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
7193 let mut driver = MatchGeneratorDriver::new(8, 1);
7194 driver.reset(CompressionLevel::Better);
7195
7196 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
7197
7198 let mut space = driver.get_next_space();
7199 space.clear();
7200 space.extend_from_slice(b"abcdefgh");
7203 driver.commit_space(space);
7204
7205 let mut saw_match = false;
7206 driver.start_matching(|seq| {
7207 if let Sequence::Triple {
7208 literals,
7209 offset,
7210 match_len,
7211 } = seq
7212 && literals.is_empty()
7213 && offset == 8
7214 && match_len >= HC_MIN_MATCH_LEN
7215 {
7216 saw_match = true;
7217 }
7218 });
7219
7220 assert!(
7221 saw_match,
7222 "hash-chain backend should match dictionary-primed history in first full block"
7223 );
7224}
7225
7226#[test]
7227fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
7228 let mut driver = MatchGeneratorDriver::new(8, 1);
7229 driver.reset(CompressionLevel::Better);
7230 driver.hc_matcher_mut().table.max_window_size = 8;
7232 driver.reported_window_size = 8;
7233
7234 let base_window = driver.hc_matcher().table.max_window_size;
7235 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
7236 assert_eq!(driver.hc_matcher().table.max_window_size, base_window + 24);
7237
7238 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
7239 let mut space = driver.get_next_space();
7240 space.clear();
7241 space.extend_from_slice(block);
7242 driver.commit_space(space);
7243 driver.skip_matching_with_hint(None);
7244 }
7245
7246 assert_eq!(
7247 driver.dictionary_retained_budget, 0,
7248 "dictionary budget should be fully retired once primed dict slices are evicted"
7249 );
7250 assert_eq!(
7251 driver.hc_matcher().table.max_window_size,
7252 base_window,
7253 "retired dictionary budget must not remain reusable for live history"
7254 );
7255}
7256
7257#[test]
7258fn hc_rebases_positions_after_u32_boundary() {
7259 let mut matcher = HcMatchGenerator::new(64);
7260 matcher.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
7261 matcher.table.ensure_tables();
7262 matcher.table.position_base = 0;
7263 let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
7264 Ok(value) => value,
7265 Err(_) => return,
7266 };
7267 matcher.table.history_abs_start = history_abs_start;
7270 matcher.skip_matching(None);
7271 assert_eq!(
7272 matcher.table.position_base, matcher.table.history_abs_start,
7273 "rebase should anchor to the oldest live absolute position"
7274 );
7275
7276 assert!(
7277 matcher
7278 .table
7279 .hash_table
7280 .iter()
7281 .any(|entry| *entry != HC_EMPTY),
7282 "HC hash table should still be populated after crossing u32 boundary"
7283 );
7284
7285 let abs_pos = matcher.table.history_abs_start + 10;
7287 let candidates = matcher.hc.chain_candidates(&matcher.table, abs_pos);
7288 assert!(
7289 candidates.iter().any(|candidate| *candidate != usize::MAX),
7290 "chain_candidates should return valid matches after rebase"
7291 );
7292}
7293
7294#[test]
7295fn hc_rebase_rebuilds_only_inserted_prefix() {
7296 let mut matcher = HcMatchGenerator::new(64);
7297 matcher.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
7298 matcher.table.ensure_tables();
7299 matcher.table.position_base = 0;
7300 let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
7301 Ok(value) => value,
7302 Err(_) => return,
7303 };
7304 matcher.table.history_abs_start = history_abs_start;
7305 let abs_pos = matcher.table.history_abs_start + 6;
7306
7307 let mut expected = HcMatchGenerator::new(64);
7308 expected.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
7309 expected.table.ensure_tables();
7310 expected.table.history_abs_start = history_abs_start;
7311 expected.table.position_base = expected.table.history_abs_start;
7312 expected.table.hash_table.fill(HC_EMPTY);
7313 expected.table.chain_table.fill(HC_EMPTY);
7314 for pos in expected.table.history_abs_start..abs_pos {
7315 expected.table.insert_position_no_rebase(pos);
7316 }
7317
7318 matcher.table.maybe_rebase_positions(abs_pos);
7319
7320 assert_eq!(
7321 matcher.table.position_base, matcher.table.history_abs_start,
7322 "rebase should still anchor to the oldest live absolute position"
7323 );
7324 assert_eq!(
7325 matcher.table.hash_table, expected.table.hash_table,
7326 "rebase must rebuild only positions already inserted before abs_pos"
7327 );
7328 assert_eq!(
7329 matcher.table.chain_table, expected.table.chain_table,
7330 "future positions must not be pre-seeded into HC chains during rebase"
7331 );
7332}
7333
7334#[cfg(any())] #[test]
7336fn suffix_store_with_single_slot_does_not_panic_on_keying() {
7337 let mut suffixes = SuffixStore::with_capacity(1);
7338 suffixes.insert(b"abcde", 0);
7339 assert!(suffixes.contains_key(b"abcde"));
7340 assert_eq!(suffixes.get(b"abcde"), Some(0));
7341}
7342
7343#[cfg(any())]
7344#[test]
7346fn fastest_reset_uses_interleaved_hash_fill_step() {
7347 let mut driver = MatchGeneratorDriver::new(32, 2);
7348
7349 driver.reset(CompressionLevel::Uncompressed);
7350 assert_eq!(driver.simple().hash_fill_step, 1);
7351
7352 driver.reset(CompressionLevel::Fastest);
7353 assert_eq!(driver.simple().hash_fill_step, FAST_HASH_FILL_STEP);
7354
7355 driver.reset(CompressionLevel::Better);
7358 assert_eq!(
7359 driver.active_backend(),
7360 super::strategy::BackendTag::HashChain
7361 );
7362 assert_eq!(driver.window_size(), (1u64 << 23));
7363 assert_eq!(driver.hc_matcher().hc.lazy_depth, 2);
7364}
7365
7366#[cfg(any())] #[test]
7368fn simple_matcher_updates_offset_history_after_emitting_match() {
7369 let mut matcher = MatchGenerator::new(64);
7370 matcher.add_data(
7371 b"abcdeabcdeabcde".to_vec(),
7372 SuffixStore::with_capacity(64),
7373 |_, _| {},
7374 );
7375
7376 assert!(matcher.next_sequence(|seq| {
7377 assert_eq!(
7378 seq,
7379 Sequence::Triple {
7380 literals: b"abcde",
7381 offset: 5,
7382 match_len: 10,
7383 }
7384 );
7385 }));
7386 assert_eq!(matcher.offset_hist, [5, 1, 4]);
7387}
7388
7389#[cfg(any())] #[test]
7391fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
7392 let mut matcher = MatchGenerator::new(64);
7393 matcher.add_data(
7394 b"abcdefghijabcdefghij".to_vec(),
7395 SuffixStore::with_capacity(64),
7396 |_, _| {},
7397 );
7398
7399 matcher.suffix_idx = 10;
7400 matcher.last_idx_in_sequence = 10;
7401 matcher.offset_hist = [99, 10, 4];
7402
7403 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
7404 assert_eq!(candidate, Some((10, 10)));
7405}
7406
7407#[cfg(any())] #[test]
7409fn simple_matcher_repcode_can_target_previous_window_entry() {
7410 let mut matcher = MatchGenerator::new(64);
7411 matcher.add_data(
7412 b"abcdefghij".to_vec(),
7413 SuffixStore::with_capacity(64),
7414 |_, _| {},
7415 );
7416 matcher.skip_matching();
7417 matcher.add_data(
7418 b"abcdefghij".to_vec(),
7419 SuffixStore::with_capacity(64),
7420 |_, _| {},
7421 );
7422
7423 matcher.offset_hist = [99, 10, 4];
7424
7425 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
7426 assert_eq!(candidate, Some((10, 10)));
7427}
7428
7429#[cfg(any())] #[test]
7431fn simple_matcher_zero_literal_repcode_checks_rep2() {
7432 let mut matcher = MatchGenerator::new(64);
7433 matcher.add_data(
7434 b"abcdefghijabcdefghij".to_vec(),
7435 SuffixStore::with_capacity(64),
7436 |_, _| {},
7437 );
7438 matcher.suffix_idx = 10;
7439 matcher.last_idx_in_sequence = 10;
7440 matcher.offset_hist = [99, 4, 10];
7442
7443 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
7444 assert_eq!(candidate, Some((10, 10)));
7445}
7446
7447#[cfg(any())] #[test]
7449fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
7450 let mut matcher = MatchGenerator::new(64);
7451 matcher.add_data(
7452 b"abcdefghijabcdefghij".to_vec(),
7453 SuffixStore::with_capacity(64),
7454 |_, _| {},
7455 );
7456 matcher.suffix_idx = 10;
7457 matcher.last_idx_in_sequence = 10;
7458 matcher.offset_hist = [11, 4, 99];
7460
7461 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
7462 assert_eq!(candidate, Some((10, 10)));
7463}
7464
7465#[cfg(any())] #[test]
7467fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
7468 let mut matcher = MatchGenerator::new(64);
7469 matcher.add_data(
7470 b"abcdefghij".to_vec(),
7471 SuffixStore::with_capacity(64),
7472 |_, _| {},
7473 );
7474 matcher.skip_matching();
7475 matcher.add_data(
7476 b"klmnopqrst".to_vec(),
7477 SuffixStore::with_capacity(64),
7478 |_, _| {},
7479 );
7480 matcher.suffix_idx = 3;
7481
7482 let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
7483 assert_eq!(candidate, None);
7484}
7485
7486#[cfg(any())] #[test]
7488fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
7489 let mut matcher = MatchGenerator::new(64);
7490 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
7491 matcher.add_data(
7492 b"abcdefghijklmnop".to_vec(),
7493 SuffixStore::with_capacity(64),
7494 |_, _| {},
7495 );
7496 matcher.skip_matching();
7497 matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});
7498
7499 assert!(matcher.next_sequence(|seq| {
7500 assert_eq!(
7501 seq,
7502 Sequence::Triple {
7503 literals: b"",
7504 offset: 15,
7505 match_len: 5,
7506 }
7507 );
7508 }));
7509 assert!(!matcher.next_sequence(|_| {}));
7510}
7511
7512#[cfg(any())] #[test]
7514fn simple_matcher_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
7515 let mut matcher = MatchGenerator::new(128);
7516 let first = b"abcdefghijklmnopqrstuvwxyz012345".to_vec();
7517 let sparse_probe = first[3..3 + MIN_MATCH_LEN].to_vec();
7518 let tail_start = first.len() - MIN_MATCH_LEN;
7519 let tail_probe = first[tail_start..tail_start + MIN_MATCH_LEN].to_vec();
7520 matcher.add_data(first, SuffixStore::with_capacity(256), |_, _| {});
7521
7522 matcher.skip_matching_with_hint(Some(true));
7523
7524 matcher.add_data(sparse_probe, SuffixStore::with_capacity(256), |_, _| {});
7526 let mut sparse_first_is_literals = None;
7527 assert!(matcher.next_sequence(|seq| {
7528 if sparse_first_is_literals.is_none() {
7529 sparse_first_is_literals = Some(matches!(seq, Sequence::Literals { .. }));
7530 }
7531 }));
7532 assert!(
7533 sparse_first_is_literals.unwrap_or(false),
7534 "sparse-start probe should not produce an immediate match"
7535 );
7536
7537 let mut matcher = MatchGenerator::new(128);
7539 matcher.add_data(
7540 b"abcdefghijklmnopqrstuvwxyz012345".to_vec(),
7541 SuffixStore::with_capacity(256),
7542 |_, _| {},
7543 );
7544 matcher.skip_matching_with_hint(Some(true));
7545 matcher.add_data(tail_probe, SuffixStore::with_capacity(256), |_, _| {});
7546 let mut tail_first_is_immediate_match = None;
7547 assert!(matcher.next_sequence(|seq| {
7548 if tail_first_is_immediate_match.is_none() {
7549 tail_first_is_immediate_match =
7550 Some(matches!(seq, Sequence::Triple { literals, .. } if literals.is_empty()));
7551 }
7552 }));
7553 assert!(
7554 tail_first_is_immediate_match.unwrap_or(false),
7555 "dense tail probe should match immediately at block start"
7556 );
7557}
7558
7559#[cfg(any())] #[test]
7561fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
7562 let mut matcher = MatchGenerator::new(64);
7563 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
7564 matcher.add_data(
7565 b"01234abcde".to_vec(),
7566 SuffixStore::with_capacity(64),
7567 |_, _| {},
7568 );
7569 matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);
7570
7571 let last = matcher.window.last().unwrap();
7572 let tail = &last.data[5..10];
7573 assert_eq!(last.suffixes.get(tail), Some(5));
7574}
7575
7576#[cfg(any())] #[test]
7578fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
7579 let mut matcher = MatchGenerator::new(128);
7580 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
7581 matcher.add_data(
7582 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
7583 SuffixStore::with_capacity(1 << 16),
7584 |_, _| {},
7585 );
7586
7587 matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);
7588
7589 let last = matcher.window.last().unwrap();
7590 let first_key = &last.data[..MIN_MATCH_LEN];
7591 assert_eq!(last.suffixes.get(first_key), None);
7592}
7593
7594#[cfg(any())] #[test]
7596fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
7597 let mut matcher = MatchGenerator::new(128);
7598 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
7599 matcher.add_data(
7600 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
7601 SuffixStore::with_capacity(1 << 16),
7602 |_, _| {},
7603 );
7604
7605 matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);
7606
7607 let last = matcher.window.last().unwrap();
7608 for pos in [0usize, 3, 6, 9, 12] {
7609 let key = &last.data[pos..pos + MIN_MATCH_LEN];
7610 assert_eq!(
7611 last.suffixes.get(key),
7612 Some(pos),
7613 "expected interleaved suffix registration at pos {pos}"
7614 );
7615 }
7616}
7617
7618#[test]
7619fn dfast_skip_matching_handles_window_eviction() {
7620 let mut matcher = DfastMatchGenerator::new(16);
7621
7622 matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
7623 matcher.skip_matching(None);
7624 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
7625 matcher.skip_matching(None);
7626 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
7627
7628 let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
7629 matcher.start_matching(|seq| match seq {
7630 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
7631 Sequence::Triple {
7632 literals,
7633 offset,
7634 match_len,
7635 } => {
7636 reconstructed.extend_from_slice(literals);
7637 let start = reconstructed.len() - offset;
7638 for i in 0..match_len {
7639 let byte = reconstructed[start + i];
7640 reconstructed.push(byte);
7641 }
7642 }
7643 });
7644
7645 assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
7646}
7647
7648#[test]
7649fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
7650 let mut matcher = DfastMatchGenerator::new(8);
7651
7652 let mut first = Vec::with_capacity(64);
7653 first.extend_from_slice(b"abcdefgh");
7654 matcher.add_data(first, |_| {});
7655
7656 let mut second = Vec::with_capacity(64);
7657 second.extend_from_slice(b"ijklmnop");
7658
7659 let mut observed_evicted_len = None;
7660 matcher.add_data(second, |data| {
7661 observed_evicted_len = Some(data.len());
7662 });
7663
7664 assert_eq!(
7665 observed_evicted_len,
7666 Some(8),
7667 "eviction callback must report evicted byte length, not backing capacity"
7668 );
7669}
7670
7671#[test]
7735fn dfast_commit_space_eviction_uses_window_size_delta() {
7736 use crate::encoding::CompressionLevel;
7737
7738 let mut driver = MatchGeneratorDriver::new(10, 1);
7739 driver.reset(CompressionLevel::Level(3));
7740 assert!(matches!(driver.storage, MatcherStorage::Dfast(_)));
7741
7742 driver.dfast_matcher_mut().max_window_size = 10;
7747 driver.dictionary_retained_budget = 100;
7748
7749 let mut space1 = Vec::with_capacity(64);
7750 space1.extend_from_slice(b"abcd");
7751 driver.commit_space(space1);
7752 assert_eq!(
7753 driver.dictionary_retained_budget, 100,
7754 "1st commit fills window 0 → 4, no eviction, no retire"
7755 );
7756
7757 let mut space2 = Vec::with_capacity(64);
7758 space2.extend_from_slice(b"efgh");
7759 driver.commit_space(space2);
7760 assert_eq!(
7761 driver.dictionary_retained_budget, 100,
7762 "2nd commit fills window 4 → 8, no eviction, no retire"
7763 );
7764
7765 let mut space3 = Vec::with_capacity(64);
7766 space3.extend_from_slice(b"ijklm");
7767 driver.commit_space(space3);
7768 assert_eq!(
7769 driver.dictionary_retained_budget, 87,
7770 "3rd commit + trim_after_budget_retire cascade. With the fix \
7771 (evicted=4 from window_size delta) the cascade reclaims 100 \
7772 → 96 → 92 → 87. With the bug (evicted=5 from data.len()) the \
7773 3rd commit would panic on `data.len() <= max_window_size` \
7774 after the 2nd commit's cascade had already shrunk \
7775 max_window_size to 0."
7776 );
7777 assert_eq!(
7778 driver.dfast_matcher_mut().max_window_size,
7779 0,
7780 "cascade drains max_window_size to 0 once budget reclaim \
7781 exceeds the initial window size"
7782 );
7783}
7784
7785#[test]
7786fn dfast_trim_to_window_evicts_oldest_block_by_length() {
7787 let mut matcher = DfastMatchGenerator::new(16);
7796
7797 let mut first = Vec::with_capacity(64);
7798 first.extend_from_slice(b"abcdefgh");
7799 matcher.add_data(first, |_| {});
7800
7801 let mut second = Vec::with_capacity(64);
7802 second.extend_from_slice(b"ijklmnop");
7803 matcher.add_data(second, |_| {});
7804
7805 assert_eq!(matcher.window_size, 16);
7806 assert_eq!(matcher.window_blocks.len(), 2);
7807
7808 matcher.max_window_size = 8;
7809
7810 matcher.trim_to_window();
7811
7812 assert_eq!(
7820 matcher.window_size, 8,
7821 "exactly one 8-byte block must remain"
7822 );
7823 assert_eq!(matcher.window_blocks.len(), 1);
7824 assert_eq!(matcher.history_abs_start, 8);
7825}
7826
7827#[test]
7828fn dfast_inserts_tail_positions_for_next_block_matching() {
7829 let mut matcher = DfastMatchGenerator::new(1 << 22);
7830
7831 matcher.add_data(b"012345bcdea".to_vec(), |_| {});
7832 let mut history = Vec::new();
7833 matcher.start_matching(|seq| match seq {
7834 Sequence::Literals { literals } => history.extend_from_slice(literals),
7835 Sequence::Triple { .. } => unreachable!("first block should not match history"),
7836 });
7837 assert_eq!(history, b"012345bcdea");
7838
7839 matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
7840 let mut saw_first_sequence = false;
7841 matcher.start_matching(|seq| {
7842 assert!(!saw_first_sequence, "expected a single cross-block match");
7843 saw_first_sequence = true;
7844 match seq {
7845 Sequence::Literals { .. } => {
7846 panic!("expected tail-anchored cross-block match before any literals")
7847 }
7848 Sequence::Triple {
7849 literals,
7850 offset,
7851 match_len,
7852 } => {
7853 assert_eq!(literals, b"");
7854 assert_eq!(offset, 5);
7855 assert_eq!(match_len, 11);
7856 let start = history.len() - offset;
7857 for i in 0..match_len {
7858 let byte = history[start + i];
7859 history.push(byte);
7860 }
7861 }
7862 }
7863 });
7864
7865 assert!(
7866 saw_first_sequence,
7867 "expected tail-anchored cross-block match"
7868 );
7869 assert_eq!(history, b"012345bcdeabcdeabcdeab");
7870}
7871
7872#[test]
7899fn hashchain_inserts_tail_positions_for_next_block_matching() {
7900 let mut matcher = HcMatchGenerator::new(1 << 22);
7901 matcher.configure(HC_CONFIG, super::strategy::StrategyTag::Lazy, 22);
7902
7903 matcher.table.add_data(b"PQRSTBCD".to_vec(), |_| {});
7904 let mut history = alloc::vec::Vec::new();
7905 matcher.start_matching(|seq| match seq {
7906 Sequence::Literals { literals } => history.extend_from_slice(literals),
7907 Sequence::Triple { .. } => unreachable!("first block has no internal repeats"),
7908 });
7909 assert_eq!(history, b"PQRSTBCD");
7910
7911 matcher.table.add_data(b"BCDBCDBCDB".to_vec(), |_| {});
7912 let mut first_sequence_offset: Option<usize> = None;
7913 let mut first_sequence_match_len: Option<usize> = None;
7914 matcher.start_matching(|seq| {
7915 if first_sequence_offset.is_some() {
7916 return;
7917 }
7918 match seq {
7919 Sequence::Literals { .. } => {
7920 panic!(
7921 "expected tail-anchored cross-block match before any literals — \
7922 backfill_boundary_positions did not seed positions 5/6/7"
7923 )
7924 }
7925 Sequence::Triple {
7926 literals,
7927 offset,
7928 match_len,
7929 } => {
7930 assert_eq!(literals, b"", "no leading literals on the boundary match");
7931 first_sequence_offset = Some(offset);
7932 first_sequence_match_len = Some(match_len);
7933 }
7934 }
7935 });
7936
7937 let offset = first_sequence_offset.expect(
7938 "expected tail-anchored cross-block match emitted from backfill_boundary_positions",
7939 );
7940 assert!(
7941 (1..=3).contains(&offset),
7942 "boundary match offset {offset} must point into the unhashable tail \
7943 (positions 5/6/7 of an 8-byte block 1) so the test specifically \
7944 locks down backfill_boundary_positions",
7945 );
7946 assert_eq!(
7947 offset, 3,
7948 "candidate position must land at 5 (= block_1_len - 3) so the 4-byte \
7949 window `data[5..9] = b\"BCDB\"` matches block 2's first hash lookup",
7950 );
7951 let match_len = first_sequence_match_len.unwrap();
7952 assert!(
7953 match_len >= HC_MIN_MATCH_LEN,
7954 "match_len {match_len} must clear the HC min-match floor",
7955 );
7956}
7957
7958#[test]
7959fn dfast_dense_skip_matching_backfills_previous_tail_for_next_block() {
7960 let mut matcher = DfastMatchGenerator::new(1 << 22);
7961 let tail = b"Qz9kLm2Rp";
7962 let mut first = b"0123456789abcdef".to_vec();
7963 first.extend_from_slice(tail);
7964 matcher.add_data(first.clone(), |_| {});
7965 matcher.skip_matching(Some(false));
7966
7967 let mut second = tail.to_vec();
7968 second.extend_from_slice(b"after-tail-literals");
7969 matcher.add_data(second, |_| {});
7970
7971 let mut first_sequence = None;
7972 matcher.start_matching(|seq| {
7973 if first_sequence.is_some() {
7974 return;
7975 }
7976 first_sequence = Some(match seq {
7977 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
7978 Sequence::Triple {
7979 literals,
7980 offset,
7981 match_len,
7982 } => (literals.len(), offset, match_len),
7983 });
7984 });
7985
7986 let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
7987 assert_eq!(
7988 lit_len, 0,
7989 "expected immediate cross-block match at block start"
7990 );
7991 assert_eq!(
7992 offset,
7993 tail.len(),
7994 "expected dense skip to preserve cross-boundary tail match"
7995 );
7996 assert!(
7997 match_len >= DFAST_MIN_MATCH_LEN,
7998 "match length should satisfy dfast minimum match length"
7999 );
8000}
8001
8002#[test]
8003fn dfast_sparse_skip_matching_preserves_tail_cross_block_match() {
8004 let mut matcher = DfastMatchGenerator::new(1 << 22);
8005 let tail = b"Qz9kLm2Rp";
8006 let mut first = deterministic_high_entropy_bytes(0x9E37_79B9_7F4A_7C15, 4096);
8007 let tail_start = first.len() - tail.len();
8008 first[tail_start..].copy_from_slice(tail);
8009 matcher.add_data(first.clone(), |_| {});
8010
8011 matcher.skip_matching(Some(true));
8012
8013 let mut second = tail.to_vec();
8014 second.extend_from_slice(b"after-tail-literals");
8015 matcher.add_data(second, |_| {});
8016
8017 let mut first_sequence = None;
8018 matcher.start_matching(|seq| {
8019 if first_sequence.is_some() {
8020 return;
8021 }
8022 first_sequence = Some(match seq {
8023 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
8024 Sequence::Triple {
8025 literals,
8026 offset,
8027 match_len,
8028 } => (literals.len(), offset, match_len),
8029 });
8030 });
8031
8032 let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
8033 assert_eq!(
8034 lit_len, 0,
8035 "expected immediate cross-block match at block start"
8036 );
8037 assert_eq!(
8038 offset,
8039 tail.len(),
8040 "expected match against densely seeded tail"
8041 );
8042 assert!(
8043 match_len >= DFAST_MIN_MATCH_LEN,
8044 "match length should satisfy dfast minimum match length"
8045 );
8046}
8047
8048#[test]
8049fn dfast_skip_matching_dense_backfills_newly_hashable_long_tail_positions() {
8050 let mut matcher = DfastMatchGenerator::new(1 << 22);
8051 let first = deterministic_high_entropy_bytes(0x7A64_0315_D4E1_91C3, 4096);
8052 let first_len = first.len();
8053 matcher.add_data(first, |_| {});
8054 matcher.skip_matching_dense();
8055
8056 matcher.add_data(alloc::vec![0xAB], |_| {});
8059 matcher.skip_matching_dense();
8060
8061 let target_abs_pos = first_len - 7;
8062 let target_rel = target_abs_pos - matcher.history_abs_start;
8063 let live = matcher.live_history();
8064 assert!(
8065 target_rel + 8 <= live.len(),
8066 "fixture must make the boundary start long-hashable"
8067 );
8068 let long_hash = matcher.long_hash_index(&live[target_rel..]);
8069 let target_slot = matcher.pack_slot(target_abs_pos);
8070 assert_ne!(
8073 target_slot, DFAST_EMPTY_SLOT,
8074 "pack_slot must never return the empty-slot sentinel for a real position"
8075 );
8076 assert_eq!(
8077 matcher.long_hash[long_hash], target_slot,
8078 "dense skip must seed long-hash entry for newly hashable boundary start"
8079 );
8080}
8081
8082#[test]
8083fn dfast_seed_remaining_hashable_starts_seeds_last_short_hash_positions() {
8084 let mut matcher = DfastMatchGenerator::new(1 << 20);
8085 let block = deterministic_high_entropy_bytes(0x13F0_9A6D_55CE_7B21, 64);
8086 matcher.add_data(block, |_| {});
8087 matcher.ensure_hash_tables();
8088
8089 let current_len = matcher.window_blocks.back().copied().unwrap_or(0);
8090 let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
8091 let seed_start = current_len - DFAST_MIN_MATCH_LEN;
8092 matcher.seed_remaining_hashable_starts(current_abs_start, current_len, seed_start);
8093
8094 let target_abs_pos = current_abs_start + current_len - 4;
8095 let target_rel = target_abs_pos - matcher.history_abs_start;
8096 let live = matcher.live_history();
8097 assert!(
8098 target_rel + 4 <= live.len(),
8099 "fixture must leave the last short-hash start valid"
8100 );
8101 let short_hash = matcher.short_hash_index(&live[target_rel..]);
8102 let target_slot = matcher.pack_slot(target_abs_pos);
8103 assert_ne!(
8104 target_slot, DFAST_EMPTY_SLOT,
8105 "pack_slot must never return the empty-slot sentinel for a real position"
8106 );
8107 assert_eq!(
8108 matcher.short_hash[short_hash], target_slot,
8109 "tail seeding must include the last 4-byte-hashable start"
8110 );
8111}
8112
8113#[test]
8114fn dfast_seed_remaining_hashable_starts_handles_pos_at_block_end() {
8115 let mut matcher = DfastMatchGenerator::new(1 << 20);
8116 let block = deterministic_high_entropy_bytes(0x7BB2_DA91_441E_C0EF, 64);
8117 matcher.add_data(block, |_| {});
8118 matcher.ensure_hash_tables();
8119
8120 let current_len = matcher.window_blocks.back().copied().unwrap_or(0);
8121 let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
8122 matcher.seed_remaining_hashable_starts(current_abs_start, current_len, current_len);
8123
8124 let target_abs_pos = current_abs_start + current_len - 4;
8125 let target_rel = target_abs_pos - matcher.history_abs_start;
8126 let live = matcher.live_history();
8127 assert!(
8128 target_rel + 4 <= live.len(),
8129 "fixture must leave the last short-hash start valid"
8130 );
8131 let short_hash = matcher.short_hash_index(&live[target_rel..]);
8132 let target_slot = matcher.pack_slot(target_abs_pos);
8133 assert_ne!(
8134 target_slot, DFAST_EMPTY_SLOT,
8135 "pack_slot must never return the empty-slot sentinel for a real position"
8136 );
8137 assert_eq!(
8138 matcher.short_hash[short_hash], target_slot,
8139 "tail seeding must still include the last 4-byte-hashable start when pos is at block end"
8140 );
8141}
8142
8143#[test]
8159fn dfast_ensure_room_for_rebases_above_guard_band() {
8160 let mut dfast = DfastMatchGenerator::new(1 << 22);
8161 dfast.set_hash_bits(10);
8162 dfast.ensure_hash_tables();
8163
8164 let early_abs = 1024usize;
8172 let early_packed = dfast.pack_slot(early_abs);
8173 assert_ne!(early_packed, DFAST_EMPTY_SLOT);
8174 dfast.short_hash[0] = early_packed;
8175 dfast.long_hash[0] = early_packed;
8176
8177 let trigger_abs = (u32::MAX as usize) - (DFAST_REBASE_GUARD_BAND as usize) + 1;
8183 assert_eq!(dfast.position_base, 0);
8184 dfast.ensure_room_for(trigger_abs);
8185 assert_eq!(
8186 dfast.position_base, DFAST_REBASE_GUARD_BAND as usize,
8187 "rebase must advance position_base by DFAST_REBASE_GUARD_BAND"
8188 );
8189
8190 assert_eq!(
8196 dfast.short_hash[0], DFAST_EMPTY_SLOT,
8197 "pre-rebase short-hash entries below the reducer must become empty"
8198 );
8199 assert_eq!(
8200 dfast.long_hash[0], DFAST_EMPTY_SLOT,
8201 "pre-rebase long-hash entries below the reducer must become empty"
8202 );
8203
8204 let post_packed = dfast.pack_slot(trigger_abs);
8208 assert_ne!(post_packed, DFAST_EMPTY_SLOT);
8209 let unpacked = dfast.position_base + (post_packed as usize) - 1;
8210 assert_eq!(
8211 unpacked, trigger_abs,
8212 "post-rebase pack/unpack must round-trip the absolute position"
8213 );
8214}
8215
8216#[test]
8217fn dfast_sparse_skip_matching_backfills_previous_tail_for_consecutive_sparse_blocks() {
8218 let mut matcher = DfastMatchGenerator::new(1 << 22);
8219 let boundary_prefix = [0xFA, 0xFB, 0xFC];
8220 let boundary_suffix = [0xFD, 0xEE, 0xAD, 0xBE, 0xEF, 0x11, 0x22, 0x33];
8221
8222 let mut first = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
8223 let first_tail_start = first.len() - boundary_prefix.len();
8224 first[first_tail_start..].copy_from_slice(&boundary_prefix);
8225 matcher.add_data(first, |_| {});
8226 matcher.skip_matching(Some(true));
8227
8228 let mut second = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
8229 second[..boundary_suffix.len()].copy_from_slice(&boundary_suffix);
8230 matcher.add_data(second.clone(), |_| {});
8231 matcher.skip_matching(Some(true));
8232
8233 let mut third = boundary_prefix.to_vec();
8234 third.extend_from_slice(&boundary_suffix);
8235 third.extend_from_slice(b"-trailing-literals");
8236 matcher.add_data(third, |_| {});
8237
8238 let mut first_sequence = None;
8239 matcher.start_matching(|seq| {
8240 if first_sequence.is_some() {
8241 return;
8242 }
8243 first_sequence = Some(match seq {
8244 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
8245 Sequence::Triple {
8246 literals,
8247 offset,
8248 match_len,
8249 } => (literals.len(), offset, match_len),
8250 });
8251 });
8252
8253 let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
8254 assert_eq!(
8255 lit_len, 0,
8256 "expected immediate match from the prior sparse-skip boundary"
8257 );
8258 assert_eq!(
8259 offset,
8260 second.len() + boundary_prefix.len(),
8261 "expected match against backfilled first→second boundary start"
8262 );
8263 assert!(
8264 match_len >= DFAST_MIN_MATCH_LEN,
8265 "match length should satisfy dfast minimum match length"
8266 );
8267}
8268
8269#[test]
8270fn fastest_hint_iteration_23_sequences_reconstruct_source() {
8271 fn generate_data(seed: u64, len: usize) -> Vec<u8> {
8272 let mut state = seed;
8273 let mut data = Vec::with_capacity(len);
8274 for _ in 0..len {
8275 state = state
8276 .wrapping_mul(6364136223846793005)
8277 .wrapping_add(1442695040888963407);
8278 data.push((state >> 33) as u8);
8279 }
8280 data
8281 }
8282
8283 let i = 23u64;
8284 let len = (i * 89 % 16384) as usize;
8285 let mut data = generate_data(i, len);
8286 let repeat = data[128..256].to_vec();
8289 data.extend_from_slice(&repeat);
8290 data.extend_from_slice(&repeat);
8291
8292 let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
8293 driver.set_source_size_hint(data.len() as u64);
8294 driver.reset(CompressionLevel::Fastest);
8295 let mut space = driver.get_next_space();
8296 space[..data.len()].copy_from_slice(&data);
8297 space.truncate(data.len());
8298 driver.commit_space(space);
8299
8300 let mut rebuilt = Vec::with_capacity(data.len());
8301 let mut saw_triple = false;
8302 driver.start_matching(|seq| match seq {
8303 Sequence::Literals { literals } => rebuilt.extend_from_slice(literals),
8304 Sequence::Triple {
8305 literals,
8306 offset,
8307 match_len,
8308 } => {
8309 saw_triple = true;
8310 rebuilt.extend_from_slice(literals);
8311 assert!(offset > 0, "offset must be non-zero");
8312 assert!(
8313 offset <= rebuilt.len(),
8314 "offset must reference already-produced bytes: offset={} produced={}",
8315 offset,
8316 rebuilt.len()
8317 );
8318 let start = rebuilt.len() - offset;
8319 for idx in 0..match_len {
8320 let b = rebuilt[start + idx];
8321 rebuilt.push(b);
8322 }
8323 }
8324 });
8325
8326 let _ = saw_triple;
8336 assert_eq!(rebuilt, data);
8337}
8338
8339#[test]
8340fn fast_levels_dispatch_per_level_hash_log_and_mls() {
8341 let p1 = resolve_level_params(CompressionLevel::Level(1), None);
8344 assert_eq!(p1.fast_hash_log, 14);
8345 assert_eq!(p1.fast_mls, 7);
8346 assert_eq!(p1.fast_step_size, 2);
8347
8348 for n in -7..=-1 {
8355 let p = resolve_level_params(CompressionLevel::Level(n), None);
8356 assert_eq!(p.fast_hash_log, 14, "Level({n}) fast_hash_log");
8357 assert_eq!(p.fast_mls, 6, "Level({n}) fast_mls");
8358 let expected_step = ((-n) as usize) + 1;
8359 assert_eq!(p.fast_step_size, expected_step, "Level({n}) fast_step_size");
8360 }
8361
8362 let pf = resolve_level_params(CompressionLevel::Fastest, None);
8365 assert_eq!(
8366 (
8367 pf.window_log,
8368 pf.fast_hash_log,
8369 pf.fast_mls,
8370 pf.fast_step_size
8371 ),
8372 (19, 14, 6, 2),
8373 );
8374 let pu = resolve_level_params(CompressionLevel::Uncompressed, None);
8377 assert_eq!(
8378 (
8379 pu.window_log,
8380 pu.fast_hash_log,
8381 pu.fast_mls,
8382 pu.fast_step_size
8383 ),
8384 (17, 14, 6, 2),
8385 );
8386}
8387
8388#[test]
8396fn fast_levels_driver_wiring_threads_cparams_into_inner_matcher() {
8397 let mut driver = MatchGeneratorDriver::new(64 * 1024, 1);
8398
8399 let fast_levels = [
8400 CompressionLevel::Level(1),
8401 CompressionLevel::Fastest,
8402 CompressionLevel::Uncompressed,
8403 CompressionLevel::Level(-1),
8404 CompressionLevel::Level(-2),
8405 CompressionLevel::Level(-3),
8406 CompressionLevel::Level(-4),
8407 CompressionLevel::Level(-5),
8408 CompressionLevel::Level(-6),
8409 CompressionLevel::Level(-7),
8410 ];
8411
8412 for &level in &fast_levels {
8413 let p = resolve_level_params(level, None);
8414 assert_eq!(
8418 p.strategy_tag,
8419 super::strategy::StrategyTag::Fast,
8420 "{level:?} must resolve to Fast strategy",
8421 );
8422
8423 crate::encoding::Matcher::reset(&mut driver, CompressionLevel::Default);
8433
8434 crate::encoding::Matcher::reset(&mut driver, level);
8437
8438 let m = driver.simple_mut();
8439 assert_eq!(
8440 m.hash_log(),
8441 p.fast_hash_log,
8442 "{level:?}: inner matcher hash_log mismatch — argument swap?",
8443 );
8444 assert_eq!(
8445 m.mls(),
8446 p.fast_mls,
8447 "{level:?}: inner matcher mls mismatch — argument swap?",
8448 );
8449 assert_eq!(
8450 m.step_size(),
8451 p.fast_step_size,
8452 "{level:?}: inner matcher step_size mismatch — stale value carried from prior reset?",
8453 );
8454 }
8455}
8456
8457#[test]
8469fn lazy_band_target_len_matches_donor_default_table() {
8470 use zstd::zstd_safe::zstd_sys;
8471
8472 for level in 5..=15i32 {
8473 let donor = unsafe { zstd_sys::ZSTD_getCParams(level, 0, 0) };
8476 let params = resolve_level_params(CompressionLevel::Level(level), None);
8477 assert_eq!(
8478 params.hc.target_len as u32, donor.targetLength,
8479 "L{level}: hc.target_len ({}) must match donor cParams.targetLength ({})",
8480 params.hc.target_len, donor.targetLength
8481 );
8482 }
8483}