1use alloc::vec::Vec;
9use super::BETTER_WINDOW_LOG;
13use super::CompressionLevel;
14use super::Matcher;
15use super::Sequence;
16use super::blocks::encode_offset_with_history;
17use super::bt::BtMatcher;
18#[cfg(test)]
19use super::cost_model::HC_MAX_LIT;
20use super::cost_model::{
21 HC_BITCOST_MULTIPLIER, HC_FORMAT_MINMATCH, HC_OPT_NUM, HC_PREDEF_THRESHOLD, HcOptState,
22 HcOptimalCostProfile,
23};
24#[cfg(test)]
25use super::cost_model::{HC_BLOCKSIZE_MAX, HC_MAX_LL, HC_MAX_ML, HC_MAX_OFF, HcOptPriceType};
26use super::dfast::DfastMatchGenerator;
27#[cfg(test)]
28use super::match_table::helpers::FAST_HASH_FILL_STEP;
29#[cfg(test)]
30use super::match_table::helpers::INCOMPRESSIBLE_SKIP_STEP;
31use super::match_table::helpers::MIN_MATCH_LEN;
32#[cfg(test)]
33use super::match_table::helpers::common_prefix_len;
34#[cfg(test)]
35use super::opt::ldm::HcRawSeq;
36use super::opt::ldm::{HcOptLdmState, HcRawSeqStore};
37use super::opt::types::{
38 HcCandidateQuery, HcOptimalNode, HcOptimalPlanBuffers, HcOptimalPlanState, HcOptimalSequence,
39 MatchCandidate,
40};
41use super::row::RowMatchGenerator;
42use super::simple::{MatchGenerator, SuffixStore};
43#[cfg(all(
44 test,
45 feature = "std",
46 target_arch = "aarch64",
47 target_endian = "little"
48))]
49use std::arch::is_aarch64_feature_detected;
50#[cfg(all(test, feature = "std", target_arch = "x86_64"))]
51use std::arch::is_x86_feature_detected;
52
53pub(crate) const DFAST_MIN_MATCH_LEN: usize = 5;
54pub(crate) const DFAST_SHORT_HASH_LOOKAHEAD: usize = 4;
55pub(crate) const ROW_MIN_MATCH_LEN: usize = 6;
56pub(crate) const DFAST_TARGET_LEN: usize = 48;
57pub(crate) const DFAST_HASH_BITS: usize = 17;
80pub(crate) const DFAST_SHORT_HASH_BITS_DELTA: usize = 1;
86pub(crate) const DFAST_EMPTY_SLOT: u32 = 0;
94
95pub(crate) const DFAST_REBASE_GUARD_BAND: u32 = 1u32 << 30;
102pub(crate) const DFAST_SKIP_SEARCH_STRENGTH: usize = 6;
103pub(crate) const DFAST_SKIP_STEP_GROWTH_INTERVAL: usize = 1 << DFAST_SKIP_SEARCH_STRENGTH;
104pub(crate) const DFAST_LOCAL_SKIP_TRIGGER: usize = 256;
105pub(crate) const DFAST_MAX_SKIP_STEP: usize = 8;
106pub(crate) const DFAST_INCOMPRESSIBLE_SKIP_STEP: usize = 16;
107pub(crate) const ROW_HASH_BITS: usize = 20;
108pub(crate) const ROW_LOG: usize = 5;
109pub(crate) const ROW_SEARCH_DEPTH: usize = 16;
110pub(crate) const ROW_TARGET_LEN: usize = 48;
111pub(crate) const ROW_TAG_BITS: usize = 8;
112pub(crate) const ROW_EMPTY_SLOT: usize = usize::MAX;
113pub(crate) const ROW_HASH_KEY_LEN: usize = 4;
114#[cfg(test)]
121use super::match_table::storage::{HC_PRIME3BYTES, HC_PRIME4BYTES};
122
123#[cfg(test)]
128use super::match_table::storage::HC_EMPTY;
129use super::match_table::storage::{HC_CHAIN_LOG, HC_HASH_LOG, HC3_HASH_LOG};
130const HC_SEARCH_DEPTH: usize = 16;
135use super::hc::HC_MIN_MATCH_LEN;
138const HC_OPT_MIN_MATCH_LEN: usize = HC_FORMAT_MINMATCH;
139const HC_TARGET_LEN: usize = 48;
140
141use super::hc::MAX_HC_SEARCH_DEPTH;
143
144#[derive(Copy, Clone)]
152struct HcConfig {
153 hash_log: usize,
154 chain_log: usize,
155 search_depth: usize,
156 target_len: usize,
157}
158
159#[derive(Copy, Clone)]
160pub(crate) struct RowConfig {
161 pub(crate) hash_bits: usize,
162 pub(crate) row_log: usize,
163 pub(crate) search_depth: usize,
164 pub(crate) target_len: usize,
165}
166
167const HC_CONFIG: HcConfig = HcConfig {
168 hash_log: HC_HASH_LOG,
169 chain_log: HC_CHAIN_LOG,
170 search_depth: HC_SEARCH_DEPTH,
171 target_len: HC_TARGET_LEN,
172};
173
174const BEST_HC_CONFIG: HcConfig = HcConfig {
176 hash_log: 21,
177 chain_log: 20,
178 search_depth: 32,
179 target_len: 128,
180};
181
182const BTOPT_HC_CONFIG: HcConfig = HcConfig {
183 hash_log: 23,
184 chain_log: 22,
185 search_depth: 32,
186 target_len: 256,
187};
188
189const BTULTRA_HC_CONFIG: HcConfig = HcConfig {
190 hash_log: 23,
191 chain_log: 23,
192 search_depth: 32,
193 target_len: 256,
194};
195
196const BTULTRA2_HC_CONFIG: HcConfig = HcConfig {
197 hash_log: 24,
198 chain_log: 24,
199 search_depth: 512,
200 target_len: 256,
201};
202
203const BTULTRA2_HC_CONFIG_L22: HcConfig = HcConfig {
204 hash_log: 25,
205 chain_log: 27,
206 search_depth: 512,
207 target_len: 999,
208};
209
210const BTULTRA2_HC_CONFIG_L22_256K: HcConfig = HcConfig {
211 hash_log: 19,
212 chain_log: 19,
213 search_depth: 1 << 13,
214 target_len: 999,
215};
216
217const BTULTRA2_HC_CONFIG_L22_128K: HcConfig = HcConfig {
218 hash_log: 17,
219 chain_log: 18,
220 search_depth: 1 << 11,
221 target_len: 999,
222};
223
224const BTULTRA2_HC_CONFIG_L22_16K: HcConfig = HcConfig {
225 hash_log: 15,
226 chain_log: 15,
227 search_depth: 1 << 10,
228 target_len: 999,
229};
230
231const ROW_CONFIG: RowConfig = RowConfig {
232 hash_bits: ROW_HASH_BITS,
233 row_log: ROW_LOG,
234 search_depth: ROW_SEARCH_DEPTH,
235 target_len: ROW_TARGET_LEN,
236};
237
238#[derive(Copy, Clone)]
244struct LevelParams {
245 strategy_tag: super::strategy::StrategyTag,
246 window_log: u8,
247 hash_fill_step: usize,
248 lazy_depth: u8,
249 hc: HcConfig,
250 row: RowConfig,
251}
252
253impl LevelParams {
254 fn backend(&self) -> super::strategy::BackendTag {
258 self.strategy_tag.backend()
259 }
260}
261
262fn dfast_hash_bits_for_window(max_window_size: usize) -> usize {
263 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
264 window_log.clamp(MIN_WINDOW_LOG as usize, DFAST_HASH_BITS)
265}
266
267fn row_hash_bits_for_window(max_window_size: usize) -> usize {
268 let window_log = (usize::BITS - 1 - max_window_size.leading_zeros()) as usize;
269 window_log.clamp(MIN_WINDOW_LOG as usize, ROW_HASH_BITS)
270}
271
272#[rustfmt::skip]
280const LEVEL_TABLE: [LevelParams; 22] = [
281 LevelParams { strategy_tag: super::strategy::StrategyTag::Fast, window_log: 19, hash_fill_step: 3, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
284 LevelParams { strategy_tag: super::strategy::StrategyTag::Dfast, window_log: 19, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
285 LevelParams { strategy_tag: super::strategy::StrategyTag::Dfast, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HC_CONFIG, row: ROW_CONFIG },
286 LevelParams { strategy_tag: super::strategy::StrategyTag::Greedy, window_log: 22, hash_fill_step: 1, lazy_depth: 0, hc: HC_CONFIG, row: ROW_CONFIG },
287 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 22, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 18, chain_log: 17, search_depth: 4, target_len: 32 }, row: ROW_CONFIG },
288 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 1, hc: HcConfig { hash_log: 19, chain_log: 18, search_depth: 8, target_len: 48 }, row: ROW_CONFIG },
289 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 16, target_len: 48 }, row: ROW_CONFIG },
290 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 20, chain_log: 19, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
291 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: BETTER_WINDOW_LOG, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 24, target_len: 64 }, row: ROW_CONFIG },
292 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 21, chain_log: 20, search_depth: 28, target_len: 96 }, row: ROW_CONFIG },
293 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 24, hash_fill_step: 1, lazy_depth: 2, hc: BEST_HC_CONFIG, row: ROW_CONFIG },
294 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 128 }, row: ROW_CONFIG },
295 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 21, search_depth: 32, target_len: 160 }, row: ROW_CONFIG },
296 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 25, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 22, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
297 LevelParams { strategy_tag: super::strategy::StrategyTag::Lazy, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: HcConfig { hash_log: 23, chain_log: 22, search_depth: 32, target_len: 192 }, row: ROW_CONFIG },
298 LevelParams { strategy_tag: super::strategy::StrategyTag::BtOpt, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTOPT_HC_CONFIG, row: ROW_CONFIG },
299 LevelParams { strategy_tag: super::strategy::StrategyTag::BtOpt, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTOPT_HC_CONFIG, row: ROW_CONFIG },
300 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA_HC_CONFIG, row: ROW_CONFIG },
301 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA_HC_CONFIG, row: ROW_CONFIG },
302 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG, row: ROW_CONFIG },
303 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, window_log: 26, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG, row: ROW_CONFIG },
304 LevelParams { strategy_tag: super::strategy::StrategyTag::BtUltra2, window_log: 27, hash_fill_step: 1, lazy_depth: 2, hc: BTULTRA2_HC_CONFIG_L22, row: ROW_CONFIG },
305];
306
307pub(crate) const MIN_WINDOW_LOG: u8 = 10;
309const MIN_HINTED_WINDOW_LOG: u8 = 14;
315
316fn adjust_params_for_source_size(mut params: LevelParams, src_size: u64) -> LevelParams {
326 let src_log = if src_size == 0 {
331 MIN_WINDOW_LOG
332 } else {
333 (64 - (src_size - 1).leading_zeros()) as u8 };
335 let src_log = src_log.max(MIN_WINDOW_LOG).max(MIN_HINTED_WINDOW_LOG);
336 if src_log < params.window_log {
337 params.window_log = src_log;
338 }
339 let backend = params.backend();
343 if backend == super::strategy::BackendTag::HashChain {
344 if (src_log + 2) < params.hc.hash_log as u8 {
345 params.hc.hash_log = (src_log + 2) as usize;
346 }
347 if (src_log + 1) < params.hc.chain_log as u8 {
348 params.hc.chain_log = (src_log + 1) as usize;
349 }
350 } else if backend == super::strategy::BackendTag::Row {
351 let max_window_size = 1usize << params.window_log;
352 params.row.hash_bits = row_hash_bits_for_window(max_window_size);
353 }
354 params
355}
356
357fn level22_btultra2_params_for_source_size(source_size: Option<u64>) -> LevelParams {
358 let mut hc = match source_size {
359 Some(size) if size <= 16 * 1024 => BTULTRA2_HC_CONFIG_L22_16K,
360 Some(size) if size <= 128 * 1024 => BTULTRA2_HC_CONFIG_L22_128K,
361 Some(size) if size <= 256 * 1024 => BTULTRA2_HC_CONFIG_L22_256K,
362 _ => BTULTRA2_HC_CONFIG_L22,
363 };
364 let mut window_log = match source_size {
365 Some(size) if size <= 16 * 1024 => 14,
366 Some(size) if size <= 128 * 1024 => 17,
367 Some(size) if size <= 256 * 1024 => 18,
368 _ => 27,
369 };
370 if let Some(size) = source_size
371 && size > 256 * 1024
372 {
373 let src_log = if size == 0 {
374 MIN_WINDOW_LOG
375 } else {
376 (64 - (size - 1).leading_zeros()) as u8
377 };
378 window_log = window_log.min(src_log.max(MIN_WINDOW_LOG));
379 let adjusted_table_log = window_log as usize + 1;
380 hc.hash_log = hc.hash_log.min(adjusted_table_log);
381 hc.chain_log = hc.chain_log.min(adjusted_table_log);
382 }
383 LevelParams {
384 strategy_tag: super::strategy::StrategyTag::BtUltra2,
385 window_log,
386 hash_fill_step: 1,
387 lazy_depth: 2,
388 hc,
389 row: ROW_CONFIG,
390 }
391}
392
393fn resolve_level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
396 if matches!(level, CompressionLevel::Level(22)) {
397 return level22_btultra2_params_for_source_size(source_size);
398 }
399 let params = match level {
400 CompressionLevel::Uncompressed => LevelParams {
401 strategy_tag: super::strategy::StrategyTag::Fast,
402 window_log: 17,
403 hash_fill_step: 1,
404 lazy_depth: 0,
405 hc: HC_CONFIG,
406 row: ROW_CONFIG,
407 },
408 CompressionLevel::Fastest => LEVEL_TABLE[0],
409 CompressionLevel::Default => LEVEL_TABLE[2],
410 CompressionLevel::Better => LEVEL_TABLE[6],
411 CompressionLevel::Best => LEVEL_TABLE[10],
412 CompressionLevel::Level(n) => {
413 if n > 0 {
414 let idx = (n as usize).min(CompressionLevel::MAX_LEVEL as usize) - 1;
415 LEVEL_TABLE[idx]
416 } else if n == 0 {
417 LEVEL_TABLE[CompressionLevel::DEFAULT_LEVEL as usize - 1]
419 } else {
420 let acceleration =
428 (n.saturating_abs() as usize).min((-CompressionLevel::MIN_LEVEL) as usize);
429 let step = (acceleration + 3).min(128);
430 LevelParams {
431 strategy_tag: super::strategy::StrategyTag::Fast,
432 window_log: 19,
433 hash_fill_step: step,
434 lazy_depth: 0,
435 hc: HC_CONFIG,
436 row: ROW_CONFIG,
437 }
438 }
439 }
440 };
441 if let Some(size) = source_size {
442 adjust_params_for_source_size(params, size)
443 } else {
444 params
445 }
446}
447
448enum MatcherStorage {
466 Simple(MatchGenerator),
473 Dfast(DfastMatchGenerator),
478 Row(RowMatchGenerator),
482 HashChain(HcMatchGenerator),
494}
495
496impl MatcherStorage {
497 fn backend(&self) -> super::strategy::BackendTag {
499 use super::strategy::BackendTag;
500 match self {
501 Self::Simple(_) => BackendTag::Simple,
502 Self::Dfast(_) => BackendTag::Dfast,
503 Self::Row(_) => BackendTag::Row,
504 Self::HashChain(_) => BackendTag::HashChain,
505 }
506 }
507}
508
509pub struct MatchGeneratorDriver {
511 vec_pool: Vec<Vec<u8>>,
512 suffix_pool: Vec<SuffixStore>,
513 storage: MatcherStorage,
520 strategy_tag: super::strategy::StrategyTag,
526 slice_size: usize,
527 base_slice_size: usize,
528 reported_window_size: usize,
531 dictionary_retained_budget: usize,
534 source_size_hint: Option<u64>,
536}
537
538impl MatchGeneratorDriver {
539 pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
544 let max_window_size = max_slices_in_window * slice_size;
545 Self {
546 vec_pool: Vec::new(),
547 suffix_pool: Vec::new(),
548 storage: MatcherStorage::Simple(MatchGenerator::new(max_window_size)),
549 strategy_tag: super::strategy::StrategyTag::Fast,
550 slice_size,
551 base_slice_size: slice_size,
552 reported_window_size: max_window_size,
553 dictionary_retained_budget: 0,
554 source_size_hint: None,
555 }
556 }
557
558 fn level_params(level: CompressionLevel, source_size: Option<u64>) -> LevelParams {
559 resolve_level_params(level, source_size)
560 }
561
562 fn active_backend(&self) -> super::strategy::BackendTag {
565 self.storage.backend()
566 }
567
568 #[cfg(test)]
569 fn simple(&self) -> &MatchGenerator {
570 match &self.storage {
571 MatcherStorage::Simple(m) => m,
572 _ => panic!("simple backend must be initialized by reset() before use"),
573 }
574 }
575
576 fn simple_mut(&mut self) -> &mut MatchGenerator {
577 match &mut self.storage {
578 MatcherStorage::Simple(m) => m,
579 _ => panic!("simple backend must be initialized by reset() before use"),
580 }
581 }
582
583 #[cfg(test)]
584 fn dfast_matcher(&self) -> &DfastMatchGenerator {
585 match &self.storage {
586 MatcherStorage::Dfast(m) => m,
587 _ => panic!("dfast backend must be initialized by reset() before use"),
588 }
589 }
590
591 fn dfast_matcher_mut(&mut self) -> &mut DfastMatchGenerator {
592 match &mut self.storage {
593 MatcherStorage::Dfast(m) => m,
594 _ => panic!("dfast backend must be initialized by reset() before use"),
595 }
596 }
597
598 #[cfg(test)]
599 fn row_matcher(&self) -> &RowMatchGenerator {
600 match &self.storage {
601 MatcherStorage::Row(m) => m,
602 _ => panic!("row backend must be initialized by reset() before use"),
603 }
604 }
605
606 fn row_matcher_mut(&mut self) -> &mut RowMatchGenerator {
607 match &mut self.storage {
608 MatcherStorage::Row(m) => m,
609 _ => panic!("row backend must be initialized by reset() before use"),
610 }
611 }
612
613 #[cfg(test)]
614 fn hc_matcher(&self) -> &HcMatchGenerator {
615 match &self.storage {
616 MatcherStorage::HashChain(m) => m,
617 _ => panic!("hash chain backend must be initialized by reset() before use"),
618 }
619 }
620
621 fn hc_matcher_mut(&mut self) -> &mut HcMatchGenerator {
622 match &mut self.storage {
623 MatcherStorage::HashChain(m) => m,
624 _ => panic!("hash chain backend must be initialized by reset() before use"),
625 }
626 }
627
628 #[must_use]
637 fn retire_dictionary_budget(&mut self, evicted_bytes: usize) -> bool {
638 let reclaimed = evicted_bytes.min(self.dictionary_retained_budget);
639 if reclaimed == 0 {
640 return false;
641 }
642 self.dictionary_retained_budget -= reclaimed;
643 match self.active_backend() {
644 super::strategy::BackendTag::Simple => {
645 let matcher = self.simple_mut();
646 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
647 }
648 super::strategy::BackendTag::Dfast => {
649 let matcher = self.dfast_matcher_mut();
650 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
651 }
652 super::strategy::BackendTag::Row => {
653 let matcher = self.row_matcher_mut();
654 matcher.max_window_size = matcher.max_window_size.saturating_sub(reclaimed);
655 }
656 super::strategy::BackendTag::HashChain => {
657 let matcher = self.hc_matcher_mut();
658 matcher.table.max_window_size =
659 matcher.table.max_window_size.saturating_sub(reclaimed);
660 }
661 }
662 true
663 }
664
665 fn trim_after_budget_retire(&mut self) {
666 loop {
667 let mut evicted_bytes = 0usize;
668 match self.active_backend() {
669 super::strategy::BackendTag::Simple => {
670 let vec_pool = &mut self.vec_pool;
671 let suffix_pool = &mut self.suffix_pool;
672 let MatcherStorage::Simple(m) = &mut self.storage else {
673 unreachable!("active_backend() == Simple proven above");
674 };
675 m.reserve(0, |mut data, mut suffixes| {
676 evicted_bytes += data.len();
677 data.resize(data.capacity(), 0);
678 vec_pool.push(data);
679 suffixes.slots.clear();
680 suffixes.slots.resize(suffixes.slots.capacity(), None);
681 suffix_pool.push(suffixes);
682 });
683 }
684 super::strategy::BackendTag::Dfast => {
685 let dfast = self.dfast_matcher_mut();
694 let pre = dfast.window_size;
695 dfast.trim_to_window();
696 evicted_bytes += pre - dfast.window_size;
697 }
698 super::strategy::BackendTag::Row => {
699 let mut retired = Vec::new();
700 self.row_matcher_mut().trim_to_window(|data| {
701 evicted_bytes += data.len();
702 retired.push(data);
703 });
704 for mut data in retired {
705 data.resize(data.capacity(), 0);
706 self.vec_pool.push(data);
707 }
708 }
709 super::strategy::BackendTag::HashChain => {
710 let mut retired = Vec::new();
711 self.hc_matcher_mut().table.trim_to_window(|data| {
712 evicted_bytes += data.len();
713 retired.push(data);
714 });
715 for mut data in retired {
716 data.resize(data.capacity(), 0);
717 self.vec_pool.push(data);
718 }
719 }
720 }
721 if evicted_bytes == 0 {
722 break;
723 }
724 let _ = self.retire_dictionary_budget(evicted_bytes);
738 }
739 }
740
741 fn skip_matching_for_dictionary_priming(&mut self) {
742 match self.active_backend() {
743 super::strategy::BackendTag::Simple => {
744 self.simple_mut().skip_matching_with_hint(Some(false))
745 }
746 super::strategy::BackendTag::Dfast => self.dfast_matcher_mut().skip_matching_dense(),
747 super::strategy::BackendTag::Row => {
748 self.row_matcher_mut().skip_matching_with_hint(Some(false))
749 }
750 super::strategy::BackendTag::HashChain => {
751 self.hc_matcher_mut().skip_matching(Some(false))
752 }
753 }
754 }
755}
756
757impl Matcher for MatchGeneratorDriver {
758 fn supports_dictionary_priming(&self) -> bool {
759 true
760 }
761
762 fn set_source_size_hint(&mut self, size: u64) {
763 self.source_size_hint = Some(size);
764 }
765
766 fn reset(&mut self, level: CompressionLevel) {
767 let hint = self.source_size_hint.take();
768 let hinted = hint.is_some();
769 let params = Self::level_params(level, hint);
770 let next_backend = params.backend();
771 let max_window_size = 1usize << params.window_log;
772 self.dictionary_retained_budget = 0;
773 if self.active_backend() != next_backend {
774 match &mut self.storage {
780 MatcherStorage::Simple(m) => {
781 let vec_pool = &mut self.vec_pool;
782 let suffix_pool = &mut self.suffix_pool;
783 m.reset(|mut data, mut suffixes| {
784 data.resize(data.capacity(), 0);
785 vec_pool.push(data);
786 suffixes.slots.clear();
787 suffixes.slots.resize(suffixes.slots.capacity(), None);
788 suffix_pool.push(suffixes);
789 });
790 }
791 MatcherStorage::Dfast(m) => {
792 m.short_hash = Vec::new();
805 m.long_hash = Vec::new();
806 m.reset();
807 }
808 MatcherStorage::Row(m) => {
809 m.row_heads = Vec::new();
810 m.row_positions = Vec::new();
811 m.row_tags = Vec::new();
812 let vec_pool = &mut self.vec_pool;
813 m.reset(|mut data| {
814 data.resize(data.capacity(), 0);
815 vec_pool.push(data);
816 });
817 }
818 MatcherStorage::HashChain(m) => {
819 m.table.hash_table = Vec::new();
827 m.table.chain_table = Vec::new();
828 m.table.hash3_table = Vec::new();
829 let vec_pool = &mut self.vec_pool;
830 m.reset(|mut data| {
831 data.resize(data.capacity(), 0);
832 vec_pool.push(data);
833 });
834 }
835 }
836 self.storage = match next_backend {
839 super::strategy::BackendTag::Simple => {
840 MatcherStorage::Simple(MatchGenerator::new(max_window_size))
841 }
842 super::strategy::BackendTag::Dfast => {
843 MatcherStorage::Dfast(DfastMatchGenerator::new(max_window_size))
844 }
845 super::strategy::BackendTag::Row => {
846 MatcherStorage::Row(RowMatchGenerator::new(max_window_size))
847 }
848 super::strategy::BackendTag::HashChain => {
849 MatcherStorage::HashChain(HcMatchGenerator::new(max_window_size))
850 }
851 };
852 }
853
854 self.strategy_tag = params.strategy_tag;
860 self.slice_size = self.base_slice_size.min(max_window_size);
861 self.reported_window_size = max_window_size;
862 let strategy_tag = self.strategy_tag;
863 match &mut self.storage {
864 MatcherStorage::Simple(m) => {
865 let vec_pool = &mut self.vec_pool;
866 let suffix_pool = &mut self.suffix_pool;
867 m.max_window_size = max_window_size;
868 m.hash_fill_step = params.hash_fill_step;
869 m.reset(|mut data, mut suffixes| {
870 data.resize(data.capacity(), 0);
871 vec_pool.push(data);
872 suffixes.slots.clear();
873 suffixes.slots.resize(suffixes.slots.capacity(), None);
874 suffix_pool.push(suffixes);
875 });
876 }
877 MatcherStorage::Dfast(dfast) => {
878 dfast.max_window_size = max_window_size;
879 dfast.lazy_depth = params.lazy_depth;
880 dfast.use_fast_loop = matches!(
881 level,
882 CompressionLevel::Default
883 | CompressionLevel::Level(0)
884 | CompressionLevel::Level(3)
885 );
886 dfast.set_hash_bits(if hinted {
887 dfast_hash_bits_for_window(max_window_size)
888 } else {
889 DFAST_HASH_BITS
890 });
891 dfast.reset();
895 }
896 MatcherStorage::Row(row) => {
897 row.max_window_size = max_window_size;
898 row.lazy_depth = params.lazy_depth;
899 row.configure(params.row);
900 if hinted {
901 row.set_hash_bits(row_hash_bits_for_window(max_window_size));
902 }
903 let vec_pool = &mut self.vec_pool;
904 row.reset(|mut data| {
905 data.resize(data.capacity(), 0);
906 vec_pool.push(data);
907 });
908 }
909 MatcherStorage::HashChain(hc) => {
910 hc.table.max_window_size = max_window_size;
911 hc.hc.lazy_depth = params.lazy_depth;
912 hc.configure(params.hc, strategy_tag, params.window_log);
913 let vec_pool = &mut self.vec_pool;
914 hc.reset(|mut data| {
915 data.resize(data.capacity(), 0);
916 vec_pool.push(data);
917 });
918 }
919 }
920 }
921
922 fn prime_with_dictionary(&mut self, dict_content: &[u8], offset_hist: [u32; 3]) {
923 match self.active_backend() {
924 super::strategy::BackendTag::Simple => self.simple_mut().offset_hist = offset_hist,
925 super::strategy::BackendTag::Dfast => {
926 self.dfast_matcher_mut().offset_hist = offset_hist
927 }
928 super::strategy::BackendTag::Row => self.row_matcher_mut().offset_hist = offset_hist,
929 super::strategy::BackendTag::HashChain => {
930 let matcher = self.hc_matcher_mut();
931 matcher.table.offset_hist = offset_hist;
932 matcher.table.mark_dictionary_primed();
933 }
934 }
935
936 if dict_content.is_empty() {
937 return;
938 }
939
940 let retained_dict_budget = dict_content.len();
943 match self.active_backend() {
944 super::strategy::BackendTag::Simple => {
945 let matcher = self.simple_mut();
946 matcher.max_window_size =
947 matcher.max_window_size.saturating_add(retained_dict_budget);
948 }
949 super::strategy::BackendTag::Dfast => {
950 let matcher = self.dfast_matcher_mut();
951 matcher.max_window_size =
952 matcher.max_window_size.saturating_add(retained_dict_budget);
953 }
954 super::strategy::BackendTag::Row => {
955 let matcher = self.row_matcher_mut();
956 matcher.max_window_size =
957 matcher.max_window_size.saturating_add(retained_dict_budget);
958 }
959 super::strategy::BackendTag::HashChain => {
960 let matcher = self.hc_matcher_mut();
961 matcher.table.max_window_size = matcher
962 .table
963 .max_window_size
964 .saturating_add(retained_dict_budget);
965 }
966 }
967
968 let mut start = 0usize;
969 let mut committed_dict_budget = 0usize;
970 let min_primed_tail = match self.active_backend() {
974 super::strategy::BackendTag::Simple => MIN_MATCH_LEN,
975 super::strategy::BackendTag::Dfast
976 | super::strategy::BackendTag::Row
977 | super::strategy::BackendTag::HashChain => 4,
978 };
979 while start < dict_content.len() {
980 let end = (start + self.slice_size).min(dict_content.len());
981 if end - start < min_primed_tail {
982 break;
983 }
984 let mut space = self.get_next_space();
985 space.clear();
986 space.extend_from_slice(&dict_content[start..end]);
987 self.commit_space(space);
988 self.skip_matching_for_dictionary_priming();
989 committed_dict_budget += end - start;
990 start = end;
991 }
992
993 let uncommitted_tail_budget = retained_dict_budget.saturating_sub(committed_dict_budget);
994 if uncommitted_tail_budget > 0 {
995 match self.active_backend() {
996 super::strategy::BackendTag::Simple => {
997 let matcher = self.simple_mut();
998 matcher.max_window_size = matcher
999 .max_window_size
1000 .saturating_sub(uncommitted_tail_budget);
1001 }
1002 super::strategy::BackendTag::Dfast => {
1003 let matcher = self.dfast_matcher_mut();
1004 matcher.max_window_size = matcher
1005 .max_window_size
1006 .saturating_sub(uncommitted_tail_budget);
1007 }
1008 super::strategy::BackendTag::Row => {
1009 let matcher = self.row_matcher_mut();
1010 matcher.max_window_size = matcher
1011 .max_window_size
1012 .saturating_sub(uncommitted_tail_budget);
1013 }
1014 super::strategy::BackendTag::HashChain => {
1015 let matcher = self.hc_matcher_mut();
1016 matcher.table.max_window_size = matcher
1017 .table
1018 .max_window_size
1019 .saturating_sub(uncommitted_tail_budget);
1020 }
1021 }
1022 }
1023 if committed_dict_budget > 0 {
1024 self.dictionary_retained_budget = self
1025 .dictionary_retained_budget
1026 .saturating_add(committed_dict_budget);
1027 }
1028 if self.active_backend() == super::strategy::BackendTag::HashChain {
1029 self.hc_matcher_mut()
1030 .table
1031 .set_dictionary_limit_from_primed_bytes(committed_dict_budget);
1032 }
1033 }
1034
1035 fn seed_dictionary_entropy(
1036 &mut self,
1037 huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
1038 ll: Option<&crate::fse::fse_encoder::FSETable>,
1039 ml: Option<&crate::fse::fse_encoder::FSETable>,
1040 of: Option<&crate::fse::fse_encoder::FSETable>,
1041 ) {
1042 if self.active_backend() == super::strategy::BackendTag::HashChain {
1043 self.hc_matcher_mut()
1044 .seed_dictionary_entropy(huff, ll, ml, of);
1045 }
1046 }
1047
1048 fn window_size(&self) -> u64 {
1049 self.reported_window_size as u64
1050 }
1051
1052 fn get_next_space(&mut self) -> Vec<u8> {
1053 if let Some(mut space) = self.vec_pool.pop() {
1054 if space.len() > self.slice_size {
1055 space.truncate(self.slice_size);
1056 }
1057 if space.len() < self.slice_size {
1058 space.resize(self.slice_size, 0);
1059 }
1060 return space;
1061 }
1062 alloc::vec![0; self.slice_size]
1063 }
1064
1065 fn get_last_space(&mut self) -> &[u8] {
1066 match &self.storage {
1067 MatcherStorage::Simple(m) => m.window.last().unwrap().data.as_slice(),
1068 MatcherStorage::Dfast(m) => m.get_last_space(),
1069 MatcherStorage::Row(m) => m.get_last_space(),
1070 MatcherStorage::HashChain(m) => m.table.get_last_space(),
1071 }
1072 }
1073
1074 fn commit_space(&mut self, space: Vec<u8>) {
1075 let mut evicted_bytes = 0usize;
1076 let vec_pool = &mut self.vec_pool;
1080 let suffix_pool = &mut self.suffix_pool;
1081 match &mut self.storage {
1082 MatcherStorage::Simple(m) => {
1083 let suffixes = match suffix_pool.pop() {
1084 Some(store) if store.slots.len() >= space.len() => store,
1085 _ => SuffixStore::with_capacity(space.len()),
1086 };
1087 m.add_data(space, suffixes, |mut data, mut suffixes| {
1088 evicted_bytes += data.len();
1089 data.resize(data.capacity(), 0);
1090 vec_pool.push(data);
1091 suffixes.slots.clear();
1092 suffixes.slots.resize(suffixes.slots.capacity(), None);
1093 suffix_pool.push(suffixes);
1094 });
1095 }
1096 MatcherStorage::Dfast(m) => {
1097 let pre = m.window_size;
1119 let space_len = space.len();
1120 m.add_data(space, |mut data| {
1121 data.resize(data.capacity(), 0);
1122 vec_pool.push(data);
1123 });
1124 evicted_bytes += pre.saturating_add(space_len).saturating_sub(m.window_size);
1125 }
1126 MatcherStorage::Row(m) => {
1127 m.add_data(space, |mut data| {
1128 evicted_bytes += data.len();
1129 data.resize(data.capacity(), 0);
1130 vec_pool.push(data);
1131 });
1132 }
1133 MatcherStorage::HashChain(m) => {
1134 m.table.add_data(space, |mut data| {
1135 evicted_bytes += data.len();
1136 data.resize(data.capacity(), 0);
1137 vec_pool.push(data);
1138 });
1139 }
1140 }
1141 if self.retire_dictionary_budget(evicted_bytes) {
1151 self.trim_after_budget_retire();
1152 }
1153 }
1154
1155 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1156 use super::strategy::{self, StrategyTag};
1157 match self.strategy_tag {
1166 StrategyTag::Fast => self.compress_block::<strategy::Fast>(&mut handle_sequence),
1167 StrategyTag::Dfast => self.compress_block::<strategy::Dfast>(&mut handle_sequence),
1168 StrategyTag::Greedy => self.compress_block::<strategy::Greedy>(&mut handle_sequence),
1169 StrategyTag::Lazy => self.compress_block::<strategy::Lazy>(&mut handle_sequence),
1170 StrategyTag::BtOpt => self.compress_block::<strategy::BtOpt>(&mut handle_sequence),
1171 StrategyTag::BtUltra => self.compress_block::<strategy::BtUltra>(&mut handle_sequence),
1172 StrategyTag::BtUltra2 => {
1173 self.compress_block::<strategy::BtUltra2>(&mut handle_sequence)
1174 }
1175 }
1176 }
1177
1178 fn skip_matching(&mut self) {
1179 self.skip_matching_with_hint(None);
1180 }
1181
1182 fn skip_matching_with_hint(&mut self, incompressible_hint: Option<bool>) {
1183 match self.active_backend() {
1184 super::strategy::BackendTag::Simple => self
1185 .simple_mut()
1186 .skip_matching_with_hint(incompressible_hint),
1187 super::strategy::BackendTag::Dfast => {
1188 self.dfast_matcher_mut().skip_matching(incompressible_hint)
1189 }
1190 super::strategy::BackendTag::Row => self
1191 .row_matcher_mut()
1192 .skip_matching_with_hint(incompressible_hint),
1193 super::strategy::BackendTag::HashChain => {
1194 self.hc_matcher_mut().skip_matching(incompressible_hint)
1195 }
1196 }
1197 }
1198}
1199
1200impl MatchGeneratorDriver {
1201 fn compress_block<S: super::strategy::Strategy>(
1210 &mut self,
1211 handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
1212 ) {
1213 use super::strategy::BackendTag;
1214 match S::BACKEND {
1215 BackendTag::Simple => {
1216 let matcher = self.simple_mut();
1224 while matcher.next_sequence(&mut *handle_sequence) {}
1225 }
1226 BackendTag::Dfast => self.dfast_matcher_mut().start_matching(handle_sequence),
1227 BackendTag::Row => {
1228 let matcher = self.row_matcher_mut();
1246 debug_assert_eq!(
1247 matcher.lazy_depth, 0,
1248 "Row backend currently expects lazy_depth == 0 (donor-greedy); \
1249 wire a depth-aware dispatch before routing lazy levels here",
1250 );
1251 matcher.start_matching_greedy(handle_sequence);
1252 }
1253 BackendTag::HashChain => self
1254 .hc_matcher_mut()
1255 .start_matching_strategy::<S>(handle_sequence),
1256 }
1257 }
1258}
1259
1260pub(crate) enum HcBackend {
1274 Hc,
1276 Bt(alloc::boxed::Box<super::bt::BtMatcher>),
1280}
1281
1282impl HcBackend {
1283 #[inline(always)]
1290 pub(crate) fn bt_mut(&mut self) -> &mut super::bt::BtMatcher {
1291 match self {
1292 Self::Bt(bt) => bt,
1293 Self::Hc => unreachable!("BT-only accessor called in HC mode"),
1294 }
1295 }
1296}
1297
1298struct HcMatchGenerator {
1299 table: super::match_table::storage::MatchTable,
1304 hc: super::hc::HcMatcher,
1308 backend: HcBackend,
1313 strategy_tag: super::strategy::StrategyTag,
1325}
1326
1327macro_rules! bt_insert_step_no_rebase_body {
1343 ($table:expr, $search_depth:expr, $abs_pos:ident, $current_abs_end:ident, $target_abs:ident, $cmf:path) => {{
1344 let idx = $abs_pos - $table.history_abs_start;
1345 let concat = &$table.history[$table.history_start..];
1346 if idx + 8 > concat.len() {
1347 return 1;
1348 }
1349 debug_assert!(
1350 $abs_pos <= $current_abs_end,
1351 "BT walker called past current block end"
1352 );
1353 let tail_limit = $current_abs_end - $abs_pos;
1354 let hash = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
1355 concat,
1356 idx,
1357 $table.hash_log,
1358 $crate::encoding::bt::BtMatcher::HASH_MLS,
1359 );
1360 let Some(relative_pos) = $table.relative_position($abs_pos) else {
1361 return 1;
1362 };
1363 let stored = relative_pos + 1;
1364 let bt_mask = $table.bt_mask();
1365 let bt_low = $abs_pos.saturating_sub(bt_mask);
1371 let window_low = $table.window_low_abs_for_target($target_abs);
1372 let mut match_end_abs = $abs_pos + 9;
1381 let mut best_len = 8usize;
1382 let mut compares_left = $search_depth;
1383 let mut common_length_smaller = 0usize;
1384 let mut common_length_larger = 0usize;
1385 let pair_idx = $table.bt_pair_index_for_abs($abs_pos);
1386 let mut smaller_slot = pair_idx;
1387 let mut larger_slot = pair_idx + 1;
1388 let mut match_stored = $table.hash_table[hash];
1389 $table.hash_table[hash] = stored;
1390
1391 while compares_left > 0 {
1392 let Some(candidate_abs) =
1393 $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
1394 match_stored,
1395 $table.position_base,
1396 $table.index_shift,
1397 )
1398 else {
1399 break;
1400 };
1401 if candidate_abs < window_low || candidate_abs >= $abs_pos {
1402 break;
1403 }
1404 compares_left -= 1;
1405
1406 let next_pair_idx = $table.bt_pair_index_for_abs(candidate_abs);
1407 let next_smaller = $table.chain_table[next_pair_idx];
1408 let next_larger = $table.chain_table[next_pair_idx + 1];
1409 let seed_len = common_length_smaller.min(common_length_larger);
1410 let candidate_idx = candidate_abs - $table.history_abs_start;
1411 let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };
1416
1417 if match_len > best_len {
1418 best_len = match_len;
1419 let candidate_end = candidate_abs + match_len;
1423 if candidate_end > match_end_abs {
1424 match_end_abs = candidate_end;
1425 }
1426 }
1427
1428 if match_len >= tail_limit {
1429 break;
1430 }
1431
1432 let candidate_next = candidate_idx + match_len;
1433 let current_next = idx + match_len;
1434 if concat[candidate_next] < concat[current_next] {
1435 $table.chain_table[smaller_slot] = match_stored;
1436 common_length_smaller = match_len;
1437 if candidate_abs <= bt_low {
1438 smaller_slot = usize::MAX;
1439 break;
1440 }
1441 smaller_slot = next_pair_idx + 1;
1442 match_stored = next_larger;
1443 } else {
1444 $table.chain_table[larger_slot] = match_stored;
1445 common_length_larger = match_len;
1446 if candidate_abs <= bt_low {
1447 larger_slot = usize::MAX;
1448 break;
1449 }
1450 larger_slot = next_pair_idx;
1451 match_stored = next_smaller;
1452 }
1453 }
1454
1455 if smaller_slot != usize::MAX {
1456 $table.chain_table[smaller_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
1457 }
1458 if larger_slot != usize::MAX {
1459 $table.chain_table[larger_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
1460 }
1461
1462 let speed_positions = if best_len > 384 {
1463 (best_len - 384).min(192)
1464 } else {
1465 0
1466 };
1467 speed_positions.max(match_end_abs - ($abs_pos + 8))
1477 }};
1478}
1479pub(crate) use bt_insert_step_no_rebase_body;
1480
1481macro_rules! build_optimal_plan_impl_body {
1496 (
1497 $self:expr,
1498 $strategy_ty:ty,
1499 $current:ident,
1500 $current_abs_start:ident,
1501 $current_len:ident,
1502 $initial_state:ident,
1503 $stats:ident,
1504 $out:ident,
1505 $collect:ident $(,)?
1506 ) => {{
1507 let current_abs_end = $current_abs_start + $current_len;
1508 let min_match_len = HC_OPT_MIN_MATCH_LEN;
1509 let frontier_limit = $current_len.min(HC_OPT_NUM - 1);
1511 let initial_reps = $initial_state.reps;
1512 let initial_litlen = $initial_state.litlen;
1513 let mut profile = $initial_state.profile;
1514 profile.sufficient_match_len = $self.hc.sufficient_match_len_for_pass(profile);
1515 debug_assert!(
1527 <$strategy_ty as super::strategy::Strategy>::USE_BT,
1528 "build_optimal_plan_impl_body called on non-BT strategy"
1529 );
1530 let abort_on_worse_match: bool =
1531 <$strategy_ty as super::strategy::Strategy>::OPT_LEVEL == 0;
1532 let opt_level: bool = <$strategy_ty as super::strategy::Strategy>::OPT_LEVEL >= 2;
1533 let mut nodes = core::mem::take(&mut $self.backend.bt_mut().opt_nodes_scratch);
1534 let frontier_buffer_size = frontier_limit + 2;
1536 if nodes.len() < frontier_buffer_size {
1537 nodes.resize(frontier_buffer_size, HcOptimalNode::default());
1538 }
1539 let mut candidates = core::mem::take(&mut $self.backend.bt_mut().opt_candidates_scratch);
1540 candidates.clear();
1541 if candidates.capacity() < MAX_HC_SEARCH_DEPTH {
1542 candidates.reserve_exact(MAX_HC_SEARCH_DEPTH - candidates.capacity());
1543 }
1544 let mut store = core::mem::take(&mut $self.backend.bt_mut().opt_store_scratch);
1545 store.clear();
1546 let mut ll_prices = core::mem::take(&mut $self.backend.bt_mut().opt_ll_price_scratch);
1547 let mut ll_price_generations =
1548 core::mem::take(&mut $self.backend.bt_mut().opt_ll_price_generation);
1549 if ll_prices.len() <= frontier_limit {
1550 ll_prices.resize(frontier_limit + 1, 0);
1551 ll_price_generations.resize(frontier_limit + 1, 0);
1552 }
1553 $self.backend.bt_mut().opt_ll_price_stamp = $self
1554 .backend
1555 .bt_mut()
1556 .opt_ll_price_stamp
1557 .wrapping_add(1)
1558 .max(1);
1559 let ll_price_stamp = $self.backend.bt_mut().opt_ll_price_stamp;
1560 $self.backend.bt_mut().opt_lit_price_stamp = $self
1561 .backend
1562 .bt_mut()
1563 .opt_lit_price_stamp
1564 .wrapping_add(1)
1565 .max(1);
1566 let lit_price_stamp = $self.backend.bt_mut().opt_lit_price_stamp;
1567 let mut ml_prices = core::mem::take(&mut $self.backend.bt_mut().opt_ml_price_scratch);
1568 let mut ml_price_generations =
1569 core::mem::take(&mut $self.backend.bt_mut().opt_ml_price_generation);
1570 if ml_prices.len() <= frontier_limit {
1571 ml_prices.resize(frontier_limit + 1, 0);
1572 ml_price_generations.resize(frontier_limit + 1, 0);
1573 }
1574 $self.backend.bt_mut().opt_ml_price_stamp = $self
1575 .backend
1576 .bt_mut()
1577 .opt_ml_price_stamp
1578 .wrapping_add(1)
1579 .max(1);
1580 let ml_price_stamp = $self.backend.bt_mut().opt_ml_price_stamp;
1581 nodes[0] = HcOptimalNode {
1582 price: BtMatcher::cached_lit_length_price(
1583 profile,
1584 $stats,
1585 initial_litlen,
1586 &mut ll_prices,
1587 &mut ll_price_generations,
1588 ll_price_stamp,
1589 ),
1590 litlen: initial_litlen as u32,
1591 reps: initial_reps,
1592 ..HcOptimalNode::default()
1593 };
1594 let sufficient_len = profile.sufficient_match_len;
1595 let ll0_price = BtMatcher::cached_lit_length_price(
1596 profile,
1597 $stats,
1598 0,
1599 &mut ll_prices,
1600 &mut ll_price_generations,
1601 ll_price_stamp,
1602 );
1603 let ll1_price = BtMatcher::cached_lit_length_price(
1604 profile,
1605 $stats,
1606 1,
1607 &mut ll_prices,
1608 &mut ll_price_generations,
1609 ll_price_stamp,
1610 );
1611 let mut pos = 1usize;
1612 let mut last_pos = 0usize;
1613 let mut forced_end: Option<usize> = None;
1614 let mut forced_end_state: Option<HcOptimalNode> = None;
1615 let mut seed_forced_shortest_path = false;
1616 let mut opt_ldm = HcOptLdmState {
1617 seq_store: HcRawSeqStore {
1618 pos: 0,
1619 pos_in_sequence: 0,
1620 size: $self.backend.bt_mut().ldm_sequences.len(),
1621 },
1622 ..HcOptLdmState::default()
1623 };
1624 let has_ldm = !$self.backend.bt_mut().ldm_sequences.is_empty();
1625 if has_ldm {
1626 $self
1627 .backend
1628 .bt_mut()
1629 .ldm_get_next_match_and_update_seq_store(&mut opt_ldm, 0, $current_len);
1630 }
1631
1632 if $current_len >= min_match_len {
1635 let seed_ldm = if has_ldm {
1636 $self.backend.bt_mut().ldm_process_match_candidate(
1637 &mut opt_ldm,
1638 0,
1639 $current_len,
1640 min_match_len,
1641 )
1642 } else {
1643 None
1644 };
1645 candidates.clear();
1646 unsafe {
1650 $self.$collect::<$strategy_ty, true>(
1651 $current_abs_start,
1652 current_abs_end,
1653 profile,
1654 HcCandidateQuery {
1655 reps: initial_reps,
1656 lit_len: initial_litlen,
1657 ldm_candidate: seed_ldm,
1658 },
1659 &mut candidates,
1660 )
1661 };
1662 if !candidates.is_empty() {
1663 last_pos = (min_match_len - 1).min(frontier_limit);
1665 for p in 1..min_match_len.min(nodes.len()) {
1666 BtMatcher::reset_opt_node(&mut nodes[p]);
1667 let seed_litlen = initial_litlen
1677 .checked_add(p)
1678 .and_then(|s| u32::try_from(s).ok())
1679 .expect("optimal parser seed litlen out of u32 range");
1680 nodes[p].litlen = seed_litlen;
1681 }
1682 }
1683
1684 if let Some(candidate) = candidates.last() {
1685 let longest_len = candidate.match_len.min($current_len);
1686 if longest_len > sufficient_len {
1687 let off_base = BtMatcher::encode_offset_base_with_reps(
1688 candidate.offset as u32,
1689 initial_litlen,
1690 initial_reps,
1691 );
1692 let off_price = profile
1693 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
1694 let ml_price = BtMatcher::cached_match_length_price(
1695 profile,
1696 $stats,
1697 longest_len,
1698 &mut ml_prices,
1699 &mut ml_price_generations,
1700 ml_price_stamp,
1701 );
1702 let seq_cost = BtMatcher::add_prices(
1703 ll0_price,
1704 profile.match_price_from_parts(off_price, ml_price, $stats),
1705 );
1706 let forced_price = BtMatcher::add_prices(nodes[0].price, seq_cost);
1707 let forced_state = HcOptimalNode {
1708 price: forced_price,
1709 off: candidate.offset as u32,
1710 mlen: longest_len as u32,
1711 litlen: 0,
1712 reps: initial_reps,
1713 };
1714 if longest_len < nodes.len() && forced_price < nodes[longest_len].price {
1715 nodes[longest_len] = forced_state;
1716 }
1717 forced_end = Some(longest_len);
1718 forced_end_state = Some(forced_state);
1719 seed_forced_shortest_path = true;
1720 }
1721 }
1722 if !seed_forced_shortest_path {
1723 let mut prev_max_len = min_match_len - 1;
1724 for candidate in candidates.iter() {
1725 let max_match_len = candidate.match_len.min(frontier_limit);
1726 if max_match_len < min_match_len {
1727 continue;
1728 }
1729 let start_len = (prev_max_len + 1).max(min_match_len);
1730 if start_len > max_match_len {
1731 prev_max_len = prev_max_len.max(max_match_len);
1732 continue;
1733 }
1734 if max_match_len > last_pos {
1735 BtMatcher::reset_opt_nodes(&mut nodes, last_pos + 1, max_match_len);
1736 }
1737 let off_base = BtMatcher::encode_offset_base_with_reps(
1738 candidate.offset as u32,
1739 initial_litlen,
1740 initial_reps,
1741 );
1742 let off_price = profile
1743 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
1744 debug_assert!(max_match_len < nodes.len());
1745 let nodes0_price = nodes[0].price;
1746 for match_len in (start_len..=max_match_len).rev() {
1747 let ml_price = BtMatcher::cached_match_length_price(
1748 profile,
1749 $stats,
1750 match_len,
1751 &mut ml_prices,
1752 &mut ml_price_generations,
1753 ml_price_stamp,
1754 );
1755 let seq_cost = BtMatcher::add_prices(
1756 ll0_price,
1757 profile.match_price_from_parts(off_price, ml_price, $stats),
1758 );
1759 let next_cost = BtMatcher::add_prices(nodes0_price, seq_cost);
1760 let node_price = unsafe { nodes.get_unchecked(match_len).price };
1761 if match_len > last_pos || next_cost < node_price {
1762 let slot = unsafe { nodes.get_unchecked_mut(match_len) };
1763 *slot = HcOptimalNode {
1764 price: next_cost,
1765 off: candidate.offset as u32,
1766 mlen: match_len as u32,
1767 litlen: 0,
1768 reps: initial_reps,
1769 };
1770 if match_len > last_pos {
1771 last_pos = match_len;
1772 }
1773 } else if abort_on_worse_match {
1774 break;
1775 }
1776 }
1777 prev_max_len = prev_max_len.max(max_match_len);
1778 }
1779 if last_pos + 1 < nodes.len() {
1780 nodes[last_pos + 1].price = u32::MAX;
1781 }
1782 }
1783 }
1784 while !seed_forced_shortest_path && pos <= last_pos && pos <= frontier_limit {
1785 debug_assert!(pos + 1 < nodes.len());
1786 let prev_node = unsafe { *nodes.get_unchecked(pos - 1) };
1787 if prev_node.price != u32::MAX {
1788 let lit_len = prev_node.litlen as usize + 1;
1789 let lit_price = {
1790 let bt = $self.backend.bt_mut();
1791 BtMatcher::cached_literal_price(
1792 profile,
1793 $stats,
1794 $current[pos - 1],
1795 &mut bt.opt_lit_price_scratch,
1796 &mut bt.opt_lit_price_generation,
1797 lit_price_stamp,
1798 )
1799 };
1800 let ll_delta = BtMatcher::cached_lit_length_delta_price(
1801 profile,
1802 $stats,
1803 lit_len,
1804 &mut ll_prices,
1805 &mut ll_price_generations,
1806 ll_price_stamp,
1807 );
1808 let lit_cost = BtMatcher::add_price_delta(prev_node.price, lit_price, ll_delta);
1809 let node_pos_price = unsafe { nodes.get_unchecked(pos).price };
1810 if lit_cost <= node_pos_price {
1811 let prev_match = unsafe { *nodes.get_unchecked(pos) };
1812 let slot = unsafe { nodes.get_unchecked_mut(pos) };
1813 *slot = prev_node;
1814 slot.litlen = lit_len as u32;
1815 slot.price = lit_cost;
1816 #[allow(clippy::collapsible_if)]
1817 if opt_level
1818 && prev_match.mlen > 0
1819 && prev_match.litlen == 0
1820 && pos < $current_len
1821 {
1822 if ll1_price < ll0_price {
1823 let next_lit_price = {
1824 let bt = $self.backend.bt_mut();
1825 BtMatcher::cached_literal_price(
1826 profile,
1827 $stats,
1828 $current[pos],
1829 &mut bt.opt_lit_price_scratch,
1830 &mut bt.opt_lit_price_generation,
1831 lit_price_stamp,
1832 )
1833 };
1834 let with1literal = BtMatcher::add_price_delta(
1835 prev_match.price,
1836 next_lit_price,
1837 ll1_price as i32 - ll0_price as i32,
1838 );
1839 let ll_delta_next = BtMatcher::cached_lit_length_delta_price(
1840 profile,
1841 $stats,
1842 lit_len + 1,
1843 &mut ll_prices,
1844 &mut ll_price_generations,
1845 ll_price_stamp,
1846 );
1847 let with_more_literals =
1848 BtMatcher::add_price_delta(lit_cost, next_lit_price, ll_delta_next);
1849 let next = pos + 1;
1850 let next_price = unsafe { nodes.get_unchecked(next).price };
1851 if with1literal < with_more_literals && with1literal < next_price {
1852 debug_assert!(pos >= prev_match.mlen as usize);
1854 let prev_pos = pos - prev_match.mlen as usize;
1855 {
1856 let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
1857 let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
1858 prev_match.off,
1859 prev_state.litlen as usize,
1860 prev_state.reps,
1861 );
1862 let slot = unsafe { nodes.get_unchecked_mut(next) };
1863 *slot = prev_match;
1864 slot.reps = reps_after_match;
1865 slot.litlen = 1;
1866 slot.price = with1literal;
1867 if next > last_pos {
1868 last_pos = next;
1869 }
1870 }
1871 }
1872 }
1873 }
1874 }
1875 }
1876
1877 let mut base_node = unsafe { *nodes.get_unchecked(pos) };
1878 if base_node.price == u32::MAX {
1879 pos += 1;
1880 continue;
1881 }
1882 if base_node.mlen > 0 && base_node.litlen == 0 {
1883 debug_assert!(pos >= base_node.mlen as usize);
1885 let prev_pos = pos - base_node.mlen as usize;
1886 {
1887 let prev_state = unsafe { *nodes.get_unchecked(prev_pos) };
1888 let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
1889 base_node.off,
1890 prev_state.litlen as usize,
1891 prev_state.reps,
1892 );
1893 base_node.reps = reps_after_match;
1894 unsafe { nodes.get_unchecked_mut(pos).reps = reps_after_match };
1895 }
1896 }
1897 let base_cost = base_node.price;
1898
1899 if pos + 8 > $current_len {
1900 pos += 1;
1901 continue;
1902 }
1903
1904 if pos == last_pos {
1905 break;
1906 }
1907
1908 let next_price = unsafe { nodes.get_unchecked(pos + 1).price };
1909 if abort_on_worse_match
1910 && next_price <= base_cost.saturating_add(HC_BITCOST_MULTIPLIER / 2)
1911 {
1912 pos += 1;
1913 continue;
1914 }
1915
1916 let abs_pos = $current_abs_start + pos;
1917 let ldm_candidate = if has_ldm {
1918 $self.backend.bt_mut().ldm_process_match_candidate(
1919 &mut opt_ldm,
1920 pos,
1921 $current_len - pos,
1922 min_match_len,
1923 )
1924 } else {
1925 None
1926 };
1927 candidates.clear();
1928 unsafe {
1930 $self.$collect::<$strategy_ty, true>(
1931 abs_pos,
1932 current_abs_end,
1933 profile,
1934 HcCandidateQuery {
1935 reps: base_node.reps,
1936 lit_len: base_node.litlen as usize,
1937 ldm_candidate,
1938 },
1939 &mut candidates,
1940 )
1941 };
1942 if let Some(candidate) = candidates.last() {
1943 let longest_len = candidate.match_len.min($current_len - pos);
1944 if longest_len > sufficient_len
1945 || pos + longest_len >= HC_OPT_NUM
1946 || pos + longest_len >= $current_len
1947 {
1948 let lit_len = base_node.litlen as usize;
1949 let off_base = BtMatcher::encode_offset_base_with_reps(
1950 candidate.offset as u32,
1951 lit_len,
1952 base_node.reps,
1953 );
1954 let off_price = profile
1955 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
1956 let ml_price = BtMatcher::cached_match_length_price(
1957 profile,
1958 $stats,
1959 longest_len,
1960 &mut ml_prices,
1961 &mut ml_price_generations,
1962 ml_price_stamp,
1963 );
1964 let seq_cost = BtMatcher::add_prices(
1965 ll0_price,
1966 profile.match_price_from_parts(off_price, ml_price, $stats),
1967 );
1968 let forced_price = BtMatcher::add_prices(base_cost, seq_cost);
1969 let end_pos = (pos + longest_len).min($current_len);
1970 forced_end = Some(end_pos);
1971 forced_end_state = Some(HcOptimalNode {
1972 price: forced_price,
1973 off: candidate.offset as u32,
1974 mlen: longest_len as u32,
1975 litlen: 0,
1976 reps: base_node.reps,
1977 });
1978 break;
1979 }
1980 }
1981 let mut prev_max_len = min_match_len - 1;
1982 for candidate in candidates.iter() {
1983 debug_assert!(pos <= frontier_limit);
1987 let max_match_len = candidate
1988 .match_len
1989 .min($current_len - pos)
1990 .min(frontier_limit - pos);
1991 let min_len = min_match_len;
1992 if max_match_len < min_len {
1993 continue;
1994 }
1995 let start_len = (prev_max_len + 1).max(min_len);
1996 if start_len > max_match_len {
1997 prev_max_len = prev_max_len.max(max_match_len);
1998 continue;
1999 }
2000 let max_next = pos + max_match_len;
2001 if max_next > last_pos {
2002 BtMatcher::reset_opt_nodes(&mut nodes, last_pos + 1, max_next);
2003 }
2004 let lit_len = base_node.litlen as usize;
2005 let off_base = BtMatcher::encode_offset_base_with_reps(
2006 candidate.offset as u32,
2007 lit_len,
2008 base_node.reps,
2009 );
2010 let off_price = profile
2011 .offset_price_for::<ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>($stats, off_base);
2012 debug_assert!(pos + max_match_len < nodes.len());
2013 for match_len in (start_len..=max_match_len).rev() {
2014 let next = pos + match_len;
2015 let ml_price = BtMatcher::cached_match_length_price(
2016 profile,
2017 $stats,
2018 match_len,
2019 &mut ml_prices,
2020 &mut ml_price_generations,
2021 ml_price_stamp,
2022 );
2023 let seq_cost = BtMatcher::add_prices(
2024 ll0_price,
2025 profile.match_price_from_parts(off_price, ml_price, $stats),
2026 );
2027 let next_cost = BtMatcher::add_prices(base_cost, seq_cost);
2028 let node_next_price = unsafe { nodes.get_unchecked(next).price };
2029 let improved = next > last_pos || next_cost < node_next_price;
2030 if improved {
2031 let slot = unsafe { nodes.get_unchecked_mut(next) };
2032 *slot = HcOptimalNode {
2033 price: next_cost,
2034 off: candidate.offset as u32,
2035 mlen: match_len as u32,
2036 litlen: 0,
2037 reps: base_node.reps,
2038 };
2039 if next > last_pos {
2040 last_pos = next;
2041 }
2042 } else if abort_on_worse_match {
2043 break;
2044 }
2045 }
2046 prev_max_len = prev_max_len.max(max_match_len);
2047 }
2048
2049 if last_pos + 1 < nodes.len() {
2050 unsafe {
2051 nodes.get_unchecked_mut(last_pos + 1).price = u32::MAX;
2052 }
2053 }
2054 pos += 1;
2055 }
2056
2057 if last_pos == 0 {
2058 if $current_len == 0 {
2059 let price = nodes[0].price;
2060 return $self.backend.bt_mut().finish_optimal_plan(
2061 HcOptimalPlanBuffers {
2062 nodes,
2063 candidates,
2064 store,
2065 ll_prices,
2066 ll_price_generations,
2067 ml_prices,
2068 ml_price_generations,
2069 },
2070 (price, initial_reps, initial_litlen, 0),
2071 );
2072 }
2073 let lit_price = {
2074 let bt = $self.backend.bt_mut();
2075 BtMatcher::cached_literal_price(
2076 profile,
2077 $stats,
2078 $current[0],
2079 &mut bt.opt_lit_price_scratch,
2080 &mut bt.opt_lit_price_generation,
2081 lit_price_stamp,
2082 )
2083 };
2084 let next_litlen = initial_litlen
2091 .checked_add(1)
2092 .expect("optimal parser next litlen out of usize range");
2093 let ll_delta = BtMatcher::cached_lit_length_delta_price(
2094 profile,
2095 $stats,
2096 next_litlen,
2097 &mut ll_prices,
2098 &mut ll_price_generations,
2099 ll_price_stamp,
2100 );
2101 let price = BtMatcher::add_price_delta(nodes[0].price, lit_price, ll_delta);
2102 return $self.backend.bt_mut().finish_optimal_plan(
2103 HcOptimalPlanBuffers {
2104 nodes,
2105 candidates,
2106 store,
2107 ll_prices,
2108 ll_price_generations,
2109 ml_prices,
2110 ml_price_generations,
2111 },
2112 (price, initial_reps, next_litlen, 1),
2113 );
2114 }
2115
2116 let target_pos = forced_end.unwrap_or(last_pos.min(frontier_limit));
2117 let last_stretch = if let Some(forced_state) = forced_end_state {
2118 forced_state
2119 } else {
2120 nodes[target_pos]
2121 };
2122 if last_stretch.price == u32::MAX {
2123 return $self.backend.bt_mut().finish_optimal_plan(
2124 HcOptimalPlanBuffers {
2125 nodes,
2126 candidates,
2127 store,
2128 ll_prices,
2129 ll_price_generations,
2130 ml_prices,
2131 ml_price_generations,
2132 },
2133 (u32::MAX, initial_reps, initial_litlen, $current_len),
2134 );
2135 }
2136
2137 if last_stretch.mlen == 0 {
2138 return $self.backend.bt_mut().finish_optimal_plan(
2139 HcOptimalPlanBuffers {
2140 nodes,
2141 candidates,
2142 store,
2143 ll_prices,
2144 ll_price_generations,
2145 ml_prices,
2146 ml_price_generations,
2147 },
2148 (
2149 last_stretch.price,
2150 last_stretch.reps,
2151 last_stretch.litlen as usize,
2152 target_pos.min($current_len),
2153 ),
2154 );
2155 }
2156
2157 let mut cur = target_pos.saturating_sub(last_stretch.mlen as usize);
2158 let end_reps = if last_stretch.litlen == 0 {
2159 let prev_state = nodes[cur];
2160 let (_, reps_after_match) = BtMatcher::encode_offset_with_reps(
2161 last_stretch.off,
2162 prev_state.litlen as usize,
2163 prev_state.reps,
2164 );
2165 reps_after_match
2166 } else {
2167 let tail_literals = last_stretch.litlen as usize;
2168 if cur < tail_literals {
2169 return $self.backend.bt_mut().finish_optimal_plan(
2170 HcOptimalPlanBuffers {
2171 nodes,
2172 candidates,
2173 store,
2174 ll_prices,
2175 ll_price_generations,
2176 ml_prices,
2177 ml_price_generations,
2178 },
2179 (
2180 last_stretch.price,
2181 last_stretch.reps,
2182 tail_literals,
2183 target_pos.min($current_len),
2184 ),
2185 );
2186 }
2187 cur -= tail_literals;
2188 last_stretch.reps
2189 };
2190 let store_end = cur + 2;
2191 if store.len() <= store_end {
2192 store.resize(store_end + 1, HcOptimalNode::default());
2193 }
2194 let mut store_start;
2195 let mut stretch_pos = cur;
2196
2197 if last_stretch.litlen > 0 {
2198 store[store_end] = HcOptimalNode {
2199 litlen: last_stretch.litlen,
2200 mlen: 0,
2201 ..HcOptimalNode::default()
2202 };
2203 store_start = store_end.saturating_sub(1);
2204 store[store_start] = last_stretch;
2205 }
2206 store[store_end] = last_stretch;
2207 store_start = store_end;
2208
2209 loop {
2210 let next_stretch = nodes[stretch_pos];
2211 store[store_start].litlen = next_stretch.litlen;
2212 if next_stretch.mlen == 0 {
2213 break;
2214 }
2215 if store_start == 0 {
2216 break;
2217 }
2218 store_start -= 1;
2219 store[store_start] = next_stretch;
2220 let litlen = next_stretch.litlen as usize;
2227 let mlen = next_stretch.mlen as usize;
2228 debug_assert!(litlen + mlen <= $current_len);
2229 let step = litlen + mlen;
2230 if step == 0 || stretch_pos < step {
2231 break;
2232 }
2233 stretch_pos -= step;
2234 }
2235
2236 let mut tail_literals = initial_litlen;
2237 let mut store_pos = store_start;
2238 while store_pos <= store_end {
2239 let stretch = store[store_pos];
2240 let llen = stretch.litlen as usize;
2241 let mlen = stretch.mlen as usize;
2242 if mlen == 0 {
2243 tail_literals = llen;
2244 store_pos += 1;
2245 continue;
2246 }
2247 $out.push(HcOptimalSequence {
2248 offset: stretch.off,
2249 match_len: mlen as u32,
2250 lit_len: llen as u32,
2251 });
2252 tail_literals = 0;
2253 store_pos += 1;
2254 }
2255 let result = (
2256 last_stretch.price,
2257 end_reps,
2258 if last_stretch.litlen > 0 {
2259 last_stretch.litlen as usize
2260 } else {
2261 tail_literals
2262 },
2263 target_pos.min($current_len),
2264 );
2265 $self.backend.bt_mut().finish_optimal_plan(
2266 HcOptimalPlanBuffers {
2267 nodes,
2268 candidates,
2269 store,
2270 ll_prices,
2271 ll_price_generations,
2272 ml_prices,
2273 ml_price_generations,
2274 },
2275 result,
2276 )
2277 }};
2278}
2279
2280macro_rules! collect_optimal_candidates_initialized_body {
2289 (
2290 $self:expr,
2291 $strategy_ty:ty,
2292 $abs_pos:ident,
2293 $current_abs_end:ident,
2294 $profile:ident,
2295 $query:ident,
2296 $out:ident,
2297 $bt_matchfinder:ident,
2298 $bt_update:ident,
2299 $bt_insert:ident,
2300 $for_each_rep:ident,
2301 $hash3:ident,
2302 $cpl:path $(,)?
2303 ) => {{
2304 let use_hash3: bool = <$strategy_ty as super::strategy::Strategy>::USE_HASH3;
2313 debug_assert!(!$self.table.hash_table.is_empty());
2314 debug_assert!($self.table.hash3_log == 0 || !$self.table.hash3_table.is_empty());
2315 debug_assert!(
2316 !use_hash3 || $self.table.hash3_log != 0,
2317 "Strategy::USE_HASH3 = true but runtime hash3_log is 0 — call configure() first",
2318 );
2319 debug_assert!(!$self.table.chain_table.is_empty());
2320 let min_match_len = HC_OPT_MIN_MATCH_LEN;
2321 let reps = $query.reps;
2322 let lit_len = $query.lit_len;
2323 let ldm_candidate = $query.ldm_candidate;
2324 $out.clear();
2325 if $abs_pos < $self.table.skip_insert_until_abs {
2326 if let Some(ldm) = ldm_candidate {
2327 let mut best_len_for_skip = 0usize;
2328 let _ = super::bt::BtMatcher::push_candidate_ladder(
2329 $out,
2330 &mut best_len_for_skip,
2331 ldm,
2332 min_match_len,
2333 );
2334 }
2335 return;
2336 }
2337 if $bt_matchfinder {
2338 unsafe { $self.table.$bt_update($abs_pos, $current_abs_end) };
2341 }
2342 let current_idx = $abs_pos - $self.table.history_abs_start;
2343 if current_idx + 4 > $self.table.live_history().len() {
2344 if let Some(ldm) = ldm_candidate {
2345 let mut best_len_for_skip = 0usize;
2346 let _ = super::bt::BtMatcher::push_candidate_ladder(
2347 $out,
2348 &mut best_len_for_skip,
2349 ldm,
2350 min_match_len,
2351 );
2352 }
2353 return;
2354 }
2355 let mut best_len_for_skip = 0usize;
2356 let mut skip_further_match_search = false;
2357 let mut rep_len_candidate_found = false;
2358 unsafe {
2360 $self.hc.$for_each_rep(
2361 &$self.table,
2362 $abs_pos,
2363 lit_len,
2364 reps,
2365 $current_abs_end,
2366 min_match_len,
2367 |rep| {
2368 if rep.match_len >= min_match_len {
2369 rep_len_candidate_found = true;
2370 }
2371 let _ = super::bt::BtMatcher::push_candidate_ladder(
2372 $out,
2373 &mut best_len_for_skip,
2374 rep,
2375 min_match_len,
2376 );
2377 if rep.match_len > $profile.sufficient_match_len {
2378 skip_further_match_search = true;
2379 }
2380 if $abs_pos + rep.match_len >= $current_abs_end {
2387 skip_further_match_search = true;
2388 }
2389 },
2390 )
2391 };
2392 if use_hash3 && !skip_further_match_search && best_len_for_skip < min_match_len {
2396 $self.table.update_hash3_until($abs_pos);
2397 if let Some(h3) = unsafe {
2399 $self
2400 .table
2401 .$hash3($abs_pos, $current_abs_end, min_match_len)
2402 } {
2403 let _ = super::bt::BtMatcher::push_candidate_ladder(
2404 $out,
2405 &mut best_len_for_skip,
2406 h3,
2407 min_match_len,
2408 );
2409 if !rep_len_candidate_found
2410 && (h3.match_len > $profile.sufficient_match_len
2411 || $abs_pos + h3.match_len >= $current_abs_end)
2412 {
2413 $self.table.skip_insert_until_abs = $abs_pos + 1;
2414 skip_further_match_search = true;
2415 }
2416 }
2417 }
2418 if !skip_further_match_search && $bt_matchfinder {
2419 unsafe {
2421 $self.table.$bt_insert(
2422 $abs_pos,
2423 $current_abs_end,
2424 $profile,
2425 min_match_len,
2426 &mut best_len_for_skip,
2427 $out,
2428 )
2429 };
2430 } else if !skip_further_match_search {
2431 $self.table.insert_position($abs_pos);
2432 let max_chain_depth = $profile.max_chain_depth.min($self.hc.search_depth);
2433 let concat = &$self.table.history[$self.table.history_start..];
2434 let mut match_end_abs = $abs_pos + 9;
2438 if max_chain_depth > 0 {
2439 for (visited, candidate_abs) in $self
2440 .hc
2441 .chain_candidates(&$self.table, $abs_pos)
2442 .into_iter()
2443 .enumerate()
2444 {
2445 if visited >= max_chain_depth {
2446 break;
2447 }
2448 if candidate_abs == usize::MAX {
2449 break;
2450 }
2451 if candidate_abs < $self.table.history_abs_start || candidate_abs >= $abs_pos {
2452 continue;
2453 }
2454 let candidate_idx = candidate_abs - $self.table.history_abs_start;
2455 debug_assert!(
2456 $abs_pos <= $current_abs_end,
2457 "HC chain walker called past current block end"
2458 );
2459 let tail_limit = $current_abs_end - $abs_pos;
2460 let base = concat.as_ptr();
2461 let match_len =
2466 unsafe { $cpl(base.add(candidate_idx), base.add(current_idx), tail_limit) };
2467 if match_len < min_match_len {
2468 continue;
2469 }
2470 let offset = $abs_pos - candidate_abs;
2471 if super::bt::BtMatcher::push_candidate_ladder(
2472 $out,
2473 &mut best_len_for_skip,
2474 MatchCandidate {
2475 start: $abs_pos,
2476 offset,
2477 match_len,
2478 },
2479 min_match_len,
2480 ) {
2481 let candidate_end = candidate_abs + match_len;
2482 if candidate_end > match_end_abs {
2483 match_end_abs = candidate_end;
2484 }
2485 }
2486 if match_len > HC_OPT_NUM || $abs_pos + match_len >= $current_abs_end {
2487 break;
2488 }
2489 }
2490 }
2491 $self.table.skip_insert_until_abs =
2494 $self.table.skip_insert_until_abs.max(match_end_abs - 8);
2495 }
2496 if let Some(ldm) = ldm_candidate {
2497 let _ = super::bt::BtMatcher::push_candidate_ladder(
2498 $out,
2499 &mut best_len_for_skip,
2500 ldm,
2501 min_match_len,
2502 );
2503 }
2504 }};
2505}
2506
2507macro_rules! hash3_candidate_body {
2512 (
2513 $table:expr,
2514 $abs_pos:ident,
2515 $current_abs_end:ident,
2516 $min_match_len:ident,
2517 $cpl:path $(,)?
2518 ) => {{
2519 if $table.hash3_log == 0 {
2520 return None;
2521 }
2522 let idx = $abs_pos.checked_sub($table.history_abs_start)?;
2523 let concat = $table.live_history();
2524 if idx + 4 > concat.len() {
2525 return None;
2526 }
2527 let hash3 = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
2528 concat,
2529 idx,
2530 $table.hash3_log,
2531 3,
2532 );
2533 let entry = $table
2534 .hash3_table
2535 .get(hash3)
2536 .copied()
2537 .unwrap_or($crate::encoding::match_table::storage::HC_EMPTY);
2538 let candidate_abs =
2539 $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
2540 entry,
2541 $table.position_base,
2542 $table.index_shift,
2543 )?;
2544 if candidate_abs < $table.history_abs_start || candidate_abs >= $abs_pos {
2545 return None;
2546 }
2547 let offset = $abs_pos - candidate_abs;
2548 if offset >= $crate::encoding::bt::HC3_MAX_OFFSET {
2549 return None;
2550 }
2551 let candidate_idx = candidate_abs - $table.history_abs_start;
2552 let tail_limit = $current_abs_end.saturating_sub($abs_pos);
2553 let base = concat.as_ptr();
2554 let match_len = unsafe { $cpl(base.add(candidate_idx), base.add(idx), tail_limit) };
2557 (match_len >= $min_match_len).then_some($crate::encoding::opt::types::MatchCandidate {
2558 start: $abs_pos,
2559 offset,
2560 match_len,
2561 })
2562 }};
2563}
2564pub(crate) use hash3_candidate_body;
2565
2566macro_rules! for_each_repcode_candidate_body {
2576 (
2577 $table:expr,
2578 $abs_pos:ident,
2579 $lit_len:ident,
2580 $reps:ident,
2581 $current_abs_end:ident,
2582 $min_match_len:ident,
2583 $f:ident,
2584 $cpl:path $(,)?
2585 ) => {{
2586 let rep_offsets: [Option<usize>; 3] = if $lit_len == 0 {
2587 [
2588 Some($reps[1] as usize),
2589 Some($reps[2] as usize),
2590 ($reps[0] > 1).then_some(($reps[0] - 1) as usize),
2591 ]
2592 } else {
2593 [
2594 Some($reps[0] as usize),
2595 Some($reps[1] as usize),
2596 Some($reps[2] as usize),
2597 ]
2598 };
2599 let concat = $table.live_history();
2600 let current_idx = $abs_pos - $table.history_abs_start;
2601 if current_idx + 4 > concat.len() {
2602 return;
2603 }
2604 let tail_limit = $current_abs_end.saturating_sub($abs_pos);
2605 let base = concat.as_ptr();
2606 let concat_len = concat.len();
2607 for rep in rep_offsets.into_iter().flatten() {
2608 if rep == 0 || rep > $abs_pos {
2609 continue;
2610 }
2611 let candidate_pos = $abs_pos - rep;
2612 if candidate_pos < $table.history_abs_start {
2613 continue;
2614 }
2615 let candidate_idx = candidate_pos - $table.history_abs_start;
2616 let max = (concat_len - candidate_idx)
2621 .min(concat_len - current_idx)
2622 .min(tail_limit);
2623 let match_len = unsafe { $cpl(base.add(candidate_idx), base.add(current_idx), max) };
2624 if match_len < $min_match_len {
2625 continue;
2626 }
2627 $f(MatchCandidate {
2628 start: $abs_pos,
2629 offset: rep,
2630 match_len,
2631 });
2632 }
2633 }};
2634}
2635pub(crate) use for_each_repcode_candidate_body;
2636
2637macro_rules! bt_insert_and_collect_matches_body {
2644 (
2645 $table:expr,
2646 $search_depth:expr,
2647 $abs_pos:ident,
2648 $current_abs_end:ident,
2649 $profile:ident,
2650 $min_match_len:ident,
2651 $best_len_for_skip:ident,
2652 $out:ident,
2653 $cmf:path $(,)?
2654 ) => {{
2655 let idx = $abs_pos - $table.history_abs_start;
2656 let concat = &$table.history[$table.history_start..];
2657 if idx + 8 > concat.len() {
2658 return;
2659 }
2660 debug_assert!(
2661 $abs_pos <= $current_abs_end,
2662 "BT collect called past current block end"
2663 );
2664 let tail_limit = $current_abs_end - $abs_pos;
2665 let hash = $crate::encoding::match_table::storage::MatchTable::hash_position_at(
2666 concat,
2667 idx,
2668 $table.hash_log,
2669 $crate::encoding::bt::BtMatcher::HASH_MLS,
2670 );
2671 let Some(relative_pos) = $table.relative_position($abs_pos) else {
2672 return;
2673 };
2674 let stored = relative_pos + 1;
2675 let bt_mask = $table.bt_mask();
2676 let bt_low = $abs_pos.saturating_sub(bt_mask);
2679 let window_low = $table.window_low_abs_for_target($abs_pos);
2680 let mut match_end_abs = $abs_pos + 9;
2684 let mut compares_left = $profile.max_chain_depth.min($search_depth);
2685 let mut common_length_smaller = 0usize;
2686 let mut common_length_larger = 0usize;
2687 let pair_idx = $table.bt_pair_index_for_abs($abs_pos);
2688 let mut smaller_slot = pair_idx;
2689 let mut larger_slot = pair_idx + 1;
2690 let mut match_stored = $table.hash_table[hash];
2691 $table.hash_table[hash] = stored;
2692 debug_assert!(
2697 $min_match_len >= $crate::encoding::cost_model::HC_FORMAT_MINMATCH,
2698 "min_match_len must be at least HC_FORMAT_MINMATCH"
2699 );
2700 let mut best_len = (*$best_len_for_skip).max($min_match_len - 1);
2701
2702 while compares_left > 0 {
2703 let Some(candidate_abs) =
2704 $crate::encoding::match_table::storage::MatchTable::stored_abs_position_fast(
2705 match_stored,
2706 $table.position_base,
2707 $table.index_shift,
2708 )
2709 else {
2710 break;
2711 };
2712 if candidate_abs < window_low || candidate_abs >= $abs_pos {
2713 break;
2714 }
2715 compares_left -= 1;
2716
2717 let next_pair_idx = $table.bt_pair_index_for_abs(candidate_abs);
2718 let next_smaller = $table.chain_table[next_pair_idx];
2719 let next_larger = $table.chain_table[next_pair_idx + 1];
2720 let seed_len = common_length_smaller.min(common_length_larger);
2721 let candidate_idx = candidate_abs - $table.history_abs_start;
2722 let match_len = unsafe { $cmf(concat, idx, candidate_idx, tail_limit, seed_len) };
2725
2726 if match_len > best_len {
2727 let offset = $abs_pos - candidate_abs;
2728 let accepted = $crate::encoding::bt::BtMatcher::push_candidate_ladder(
2729 $out,
2730 $best_len_for_skip,
2731 $crate::encoding::opt::types::MatchCandidate {
2732 start: $abs_pos,
2733 offset,
2734 match_len,
2735 },
2736 $min_match_len,
2737 );
2738 if accepted {
2739 best_len = match_len;
2740 let candidate_end = candidate_abs + match_len;
2748 if candidate_end > match_end_abs {
2749 match_end_abs = candidate_end;
2750 }
2751 if match_len >= tail_limit
2752 || match_len > $crate::encoding::cost_model::HC_OPT_NUM
2753 {
2754 break;
2755 }
2756 }
2757 }
2758
2759 if match_len >= tail_limit {
2760 break;
2761 }
2762
2763 let candidate_next = candidate_idx + match_len;
2764 let current_next = idx + match_len;
2765 if concat[candidate_next] < concat[current_next] {
2766 $table.chain_table[smaller_slot] = match_stored;
2767 common_length_smaller = match_len;
2768 if candidate_abs <= bt_low {
2769 smaller_slot = usize::MAX;
2770 break;
2771 }
2772 smaller_slot = next_pair_idx + 1;
2773 match_stored = next_larger;
2774 } else {
2775 $table.chain_table[larger_slot] = match_stored;
2776 common_length_larger = match_len;
2777 if candidate_abs <= bt_low {
2778 larger_slot = usize::MAX;
2779 break;
2780 }
2781 larger_slot = next_pair_idx;
2782 match_stored = next_smaller;
2783 }
2784 }
2785
2786 if smaller_slot != usize::MAX {
2787 $table.chain_table[smaller_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
2788 }
2789 if larger_slot != usize::MAX {
2790 $table.chain_table[larger_slot] = $crate::encoding::match_table::storage::HC_EMPTY;
2791 }
2792 $table.skip_insert_until_abs = match_end_abs - 8;
2795 }};
2796}
2797pub(crate) use bt_insert_and_collect_matches_body;
2798
2799impl HcMatchGenerator {
2800 fn should_run_btultra2_seed_pass<S: super::strategy::Strategy>(
2801 &self,
2802 current_len: usize,
2803 ) -> bool {
2804 if !(S::OPT_LEVEL == 2 && S::USE_HASH3) {
2811 return false;
2812 }
2813 let HcBackend::Bt(bt) = &self.backend else {
2814 return false;
2815 };
2816 bt.opt_state.lit_length_sum == 0
2817 && bt.opt_state.dictionary_seed.is_none()
2818 && !self.table.dictionary_primed_for_frame
2819 && bt.ldm_sequences.is_empty()
2820 && self.table.window_size == current_len
2821 && self.table.history_abs_start == 0
2822 && self.table.window.len() == 1
2823 && current_len > HC_PREDEF_THRESHOLD
2824 }
2825
2826 fn new(max_window_size: usize) -> Self {
2827 Self {
2828 table: super::match_table::storage::MatchTable::new(max_window_size),
2829 hc: super::hc::HcMatcher::new(2, HC_SEARCH_DEPTH, HC_TARGET_LEN),
2830 backend: HcBackend::Hc,
2833 strategy_tag: super::strategy::StrategyTag::Lazy,
2840 }
2841 }
2842
2843 fn configure(&mut self, config: HcConfig, tag: super::strategy::StrategyTag, window_log: u8) {
2844 use super::strategy::StrategyTag;
2845 self.strategy_tag = tag;
2849 let is_btultra2 = tag == StrategyTag::BtUltra2;
2850 let uses_bt = matches!(
2851 tag,
2852 StrategyTag::BtOpt | StrategyTag::BtUltra | StrategyTag::BtUltra2
2853 );
2854 let next_hash3_log = if is_btultra2 {
2855 HC3_HASH_LOG.min(window_log as usize)
2856 } else {
2857 0
2858 };
2859 let resize = self.table.hash_log != config.hash_log
2860 || self.table.chain_log != config.chain_log
2861 || self.table.hash3_log != next_hash3_log;
2862 self.table.hash_log = config.hash_log;
2863 self.table.chain_log = config.chain_log;
2864 self.table.hash3_log = next_hash3_log;
2865 self.hc.search_depth = if uses_bt {
2866 config.search_depth
2867 } else {
2868 config.search_depth.min(MAX_HC_SEARCH_DEPTH)
2869 };
2870 self.hc.target_len = config.target_len;
2871 self.table.search_depth = self.hc.search_depth;
2875 self.table.is_btultra2 = is_btultra2;
2876 self.table.uses_bt = uses_bt;
2877 match (&self.backend, self.table.uses_bt) {
2881 (HcBackend::Hc, true) => {
2882 self.backend = HcBackend::Bt(alloc::boxed::Box::new(super::bt::BtMatcher::new()));
2883 }
2884 (HcBackend::Bt(_), false) => {
2885 self.backend = HcBackend::Hc;
2886 }
2887 _ => {}
2888 }
2889 if resize && !self.table.hash_table.is_empty() {
2890 self.table.hash_table.clear();
2892 self.table.hash3_table.clear();
2893 self.table.chain_table.clear();
2894 }
2895 }
2896
2897 fn seed_dictionary_entropy(
2898 &mut self,
2899 huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
2900 ll: Option<&crate::fse::fse_encoder::FSETable>,
2901 ml: Option<&crate::fse::fse_encoder::FSETable>,
2902 of: Option<&crate::fse::fse_encoder::FSETable>,
2903 ) {
2904 if let HcBackend::Bt(bt) = &mut self.backend {
2905 bt.opt_state.seed_dictionary_entropy(huff, ll, ml, of);
2906 }
2907 }
2908
2909 fn reset(&mut self, reuse_space: impl FnMut(Vec<u8>)) {
2910 self.table.reset(reuse_space);
2911 if let HcBackend::Bt(bt) = &mut self.backend {
2912 bt.reset();
2913 }
2914 }
2915
2916 fn skip_matching(&mut self, incompressible_hint: Option<bool>) {
2919 self.table.skip_matching(incompressible_hint);
2920 }
2921
2922 #[cfg(test)]
2928 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
2929 use super::strategy::{self, StrategyTag};
2930 match self.strategy_tag {
2936 StrategyTag::Fast | StrategyTag::Dfast | StrategyTag::Greedy | StrategyTag::Lazy => {
2937 self.start_matching_lazy(&mut handle_sequence)
2938 }
2939 StrategyTag::BtOpt => {
2940 self.start_matching_optimal::<strategy::BtOpt>(&mut handle_sequence)
2941 }
2942 StrategyTag::BtUltra => {
2943 self.start_matching_optimal::<strategy::BtUltra>(&mut handle_sequence)
2944 }
2945 StrategyTag::BtUltra2 => {
2946 self.start_matching_optimal::<strategy::BtUltra2>(&mut handle_sequence)
2947 }
2948 }
2949 }
2950
2951 pub(crate) fn start_matching_strategy<S: super::strategy::Strategy>(
2962 &mut self,
2963 handle_sequence: &mut impl for<'a> FnMut(Sequence<'a>),
2964 ) {
2965 debug_assert_eq!(
2966 self.table.uses_bt,
2967 S::USE_BT,
2968 "Strategy::USE_BT disagrees with runtime table.uses_bt at HC dispatch"
2969 );
2970 if S::USE_BT {
2971 self.start_matching_optimal::<S>(handle_sequence)
2972 } else {
2973 self.start_matching_lazy(handle_sequence)
2974 }
2975 }
2976
2977 fn start_matching_lazy(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
2978 self.table.ensure_tables();
2979
2980 let current_len = self.table.window.back().unwrap().len();
2981 if current_len == 0 {
2982 return;
2983 }
2984
2985 let current_abs_start = self.table.history_abs_start + self.table.window_size - current_len;
2986 let current_abs_end = current_abs_start + current_len;
2987 self.table
2988 .backfill_boundary_positions(current_abs_start, current_abs_end);
2989
2990 let mut pos = 0usize;
2991 let mut literals_start = 0usize;
2992 while pos + HC_MIN_MATCH_LEN <= current_len {
2993 let abs_pos = current_abs_start + pos;
2994 let lit_len = pos - literals_start;
2995
2996 let best = self.hc.find_best_match(&self.table, abs_pos, lit_len);
2997 if let Some(candidate) = self.hc.pick_lazy_match(&self.table, abs_pos, lit_len, best) {
2998 self.table
2999 .insert_positions(abs_pos, candidate.start + candidate.match_len);
3000 let current = self.table.window.back().unwrap().as_slice();
3001 let start = candidate.start - current_abs_start;
3002 let literals = ¤t[literals_start..start];
3003 handle_sequence(Sequence::Triple {
3004 literals,
3005 offset: candidate.offset,
3006 match_len: candidate.match_len,
3007 });
3008 let _ = encode_offset_with_history(
3009 candidate.offset as u32,
3010 literals.len() as u32,
3011 &mut self.table.offset_hist,
3012 );
3013 pos = start + candidate.match_len;
3014 literals_start = pos;
3015 } else {
3016 self.table.insert_position(abs_pos);
3017 pos += 1;
3018 }
3019 }
3020
3021 while pos + 4 <= current_len {
3024 self.table.insert_position(current_abs_start + pos);
3025 pos += 1;
3026 }
3027
3028 if literals_start < current_len {
3029 let current = self.table.window.back().unwrap().as_slice();
3030 handle_sequence(Sequence::Literals {
3031 literals: ¤t[literals_start..],
3032 });
3033 }
3034 }
3035
3036 fn start_matching_optimal<S: super::strategy::Strategy>(
3037 &mut self,
3038 mut handle_sequence: impl for<'a> FnMut(Sequence<'a>),
3039 ) {
3040 self.table.ensure_tables();
3041 let current_len = self.table.window.back().unwrap().len();
3042 if current_len == 0 {
3043 return;
3044 }
3045 let current_ptr = self.table.window.back().unwrap().as_ptr();
3046 let current = unsafe { core::slice::from_raw_parts(current_ptr, current_len) };
3050
3051 let current_abs_start = self.table.history_abs_start + self.table.window_size - current_len;
3052 let current_abs_end = current_abs_start + current_len;
3053 self.table
3054 .apply_limited_update_after_long_match(current_abs_start);
3055 let hash3_start_cursor = self
3056 .table
3057 .skip_insert_until_abs
3058 .max(self.table.history_abs_start);
3059 self.table
3060 .backfill_boundary_positions(current_abs_start, current_abs_end);
3061 self.table.next_to_update3 = hash3_start_cursor;
3062 let live_history = self.table.live_history();
3077 let history_abs_start = self.table.history_abs_start;
3078 self.backend.bt_mut().prepare_ldm_candidates(
3079 live_history,
3080 history_abs_start,
3081 current_abs_start,
3082 current_len,
3083 );
3084
3085 if self.should_run_btultra2_seed_pass::<S>(current_len) {
3086 self.run_btultra2_seed_pass(current, current_abs_start, current_len);
3087 }
3088
3089 let profile = HcOptimalCostProfile::const_for_strategy::<S>();
3095 let mut opt_state =
3096 core::mem::replace(&mut self.backend.bt_mut().opt_state, HcOptState::new());
3097 opt_state.rescale_freqs(current, profile);
3098 let mut best_plan = core::mem::take(&mut self.backend.bt_mut().opt_segment_plan_scratch);
3099 best_plan.clear();
3100 let mut plan_reps = self.table.offset_hist;
3101 let (mut cursor, mut plan_litlen) = self
3102 .table
3103 .donor_opt_start_cursor_and_litlen(current_abs_start);
3104 let mut plan_literals_cursor = 0usize;
3105 let match_loop_limit = current_len.saturating_sub(8);
3106 while cursor < match_loop_limit {
3107 let remaining_len = current_len - cursor;
3108 let segment_abs_start = current_abs_start + cursor;
3109 let segment_start = best_plan.len();
3110 let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan::<S>(
3111 ¤t[cursor..],
3112 segment_abs_start,
3113 remaining_len,
3114 HcOptimalPlanState {
3115 reps: plan_reps,
3116 litlen: plan_litlen,
3117 profile,
3118 },
3119 &opt_state,
3120 &mut best_plan,
3121 );
3122 BtMatcher::update_plan_stats_segment(
3123 current,
3124 current_len,
3125 &best_plan[segment_start..],
3126 &mut plan_literals_cursor,
3127 &mut plan_reps,
3128 &mut opt_state,
3129 profile.accurate,
3130 );
3131 plan_reps = end_reps;
3132 plan_litlen = end_litlen;
3133 cursor += consumed_len;
3134 }
3135
3136 self.table
3137 .emit_optimal_plan(current_len, &best_plan, &mut handle_sequence);
3138 best_plan.clear();
3139 self.backend.bt_mut().opt_segment_plan_scratch = best_plan;
3140 self.backend.bt_mut().opt_state = opt_state;
3141 }
3142
3143 fn run_btultra2_seed_pass(
3144 &mut self,
3145 current: &[u8],
3146 current_abs_start: usize,
3147 current_len: usize,
3148 ) {
3149 type S = super::strategy::BtUltra2;
3154 let seed_profile = HcOptimalCostProfile::const_for_strategy::<S>();
3155 let mut opt_state =
3156 core::mem::replace(&mut self.backend.bt_mut().opt_state, HcOptState::new());
3157 opt_state.rescale_freqs(current, seed_profile);
3158 let mut seed_reps = self.table.offset_hist;
3159 let (mut cursor, mut seed_litlen) = self
3160 .table
3161 .donor_opt_start_cursor_and_litlen(current_abs_start);
3162 let mut seed_literals_cursor = 0usize;
3163 let mut seed_plan = core::mem::take(&mut self.backend.bt_mut().opt_seed_plan_scratch);
3164 seed_plan.clear();
3165 let match_loop_limit = current_len.saturating_sub(8);
3166 while cursor < match_loop_limit {
3167 let remaining_len = current_len - cursor;
3168 let segment_abs_start = current_abs_start + cursor;
3169 let segment_start = seed_plan.len();
3170 let (_, end_reps, end_litlen, consumed_len) = self.build_optimal_plan::<S>(
3171 ¤t[cursor..],
3172 segment_abs_start,
3173 remaining_len,
3174 HcOptimalPlanState {
3175 reps: seed_reps,
3176 litlen: seed_litlen,
3177 profile: seed_profile,
3178 },
3179 &opt_state,
3180 &mut seed_plan,
3181 );
3182 BtMatcher::update_plan_stats_segment(
3183 current,
3184 current_len,
3185 &seed_plan[segment_start..],
3186 &mut seed_literals_cursor,
3187 &mut seed_reps,
3188 &mut opt_state,
3189 seed_profile.accurate,
3190 );
3191 seed_plan.truncate(segment_start);
3192 seed_reps = end_reps;
3193 seed_litlen = end_litlen;
3194 cursor += consumed_len;
3195 }
3196 seed_plan.clear();
3197 self.backend.bt_mut().opt_seed_plan_scratch = seed_plan;
3198 self.backend.bt_mut().opt_state = opt_state;
3199
3200 self.table.position_base = self.table.history_abs_start;
3203 self.table.index_shift = current_len;
3204 self.table.next_to_update3 = current_abs_start;
3205 self.table.skip_insert_until_abs = current_abs_start;
3206 self.table.allow_zero_relative_position = true;
3212 }
3213
3214 fn build_optimal_plan<S: super::strategy::Strategy>(
3215 &mut self,
3216 current: &[u8],
3217 current_abs_start: usize,
3218 current_len: usize,
3219 initial_state: HcOptimalPlanState,
3220 stats: &HcOptState,
3221 out: &mut Vec<HcOptimalSequence>,
3222 ) -> (u32, [u32; 3], usize, usize) {
3223 debug_assert!(S::USE_BT, "build_optimal_plan called on non-BT strategy");
3224 debug_assert_eq!(initial_state.profile.accurate, S::ACCURATE_PRICE);
3225 debug_assert_eq!(
3226 initial_state.profile.favor_small_offsets,
3227 S::FAVOR_SMALL_OFFSETS
3228 );
3229 let profile = initial_state.profile;
3236 match (profile.accurate, profile.favor_small_offsets) {
3237 (true, false) => self.build_optimal_plan_impl::<S, true, false>(
3238 current,
3239 current_abs_start,
3240 current_len,
3241 initial_state,
3242 stats,
3243 out,
3244 ),
3245 (true, true) => self.build_optimal_plan_impl::<S, true, true>(
3246 current,
3247 current_abs_start,
3248 current_len,
3249 initial_state,
3250 stats,
3251 out,
3252 ),
3253 (false, false) => self.build_optimal_plan_impl::<S, false, false>(
3254 current,
3255 current_abs_start,
3256 current_len,
3257 initial_state,
3258 stats,
3259 out,
3260 ),
3261 (false, true) => self.build_optimal_plan_impl::<S, false, true>(
3262 current,
3263 current_abs_start,
3264 current_len,
3265 initial_state,
3266 stats,
3267 out,
3268 ),
3269 }
3270 }
3271
3272 #[inline(always)]
3281 fn build_optimal_plan_impl<
3282 S: super::strategy::Strategy,
3283 const ACCURATE_PRICE: bool,
3284 const FAVOR_SMALL_OFFSETS: bool,
3285 >(
3286 &mut self,
3287 current: &[u8],
3288 current_abs_start: usize,
3289 current_len: usize,
3290 initial_state: HcOptimalPlanState,
3291 stats: &HcOptState,
3292 out: &mut Vec<HcOptimalSequence>,
3293 ) -> (u32, [u32; 3], usize, usize) {
3294 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3295 unsafe {
3296 self.build_optimal_plan_impl_neon::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3297 current,
3298 current_abs_start,
3299 current_len,
3300 initial_state,
3301 stats,
3302 out,
3303 )
3304 }
3305 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3306 {
3307 use crate::encoding::fastpath::{FastpathKernel, select_kernel};
3308 match select_kernel() {
3309 FastpathKernel::Avx2Bmi2 => unsafe {
3310 self.build_optimal_plan_impl_avx2_bmi2::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3311 current,
3312 current_abs_start,
3313 current_len,
3314 initial_state,
3315 stats,
3316 out,
3317 )
3318 },
3319 FastpathKernel::Sse42 => unsafe {
3320 self.build_optimal_plan_impl_sse42::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3321 current,
3322 current_abs_start,
3323 current_len,
3324 initial_state,
3325 stats,
3326 out,
3327 )
3328 },
3329 FastpathKernel::Scalar => self
3330 .build_optimal_plan_impl_scalar::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3331 current,
3332 current_abs_start,
3333 current_len,
3334 initial_state,
3335 stats,
3336 out,
3337 ),
3338 }
3339 }
3340 #[cfg(not(any(
3341 all(target_arch = "aarch64", target_endian = "little"),
3342 target_arch = "x86",
3343 target_arch = "x86_64"
3344 )))]
3345 {
3346 self.build_optimal_plan_impl_scalar::<S, ACCURATE_PRICE, FAVOR_SMALL_OFFSETS>(
3347 current,
3348 current_abs_start,
3349 current_len,
3350 initial_state,
3351 stats,
3352 out,
3353 )
3354 }
3355 }
3356
3357 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3361 #[target_feature(enable = "neon")]
3362 unsafe fn build_optimal_plan_impl_neon<
3363 S: super::strategy::Strategy,
3364 const ACCURATE_PRICE: bool,
3365 const FAVOR_SMALL_OFFSETS: bool,
3366 >(
3367 &mut self,
3368 current: &[u8],
3369 current_abs_start: usize,
3370 current_len: usize,
3371 initial_state: HcOptimalPlanState,
3372 stats: &HcOptState,
3373 out: &mut Vec<HcOptimalSequence>,
3374 ) -> (u32, [u32; 3], usize, usize) {
3375 build_optimal_plan_impl_body!(
3376 self,
3377 S,
3378 current,
3379 current_abs_start,
3380 current_len,
3381 initial_state,
3382 stats,
3383 out,
3384 collect_optimal_candidates_initialized_neon,
3385 )
3386 }
3387
3388 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3389 #[target_feature(enable = "sse4.2")]
3390 unsafe fn build_optimal_plan_impl_sse42<
3391 S: super::strategy::Strategy,
3392 const ACCURATE_PRICE: bool,
3393 const FAVOR_SMALL_OFFSETS: bool,
3394 >(
3395 &mut self,
3396 current: &[u8],
3397 current_abs_start: usize,
3398 current_len: usize,
3399 initial_state: HcOptimalPlanState,
3400 stats: &HcOptState,
3401 out: &mut Vec<HcOptimalSequence>,
3402 ) -> (u32, [u32; 3], usize, usize) {
3403 build_optimal_plan_impl_body!(
3404 self,
3405 S,
3406 current,
3407 current_abs_start,
3408 current_len,
3409 initial_state,
3410 stats,
3411 out,
3412 collect_optimal_candidates_initialized_sse42,
3413 )
3414 }
3415
3416 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3417 #[target_feature(enable = "avx2,bmi2")]
3418 unsafe fn build_optimal_plan_impl_avx2_bmi2<
3419 S: super::strategy::Strategy,
3420 const ACCURATE_PRICE: bool,
3421 const FAVOR_SMALL_OFFSETS: bool,
3422 >(
3423 &mut self,
3424 current: &[u8],
3425 current_abs_start: usize,
3426 current_len: usize,
3427 initial_state: HcOptimalPlanState,
3428 stats: &HcOptState,
3429 out: &mut Vec<HcOptimalSequence>,
3430 ) -> (u32, [u32; 3], usize, usize) {
3431 build_optimal_plan_impl_body!(
3432 self,
3433 S,
3434 current,
3435 current_abs_start,
3436 current_len,
3437 initial_state,
3438 stats,
3439 out,
3440 collect_optimal_candidates_initialized_avx2_bmi2,
3441 )
3442 }
3443
3444 #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
3445 #[allow(unused_unsafe)]
3449 fn build_optimal_plan_impl_scalar<
3450 S: super::strategy::Strategy,
3451 const ACCURATE_PRICE: bool,
3452 const FAVOR_SMALL_OFFSETS: bool,
3453 >(
3454 &mut self,
3455 current: &[u8],
3456 current_abs_start: usize,
3457 current_len: usize,
3458 initial_state: HcOptimalPlanState,
3459 stats: &HcOptState,
3460 out: &mut Vec<HcOptimalSequence>,
3461 ) -> (u32, [u32; 3], usize, usize) {
3462 build_optimal_plan_impl_body!(
3463 self,
3464 S,
3465 current,
3466 current_abs_start,
3467 current_len,
3468 initial_state,
3469 stats,
3470 out,
3471 collect_optimal_candidates_initialized_scalar,
3472 )
3473 }
3474
3475 #[cfg(test)]
3476 fn collect_optimal_candidates(
3477 &mut self,
3478 abs_pos: usize,
3479 current_abs_end: usize,
3480 profile: HcOptimalCostProfile,
3481 query: HcCandidateQuery,
3482 out: &mut Vec<MatchCandidate>,
3483 ) {
3484 use super::strategy::{self, StrategyTag};
3485 self.table.ensure_tables();
3486 match self.strategy_tag {
3492 StrategyTag::BtUltra2 => self
3493 .collect_optimal_candidates_initialized::<strategy::BtUltra2, true>(
3494 abs_pos,
3495 current_abs_end,
3496 profile,
3497 query,
3498 out,
3499 ),
3500 StrategyTag::BtUltra => self
3501 .collect_optimal_candidates_initialized::<strategy::BtUltra, true>(
3502 abs_pos,
3503 current_abs_end,
3504 profile,
3505 query,
3506 out,
3507 ),
3508 StrategyTag::BtOpt => self
3509 .collect_optimal_candidates_initialized::<strategy::BtOpt, true>(
3510 abs_pos,
3511 current_abs_end,
3512 profile,
3513 query,
3514 out,
3515 ),
3516 StrategyTag::Fast | StrategyTag::Dfast | StrategyTag::Greedy | StrategyTag::Lazy => {
3517 self.collect_optimal_candidates_initialized::<strategy::Lazy, false>(
3518 abs_pos,
3519 current_abs_end,
3520 profile,
3521 query,
3522 out,
3523 )
3524 }
3525 }
3526 }
3527
3528 #[allow(dead_code)]
3538 #[inline(always)]
3539 fn collect_optimal_candidates_initialized<
3540 S: super::strategy::Strategy,
3541 const USE_BT_MATCHFINDER: bool,
3542 >(
3543 &mut self,
3544 abs_pos: usize,
3545 current_abs_end: usize,
3546 profile: HcOptimalCostProfile,
3547 query: HcCandidateQuery,
3548 out: &mut Vec<MatchCandidate>,
3549 ) {
3550 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3551 unsafe {
3552 self.collect_optimal_candidates_initialized_neon::<S, USE_BT_MATCHFINDER>(
3553 abs_pos,
3554 current_abs_end,
3555 profile,
3556 query,
3557 out,
3558 )
3559 }
3560 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3561 {
3562 use crate::encoding::fastpath::{FastpathKernel, select_kernel};
3563 match select_kernel() {
3564 FastpathKernel::Avx2Bmi2 => unsafe {
3565 self.collect_optimal_candidates_initialized_avx2_bmi2::<S, USE_BT_MATCHFINDER>(
3566 abs_pos,
3567 current_abs_end,
3568 profile,
3569 query,
3570 out,
3571 )
3572 },
3573 FastpathKernel::Sse42 => unsafe {
3574 self.collect_optimal_candidates_initialized_sse42::<S, USE_BT_MATCHFINDER>(
3575 abs_pos,
3576 current_abs_end,
3577 profile,
3578 query,
3579 out,
3580 )
3581 },
3582 FastpathKernel::Scalar => self
3583 .collect_optimal_candidates_initialized_scalar::<S, USE_BT_MATCHFINDER>(
3584 abs_pos,
3585 current_abs_end,
3586 profile,
3587 query,
3588 out,
3589 ),
3590 }
3591 }
3592 #[cfg(not(any(
3593 all(target_arch = "aarch64", target_endian = "little"),
3594 target_arch = "x86",
3595 target_arch = "x86_64"
3596 )))]
3597 {
3598 self.collect_optimal_candidates_initialized_scalar::<S, USE_BT_MATCHFINDER>(
3599 abs_pos,
3600 current_abs_end,
3601 profile,
3602 query,
3603 out,
3604 )
3605 }
3606 }
3607
3608 #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
3614 #[target_feature(enable = "neon")]
3615 unsafe fn collect_optimal_candidates_initialized_neon<
3616 S: super::strategy::Strategy,
3617 const USE_BT_MATCHFINDER: bool,
3618 >(
3619 &mut self,
3620 abs_pos: usize,
3621 current_abs_end: usize,
3622 profile: HcOptimalCostProfile,
3623 query: HcCandidateQuery,
3624 out: &mut Vec<MatchCandidate>,
3625 ) {
3626 collect_optimal_candidates_initialized_body!(
3627 self,
3628 S,
3629 abs_pos,
3630 current_abs_end,
3631 profile,
3632 query,
3633 out,
3634 USE_BT_MATCHFINDER,
3635 bt_update_tree_until_neon,
3636 bt_insert_and_collect_matches_neon,
3637 for_each_repcode_candidate_with_reps_neon,
3638 hash3_candidate_neon,
3639 crate::encoding::fastpath::neon::common_prefix_len_ptr,
3640 )
3641 }
3642
3643 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3644 #[target_feature(enable = "sse4.2")]
3645 unsafe fn collect_optimal_candidates_initialized_sse42<
3646 S: super::strategy::Strategy,
3647 const USE_BT_MATCHFINDER: bool,
3648 >(
3649 &mut self,
3650 abs_pos: usize,
3651 current_abs_end: usize,
3652 profile: HcOptimalCostProfile,
3653 query: HcCandidateQuery,
3654 out: &mut Vec<MatchCandidate>,
3655 ) {
3656 collect_optimal_candidates_initialized_body!(
3657 self,
3658 S,
3659 abs_pos,
3660 current_abs_end,
3661 profile,
3662 query,
3663 out,
3664 USE_BT_MATCHFINDER,
3665 bt_update_tree_until_sse42,
3666 bt_insert_and_collect_matches_sse42,
3667 for_each_repcode_candidate_with_reps_sse42,
3668 hash3_candidate_sse42,
3669 crate::encoding::fastpath::sse42::common_prefix_len_ptr,
3670 )
3671 }
3672
3673 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3674 #[target_feature(enable = "avx2,bmi2")]
3675 unsafe fn collect_optimal_candidates_initialized_avx2_bmi2<
3676 S: super::strategy::Strategy,
3677 const USE_BT_MATCHFINDER: bool,
3678 >(
3679 &mut self,
3680 abs_pos: usize,
3681 current_abs_end: usize,
3682 profile: HcOptimalCostProfile,
3683 query: HcCandidateQuery,
3684 out: &mut Vec<MatchCandidate>,
3685 ) {
3686 collect_optimal_candidates_initialized_body!(
3687 self,
3688 S,
3689 abs_pos,
3690 current_abs_end,
3691 profile,
3692 query,
3693 out,
3694 USE_BT_MATCHFINDER,
3695 bt_update_tree_until_avx2_bmi2,
3696 bt_insert_and_collect_matches_avx2_bmi2,
3697 for_each_repcode_candidate_with_reps_avx2_bmi2,
3698 hash3_candidate_avx2_bmi2,
3699 crate::encoding::fastpath::avx2_bmi2::common_prefix_len_ptr,
3700 )
3701 }
3702
3703 #[cfg(not(all(target_arch = "aarch64", target_endian = "little")))]
3704 #[allow(unused_unsafe)]
3707 fn collect_optimal_candidates_initialized_scalar<
3708 S: super::strategy::Strategy,
3709 const USE_BT_MATCHFINDER: bool,
3710 >(
3711 &mut self,
3712 abs_pos: usize,
3713 current_abs_end: usize,
3714 profile: HcOptimalCostProfile,
3715 query: HcCandidateQuery,
3716 out: &mut Vec<MatchCandidate>,
3717 ) {
3718 collect_optimal_candidates_initialized_body!(
3719 self,
3720 S,
3721 abs_pos,
3722 current_abs_end,
3723 profile,
3724 query,
3725 out,
3726 USE_BT_MATCHFINDER,
3727 bt_update_tree_until_scalar,
3728 bt_insert_and_collect_matches_scalar,
3729 for_each_repcode_candidate_with_reps_scalar,
3730 hash3_candidate_scalar,
3731 crate::encoding::fastpath::scalar::common_prefix_len_ptr,
3732 )
3733 }
3734}
3735
3736#[test]
3737fn matches() {
3738 let mut matcher = MatchGenerator::new(1000);
3739 let mut original_data = Vec::new();
3740 let mut reconstructed = Vec::new();
3741
3742 let replay_sequence = |seq: Sequence<'_>, reconstructed: &mut Vec<u8>| match seq {
3743 Sequence::Literals { literals } => {
3744 assert!(!literals.is_empty());
3745 reconstructed.extend_from_slice(literals);
3746 }
3747 Sequence::Triple {
3748 literals,
3749 offset,
3750 match_len,
3751 } => {
3752 assert!(offset > 0);
3753 assert!(match_len >= MIN_MATCH_LEN);
3754 reconstructed.extend_from_slice(literals);
3755 assert!(offset <= reconstructed.len());
3756 let start = reconstructed.len() - offset;
3757 for i in 0..match_len {
3758 let byte = reconstructed[start + i];
3759 reconstructed.push(byte);
3760 }
3761 }
3762 };
3763
3764 matcher.add_data(
3765 alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
3766 SuffixStore::with_capacity(100),
3767 |_, _| {},
3768 );
3769 original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
3770
3771 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3772
3773 assert!(!matcher.next_sequence(|_| {}));
3774
3775 matcher.add_data(
3776 alloc::vec![
3777 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
3778 ],
3779 SuffixStore::with_capacity(100),
3780 |_, _| {},
3781 );
3782 original_data.extend_from_slice(&[
3783 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
3784 ]);
3785
3786 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3787 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3788 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3789 assert!(!matcher.next_sequence(|_| {}));
3790
3791 matcher.add_data(
3792 alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
3793 SuffixStore::with_capacity(100),
3794 |_, _| {},
3795 );
3796 original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);
3797
3798 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3799 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3800 assert!(!matcher.next_sequence(|_| {}));
3801
3802 matcher.add_data(
3803 alloc::vec![0, 0, 0, 0, 0],
3804 SuffixStore::with_capacity(100),
3805 |_, _| {},
3806 );
3807 original_data.extend_from_slice(&[0, 0, 0, 0, 0]);
3808
3809 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3810 assert!(!matcher.next_sequence(|_| {}));
3811
3812 matcher.add_data(
3813 alloc::vec![7, 8, 9, 10, 11],
3814 SuffixStore::with_capacity(100),
3815 |_, _| {},
3816 );
3817 original_data.extend_from_slice(&[7, 8, 9, 10, 11]);
3818
3819 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3820 assert!(!matcher.next_sequence(|_| {}));
3821
3822 matcher.add_data(
3823 alloc::vec![1, 3, 5, 7, 9],
3824 SuffixStore::with_capacity(100),
3825 |_, _| {},
3826 );
3827 matcher.skip_matching();
3828 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
3829 reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
3830 assert!(!matcher.next_sequence(|_| {}));
3831
3832 matcher.add_data(
3833 alloc::vec![1, 3, 5, 7, 9],
3834 SuffixStore::with_capacity(100),
3835 |_, _| {},
3836 );
3837 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
3838
3839 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3840 assert!(!matcher.next_sequence(|_| {}));
3841
3842 matcher.add_data(
3843 alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
3844 SuffixStore::with_capacity(100),
3845 |_, _| {},
3846 );
3847 original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);
3848
3849 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3850 matcher.next_sequence(|seq| replay_sequence(seq, &mut reconstructed));
3851 assert!(!matcher.next_sequence(|_| {}));
3852
3853 assert_eq!(reconstructed, original_data);
3854}
3855
3856#[test]
3857fn dfast_matches_roundtrip_multi_block_pattern() {
3858 let pattern = [9, 21, 44, 184, 19, 96, 171, 109, 141, 251];
3859 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
3860 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
3861
3862 let mut matcher = DfastMatchGenerator::new(1 << 22);
3863 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
3864 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
3865 Sequence::Triple {
3866 literals,
3867 offset,
3868 match_len,
3869 } => {
3870 decoded.extend_from_slice(literals);
3871 let start = decoded.len() - offset;
3872 for i in 0..match_len {
3873 let byte = decoded[start + i];
3874 decoded.push(byte);
3875 }
3876 }
3877 };
3878
3879 matcher.add_data(first_block.clone(), |_| {});
3880 let mut history = Vec::new();
3881 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
3882 assert_eq!(history, first_block);
3883
3884 matcher.add_data(second_block.clone(), |_| {});
3885 let prefix_len = history.len();
3886 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
3887
3888 assert_eq!(&history[prefix_len..], second_block.as_slice());
3889}
3890
3891#[test]
3908fn dfast_accepts_exact_five_byte_match() {
3909 let mut data = Vec::new();
3918 data.extend_from_slice(b"ABCDE"); data.extend_from_slice(b"!!!!!!!!!!!!!!!!!!!!!!!"); data.extend_from_slice(b"ABCDE"); data.push(b'F'); assert_eq!(data.len(), 34);
3923
3924 let mut matcher = DfastMatchGenerator::new(1 << 22);
3925 matcher.add_data(data.clone(), |_| {});
3926
3927 let mut saw_five_byte_match = false;
3928 let mut saw_longer_match = false;
3929 matcher.start_matching(|seq| {
3930 if let Sequence::Triple {
3931 offset, match_len, ..
3932 } = seq
3933 {
3934 if offset == 28 && match_len == 5 {
3935 saw_five_byte_match = true;
3936 } else if offset == 28 && match_len > 5 {
3937 saw_longer_match = true;
3938 }
3939 }
3940 });
3941
3942 assert!(
3943 saw_five_byte_match,
3944 "dfast must accept the exact-5-byte match — a 6-byte floor would skip it"
3945 );
3946 assert!(
3947 !saw_longer_match,
3948 "fixture pinned to length 5 — byte 33 ('F') must terminate the extension"
3949 );
3950}
3951
3952#[test]
3953fn driver_switches_backends_and_initializes_dfast_via_reset() {
3954 let mut driver = MatchGeneratorDriver::new(32, 2);
3955
3956 driver.reset(CompressionLevel::Default);
3957 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Dfast);
3958 assert_eq!(driver.window_size(), (1u64 << 22));
3959
3960 let mut first = driver.get_next_space();
3961 first[..12].copy_from_slice(b"abcabcabcabc");
3962 first.truncate(12);
3963 driver.commit_space(first);
3964 assert_eq!(driver.get_last_space(), b"abcabcabcabc");
3965 driver.skip_matching_with_hint(None);
3966
3967 let mut second = driver.get_next_space();
3968 second[..12].copy_from_slice(b"abcabcabcabc");
3969 second.truncate(12);
3970 driver.commit_space(second);
3971
3972 let mut reconstructed = b"abcabcabcabc".to_vec();
3973 driver.start_matching(|seq| match seq {
3974 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
3975 Sequence::Triple {
3976 literals,
3977 offset,
3978 match_len,
3979 } => {
3980 reconstructed.extend_from_slice(literals);
3981 let start = reconstructed.len() - offset;
3982 for i in 0..match_len {
3983 let byte = reconstructed[start + i];
3984 reconstructed.push(byte);
3985 }
3986 }
3987 });
3988 assert_eq!(reconstructed, b"abcabcabcabcabcabcabcabc");
3989
3990 driver.reset(CompressionLevel::Fastest);
3991 assert_eq!(driver.window_size(), (1u64 << 19));
3992}
3993
3994#[test]
3995fn driver_level4_selects_row_backend() {
3996 let mut driver = MatchGeneratorDriver::new(32, 2);
3997 driver.reset(CompressionLevel::Level(4));
3998 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);
3999 assert_eq!(
4010 driver.row_matcher().lazy_depth,
4011 0,
4012 "L4 must route to start_matching_greedy (lazy_depth == 0)",
4013 );
4014}
4015
4016#[test]
4027fn driver_level4_greedy_round_trip_single_slice() {
4028 let mut driver = MatchGeneratorDriver::new(64, 2);
4029 driver.reset(CompressionLevel::Level(4));
4030 let input = b"abcdefgh_abcdefgh_abcdefgh_abcdefgh";
4031 let mut space = driver.get_next_space();
4032 space[..input.len()].copy_from_slice(input);
4033 space.truncate(input.len());
4034 driver.commit_space(space);
4035
4036 let mut reconstructed: Vec<u8> = Vec::new();
4037 let mut saw_triple = false;
4038 driver.start_matching(|seq| match seq {
4039 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4040 Sequence::Triple {
4041 literals,
4042 offset,
4043 match_len,
4044 } => {
4045 saw_triple = true;
4046 reconstructed.extend_from_slice(literals);
4047 let start = reconstructed.len() - offset;
4048 for i in 0..match_len {
4049 let byte = reconstructed[start + i];
4050 reconstructed.push(byte);
4051 }
4052 }
4053 });
4054 assert_eq!(
4055 reconstructed,
4056 input.to_vec(),
4057 "L4 greedy parse failed to reconstruct repeating-pattern input",
4058 );
4059 assert!(
4060 saw_triple,
4061 "L4 greedy parse on a repeating pattern must emit at least one match (Triple)",
4062 );
4063}
4064
4065#[test]
4066fn driver_level4_greedy_round_trip_cross_slice() {
4067 let mut driver = MatchGeneratorDriver::new(32, 4);
4072 driver.reset(CompressionLevel::Level(4));
4073 let chunk = b"the quick brown fox jumps over!!";
4074 assert_eq!(chunk.len(), 32);
4075
4076 let mut first = driver.get_next_space();
4077 first[..chunk.len()].copy_from_slice(chunk);
4078 first.truncate(chunk.len());
4079 driver.commit_space(first);
4080
4081 let mut first_recon: Vec<u8> = Vec::new();
4082 driver.start_matching(|seq| match seq {
4083 Sequence::Literals { literals } => first_recon.extend_from_slice(literals),
4084 Sequence::Triple {
4085 literals,
4086 offset,
4087 match_len,
4088 } => {
4089 first_recon.extend_from_slice(literals);
4090 let start = first_recon.len() - offset;
4091 for i in 0..match_len {
4092 let byte = first_recon[start + i];
4093 first_recon.push(byte);
4094 }
4095 }
4096 });
4097 assert_eq!(
4098 first_recon,
4099 chunk.to_vec(),
4100 "first slice failed to round-trip"
4101 );
4102
4103 let mut second = driver.get_next_space();
4104 second[..chunk.len()].copy_from_slice(chunk);
4105 second.truncate(chunk.len());
4106 driver.commit_space(second);
4107
4108 let mut full = first_recon.clone();
4109 let mut saw_cross_slice_match = false;
4110 driver.start_matching(|seq| match seq {
4111 Sequence::Literals { literals } => full.extend_from_slice(literals),
4112 Sequence::Triple {
4113 literals,
4114 offset,
4115 match_len,
4116 } => {
4117 if offset >= chunk.len() {
4121 saw_cross_slice_match = true;
4122 }
4123 full.extend_from_slice(literals);
4124 let start = full.len() - offset;
4125 for i in 0..match_len {
4126 let byte = full[start + i];
4127 full.push(byte);
4128 }
4129 }
4130 });
4131 let mut expected = chunk.to_vec();
4132 expected.extend_from_slice(chunk);
4133 assert_eq!(
4134 full, expected,
4135 "cross-slice L4 greedy parse failed to reconstruct"
4136 );
4137 assert!(
4138 saw_cross_slice_match,
4139 "L4 greedy parse must match across slice boundaries (history is shared)",
4140 );
4141}
4142
4143#[cfg(test)]
4147fn l4_greedy_round_trip(slice_size: usize, max_slices: usize, data: &[u8]) -> (usize, usize) {
4148 let mut driver = MatchGeneratorDriver::new(slice_size, max_slices);
4149 driver.reset(CompressionLevel::Level(4));
4150
4151 let mut reconstructed: Vec<u8> = Vec::with_capacity(data.len());
4152 let mut triple_count = 0usize;
4153 let mut max_offset = 0usize;
4154
4155 let mut offset_in_data = 0usize;
4160 while offset_in_data < data.len() {
4161 let mut space = driver.get_next_space();
4162 let space_cap = space.len();
4163 let take = (data.len() - offset_in_data).min(space_cap);
4164 space[..take].copy_from_slice(&data[offset_in_data..offset_in_data + take]);
4165 space.truncate(take);
4166 driver.commit_space(space);
4167 offset_in_data += take;
4168
4169 driver.start_matching(|seq| match seq {
4170 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4171 Sequence::Triple {
4172 literals,
4173 offset,
4174 match_len,
4175 } => {
4176 triple_count += 1;
4177 if offset > max_offset {
4178 max_offset = offset;
4179 }
4180 reconstructed.extend_from_slice(literals);
4181 let start = reconstructed.len() - offset;
4182 for i in 0..match_len {
4183 let byte = reconstructed[start + i];
4184 reconstructed.push(byte);
4185 }
4186 }
4187 });
4188 }
4189
4190 if data.is_empty() {
4194 let mut space = driver.get_next_space();
4195 space.truncate(0);
4196 driver.commit_space(space);
4197 driver.start_matching(|seq| match seq {
4198 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
4199 Sequence::Triple { .. } => panic!("empty input must not emit any matches"),
4200 });
4201 }
4202
4203 assert_eq!(reconstructed, data, "L4 greedy round-trip diverged");
4204 (triple_count, max_offset)
4205}
4206
4207#[test]
4218fn driver_level4_greedy_tail_rep_only_reachable() {
4219 let first: &[u8] = b"ABCDABCDABCDABCD"; let second: &[u8] = b"ABCDA"; let mut driver = MatchGeneratorDriver::new(16, 2);
4234 driver.reset(CompressionLevel::Level(4));
4235
4236 let mut first_space = driver.get_next_space();
4237 first_space[..first.len()].copy_from_slice(first);
4238 first_space.truncate(first.len());
4239 driver.commit_space(first_space);
4240 driver.start_matching(|_| {});
4241
4242 let mut second_space = driver.get_next_space();
4243 second_space[..second.len()].copy_from_slice(second);
4244 second_space.truncate(second.len());
4245 driver.commit_space(second_space);
4246
4247 let mut second_slice_triples = 0usize;
4248 driver.start_matching(|seq| {
4249 if matches!(seq, Sequence::Triple { .. }) {
4250 second_slice_triples += 1;
4251 }
4252 });
4253
4254 assert!(
4255 second_slice_triples >= 1,
4256 "tail rep-only position must produce a match in the second slice \
4257 (got {second_slice_triples} triples)",
4258 );
4259}
4260
4261#[test]
4262fn driver_level4_greedy_empty_input_emits_nothing() {
4263 let mut driver = MatchGeneratorDriver::new(64, 2);
4267 driver.reset(CompressionLevel::Level(4));
4268 let mut space = driver.get_next_space();
4273 space.truncate(0);
4274 driver.commit_space(space);
4275 let mut emitted_anything = false;
4276 driver.start_matching(|_| emitted_anything = true);
4277 assert!(!emitted_anything, "empty slice must not emit any sequences",);
4278}
4279
4280#[test]
4281fn driver_level4_greedy_sub_min_lookahead_input() {
4282 let data: &[u8] = b"abcd"; let (triples, _) = l4_greedy_round_trip(64, 2, data);
4287 assert_eq!(
4288 triples, 0,
4289 "sub-min-lookahead input must not emit any matches (got {triples})",
4290 );
4291}
4292
4293#[test]
4294fn driver_level4_greedy_incompressible_input() {
4295 let mut data = alloc::vec::Vec::with_capacity(256);
4300 let mut x: u32 = 0xDEAD_BEEF;
4301 for _ in 0..256 {
4302 x = x.wrapping_mul(1_103_515_245).wrapping_add(12345);
4303 data.push((x >> 16) as u8);
4304 }
4305 let (_triples, _) = l4_greedy_round_trip(64, 8, &data);
4306 }
4309
4310#[test]
4311fn driver_level4_greedy_long_literal_run_skip_step_growth() {
4312 let mut data = alloc::vec::Vec::with_capacity(2048);
4327 let mut x: u32 = 0xC0FF_EE00;
4328 for _ in 0..2048 {
4329 x = x.wrapping_mul(0x9E37_79B9).wrapping_add(0xCAFEBABE);
4330 data.push((x >> 24) as u8);
4331 }
4332 let (_triples, _) = l4_greedy_round_trip(512, 8, &data);
4333}
4334
4335#[test]
4336fn driver_level4_greedy_all_zeros_heavy_rep1() {
4337 let data: Vec<u8> = alloc::vec![0u8; 128];
4342 let (triples, max_offset) = l4_greedy_round_trip(64, 8, &data);
4343 assert!(
4344 triples >= 1,
4345 "all-zeros input must produce at least one rep1 match",
4346 );
4347 assert_eq!(
4351 max_offset, 1,
4352 "all-zeros L4 greedy parse should commit at offset 1 (got {max_offset})",
4353 );
4354}
4355
4356#[test]
4362fn driver_level4_greedy_periodic_pattern_rep_cascade() {
4363 let unit: &[u8] = b"alpha_beta_gamma";
4364 assert_eq!(unit.len(), 16);
4365 let mut data: Vec<u8> = Vec::with_capacity(unit.len() * 32);
4366 for _ in 0..32 {
4367 data.extend_from_slice(unit);
4368 }
4369 let (triples, max_offset) = l4_greedy_round_trip(64, 16, &data);
4370 assert!(
4371 triples >= 1,
4372 "periodic 16-byte payload must emit matches (got {triples})",
4373 );
4374 assert!(
4375 max_offset >= 16,
4376 "periodic 16-byte payload must produce at least one offset >= 16 \
4377 (got max_offset = {max_offset})",
4378 );
4379}
4380
4381#[test]
4382fn driver_reset_keeps_strategy_tag_in_sync_with_active_backend() {
4383 use super::strategy::StrategyTag;
4384
4385 fn check(level: CompressionLevel, expected: StrategyTag) {
4386 let mut driver = MatchGeneratorDriver::new(32, 2);
4387 driver.reset(level);
4388 assert_eq!(
4389 driver.strategy_tag, expected,
4390 "strategy_tag wrong for {level:?}"
4391 );
4392 assert_eq!(
4393 driver.strategy_tag.backend(),
4394 driver.active_backend(),
4395 "strategy_tag backend disagrees with active_backend for {level:?}"
4396 );
4397 }
4398
4399 check(CompressionLevel::Level(1), StrategyTag::Fast);
4400 check(CompressionLevel::Level(2), StrategyTag::Dfast);
4401 check(CompressionLevel::Level(3), StrategyTag::Dfast);
4402 check(CompressionLevel::Level(4), StrategyTag::Greedy);
4403 check(CompressionLevel::Level(7), StrategyTag::Lazy);
4404 check(CompressionLevel::Level(15), StrategyTag::Lazy);
4405 check(CompressionLevel::Level(16), StrategyTag::BtOpt);
4406 check(CompressionLevel::Level(18), StrategyTag::BtUltra);
4407 check(CompressionLevel::Level(22), StrategyTag::BtUltra2);
4408 check(CompressionLevel::Fastest, StrategyTag::Fast);
4409 check(CompressionLevel::Default, StrategyTag::Dfast);
4410 check(CompressionLevel::Better, StrategyTag::Lazy);
4411 check(CompressionLevel::Best, StrategyTag::Lazy);
4412}
4413
4414#[test]
4415fn level_16_17_map_to_btopt_strategy() {
4416 use super::strategy::{BackendTag, StrategyTag};
4417 let p16 = resolve_level_params(CompressionLevel::Level(16), None);
4418 let p17 = resolve_level_params(CompressionLevel::Level(17), None);
4419 assert_eq!(p16.backend(), BackendTag::HashChain);
4420 assert_eq!(p17.backend(), BackendTag::HashChain);
4421 assert_eq!(StrategyTag::for_level(16), StrategyTag::BtOpt);
4422 assert_eq!(StrategyTag::for_level(17), StrategyTag::BtOpt);
4423}
4424
4425#[test]
4426fn level_18_19_map_to_btultra_strategy() {
4427 use super::strategy::{BackendTag, StrategyTag};
4428 let p18 = resolve_level_params(CompressionLevel::Level(18), None);
4429 let p19 = resolve_level_params(CompressionLevel::Level(19), None);
4430 assert_eq!(p18.backend(), BackendTag::HashChain);
4431 assert_eq!(p19.backend(), BackendTag::HashChain);
4432 assert_eq!(StrategyTag::for_level(18), StrategyTag::BtUltra);
4433 assert_eq!(StrategyTag::for_level(19), StrategyTag::BtUltra);
4434}
4435
4436#[test]
4437fn level_20_22_map_to_btultra2_strategy() {
4438 use super::strategy::{BackendTag, StrategyTag};
4439 for level in 20..=22 {
4440 let params = resolve_level_params(CompressionLevel::Level(level), None);
4441 assert_eq!(params.backend(), BackendTag::HashChain);
4442 assert_eq!(StrategyTag::for_level(level as u8), StrategyTag::BtUltra2);
4443 }
4444}
4445
4446#[test]
4447fn level22_uses_donor_target_length_and_large_input_tables() {
4448 let params = resolve_level_params(CompressionLevel::Level(22), None);
4449 assert_eq!(params.window_log, 27);
4450 assert_eq!(params.hc.hash_log, 25);
4451 assert_eq!(params.hc.chain_log, 27);
4452 assert_eq!(params.hc.search_depth, 1 << 9);
4453 assert_eq!(params.hc.target_len, 999);
4454}
4455
4456#[test]
4457fn level22_source_size_hint_uses_donor_btultra2_tiers() {
4458 let p16k = resolve_level_params(CompressionLevel::Level(22), Some(16 * 1024));
4459 assert_eq!(p16k.window_log, 14);
4460 assert_eq!(p16k.hc.hash_log, 15);
4461 assert_eq!(p16k.hc.chain_log, 15);
4462 assert_eq!(p16k.hc.search_depth, 1 << 10);
4463 assert_eq!(p16k.hc.target_len, 999);
4464
4465 let p128k = resolve_level_params(CompressionLevel::Level(22), Some(128 * 1024));
4466 assert_eq!(p128k.window_log, 17);
4467 assert_eq!(p128k.hc.hash_log, 17);
4468 assert_eq!(p128k.hc.chain_log, 18);
4469 assert_eq!(p128k.hc.search_depth, 1 << 11);
4470 assert_eq!(p128k.hc.target_len, 999);
4471
4472 let p256k = resolve_level_params(CompressionLevel::Level(22), Some(256 * 1024));
4473 assert_eq!(p256k.window_log, 18);
4474 assert_eq!(p256k.hc.hash_log, 19);
4475 assert_eq!(p256k.hc.chain_log, 19);
4476 assert_eq!(p256k.hc.search_depth, 1 << 13);
4477 assert_eq!(p256k.hc.target_len, 999);
4478}
4479
4480#[test]
4481fn level22_small_source_size_hint_matches_donor_cparams() {
4482 use zstd::zstd_safe::zstd_sys;
4483
4484 let source_size = 15_027u64;
4485 let donor = unsafe { zstd_sys::ZSTD_getCParams(22, source_size, 0) };
4486 let params = resolve_level_params(CompressionLevel::Level(22), Some(source_size));
4487
4488 assert_eq!(params.window_log as u32, donor.windowLog);
4489 assert_eq!(params.hc.chain_log as u32, donor.chainLog);
4490 assert_eq!(params.hc.hash_log as u32, donor.hashLog);
4491 assert_eq!(params.hc.search_depth as u32, 1u32 << donor.searchLog);
4492 assert_eq!(HC_OPT_MIN_MATCH_LEN as u32, donor.minMatch);
4493 assert_eq!(params.hc.target_len as u32, donor.targetLength);
4494}
4495
4496#[test]
4497fn level22_small_source_uses_window_bounded_hash3_log() {
4498 let mut hc = HcMatchGenerator::new(1 << 14);
4499 hc.configure(
4500 BTULTRA2_HC_CONFIG_L22_16K,
4501 super::strategy::StrategyTag::BtUltra2,
4502 14,
4503 );
4504 assert_eq!(hc.table.hash3_log, 14);
4505
4506 hc.configure(
4507 BTULTRA2_HC_CONFIG_L22,
4508 super::strategy::StrategyTag::BtUltra2,
4509 27,
4510 );
4511 assert_eq!(hc.table.hash3_log, HC3_HASH_LOG);
4512}
4513
4514#[test]
4515fn btultra2_seed_pass_initializes_opt_state() {
4516 let mut hc = HcMatchGenerator::new(1 << 20);
4517 hc.configure(
4518 BTULTRA2_HC_CONFIG,
4519 super::strategy::StrategyTag::BtUltra2,
4520 26,
4521 );
4522 let data: Vec<u8> = (0..32 * 1024).map(|i| (i % 251) as u8).collect();
4523 hc.table.add_data(data, |_| {});
4524 hc.start_matching(|_| {});
4525 assert!(
4526 hc.backend.bt_mut().opt_state.lit_length_sum > 0,
4527 "btultra2 first block should seed non-zero sequence statistics"
4528 );
4529 assert!(
4530 hc.backend.bt_mut().opt_state.off_code_sum > 0,
4531 "btultra2 first block should seed offset-code statistics"
4532 );
4533}
4534
4535#[test]
4536fn btultra2_profile_disables_small_offset_handicap() {
4537 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4543 assert!(
4544 !profile.favor_small_offsets,
4545 "btultra2 should match donor opt2 offset pricing"
4546 );
4547 assert!(
4548 profile.accurate,
4549 "btultra2 should use donor opt2 accurate pricing"
4550 );
4551}
4552
4553#[test]
4554fn btultra_profile_keeps_donor_search_depth_budget() {
4555 let p = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra>();
4556 assert_eq!(
4557 p.max_chain_depth, 32,
4558 "btultra should not cap chain depth below donor opt2 search budget"
4559 );
4560}
4561
4562#[test]
4563fn btopt_profile_keeps_donor_search_depth_budget() {
4564 let p = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtOpt>();
4565 assert_eq!(
4566 p.max_chain_depth, 32,
4567 "btopt should not cap chain depth below donor btopt search budget"
4568 );
4569}
4570
4571#[test]
4572fn sufficient_match_len_is_clamped_by_target_len() {
4573 let mut hc = HcMatchGenerator::new(1 << 20);
4574 hc.configure(
4575 BTULTRA2_HC_CONFIG,
4576 super::strategy::StrategyTag::BtUltra2,
4577 26,
4578 );
4579 hc.hc.target_len = 13;
4580 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4581 assert_eq!(hc.hc.sufficient_match_len_for_pass(profile), 13);
4582}
4583
4584#[test]
4585fn opt_modes_use_target_len_as_sufficient_len() {
4586 use super::strategy;
4587 let mut hc = HcMatchGenerator::new(1 << 20);
4588 hc.hc.target_len = 57;
4589 let profiles = [
4590 HcOptimalCostProfile::const_for_strategy::<strategy::BtOpt>(),
4591 HcOptimalCostProfile::const_for_strategy::<strategy::BtUltra>(),
4592 HcOptimalCostProfile::const_for_strategy::<strategy::BtUltra2>(),
4593 ];
4594 for profile in profiles {
4595 assert_eq!(hc.hc.sufficient_match_len_for_pass(profile), 57);
4596 }
4597}
4598
4599#[test]
4600fn sufficient_match_len_is_capped_by_opt_num() {
4601 let mut hc = HcMatchGenerator::new(1 << 20);
4602 hc.hc.target_len = usize::MAX / 2;
4603 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4604 assert_eq!(hc.hc.sufficient_match_len_for_pass(profile), HC_OPT_NUM - 1);
4605}
4606
4607#[test]
4608#[allow(clippy::borrow_deref_ref)]
4609fn dictionary_entropy_seed_initializes_opt_state_from_tables() {
4610 let mut hc = HcMatchGenerator::new(1 << 20);
4611 hc.configure(
4612 BTULTRA2_HC_CONFIG,
4613 super::strategy::StrategyTag::BtUltra2,
4614 26,
4615 );
4616
4617 let huff = crate::huff0::huff0_encoder::HuffmanTable::build_from_data(
4618 b"aaabbbbccccddddeeeeefffffgggg",
4619 );
4620 let ll = crate::fse::fse_encoder::default_ll_table();
4621 let ml = crate::fse::fse_encoder::default_ml_table();
4622 let of = crate::fse::fse_encoder::default_of_table();
4623 hc.seed_dictionary_entropy(Some(&huff), Some(&*ll), Some(&*ml), Some(&*of));
4624
4625 hc.backend.bt_mut().opt_state.rescale_freqs(
4626 b"abcd",
4627 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4628 );
4629
4630 let base_ll_freqs: [u32; HC_MAX_LL + 1] = [
4631 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4632 1, 1, 1, 1, 1, 1,
4633 ];
4634
4635 assert_ne!(
4636 hc.backend.bt_mut().opt_state.lit_length_freq,
4637 base_ll_freqs,
4638 "dictionary entropy should override fallback LL bootstrap frequencies"
4639 );
4640 assert!(
4641 hc.backend
4642 .bt_mut()
4643 .opt_state
4644 .match_length_freq
4645 .iter()
4646 .any(|&v| v != 1),
4647 "dictionary entropy should seed non-uniform ML frequencies"
4648 );
4649 assert_ne!(
4650 hc.backend.bt_mut().opt_state.off_code_freq[0],
4651 6,
4652 "dictionary entropy should override fallback OF bootstrap frequencies"
4653 );
4654}
4655
4656#[test]
4657#[allow(clippy::borrow_deref_ref)]
4658fn dictionary_fse_seed_applies_without_huffman_seed() {
4659 let mut hc = HcMatchGenerator::new(1 << 20);
4660 hc.configure(
4661 BTULTRA2_HC_CONFIG,
4662 super::strategy::StrategyTag::BtUltra2,
4663 26,
4664 );
4665
4666 let ll = crate::fse::fse_encoder::default_ll_table();
4667 let ml = crate::fse::fse_encoder::default_ml_table();
4668 let of = crate::fse::fse_encoder::default_of_table();
4669 hc.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
4670 hc.backend.bt_mut().opt_state.rescale_freqs(
4671 b"abcd",
4672 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4673 );
4674
4675 let base_ll_freqs: [u32; HC_MAX_LL + 1] = [
4676 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4677 1, 1, 1, 1, 1, 1,
4678 ];
4679 assert_ne!(
4680 hc.backend.bt_mut().opt_state.lit_length_freq,
4681 base_ll_freqs,
4682 "FSE seed should still override LL bootstrap frequencies without huffman seed"
4683 );
4684 assert!(
4685 hc.backend
4686 .bt_mut()
4687 .opt_state
4688 .match_length_freq
4689 .iter()
4690 .any(|&v| v != 1),
4691 "FSE seed should still seed non-uniform ML frequencies"
4692 );
4693 assert_ne!(
4694 hc.backend.bt_mut().opt_state.off_code_freq[0],
4695 6,
4696 "FSE seed should still override OF bootstrap frequencies without huffman seed"
4697 );
4698}
4699
4700#[test]
4701#[allow(clippy::borrow_deref_ref)]
4702fn dictionary_seed_overrides_predef_price_mode_on_tiny_input() {
4703 let mut hc = HcMatchGenerator::new(1 << 20);
4704 hc.configure(
4705 BTULTRA2_HC_CONFIG,
4706 super::strategy::StrategyTag::BtUltra2,
4707 26,
4708 );
4709
4710 let ll = crate::fse::fse_encoder::default_ll_table();
4711 let ml = crate::fse::fse_encoder::default_ml_table();
4712 let of = crate::fse::fse_encoder::default_of_table();
4713 hc.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
4714 hc.backend.bt_mut().opt_state.rescale_freqs(
4715 b"abc",
4716 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4717 );
4718 assert!(
4719 matches!(
4720 hc.backend.bt_mut().opt_state.price_type,
4721 HcOptPriceType::Dynamic
4722 ),
4723 "dictionary-seeded first block should stay in dynamic mode even for tiny src"
4724 );
4725}
4726
4727#[test]
4728fn lit_length_price_blocksize_max_costs_one_extra_bit() {
4729 let profile_predef = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4730 let mut stats_predef = HcOptState::new();
4731 stats_predef.price_type = HcOptPriceType::Predefined;
4732 let predef_max = profile_predef.lit_length_price(&stats_predef, HC_BLOCKSIZE_MAX);
4733 let predef_prev =
4734 profile_predef.lit_length_price(&stats_predef, HC_BLOCKSIZE_MAX.saturating_sub(1));
4735 assert_eq!(
4736 predef_max,
4737 predef_prev + HC_BITCOST_MULTIPLIER,
4738 "predefined litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
4739 );
4740
4741 let profile_dyn = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4742 let mut stats_dyn = HcOptState::new();
4743 stats_dyn.price_type = HcOptPriceType::Dynamic;
4744 stats_dyn.lit_length_freq.fill(1);
4745 stats_dyn.lit_length_sum = (HC_MAX_LL + 1) as u32;
4746 stats_dyn.match_length_freq.fill(1);
4747 stats_dyn.match_length_sum = (HC_MAX_ML + 1) as u32;
4748 stats_dyn.off_code_freq.fill(1);
4749 stats_dyn.off_code_sum = (HC_MAX_OFF + 1) as u32;
4750 stats_dyn.lit_freq.fill(1);
4751 stats_dyn.lit_sum = (HC_MAX_LIT + 1) as u32;
4752 stats_dyn.set_base_prices(true);
4753 let dyn_max = profile_dyn.lit_length_price(&stats_dyn, HC_BLOCKSIZE_MAX);
4754 let dyn_prev = profile_dyn.lit_length_price(&stats_dyn, HC_BLOCKSIZE_MAX.saturating_sub(1));
4755 assert_eq!(
4756 dyn_max,
4757 dyn_prev + HC_BITCOST_MULTIPLIER,
4758 "dynamic litLength pricing at BLOCKSIZE_MAX must add exactly one bit"
4759 );
4760}
4761
4762#[test]
4763#[allow(clippy::borrow_deref_ref)]
4764fn btultra2_seed_pass_disabled_when_dictionary_entropy_seed_present() {
4765 let mut hc = HcMatchGenerator::new(1 << 20);
4766 hc.configure(
4767 BTULTRA2_HC_CONFIG,
4768 super::strategy::StrategyTag::BtUltra2,
4769 26,
4770 );
4771 let ll = crate::fse::fse_encoder::default_ll_table();
4772 let ml = crate::fse::fse_encoder::default_ml_table();
4773 let of = crate::fse::fse_encoder::default_of_table();
4774 hc.seed_dictionary_entropy(None, Some(&*ll), Some(&*ml), Some(&*of));
4775 assert!(
4776 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1),
4777 "dictionary-seeded first block should skip btultra2 warmup pass"
4778 );
4779}
4780
4781#[test]
4782fn btultra2_seed_pass_disabled_when_prefix_history_exists() {
4783 let mut hc = HcMatchGenerator::new(1 << 20);
4784 hc.configure(
4785 BTULTRA2_HC_CONFIG,
4786 super::strategy::StrategyTag::BtUltra2,
4787 26,
4788 );
4789 hc.table.history_abs_start = 17;
4790 hc.table.window.push_back(b"abcdefghijklmnop".to_vec());
4791 assert!(
4792 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 9),
4793 "btultra2 warmup must be first-block only (no prefix history)"
4794 );
4795}
4796
4797#[test]
4798fn btultra2_seed_pass_disabled_for_tiny_block() {
4799 let mut hc = HcMatchGenerator::new(1 << 20);
4800 hc.configure(
4801 BTULTRA2_HC_CONFIG,
4802 super::strategy::StrategyTag::BtUltra2,
4803 26,
4804 );
4805 assert!(
4806 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD),
4807 "btultra2 warmup should not run at or below predefined threshold"
4808 );
4809}
4810
4811#[test]
4812fn btultra2_seed_pass_disabled_after_stats_initialized() {
4813 let mut hc = HcMatchGenerator::new(1 << 20);
4814 hc.configure(
4815 BTULTRA2_HC_CONFIG,
4816 super::strategy::StrategyTag::BtUltra2,
4817 26,
4818 );
4819 hc.backend.bt_mut().opt_state.lit_length_sum = 1;
4820 assert!(
4821 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 32),
4822 "btultra2 warmup should run only for first block before stats are initialized"
4823 );
4824}
4825
4826#[test]
4827fn btultra2_seed_pass_disabled_when_not_at_frame_start() {
4828 let mut hc = HcMatchGenerator::new(1 << 20);
4829 hc.configure(
4830 BTULTRA2_HC_CONFIG,
4831 super::strategy::StrategyTag::BtUltra2,
4832 26,
4833 );
4834 hc.table.window_size = HC_PREDEF_THRESHOLD + 64;
4837 hc.table
4838 .window
4839 .push_back(alloc::vec![b'A'; HC_PREDEF_THRESHOLD + 32]);
4840 assert!(
4841 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 32),
4842 "btultra2 warmup must not run after frame start"
4843 );
4844}
4845
4846#[test]
4847fn btultra2_seed_pass_disabled_when_ldm_sequences_exist() {
4848 let mut hc = HcMatchGenerator::new(1 << 20);
4849 hc.configure(
4850 BTULTRA2_HC_CONFIG,
4851 super::strategy::StrategyTag::BtUltra2,
4852 26,
4853 );
4854 hc.table.window_size = HC_PREDEF_THRESHOLD + 64;
4855 hc.table
4856 .window
4857 .push_back(alloc::vec![b'A'; HC_PREDEF_THRESHOLD + 64]);
4858 hc.backend.bt_mut().ldm_sequences.push(HcRawSeq {
4859 lit_length: 8,
4860 offset: 16,
4861 match_length: 32,
4862 });
4863 assert!(
4864 !hc.should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 32),
4865 "btultra2 warmup must not run when LDM already produced sequences"
4866 );
4867}
4868
4869#[test]
4870fn literal_price_uses_eight_bits_when_literals_uncompressed() {
4871 let profile = HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>();
4872 let mut stats = HcOptState::new();
4873 stats.set_literals_compressed_for_tests(false);
4874 stats.price_type = HcOptPriceType::Predefined;
4875 assert_eq!(
4876 profile.literal_price(&stats, b'a'),
4877 8 * HC_BITCOST_MULTIPLIER,
4878 "uncompressed literals should cost 8 bits regardless of price mode"
4879 );
4880}
4881
4882#[test]
4883fn update_stats_skips_literal_frequencies_when_uncompressed() {
4884 let mut stats = HcOptState::new();
4885 stats.set_literals_compressed_for_tests(false);
4886 stats.update_stats(3, b"abc", 4, 8);
4887 assert_eq!(
4888 stats.lit_sum, 0,
4889 "literal sum must remain unchanged when literal compression is disabled"
4890 );
4891 assert_eq!(
4892 stats.lit_freq.iter().copied().sum::<u32>(),
4893 0,
4894 "literal frequencies must not be updated when literal compression is disabled"
4895 );
4896 assert_eq!(
4897 stats.lit_length_sum, 1,
4898 "literal-length stats still update for sequence modeling"
4899 );
4900 assert_eq!(
4901 stats.match_length_sum, 1,
4902 "match-length stats still update for sequence modeling"
4903 );
4904 assert_eq!(
4905 stats.off_code_sum, 1,
4906 "offset-code stats still update for sequence modeling"
4907 );
4908}
4909
4910#[test]
4911#[allow(clippy::borrow_deref_ref)]
4912fn dictionary_huffman_seed_ignored_when_literals_uncompressed() {
4913 let mut stats = HcOptState::new();
4914 stats.set_literals_compressed_for_tests(false);
4915 let huff = crate::huff0::huff0_encoder::HuffmanTable::build_from_data(
4916 b"aaaaabbbbcccddeeff00112233445566778899",
4917 );
4918 let ll = crate::fse::fse_encoder::default_ll_table();
4919 let ml = crate::fse::fse_encoder::default_ml_table();
4920 let of = crate::fse::fse_encoder::default_of_table();
4921 stats.seed_dictionary_entropy(Some(&huff), Some(&*ll), Some(&*ml), Some(&*of));
4922 stats.rescale_freqs(
4923 b"abcd",
4924 HcOptimalCostProfile::const_for_strategy::<super::strategy::BtUltra2>(),
4925 );
4926 assert_eq!(
4927 stats.lit_sum, 0,
4928 "literal sum must stay zero when literals are uncompressed"
4929 );
4930 assert_eq!(
4931 stats.lit_freq.iter().copied().sum::<u32>(),
4932 0,
4933 "literal frequencies must ignore dictionary huffman seed when uncompressed"
4934 );
4935}
4936
4937#[test]
4938fn hc_repcode_candidates_respect_litlen_dependent_rep_order() {
4939 let mut hc = HcMatchGenerator::new(64);
4940 hc.table.history = b"xxxxxxABCDEFABCDEF".to_vec();
4941 hc.table.history_start = 0;
4942 hc.table.history_abs_start = 0;
4943
4944 let abs_pos = 12usize; let current_abs_end = hc.table.history.len();
4946 let reps = [6u32, 3u32, 9u32];
4947
4948 let mut lit_pos_candidates = Vec::new();
4949 hc.hc.for_each_repcode_candidate_with_reps(
4950 &hc.table,
4951 abs_pos,
4952 1,
4953 reps,
4954 current_abs_end,
4955 HC_OPT_MIN_MATCH_LEN,
4956 |c| {
4957 lit_pos_candidates.push(c.offset);
4958 },
4959 );
4960 assert!(
4961 lit_pos_candidates.contains(&6),
4962 "when lit_len>0, rep0 should be considered and match"
4963 );
4964
4965 let mut ll0_candidates = Vec::new();
4966 hc.hc.for_each_repcode_candidate_with_reps(
4967 &hc.table,
4968 abs_pos,
4969 0,
4970 reps,
4971 current_abs_end,
4972 HC_OPT_MIN_MATCH_LEN,
4973 |c| {
4974 ll0_candidates.push(c.offset);
4975 },
4976 );
4977 assert!(
4978 !ll0_candidates.contains(&6),
4979 "when lit_len==0, rep0 is not directly eligible (ll0 semantics)"
4980 );
4981}
4982
4983#[test]
4984fn hc_collect_optimal_candidates_keeps_reps_when_chain_depth_zero() {
4985 let mut hc = HcMatchGenerator::new(64);
4986 hc.hc.search_depth = 0;
4987 hc.table.history = b"xyzxyzxyzxyz".to_vec();
4988 hc.table.history_start = 0;
4989 hc.table.history_abs_start = 0;
4990
4991 let abs_pos = 6usize;
4992 let current_abs_end = hc.table.history.len();
4993 let profile = HcOptimalCostProfile {
4994 max_chain_depth: 0,
4995 sufficient_match_len: usize::MAX / 2,
4996 accurate: false,
4997 favor_small_offsets: false,
4998 };
4999 let mut out = Vec::new();
5000 hc.collect_optimal_candidates(
5001 abs_pos,
5002 current_abs_end,
5003 profile,
5004 HcCandidateQuery {
5005 reps: [3, 6, 9],
5006 lit_len: 1,
5007 ldm_candidate: None,
5008 },
5009 &mut out,
5010 );
5011 assert!(
5012 !out.is_empty(),
5013 "rep candidates should remain available even when chain depth is zero"
5014 );
5015 assert!(
5016 out.iter().any(|c| c.offset == 3),
5017 "rep0 candidate should be retained"
5018 );
5019}
5020
5021#[test]
5022fn hc_collect_optimal_candidates_rep_tail_match_skips_chain_probe() {
5023 let mut hc = HcMatchGenerator::new(64);
5024 hc.table.history = b"aaaaaaaaaa".to_vec();
5025 hc.table.history_start = 0;
5026 hc.table.history_abs_start = 0;
5027 hc.table.position_base = 0;
5028 hc.hc.search_depth = 32;
5029 let abs_pos = 6usize;
5030 hc.table.ensure_tables();
5031 hc.table.insert_positions(0, abs_pos);
5032
5033 let profile = HcOptimalCostProfile {
5034 max_chain_depth: 32,
5035 sufficient_match_len: usize::MAX / 2,
5036 accurate: true,
5037 favor_small_offsets: false,
5038 };
5039 let mut out = Vec::new();
5040 hc.collect_optimal_candidates(
5041 abs_pos,
5042 hc.table.history.len(),
5043 profile,
5044 HcCandidateQuery {
5045 reps: [1, 4, 8],
5046 lit_len: 1,
5047 ldm_candidate: None,
5048 },
5049 &mut out,
5050 );
5051
5052 assert!(
5053 out.iter()
5054 .all(|candidate| matches!(candidate.offset, 1 | 4)),
5055 "terminal rep match should return before chain probing adds non-rep offsets"
5056 );
5057}
5058
5059#[test]
5060fn hc_collect_optimal_candidates_long_chain_match_advances_skip_window() {
5061 let mut hc = HcMatchGenerator::new(128);
5062 hc.table.history = b"abcabcabcabcabcabcabcabc".to_vec();
5063 hc.table.history_start = 0;
5064 hc.table.history_abs_start = 0;
5065 hc.table.position_base = 0;
5066 hc.hc.search_depth = 32;
5067 let abs_pos = 9usize;
5068 hc.table.ensure_tables();
5069 hc.table.insert_positions(0, abs_pos);
5070 hc.table.skip_insert_until_abs = 0;
5071
5072 let profile = HcOptimalCostProfile {
5073 max_chain_depth: 32,
5074 sufficient_match_len: usize::MAX / 2,
5075 accurate: true,
5076 favor_small_offsets: false,
5077 };
5078 let mut out = Vec::new();
5079 hc.collect_optimal_candidates(
5080 abs_pos,
5081 hc.table.history.len(),
5082 profile,
5083 HcCandidateQuery {
5084 reps: [1, 4, 8],
5085 lit_len: 1,
5086 ldm_candidate: None,
5087 },
5088 &mut out,
5089 );
5090
5091 assert!(
5092 hc.table.skip_insert_until_abs > abs_pos,
5093 "long chain match should advance skip window to avoid redundant immediate insertions"
5094 );
5095}
5096
5097#[test]
5098fn hc_collect_optimal_candidates_chain_fast_skip_uses_match_end_minus_8() {
5099 let mut hc = HcMatchGenerator::new(128);
5100 hc.table.history = b"abcabcabcabcabcabcabcabc".to_vec();
5101 hc.table.history_start = 0;
5102 hc.table.history_abs_start = 0;
5103 hc.table.position_base = 0;
5104 hc.hc.search_depth = 32;
5105 let abs_pos = 9usize;
5106 hc.table.ensure_tables();
5107 hc.table.insert_positions(0, abs_pos);
5108 hc.table.skip_insert_until_abs = 0;
5109
5110 let profile = HcOptimalCostProfile {
5111 max_chain_depth: 32,
5112 sufficient_match_len: 10,
5113 accurate: true,
5114 favor_small_offsets: false,
5115 };
5116 let mut out = Vec::new();
5117 hc.collect_optimal_candidates(
5118 abs_pos,
5119 hc.table.history.len(),
5120 profile,
5121 HcCandidateQuery {
5122 reps: [1, 4, 8],
5123 lit_len: 1,
5124 ldm_candidate: None,
5125 },
5126 &mut out,
5127 );
5128
5129 let best_match_end = out
5130 .iter()
5131 .map(|candidate| candidate.start.saturating_add(candidate.match_len))
5132 .max()
5133 .expect("expected at least one candidate");
5134 assert!(
5135 hc.table.skip_insert_until_abs > abs_pos,
5136 "chain fast-skip must advance past current position"
5137 );
5138 assert!(
5139 hc.table.skip_insert_until_abs <= best_match_end.saturating_sub(8),
5140 "chain fast-skip must not exceed donor-style matchEndIdx - 8 bound"
5141 );
5142}
5143
5144#[test]
5145fn hc_collect_optimal_candidates_advances_skip_window_on_plain_bt_path() {
5146 let mut hc = HcMatchGenerator::new(256);
5147 hc.table.history = b"abcdefghijklmnop".to_vec();
5148 hc.table.history_start = 0;
5149 hc.table.history_abs_start = 0;
5150 hc.table.position_base = 0;
5151 hc.hc.search_depth = 0;
5152 hc.table.ensure_tables();
5153
5154 let abs_pos = 8usize;
5155 hc.table.skip_insert_until_abs = 0;
5156
5157 let profile = HcOptimalCostProfile {
5158 max_chain_depth: 0,
5159 sufficient_match_len: usize::MAX / 2,
5160 accurate: true,
5161 favor_small_offsets: false,
5162 };
5163 let mut out = Vec::new();
5164 hc.collect_optimal_candidates(
5165 abs_pos,
5166 hc.table.history.len(),
5167 profile,
5168 HcCandidateQuery {
5169 reps: [1, 4, 8],
5170 lit_len: 1,
5171 ldm_candidate: None,
5172 },
5173 &mut out,
5174 );
5175
5176 assert_eq!(
5177 hc.table.skip_insert_until_abs,
5178 abs_pos.saturating_add(1),
5179 "plain BT path should advance skip window by 1 via donor matchEndIdx baseline"
5180 );
5181}
5182
5183#[test]
5196fn hc_ldm_candidates_are_merged_into_optimal_candidates() {
5197 let mut hc = HcMatchGenerator::new(512);
5198 hc.table.history = (0..256).map(|i| (i % 251) as u8).collect();
5199 hc.table.history_start = 0;
5200 hc.table.history_abs_start = 0;
5201
5202 let abs_pos = 128usize;
5203 let current_abs_end = 256usize;
5204 let ldm = MatchCandidate {
5205 start: abs_pos,
5206 offset: 96,
5207 match_len: 40,
5208 };
5209
5210 let profile = HcOptimalCostProfile {
5211 max_chain_depth: 0,
5212 sufficient_match_len: usize::MAX / 2,
5213 accurate: true,
5214 favor_small_offsets: false,
5215 };
5216 let mut out = Vec::new();
5217 hc.collect_optimal_candidates(
5218 abs_pos,
5219 current_abs_end,
5220 profile,
5221 HcCandidateQuery {
5222 reps: [1, 4, 8],
5223 lit_len: 1,
5224 ldm_candidate: Some(ldm),
5225 },
5226 &mut out,
5227 );
5228 assert!(
5229 out.iter().any(
5230 |candidate| candidate.offset == ldm.offset && candidate.match_len == ldm.match_len
5231 ),
5232 "LDM candidate should be present in optimal candidate set"
5233 );
5234}
5235
5236#[test]
5237fn btultra_and_btultra2_both_keep_dictionary_candidates() {
5238 use super::strategy::StrategyTag;
5246
5247 let test_config = HcConfig {
5248 hash_log: 23,
5249 chain_log: 22,
5250 search_depth: 32,
5251 target_len: 256,
5252 };
5253 let window_log = 20u8;
5254
5255 let prepare_history = |hc: &mut HcMatchGenerator, abs_pos: usize| {
5256 hc.table.history = alloc::vec![0u8; 160];
5257 for i in 0..64 {
5258 hc.table.history[i] = b'a' + (i % 7) as u8;
5259 }
5260 for i in 64..160 {
5261 hc.table.history[i] = b'k' + (i % 5) as u8;
5262 }
5263 for i in 0..24 {
5264 hc.table.history[abs_pos + i] = hc.table.history[16 + i];
5265 }
5266 hc.table.history_start = 0;
5267 hc.table.history_abs_start = 0;
5268 hc.table.position_base = 0;
5269 hc.table.ensure_tables();
5270 hc.table.insert_positions(0, abs_pos);
5271 hc.table.dictionary_limit_abs = Some(64);
5272 hc.table.skip_insert_until_abs = 0;
5273 };
5274
5275 let profile = HcOptimalCostProfile {
5276 max_chain_depth: 32,
5277 sufficient_match_len: usize::MAX / 2,
5278 accurate: true,
5279 favor_small_offsets: false,
5280 };
5281 let abs_pos = 96usize;
5282 let mut out = Vec::new();
5283
5284 let mut hc = HcMatchGenerator::new(256);
5285 hc.configure(test_config, StrategyTag::BtUltra2, window_log);
5286 prepare_history(&mut hc, abs_pos);
5287 hc.collect_optimal_candidates(
5288 abs_pos,
5289 160,
5290 profile,
5291 HcCandidateQuery {
5292 reps: [1, 4, 8],
5293 lit_len: 1,
5294 ldm_candidate: None,
5295 },
5296 &mut out,
5297 );
5298 assert!(
5299 out.iter().any(|candidate| candidate.offset >= 32),
5300 "btultra2 should retain dictionary candidates on donor-parity path"
5301 );
5302
5303 let mut hc = HcMatchGenerator::new(256);
5304 hc.configure(test_config, StrategyTag::BtUltra, window_log);
5305 prepare_history(&mut hc, abs_pos);
5306 hc.collect_optimal_candidates(
5307 abs_pos,
5308 160,
5309 profile,
5310 HcCandidateQuery {
5311 reps: [1, 4, 8],
5312 lit_len: 1,
5313 ldm_candidate: None,
5314 },
5315 &mut out,
5316 );
5317 assert!(
5318 out.iter().any(|candidate| candidate.offset >= 32),
5319 "btultra should retain dictionary candidates"
5320 );
5321}
5322
5323#[test]
5324fn driver_small_source_hint_shrinks_dfast_hash_tables() {
5325 let mut driver = MatchGeneratorDriver::new(32, 2);
5326
5327 driver.reset(CompressionLevel::Level(2));
5328 let mut space = driver.get_next_space();
5329 space[..12].copy_from_slice(b"abcabcabcabc");
5330 space.truncate(12);
5331 driver.commit_space(space);
5332 driver.skip_matching_with_hint(None);
5333 let full_long = driver.dfast_matcher().long_hash.len();
5336 let full_short = driver.dfast_matcher().short_hash.len();
5337 assert_eq!(full_long, 1 << DFAST_HASH_BITS);
5338 assert_eq!(
5339 full_short,
5340 1 << (DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA)
5341 );
5342
5343 driver.set_source_size_hint(1024);
5344 driver.reset(CompressionLevel::Level(2));
5345 let mut space = driver.get_next_space();
5346 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
5347 space.truncate(12);
5348 driver.commit_space(space);
5349 driver.skip_matching_with_hint(None);
5350 let hinted_long = driver.dfast_matcher().long_hash.len();
5351 let hinted_short = driver.dfast_matcher().short_hash.len();
5352
5353 assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
5354 assert_eq!(hinted_long, 1 << MIN_HINTED_WINDOW_LOG);
5361 let expected_hinted_short_bits = (MIN_HINTED_WINDOW_LOG as usize)
5362 .saturating_sub(DFAST_SHORT_HASH_BITS_DELTA)
5363 .max(MIN_WINDOW_LOG as usize);
5364 assert_eq!(
5365 hinted_short,
5366 1 << expected_hinted_short_bits,
5367 "short table must sit one DFAST_SHORT_HASH_BITS_DELTA below the long table \
5368 (clamped at MIN_WINDOW_LOG) — a regression that pulls it up to the long-table \
5369 floor would still satisfy the `< full_short` bound below and slip through"
5370 );
5371 assert!(
5372 hinted_long < full_long && hinted_short < full_short,
5373 "tiny source hint should reduce both dfast tables"
5374 );
5375}
5376
5377#[test]
5378fn driver_small_source_hint_shrinks_row_hash_tables() {
5379 let mut driver = MatchGeneratorDriver::new(32, 2);
5380
5381 driver.reset(CompressionLevel::Level(4));
5382 let mut space = driver.get_next_space();
5383 space[..12].copy_from_slice(b"abcabcabcabc");
5384 space.truncate(12);
5385 driver.commit_space(space);
5386 driver.skip_matching_with_hint(None);
5387 let full_rows = driver.row_matcher().row_heads.len();
5388 assert_eq!(full_rows, 1 << (ROW_HASH_BITS - ROW_LOG));
5389
5390 driver.set_source_size_hint(1024);
5391 driver.reset(CompressionLevel::Level(4));
5392 let mut space = driver.get_next_space();
5393 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
5394 space.truncate(12);
5395 driver.commit_space(space);
5396 driver.skip_matching_with_hint(None);
5397 let hinted_rows = driver.row_matcher().row_heads.len();
5398
5399 assert_eq!(driver.window_size(), 1 << MIN_HINTED_WINDOW_LOG);
5400 assert_eq!(
5401 hinted_rows,
5402 1 << ((MIN_HINTED_WINDOW_LOG as usize) - ROW_LOG)
5403 );
5404 assert!(
5405 hinted_rows < full_rows,
5406 "tiny source hint should reduce row hash table footprint"
5407 );
5408}
5409
5410#[test]
5411fn row_matches_roundtrip_multi_block_pattern() {
5412 let pattern = [7, 13, 44, 184, 19, 96, 171, 109, 141, 251];
5413 let first_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
5414 let second_block: Vec<u8> = pattern.iter().copied().cycle().take(128 * 1024).collect();
5415
5416 let mut matcher = RowMatchGenerator::new(1 << 22);
5417 matcher.configure(ROW_CONFIG);
5418 matcher.ensure_tables();
5419 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
5420 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
5421 Sequence::Triple {
5422 literals,
5423 offset,
5424 match_len,
5425 } => {
5426 decoded.extend_from_slice(literals);
5427 let start = decoded.len() - offset;
5428 for i in 0..match_len {
5429 let byte = decoded[start + i];
5430 decoded.push(byte);
5431 }
5432 }
5433 };
5434
5435 matcher.add_data(first_block.clone(), |_| {});
5436 let mut history = Vec::new();
5437 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
5438 assert_eq!(history, first_block);
5439
5440 matcher.add_data(second_block.clone(), |_| {});
5441 let prefix_len = history.len();
5442 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
5443
5444 assert_eq!(&history[prefix_len..], second_block.as_slice());
5445
5446 let third_block: Vec<u8> = (0u8..=255).collect();
5448 matcher.add_data(third_block.clone(), |_| {});
5449 let third_prefix = history.len();
5450 matcher.start_matching(|seq| replay_sequence(&mut history, seq));
5451 assert_eq!(&history[third_prefix..], third_block.as_slice());
5452}
5453
5454#[test]
5455fn row_short_block_emits_literals_only() {
5456 let mut matcher = RowMatchGenerator::new(1 << 22);
5457 matcher.configure(ROW_CONFIG);
5458
5459 matcher.add_data(b"abcde".to_vec(), |_| {});
5460
5461 let mut saw_triple = false;
5462 let mut reconstructed = Vec::new();
5463 matcher.start_matching(|seq| match seq {
5464 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
5465 Sequence::Triple { .. } => saw_triple = true,
5466 });
5467
5468 assert!(
5469 !saw_triple,
5470 "row backend must not emit triples for short blocks"
5471 );
5472 assert_eq!(reconstructed, b"abcde");
5473
5474 saw_triple = false;
5476 matcher.add_data(b"abcdeabcde".to_vec(), |_| {});
5477 matcher.start_matching(|seq| {
5478 if let Sequence::Triple { .. } = seq {
5479 saw_triple = true;
5480 }
5481 });
5482 assert!(
5483 saw_triple,
5484 "row backend should emit triples on repeated data"
5485 );
5486}
5487
5488#[test]
5489fn row_pick_lazy_returns_best_when_lookahead_is_out_of_bounds() {
5490 let mut matcher = RowMatchGenerator::new(1 << 22);
5491 matcher.configure(ROW_CONFIG);
5492 matcher.add_data(b"abcabc".to_vec(), |_| {});
5493
5494 let best = MatchCandidate {
5495 start: 0,
5496 offset: 1,
5497 match_len: ROW_MIN_MATCH_LEN,
5498 };
5499 let picked = matcher
5500 .pick_lazy_match(0, 0, Some(best))
5501 .expect("best candidate must survive");
5502
5503 assert_eq!(picked.start, best.start);
5504 assert_eq!(picked.offset, best.offset);
5505 assert_eq!(picked.match_len, best.match_len);
5506}
5507
5508#[test]
5509fn row_backfills_previous_block_tail_for_cross_boundary_match() {
5510 let mut matcher = RowMatchGenerator::new(1 << 22);
5511 matcher.configure(ROW_CONFIG);
5512
5513 let mut first_block = alloc::vec![0xA5; 64];
5514 first_block.extend_from_slice(b"XYZ");
5515 let second_block = b"XYZXYZtail".to_vec();
5516
5517 let replay_sequence = |decoded: &mut Vec<u8>, seq: Sequence<'_>| match seq {
5518 Sequence::Literals { literals } => decoded.extend_from_slice(literals),
5519 Sequence::Triple {
5520 literals,
5521 offset,
5522 match_len,
5523 } => {
5524 decoded.extend_from_slice(literals);
5525 let start = decoded.len() - offset;
5526 for i in 0..match_len {
5527 let byte = decoded[start + i];
5528 decoded.push(byte);
5529 }
5530 }
5531 };
5532
5533 matcher.add_data(first_block.clone(), |_| {});
5534 let mut reconstructed = Vec::new();
5535 matcher.start_matching(|seq| replay_sequence(&mut reconstructed, seq));
5536 assert_eq!(reconstructed, first_block);
5537
5538 matcher.add_data(second_block.clone(), |_| {});
5539 let mut saw_cross_boundary = false;
5540 let prefix_len = reconstructed.len();
5541 matcher.start_matching(|seq| {
5542 if let Sequence::Triple {
5543 literals,
5544 offset,
5545 match_len,
5546 } = seq
5547 && literals.is_empty()
5548 && offset == 3
5549 && match_len >= ROW_MIN_MATCH_LEN
5550 {
5551 saw_cross_boundary = true;
5552 }
5553 replay_sequence(&mut reconstructed, seq);
5554 });
5555
5556 assert!(
5557 saw_cross_boundary,
5558 "row matcher should reuse the 3-byte previous-block tail"
5559 );
5560 assert_eq!(&reconstructed[prefix_len..], second_block.as_slice());
5561}
5562
5563#[test]
5564fn row_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
5565 let data = deterministic_high_entropy_bytes(0xA713_9C5D_44E2_10B1, 4096);
5566
5567 let mut dense = RowMatchGenerator::new(1 << 22);
5568 dense.configure(ROW_CONFIG);
5569 dense.add_data(data.clone(), |_| {});
5570 dense.skip_matching_with_hint(Some(false));
5571 let dense_slots = dense
5572 .row_positions
5573 .iter()
5574 .filter(|&&pos| pos != ROW_EMPTY_SLOT)
5575 .count();
5576
5577 let mut sparse = RowMatchGenerator::new(1 << 22);
5578 sparse.configure(ROW_CONFIG);
5579 sparse.add_data(data, |_| {});
5580 sparse.skip_matching_with_hint(Some(true));
5581 let sparse_slots = sparse
5582 .row_positions
5583 .iter()
5584 .filter(|&&pos| pos != ROW_EMPTY_SLOT)
5585 .count();
5586
5587 assert!(
5588 sparse_slots < dense_slots,
5589 "incompressible hint should seed fewer row slots (sparse={sparse_slots}, dense={dense_slots})"
5590 );
5591}
5592
5593#[test]
5594fn driver_unhinted_level2_keeps_default_dfast_hash_table_size() {
5595 let mut driver = MatchGeneratorDriver::new(32, 2);
5596
5597 driver.reset(CompressionLevel::Level(2));
5598 let mut space = driver.get_next_space();
5599 space[..12].copy_from_slice(b"abcabcabcabc");
5600 space.truncate(12);
5601 driver.commit_space(space);
5602 driver.skip_matching_with_hint(None);
5603
5604 let long_len = driver.dfast_matcher().long_hash.len();
5608 let short_len = driver.dfast_matcher().short_hash.len();
5609 assert_eq!(
5610 long_len,
5611 1 << DFAST_HASH_BITS,
5612 "unhinted Level(2) should keep default long-hash table size"
5613 );
5614 assert_eq!(
5615 short_len,
5616 1 << (DFAST_HASH_BITS - DFAST_SHORT_HASH_BITS_DELTA),
5617 "unhinted Level(2) short-hash should be one bit smaller than long-hash"
5618 );
5619}
5620
5621#[test]
5622fn simple_backend_rejects_undersized_pooled_suffix_store() {
5623 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
5624 driver.reset(CompressionLevel::Fastest);
5625
5626 driver.suffix_pool.push(SuffixStore::with_capacity(1024));
5627
5628 let mut space = driver.get_next_space();
5629 space.clear();
5630 space.resize(4096, 0xAB);
5631 driver.commit_space(space);
5632
5633 let last_suffix_slots = driver
5634 .simple()
5635 .window
5636 .last()
5637 .expect("window entry must exist after commit")
5638 .suffixes
5639 .slots
5640 .len();
5641 assert!(
5642 last_suffix_slots >= 4096,
5643 "undersized pooled suffix store must not be reused for larger blocks"
5644 );
5645}
5646
5647#[test]
5648fn source_hint_clamps_driver_slice_size_to_window() {
5649 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
5650 driver.set_source_size_hint(1024);
5651 driver.reset(CompressionLevel::Default);
5652
5653 let window = driver.window_size() as usize;
5654 assert_eq!(window, 1 << MIN_HINTED_WINDOW_LOG);
5655 assert_eq!(driver.slice_size, window);
5656
5657 let space = driver.get_next_space();
5658 assert_eq!(space.len(), window);
5659 driver.commit_space(space);
5660}
5661
5662#[test]
5663fn pooled_space_keeps_capacity_when_slice_size_shrinks() {
5664 let mut driver = MatchGeneratorDriver::new(128 * 1024, 2);
5665 driver.reset(CompressionLevel::Default);
5666
5667 let large = driver.get_next_space();
5668 let large_capacity = large.capacity();
5669 assert!(large_capacity >= 128 * 1024);
5670 driver.commit_space(large);
5671
5672 driver.set_source_size_hint(1024);
5673 driver.reset(CompressionLevel::Default);
5674
5675 let small = driver.get_next_space();
5676 assert_eq!(small.len(), 1 << MIN_HINTED_WINDOW_LOG);
5677 assert!(
5678 small.capacity() >= large_capacity,
5679 "pooled buffer capacity should be preserved to avoid shrink/grow churn"
5680 );
5681}
5682
5683#[test]
5684fn driver_best_to_fastest_releases_oversized_hc_tables() {
5685 let mut driver = MatchGeneratorDriver::new(32, 2);
5686
5687 driver.reset(CompressionLevel::Best);
5689 assert_eq!(driver.window_size(), (1u64 << 24));
5690
5691 let mut space = driver.get_next_space();
5693 space[..12].copy_from_slice(b"abcabcabcabc");
5694 space.truncate(12);
5695 driver.commit_space(space);
5696 driver.skip_matching_with_hint(None);
5697
5698 driver.reset(CompressionLevel::Fastest);
5713 assert_eq!(driver.window_size(), (1u64 << 19));
5714 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
5715}
5716
5717#[test]
5718fn driver_better_to_best_resizes_hc_tables() {
5719 let mut driver = MatchGeneratorDriver::new(32, 2);
5720
5721 driver.reset(CompressionLevel::Better);
5723 assert_eq!(driver.window_size(), (1u64 << 23));
5724
5725 let mut space = driver.get_next_space();
5726 space[..12].copy_from_slice(b"abcabcabcabc");
5727 space.truncate(12);
5728 driver.commit_space(space);
5729 driver.skip_matching_with_hint(None);
5730
5731 let hc = driver.hc_matcher();
5732 let better_hash_len = hc.table.hash_table.len();
5733 let better_chain_len = hc.table.chain_table.len();
5734
5735 driver.reset(CompressionLevel::Best);
5737 assert_eq!(driver.window_size(), (1u64 << 24));
5738
5739 let mut space = driver.get_next_space();
5741 space[..12].copy_from_slice(b"xyzxyzxyzxyz");
5742 space.truncate(12);
5743 driver.commit_space(space);
5744 driver.skip_matching_with_hint(None);
5745
5746 let hc = driver.hc_matcher();
5747 assert!(
5748 hc.table.hash_table.len() > better_hash_len,
5749 "Best hash_table ({}) should be larger than Better ({})",
5750 hc.table.hash_table.len(),
5751 better_hash_len
5752 );
5753 assert!(
5754 hc.table.chain_table.len() > better_chain_len,
5755 "Best chain_table ({}) should be larger than Better ({})",
5756 hc.table.chain_table.len(),
5757 better_chain_len
5758 );
5759}
5760
5761#[test]
5762fn prime_with_dictionary_preserves_history_for_first_full_block() {
5763 let mut driver = MatchGeneratorDriver::new(8, 1);
5764 driver.reset(CompressionLevel::Fastest);
5765
5766 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
5767
5768 let mut space = driver.get_next_space();
5769 space.clear();
5770 space.extend_from_slice(b"abcdefgh");
5771 driver.commit_space(space);
5772
5773 let mut saw_match = false;
5774 driver.start_matching(|seq| {
5775 if let Sequence::Triple {
5776 literals,
5777 offset,
5778 match_len,
5779 } = seq
5780 && literals.is_empty()
5781 && offset == 8
5782 && match_len >= MIN_MATCH_LEN
5783 {
5784 saw_match = true;
5785 }
5786 });
5787
5788 assert!(
5789 saw_match,
5790 "first full block should still match dictionary-primed history"
5791 );
5792}
5793
5794#[test]
5795fn prime_with_large_dictionary_preserves_early_history_until_first_block() {
5796 let mut driver = MatchGeneratorDriver::new(8, 1);
5797 driver.reset(CompressionLevel::Fastest);
5798
5799 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
5800
5801 let mut space = driver.get_next_space();
5802 space.clear();
5803 space.extend_from_slice(b"abcdefgh");
5804 driver.commit_space(space);
5805
5806 let mut saw_match = false;
5807 driver.start_matching(|seq| {
5808 if let Sequence::Triple {
5809 literals,
5810 offset,
5811 match_len,
5812 } = seq
5813 && literals.is_empty()
5814 && offset == 24
5815 && match_len >= MIN_MATCH_LEN
5816 {
5817 saw_match = true;
5818 }
5819 });
5820
5821 assert!(
5822 saw_match,
5823 "dictionary bytes should remain addressable until frame output exceeds the live window"
5824 );
5825}
5826
5827#[test]
5828fn prime_with_dictionary_applies_offset_history_even_when_content_is_empty() {
5829 let mut driver = MatchGeneratorDriver::new(8, 1);
5830 driver.reset(CompressionLevel::Fastest);
5831
5832 driver.prime_with_dictionary(&[], [11, 7, 3]);
5833
5834 assert_eq!(driver.simple_mut().offset_hist, [11, 7, 3]);
5835}
5836
5837#[test]
5838fn hc_prime_with_empty_dictionary_disables_btultra2_seed_pass() {
5839 let mut driver = MatchGeneratorDriver::new(8, 1);
5840 driver.reset(CompressionLevel::Better);
5841
5842 driver.prime_with_dictionary(&[], [11, 7, 3]);
5843
5844 assert_eq!(driver.hc_matcher().table.offset_hist, [11, 7, 3]);
5845 assert!(
5846 !driver
5847 .hc_matcher()
5848 .should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1),
5849 "btultra2 warmup must stay disabled after dictionary priming, even when dict content is empty"
5850 );
5851}
5852
5853#[test]
5854fn hc_prime_with_dictionary_disables_btultra2_seed_pass() {
5855 let mut driver = MatchGeneratorDriver::new(8, 1);
5856 driver.reset(CompressionLevel::Better);
5857
5858 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
5859
5860 assert!(
5861 !driver
5862 .hc_matcher()
5863 .should_run_btultra2_seed_pass::<super::strategy::BtUltra2>(HC_PREDEF_THRESHOLD + 1),
5864 "btultra2 warmup must stay disabled after dictionary priming with content"
5865 );
5866}
5867
5868#[test]
5869fn dfast_prime_with_dictionary_preserves_history_for_first_full_block() {
5870 let mut driver = MatchGeneratorDriver::new(8, 1);
5871 driver.reset(CompressionLevel::Level(2));
5872
5873 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
5874
5875 let mut space = driver.get_next_space();
5876 space.clear();
5877 space.extend_from_slice(b"abcdefgh");
5878 driver.commit_space(space);
5879
5880 let mut saw_match = false;
5881 driver.start_matching(|seq| {
5882 if let Sequence::Triple {
5883 literals,
5884 offset,
5885 match_len,
5886 } = seq
5887 && literals.is_empty()
5888 && offset == 8
5889 && match_len >= DFAST_MIN_MATCH_LEN
5890 {
5891 saw_match = true;
5892 }
5893 });
5894
5895 assert!(
5896 saw_match,
5897 "dfast backend should match dictionary-primed history in first full block"
5898 );
5899}
5900
5901#[test]
5902fn prime_with_dictionary_does_not_inflate_reported_window_size() {
5903 let mut driver = MatchGeneratorDriver::new(8, 1);
5904 driver.reset(CompressionLevel::Fastest);
5905
5906 let before = driver.window_size();
5907 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
5908 let after = driver.window_size();
5909
5910 assert_eq!(
5911 after, before,
5912 "dictionary retention budget must not change reported frame window size"
5913 );
5914}
5915
5916#[test]
5917fn prime_with_dictionary_does_not_reuse_tiny_suffix_store() {
5918 let mut driver = MatchGeneratorDriver::new(8, 2);
5919 driver.reset(CompressionLevel::Fastest);
5920
5921 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
5924
5925 assert!(
5926 driver
5927 .simple()
5928 .window
5929 .iter()
5930 .all(|entry| entry.data.len() >= MIN_MATCH_LEN),
5931 "dictionary priming must not commit tails shorter than MIN_MATCH_LEN"
5932 );
5933}
5934
5935#[test]
5936fn prime_with_dictionary_counts_only_committed_tail_budget() {
5937 let mut driver = MatchGeneratorDriver::new(8, 1);
5938 driver.reset(CompressionLevel::Fastest);
5939
5940 let before = driver.simple_mut().max_window_size;
5941 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
5943
5944 assert_eq!(
5945 driver.simple_mut().max_window_size,
5946 before + 8,
5947 "retention budget must account only for dictionary bytes actually committed to history"
5948 );
5949}
5950
5951#[test]
5952fn dfast_prime_with_dictionary_counts_four_byte_tail_budget() {
5953 let mut driver = MatchGeneratorDriver::new(8, 1);
5954 driver.reset(CompressionLevel::Level(2));
5955
5956 let before = driver.dfast_matcher().max_window_size;
5957 driver.prime_with_dictionary(b"abcdefghijkl", [1, 4, 8]);
5960
5961 assert_eq!(
5962 driver.dfast_matcher().max_window_size,
5963 before + 12,
5964 "dfast retention budget should include 4-byte dictionary tails"
5965 );
5966}
5967
5968#[test]
5969fn row_prime_with_dictionary_preserves_history_for_first_full_block() {
5970 let mut driver = MatchGeneratorDriver::new(8, 1);
5971 driver.reset(CompressionLevel::Level(4));
5972
5973 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
5974
5975 let mut space = driver.get_next_space();
5976 space.clear();
5977 space.extend_from_slice(b"abcdefgh");
5978 driver.commit_space(space);
5979
5980 let mut saw_match = false;
5981 driver.start_matching(|seq| {
5982 if let Sequence::Triple {
5983 literals,
5984 offset,
5985 match_len,
5986 } = seq
5987 && literals.is_empty()
5988 && offset == 8
5989 && match_len >= ROW_MIN_MATCH_LEN
5990 {
5991 saw_match = true;
5992 }
5993 });
5994
5995 assert!(
5996 saw_match,
5997 "row backend should match dictionary-primed history in first full block"
5998 );
5999}
6000
6001#[test]
6002fn row_prime_with_dictionary_subtracts_uncommitted_tail_budget() {
6003 let mut driver = MatchGeneratorDriver::new(8, 1);
6004 driver.reset(CompressionLevel::Level(4));
6005
6006 let base_window = driver.row_matcher().max_window_size;
6007 driver.prime_with_dictionary(b"abcdefghi", [1, 4, 8]);
6010
6011 assert_eq!(
6012 driver.row_matcher().max_window_size,
6013 base_window + 8,
6014 "row retained window must exclude uncommitted 1-byte tail"
6015 );
6016}
6017
6018#[test]
6019fn prime_with_dictionary_budget_shrinks_after_row_eviction() {
6020 let mut driver = MatchGeneratorDriver::new(8, 1);
6021 driver.reset(CompressionLevel::Level(4));
6022 driver.row_matcher_mut().max_window_size = 8;
6024 driver.reported_window_size = 8;
6025
6026 let base_window = driver.row_matcher().max_window_size;
6027 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6028 assert_eq!(driver.row_matcher().max_window_size, base_window + 24);
6029
6030 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
6031 let mut space = driver.get_next_space();
6032 space.clear();
6033 space.extend_from_slice(block);
6034 driver.commit_space(space);
6035 driver.skip_matching_with_hint(None);
6036 }
6037
6038 assert_eq!(
6039 driver.dictionary_retained_budget, 0,
6040 "dictionary budget should be fully retired once primed dict slices are evicted"
6041 );
6042 assert_eq!(
6043 driver.row_matcher().max_window_size,
6044 base_window,
6045 "retired dictionary budget must not remain reusable for live history"
6046 );
6047}
6048
6049#[test]
6059fn row_get_last_space_then_reset_to_fastest_drops_row_variant() {
6060 let mut driver = MatchGeneratorDriver::new(8, 1);
6061 driver.reset(CompressionLevel::Level(4));
6062 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);
6063
6064 let mut space = driver.get_next_space();
6065 space.clear();
6066 space.extend_from_slice(b"row-data");
6067 driver.commit_space(space);
6068
6069 assert_eq!(driver.get_last_space(), b"row-data");
6070
6071 driver.reset(CompressionLevel::Fastest);
6072 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
6073}
6074
6075#[test]
6086fn driver_reset_from_row_backend_recycles_row_buffers_into_pool() {
6087 let mut driver = MatchGeneratorDriver::new(8, 1);
6088 driver.reset(CompressionLevel::Level(4));
6089 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Row);
6090
6091 let mut space = driver.get_next_space();
6092 space.extend_from_slice(b"row-data-to-recycle");
6093 driver.commit_space(space);
6094
6095 let before_pool = driver.vec_pool.len();
6096 driver.reset(CompressionLevel::Fastest);
6097
6098 assert_eq!(driver.active_backend(), super::strategy::BackendTag::Simple);
6099 assert!(
6105 driver.vec_pool.len() > before_pool,
6106 "row reset must recycle the committed row history buffer into vec_pool \
6107 (before_pool = {before_pool}, after = {})",
6108 driver.vec_pool.len()
6109 );
6110}
6111
6112#[test]
6113fn adjust_params_for_zero_source_size_uses_min_hinted_window_floor() {
6114 let mut params = resolve_level_params(CompressionLevel::Level(4), None);
6115 params.window_log = 22;
6116 let adjusted = adjust_params_for_source_size(params, 0);
6117 assert_eq!(adjusted.window_log, MIN_HINTED_WINDOW_LOG);
6118}
6119
6120#[test]
6121fn common_prefix_len_matches_scalar_reference_across_offsets() {
6122 fn scalar_reference(a: &[u8], b: &[u8]) -> usize {
6123 a.iter()
6124 .zip(b.iter())
6125 .take_while(|(lhs, rhs)| lhs == rhs)
6126 .count()
6127 }
6128
6129 for total_len in [
6130 0usize, 1, 5, 15, 16, 17, 31, 32, 33, 64, 65, 127, 191, 257, 320,
6131 ] {
6132 let base: Vec<u8> = (0..total_len)
6133 .map(|i| ((i * 13 + 7) & 0xFF) as u8)
6134 .collect();
6135
6136 for start in [0usize, 1, 3] {
6137 if start > total_len {
6138 continue;
6139 }
6140 let a = &base[start..];
6141 let b = a.to_vec();
6142 assert_eq!(
6143 common_prefix_len(a, &b),
6144 scalar_reference(a, &b),
6145 "equal slices total_len={total_len} start={start}"
6146 );
6147
6148 let len = a.len();
6149 for mismatch in [0usize, 1, 7, 15, 16, 31, 32, 47, 63, 95, 127, 128, 129, 191] {
6150 if mismatch >= len {
6151 continue;
6152 }
6153 let mut altered = b.clone();
6154 altered[mismatch] ^= 0x5A;
6155 assert_eq!(
6156 common_prefix_len(a, &altered),
6157 scalar_reference(a, &altered),
6158 "total_len={total_len} start={start} mismatch={mismatch}"
6159 );
6160 }
6161
6162 if len > 0 {
6163 let mismatch = len - 1;
6164 let mut altered = b.clone();
6165 altered[mismatch] ^= 0xA5;
6166 assert_eq!(
6167 common_prefix_len(a, &altered),
6168 scalar_reference(a, &altered),
6169 "tail mismatch total_len={total_len} start={start} mismatch={mismatch}"
6170 );
6171 }
6172 }
6173 }
6174
6175 let long = alloc::vec![0xAB; 320];
6176 let shorter = alloc::vec![0xAB; 137];
6177 assert_eq!(
6178 common_prefix_len(&long, &shorter),
6179 scalar_reference(&long, &shorter)
6180 );
6181}
6182
6183#[test]
6184fn row_pick_lazy_returns_none_when_next_is_better() {
6185 let mut matcher = RowMatchGenerator::new(1 << 22);
6186 matcher.configure(ROW_CONFIG);
6187 matcher.add_data(alloc::vec![b'a'; 64], |_| {});
6188 matcher.ensure_tables();
6189
6190 let abs_pos = matcher.history_abs_start + 16;
6191 let best = MatchCandidate {
6192 start: abs_pos,
6193 offset: 8,
6194 match_len: ROW_MIN_MATCH_LEN,
6195 };
6196 assert!(
6197 matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
6198 "lazy picker should defer when next position is clearly better"
6199 );
6200}
6201
6202#[test]
6203fn row_pick_lazy_depth2_returns_none_when_next2_significantly_better() {
6204 let mut matcher = RowMatchGenerator::new(1 << 22);
6205 matcher.configure(ROW_CONFIG);
6206 matcher.lazy_depth = 2;
6207 matcher.search_depth = 0;
6208 matcher.offset_hist = [6, 9, 1];
6209
6210 let mut data = alloc::vec![b'x'; 40];
6211 data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAB");
6212 matcher.add_data(data, |_| {});
6213 matcher.ensure_tables();
6214
6215 let abs_pos = matcher.history_abs_start + 20;
6216 let best = matcher
6217 .best_match(abs_pos, 0)
6218 .expect("expected baseline repcode match");
6219 assert_eq!(best.offset, 9);
6220 assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);
6221
6222 if let Some(next) = matcher.best_match(abs_pos + 1, 1) {
6223 assert!(next.match_len <= best.match_len);
6224 }
6225
6226 let next2 = matcher
6227 .best_match(abs_pos + 2, 2)
6228 .expect("expected +2 candidate");
6229 assert!(
6230 next2.match_len > best.match_len + 1,
6231 "+2 candidate must be significantly better for depth-2 lazy skip"
6232 );
6233 assert!(
6234 matcher.pick_lazy_match(abs_pos, 0, Some(best)).is_none(),
6235 "lazy picker should defer when +2 candidate is significantly better"
6236 );
6237}
6238
6239#[test]
6240fn row_pick_lazy_depth2_keeps_best_when_next2_is_only_one_byte_better() {
6241 let mut matcher = RowMatchGenerator::new(1 << 22);
6242 matcher.configure(ROW_CONFIG);
6243 matcher.lazy_depth = 2;
6244 matcher.search_depth = 0;
6245 matcher.offset_hist = [6, 9, 1];
6246
6247 let mut data = alloc::vec![b'x'; 40];
6248 data[11..30].copy_from_slice(b"EFABCABCAEFABCAEFAZ");
6249 matcher.add_data(data, |_| {});
6250 matcher.ensure_tables();
6251
6252 let abs_pos = matcher.history_abs_start + 20;
6253 let best = matcher
6254 .best_match(abs_pos, 0)
6255 .expect("expected baseline repcode match");
6256 assert_eq!(best.offset, 9);
6257 assert_eq!(best.match_len, ROW_MIN_MATCH_LEN);
6258
6259 let next2 = matcher
6260 .best_match(abs_pos + 2, 2)
6261 .expect("expected +2 candidate");
6262 assert_eq!(next2.match_len, best.match_len + 1);
6263 let chosen = matcher
6264 .pick_lazy_match(abs_pos, 0, Some(best))
6265 .expect("lazy picker should keep current best");
6266 assert_eq!(chosen.start, best.start);
6267 assert_eq!(chosen.offset, best.offset);
6268 assert_eq!(chosen.match_len, best.match_len);
6269}
6270
6271#[test]
6273fn row_hash_and_row_extracts_high_bits() {
6274 let mut matcher = RowMatchGenerator::new(1 << 22);
6275 matcher.configure(ROW_CONFIG);
6276 matcher.add_data(
6277 alloc::vec![
6278 0xAA, 0xBB, 0xCC, 0x11, 0x10, 0x20, 0x30, 0x40, 0xAA, 0xBB, 0xCC, 0x22, 0x50, 0x60,
6279 0x70, 0x80,
6280 ],
6281 |_| {},
6282 );
6283 matcher.ensure_tables();
6284
6285 let pos = matcher.history_abs_start + 8;
6286 let (row, tag) = matcher
6287 .hash_and_row(pos)
6288 .expect("row hash should be available");
6289
6290 let idx = pos - matcher.history_abs_start;
6291 let concat = matcher.live_history();
6292 let value = u32::from_le_bytes(concat[idx..idx + ROW_HASH_KEY_LEN].try_into().unwrap()) as u64;
6293 let hash = crate::encoding::fastpath::hash_mix_u64_with_kernel(matcher.hash_kernel, value);
6294 let total_bits = matcher.row_hash_log + ROW_TAG_BITS;
6295 let combined = hash >> (u64::BITS as usize - total_bits);
6296 let expected_row =
6297 ((combined >> ROW_TAG_BITS) as usize) & ((1usize << matcher.row_hash_log) - 1);
6298 let expected_tag = combined as u8;
6299
6300 assert_eq!(row, expected_row);
6301 assert_eq!(tag, expected_tag);
6302}
6303
6304#[test]
6305fn row_repcode_skips_candidate_before_history_start() {
6306 let mut matcher = RowMatchGenerator::new(1 << 22);
6307 matcher.configure(ROW_CONFIG);
6308 matcher.history = alloc::vec![b'a'; 20];
6309 matcher.history_start = 0;
6310 matcher.history_abs_start = 10;
6311 matcher.offset_hist = [3, 0, 0];
6312
6313 assert!(matcher.repcode_candidate(12, 1).is_none());
6314}
6315
6316#[test]
6317fn row_repcode_returns_none_when_position_too_close_to_history_end() {
6318 let mut matcher = RowMatchGenerator::new(1 << 22);
6319 matcher.configure(ROW_CONFIG);
6320 matcher.history = b"abcde".to_vec();
6321 matcher.history_start = 0;
6322 matcher.history_abs_start = 0;
6323 matcher.offset_hist = [1, 0, 0];
6324
6325 assert!(matcher.repcode_candidate(4, 1).is_none());
6326}
6327
6328#[cfg(all(feature = "std", target_arch = "x86_64"))]
6329#[test]
6330fn hash_mix_sse42_path_is_available_and_matches_accelerated_impl_when_supported() {
6331 use crate::encoding::fastpath::{self, FastpathKernel};
6332 if !is_x86_feature_detected!("sse4.2") {
6333 return;
6334 }
6335 let v = 0x0123_4567_89AB_CDEFu64;
6336 let accelerated = unsafe { fastpath::sse42::hash_mix_u64(v) };
6338 let dispatched = fastpath::dispatch_hash_mix_u64(v);
6340 let kernel = fastpath::select_kernel();
6341 if kernel == FastpathKernel::Sse42 {
6342 assert_eq!(dispatched, accelerated);
6343 } else {
6344 assert_eq!(dispatched, accelerated, "AVX2/SSE4.2 share CRC32 mix");
6346 }
6347}
6348
6349#[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
6350#[test]
6351fn hash_mix_crc_path_is_available_and_matches_accelerated_impl_when_supported() {
6352 use crate::encoding::fastpath;
6353 if !is_aarch64_feature_detected!("crc") {
6354 return;
6355 }
6356 let v = 0x0123_4567_89AB_CDEFu64;
6357 let accelerated = unsafe { fastpath::neon::hash_mix_u64(v) };
6359 let dispatched = fastpath::dispatch_hash_mix_u64(v);
6360 assert_eq!(dispatched, accelerated);
6361}
6362
6363#[test]
6364fn hc_hash3_position_matches_donor_formula() {
6365 let bytes = [b'a', b'b', b'c', b'd'];
6366 let read32 = u32::from_le_bytes(bytes);
6367 let expected = (((read32 << 8).wrapping_mul(HC_PRIME3BYTES)) >> (32 - HC3_HASH_LOG)) as usize;
6368 assert_eq!(
6369 super::match_table::storage::MatchTable::hash3_position(&bytes, HC3_HASH_LOG),
6370 expected
6371 );
6372}
6373
6374#[test]
6375fn hc_hash_position_matches_donor_hash4_formula() {
6376 let mut hc = HcMatchGenerator::new(1 << 20);
6377 hc.configure(HC_CONFIG, super::strategy::StrategyTag::Lazy, 22);
6378 let bytes = [b'a', b'b', b'c', b'd'];
6379 let read32 = u32::from_le_bytes(bytes);
6380 let expected = ((read32.wrapping_mul(HC_PRIME4BYTES)) >> (32 - hc.table.hash_log)) as usize;
6381 assert_eq!(hc.table.hash_position(&bytes), expected);
6382}
6383
6384#[test]
6385fn btultra2_main_hash_uses_donor_hash4_formula() {
6386 let mut hc = HcMatchGenerator::new(1 << 20);
6387 hc.configure(
6388 BTULTRA2_HC_CONFIG_L22,
6389 super::strategy::StrategyTag::BtUltra2,
6390 27,
6391 );
6392 let bytes = [b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h'];
6393 let read32 = u32::from_le_bytes(bytes[..4].try_into().unwrap());
6394 let expected = ((read32.wrapping_mul(HC_PRIME4BYTES)) >> (32 - hc.table.hash_log)) as usize;
6395 let actual = super::match_table::storage::MatchTable::hash_position_with_mls(
6396 &bytes,
6397 hc.table.hash_log,
6398 super::bt::BtMatcher::HASH_MLS,
6399 );
6400 assert_eq!(actual, expected);
6401}
6402
6403#[test]
6404fn row_candidate_returns_none_when_abs_pos_near_end_of_history() {
6405 let mut matcher = RowMatchGenerator::new(1 << 22);
6406 matcher.configure(ROW_CONFIG);
6407 matcher.history = b"abcde".to_vec();
6408 matcher.history_start = 0;
6409 matcher.history_abs_start = 0;
6410
6411 assert!(matcher.row_candidate(0, 0).is_none());
6412}
6413
6414#[test]
6415fn hc_chain_candidates_returns_sentinels_for_short_suffix() {
6416 let mut hc = HcMatchGenerator::new(32);
6417 hc.table.history = b"abc".to_vec();
6418 hc.table.history_start = 0;
6419 hc.table.history_abs_start = 0;
6420 hc.table.ensure_tables();
6421
6422 let candidates = hc.hc.chain_candidates(&hc.table, 0);
6423 assert!(candidates.iter().all(|&pos| pos == usize::MAX));
6424}
6425
6426#[test]
6427fn hc_reset_refills_existing_tables_with_empty_sentinel() {
6428 let mut hc = HcMatchGenerator::new(32);
6429 hc.table.add_data(b"abcdeabcde".to_vec(), |_| {});
6430 hc.table.ensure_tables();
6431 assert!(!hc.table.hash_table.is_empty());
6432 assert!(!hc.table.chain_table.is_empty());
6433 hc.table.hash_table.fill(123);
6434 hc.table.chain_table.fill(456);
6435
6436 hc.reset(|_| {});
6437
6438 assert!(hc.table.hash_table.iter().all(|&v| v == HC_EMPTY));
6439 assert!(hc.table.chain_table.iter().all(|&v| v == HC_EMPTY));
6440}
6441
6442#[test]
6443fn hc_start_matching_returns_early_for_empty_current_block() {
6444 let mut hc = HcMatchGenerator::new(32);
6445 hc.table.add_data(Vec::new(), |_| {});
6446 let mut called = false;
6447 hc.start_matching(|_| called = true);
6448 assert!(!called, "empty current block should not emit sequences");
6449}
6450
6451#[cfg(test)]
6452fn deterministic_high_entropy_bytes(seed: u64, len: usize) -> Vec<u8> {
6453 let mut out = Vec::with_capacity(len);
6454 let mut state = seed;
6455 for _ in 0..len {
6456 state ^= state << 13;
6457 state ^= state >> 7;
6458 state ^= state << 17;
6459 out.push((state >> 40) as u8);
6460 }
6461 out
6462}
6463
6464#[cfg(test)]
6465fn level22_donor_block_ranges(data: &[u8]) -> Vec<(usize, usize)> {
6466 let mut ranges = Vec::new();
6467 let mut cursor = 0usize;
6468 let mut savings = 0i64;
6469 while cursor < data.len() {
6470 let remaining = data.len() - cursor;
6471 let candidate_len = remaining.min(HC_BLOCKSIZE_MAX);
6472 let block_len = crate::encoding::frame_compressor::donor_optimal_block_size(
6473 CompressionLevel::Level(22),
6474 &data[cursor..cursor + candidate_len],
6475 remaining,
6476 HC_BLOCKSIZE_MAX,
6477 savings,
6478 )
6479 .min(candidate_len)
6480 .max(1);
6481 ranges.push((cursor, block_len));
6482 cursor += block_len;
6483 if cursor >= HC_BLOCKSIZE_MAX {
6487 savings = 3;
6488 }
6489 }
6490 ranges
6491}
6492
6493#[cfg(test)]
6494fn merge_block_delimiters_like_donor(
6495 sequences: Vec<(usize, usize, usize)>,
6496) -> Vec<(usize, usize, usize)> {
6497 let mut out = Vec::with_capacity(sequences.len());
6498 let mut pending_lits = 0usize;
6499 for (lit_len, offset, match_len) in sequences {
6500 if offset == 0 && match_len == 0 {
6501 pending_lits = pending_lits.saturating_add(lit_len);
6502 continue;
6503 }
6504 out.push((lit_len.saturating_add(pending_lits), offset, match_len));
6505 pending_lits = 0;
6506 }
6507 if pending_lits > 0 {
6508 out.push((pending_lits, 0, 0));
6509 }
6510 out
6511}
6512
6513#[cfg(test)]
6514fn collect_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
6515 merge_block_delimiters_like_donor(collect_level22_sequences_with_delimiters(data))
6516 .into_iter()
6517 .filter(|(_, offset, match_len)| *offset != 0 || *match_len != 0)
6518 .collect()
6519}
6520
6521#[cfg(test)]
6522fn collect_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
6523 let mut driver = MatchGeneratorDriver::new(HC_BLOCKSIZE_MAX, 1);
6524 driver.set_source_size_hint(data.len() as u64);
6525 driver.reset(CompressionLevel::Level(22));
6526
6527 let mut sequences = Vec::new();
6528 for (chunk_start, chunk_len) in level22_donor_block_ranges(data) {
6529 let chunk = &data[chunk_start..chunk_start + chunk_len];
6530 let mut space = driver.get_next_space();
6531 space[..chunk.len()].copy_from_slice(chunk);
6532 space.truncate(chunk.len());
6533 driver.commit_space(space);
6534 driver.start_matching(|seq| {
6535 let entry = match seq {
6536 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
6537 Sequence::Triple {
6538 literals,
6539 offset,
6540 match_len,
6541 } => (literals.len(), offset, match_len),
6542 };
6543 sequences.push(entry);
6544 });
6545 }
6546 sequences
6547}
6548
6549#[cfg(test)]
6550fn donor_level22_sequences(data: &[u8]) -> Vec<(usize, usize, usize)> {
6551 merge_block_delimiters_like_donor(donor_level22_sequences_with_delimiters(data))
6552 .into_iter()
6553 .filter(|(_, offset, match_len)| *offset != 0 || *match_len != 0)
6554 .collect()
6555}
6556
6557#[cfg(test)]
6558fn donor_level22_sequences_with_delimiters(data: &[u8]) -> Vec<(usize, usize, usize)> {
6559 use zstd::zstd_safe;
6560 use zstd::zstd_safe::zstd_sys;
6561
6562 fn assert_zstd_ok(code: usize, context: &str) {
6563 assert_eq!(
6564 unsafe { zstd_sys::ZSTD_isError(code) },
6565 0,
6566 "{context} failed: {}",
6567 zstd_safe::get_error_name(code)
6568 );
6569 }
6570
6571 unsafe {
6572 let cctx = zstd_sys::ZSTD_createCCtx();
6573 assert!(!cctx.is_null(), "ZSTD_createCCtx returned null");
6574
6575 assert_zstd_ok(
6576 zstd_sys::ZSTD_CCtx_setParameter(
6577 cctx,
6578 zstd_sys::ZSTD_cParameter::ZSTD_c_compressionLevel,
6579 22,
6580 ),
6581 "ZSTD_c_compressionLevel",
6582 );
6583
6584 let seq_capacity = zstd_safe::sequence_bound(data.len());
6585 let mut seqs = alloc::vec![
6586 zstd_sys::ZSTD_Sequence {
6587 offset: 0,
6588 litLength: 0,
6589 matchLength: 0,
6590 rep: 0,
6591 };
6592 seq_capacity
6593 ];
6594
6595 let seq_count = zstd_sys::ZSTD_generateSequences(
6596 cctx,
6597 seqs.as_mut_ptr(),
6598 seqs.len(),
6599 data.as_ptr().cast(),
6600 data.len(),
6601 );
6602 assert_zstd_ok(seq_count, "ZSTD_generateSequences");
6603 let rc = zstd_sys::ZSTD_freeCCtx(cctx);
6604 assert_eq!(rc, 0, "ZSTD_freeCCtx failed");
6605
6606 seqs.truncate(seq_count);
6607 seqs.into_iter()
6608 .map(|seq| {
6609 (
6610 seq.litLength as usize,
6611 seq.offset as usize,
6612 seq.matchLength as usize,
6613 )
6614 })
6615 .collect()
6616 }
6617}
6618
6619#[test]
6620fn level22_sequences_match_donor_on_corpus_proxy() {
6621 let data = include_bytes!("../../decodecorpus_files/z000033");
6622 assert_level22_sequences_match_donor(data);
6623}
6624
6625#[test]
6626fn level22_sequences_match_donor_on_small_corpus_proxy() {
6627 let data = include_bytes!("../../decodecorpus_files/z000030");
6628 assert_level22_sequences_match_donor(data);
6629}
6630
6631#[cfg(test)]
6632fn assert_level22_sequences_match_donor(data: &[u8]) {
6633 let rust = collect_level22_sequences(data);
6634 let donor = donor_level22_sequences(data);
6635
6636 if rust != donor {
6637 let first_diff = rust
6638 .iter()
6639 .zip(donor.iter())
6640 .position(|(lhs, rhs)| lhs != rhs)
6641 .unwrap_or_else(|| rust.len().min(donor.len()));
6642 let rust_pos = rust
6643 .iter()
6644 .take(first_diff)
6645 .fold(0usize, |acc, seq| acc + seq.0 + seq.2);
6646 let donor_pos = donor
6647 .iter()
6648 .take(first_diff)
6649 .fold(0usize, |acc, seq| acc + seq.0 + seq.2);
6650 let start = first_diff.saturating_sub(4);
6651 let rust_window = &rust[start..rust.len().min(first_diff + 4)];
6652 let donor_window = &donor[start..donor.len().min(first_diff + 4)];
6653 let mut reps = [1u32, 4, 8];
6654 for (lit_len, offset, _) in rust.iter().take(first_diff) {
6655 let _ = encode_offset_with_history(*offset as u32, *lit_len as u32, &mut reps);
6656 }
6657 panic!(
6658 "level22 sequence path diverged at idx {}: rust={:?} donor={:?} (rust_len={} donor_len={} rust_pos={} donor_pos={} reps_before={:?} rust_window={:?} donor_window={:?} block_ranges={:?})",
6659 first_diff,
6660 rust.get(first_diff),
6661 donor.get(first_diff),
6662 rust.len(),
6663 donor.len(),
6664 rust_pos,
6665 donor_pos,
6666 reps,
6667 rust_window,
6668 donor_window,
6669 level22_donor_block_ranges(data)
6670 .into_iter()
6671 .filter(|(start, len)| *start <= rust_pos && rust_pos < start + len)
6672 .collect::<Vec<_>>(),
6673 );
6674 }
6675}
6676
6677#[test]
6678fn hc_sparse_skip_matching_preserves_tail_cross_block_match() {
6679 let mut matcher = HcMatchGenerator::new(1 << 22);
6680 let tail = b"Qz9kLm2Rp";
6681 let mut first = deterministic_high_entropy_bytes(0xD1B5_4A32_9C77_0E19, 4096);
6682 let tail_start = first.len() - tail.len();
6683 first[tail_start..].copy_from_slice(tail);
6684 matcher.table.add_data(first.clone(), |_| {});
6685 matcher.skip_matching(Some(true));
6686
6687 let mut second = tail.to_vec();
6688 second.extend_from_slice(b"after-tail-literals");
6689 matcher.table.add_data(second, |_| {});
6690
6691 let mut first_sequence = None;
6692 matcher.start_matching(|seq| {
6693 if first_sequence.is_some() {
6694 return;
6695 }
6696 first_sequence = Some(match seq {
6697 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
6698 Sequence::Triple {
6699 literals,
6700 offset,
6701 match_len,
6702 } => (literals.len(), offset, match_len),
6703 });
6704 });
6705
6706 let (literals_len, offset, match_len) =
6707 first_sequence.expect("expected at least one sequence after sparse skip");
6708 assert_eq!(
6709 literals_len, 0,
6710 "first sequence should start at block boundary"
6711 );
6712 assert_eq!(
6713 offset,
6714 tail.len(),
6715 "first match should reference previous tail"
6716 );
6717 assert!(
6718 match_len >= tail.len(),
6719 "tail-aligned cross-block match must be preserved"
6720 );
6721}
6722
6723#[test]
6724fn btultra2_sparse_skip_matching_preserves_tail_cross_block_match() {
6725 let mut matcher = HcMatchGenerator::new(1 << 20);
6726 matcher.configure(
6727 BTULTRA2_HC_CONFIG_L22,
6728 super::strategy::StrategyTag::BtUltra2,
6729 20,
6730 );
6731 let tail = b"Bt9kLm2Rp";
6732 let mut first = deterministic_high_entropy_bytes(0xA9C3_7F21_D4E8_510B, 4096);
6733 let tail_start = first.len() - tail.len();
6734 first[tail_start..].copy_from_slice(tail);
6735 matcher.table.add_data(first, |_| {});
6736 matcher.skip_matching(Some(true));
6737
6738 let mut second = tail.to_vec();
6739 second.extend_from_slice(b"after-tail-literals");
6740 matcher.table.add_data(second, |_| {});
6741
6742 let mut first_sequence = None;
6743 matcher.start_matching(|seq| {
6744 if first_sequence.is_some() {
6745 return;
6746 }
6747 first_sequence = Some(match seq {
6748 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
6749 Sequence::Triple {
6750 literals,
6751 offset,
6752 match_len,
6753 } => (literals.len(), offset, match_len),
6754 });
6755 });
6756
6757 let (literals_len, offset, match_len) =
6758 first_sequence.expect("expected at least one sequence after sparse BT skip");
6759 assert_eq!(
6760 literals_len, 0,
6761 "BT sparse skip should preserve an immediate boundary match"
6762 );
6763 assert_eq!(
6764 offset,
6765 tail.len(),
6766 "first BT match should reference previous tail"
6767 );
6768 assert!(
6769 match_len >= tail.len(),
6770 "BT sparse skip must seed the dense tail for cross-block matching"
6771 );
6772}
6773
6774#[test]
6775fn hc_sparse_skip_matching_does_not_reinsert_sparse_tail_positions() {
6776 let mut matcher = HcMatchGenerator::new(1 << 22);
6777 let first = deterministic_high_entropy_bytes(0xC2B2_AE3D_27D4_EB4F, 4096);
6778 matcher.table.add_data(first.clone(), |_| {});
6779 matcher.skip_matching(Some(true));
6780
6781 let current_len = first.len();
6782 let current_abs_start =
6783 matcher.table.history_abs_start + matcher.table.window_size - current_len;
6784 let current_abs_end = current_abs_start + current_len;
6785 let dense_tail = HC_MIN_MATCH_LEN + INCOMPRESSIBLE_SKIP_STEP;
6786 let tail_start = current_abs_end
6787 .saturating_sub(dense_tail)
6788 .max(matcher.table.history_abs_start)
6789 .max(current_abs_start);
6790
6791 let overlap_pos = (tail_start..current_abs_end)
6792 .find(|&pos| (pos - current_abs_start).is_multiple_of(INCOMPRESSIBLE_SKIP_STEP))
6793 .expect("fixture should contain at least one sparse-grid overlap in dense tail");
6794
6795 let rel = matcher
6796 .table
6797 .relative_position(overlap_pos)
6798 .expect("overlap position should be representable as relative position");
6799 let chain_idx = rel as usize & ((1 << matcher.table.chain_log) - 1);
6800 assert_ne!(
6801 matcher.table.chain_table[chain_idx],
6802 rel + 1,
6803 "sparse-grid tail positions must not be reinserted (self-loop chain entry)"
6804 );
6805}
6806
6807#[test]
6808fn hc_compact_history_drains_when_threshold_crossed() {
6809 let mut hc = HcMatchGenerator::new(8);
6810 hc.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
6811 hc.table.history_start = 16;
6812 hc.table.compact_history();
6813 assert_eq!(hc.table.history_start, 0);
6814 assert_eq!(hc.table.history, b"qrstuvwxyz");
6815}
6816
6817#[test]
6818fn hc_insert_position_no_rebase_returns_when_relative_pos_unavailable() {
6819 let mut hc = HcMatchGenerator::new(32);
6820 hc.table.history = b"abcdefghijklmnop".to_vec();
6821 hc.table.history_abs_start = 0;
6822 hc.table.position_base = 1;
6823 hc.table.ensure_tables();
6824 let before_hash = hc.table.hash_table.clone();
6825 let before_chain = hc.table.chain_table.clone();
6826
6827 hc.table.insert_position_no_rebase(0);
6828
6829 assert_eq!(hc.table.hash_table, before_hash);
6830 assert_eq!(hc.table.chain_table, before_chain);
6831}
6832
6833#[test]
6834fn hc_insert_positions_advances_next_to_update3_for_contiguous_range() {
6835 let mut hc = HcMatchGenerator::new(64);
6836 hc.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
6837 hc.table.history_start = 0;
6838 hc.table.history_abs_start = 0;
6839 hc.table.position_base = 0;
6840 hc.table.ensure_tables();
6841 hc.table.next_to_update3 = 0;
6842
6843 hc.table.insert_positions(0, 9);
6844
6845 assert_eq!(
6846 hc.table.next_to_update3, 9,
6847 "contiguous insert_positions should advance hash3 update cursor"
6848 );
6849}
6850
6851#[test]
6852fn hc_insert_positions_with_step_keeps_next_to_update3_cursor_for_sparse_ranges() {
6853 let mut hc = HcMatchGenerator::new(64);
6854 hc.table.history = b"abcdefghijklmnopqrstuvwxyz".to_vec();
6855 hc.table.history_start = 0;
6856 hc.table.history_abs_start = 0;
6857 hc.table.position_base = 0;
6858 hc.table.ensure_tables();
6859 hc.table.next_to_update3 = 0;
6860
6861 hc.table.insert_positions_with_step(0, 16, 4);
6862
6863 assert_eq!(
6864 hc.table.next_to_update3, 0,
6865 "sparse insert_positions_with_step must not mark skipped positions as hash3-updated"
6866 );
6867}
6868
6869#[test]
6870fn prime_with_dictionary_budget_shrinks_after_simple_eviction() {
6871 let mut driver = MatchGeneratorDriver::new(8, 1);
6872 driver.reset(CompressionLevel::Fastest);
6873 driver.simple_mut().max_window_size = 8;
6876 driver.reported_window_size = 8;
6877
6878 let base_window = driver.simple_mut().max_window_size;
6879 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6880 assert_eq!(driver.simple_mut().max_window_size, base_window + 24);
6881
6882 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
6883 let mut space = driver.get_next_space();
6884 space.clear();
6885 space.extend_from_slice(block);
6886 driver.commit_space(space);
6887 driver.skip_matching_with_hint(None);
6888 }
6889
6890 assert_eq!(
6891 driver.dictionary_retained_budget, 0,
6892 "dictionary budget should be fully retired once primed dict slices are evicted"
6893 );
6894 assert_eq!(
6895 driver.simple_mut().max_window_size,
6896 base_window,
6897 "retired dictionary budget must not remain reusable for live history"
6898 );
6899}
6900
6901#[test]
6902fn prime_with_dictionary_budget_shrinks_after_dfast_eviction() {
6903 let mut driver = MatchGeneratorDriver::new(8, 1);
6904 driver.reset(CompressionLevel::Level(2));
6905 driver.dfast_matcher_mut().max_window_size = 8;
6908 driver.reported_window_size = 8;
6909
6910 let base_window = driver.dfast_matcher().max_window_size;
6911 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6912 assert_eq!(driver.dfast_matcher().max_window_size, base_window + 24);
6913
6914 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
6915 let mut space = driver.get_next_space();
6916 space.clear();
6917 space.extend_from_slice(block);
6918 driver.commit_space(space);
6919 driver.skip_matching_with_hint(None);
6920 }
6921
6922 assert_eq!(
6923 driver.dictionary_retained_budget, 0,
6924 "dictionary budget should be fully retired once primed dict slices are evicted"
6925 );
6926 assert_eq!(
6927 driver.dfast_matcher().max_window_size,
6928 base_window,
6929 "retired dictionary budget must not remain reusable for live history"
6930 );
6931}
6932
6933#[test]
6934fn hc_prime_with_dictionary_preserves_history_for_first_full_block() {
6935 let mut driver = MatchGeneratorDriver::new(8, 1);
6936 driver.reset(CompressionLevel::Better);
6937
6938 driver.prime_with_dictionary(b"abcdefgh", [1, 4, 8]);
6939
6940 let mut space = driver.get_next_space();
6941 space.clear();
6942 space.extend_from_slice(b"abcdefgh");
6945 driver.commit_space(space);
6946
6947 let mut saw_match = false;
6948 driver.start_matching(|seq| {
6949 if let Sequence::Triple {
6950 literals,
6951 offset,
6952 match_len,
6953 } = seq
6954 && literals.is_empty()
6955 && offset == 8
6956 && match_len >= HC_MIN_MATCH_LEN
6957 {
6958 saw_match = true;
6959 }
6960 });
6961
6962 assert!(
6963 saw_match,
6964 "hash-chain backend should match dictionary-primed history in first full block"
6965 );
6966}
6967
6968#[test]
6969fn prime_with_dictionary_budget_shrinks_after_hc_eviction() {
6970 let mut driver = MatchGeneratorDriver::new(8, 1);
6971 driver.reset(CompressionLevel::Better);
6972 driver.hc_matcher_mut().table.max_window_size = 8;
6974 driver.reported_window_size = 8;
6975
6976 let base_window = driver.hc_matcher().table.max_window_size;
6977 driver.prime_with_dictionary(b"abcdefghABCDEFGHijklmnop", [1, 4, 8]);
6978 assert_eq!(driver.hc_matcher().table.max_window_size, base_window + 24);
6979
6980 for block in [b"AAAAAAAA", b"BBBBBBBB"] {
6981 let mut space = driver.get_next_space();
6982 space.clear();
6983 space.extend_from_slice(block);
6984 driver.commit_space(space);
6985 driver.skip_matching_with_hint(None);
6986 }
6987
6988 assert_eq!(
6989 driver.dictionary_retained_budget, 0,
6990 "dictionary budget should be fully retired once primed dict slices are evicted"
6991 );
6992 assert_eq!(
6993 driver.hc_matcher().table.max_window_size,
6994 base_window,
6995 "retired dictionary budget must not remain reusable for live history"
6996 );
6997}
6998
6999#[test]
7000fn hc_rebases_positions_after_u32_boundary() {
7001 let mut matcher = HcMatchGenerator::new(64);
7002 matcher.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
7003 matcher.table.ensure_tables();
7004 matcher.table.position_base = 0;
7005 let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
7006 Ok(value) => value,
7007 Err(_) => return,
7008 };
7009 matcher.table.history_abs_start = history_abs_start;
7012 matcher.skip_matching(None);
7013 assert_eq!(
7014 matcher.table.position_base, matcher.table.history_abs_start,
7015 "rebase should anchor to the oldest live absolute position"
7016 );
7017
7018 assert!(
7019 matcher
7020 .table
7021 .hash_table
7022 .iter()
7023 .any(|entry| *entry != HC_EMPTY),
7024 "HC hash table should still be populated after crossing u32 boundary"
7025 );
7026
7027 let abs_pos = matcher.table.history_abs_start + 10;
7029 let candidates = matcher.hc.chain_candidates(&matcher.table, abs_pos);
7030 assert!(
7031 candidates.iter().any(|candidate| *candidate != usize::MAX),
7032 "chain_candidates should return valid matches after rebase"
7033 );
7034}
7035
7036#[test]
7037fn hc_rebase_rebuilds_only_inserted_prefix() {
7038 let mut matcher = HcMatchGenerator::new(64);
7039 matcher.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
7040 matcher.table.ensure_tables();
7041 matcher.table.position_base = 0;
7042 let history_abs_start: usize = match (u64::from(u32::MAX) + 64).try_into() {
7043 Ok(value) => value,
7044 Err(_) => return,
7045 };
7046 matcher.table.history_abs_start = history_abs_start;
7047 let abs_pos = matcher.table.history_abs_start + 6;
7048
7049 let mut expected = HcMatchGenerator::new(64);
7050 expected.table.add_data(b"abcdeabcdeabcde".to_vec(), |_| {});
7051 expected.table.ensure_tables();
7052 expected.table.history_abs_start = history_abs_start;
7053 expected.table.position_base = expected.table.history_abs_start;
7054 expected.table.hash_table.fill(HC_EMPTY);
7055 expected.table.chain_table.fill(HC_EMPTY);
7056 for pos in expected.table.history_abs_start..abs_pos {
7057 expected.table.insert_position_no_rebase(pos);
7058 }
7059
7060 matcher.table.maybe_rebase_positions(abs_pos);
7061
7062 assert_eq!(
7063 matcher.table.position_base, matcher.table.history_abs_start,
7064 "rebase should still anchor to the oldest live absolute position"
7065 );
7066 assert_eq!(
7067 matcher.table.hash_table, expected.table.hash_table,
7068 "rebase must rebuild only positions already inserted before abs_pos"
7069 );
7070 assert_eq!(
7071 matcher.table.chain_table, expected.table.chain_table,
7072 "future positions must not be pre-seeded into HC chains during rebase"
7073 );
7074}
7075
7076#[test]
7077fn suffix_store_with_single_slot_does_not_panic_on_keying() {
7078 let mut suffixes = SuffixStore::with_capacity(1);
7079 suffixes.insert(b"abcde", 0);
7080 assert!(suffixes.contains_key(b"abcde"));
7081 assert_eq!(suffixes.get(b"abcde"), Some(0));
7082}
7083
7084#[test]
7085fn fastest_reset_uses_interleaved_hash_fill_step() {
7086 let mut driver = MatchGeneratorDriver::new(32, 2);
7087
7088 driver.reset(CompressionLevel::Uncompressed);
7089 assert_eq!(driver.simple().hash_fill_step, 1);
7090
7091 driver.reset(CompressionLevel::Fastest);
7092 assert_eq!(driver.simple().hash_fill_step, FAST_HASH_FILL_STEP);
7093
7094 driver.reset(CompressionLevel::Better);
7097 assert_eq!(
7098 driver.active_backend(),
7099 super::strategy::BackendTag::HashChain
7100 );
7101 assert_eq!(driver.window_size(), (1u64 << 23));
7102 assert_eq!(driver.hc_matcher().hc.lazy_depth, 2);
7103}
7104
7105#[test]
7106fn simple_matcher_updates_offset_history_after_emitting_match() {
7107 let mut matcher = MatchGenerator::new(64);
7108 matcher.add_data(
7109 b"abcdeabcdeabcde".to_vec(),
7110 SuffixStore::with_capacity(64),
7111 |_, _| {},
7112 );
7113
7114 assert!(matcher.next_sequence(|seq| {
7115 assert_eq!(
7116 seq,
7117 Sequence::Triple {
7118 literals: b"abcde",
7119 offset: 5,
7120 match_len: 10,
7121 }
7122 );
7123 }));
7124 assert_eq!(matcher.offset_hist, [5, 1, 4]);
7125}
7126
7127#[test]
7128fn simple_matcher_zero_literal_repcode_checks_rep1_before_hash_lookup() {
7129 let mut matcher = MatchGenerator::new(64);
7130 matcher.add_data(
7131 b"abcdefghijabcdefghij".to_vec(),
7132 SuffixStore::with_capacity(64),
7133 |_, _| {},
7134 );
7135
7136 matcher.suffix_idx = 10;
7137 matcher.last_idx_in_sequence = 10;
7138 matcher.offset_hist = [99, 10, 4];
7139
7140 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
7141 assert_eq!(candidate, Some((10, 10)));
7142}
7143
7144#[test]
7145fn simple_matcher_repcode_can_target_previous_window_entry() {
7146 let mut matcher = MatchGenerator::new(64);
7147 matcher.add_data(
7148 b"abcdefghij".to_vec(),
7149 SuffixStore::with_capacity(64),
7150 |_, _| {},
7151 );
7152 matcher.skip_matching();
7153 matcher.add_data(
7154 b"abcdefghij".to_vec(),
7155 SuffixStore::with_capacity(64),
7156 |_, _| {},
7157 );
7158
7159 matcher.offset_hist = [99, 10, 4];
7160
7161 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data, 0);
7162 assert_eq!(candidate, Some((10, 10)));
7163}
7164
7165#[test]
7166fn simple_matcher_zero_literal_repcode_checks_rep2() {
7167 let mut matcher = MatchGenerator::new(64);
7168 matcher.add_data(
7169 b"abcdefghijabcdefghij".to_vec(),
7170 SuffixStore::with_capacity(64),
7171 |_, _| {},
7172 );
7173 matcher.suffix_idx = 10;
7174 matcher.last_idx_in_sequence = 10;
7175 matcher.offset_hist = [99, 4, 10];
7177
7178 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
7179 assert_eq!(candidate, Some((10, 10)));
7180}
7181
7182#[test]
7183fn simple_matcher_zero_literal_repcode_checks_rep0_minus1() {
7184 let mut matcher = MatchGenerator::new(64);
7185 matcher.add_data(
7186 b"abcdefghijabcdefghij".to_vec(),
7187 SuffixStore::with_capacity(64),
7188 |_, _| {},
7189 );
7190 matcher.suffix_idx = 10;
7191 matcher.last_idx_in_sequence = 10;
7192 matcher.offset_hist = [11, 4, 99];
7194
7195 let candidate = matcher.repcode_candidate(&matcher.window.last().unwrap().data[10..], 0);
7196 assert_eq!(candidate, Some((10, 10)));
7197}
7198
7199#[test]
7200fn simple_matcher_repcode_rejects_offsets_beyond_searchable_prefix() {
7201 let mut matcher = MatchGenerator::new(64);
7202 matcher.add_data(
7203 b"abcdefghij".to_vec(),
7204 SuffixStore::with_capacity(64),
7205 |_, _| {},
7206 );
7207 matcher.skip_matching();
7208 matcher.add_data(
7209 b"klmnopqrst".to_vec(),
7210 SuffixStore::with_capacity(64),
7211 |_, _| {},
7212 );
7213 matcher.suffix_idx = 3;
7214
7215 let candidate = matcher.offset_match_len(14, &matcher.window.last().unwrap().data[3..]);
7216 assert_eq!(candidate, None);
7217}
7218
7219#[test]
7220fn simple_matcher_skip_matching_seeds_every_position_even_with_fast_step() {
7221 let mut matcher = MatchGenerator::new(64);
7222 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
7223 matcher.add_data(
7224 b"abcdefghijklmnop".to_vec(),
7225 SuffixStore::with_capacity(64),
7226 |_, _| {},
7227 );
7228 matcher.skip_matching();
7229 matcher.add_data(b"bcdef".to_vec(), SuffixStore::with_capacity(64), |_, _| {});
7230
7231 assert!(matcher.next_sequence(|seq| {
7232 assert_eq!(
7233 seq,
7234 Sequence::Triple {
7235 literals: b"",
7236 offset: 15,
7237 match_len: 5,
7238 }
7239 );
7240 }));
7241 assert!(!matcher.next_sequence(|_| {}));
7242}
7243
7244#[test]
7245fn simple_matcher_skip_matching_with_incompressible_hint_uses_sparse_prefix() {
7246 let mut matcher = MatchGenerator::new(128);
7247 let first = b"abcdefghijklmnopqrstuvwxyz012345".to_vec();
7248 let sparse_probe = first[3..3 + MIN_MATCH_LEN].to_vec();
7249 let tail_start = first.len() - MIN_MATCH_LEN;
7250 let tail_probe = first[tail_start..tail_start + MIN_MATCH_LEN].to_vec();
7251 matcher.add_data(first, SuffixStore::with_capacity(256), |_, _| {});
7252
7253 matcher.skip_matching_with_hint(Some(true));
7254
7255 matcher.add_data(sparse_probe, SuffixStore::with_capacity(256), |_, _| {});
7257 let mut sparse_first_is_literals = None;
7258 assert!(matcher.next_sequence(|seq| {
7259 if sparse_first_is_literals.is_none() {
7260 sparse_first_is_literals = Some(matches!(seq, Sequence::Literals { .. }));
7261 }
7262 }));
7263 assert!(
7264 sparse_first_is_literals.unwrap_or(false),
7265 "sparse-start probe should not produce an immediate match"
7266 );
7267
7268 let mut matcher = MatchGenerator::new(128);
7270 matcher.add_data(
7271 b"abcdefghijklmnopqrstuvwxyz012345".to_vec(),
7272 SuffixStore::with_capacity(256),
7273 |_, _| {},
7274 );
7275 matcher.skip_matching_with_hint(Some(true));
7276 matcher.add_data(tail_probe, SuffixStore::with_capacity(256), |_, _| {});
7277 let mut tail_first_is_immediate_match = None;
7278 assert!(matcher.next_sequence(|seq| {
7279 if tail_first_is_immediate_match.is_none() {
7280 tail_first_is_immediate_match =
7281 Some(matches!(seq, Sequence::Triple { literals, .. } if literals.is_empty()));
7282 }
7283 }));
7284 assert!(
7285 tail_first_is_immediate_match.unwrap_or(false),
7286 "dense tail probe should match immediately at block start"
7287 );
7288}
7289
7290#[test]
7291fn simple_matcher_add_suffixes_till_backfills_last_searchable_anchor() {
7292 let mut matcher = MatchGenerator::new(64);
7293 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
7294 matcher.add_data(
7295 b"01234abcde".to_vec(),
7296 SuffixStore::with_capacity(64),
7297 |_, _| {},
7298 );
7299 matcher.add_suffixes_till(10, FAST_HASH_FILL_STEP);
7300
7301 let last = matcher.window.last().unwrap();
7302 let tail = &last.data[5..10];
7303 assert_eq!(last.suffixes.get(tail), Some(5));
7304}
7305
7306#[test]
7307fn simple_matcher_add_suffixes_till_skips_when_idx_below_min_match_len() {
7308 let mut matcher = MatchGenerator::new(128);
7309 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
7310 matcher.add_data(
7311 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
7312 SuffixStore::with_capacity(1 << 16),
7313 |_, _| {},
7314 );
7315
7316 matcher.add_suffixes_till(MIN_MATCH_LEN - 1, FAST_HASH_FILL_STEP);
7317
7318 let last = matcher.window.last().unwrap();
7319 let first_key = &last.data[..MIN_MATCH_LEN];
7320 assert_eq!(last.suffixes.get(first_key), None);
7321}
7322
7323#[test]
7324fn simple_matcher_add_suffixes_till_fast_step_registers_interleaved_positions() {
7325 let mut matcher = MatchGenerator::new(128);
7326 matcher.hash_fill_step = FAST_HASH_FILL_STEP;
7327 matcher.add_data(
7328 b"abcdefghijklmnopqrstuvwxyz".to_vec(),
7329 SuffixStore::with_capacity(1 << 16),
7330 |_, _| {},
7331 );
7332
7333 matcher.add_suffixes_till(17, FAST_HASH_FILL_STEP);
7334
7335 let last = matcher.window.last().unwrap();
7336 for pos in [0usize, 3, 6, 9, 12] {
7337 let key = &last.data[pos..pos + MIN_MATCH_LEN];
7338 assert_eq!(
7339 last.suffixes.get(key),
7340 Some(pos),
7341 "expected interleaved suffix registration at pos {pos}"
7342 );
7343 }
7344}
7345
7346#[test]
7347fn dfast_skip_matching_handles_window_eviction() {
7348 let mut matcher = DfastMatchGenerator::new(16);
7349
7350 matcher.add_data(alloc::vec![1, 2, 3, 4, 5, 6], |_| {});
7351 matcher.skip_matching(None);
7352 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
7353 matcher.skip_matching(None);
7354 matcher.add_data(alloc::vec![7, 8, 9, 10, 11, 12], |_| {});
7355
7356 let mut reconstructed = alloc::vec![7, 8, 9, 10, 11, 12];
7357 matcher.start_matching(|seq| match seq {
7358 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
7359 Sequence::Triple {
7360 literals,
7361 offset,
7362 match_len,
7363 } => {
7364 reconstructed.extend_from_slice(literals);
7365 let start = reconstructed.len() - offset;
7366 for i in 0..match_len {
7367 let byte = reconstructed[start + i];
7368 reconstructed.push(byte);
7369 }
7370 }
7371 });
7372
7373 assert_eq!(reconstructed, [7, 8, 9, 10, 11, 12, 7, 8, 9, 10, 11, 12]);
7374}
7375
7376#[test]
7377fn dfast_add_data_callback_reports_evicted_len_not_capacity() {
7378 let mut matcher = DfastMatchGenerator::new(8);
7379
7380 let mut first = Vec::with_capacity(64);
7381 first.extend_from_slice(b"abcdefgh");
7382 matcher.add_data(first, |_| {});
7383
7384 let mut second = Vec::with_capacity(64);
7385 second.extend_from_slice(b"ijklmnop");
7386
7387 let mut observed_evicted_len = None;
7388 matcher.add_data(second, |data| {
7389 observed_evicted_len = Some(data.len());
7390 });
7391
7392 assert_eq!(
7393 observed_evicted_len,
7394 Some(8),
7395 "eviction callback must report evicted byte length, not backing capacity"
7396 );
7397}
7398
7399#[test]
7463fn dfast_commit_space_eviction_uses_window_size_delta() {
7464 use crate::encoding::CompressionLevel;
7465
7466 let mut driver = MatchGeneratorDriver::new(10, 1);
7467 driver.reset(CompressionLevel::Level(2));
7468 assert!(matches!(driver.storage, MatcherStorage::Dfast(_)));
7469
7470 driver.dfast_matcher_mut().max_window_size = 10;
7475 driver.dictionary_retained_budget = 100;
7476
7477 let mut space1 = Vec::with_capacity(64);
7478 space1.extend_from_slice(b"abcd");
7479 driver.commit_space(space1);
7480 assert_eq!(
7481 driver.dictionary_retained_budget, 100,
7482 "1st commit fills window 0 → 4, no eviction, no retire"
7483 );
7484
7485 let mut space2 = Vec::with_capacity(64);
7486 space2.extend_from_slice(b"efgh");
7487 driver.commit_space(space2);
7488 assert_eq!(
7489 driver.dictionary_retained_budget, 100,
7490 "2nd commit fills window 4 → 8, no eviction, no retire"
7491 );
7492
7493 let mut space3 = Vec::with_capacity(64);
7494 space3.extend_from_slice(b"ijklm");
7495 driver.commit_space(space3);
7496 assert_eq!(
7497 driver.dictionary_retained_budget, 87,
7498 "3rd commit + trim_after_budget_retire cascade. With the fix \
7499 (evicted=4 from window_size delta) the cascade reclaims 100 \
7500 → 96 → 92 → 87. With the bug (evicted=5 from data.len()) the \
7501 3rd commit would panic on `data.len() <= max_window_size` \
7502 after the 2nd commit's cascade had already shrunk \
7503 max_window_size to 0."
7504 );
7505 assert_eq!(
7506 driver.dfast_matcher_mut().max_window_size,
7507 0,
7508 "cascade drains max_window_size to 0 once budget reclaim \
7509 exceeds the initial window size"
7510 );
7511}
7512
7513#[test]
7514fn dfast_trim_to_window_evicts_oldest_block_by_length() {
7515 let mut matcher = DfastMatchGenerator::new(16);
7524
7525 let mut first = Vec::with_capacity(64);
7526 first.extend_from_slice(b"abcdefgh");
7527 matcher.add_data(first, |_| {});
7528
7529 let mut second = Vec::with_capacity(64);
7530 second.extend_from_slice(b"ijklmnop");
7531 matcher.add_data(second, |_| {});
7532
7533 assert_eq!(matcher.window_size, 16);
7534 assert_eq!(matcher.window_blocks.len(), 2);
7535
7536 matcher.max_window_size = 8;
7537
7538 matcher.trim_to_window();
7539
7540 assert_eq!(
7548 matcher.window_size, 8,
7549 "exactly one 8-byte block must remain"
7550 );
7551 assert_eq!(matcher.window_blocks.len(), 1);
7552 assert_eq!(matcher.history_abs_start, 8);
7553}
7554
7555#[test]
7556fn dfast_inserts_tail_positions_for_next_block_matching() {
7557 let mut matcher = DfastMatchGenerator::new(1 << 22);
7558
7559 matcher.add_data(b"012345bcdea".to_vec(), |_| {});
7560 let mut history = Vec::new();
7561 matcher.start_matching(|seq| match seq {
7562 Sequence::Literals { literals } => history.extend_from_slice(literals),
7563 Sequence::Triple { .. } => unreachable!("first block should not match history"),
7564 });
7565 assert_eq!(history, b"012345bcdea");
7566
7567 matcher.add_data(b"bcdeabcdeab".to_vec(), |_| {});
7568 let mut saw_first_sequence = false;
7569 matcher.start_matching(|seq| {
7570 assert!(!saw_first_sequence, "expected a single cross-block match");
7571 saw_first_sequence = true;
7572 match seq {
7573 Sequence::Literals { .. } => {
7574 panic!("expected tail-anchored cross-block match before any literals")
7575 }
7576 Sequence::Triple {
7577 literals,
7578 offset,
7579 match_len,
7580 } => {
7581 assert_eq!(literals, b"");
7582 assert_eq!(offset, 5);
7583 assert_eq!(match_len, 11);
7584 let start = history.len() - offset;
7585 for i in 0..match_len {
7586 let byte = history[start + i];
7587 history.push(byte);
7588 }
7589 }
7590 }
7591 });
7592
7593 assert!(
7594 saw_first_sequence,
7595 "expected tail-anchored cross-block match"
7596 );
7597 assert_eq!(history, b"012345bcdeabcdeabcdeab");
7598}
7599
7600#[test]
7601fn dfast_dense_skip_matching_backfills_previous_tail_for_next_block() {
7602 let mut matcher = DfastMatchGenerator::new(1 << 22);
7603 let tail = b"Qz9kLm2Rp";
7604 let mut first = b"0123456789abcdef".to_vec();
7605 first.extend_from_slice(tail);
7606 matcher.add_data(first.clone(), |_| {});
7607 matcher.skip_matching(Some(false));
7608
7609 let mut second = tail.to_vec();
7610 second.extend_from_slice(b"after-tail-literals");
7611 matcher.add_data(second, |_| {});
7612
7613 let mut first_sequence = None;
7614 matcher.start_matching(|seq| {
7615 if first_sequence.is_some() {
7616 return;
7617 }
7618 first_sequence = Some(match seq {
7619 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
7620 Sequence::Triple {
7621 literals,
7622 offset,
7623 match_len,
7624 } => (literals.len(), offset, match_len),
7625 });
7626 });
7627
7628 let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
7629 assert_eq!(
7630 lit_len, 0,
7631 "expected immediate cross-block match at block start"
7632 );
7633 assert_eq!(
7634 offset,
7635 tail.len(),
7636 "expected dense skip to preserve cross-boundary tail match"
7637 );
7638 assert!(
7639 match_len >= DFAST_MIN_MATCH_LEN,
7640 "match length should satisfy dfast minimum match length"
7641 );
7642}
7643
7644#[test]
7645fn dfast_sparse_skip_matching_preserves_tail_cross_block_match() {
7646 let mut matcher = DfastMatchGenerator::new(1 << 22);
7647 let tail = b"Qz9kLm2Rp";
7648 let mut first = deterministic_high_entropy_bytes(0x9E37_79B9_7F4A_7C15, 4096);
7649 let tail_start = first.len() - tail.len();
7650 first[tail_start..].copy_from_slice(tail);
7651 matcher.add_data(first.clone(), |_| {});
7652
7653 matcher.skip_matching(Some(true));
7654
7655 let mut second = tail.to_vec();
7656 second.extend_from_slice(b"after-tail-literals");
7657 matcher.add_data(second, |_| {});
7658
7659 let mut first_sequence = None;
7660 matcher.start_matching(|seq| {
7661 if first_sequence.is_some() {
7662 return;
7663 }
7664 first_sequence = Some(match seq {
7665 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
7666 Sequence::Triple {
7667 literals,
7668 offset,
7669 match_len,
7670 } => (literals.len(), offset, match_len),
7671 });
7672 });
7673
7674 let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
7675 assert_eq!(
7676 lit_len, 0,
7677 "expected immediate cross-block match at block start"
7678 );
7679 assert_eq!(
7680 offset,
7681 tail.len(),
7682 "expected match against densely seeded tail"
7683 );
7684 assert!(
7685 match_len >= DFAST_MIN_MATCH_LEN,
7686 "match length should satisfy dfast minimum match length"
7687 );
7688}
7689
7690#[test]
7691fn dfast_skip_matching_dense_backfills_newly_hashable_long_tail_positions() {
7692 let mut matcher = DfastMatchGenerator::new(1 << 22);
7693 let first = deterministic_high_entropy_bytes(0x7A64_0315_D4E1_91C3, 4096);
7694 let first_len = first.len();
7695 matcher.add_data(first, |_| {});
7696 matcher.skip_matching_dense();
7697
7698 matcher.add_data(alloc::vec![0xAB], |_| {});
7701 matcher.skip_matching_dense();
7702
7703 let target_abs_pos = first_len - 7;
7704 let target_rel = target_abs_pos - matcher.history_abs_start;
7705 let live = matcher.live_history();
7706 assert!(
7707 target_rel + 8 <= live.len(),
7708 "fixture must make the boundary start long-hashable"
7709 );
7710 let long_hash = matcher.long_hash_index(&live[target_rel..]);
7711 let target_slot = matcher.pack_slot(target_abs_pos);
7712 assert_ne!(
7715 target_slot, DFAST_EMPTY_SLOT,
7716 "pack_slot must never return the empty-slot sentinel for a real position"
7717 );
7718 assert_eq!(
7719 matcher.long_hash[long_hash], target_slot,
7720 "dense skip must seed long-hash entry for newly hashable boundary start"
7721 );
7722}
7723
7724#[test]
7725fn dfast_seed_remaining_hashable_starts_seeds_last_short_hash_positions() {
7726 let mut matcher = DfastMatchGenerator::new(1 << 20);
7727 let block = deterministic_high_entropy_bytes(0x13F0_9A6D_55CE_7B21, 64);
7728 matcher.add_data(block, |_| {});
7729 matcher.ensure_hash_tables();
7730
7731 let current_len = matcher.window_blocks.back().copied().unwrap_or(0);
7732 let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
7733 let seed_start = current_len - DFAST_MIN_MATCH_LEN;
7734 matcher.seed_remaining_hashable_starts(current_abs_start, current_len, seed_start);
7735
7736 let target_abs_pos = current_abs_start + current_len - 4;
7737 let target_rel = target_abs_pos - matcher.history_abs_start;
7738 let live = matcher.live_history();
7739 assert!(
7740 target_rel + 4 <= live.len(),
7741 "fixture must leave the last short-hash start valid"
7742 );
7743 let short_hash = matcher.short_hash_index(&live[target_rel..]);
7744 let target_slot = matcher.pack_slot(target_abs_pos);
7745 assert_ne!(
7746 target_slot, DFAST_EMPTY_SLOT,
7747 "pack_slot must never return the empty-slot sentinel for a real position"
7748 );
7749 assert_eq!(
7750 matcher.short_hash[short_hash], target_slot,
7751 "tail seeding must include the last 4-byte-hashable start"
7752 );
7753}
7754
7755#[test]
7756fn dfast_seed_remaining_hashable_starts_handles_pos_at_block_end() {
7757 let mut matcher = DfastMatchGenerator::new(1 << 20);
7758 let block = deterministic_high_entropy_bytes(0x7BB2_DA91_441E_C0EF, 64);
7759 matcher.add_data(block, |_| {});
7760 matcher.ensure_hash_tables();
7761
7762 let current_len = matcher.window_blocks.back().copied().unwrap_or(0);
7763 let current_abs_start = matcher.history_abs_start + matcher.window_size - current_len;
7764 matcher.seed_remaining_hashable_starts(current_abs_start, current_len, current_len);
7765
7766 let target_abs_pos = current_abs_start + current_len - 4;
7767 let target_rel = target_abs_pos - matcher.history_abs_start;
7768 let live = matcher.live_history();
7769 assert!(
7770 target_rel + 4 <= live.len(),
7771 "fixture must leave the last short-hash start valid"
7772 );
7773 let short_hash = matcher.short_hash_index(&live[target_rel..]);
7774 let target_slot = matcher.pack_slot(target_abs_pos);
7775 assert_ne!(
7776 target_slot, DFAST_EMPTY_SLOT,
7777 "pack_slot must never return the empty-slot sentinel for a real position"
7778 );
7779 assert_eq!(
7780 matcher.short_hash[short_hash], target_slot,
7781 "tail seeding must still include the last 4-byte-hashable start when pos is at block end"
7782 );
7783}
7784
7785#[test]
7801fn dfast_ensure_room_for_rebases_above_guard_band() {
7802 let mut dfast = DfastMatchGenerator::new(1 << 22);
7803 dfast.set_hash_bits(10);
7804 dfast.ensure_hash_tables();
7805
7806 let early_abs = 1024usize;
7814 let early_packed = dfast.pack_slot(early_abs);
7815 assert_ne!(early_packed, DFAST_EMPTY_SLOT);
7816 dfast.short_hash[0] = early_packed;
7817 dfast.long_hash[0] = early_packed;
7818
7819 let trigger_abs = (u32::MAX as usize) - (DFAST_REBASE_GUARD_BAND as usize) + 1;
7825 assert_eq!(dfast.position_base, 0);
7826 dfast.ensure_room_for(trigger_abs);
7827 assert_eq!(
7828 dfast.position_base, DFAST_REBASE_GUARD_BAND as usize,
7829 "rebase must advance position_base by DFAST_REBASE_GUARD_BAND"
7830 );
7831
7832 assert_eq!(
7838 dfast.short_hash[0], DFAST_EMPTY_SLOT,
7839 "pre-rebase short-hash entries below the reducer must become empty"
7840 );
7841 assert_eq!(
7842 dfast.long_hash[0], DFAST_EMPTY_SLOT,
7843 "pre-rebase long-hash entries below the reducer must become empty"
7844 );
7845
7846 let post_packed = dfast.pack_slot(trigger_abs);
7850 assert_ne!(post_packed, DFAST_EMPTY_SLOT);
7851 let unpacked = dfast.position_base + (post_packed as usize) - 1;
7852 assert_eq!(
7853 unpacked, trigger_abs,
7854 "post-rebase pack/unpack must round-trip the absolute position"
7855 );
7856}
7857
7858#[test]
7859fn dfast_sparse_skip_matching_backfills_previous_tail_for_consecutive_sparse_blocks() {
7860 let mut matcher = DfastMatchGenerator::new(1 << 22);
7861 let boundary_prefix = [0xFA, 0xFB, 0xFC];
7862 let boundary_suffix = [0xFD, 0xEE, 0xAD, 0xBE, 0xEF, 0x11, 0x22, 0x33];
7863
7864 let mut first = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
7865 let first_tail_start = first.len() - boundary_prefix.len();
7866 first[first_tail_start..].copy_from_slice(&boundary_prefix);
7867 matcher.add_data(first, |_| {});
7868 matcher.skip_matching(Some(true));
7869
7870 let mut second = deterministic_high_entropy_bytes(0xA5A5_5A5A_C3C3_3C3C, 4096);
7871 second[..boundary_suffix.len()].copy_from_slice(&boundary_suffix);
7872 matcher.add_data(second.clone(), |_| {});
7873 matcher.skip_matching(Some(true));
7874
7875 let mut third = boundary_prefix.to_vec();
7876 third.extend_from_slice(&boundary_suffix);
7877 third.extend_from_slice(b"-trailing-literals");
7878 matcher.add_data(third, |_| {});
7879
7880 let mut first_sequence = None;
7881 matcher.start_matching(|seq| {
7882 if first_sequence.is_some() {
7883 return;
7884 }
7885 first_sequence = Some(match seq {
7886 Sequence::Literals { literals } => (literals.len(), 0usize, 0usize),
7887 Sequence::Triple {
7888 literals,
7889 offset,
7890 match_len,
7891 } => (literals.len(), offset, match_len),
7892 });
7893 });
7894
7895 let (lit_len, offset, match_len) = first_sequence.expect("expected at least one sequence");
7896 assert_eq!(
7897 lit_len, 0,
7898 "expected immediate match from the prior sparse-skip boundary"
7899 );
7900 assert_eq!(
7901 offset,
7902 second.len() + boundary_prefix.len(),
7903 "expected match against backfilled first→second boundary start"
7904 );
7905 assert!(
7906 match_len >= DFAST_MIN_MATCH_LEN,
7907 "match length should satisfy dfast minimum match length"
7908 );
7909}
7910
7911#[test]
7912fn fastest_hint_iteration_23_sequences_reconstruct_source() {
7913 fn generate_data(seed: u64, len: usize) -> Vec<u8> {
7914 let mut state = seed;
7915 let mut data = Vec::with_capacity(len);
7916 for _ in 0..len {
7917 state = state
7918 .wrapping_mul(6364136223846793005)
7919 .wrapping_add(1442695040888963407);
7920 data.push((state >> 33) as u8);
7921 }
7922 data
7923 }
7924
7925 let i = 23u64;
7926 let len = (i * 89 % 16384) as usize;
7927 let mut data = generate_data(i, len);
7928 let repeat = data[128..256].to_vec();
7931 data.extend_from_slice(&repeat);
7932 data.extend_from_slice(&repeat);
7933
7934 let mut driver = MatchGeneratorDriver::new(1024 * 128, 1);
7935 driver.set_source_size_hint(data.len() as u64);
7936 driver.reset(CompressionLevel::Fastest);
7937 let mut space = driver.get_next_space();
7938 space[..data.len()].copy_from_slice(&data);
7939 space.truncate(data.len());
7940 driver.commit_space(space);
7941
7942 let mut rebuilt = Vec::with_capacity(data.len());
7943 let mut saw_triple = false;
7944 driver.start_matching(|seq| match seq {
7945 Sequence::Literals { literals } => rebuilt.extend_from_slice(literals),
7946 Sequence::Triple {
7947 literals,
7948 offset,
7949 match_len,
7950 } => {
7951 saw_triple = true;
7952 rebuilt.extend_from_slice(literals);
7953 assert!(offset > 0, "offset must be non-zero");
7954 assert!(
7955 offset <= rebuilt.len(),
7956 "offset must reference already-produced bytes: offset={} produced={}",
7957 offset,
7958 rebuilt.len()
7959 );
7960 let start = rebuilt.len() - offset;
7961 for idx in 0..match_len {
7962 let b = rebuilt[start + idx];
7963 rebuilt.push(b);
7964 }
7965 }
7966 });
7967
7968 assert!(saw_triple, "fixture must emit at least one match");
7969 assert_eq!(rebuilt, data);
7970}